10 steps to setting up BigQuery and loading JSON data from Google Apps Script to BigQuery 

 

Setting up BigQuery 

 

  1. Create a project in Google Cloud Platform.

  2. Navigate to BigQuery in Google Cloud Platform and create a dataset (a programmatic alternative is sketched after this list).

     - A dataset is created by clicking the three small dots beside your project.

  3. Enable billing for your account so that you can enable the BigQuery API.

     - Search for Billing and create a billing account under your billing accounts.

  4. Enable the BigQuery API in BigQuery by clicking on Data transfers and enabling the API there.
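
If you prefer to script this step as well, the dataset can also be created from Apps Script with the BigQuery advanced service (the same service that is enabled in step 6 below). This is only a minimal sketch; the project id and dataset name are placeholders you would replace with your own:

function createDataset() {
  // placeholder values, replace with your own project id and dataset name
  var projectId = 'project_name_here';
  var dataset = {
    datasetReference: {
      projectId: projectId,
      datasetId: 'dataset_name_here'
    }
  };
  var created = BigQuery.Datasets.insert(dataset, projectId);
  Logger.log('Created dataset: ' + created.id);
}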


Loading JSON data from Google Apps Script to BigQuery 

 

  5. Open Google Apps Script and start a new project.

  6. In your new project, enable the BigQuery API: click on Services, scroll down to find the BigQuery API, click on it and add it (sorry, the picture is in Swedish).

     - You can roughly follow the steps in the picture.

  7. Next, insert the provided script into your Google Apps Script project.

  8. Insert the API key, URL and table references (project name and dataset name) where the comments suggest, marked by "TODO".

  9. Run the project.

     - You may have to authorize the script to give it access to your Google project.

  10. Check in BigQuery that the data was successfully loaded to the correct table (a small verification query is sketched after this list).
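
If you want to check the result from Apps Script instead of the BigQuery console, a small count query against one of the loaded tables also works. This is only a minimal sketch using the BigQuery advanced service, with placeholder project, dataset and table names that you would replace with your own:

function checkRowCount() {
  // placeholder names, replace with your own project, dataset and table
  var projectId = 'project_name_here';
  var request = {
    query: 'SELECT COUNT(*) AS row_count FROM `project_name_here.dataset_name_here.accounts`',
    useLegacySql: false
  };
  var result = BigQuery.Jobs.query(request, projectId);
  Logger.log('Rows in accounts: ' + result.rows[0].f[0].v);
}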


Fill in the parameters from your environment in the script below. 
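
For example, with made-up placeholder values filled in, the TODO parameters near the top of the script could look like this (the key, project and dataset names are only examples, not real values):

var header = {
  'X-Bibook-Api-Token': 'abc123_your_api_key'
};
var endpoints = ['accounts'];
var tableReference = {
  projectId: 'my-gcp-project',
  datasetId: 'bibook_data',
  tableId: ''
};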

Pre-built example Apps Script to extract data from the Bibook API into BigQuery:

function getData() {
  // TODO: insert your API key here
  var header = {
    'X-Bibook-Api-Token': 'api_key_here'
  };
  // TODO: insert the endpoints you want to load here
  var endpoints = ['general_ledger_aggregated_by_month', 'accounts', 'general_ledger'];
  // TODO: insert the projectId and datasetId from BigQuery here
  var tableReference = {
    projectId: "project_name_here",
    datasetId: "dataset_name_here",
    tableId: ""
  };
  // loop all endpoints
  endpoints.forEach(function(endpoint, i) {
    // use the endpoint name as the table name
    tableReference.tableId = endpoint;
    // if the table already exists, delete it so the data is loaded from scratch
    try {
      BigQuery.Tables.remove(tableReference.projectId, tableReference.datasetId, tableReference.tableId);
      Logger.log("Deleted " + tableReference.datasetId + "." + endpoint + " table");
    }
    catch (e) {
      Logger.log("There is no table called: " + tableReference.datasetId + "." + endpoint);
    }
    // TODO: replace {company_id} with your own company id
    var url = "https://bibook.com/bibook/api/v1/{company_id}/" + endpoint + "?page=1&bulk=false";
    var options = {
      'method' : 'get',
      'headers' : header,
      'payload': {}
    };
    // get the first page of data from the API
    Logger.log("getting data from " + endpoint);
    var response = UrlFetchApp.fetch(url, options);
    Logger.log("Response status: " + response.getResponseCode());
    var json = response.getContentText();
    var data = JSON.parse(json);
    // check how many pages there are to loop over
    var total_pages = data.total_pages;
    Logger.log("There are a total of " + total_pages + " pages in " + endpoint);
    // placeholder for the current page
    var page = 1;
    // derive the schema for this endpoint from the first page of data
    var fields = getSchema(data.data);
    // load the first page of data to BigQuery
    var jobs = loadToBigQuery(tableReference, data.data, fields);
    // if more pages exist, fetch and load them too
    while (page < total_pages) {
      page += 1;
      var new_url = "https://bibook.com/bibook/api/v1/{company_id}/" + endpoint + "?page=" + page + "&bulk=false";
      var next_response = UrlFetchApp.fetch(new_url, options);
      Logger.log("Response status: " + next_response.getResponseCode());
      var next_json = next_response.getContentText();
      var next_data = JSON.parse(next_json);
      var next_job = loadToBigQuery(tableReference, next_data.data, fields);
      Logger.log("Currently at page: " + page + " out of " + total_pages);
      // sleep to avoid exceeding the API request limit
      Utilities.sleep(800);
    }
    // keep an eye on it
    Logger.log('status of job can be found here:    https://bigquery.cloud.google.com/jobs/' +  tableReference.projectId);
  });
}
// figure out the schema from the JSON data's first element, assuming that datatypes don't change
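// For example, a first record like {id: 1, name: "Cash", balance: null} produces the schema
// [{name: "id", type: "numeric"}, {name: "name", type: "string"}, {name: "balance", type: "string"}]
// (null values carry no type information, so they fall back to string).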
function getSchema (data) {
  var model = data[0];
  var fields = Object.keys(model).reduce(function(p,c) {
    var t = typeof model[c];
    // map the JavaScript type to a BigQuery SQL type
    if (t == "number") {
      t = "numeric";
    }
    else if (model[c] == null) {
      t = "string";
    }
    p.push({name: c, type: t});
    return p;
  }, []);
  return fields;
}
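// Note: loadToBigQuery splits the newline-delimited JSON into chunks of at most MAX_POST
// characters and submits each chunk as its own load job, because a single UrlFetch POST
// payload has a limited size; every chunk is appended to the same destination table.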
function loadToBigQuery (tableReference, data, fields) {
  // define the load job 
  var job = {
    configuration: {
      load: {
        destinationTable: tableReference,
        sourceFormat: "NEWLINE_DELIMITED_JSON",
        writeDisposition: "WRITE_APPEND",
        schema:{
          fields:fields
        }
      },
    }
  };
  // there is a maximum size that a UrlFetch POST payload can be
  var MAX_POST = 1024 * 1024 * 8;
  // build newline-delimited JSON, but chunk it up to stay under that limit
  var chunk = '', jobs = [];
  data.forEach (function (d,i,a) {
    
    // convert to string with "\n"
    var chunklet = JSON.stringify(d) + "\n";
    if (chunklet.length  > MAX_POST) {
      throw 'chunklet size ' + chunklet.length + ' is greater than max permitted size ' + MAX_POST;
    }
    
    // if adding this record would push the chunk over the max POST size, load the current chunk first
    if (chunklet.length + chunk.length > MAX_POST) {
      jobs.push ( BigQuery.Jobs.insert(job, tableReference.projectId, Utilities.newBlob(chunk)) );
      chunk = "";
    }
    
    // add the chunk to the pile
    chunk += chunklet;
   
  });
  
  // finish off
  if (chunk.length) {
    jobs.push ( BigQuery.Jobs.insert(job, tableReference.projectId, Utilities.newBlob(chunk)) );
  }
  return jobs;
}