10 steps to set up BigQuery and load JSON data from Google Apps Script into BigQuery
Setting up BigQuery
Create a project in Google Cloud Platform
Navigate to BigQuery in Google Cloud Platform and create a dataset
A dataset is created by clicking the three small dots beside your project and selecting Create dataset (a scripted alternative is sketched after these setup steps).
Enable billing for your project so that you can enable the BigQuery API
Search for Billing in the console and create a billing account under Billing accounts
Enable the BigQuery API in BigQuery by clicking on Data transfers and enabling the API from there.
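If you prefer to script this step, a dataset can also be created programmatically once the BigQuery advanced service has been added to an Apps Script project (described in the next section). A minimal sketch, assuming the hypothetical names my-gcp-project and bibook_data:

// Sketch: create the dataset via the BigQuery advanced service.
// The project and dataset names below are placeholders; replace them with your own.
function createDataset() {
  var projectId = 'my-gcp-project';
  var datasetId = 'bibook_data';
  var dataset = {
    datasetReference: {
      projectId: projectId,
      datasetId: datasetId
    }
  };
  BigQuery.Datasets.insert(dataset, projectId);
  Logger.log('Created dataset ' + projectId + ':' + datasetId);
}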
Loading JSON data from Google Apps Script to BigQuery
Open Google Apps Script and start a new project
In your new project, enable the BigQuery API by clicking on Services, scrolling down to BigQuery API, selecting it, and adding it (apologies, the screenshot is in Swedish)
You can roughly follow the steps shown in the picture; the resulting manifest entry is sketched below
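Adding the BigQuery advanced service this way also records it in the project's appsscript.json manifest (visible if you enable showing the manifest file in the project settings). Your manifest will contain other fields as well, but the dependency entry should look roughly like this:

{
  "dependencies": {
    "enabledAdvancedServices": [
      {
        "userSymbol": "BigQuery",
        "version": "v2",
        "serviceId": "bigquery"
      }
    ]
  }
}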
Next, insert the provided script into your Google Apps Script project
Insert the API key, URL, and table references (project name and dataset name) where the comments marked "TODO" indicate; an example of the filled-in section is sketched below.
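For reference, the filled-in TODO section might look roughly like this (all values are placeholders for illustration, not real credentials):

// Example of the TODO section with placeholder values
var header = { 'X-Bibook-Api-Token': 'your-api-key-here' };
var endpoints = ['general_ledger_aggregated_by_month', 'accounts', 'general_ledger'];
var tableReference = {
  projectId: "my-gcp-project",   // the Google Cloud project from the setup steps
  datasetId: "bibook_data",      // the dataset you created in BigQuery
  tableId: ""                    // filled in per endpoint by the script
};
// In the request URL, also replace {company_id} with your own company id.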
Run the project
You may have to authorize the script to access your Google Cloud project
Check in BigQuery that the data was successfully loaded to the correct table.
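Besides eyeballing the tables in the BigQuery console, you can run a quick row-count query, either in the BigQuery UI or from the same Apps Script project. A small sketch, using the hypothetical project and dataset names from above:

// Sketch: verify a load by counting rows in one of the created tables.
// Project, dataset and table names are placeholders; adjust them to your environment.
function checkRowCount() {
  var projectId = 'my-gcp-project';
  var request = {
    query: 'SELECT COUNT(*) AS row_count FROM `my-gcp-project.bibook_data.accounts`',
    useLegacySql: false
  };
  var queryResults = BigQuery.Jobs.query(request, projectId);
  Logger.log('Rows loaded: ' + queryResults.rows[0].f[0].v);
}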
Fill in the parameters from your environment into the script below.
Pre-built example Apps Script to extract data from the Bibook API to BigQuery:
function getData() {
  // TODO: insert api key here
  var header = { 'X-Bibook-Api-Token': 'api_key_here' };

  // TODO: insert endpoints here
  var endpoints = ['general_ledger_aggregated_by_month', 'accounts', 'general_ledger'];

  // TODO: insert projectId and datasetId from BigQuery here
  var tableReference = {
    projectId: "project_name_here",
    datasetId: "dataset_name_here",
    tableId: ""
  };

  // loop over all endpoints
  endpoints.forEach(function(endpoint) {
    // use the endpoint name as the table name
    tableReference.tableId = endpoint;

    // if the table exists, delete it
    try {
      BigQuery.Tables.remove(tableReference.projectId, tableReference.datasetId, tableReference.tableId);
      Logger.log("Deleted " + tableReference.datasetId + "." + endpoint + " table");
    } catch (e) {
      Logger.log("There is no table called: " + tableReference.datasetId + "." + endpoint);
    }

    // TODO: replace {company_id} in the URL with your own company id
    var url = "https://bibook.com/bibook/api/v1/{company_id}/" + endpoint + "?page=1&bulk=false";
    var options = {
      'method': 'get',
      'headers': header,
      'payload': {}
    };

    // get data from the api
    Logger.log("getting data from " + endpoint);
    var response = UrlFetchApp.fetch(url, options);
    Logger.log("Response status: " + response.getResponseCode());
    var json = response.getContentText();
    var data = JSON.parse(json);

    // check total pages to loop
    var total_pages = data.total_pages;
    Logger.log("There are a total of " + total_pages + " pages in " + endpoint);

    // placeholder for current page
    var page = 1;

    // get the schema for this endpoint
    var fields = getSchema(data.data);

    // load the first page of data to BigQuery
    var jobs = loadToBigQuery(tableReference, data.data, fields);

    // if more pages exist, get and load them
    while (page < total_pages) {
      page += 1;
      var new_url = "https://bibook.com/bibook/api/v1/{company_id}/" + endpoint + "?page=" + page + "&bulk=false";
      var next_response = UrlFetchApp.fetch(new_url, options);
      Logger.log("Response status: " + next_response.getResponseCode());
      var next_json = next_response.getContentText();
      var next_data = JSON.parse(next_json);
      var next_job = loadToBigQuery(tableReference, next_data.data, fields);
      Logger.log("Currently at page: " + page + " out of " + total_pages);
      // sleep to avoid exceeding the request limit
      Utilities.sleep(800);
    }

    // keep an eye on the load jobs
    Logger.log('status of job can be found here: https://bigquery.cloud.google.com/jobs/' + tableReference.projectId);
  });
}

// figure out the schema from the JSON data's first element, assuming that data types don't change
function getSchema(data) {
  var model = data[0];
  var fields = Object.keys(model).reduce(function(p, c) {
    var t = typeof model[c];
    // map the javascript type to a type accepted by BigQuery
    if (t == "number") {
      t = "numeric";
    } else if (model[c] == null) {
      t = "string";
    }
    p.push({ name: c, type: t });
    return p;
  }, []);
  return fields;
}

function loadToBigQuery(tableReference, data, fields) {
  // define the load job
  var job = {
    configuration: {
      load: {
        destinationTable: tableReference,
        sourceFormat: "NEWLINE_DELIMITED_JSON",
        writeDisposition: "WRITE_APPEND",
        schema: {
          fields: fields
        }
      }
    }
  };

  // there is a maximum size that a UrlFetch POST body can be
  var MAX_POST = 1024 * 1024 * 8;

  // build newline-delimited JSON, but chunk it up
  var chunk = '', jobs = [];
  data.forEach(function(d) {
    // convert the row to a string terminated with "\n"
    var chunklet = JSON.stringify(d) + "\n";
    if (chunklet.length > MAX_POST) {
      throw 'chunklet size ' + chunklet.length + ' is greater than max permitted size ' + MAX_POST;
    }
    // if adding this row would exceed the max post size, push the current chunk to BigQuery
    if (chunklet.length + chunk.length > MAX_POST) {
      jobs.push(BigQuery.Jobs.insert(job, tableReference.projectId, Utilities.newBlob(chunk)));
      chunk = "";
      // subsequent chunks keep appending to the table
      job.configuration.load.writeDisposition = "WRITE_APPEND";
    }
    // add this row to the current chunk
    chunk += chunklet;
  });

  // finish off the remaining chunk
  if (chunk.length) {
    jobs.push(BigQuery.Jobs.insert(job, tableReference.projectId, Utilities.newBlob(chunk)));
  }
  return jobs;
}
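The load jobs created by loadToBigQuery run asynchronously, and because the newline-delimited JSON is split into chunks of at most MAX_POST (8 MB) bytes per POST, one call can produce several jobs; getData only logs a console link for monitoring them. If you would rather check their state from the script, a possible sketch using the job objects returned by loadToBigQuery:

// Sketch: log the state of the load jobs returned by loadToBigQuery.
// "jobs" is the array returned by loadToBigQuery; projectId is your own project id.
function logJobStates(jobs, projectId) {
  jobs.forEach(function(j) {
    var latest = BigQuery.Jobs.get(projectId, j.jobReference.jobId);
    var state = latest.status.state;
    var error = latest.status.errorResult ? ' (' + latest.status.errorResult.message + ')' : '';
    Logger.log(j.jobReference.jobId + ': ' + state + error);
  });
}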