Sample Code for Loading Avro Files to BigQuery with Cloud Storage + Cloud Functions
I’ll introduce sample code for loading Avro files uploaded to Cloud Storage into BigQuery using Cloud Functions.
This article covers the prerequisites and the Cloud Functions sample code for the following flow: an Avro file is uploaded to Cloud Storage, which triggers a Cloud Function that loads the file into BigQuery.
# Create the destination BigQuery dataset in the asia-northeast1 location.
bq mk --dataset \
  --location=asia-northeast1 \
  your-dataset-name
# Create the Cloud Storage bucket (standard storage class, asia-northeast1)
# that Avro files will be uploaded to.
gsutil mb \
  -c standard \
  -l asia-northeast1 \
  -p your-project-name \
  gs://your-bucket-name
# Deploy the exported loadTableGCSAvro handler so that it runs whenever an
# object is finalized (created or overwritten) in your-bucket-name.
# NOTE(review): nodejs14 may no longer be an available runtime; confirm the
# currently supported Node.js runtime before deploying.
gcloud functions deploy loadTableGCSAvro \
  --region asia-northeast1 \
  --runtime nodejs14 \
  --trigger-resource your-bucket-name \
  --trigger-event google.storage.object.finalize
Here’s the sample code for a Cloud Function that loads Avro files uploaded to Cloud Storage into BigQuery:
'use strict';
// Google Cloud client libraries for BigQuery and Cloud Storage.
const {BigQuery} = require('@google-cloud/bigquery');
const {Storage} = require('@google-cloud/storage');
// Clients are constructed once at module scope (no explicit options are
// passed) and are used by bqLoadAvro.
const bigquery = new BigQuery();
const storage = new Storage();
exports.loadTableGCSAvro = async (file, context) => {
console.info(`Uploaded file: ${file.name}`);
if (file.size === '0') {
console.info('file.size is 0');
return;
}
// Example of file.name: bucketName/fileName/20210801-1235.avro
const fileNames = file.name.split('/');
if (fileNames.length !== 3) {
console.error(`fileName is invalid: ${file.name}`);
return;
}
const datasetId = fileNames[0];
const tableId = fileNames[1];
await bqLoadAvro(file.bucket, file.name, datasetId, tableId);
};
/**
 * Loads a single Avro object from Cloud Storage into a BigQuery table,
 * appending to any rows the table already holds.
 *
 * @param {string} bucketName - Bucket containing the Avro object.
 * @param {string} fileName - Full object name within the bucket.
 * @param {string} datasetId - Destination BigQuery dataset ID.
 * @param {string} tableId - Destination BigQuery table ID.
 * @returns {Promise<void>} Resolves when the load job finished without errors.
 * @throws {Error} When the finished job reports errors; the raw error entries
 *     from the job status are attached as the `errors` property.
 */
async function bqLoadAvro(bucketName, fileName, datasetId, tableId) {
  // @doc https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad
  const jobConfigurationLoad = {
    sourceFormat: 'AVRO',
    useAvroLogicalTypes: true,
    writeDisposition: 'WRITE_APPEND',
  };

  // Load data from a Google Cloud Storage file into the table
  const source = storage.bucket(bucketName).file(fileName);
  const [job] = await bigquery
    .dataset(datasetId)
    .table(tableId)
    .load(source, jobConfigurationLoad);
  // load() waits for the job to finish
  console.log(`Job ${job.id} completed.`);

  // Check the job's status for errors
  const errors = job.status.errors;
  if (errors && errors.length > 0) {
    // Throw a real Error (with message and stack trace) instead of the bare
    // array, while keeping the original entries available to callers.
    const details = errors
      .map((entry) => entry.message || JSON.stringify(entry))
      .join('; ');
    const loadError = new Error(
      `BigQuery load job ${job.id} reported errors: ${details}`
    );
    loadError.errors = errors;
    throw loadError;
  }
}
That’s all from the Gemba on loading Avro files uploaded to Cloud Storage into BigQuery using Cloud Functions.