#!/usr/bin/env bash
#
# Lab walkthrough: copy the lab files from gs://spls/gsp1153 into a bucket
# named after the active project, load accounts/transactions into the
# BigQuery dataset `mainframe_import`, create the joined view
# `account_transactions`, run a few exploratory queries, and finally start a
# Dataflow flex-template job (BigQuery_to_Elasticsearch) that reads the view.
#
# The commands can be run as a script or pasted into Cloud Shell one at a
# time. For that reason `set -e` and `exit` are deliberately not used: either
# would close an interactive shell on the first failing command.
#
# Environment:
#   CONNECTION_URL  optional; overrides the connectionUrl template parameter
#   API_KEY         optional; overrides the apiKey template parameter

# Resolve the active project. Assignment and export are separate statements so
# a gcloud failure is not hidden behind the exit status of `export`.
GOOGLE_CLOUD_PROJECT=$(gcloud config get-value project)
export GOOGLE_CLOUD_PROJECT
# Stop here (script) or abort this line (interactive shell) when no project is
# configured; otherwise every command below would target "gs://" or an empty
# project id.
: "${GOOGLE_CLOUD_PROJECT:?no active gcloud project; run 'gcloud config set project PROJECT_ID' first}"

# Create a bucket named after the project and copy the lab files into it.
# The wildcard is quoted so gsutil, not the local shell, expands it.
gsutil mb "gs://${GOOGLE_CLOUD_PROJECT}"
gsutil cp -r "gs://spls/gsp1153/*" "gs://${GOOGLE_CLOUD_PROJECT}"

bq --location=us mk --dataset mainframe_import

# bq load takes the schema from a local file, so fetch the schema files first.
gsutil cp "gs://${GOOGLE_CLOUD_PROJECT}/schema_*.json" .

bq load --source_format=NEWLINE_DELIMITED_JSON \
  mainframe_import.accounts \
  "gs://${GOOGLE_CLOUD_PROJECT}/accounts.json" \
  schema_accounts.json

bq load --source_format=NEWLINE_DELIMITED_JSON \
  mainframe_import.transactions \
  "gs://${GOOGLE_CLOUD_PROJECT}/transactions.json" \
  schema_transactions.json

# Join every transaction with its account. a.id is excluded because the join
# condition makes it equal to t.account_id, which t.* already provides.
bq query --use_legacy_sql=false \
"CREATE OR REPLACE VIEW \`${GOOGLE_CLOUD_PROJECT}.mainframe_import.account_transactions\` AS
SELECT t.*, a.* EXCEPT (id)
FROM \`mainframe_import.accounts\` AS a
JOIN \`mainframe_import.transactions\` AS t
ON a.id = t.account_id"

# Exploratory queries against the loaded tables.
bq query --use_legacy_sql=false \
"SELECT * FROM \`mainframe_import.transactions\` LIMIT 100"

bq query --use_legacy_sql=false \
"SELECT DISTINCT(occupation), COUNT(occupation)
FROM \`mainframe_import.accounts\`
GROUP BY occupation"

bq query --use_legacy_sql=false \
"SELECT *
FROM \`mainframe_import.accounts\`
WHERE salary_range = '110,000+'
ORDER BY name"

gcloud services enable dataflow.googleapis.com

# Elasticsearch connection settings, passed below as the template's
# connectionUrl and apiKey parameters. Enter your own values in Cloud Shell
# (one by one) before this point to override the fallbacks.
# NOTE(review): the fallbacks look like lab-issued credentials committed in
# clear text. They are kept so the script behaves as before, but should be
# rotated and removed from version control if they are still valid.
export CONNECTION_URL="${CONNECTION_URL:-quicklab:dXMtY2VudHJhbDEuZ2NwLmNsb3VkLmVzLmlvOjQ0MyQyN2QxZGZmY2Y0Mjk0N2I5OGJhZmVjYjMzZjQxNTgzYSRkYTFjYWY3YTBmMzY0MTBmOWFkMjhlNjZjMDAwNWFmYg==}"
export API_KEY="${API_KEY:-NERjOFdva0J2cXR4RHZIbmVseEY6LTFBQ1N6QVVRMG1FWEpZTFJHU052Zw==}"

# To create and run the Dataflow job, paste the following command into Cloud
# Shell. The query names the view in the active project; the whole
# --parameters value is double-quoted so the project id expands while the
# backticks and the `*` reach gcloud literally.
gcloud dataflow flex-template run bqtoelastic-btecky-job \
  --template-file-gcs-location gs://dataflow-templates-us-central1/latest/flex/BigQuery_to_Elasticsearch \
  --region us-central1 \
  --num-workers 1 \
  --parameters "index=transactions,maxNumWorkers=1,query=select * from \`${GOOGLE_CLOUD_PROJECT}.mainframe_import.account_transactions\`,connectionUrl=${CONNECTION_URL},apiKey=${API_KEY}"