 from __future__ import print_function

 import json
-import os
-import sys
-import uuid

 from google.cloud import bigquery

-# pylint: disable=g-bad-import-order
-from absl import app as absl_app
-from absl import flags
 import tensorflow as tf
-# pylint: enable=g-bad-import-order
-
-from official.utils.flags import core as flags_core
-from official.utils.logs import logger


 class BigQueryUploader(object):
-  """Upload the benchmark and metric info to BigQuery."""
+  """Upload the benchmark and metric info from JSON input to BigQuery."""

-  def __init__(self, logging_dir, gcp_project=None, credentials=None):
+  def __init__(self, gcp_project=None, credentials=None):
     """Initializes BigQueryUploader with proper settings.

     Args:
-      logging_dir: string, logging directory that contains the benchmark log.
       gcp_project: string, the name of the GCP project that the log will be
         uploaded to. The default project name will be detected from local
         environment if no value is provided.
@@ -58,11 +47,11 @@ def __init__(self, logging_dir, gcp_project=None, credentials=None):
         google.oauth2.service_account.Credentials to load credential from local
         file for the case that the test is run outside of GCP.
     """
-    self._logging_dir = logging_dir
     self._bq_client = bigquery.Client(
         project=gcp_project, credentials=credentials)

-  def upload_benchmark_run(self, dataset_name, table_name, run_id):
+  def upload_benchmark_run_json(
+      self, dataset_name, table_name, run_id, run_json):
     """Upload benchmark run information to Bigquery.

     Args:
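Reviewer note on the credentials path described in the __init__ docstring above: for runs outside GCP, a service-account credential can be loaded from a local key file and passed to the uploader. A minimal sketch, assuming BigQueryUploader is importable from this module; the key-file path and project name are placeholders, while google.oauth2.service_account is the real library the docstring points at:

    from google.oauth2 import service_account

    # Load a service-account key from a local JSON file (path is a placeholder).
    creds = service_account.Credentials.from_service_account_file(
        "/path/to/service_account_key.json")

    # Pass the explicit project and credentials instead of relying on the
    # GCP environment defaults picked up by bigquery.Client.
    uploader = BigQueryUploader(gcp_project="my-gcp-project", credentials=creds)
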
@@ -72,19 +61,13 @@ def upload_benchmark_run(self, dataset_name, table_name, run_id):
         the data will be uploaded.
       run_id: string, a unique ID that will be attached to the data, usually
         this is a UUID4 format.
+      run_json: dict, the JSON data that contains the benchmark run info.
     """
-    expected_file = os.path.join(
-        self._logging_dir, logger.BENCHMARK_RUN_LOG_FILE_NAME)
-    with tf.gfile.GFile(expected_file) as f:
-      benchmark_json = json.load(f)
-      benchmark_json["model_id"] = run_id
-      table_ref = self._bq_client.dataset(dataset_name).table(table_name)
-      errors = self._bq_client.insert_rows_json(table_ref, [benchmark_json])
-      if errors:
-        tf.logging.error(
-            "Failed to upload benchmark info to bigquery: {}".format(errors))
-
-  def upload_metric(self, dataset_name, table_name, run_id):
+    run_json["model_id"] = run_id
+    self._upload_json(dataset_name, table_name, [run_json])
+
+  def upload_benchmark_metric_json(
+      self, dataset_name, table_name, run_id, metric_json_list):
     """Upload metric information to Bigquery.

     Args:
@@ -95,39 +78,57 @@ def upload_metric(self, dataset_name, table_name, run_id):
         benchmark_run table.
       run_id: string, a unique ID that will be attached to the data, usually
         this is a UUID4 format. This should be the same as the benchmark run_id.
+      metric_json_list: list, a list of JSON objects that record the metric info.
+    """
+    for m in metric_json_list:
+      m["run_id"] = run_id
+    self._upload_json(dataset_name, table_name, metric_json_list)
+
+  def upload_benchmark_run_file(
+      self, dataset_name, table_name, run_id, run_json_file):
+    """Upload benchmark run information to Bigquery from input json file.
+
+    Args:
+      dataset_name: string, the name of bigquery dataset where the data will be
+        uploaded.
+      table_name: string, the name of bigquery table under the dataset where
+        the data will be uploaded.
+      run_id: string, a unique ID that will be attached to the data, usually
+        this is a UUID4 format.
+      run_json_file: string, the file path that contains the run JSON data.
+    """
+    with tf.gfile.GFile(run_json_file) as f:
+      benchmark_json = json.load(f)
+      self.upload_benchmark_run_json(
+          dataset_name, table_name, run_id, benchmark_json)
+
+  def upload_metric_file(
+      self, dataset_name, table_name, run_id, metric_json_file):
+    """Upload metric information to Bigquery from input json file.
+
+    Args:
+      dataset_name: string, the name of bigquery dataset where the data will be
+        uploaded.
+      table_name: string, the name of bigquery table under the dataset where
+        the metric data will be uploaded. This is different from the
+        benchmark_run table.
+      run_id: string, a unique ID that will be attached to the data, usually
+        this is a UUID4 format. This should be the same as the benchmark run_id.
+      metric_json_file: string, the file path that contains the metric JSON
+        data.
"""
99
- expected_file = os .path .join (
100
- self ._logging_dir , logger .METRIC_LOG_FILE_NAME )
101
- with tf .gfile .GFile (expected_file ) as f :
102
- lines = f .readlines ()
120
+ with tf .gfile .GFile (metric_json_file ) as f :
103
121
metrics = []
104
- for line in filter (lambda l : l .strip (), lines ):
105
- metric = json .loads (line )
106
- metric ["run_id" ] = run_id
107
- metrics .append (metric )
108
- table_ref = self ._bq_client .dataset (dataset_name ).table (table_name )
109
- errors = self ._bq_client .insert_rows_json (table_ref , metrics )
110
- if errors :
111
- tf .logging .error (
112
- "Failed to upload benchmark info to bigquery: {}" .format (errors ))
113
-
114
-
115
- def main (_ ):
116
- if not flags .FLAGS .benchmark_log_dir :
117
- print ("Usage: benchmark_uploader.py --benchmark_log_dir=/some/dir" )
118
- sys .exit (1 )
119
-
120
- uploader = BigQueryUploader (
121
- flags .FLAGS .benchmark_log_dir ,
122
- gcp_project = flags .FLAGS .gcp_project )
123
- run_id = str (uuid .uuid4 ())
124
- uploader .upload_benchmark_run (
125
- flags .FLAGS .bigquery_data_set , flags .FLAGS .bigquery_run_table , run_id )
126
- uploader .upload_metric (
127
- flags .FLAGS .bigquery_data_set , flags .FLAGS .bigquery_metric_table , run_id )
128
-
129
-
130
- if __name__ == "__main__" :
131
- flags_core .define_benchmark ()
132
- flags .adopt_module_key_flags (flags_core )
133
- absl_app .run (main = main )
122
+ for line in f :
123
+ metrics .append (json .loads (line .strip ()))
124
+ self .upload_benchmark_metric_json (
125
+ dataset_name , table_name , run_id , metrics )
126
+
127
+ def _upload_json (self , dataset_name , table_name , json_list ):
128
+    # Find the unique table reference based on dataset and table name, so that
+    # the data can be inserted into it.
+    table_ref = self._bq_client.dataset(dataset_name).table(table_name)
+    errors = self._bq_client.insert_rows_json(table_ref, json_list)
+    if errors:
+      tf.logging.error(
+          "Failed to upload benchmark info to bigquery: {}".format(errors))