From 87100759013dda5b3b9aeb2af7632f188dbe314a Mon Sep 17 00:00:00 2001 From: TH Chen Date: Tue, 19 Sep 2017 17:25:01 -0400 Subject: [PATCH] Improve DataFrameClient tag conversion performance In `_convert_dataframe_to_lines`, if only `global_tags` is specified but not `tag_columns`, take a faster route to process the tags. Previously, in such a case, global tags were duplicated as tag columns and processed as if they were tag columns. Such processing was wasteful and resulted in a slowdown that became noticeable when batch loading many thousands of data points with a handful of global tags. --- influxdb/_dataframe_client.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/influxdb/_dataframe_client.py b/influxdb/_dataframe_client.py index 31ee1c32..62dc8084 100644 --- a/influxdb/_dataframe_client.py +++ b/influxdb/_dataframe_client.py @@ -298,12 +298,6 @@ def _convert_dataframe_to_lines(self, field_columns = list(field_columns) if list(field_columns) else [] tag_columns = list(tag_columns) if list(tag_columns) else [] - # Make global_tags as tag_columns - if global_tags: - for tag in global_tags: - dataframe[tag] = global_tags[tag] - tag_columns.append(tag) - # If field columns but no tag columns, assume rest of columns are tags if field_columns and (not tag_columns): tag_columns = list(column_series[~column_series.isin( @@ -333,6 +327,13 @@ def _convert_dataframe_to_lines(self, # If tag columns exist, make an array of formatted tag keys and values if tag_columns: + + # Make global_tags as tag_columns + if global_tags: + for tag in global_tags: + dataframe[tag] = global_tags[tag] + tag_columns.append(tag) + tag_df = dataframe[tag_columns] tag_df = tag_df.fillna('') # replace NA with empty string tag_df = tag_df.sort_index(axis=1) @@ -345,6 +346,12 @@ def _convert_dataframe_to_lines(self, tags = tags.sum(axis=1) del tag_df + elif global_tags: + tag_string = ''.join( + [",{}={}".format(k, _escape_tag(v)) if v else '' + for k, v in
sorted(global_tags.items())] + ) + tags = pd.Series(tag_string, index=dataframe.index) else: tags = ''