From b4ceab9b7d201b270b9a46fdedba2133a85c1f99 Mon Sep 17 00:00:00 2001 From: Debanjan Basu Date: Sat, 30 Mar 2019 11:26:50 +0100 Subject: [PATCH 01/14] [fix] typo in comment + [fix] handles leading comma for the case that the first value column is Null valued --- influxdb/_dataframe_client.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/influxdb/_dataframe_client.py b/influxdb/_dataframe_client.py index 1ce6e947..d16e29ca 100644 --- a/influxdb/_dataframe_client.py +++ b/influxdb/_dataframe_client.py @@ -363,7 +363,7 @@ def _convert_dataframe_to_lines(self, tag_df = self._stringify_dataframe( tag_df, numeric_precision, datatype='tag') - # join preprendded tags, leaving None values out + # join prepended tags, leaving None values out tags = tag_df.apply( lambda s: [',' + s.name + '=' + v if v else '' for v in s]) tags = tags.sum(axis=1) @@ -392,6 +392,8 @@ def _convert_dataframe_to_lines(self, field_df.columns[1:]] field_df = field_df.where(~mask_null, '') # drop Null entries fields = field_df.sum(axis=1) + # take out leading , where first column has a Null value + fields = fields.str.lstrip(",") del field_df # Generate line protocol string From 366e7714668cc1fd28b5a5f689e93661f1b9da5f Mon Sep 17 00:00:00 2001 From: Debanjan Date: Sat, 30 Mar 2019 12:43:02 +0100 Subject: [PATCH 02/14] [refactor] consolidated similar logic to a new function --- influxdb/_dataframe_client.py | 50 +++++++++++++++++++---------------- 1 file changed, 27 insertions(+), 23 deletions(-) diff --git a/influxdb/_dataframe_client.py b/influxdb/_dataframe_client.py index d16e29ca..0ce09ebd 100644 --- a/influxdb/_dataframe_client.py +++ b/influxdb/_dataframe_client.py @@ -358,17 +358,11 @@ def _convert_dataframe_to_lines(self, tag_columns.append(tag) tag_df = dataframe[tag_columns] - tag_df = tag_df.fillna('') # replace NA with empty string - tag_df = tag_df.sort_index(axis=1) - tag_df = self._stringify_dataframe( - tag_df, numeric_precision, datatype='tag') - - # join prepended tags, leaving None values out - tags = tag_df.apply( - lambda s: [',' + s.name + '=' + v if v else '' for v in s]) - tags = tags.sum(axis=1) - - del tag_df + # Keep the positions where Null values are found + mask_null = tag_df.isnull().values + tag_df = self._stringify_dataframe(tag_df, numeric_precision, datatype='tag') + tags = self._lineify_string_df(tag_df,mask_null) + del tag_df, mask_null elif global_tags: tag_string = ''.join( [",{}={}".format(k, _escape_tag(v)) if v else '' @@ -382,22 +376,16 @@ def _convert_dataframe_to_lines(self, field_df = dataframe[field_columns] # Keep the positions where Null values are found mask_null = field_df.isnull().values - field_df = self._stringify_dataframe(field_df, - numeric_precision, - datatype='field') - - field_df = (field_df.columns.values + '=').tolist() + field_df - field_df[field_df.columns[1:]] = ',' + field_df[ - field_df.columns[1:]] - field_df = field_df.where(~mask_null, '') # drop Null entries - fields = field_df.sum(axis=1) - # take out leading , where first column has a Null value - fields = fields.str.lstrip(",") - del field_df + numeric_precision, + datatype='field') + fields = self._lineify_string_df(field_df, mask_null) + del field_df, mask_null # Generate line protocol string measurement = _escape_tag(measurement) + # prepend comma to non-Null tag-rows + tags = ("," + tags).str.replace(r"^,$","") points = (measurement + tags + ' ' + fields + ' ' + time).tolist() return points @@ -453,6 +441,22 @@ def _stringify_dataframe(dframe, numeric_precision, datatype='field'): return dframe + + def _lineify_string_df(self,string_df,mask_null): + """accepts a dataframe of tag or field df + returns a Series of strings joined by + comma if non-Null values using + vector string operations""" + df = string_df.copy() + df = (df.columns.values + '=').tolist() + df + df[df.columns[1:]] = ',' + df[df.columns[1:]] + df = df.where(~mask_null, '') # drop Null entries + lineified_string_series = df.sum(axis=1) + # take out leading comma where first column has a Null value + lineified_string_series = lineified_string_series.str.lstrip(",") + return lineified_string_series + + def _datetime_to_epoch(self, datetime, time_precision='s'): seconds = (datetime - self.EPOCH).total_seconds() if time_precision == 'h': From 49af5abadda346929f728ff61625e38062adc434 Mon Sep 17 00:00:00 2001 From: Debanjan Date: Sat, 30 Mar 2019 12:59:19 +0100 Subject: [PATCH 03/14] [fix] covering scenario where is a string --- influxdb/_dataframe_client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/influxdb/_dataframe_client.py b/influxdb/_dataframe_client.py index 0ce09ebd..29b13336 100644 --- a/influxdb/_dataframe_client.py +++ b/influxdb/_dataframe_client.py @@ -385,7 +385,7 @@ def _convert_dataframe_to_lines(self, # Generate line protocol string measurement = _escape_tag(measurement) # prepend comma to non-Null tag-rows - tags = ("," + tags).str.replace(r"^,$","") + tags = pd.Series("," + tags).str.replace(r"^,$","") points = (measurement + tags + ' ' + fields + ' ' + time).tolist() return points From 8b68f4f9dda45eae6d6eab1ded5cc605d7cb2bd2 Mon Sep 17 00:00:00 2001 From: Debanjan Date: Sat, 6 Apr 2019 18:04:59 +0200 Subject: [PATCH 04/14] Revert "[fix] covering scenario where is a string" This reverts commit 49af5abadda346929f728ff61625e38062adc434. --- influxdb/_dataframe_client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/influxdb/_dataframe_client.py b/influxdb/_dataframe_client.py index 29b13336..0ce09ebd 100644 --- a/influxdb/_dataframe_client.py +++ b/influxdb/_dataframe_client.py @@ -385,7 +385,7 @@ def _convert_dataframe_to_lines(self, # Generate line protocol string measurement = _escape_tag(measurement) # prepend comma to non-Null tag-rows - tags = pd.Series("," + tags).str.replace(r"^,$","") + tags = ("," + tags).str.replace(r"^,$","") points = (measurement + tags + ' ' + fields + ' ' + time).tolist() return points From 0df495a8517d97369562c952c3a0562cadae86e5 Mon Sep 17 00:00:00 2001 From: Debanjan Date: Sat, 6 Apr 2019 18:16:51 +0200 Subject: [PATCH 05/14] Revert "[refactor] consolidated similar logic to a new function" This reverts commit 366e7714668cc1fd28b5a5f689e93661f1b9da5f. --- influxdb/_dataframe_client.py | 50 ++++++++++++++++------------------- 1 file changed, 23 insertions(+), 27 deletions(-) diff --git a/influxdb/_dataframe_client.py b/influxdb/_dataframe_client.py index 0ce09ebd..d16e29ca 100644 --- a/influxdb/_dataframe_client.py +++ b/influxdb/_dataframe_client.py @@ -358,11 +358,17 @@ def _convert_dataframe_to_lines(self, tag_columns.append(tag) tag_df = dataframe[tag_columns] - # Keep the positions where Null values are found - mask_null = tag_df.isnull().values - tag_df = self._stringify_dataframe(tag_df, numeric_precision, datatype='tag') - tags = self._lineify_string_df(tag_df,mask_null) - del tag_df, mask_null + tag_df = tag_df.fillna('') # replace NA with empty string + tag_df = tag_df.sort_index(axis=1) + tag_df = self._stringify_dataframe( + tag_df, numeric_precision, datatype='tag') + + # join prepended tags, leaving None values out + tags = tag_df.apply( + lambda s: [',' + s.name + '=' + v if v else '' for v in s]) + tags = tags.sum(axis=1) + + del tag_df elif global_tags: tag_string = ''.join( [",{}={}".format(k, _escape_tag(v)) if v else '' @@ -376,16 +382,22 @@ def _convert_dataframe_to_lines(self, field_df = dataframe[field_columns] # Keep the positions where Null values are found mask_null = field_df.isnull().values + field_df = self._stringify_dataframe(field_df, - numeric_precision, - datatype='field') - fields = self._lineify_string_df(field_df, mask_null) - del field_df, mask_null + numeric_precision, + datatype='field') + + field_df = (field_df.columns.values + '=').tolist() + field_df + field_df[field_df.columns[1:]] = ',' + field_df[ + field_df.columns[1:]] + field_df = field_df.where(~mask_null, '') # drop Null entries + fields = field_df.sum(axis=1) + # take out leading , where first column has a Null value + fields = fields.str.lstrip(",") + del field_df # Generate line protocol string measurement = _escape_tag(measurement) - # prepend comma to non-Null tag-rows - tags = ("," + tags).str.replace(r"^,$","") points = (measurement + tags + ' ' + fields + ' ' + time).tolist() return points @@ -441,22 +453,6 @@ def _stringify_dataframe(dframe, numeric_precision, datatype='field'): return dframe - - def _lineify_string_df(self,string_df,mask_null): - """accepts a dataframe of tag or field df - returns a Series of strings joined by - comma if non-Null values using - vector string operations""" - df = string_df.copy() - df = (df.columns.values + '=').tolist() + df - df[df.columns[1:]] = ',' + df[df.columns[1:]] - df = df.where(~mask_null, '') # drop Null entries - lineified_string_series = df.sum(axis=1) - # take out leading comma where first column has a Null value - lineified_string_series = lineified_string_series.str.lstrip(",") - return lineified_string_series - - def _datetime_to_epoch(self, datetime, time_precision='s'): seconds = (datetime - self.EPOCH).total_seconds() if time_precision == 'h': From c663234559bda82296730a2e61ae8ce5e845099a Mon Sep 17 00:00:00 2001 From: Debanjan Date: Sat, 18 May 2019 11:00:46 +0200 Subject: [PATCH 06/14] [tests][feature] added tests to check if first none value results in invalid line protocol --- influxdb/tests/dataframe_client_test.py | 50 +++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/influxdb/tests/dataframe_client_test.py b/influxdb/tests/dataframe_client_test.py index cb380ac5..977cb7cf 100644 --- a/influxdb/tests/dataframe_client_test.py +++ b/influxdb/tests/dataframe_client_test.py @@ -389,6 +389,56 @@ def test_write_points_from_dataframe_with_numeric_column_names(self): self.assertEqual(m.last_request.body, expected) + def test_write_points_from_dataframe_with_leading_none_column(self): + """Test write points from df in TestDataFrameClient object + to check if leading None column results in invalid line protocol.""" + dataframe = pd.DataFrame( + dict( + first=[1, None, None, 8, 9], + second=[2, None, None, None, 10], + third=[3, 4.1, None, None, 11], + first_tag=["one", None, None, "eight", None], + second_tag=["two", None, None, None, None], + third_tag=["three", "four", None, None, None], + comment=[ + "All columns filled", + "First two of three empty", + "All empty", + "Last two of three empty", + "Empty tags with values", + ] + ), + index=pd.date_range( + start=pd.to_datetime('2018-01-01'), + end=pd.to_datetime('2018-10-01'), + periods=5, + ) + ) + expected = ( + b'foo,first_tag=one,second_tag=two,third_tag=three first=1.0,second=2.0,third=3.0,comment="All columns filled" 1514764800000000000\n' + b'foo,third_tag=four third=4.1,comment="First two of three empty" 1520661600000000000\n' + b'foo comment="All empty" 1526558400000000000\n' + b'foo,first_tag=eight first=8.0,comment="Last two of three empty" 1532455200000000000\n' + b'foo first=9.0,second=10.0,third=11.0,comment="Empty tags with values" 1538352000000000000\n' + ) + + with requests_mock.Mocker() as m: + m.register_uri(requests_mock.POST, + "http://localhost:8086/write", + status_code=204) + + cli = DataFrameClient(database='db') + + cli.write_points(dataframe, 'foo', + tag_columns=[ + "first_tag", + "second_tag", + "third_tag"]) + with open('/tmp/mylog.txt', 'w') as of: + print(m.last_request.body, expected, file=of) + + self.assertEqual(m.last_request.body, expected) + def test_write_points_from_dataframe_with_numeric_precision(self): """Test write points from df with numeric precision.""" now = pd.Timestamp('1970-01-01 00:00+00:00') From e5d96f00a7b8494bd89324bb29853c952b91c431 Mon Sep 17 00:00:00 2001 From: Debanjan Date: Sat, 18 May 2019 11:05:40 +0200 Subject: [PATCH 07/14] [fix] deleted debug lines --- influxdb/tests/dataframe_client_test.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/influxdb/tests/dataframe_client_test.py b/influxdb/tests/dataframe_client_test.py index 977cb7cf..5c274e40 100644 --- a/influxdb/tests/dataframe_client_test.py +++ b/influxdb/tests/dataframe_client_test.py @@ -434,9 +434,7 @@ def test_write_points_from_dataframe_with_leading_none_column(self): "first_tag", "second_tag", "third_tag"]) - with open('/tmp/mylog.txt', 'w') as of: - print(m.last_request.body, expected, file=of) - + self.assertEqual(m.last_request.body, expected) def test_write_points_from_dataframe_with_numeric_precision(self): From 1f3fe4e008f6ee3c6357bd595d74b409fc751db5 Mon Sep 17 00:00:00 2001 From: Debanjan Date: Sat, 18 May 2019 12:48:05 +0200 Subject: [PATCH 08/14] [fix] overspecified date_range args --- influxdb/tests/dataframe_client_test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/influxdb/tests/dataframe_client_test.py b/influxdb/tests/dataframe_client_test.py index 5c274e40..191c2b1b 100644 --- a/influxdb/tests/dataframe_client_test.py +++ b/influxdb/tests/dataframe_client_test.py @@ -411,6 +411,7 @@ def test_write_points_from_dataframe_with_leading_none_column(self): index=pd.date_range( start=pd.to_datetime('2018-01-01'), end=pd.to_datetime('2018-10-01'), + freq=None, periods=5, ) ) From cf95296827f78f42ce3f1505fc2c2991cd94afb7 Mon Sep 17 00:00:00 2001 From: Debanjan Date: Sat, 18 May 2019 12:48:05 +0200 Subject: [PATCH 09/14] [fix] overspecified date_range args --- influxdb/tests/dataframe_client_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/influxdb/tests/dataframe_client_test.py b/influxdb/tests/dataframe_client_test.py index 5c274e40..62faf029 100644 --- a/influxdb/tests/dataframe_client_test.py +++ b/influxdb/tests/dataframe_client_test.py @@ -410,7 +410,7 @@ def test_write_points_from_dataframe_with_leading_none_column(self): ), index=pd.date_range( start=pd.to_datetime('2018-01-01'), - end=pd.to_datetime('2018-10-01'), + freq='1D', periods=5, ) ) From 53c3acf8f03860e3d50d05215f67771a63337b34 Mon Sep 17 00:00:00 2001 From: Debanjan Date: Sat, 18 May 2019 13:04:24 +0200 Subject: [PATCH 10/14] [fix] removed endpoint in date-range --- influxdb/tests/dataframe_client_test.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/influxdb/tests/dataframe_client_test.py b/influxdb/tests/dataframe_client_test.py index 191c2b1b..3f8334fb 100644 --- a/influxdb/tests/dataframe_client_test.py +++ b/influxdb/tests/dataframe_client_test.py @@ -410,17 +410,16 @@ def test_write_points_from_dataframe_with_leading_none_column(self): ), index=pd.date_range( start=pd.to_datetime('2018-01-01'), - end=pd.to_datetime('2018-10-01'), - freq=None, + freq='1D', periods=5, ) ) expected = ( b'foo,first_tag=one,second_tag=two,third_tag=three first=1.0,second=2.0,third=3.0,comment="All columns filled" 1514764800000000000\n' - b'foo,third_tag=four third=4.1,comment="First two of three empty" 1520661600000000000\n' - b'foo comment="All empty" 1526558400000000000\n' - b'foo,first_tag=eight first=8.0,comment="Last two of three empty" 1532455200000000000\n' - b'foo first=9.0,second=10.0,third=11.0,comment="Empty tags with values" 1538352000000000000\n' + b'foo,third_tag=four third=4.1,comment="First two of three empty" 1514851200000000000\n' + b'foo comment="All empty" 1514937600000000000\n' + b'foo,first_tag=eight first=8.0,comment="Last two of three empty" 1515024000000000000\n' + b'foo first=9.0,second=10.0,third=11.0,comment="Empty tags with values" 1515110400000000000\n' ) with requests_mock.Mocker() as m: From 599c26b2364696ca89e062ff43bc7f7e6ced23b1 Mon Sep 17 00:00:00 2001 From: Debanjan Date: Sat, 18 May 2019 13:13:34 +0200 Subject: [PATCH 11/14] [fix] reordered columns in test target --- influxdb/tests/dataframe_client_test.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/influxdb/tests/dataframe_client_test.py b/influxdb/tests/dataframe_client_test.py index 3f8334fb..892696aa 100644 --- a/influxdb/tests/dataframe_client_test.py +++ b/influxdb/tests/dataframe_client_test.py @@ -415,11 +415,11 @@ def test_write_points_from_dataframe_with_leading_none_column(self): ) ) expected = ( - b'foo,first_tag=one,second_tag=two,third_tag=three first=1.0,second=2.0,third=3.0,comment="All columns filled" 1514764800000000000\n' - b'foo,third_tag=four third=4.1,comment="First two of three empty" 1514851200000000000\n' + b'foo,first_tag=one,second_tag=two,third_tag=three comment="All columns filled",first=1.0,second=2.0,third=3.0 1514764800000000000\n' + b'foo,third_tag=four comment="First two of three empty",third=4.1 1514851200000000000\n' b'foo comment="All empty" 1514937600000000000\n' - b'foo,first_tag=eight first=8.0,comment="Last two of three empty" 1515024000000000000\n' - b'foo first=9.0,second=10.0,third=11.0,comment="Empty tags with values" 1515110400000000000\n' + b'foo,first_tag=eight comment="Last two of three empty",first=8.0 1515024000000000000\n' + b'foo comment="Empty tags with values",first=9.0,second=10.0,third=11.0 1515110400000000000\n' ) with requests_mock.Mocker() as m: From 0ad7cfe8720ebbe5340bb10eb7d7455181adc7b5 Mon Sep 17 00:00:00 2001 From: Debanjan Date: Sat, 18 May 2019 14:49:28 +0200 Subject: [PATCH 12/14] [fix] [test] freeze order of columns --- influxdb/tests/dataframe_client_test.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/influxdb/tests/dataframe_client_test.py b/influxdb/tests/dataframe_client_test.py index 892696aa..1300fb89 100644 --- a/influxdb/tests/dataframe_client_test.py +++ b/influxdb/tests/dataframe_client_test.py @@ -429,7 +429,16 @@ def test_write_points_from_dataframe_with_leading_none_column(self): cli = DataFrameClient(database='db') - cli.write_points(dataframe, 'foo', + colnames = [ + "first_tag", + "second_tag", + "third_tag", + "comment", + "first", + "second", + "third" + ] + cli.write_points(dataframe[colnames], 'foo', tag_columns=[ "first_tag", "second_tag", From 87d48e922bc454f698258c325424923c80b064a8 Mon Sep 17 00:00:00 2001 From: Debanjan Date: Sat, 18 May 2019 14:59:31 +0200 Subject: [PATCH 13/14] [refactor] [test] used loc instead of dict-like invocation of columns --- influxdb/tests/dataframe_client_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/influxdb/tests/dataframe_client_test.py b/influxdb/tests/dataframe_client_test.py index 1300fb89..4af5f119 100644 --- a/influxdb/tests/dataframe_client_test.py +++ b/influxdb/tests/dataframe_client_test.py @@ -438,7 +438,7 @@ def test_write_points_from_dataframe_with_leading_none_column(self): "second", "third" ] - cli.write_points(dataframe[colnames], 'foo', + cli.write_points(dataframe.loc[:, colnames], 'foo', tag_columns=[ "first_tag", "second_tag", From 097222d1fe2512cb8754bbf35d52e99006caaba5 Mon Sep 17 00:00:00 2001 From: Debanjan Date: Sat, 18 May 2019 15:15:01 +0200 Subject: [PATCH 14/14] [fix] [test] [lint] cleared up complainsts from flake8 and pep257 --- influxdb/tests/dataframe_client_test.py | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/influxdb/tests/dataframe_client_test.py b/influxdb/tests/dataframe_client_test.py index 4af5f119..90312ed8 100644 --- a/influxdb/tests/dataframe_client_test.py +++ b/influxdb/tests/dataframe_client_test.py @@ -390,8 +390,7 @@ def test_write_points_from_dataframe_with_numeric_column_names(self): self.assertEqual(m.last_request.body, expected) def test_write_points_from_dataframe_with_leading_none_column(self): - """Test write points from df in TestDataFrameClient object - to check if leading None column results in invalid line protocol.""" + """write_points detect erroneous leading comma for null first field.""" dataframe = pd.DataFrame( dict( first=[1, None, None, 8, 9], @@ -415,11 +414,20 @@ def test_write_points_from_dataframe_with_leading_none_column(self): ) ) expected = ( - b'foo,first_tag=one,second_tag=two,third_tag=three comment="All columns filled",first=1.0,second=2.0,third=3.0 1514764800000000000\n' - b'foo,third_tag=four comment="First two of three empty",third=4.1 1514851200000000000\n' + b'foo,first_tag=one,second_tag=two,third_tag=three' + b' comment="All columns filled",first=1.0,second=2.0,third=3.0' + b' 1514764800000000000\n' + b'foo,third_tag=four' + b' comment="First two of three empty",third=4.1' + b' 1514851200000000000\n' b'foo comment="All empty" 1514937600000000000\n' - b'foo,first_tag=eight comment="Last two of three empty",first=8.0 1515024000000000000\n' - b'foo comment="Empty tags with values",first=9.0,second=10.0,third=11.0 1515110400000000000\n' + b'foo,first_tag=eight' + b' comment="Last two of three empty",first=8.0' + b' 1515024000000000000\n' + b'foo' + b' comment="Empty tags with values",first=9.0,second=10.0' + b',third=11.0' + b' 1515110400000000000\n' ) with requests_mock.Mocker() as m: @@ -443,7 +451,7 @@ def test_write_points_from_dataframe_with_leading_none_column(self): "first_tag", "second_tag", "third_tag"]) - + self.assertEqual(m.last_request.body, expected) def test_write_points_from_dataframe_with_numeric_precision(self):