From 1a44f6d8e6239540648a75adf4d43c15226f2104 Mon Sep 17 00:00:00 2001
From: "Peter C. Norton" <pn@knewton.com>
Date: Thu, 5 Dec 2013 11:31:20 -0500
Subject: [PATCH 1/3] Work in progress - getting influxdb to look something
 like the current kairosdb support.

---
 influxdb/graphite.py | 416 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 416 insertions(+)
 create mode 100644 influxdb/graphite.py

diff --git a/influxdb/graphite.py b/influxdb/graphite.py
new file mode 100644
index 00000000..32e55b8b
--- /dev/null
+++ b/influxdb/graphite.py
@@ -0,0 +1,416 @@
+# -*- python -*-
+
+import time
+from . import util
+from util import tree
+from . import metadata
+from . import reader
+from collections import deque
+import fnmatch
+import re
+
+
+# XXX: remove
+# RETENTION_TAG = "gr-ret" # terse in order to save space on-disk
+SECONDS_PER_MINUTE = 60
+SECONDS_PER_HOUR   = SECONDS_PER_MINUTE * 60
+SECONDS_PER_DAY    = SECONDS_PER_HOUR   * 24
+SECONDS_PER_WEEK   = SECONDS_PER_DAY    * 7
+SECONDS_PER_MONTH  = SECONDS_PER_DAY    * 30 # OK, I'm approximating
+SECONDS_PER_YEAR   = SECONDS_PER_DAY    * 365
+
+# INVALID_CHARS      = re.compile(r'[^A-Za-z0-9-_/.]')
+# RET_SEPERATOR_CHAR = "_" # Character we use to separate the retentions
+# RET_GRAPHITE_CHAR  = ":" # Character graphite uses to separate the retentions
+
+def _graphite_metric_list_retentions(metric_list, storage_schemas):
+    """:type metric_list: list
+    :param metric_list: a list of lists/tuples, each one is the standard graphite format of metrics
+
+    :type storage_schemas: list
+    :param storage_schemas: list of carbon.storage.Schema objects which will be matched against
+                            each of the graphite metrics and used to tag each metric.
+
+    """
+    def get_retentions(metric_name):
+        for s in storage_schemas:
+            if s.test(metric_name):
+                return _input_retention_resolution(s.options['retentions'].split(','))
+    retentions = [ get_retentions(m[0])[1].replace(RET_GRAPHITE_CHAR, RET_SEPERATOR_CHAR) for m in metric_list ]
+    return retentions
+
+# how graphite will access influxdb
+def graphite_metric_list_to_influxdb_list(metric_list, tags):
+    """
+    The term "tags" here comes from kairosdb.  I don't believe that influxdb uses the term tag, but we're going to use the moral equivalent of a tag.
+
+    :type metric_list: list
+    :param metric_list: A list of lists/tuples, each one being the standard graphite formatting of metrics
+
+    :type tags: dict
+    :param tags: a dict of name: value pairs that will be recorded as tags
+
+    :rtype: list
+    :return: List of dicts formatted appropriately for influxdb
+
+    Use this for entering a large set of metrics that have the same set of tags (e.g. same retentions, etc.)
+    """
+    return [graphite_metric_to_influxdb(m, tags=tags) for m in metric_list]
+
+# how graphite will write lots of points to influxdb
+def graphite_metric_list_with_retentions_to_influxdb_list(metric_list, storage_schemas, pervasive_tags={}):
+    """
+    :type metric_list: list
+    :param metric_list: A list of line-formatted lists or tuples, each one being the standard graphite formatting of metrics
+
+    :type storage_schemas: list
+    :param storage_schemas: A list of carbon.storage.Schema objects to be matched against.
+
+    :type pervasive_tags: dict
+    :param pervasive_tags: tags that will be applied to (almost) all metrics. This won't override the
+                           retentions configuration.
+
+    :rtype: generator
+    :return: generator of dicts formatted appropriately for influxdb
+
+    Use this for entering a large set of metrics that have the disparate retentions.  The expected
+    way to call this from a sender is to first call _graphite_metric_list_retentions()
+
+    XXX this API is getting messy - it should be simpler. -PN
+    """
+    retentions_list = _graphite_metric_list_retentions(metric_list, storage_schemas)
+    for m,r in zip(metric_list, retentions_list):
+        tags = {}
+        if len(pervasive_tags) > 0:
+            tags.update(pervasive_tags)
+        tags[RETENTION_TAG] = r
+        yield graphite_metric_to_influxdb(m, tags=tags)
+
+
+def _fnmatch_expand_graphite_wildcard_metric_name(conn, name, cache_ttl=60):
+    """
+    :type conn: pyKairosDB.KairosDBConnection
+    :param conn: the connection to the database
+
+    :type name: string
+    :param name: the graphite-like name which can include ".*." to provide wildcard expansion
+
+    :type cache_ttl: int
+    :param cache_ttl: how often to update the cache from KairosDB, in seconds
+
+    :rtype: list
+    :return: a list of unicode strings.  Each unicode string contains an expanded metric name.
+
+    KairosDB doesn't currently support wildcards, so get all metric
+    names and expand them.
+
+    Currently only ".*." or "\*." or ".\*" expansions are supported.
+    Substring expansions aren't supported at this time.
+
+    Graphite-web uses fnmatch or something similar, perhaps this
+    should provide a list and re-use the same functionality.
+
+    This function caches the created tree for cache_ttl seconds and
+    refreshes when the cache has aged beyond the cache_ttl.
+    """
+    all_metric_name_list = metadata.get_all_metric_names(conn)
+    return [ n for n in all_metric_name_list if fnmatch.fnmatch(n, name) ]
+
+def expand_graphite_wildcard_metric_name(conn, name, cache_ttl=60):
+    """
+    :type conn: pyKairosDB.KairosDBConnection
+    :param conn: the connection to the database
+
+    :type name: string
+    :param name: the graphite-like name which can include ".*." to provide wildcard expansion
+
+    :type cache_ttl: int
+    :param cache_ttl: how often to update the cache from KairosDB, in seconds
+
+    :rtype: list
+    :return: a list of unicode strings.  Each unicode string contains an expanded metric name.
+
+    KairosDB doesn't currently support wildcards, so get all metric
+    names and expand them.
+
+    Currently only ".*." or "\*." or ".\*" expansions are supported.
+    Substring expansions aren't supported at this time.
+
+    Graphite-web uses fnmatch or something similar, perhaps this
+    should provide a list and re-use the same functionality.
+
+    This function caches the created tree for cache_ttl seconds and
+    refreshes when the cache has aged beyond the cache_ttl.
+    """
+
+    if "*" not in name:
+        return [u'{0}'.format(name)]
+
+    if "." in name:
+        name_list = [ u'{0}'.format(n) for n in name.split(".")]
+    else:
+        name_list = [ name ]
+    # print "Name_list is {0}".format(name_list)
+
+    ts        = expand_graphite_wildcard_metric_name.cache_timestamp
+    cache_tree = expand_graphite_wildcard_metric_name.cache_tree
+    if ts == 0 or (time.time() - ts > cache_ttl):
+        all_metric_name_list = metadata.get_all_metric_names(conn)
+        cache_tree           = tree()
+        _make_graphite_name_cache(cache_tree, all_metric_name_list)
+        expand_graphite_wildcard_metric_name.cache_tree      = cache_tree
+        expand_graphite_wildcard_metric_name.cache_timestamp = time.time()
+    if name == "*": # special case for the root of the tree:
+        return cache_tree.keys()
+    if '*' in name and not '.' in name:
+        return [ ctk for ctk in cache_tree.keys() if fnmatch.fnmatch(ctk, name)]
+    expanded_name_list = util.metric_name_wildcard_expansion(cache_tree, name_list)
+    # print "expanded_name_list is {0}".format(expanded_name_list)
+
+    return_list = [ ".".join(en) for en in expanded_name_list]
+    return list(set(return_list))
+
+expand_graphite_wildcard_metric_name.cache_tree = tree()
+expand_graphite_wildcard_metric_name.cache_timestamp = 0
+
+
+def leaf_or_branch(conn, name):
+    """
+    :type conn: pyKairosDB.KairosDBConnection
+    :param conn: Connection to the pyrosdb
+
+    :type name: string
+    :param name: The metric name or part of a name that we're checking for
+
+    :rtype: str
+    :return: Either the string "leaf" or "branch"
+
+    Graphite wants to know if a name is a "leaf" or a "branch" in
+    its ultimate storage location that is, whether or not it can be
+    traversed further
+    """
+    # print "Trying to expand the name {0}".format(name)
+    if name.endswith('*'):
+        wildcard_expansion = expand_graphite_wildcard_metric_name(conn, name)
+    else:
+        wildcard_expansion = expand_graphite_wildcard_metric_name(conn, name + ".*")
+
+    if len(wildcard_expansion) == 1 and wildcard_expansion[0] == name:
+        return "leaf"
+    elif len(wildcard_expansion) > 0:
+        return "branch"
+    else:
+        return "leaf"
+
+
+def _make_graphite_name_cache(cache_tree, list_of_names):
+    """    :type cache_tree: defaultdict
+    :param cache_tree: a defaultdict initialized with the tree() function.  Contains names
+        of entries in the kairosdb, separated by "." per the graphite convention.
+
+    :type list_of_names: list
+    :param list_of_names: list of strings, in order, that will be sought after in the cache tree.
+
+    Given a list of names - all name - that kairosdb has, make a
+    tree of all those names.
+    """
+    for n in list_of_names:
+        util._add_to_cache(cache_tree, n.split('.'))
+
+def graphite_metric_to_influxdb(metric, tags):
+    """:type metric: tuple
+    :param metric: tuple of ("metric_name", timestamp, value)
+
+    :type tags: dict
+    :param tags: a dict of {name: value} strings that will be recorded as tags
+
+    :rtype: dict
+    :return: Re-formatted dict appropriate for influxdb
+
+    Write graphite metrics to influxdb.
+
+    Graphite metrics are a tuple with a metric name, a timestamp, and
+    a value, and they have a storage schema attached, which specifies
+    the time period which should be used for that metric.  This must
+    be recorded in the tags for graphite querying to work
+
+    KairosDB metrics are a hash of # XXX What should it be for influxdb?
+    {
+     "name"      : string,
+     "timestamp" : java long int,
+     "value"     : float,
+     "tags"      : { "name" : "value", "name" : "value"}
+    }
+
+
+    Even though influxdb uses a 64-bit lon int, the python API here expects
+    a float, as returned by time.time().  This module handles
+    converting this when the data is written and read, and doesn't
+    make the user deal with this conversion.
+
+    KairosDB and influxdb only allow alphanumeric and the following punctuation characters:
+
+    ".", "/", "-", and "_".
+
+    Graphite is less restrictive.  Anything that doesn't match the
+    above are converted to an underscore.
+
+    """
+    converted_metric_name = INVALID_CHARS.sub(TAG_SEPERATOR_CHAR, metric[0])
+    return {
+        "name"      : converted_metric_name,
+        "timestamp" : metric[1],
+        "value"     : metric[2],
+        "tags"      : tags
+    }
+
+
+def seconds_from_retention_tag(tag_value, sep=RET_GRAPHITE_CHAR):
+    """:type tag_value: str
+    :param tag_value: the retention info tag
+
+    :rtype: int
+    :return: Number of seconds for the given tag value
+
+    The retention tag is a colon-separated resolution_retention period
+    when it's input, taken from the carbon storage-schemas.conf.  When
+    reading from kairosdb, the ':' is not a legal character to have in
+    a tag, so we input them with an underscore instead.  That
+    separator is configurable so this can be used on tags that are
+    queried as well.
+
+    For this function we're not worried about the retention period, we
+    just care about the resolution of the data.
+
+    So get the first part of it, and expand the number of seconds
+    so we can make a valid comparison.
+
+    """
+    resolution, _ = tag_value.split(sep)
+    if resolution[-1].lower() == "s":
+        return int(resolution[:-1])
+    elif resolution[-1].lower() == "m":
+        return int(resolution[:-1]) * SECONDS_PER_MINUTE
+    elif resolution[-1].lower() == "h":
+        return int(resolution[:-1]) * SECONDS_PER_HOUR
+    elif resolution[-1].lower() == "d":
+        return int(resolution[:-1]) * SECONDS_PER_DAY
+    elif resolution[-1].lower() == "w":
+        return int(resolution[:-1]) * SECONDS_PER_WEEK
+    elif resolution[-1].lower() == "y":
+        return int(resolution[:-1]) * SECONDS_PER_YEAR
+    else: # Seconds is the default
+        return int(resolution)
+
+def _input_retention_resolution(retention_string_list):
+    """
+    :type retention_string_list: list
+    :param retention_string_list: A list of strings containing retention definitions
+
+    :rtype: tuple
+    :return: A tuple containing the (number_of_seconds, highest_resolution_retention_string),
+             which is an (int, str)
+
+    When inputting data, we want the highest resolution data - the
+    actual metrics can be summarized later based on the same policy or
+    a different one if that works better.
+    """
+    all_tags_set = [t for t in retention_string_list]
+    return min([(seconds_from_retention_tag(tag), tag) for tag in all_tags_set])# return the highest resolution
+
+
+def _lowest_resolution_retention(data, name):
+    """
+    :type data: requests.response.content
+    :param data: Content from the KairosDB query
+
+    :type name: str
+    :param name: The name of the metric whose retention will be extracted.
+
+    :rtype: int
+    :return: The number of seconds in the lowest-resolution retention period in the data
+
+    Graphite needs data to be divided into even time slices.  We
+    must store the slice information when writing data so that it can
+    be read out here.
+
+    The relevant tags are in the README.md for this project.
+    """
+    values = util.get_content_values_by_name(data, name)
+    all_tags_set = set() # easiest case - all tags are the same, otherwise we use the set
+    for result in values:
+        all_tags_set.update(util.get_matching_tags_from_result(result, RETENTION_TAG))
+    return max([ seconds_from_retention_tag(tag, RET_SEPERATOR_CHAR) for tag in all_tags_set])# return the lowest resolution
+
+def read_absolute(conn, metric_name, start_time, end_time):
+    """
+    :type conn: pyKairosDB.KairosDBConnection
+    :param conn: The connection to KairosDB
+
+    :type metric_name: string
+    :param metric_name: The name of the metric to query (graphite does one at a time, though KairosDB can do more)
+
+    :type start_time: float
+    :param start_time: The float representing the number of seconds since the epoch that this query starts at.
+
+    :type end_time: float
+    :param end_time: The float representing the number of seconds since the epoch that this query endsa at.
+
+    :rtype: tuple
+    :return: 2-element tuple - ((start_time, end_time, interval), list_of_metric_values).  Graphite wants evenly-spaced metrics,
+        and None for any interval that doesn't have data.  It infers the time for each update by the order and place of each
+        value provided.
+
+    This function returns the values being queried, in the format that the graphite-web app requires.
+    """
+    def cache_query():
+        def cache_query_closure(query_dict):
+            reader.cache_time(10, query_dict)
+        return cache_query_closure
+    tags = conn.read_absolute([metric_name], start_time, end_time,
+                              query_modifying_function=cache_query(),
+                              only_read_tags=True)
+
+    interval_seconds = _lowest_resolution_retention(tags, metric_name)
+    def modify_query():
+        def modify_query_closure(query_dict):
+            group_by               = reader.default_group_by()
+            group_by["range_size"] = { "value" : interval_seconds, "unit" : "seconds"}
+            aggregator = reader.default_aggregator()
+            aggregator["sampling"] = group_by["range_size"]
+            reader.group_by([group_by], query_dict["metrics"][0])
+            reader.aggregation([aggregator], query_dict["metrics"][0])
+        return modify_query_closure
+    # now that we've gotten the tags and have set the retention time, get data
+    content = conn.read_absolute([metric_name], start_time, end_time,
+        query_modifying_function=modify_query())
+    return_list = list()
+    if len(content['queries'][0]['results']) > 0:
+        # by_interval_dict = dict([(v[1], v[0]) for v in content["queries"][0]["results"][0]["values"] ])
+        value_deque = deque(content["queries"][0]["results"][0]["values"])
+        slots = list()
+        for slot_begin in range(start_time, end_time, interval_seconds):
+            slot_buffer = list()
+            slot_end = slot_begin + interval_seconds
+            slots.append((slot_begin, slot_end))
+            try:
+                if slot_end < value_deque[0][0]: # we haven't caught up with the beginning of the deque
+                    return_list.append(None)
+                    continue
+                if slot_begin > value_deque[-1][0]: # We have nothing more of value
+                    return_list.append(None)
+                    continue
+                if len(value_deque) == 0:
+                    return_list.append(None)
+                    continue
+                while slot_begin <= value_deque[0][0] < slot_end:
+                    slot_buffer.append(value_deque.popleft()[1])
+            except IndexError:
+                return_list.append(None)
+            if len(slot_buffer) < 1:
+                return_list.append(None)
+            else:
+                return_list.append(sum(slot_buffer)/len(slot_buffer)) # take the average of the points for this slot
+    else:
+        return_list = [ None for n in range(start_time, end_time, interval_seconds)]
+    return ((start_time, end_time, interval_seconds), return_list)

From c7db19bd39bcf304dd6046ee44d0d3b99f50b860 Mon Sep 17 00:00:00 2001
From: "Peter C. Norton" <pn@knewton.com>
Date: Thu, 26 Dec 2013 03:32:54 -0500
Subject: [PATCH 2/3] Got a first metric submitted via test code.

---
 influxdb/graphite.py            | 314 +++++++++++++++++---------------
 tests/influxdb/graphite_test.py |  87 +++++++++
 2 files changed, 255 insertions(+), 146 deletions(-)
 create mode 100644 tests/influxdb/graphite_test.py

diff --git a/influxdb/graphite.py b/influxdb/graphite.py
index 32e55b8b..b3612dd3 100644
--- a/influxdb/graphite.py
+++ b/influxdb/graphite.py
@@ -1,17 +1,19 @@
 # -*- python -*-
 
 import time
-from . import util
-from util import tree
-from . import metadata
-from . import reader
+# from . import util
+# from util import tree
+# from . import metadata # XXX add metadata
+# from . import reader
 from collections import deque
 import fnmatch
 import re
 
+# TODO
+# XXX move away from pyKairosDB metadata, reader, and util.
+#
 
-# XXX: remove
-# RETENTION_TAG = "gr-ret" # terse in order to save space on-disk
+RETENTION_TAG = "gr-ret" # terse in order to save space on-disk
 SECONDS_PER_MINUTE = 60
 SECONDS_PER_HOUR   = SECONDS_PER_MINUTE * 60
 SECONDS_PER_DAY    = SECONDS_PER_HOUR   * 24
@@ -19,9 +21,9 @@
 SECONDS_PER_MONTH  = SECONDS_PER_DAY    * 30 # OK, I'm approximating
 SECONDS_PER_YEAR   = SECONDS_PER_DAY    * 365
 
-# INVALID_CHARS      = re.compile(r'[^A-Za-z0-9-_/.]')
-# RET_SEPERATOR_CHAR = "_" # Character we use to separate the retentions
-# RET_GRAPHITE_CHAR  = ":" # Character graphite uses to separate the retentions
+INVALID_CHARS      = re.compile(r'[^A-Za-z0-9-_.]') # not alpha, dash, underscore and period
+RET_SEPERATOR_CHAR = "_" # Character we use to separate the retentions
+RET_GRAPHITE_CHAR  = ":" # Character graphite uses to separate the retentions
 
 def _graphite_metric_list_retentions(metric_list, storage_schemas):
     """:type metric_list: list
@@ -39,10 +41,11 @@ def get_retentions(metric_name):
     retentions = [ get_retentions(m[0])[1].replace(RET_GRAPHITE_CHAR, RET_SEPERATOR_CHAR) for m in metric_list ]
     return retentions
 
-# how graphite will access influxdb
+# how carbon writes to influxdb
 def graphite_metric_list_to_influxdb_list(metric_list, tags):
-    """
-    The term "tags" here comes from kairosdb.  I don't believe that influxdb uses the term tag, but we're going to use the moral equivalent of a tag.
+    """The term "tags" here comes from kairosdb.  I don't believe that
+    influxdb uses the term tag, but we're going to use the moral
+    equivalent of a tag.
 
     :type metric_list: list
     :param metric_list: A list of lists/tuples, each one being the standard graphite formatting of metrics
@@ -73,7 +76,7 @@ def graphite_metric_list_with_retentions_to_influxdb_list(metric_list, storage_s
     :rtype: generator
     :return: generator of dicts formatted appropriately for influxdb
 
-    Use this for entering a large set of metrics that have the disparate retentions.  The expected
+    Use this for entering a large set of metrics that have disparate retentions.  The expected
     way to call this from a sender is to first call _graphite_metric_list_retentions()
 
     XXX this API is getting messy - it should be simpler. -PN
@@ -87,91 +90,91 @@ def graphite_metric_list_with_retentions_to_influxdb_list(metric_list, storage_s
         yield graphite_metric_to_influxdb(m, tags=tags)
 
 
-def _fnmatch_expand_graphite_wildcard_metric_name(conn, name, cache_ttl=60):
-    """
-    :type conn: pyKairosDB.KairosDBConnection
-    :param conn: the connection to the database
-
-    :type name: string
-    :param name: the graphite-like name which can include ".*." to provide wildcard expansion
-
-    :type cache_ttl: int
-    :param cache_ttl: how often to update the cache from KairosDB, in seconds
-
-    :rtype: list
-    :return: a list of unicode strings.  Each unicode string contains an expanded metric name.
-
-    KairosDB doesn't currently support wildcards, so get all metric
-    names and expand them.
-
-    Currently only ".*." or "\*." or ".\*" expansions are supported.
-    Substring expansions aren't supported at this time.
-
-    Graphite-web uses fnmatch or something similar, perhaps this
-    should provide a list and re-use the same functionality.
-
-    This function caches the created tree for cache_ttl seconds and
-    refreshes when the cache has aged beyond the cache_ttl.
-    """
-    all_metric_name_list = metadata.get_all_metric_names(conn)
-    return [ n for n in all_metric_name_list if fnmatch.fnmatch(n, name) ]
-
-def expand_graphite_wildcard_metric_name(conn, name, cache_ttl=60):
-    """
-    :type conn: pyKairosDB.KairosDBConnection
-    :param conn: the connection to the database
-
-    :type name: string
-    :param name: the graphite-like name which can include ".*." to provide wildcard expansion
-
-    :type cache_ttl: int
-    :param cache_ttl: how often to update the cache from KairosDB, in seconds
-
-    :rtype: list
-    :return: a list of unicode strings.  Each unicode string contains an expanded metric name.
-
-    KairosDB doesn't currently support wildcards, so get all metric
-    names and expand them.
-
-    Currently only ".*." or "\*." or ".\*" expansions are supported.
-    Substring expansions aren't supported at this time.
-
-    Graphite-web uses fnmatch or something similar, perhaps this
-    should provide a list and re-use the same functionality.
-
-    This function caches the created tree for cache_ttl seconds and
-    refreshes when the cache has aged beyond the cache_ttl.
-    """
-
-    if "*" not in name:
-        return [u'{0}'.format(name)]
-
-    if "." in name:
-        name_list = [ u'{0}'.format(n) for n in name.split(".")]
-    else:
-        name_list = [ name ]
-    # print "Name_list is {0}".format(name_list)
-
-    ts        = expand_graphite_wildcard_metric_name.cache_timestamp
-    cache_tree = expand_graphite_wildcard_metric_name.cache_tree
-    if ts == 0 or (time.time() - ts > cache_ttl):
-        all_metric_name_list = metadata.get_all_metric_names(conn)
-        cache_tree           = tree()
-        _make_graphite_name_cache(cache_tree, all_metric_name_list)
-        expand_graphite_wildcard_metric_name.cache_tree      = cache_tree
-        expand_graphite_wildcard_metric_name.cache_timestamp = time.time()
-    if name == "*": # special case for the root of the tree:
-        return cache_tree.keys()
-    if '*' in name and not '.' in name:
-        return [ ctk for ctk in cache_tree.keys() if fnmatch.fnmatch(ctk, name)]
-    expanded_name_list = util.metric_name_wildcard_expansion(cache_tree, name_list)
-    # print "expanded_name_list is {0}".format(expanded_name_list)
-
-    return_list = [ ".".join(en) for en in expanded_name_list]
-    return list(set(return_list))
-
-expand_graphite_wildcard_metric_name.cache_tree = tree()
-expand_graphite_wildcard_metric_name.cache_timestamp = 0
+# XXX metadata
+# def _fnmatch_expand_graphite_wildcard_metric_name(conn, name, cache_ttl=60):
+#     """
+#     :type conn: influxdb.InfluxDBClient
+#     :param conn: the connection to the database
+#
+#     :type name: string
+#     :param name: the graphite-like name which can include ".*." to provide wildcard expansion
+#
+#     :type cache_ttl: int
+#     :param cache_ttl: how often to update the cache from KairosDB, in seconds
+#
+#     :rtype: list
+#     :return: a list of unicode strings.  Each unicode string contains an expanded metric name.
+#
+#     KairosDB doesn't currently support wildcards, so get all metric
+#     names and expand them.
+#
+#     Currently only ".*." or "\*." or ".\*" expansions are supported.
+#     Substring expansions aren't supported at this time.
+#
+#     Graphite-web uses fnmatch or something similar, perhaps this
+#     should provide a list and re-use the same functionality.
+#
+#     This function caches the created tree for cache_ttl seconds and
+#     refreshes when the cache has aged beyond the cache_ttl.
+#     """
+#     all_metric_name_list = metadata.get_all_metric_names(conn)
+#     return [ n for n in all_metric_name_list if fnmatch.fnmatch(n, name) ]
+
+# XXX until I've fixed the reliance on util.
+#  def expand_graphite_wildcard_metric_name(conn, name, cache_ttl=60):
+#      """
+#      :type conn: pyKairosDB.KairosDBConnection
+#      :param conn: the connection to the database
+#
+#      :type name: string
+#      :param name: the graphite-like name which can include ".*." to provide wildcard expansion
+#
+#      :type cache_ttl: int
+#      :param cache_ttl: how often to update the cache from KairosDB, in seconds
+#
+#      :rtype: list
+#      :return: a list of unicode strings.  Each unicode string contains an expanded metric name.
+#
+#      KairosDB doesn't currently support wildcards, so get all metric
+#      names and expand them.
+#
+#      Currently only ".*." or "\*." or ".\*" expansions are supported.
+#      Substring expansions aren't supported at this time.
+#
+#      Graphite-web uses fnmatch or something similar, perhaps this
+#      should provide a list and re-use the same functionality.
+#
+#      This function caches the created tree for cache_ttl seconds and
+#      refreshes when the cache has aged beyond the cache_ttl.
+#      """
+#
+#      if "*" not in name:
+#          return [u'{0}'.format(name)]
+#      if "." in name:
+#          name_list = [ u'{0}'.format(n) for n in name.split(".")]
+#      else:
+#          name_list = [ name ]
+#      # print "Name_list is {0}".format(name_list)
+#      ts        = expand_graphite_wildcard_metric_name.cache_timestamp
+#      cache_tree = expand_graphite_wildcard_metric_name.cache_tree
+#      if ts == 0 or (time.time() - ts > cache_ttl):
+#          all_metric_name_list = metadata.get_all_metric_names(conn)
+#          cache_tree           = tree()
+#          _make_graphite_name_cache(cache_tree, all_metric_name_list)
+#          expand_graphite_wildcard_metric_name.cache_tree      = cache_tree
+#          expand_graphite_wildcard_metric_name.cache_timestamp = time.time()
+#      if name == "*": # special case for the root of the tree:
+#          return cache_tree.keys()
+#      if '*' in name and not '.' in name:
+#          return [ ctk for ctk in cache_tree.keys() if fnmatch.fnmatch(ctk, name)]
+#      expanded_name_list = util.metric_name_wildcard_expansion(cache_tree, name_list)
+#      # print "expanded_name_list is {0}".format(expanded_name_list)
+#
+#      return_list = [ ".".join(en) for en in expanded_name_list]
+#      return list(set(return_list))
+#
+#  expand_graphite_wildcard_metric_name.cache_tree = tree()
+#  expand_graphite_wildcard_metric_name.cache_timestamp = 0
 
 
 def leaf_or_branch(conn, name):
@@ -203,19 +206,20 @@ def leaf_or_branch(conn, name):
         return "leaf"
 
 
-def _make_graphite_name_cache(cache_tree, list_of_names):
-    """    :type cache_tree: defaultdict
-    :param cache_tree: a defaultdict initialized with the tree() function.  Contains names
-        of entries in the kairosdb, separated by "." per the graphite convention.
-
-    :type list_of_names: list
-    :param list_of_names: list of strings, in order, that will be sought after in the cache tree.
-
-    Given a list of names - all name - that kairosdb has, make a
-    tree of all those names.
-    """
-    for n in list_of_names:
-        util._add_to_cache(cache_tree, n.split('.'))
+# XXX fix along with util
+#  def _make_graphite_name_cache(cache_tree, list_of_names):
+#      """    :type cache_tree: defaultdict
+#      :param cache_tree: a defaultdict initialized with the tree() function.  Contains names
+#          of entries in the kairosdb, separated by "." per the graphite convention.
+#
+#      :type list_of_names: list
+#      :param list_of_names: list of strings, in order, that will be sought after in the cache tree.
+#
+#      Given a list of names - all name - that kairosdb has, make a
+#      tree of all those names.
+#      """
+#      for n in list_of_names:
+#          util._add_to_cache(cache_tree, n.split('.'))
 
 def graphite_metric_to_influxdb(metric, tags):
     """:type metric: tuple
@@ -232,36 +236,53 @@ def graphite_metric_to_influxdb(metric, tags):
     Graphite metrics are a tuple with a metric name, a timestamp, and
     a value, and they have a storage schema attached, which specifies
     the time period which should be used for that metric.  This must
-    be recorded in the tags for graphite querying to work
+    be recorded in the columns for graphite querying to work
 
-    KairosDB metrics are a hash of # XXX What should it be for influxdb?
     {
-     "name"      : string,
-     "timestamp" : java long int,
-     "value"     : float,
-     "tags"      : { "name" : "value", "name" : "value"}
+    "name": "some.graphite.metric",
+    "columns": [
+        "time", "value", "gr-ret"
+    ],
+    "points": [
+      [1387998050, 200.0, "60s_90d" ]
+    ]
     }
 
 
-    Even though influxdb uses a 64-bit lon int, the python API here expects
+    Even though influxdb uses a 64-bit long int, the python API here expects
     a float, as returned by time.time().  This module handles
     converting this when the data is written and read, and doesn't
     make the user deal with this conversion.
 
-    KairosDB and influxdb only allow alphanumeric and the following punctuation characters:
+    Influxdb only allows alphanumeric and the following punctuation
+    characters (see http://influxdb.org/docs/api/http.html)
 
-    ".", "/", "-", and "_".
+    "-", "_", and ".".
 
     Graphite is less restrictive.  Anything that doesn't match the
     above are converted to an underscore.
 
+    TODO: Keep an index of metric names that already have updates, and
+    update that metric instead of creating another metric.
+
+    For now this is naive.
     """
-    converted_metric_name = INVALID_CHARS.sub(TAG_SEPERATOR_CHAR, metric[0])
+    converted_metric_name = INVALID_CHARS.sub(RET_SEPERATOR_CHAR, metric[0])
+    if len(tags) > 0:
+        (c, p) = zip(*tags.items())
+        columns_list = list(c)
+        points_list = list(p)
+    else:
+        columns_list = []
+        points_list = []
+    columns_list.extend(("time", "value",))
+    points_list.append(int(metric[1] * 1000)) # Deal with time as a long counting ms since the epoch instead of seconds
+    points_list.append(metric[2])
+
     return {
         "name"      : converted_metric_name,
-        "timestamp" : metric[1],
-        "value"     : metric[2],
-        "tags"      : tags
+        "columns"   : columns_list,
+        "points"    : [points_list]
     }
 
 
@@ -319,28 +340,29 @@ def _input_retention_resolution(retention_string_list):
     return min([(seconds_from_retention_tag(tag), tag) for tag in all_tags_set])# return the highest resolution
 
 
-def _lowest_resolution_retention(data, name):
-    """
-    :type data: requests.response.content
-    :param data: Content from the KairosDB query
-
-    :type name: str
-    :param name: The name of the metric whose retention will be extracted.
-
-    :rtype: int
-    :return: The number of seconds in the lowest-resolution retention period in the data
-
-    Graphite needs data to be divided into even time slices.  We
-    must store the slice information when writing data so that it can
-    be read out here.
-
-    The relevant tags are in the README.md for this project.
-    """
-    values = util.get_content_values_by_name(data, name)
-    all_tags_set = set() # easiest case - all tags are the same, otherwise we use the set
-    for result in values:
-        all_tags_set.update(util.get_matching_tags_from_result(result, RETENTION_TAG))
-    return max([ seconds_from_retention_tag(tag, RET_SEPERATOR_CHAR) for tag in all_tags_set])# return the lowest resolution
+# XXX: util
+# def _lowest_resolution_retention(data, name):
+#     """
+#     :type data: requests.response.content
+#     :param data: Content from the KairosDB query
+#
+#     :type name: str
+#     :param name: The name of the metric whose retention will be extracted.
+#
+#     :rtype: int
+#     :return: The number of seconds in the lowest-resolution retention period in the data
+#
+#     Graphite needs data to be divided into even time slices.  We
+#     must store the slice information when writing data so that it can
+#     be read out here.
+#
+#     The relevant tags are in the README.md for this project.
+#     """
+#     values = util.get_content_values_by_name(data, name)
+#     all_tags_set = set() # easiest case - all tags are the same, otherwise we use the set
+#     for result in values:
+#         all_tags_set.update(util.get_matching_tags_from_result(result, RETENTION_TAG))
+#     return max([ seconds_from_retention_tag(tag, RET_SEPERATOR_CHAR) for tag in all_tags_set])# return the lowest resolution
 
 def read_absolute(conn, metric_name, start_time, end_time):
     """
diff --git a/tests/influxdb/graphite_test.py b/tests/influxdb/graphite_test.py
new file mode 100644
index 00000000..1c716cc2
--- /dev/null
+++ b/tests/influxdb/graphite_test.py
@@ -0,0 +1,87 @@
+# -*- coding: utf-8 -*-
+"""
+unit tests
+"""
+import json
+
+import time
+import requests
+from nose.tools import raises
+from mock import patch
+
+from influxdb import InfluxDBClient
+import influxdb.graphite as graphite
+
+USER='testuser'
+PASS='testpassword'
+PORT=8086
+DBNAME='graphite'
+HOST='10.255.0.2'
+TEST_METRIC_NAME='peter.test.metric'
+
+retention = "60m:1y"
+fixed_ret = "60m_1y"
+retention_tags={"gr-ret":fixed_ret}
+
+def test_single_metric_json():
+    "No connection, just tests json"
+    now = time.time()
+    db_name = "graphite"
+    test_metric = (TEST_METRIC_NAME, now, now)
+
+    influxdb_metric = graphite.graphite_metric_to_influxdb(test_metric, retention_tags)
+
+    expected_result = {'columns': ['time', 'value', 'gr-ret'],
+                       'name': TEST_METRIC_NAME,
+                       'points': [[int(now * 1000), now, '60m_1y']]} # * 1000 for ms since the epoch
+    assert (set(influxdb_metric['columns']) == set(expected_result['columns']))
+    assert (set(influxdb_metric['points'][0]) == set(expected_result['points'][0]))
+    assert (set(influxdb_metric['name']) == set(expected_result['name']))
+    assert (influxdb_metric.keys() == expected_result.keys())
+
+def test_send_to_influxdb():
+    conn = InfluxDBClient(HOST, PORT, USER, PASS, DBNAME)
+    now = time.time()
+    db_name = "graphite"
+    test_metric = (TEST_METRIC_NAME, now, now)
+    influxdb_metric = graphite.graphite_metric_to_influxdb(test_metric, retention_tags)
+
+    print (influxdb_metric)
+    url = "{0}/db/{1}/series?u={2}&p={3}&time_precision={4}".format(conn._baseurl,
+                                                                    conn._database,
+                                                                    conn._username,
+                                                                    conn._password,
+                                                                    'm')
+    print url
+    response = requests.post(url, data=json.dumps([influxdb_metric]))
+    return response
+    print dir(response)
+    print response
+    conn.write_points_with_precision(influxdb_metric, 'm')
+
+def test_example_send_to_influxdb():
+    conn = InfluxDBClient(HOST, PORT, USER, PASS, DBNAME)
+    conn.switch_db(DBNAME)
+    now = time.time()
+    db_name = "graphite"
+    test_metric = (TEST_METRIC_NAME, now, now)
+    influxdb_metric = [ { "name": "response_times",
+                          "columns": ["time", "value"],
+                          "points": [ [1382819388, 234.3],
+                                      [1382819389, 120.1],
+                                      [1382819380, 340.9]
+                                  ] } ]
+
+    print (influxdb_metric)
+    url = "{0}/db/{1}/series?u={2}&p={3}&time_precision={4}".format(conn._baseurl,
+                                                                    conn._database,
+                                                                    conn._username,
+                                                                    conn._password,
+                                                                    'm')
+    print url
+    response = requests.post(url, data=json.dumps([influxdb_metric]))
+    return response
+
+
+def test_metric_list_json():
+    pass

From 920d91423ef5fa08f7b1174b66ab7a499d322efd Mon Sep 17 00:00:00 2001
From: "Peter C. Norton" <pn@knewton.com>
Date: Mon, 30 Dec 2013 05:00:24 -0500
Subject: [PATCH 3/3] Tests that work with ipython

Generating a list of influxdb json documents via retention objects
works.

The tests don't run independently; I'm running them via cut-n-paste in
ipython.
---
 influxdb/graphite.py            |  6 ++--
 tests/influxdb/graphite_test.py | 55 ++++++++++++++++++++++++++++++---
 2 files changed, 54 insertions(+), 7 deletions(-)

diff --git a/influxdb/graphite.py b/influxdb/graphite.py
index b3612dd3..2bc76f44 100644
--- a/influxdb/graphite.py
+++ b/influxdb/graphite.py
@@ -81,8 +81,10 @@ def graphite_metric_list_with_retentions_to_influxdb_list(metric_list, storage_s
 
     XXX this API is getting messy - it should be simpler. -PN
     """
-    retentions_list = _graphite_metric_list_retentions(metric_list, storage_schemas)
-    for m,r in zip(metric_list, retentions_list):
+    # Generate a lazy list of retentions that will match
+    retentions_generator = _graphite_metric_list_retentions(metric_list, storage_schemas)
+    # print retentions_generator.next()
+    for m,r in zip(metric_list, retentions_generator): # XXX use itertools' izip?
         tags = {}
         if len(pervasive_tags) > 0:
             tags.update(pervasive_tags)
diff --git a/tests/influxdb/graphite_test.py b/tests/influxdb/graphite_test.py
index 1c716cc2..b199028b 100644
--- a/tests/influxdb/graphite_test.py
+++ b/tests/influxdb/graphite_test.py
@@ -3,11 +3,13 @@
 unit tests
 """
 import json
-
+import itertools
 import time
 import requests
 from nose.tools import raises
 from mock import patch
+import re
+import pprint
 
 from influxdb import InfluxDBClient
 import influxdb.graphite as graphite
@@ -18,11 +20,35 @@
 DBNAME='graphite'
 HOST='10.255.0.2'
 TEST_METRIC_NAME='peter.test.metric'
+TEST_METRIC_NAME2='foo.test.metric'
+
+TEST_METRIC_NAME_LIST=[TEST_METRIC_NAME, TEST_METRIC_NAME2]
 
 retention = "60m:1y"
 fixed_ret = "60m_1y"
+month_ret = "5m:30d"
+fixed_month_ret = "5m_1m"
+
 retention_tags={"gr-ret":fixed_ret}
 
+class PatternSchema(object):
+    """A work-alike for the patternschema in carbon"""
+    def __init__(self, name, pattern, retentions):
+        self.name = name
+        self.pattern = pattern
+        self.regex = re.compile(pattern)
+        self.options = { 'retentions' : retentions } # comma-separated string
+
+    def test(self, metric):
+        return self.regex.search(metric)
+
+    def matches(self, metric):
+        return bool( self.test(metric) )
+
+
+schema_list = [PatternSchema(retention, "peter.*", retention),
+               PatternSchema(month_ret, "foo.*", month_ret)]
+
 def test_single_metric_json():
     "No connection, just tests json"
     now = time.time()
@@ -43,8 +69,9 @@ def test_send_to_influxdb():
     conn = InfluxDBClient(HOST, PORT, USER, PASS, DBNAME)
     now = time.time()
     db_name = "graphite"
-    test_metric = (TEST_METRIC_NAME, now, now)
-    influxdb_metric = graphite.graphite_metric_to_influxdb(test_metric, retention_tags)
+    test_metric_list = (TEST_METRIC_NAME, now, now)
+    test_metric_list_tags =
+    influxdb_metric = graphite.graphite_metric_list_to_influxdb_list(test_metric, retention_tags)
 
     print (influxdb_metric)
     url = "{0}/db/{1}/series?u={2}&p={3}&time_precision={4}".format(conn._baseurl,
@@ -83,5 +110,23 @@ def test_example_send_to_influxdb():
     return response
 
 
-def test_metric_list_json():
-    pass
+def test_metric_list_with_retentions_json():
+    "No connection, just tests json"
+    now = time.time()
+    nowlist = [ now + n for n in range(5)]
+    db_name = "graphite"
+    retentions = itertools.cycle(schema_list)
+    metrics = itertools.cycle(TEST_METRIC_NAME_LIST)
+    test_metric_list = zip(*[[(metrics.next(), n, n,), retentions.next()]  for n in nowlist])
+
+    influxdb_metric_generator = graphite.graphite_metric_list_with_retentions_to_influxdb_list(
+        test_metric_list[0], test_metric_list[1])
+    pprint.pprint( list(influxdb_metric_generator))
+
+    # expected_result = {'columns': ['time', 'value', 'gr-ret'],
+    #                    'name': TEST_METRIC_NAME,
+    #                    'points': [[int(now * 1000), now, '60m_1y']]} # * 1000 for ms since the epoch
+    # assert (set(influxdb_metric['columns']) == set(expected_result['columns']))
+    # assert (set(influxdb_metric['points'][0]) == set(expected_result['points'][0]))
+    # assert (set(influxdb_metric['name']) == set(expected_result['name']))
+    # assert (influxdb_metric.keys() == expected_result.keys())