diff --git a/README.md b/README.md index 8d8052d..2337e8d 100644 --- a/README.md +++ b/README.md @@ -3,6 +3,10 @@ The Query class is intended to provide a high level interface for building/editing SQL query strings. +This library should now support python 3. + + + Example usage: ```python >>> from querpy import Query @@ -11,7 +15,7 @@ Example usage: >>> new_query.s += ['col1', 'col2', 'col3'] # can take lists >>> new_query.s += 'col4' # can take single strings >>> new_query.w += 'col1 = 1' # can also take a list (separated by AND) - >>> new_query.w &= 'col2 IS NULL' # handles &= and |= operators + >>> new_query.w |= 'col2 IS NULL' # handles &= and |= operators >>> print new_query SELECT col1, @@ -71,12 +75,11 @@ Suppose you want to extend your query by joining to another table and adding col col1 = 1 OR col2 IS NULL ``` -While this works, we are returning to the land of long strings. We can do the same thing (n.b. we'll LEFT JOIN this time) using the build_join helper function to make the join step more readable and modular: +While this works, we are returning to the land of long strings. We can do the same thing (n.b. 
we'll LEFT JOIN this time) using the Query.build_join helper function to make the join step more readable and modular: ```python - >>> from querpy import build_join >>> new_query.j.clear() >>> new_query.join_type = 'LEFT' - >>> new_query.j += build_join('ex_db.dbo.new_tbl nt', 'tbl.id', 'nt.id', 'tbl.city', 'nt.city') + >>> new_query.j += Query.build_join('ex_db.dbo.new_tbl nt', 'tbl.id', 'nt.id', 'tbl.city', 'nt.city') >>> new_query.join_type = '' # set back to regular join >>> new_query SELECT @@ -94,4 +97,33 @@ When your query string is ready to be passed to the function that will execute t >>> new_query.statement SELECT col2, nt.id FROM ex_db.dbo.ex_table tbl LEFT JOIN ex_db.dbo.new_tbl nt ON tbl.id = nt.id AND tbl.city = nt.city WHERE col1 = 1 OR col2 IS NULL ``` + +You can also prepend a CREATE TABLE <> AS to the SQL +```python + >>> new_query.j += 'ex_db.dbo.new_tbl nt ON tbl.id = nt.id' + >>> new_query.s += 'nt.id' + >>> new_query.g += 'col1' + >>> new_query.l += ' 10, 100' + >>> new_query.ci += ' db_name.tbl_name ' + >>> new_query + CREATE TABLE db_name.tbl_name AS + SELECT + col1, + nt.id + FROM + ex_db.dbo.ex_table tbl + JOIN ex_db.dbo.new_tbl nt ON tbl.id = nt.id + WHERE + col1 = 1 + OR col2 IS NULL + GROUP BY + col1 + LIMIT 10, 100 +``` + + +Suppose you want to extend your query by joining to another table and adding columns from this table: + + NOTE: the SQL constructed is **not** validated. 
+ diff --git a/doc_example.py b/doc_example.py index 99a729a..fab77c6 100644 --- a/doc_example.py +++ b/doc_example.py @@ -12,22 +12,28 @@ ">>> new_query.s += 'col4' # can take single strings", ">>> new_query.w += 'col1 = 1' # can also take a list", ">>> new_query.w |= 'col2 IS NULL' # handles &= and |= operators", - ">>> print new_query", + ">>> print(new_query)", ">>> new_query", ">>> new_query.s.clear() # clear SELECT component", ">>> new_query.s += 'col1'", ">>> new_query", ">>> new_query.s[0] = 'col2'", - ">>> print new_query.s", + ">>> print(new_query.s)", ">>> new_query.j += 'ex_db.dbo.new_tbl nt ON tbl.id = nt.id'", ">>> new_query.s += 'nt.id'", ">>> new_query", - ">>> from querpy import build_join", ">>> new_query.j.clear()", ">>> new_query.join_type = 'LEFT'", - ">>> new_query.j += build_join('ex_db.dbo.new_tbl nt', 'tbl.id', 'nt.id'," + ">>> new_query.j += Query.build_join('ex_db.dbo.new_tbl nt', 'tbl.id', 'nt.id'," " 'tbl.city', 'nt.city')", ">>> new_query.join_type = '' # set back to regular join", + ">>> new_query.ci += 'thisDB.thatTable' # prepend CREATE TABLE thisDB.thatTable AS", + ">>> new_query.l += ' 10, 100' ", + ">>> new_query.g += 'col1' ", + ">>> new_query.g += 'col3' ", + ">>> new_query.o += 'col1' ", + ">>> new_query.o += 'col3' ", + ">>> new_query", ">>> new_query", ">>> new_query.statement", ] @@ -35,9 +41,9 @@ def main(): for c in commands: - print c + print(c) if c in ('>>> new_query', '>>> new_query.statement'): - print eval(c[4:]) + print(eval(c[4:])) else: exec(c[4:]) diff --git a/doc_example.txt b/doc_example.txt index 4b31073..313a283 100644 --- a/doc_example.txt +++ b/doc_example.txt @@ -53,10 +53,9 @@ SELECT WHERE col1 = 1 OR col2 IS NULL ->>> from querpy import build_join >>> new_query.j.clear() >>> new_query.join_type = 'LEFT' ->>> new_query.j += build_join('ex_db.dbo.new_tbl nt', 'tbl.id', 'nt.id', 'tbl.city', 'nt.city') +>>> new_query.j += Query.build_join('ex_db.dbo.new_tbl nt', 'tbl.id', 'nt.id', 'tbl.city', 'nt.city') >>> 
new_query.join_type = '' # set back to regular join >>> new_query SELECT diff --git a/querpy.py b/querpy.py index 2e1cc76..e5673ed 100644 --- a/querpy.py +++ b/querpy.py @@ -1,6 +1,8 @@ """ +querpy.py + The Query class is intended to provide a high level interface for -building/editing SQL queries. +building/editing SQL queries. Built before we understood what SQLAlchemy was. Example usage: >>> new_query = Query() @@ -13,9 +15,9 @@ >>> new_query # should print full query """ -__author__ = 'Paul Garaud' -__version__ = '0.1' -__date__ = '2015-03-19' +__author__ = 'Paul Garaud, Fred Trotter' +__version__ = '0.2' +__date__ = '2022-06-18' import re @@ -23,9 +25,16 @@ class Query(object): - pattern = re.compile('(^\s+|(?<=\s)\s+|\s+$)') - clean_up = re.compile('(?<=WHERE )\s.*?AND|(?<=WHERE )\s.*?OR') + # A series of precompiled regex to perform various SQL related string tasks + + # These help with merging all of the statements into a single line + whitespace_regex = re.compile('(^\s+|(?<=\s)\s+|\s+$)') + #the design of the where list is such that every element of the list has an 'AND' or 'OR' as a prefix.. + #but the first one after the where does not need that.. 
so we just look for a WHERE OR or a WHERE AND and remove the 'OR' or 'AND' + where_clean_up = re.compile('(?<=WHERE )\s.*?AND|(?<=WHERE )\s.*?OR') + having_clean_up = re.compile('(?<=HAVING )\s.*?AND|(?<=HAVING )\s.*?OR') + # All of these fmt_(something) help with the SQL pretty print implementation fmt = re.compile('\s(?=FROM)|\s(?=WHERE)|\s(?=GROUP BY)') fmt_after = re.compile( '(?<=SELECT)\s|(?<=FROM)\s|(?<=WHERE)\s|(?<=GROUP BY)\s' @@ -38,26 +47,51 @@ class Query(object): ) ) fmt_commas = re.compile('(?<=,)\s') - fmt_and = re.compile('(?<=WHERE).*$', flags=re.S) - fmt_or = re.compile('OR') + fmt_where_and = re.compile('(?<=WHERE).*$', flags=re.S) + fmt_having_and = re.compile('(?<=HAVING).*$', flags=re.S) + fmt_or = re.compile('\sOR\s') + + + def __init__(self): + self.ci = CreateInsertComponent() self.s = SelectComponent() self.f = QueryComponent('FROM') self.j = JoinComponent() self.w = WhereComponent() self.g = QueryComponent('GROUP BY', sep=',') + self.h = HavingComponent() + self.o = QueryComponent('ORDER BY', sep=',') + self.l = LimitComponent() + @property def statement(self): - elements = [self.s(), self.f(), self.j(), self.w(), self.g()] - full_statement = re.subn(self.clean_up, '', ' '.join(elements))[0] - full_statement = re.subn(self.pattern, '', full_statement)[0] + + where_statement = re.subn(self.where_clean_up, '', self.w())[0] + having_statement = re.subn(self.having_clean_up, '', self.h())[0] + + # Merges the various SQL components into a single SQL statement + elements = [self.ci(), self.s(), self.f(), self.j(), where_statement, self.g(), having_statement, self.o(), self.l()] + full_statement = re.subn(self.whitespace_regex, '', ' '.join(elements))[0] # flattens pretty print SQL to a single line by removing whitespace if full_statement: + #Then our regex and merging has worked and return the single line of SQL return full_statement else: return '' + #Some properties that allow us to expose some of the variables in the QueryComponents, as 
though they were + #directly on the main Query object + + @property + def is_first_data_add(self): + return self.ci.is_first_data_add + + @is_first_data_add.setter + def is_first_data_add(self, value): + self.ci.is_first_data_add = value + @property def distinct(self): return self.s.distinct @@ -86,20 +120,62 @@ def join_type(self, value): self.j.join_type = value def __str__(self): - query = self.statement + # When we just print the object, we want to assume that we will pretty-print the SQL. + # This section handles the conversion of the single line query, into a pretty printed version.. + # This section could be better implemented using a call to sqlparse + # https://github.com/andialbrecht/sqlparse + # But doing it this way keeps the dependencies low, which is important + query = self.statement # This is the single line query gotten from the statement function query = re.subn(self.fmt, '\n ', query)[0] query = re.subn(self.fmt_after, '\n ', query)[0] query = re.subn(self.fmt_join, '\n ', query)[0] query = re.subn(self.fmt_commas, '\n ', query)[0] - query = re.subn(self.fmt_and, replace_and, query)[0] - query = re.subn(self.fmt_or, '\n OR', query)[0] + query = re.subn(self.fmt_where_and, Query.replace_and, query)[0] + query = re.subn(self.fmt_having_and, Query.replace_and, query)[0] + query = re.subn(self.fmt_or, '\n OR ', query)[0] return query __repr__ = __str__ + @staticmethod + def build_join(*args): + # A static helper function to build a join + # this assumes that the first argument is the table... + # and that every subsequent pair of arguments is something to join 'ON' + + tbl_name = args[0] + args = args[1:] + if len(args) % 2 != 0 or args == (): + raise BaseException( + 'You must provide an even number of columns to join on.' 
+ ) + + args_expr = ['{0} = {1}'.format(args[2 * i], args[2 * i + 1]) + for i in range(int(len(args) / 2))] # int() for Python 3 + args_expr = ' AND '.join(args_expr) + join_str = ' '.join([tbl_name, 'ON', args_expr]) + + return join_str + + @staticmethod + def replace_and(match): + """ + helper function for indenting AND in WHERE clause + """ + string = match.group(0) + raw_newlines = re.subn('AND', '\n AND', string)[0] + out = re.subn('(?<=BETWEEN)( \w+? )\n\s*?(AND)', r'\1\2', raw_newlines)[0] + return out + + + + class QueryComponent(object): + #This is the base class that everything else will be added to.. + # this is where the magic of += is handled, which makes it easy + # to add things quickly to any component of the overall query.. def __init__(self, header, sep=''): self.header = header + ' ' @@ -110,25 +186,35 @@ def __iadd__(self, item): self.add_item(item) return self - __iand__ = __ior__ = __iadd__ + __iand__ = __ior__ = __iadd__ # let's set the default for &= and |= to be just += to start.. + # Note this is a modification of the original code, which did not have the ability to handle an object + # That knows how to become a string.. like DBTable.... + # TODO write a test that this works correctly for DBTable objects. 
def add_item(self, item, prefix=''): if prefix: prefix = prefix + ' ' - if type(item) == str: + if isinstance(item, str): # Handle strings self.components.append(''.join([prefix, item])) - elif type(item) == list: + elif isinstance(item, list): # Handle lists items = [''.join([prefix, i]) for i in item] self.components.extend(items) - else: - raise ValueError('Item must be a string or list') + else: # Handle objects by converting them to strings + try: + item_as_string = str(item) + self.components.append(''.join([prefix, item_as_string])) + except Exception: + raise ValueError('Item must be a string, list, or object convertible to string') def clear(self): self.components = list() def __call__(self): + # This is the function that converts the list of items in the querycomponent into a long string + # it is always prefixed by the header.. if self.components: - return self.header + self.sep.join(self.components) + return_me = self.header + self.sep.join(self.components) + return return_me return '' def __getitem__(self, key): @@ -145,8 +231,54 @@ def __str__(self): __repr__ = __str__ +class CreateInsertComponent(QueryComponent): + # Implements the very first part of a CREATE TABLE db.table AS or INSERT INTO db.table + # depending on whether the is_first_data_add setting has been set + # TODO the way this works now, you have to set is_first_data_add BEFORE adding the table + # But it should not care whether it was added first or second.. the is_first_data_add + # Should swap between CREATE TABLE and INSERT INTO whenever it is set.. + # Which means that the behavior of setting the header should happen when the data is being read.. not when it is written... + + is_first_data_add = True + + def __iadd__(self, item): + #we only have the one item.. 
+ self.components = list() # overwrites whatever was there + if self.is_first_data_add: + #Then this is a CREATE TABLE AS clause + self.header = 'CREATE TABLE ' + str(item) + " AS \n" + else: + self.header = 'INSERT INTO ' + str(item) + " \n" + return self + + def __init__(self): + self.header = '' # by default, this is not used. + self.components = list() + + def __call__(self): + return self.header + + +class LimitComponent(QueryComponent): + # Adds support for the limit command + + def __iadd__(self, item): + #we only have the one item.. should be like '10, 100' + #we should add a test to make sure this is correct. + self.components = list() # overwrites whatever was there + self.components.append(item) + self.header = " LIMIT " + self.sep = '' + return self + + def __init__(self): + self.header = '' # by default, this is not used. + self.components = list() # none to start + class SelectComponent(QueryComponent): + # This models the SELECT component, and spends great energy ensuring that the "DISTINCT" and "TOP" syntax are supported + # otherwise the actual columns are just stored as a list, which is handled by the parent class. header = 'SELECT' dist_pattern = re.compile(' DISTINCT') @@ -202,6 +334,11 @@ def top(self, value): class JoinComponent(QueryComponent): + # like most query components, the joins are just a list of strings... + # The exception is that the type of join is stored as a separate attribute + # one would think that this allows for retyping the join later.. but really it just means that we + # do not need to add the word "join" to our string storage... so it is just handling the fact that the type of join + # is listed before the word 'JOIN' while the method of the join is listed after.. 
def __init__(self, sep = ''): QueryComponent.__init__(self, '', sep) @@ -225,7 +362,7 @@ def __iadd__(self, item): self.add_item(item, join) return self - __iand__ = __ior__ = __iadd__ + __iand__ = __ior__ = __iadd__ # again, to start, let's have &= and |= just be the same function as += def __call__(self): if self.components: @@ -246,10 +383,11 @@ def __iand__(self, item): return self def __ior__(self, item): + # add this to the list, but with a separator of 'OR', this will be called when someone uses |= self.add_item(item, 'OR') return self - __iadd__ = __iand__ + __iadd__ = __iand__ # unless we use |= (which will invoke our custom built or function) we are using an "AND" def __str__(self): components = self.components @@ -263,29 +401,33 @@ def __str__(self): __repr__ = __str__ +class HavingComponent(QueryComponent): + header = "HAVING" -def build_join(*args): - tbl_name = args[0] - args = args[1:] - if len(args) % 2 != 0 or args == (): - raise BaseException( - 'You must provide an even number of columns to join on.' - ) + def __init__(self, sep=''): + self.header = self.header + ' ' + QueryComponent.__init__(self, self.header, sep) - args_expr = ['{0} = {1}'.format(args[2 * i], args[2 * i + 1]) - for i in range(int(len(args) / 2))] # int() for Python 3 - args_expr = ' AND '.join(args_expr) - join_str = ' '.join([tbl_name, 'ON', args_expr]) + def __iand__(self, item): + self.add_item(item, 'AND') + return self - return join_str + def __ior__(self, item): + # add this to the list, but with a separator of 'OR', this will be called when someone uses |= + self.add_item(item, 'OR') + return self + __iadd__ = __iand__ # unless we use |= (which will invoke our custom built or function) we are using an "AND" -def replace_and(match): - """ - helper function for indenting AND in WHERE clause - """ - string = match.group(0) - raw_newlines = re.subn('AND', '\n AND', string)[0] - out = re.subn('(?<=BETWEEN)( \w+? 
)\n\s*?(AND)', r'\1\2', raw_newlines)[0] - return out \ No newline at end of file + def __str__(self): + components = self.components + if components: + components = self.components[:] + components[0] = re.sub('^AND |^OR ', '', components[0]) + to_print = list() + for n, c in enumerate(components): + to_print.append("{0}: '{1}'".format(n, c)) + return 'index: item\n' + ', '.join(to_print) + + __repr__ = __str__ diff --git a/test/unit_tests.py b/test/unit_tests.py index afc36ae..4da6c4d 100644 --- a/test/unit_tests.py +++ b/test/unit_tests.py @@ -1,3 +1,4 @@ +from builtins import zip import unittest as ut from querpy import * @@ -287,7 +288,7 @@ def test_fmt_and(self): 'JOIN tbl2 ON col1 = col2 AND col3 = col4 ' 'WHERE col1 = col3 AND col4 = col5 AND col5 = col6' ) - subbed = re.subn(self.query.fmt_and, replace_and, string)[0] + subbed = re.subn(self.query.fmt_where_and, Query.replace_and, string)[0] self.assertEqual( subbed, 'JOIN tbl2 ON col1 = col2 AND col3 = col4 ' @@ -334,7 +335,7 @@ def test_top(self): def test_print(self): self.query.s += ['col1', 'col2', 'col3'] self.query.f += 'tbl1 t1' - self.query.j += build_join('tbl2 t2', 't1.id', 't2.id', 't1.city', + self.query.j += Query.build_join('tbl2 t2', 't1.id', 't2.id', 't1.city', 't2.city') self.query.w += ['col1 IS NULL', 'col4 BETWEEN col1 AND col2', 'col2 = t1.id', 'col3 BETWEEN 0 AND 10'] @@ -355,16 +356,16 @@ def setUp(self): self.item2 = ['tbl2 t2', 't2.id', 'oid', 't2.city', 'city'] def test_join_valid_items(self): - to_test1 = build_join(*self.item1) - to_test2 = build_join(*self.item2) + to_test1 = Query.build_join(*self.item1) + to_test2 = Query.build_join(*self.item2) self.assertEqual(to_test1, 'tbl1 t1 ON t1.id = oid') self.assertEqual(to_test2, 'tbl2 t2 ON t2.id = oid AND t2.city = city') def test_invalid_num_items_passed_as_args(self): invalid = self.item2[:-1] - self.assertRaises(BaseException, build_join, invalid) + self.assertRaises(BaseException, Query.build_join, *invalid) if __name__ == 
'__main__': - ut.main() \ No newline at end of file + ut.main()