diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml new file mode 100644 index 00000000..1cde398b --- /dev/null +++ b/.github/workflows/codeql-analysis.yml @@ -0,0 +1,71 @@ +# For most projects, this workflow file will not need changing; you simply need +# to commit it to your repository. +# +# You may wish to alter this file to override the set of languages analyzed, +# or to provide custom queries or build logic. +# +# ******** NOTE ******** +# We have attempted to detect the languages in your repository. Please check +# the `language` matrix defined below to confirm you have the correct set of +# supported CodeQL languages. +# +name: "CodeQL" + +on: + push: + branches: [ master ] + pull_request: + # The branches below must be a subset of the branches above + branches: [ master ] + schedule: + - cron: '25 5 * * 1' + +jobs: + analyze: + name: Analyze + runs-on: ubuntu-latest + permissions: + actions: read + contents: read + security-events: write + + strategy: + fail-fast: false + matrix: + language: [ 'python' ] + # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python' ] + # Learn more: + # https://docs.github.com/en/free-pro-team@latest/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning#changing-the-languages-that-are-analyzed + + steps: + - name: Checkout repository + uses: actions/checkout@v3 + + # Initializes the CodeQL tools for scanning. + - name: Initialize CodeQL + uses: github/codeql-action/init@v2 + with: + languages: ${{ matrix.language }} + # If you wish to specify custom queries, you can do so here or in a config file. + # By default, queries listed here will override any specified in a config file. + # Prefix the list here with "+" to use these queries and those in the config file. + # queries: ./path/to/local/query, your-org/your-repo/queries@main + + # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). + # If this step fails, then you should remove it and run the build manually (see below) + - name: Autobuild + uses: github/codeql-action/autobuild@v2 + + # ℹ️ Command-line programs to run using the OS shell. 
+ # 📚 https://git.io/JvXDl + + # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines + # and modify them (or add more) to build your code if your project + # uses a compiled language + + #- run: | + # make bootstrap + # make release + + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@v2 diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml new file mode 100644 index 00000000..3033af97 --- /dev/null +++ b/.github/workflows/python-app.yml @@ -0,0 +1,38 @@ +# This workflow will install Python dependencies, run tests and lint with a single version of Python +# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions + +name: Python application + +on: + push: + branches: + - master + pull_request: + branches: [ master ] + schedule: + - cron: '0 12 * * *' + +jobs: + build: + + runs-on: ubuntu-20.04 # keep it on 20.04 to have Python 3.5 and 3.6 available + strategy: + matrix: + python-version: ["3.5", "3.6", "3.7", "3.8", "3.9", "3.10", "3.11", "3.12-dev"] + + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip flit + flit install --deps=develop + - name: Lint with flake8 + run: flake8 sqlparse --count --max-complexity=31 --show-source --statistics + - name: Test with pytest + run: pytest --cov=sqlparse + - name: Publish to codecov + uses: codecov/codecov-action@v3 diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 3e091594..00000000 --- a/.travis.yml +++ /dev/null @@ -1,26 +0,0 @@ -language: python -python: - - "2.7" - - "3.4" - - "3.5" - - "3.6" - - "nightly" - - "pypy" - - "pypy3" -# Enable 3.7 without globally enabling sudo and dist: xenial for other build jobs -# See https://github.com/travis-ci/travis-ci/issues/9815 -matrix: - include: - - python: 3.7 - dist: xenial - sudo: true - -install: - - pip install -Uq pytest pytest-cov codecov - - pytest --version - -script: - - pytest --cov=sqlparse - -after_success: - - codecov diff --git a/AUTHORS b/AUTHORS index b4ff0aff..1717adff 100644 --- a/AUTHORS +++ b/AUTHORS @@ -8,24 +8,34 @@ project: https://bitbucket.org/gutworth/six. 
Alphabetical list of contributors:
 
 * Adam Greenhall
+* Aki Ariga
 * Alexander Beedie
 * Alexey Malyshev
+* ali-tny
 * andrew deryabin
 * Andrew Tipton
 * atronah
 * casey
 * Cauê Beloni
+* Christian Clauss
 * circld
+* Corey Zumar
 * Cristian Orellana
 * Dag Wieers
+* Daniel Harding
 * Darik Gamble
 * Demetrio92
 * Dennis Taylor
+* Dvořák Václav
+* Erik Cederstrand
 * Florian Bauer
 * Fredy Wijaya
 * Gavin Wahl
+* hurcy
 * Ian Robertson
 * JacekPliszka
+* JavierPan
+* Jean-Martin Archer
 * Jesús Leganés Combarro "Piranna"
 * Johannes Hoff
 * John Bodley
@@ -33,10 +43,14 @@ Alphabetical list of contributors:
 * Josh Soref
 * Kevin Jing Qiu
 * koljonen
+* Likai Liu
+* Long Le Xich
+* mathilde.oustlant
 * Michael Schuller
 * Mike Amy
 * mulos
 * Oleg Broytman
+* osmnv <80402144+osmnv@users.noreply.github.com>
 * Patrick Schemitz
 * Pi Delport
 * Prudhvi Vatala
@@ -48,6 +62,7 @@ Alphabetical list of contributors:
 * Ryan Wooden
 * saaj
 * Shen Longxing
+* Simon Heisterkamp
 * Sjoerd Job Postmus
 * Soloman Weng
 * spigwitmer
@@ -59,4 +74,6 @@ Alphabetical list of contributors:
 * Ville Skyttä
 * vthriller
 * wayne.wuw
+* Will Jones
+* William Ivanski
 * Yago Riveiro
diff --git a/CHANGELOG b/CHANGELOG
index f8853362..a42577e1 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,3 +1,141 @@
+Release 0.4.4 (Apr 18, 2023)
+----------------------------
+
+Notable Changes
+
+* IMPORTANT: This release fixes a security vulnerability in the
+  parser where a regular expression vulnerable to ReDOS (Regular
+  Expression Denial of Service) was used. See the security advisory
+  for details: https://github.com/andialbrecht/sqlparse/security/advisories/GHSA-rrm6-wvj7-cwh2
+  The vulnerability was discovered by @erik-krogh from GitHub
+  Security Lab (GHSL). Thanks for reporting!
+
+Bug Fixes
+
+* Revert a change from 0.4.0 that changed IN to be a comparison (issue694).
+  The primary expectation is that IN is treated as a keyword and not as a
+  comparison operator. That also follows the definition of reserved keywords
+  for the major SQL syntax definitions.
+* Fix regular expressions for string parsing.
+
+Other
+
+* sqlparse now uses pyproject.toml instead of setup.cfg (issue685).
+
+
+Release 0.4.3 (Sep 23, 2022)
+----------------------------
+
+Enhancements
+
+* Add support for DIV operator (pr664, by chezou).
+* Add support for additional SPARK keywords (pr643, by mrmasterplan).
+* Avoid tokens copy (pr622, by living180).
+* Add REGEXP as a comparison (pr647, by PeterSandwich).
+* Add DISTINCTROW keyword for MS Access (issue677).
+* Improve parsing of CREATE TABLE AS SELECT (pr662, by chezou).
+
+Bug Fixes
+
+* Fix spelling of INDICATOR keyword (pr653, by ptld).
+* Fix formatting error in EXTRACT function (issue562, issue670, pr676, by ecederstrand).
+* Fix bad parsing of create table statements that use lower case (issue217, pr642, by mrmasterplan).
+* Handle backtick as valid quote char (issue628, pr629, by codenamelxl).
+* Allow any unicode character as valid identifier name (issue641).
+
+Other
+
+* Update github actions to test on Python 3.10 as well (pr661, by cclaus).
+
+
+Release 0.4.2 (Sep 10, 2021)
+----------------------------
+
+Notable Changes
+
+* IMPORTANT: This release fixes a security vulnerability in the
+  strip comments filter. In this filter a regular expression that was
+  vulnerable to ReDOS (Regular Expression Denial of Service) was
+  used.
See the security advisory for details: https://github.com/andialbrecht/sqlparse/security/advisories/GHSA-p5w8-wqhj-9hhf + The vulnerability was discovered by @erik-krogh and @yoff from + GitHub Security Lab (GHSL). Thanks for reporting! + +Enhancements + +* Add ELSIF as keyword (issue584). +* Add CONFLICT and ON_ERROR_STOP keywords (pr595, by j-martin). + +Bug Fixes + +* Fix parsing of backticks (issue588). +* Fix parsing of scientific number (issue399). + + +Release 0.4.1 (Oct 08, 2020) +---------------------------- + +Bug Fixes + +* Just removed a debug print statement, sorry... + + +Release 0.4.0 (Oct 07, 2020) +---------------------------- + +Notable Changes + +* Remove support for end-of-life Python 2.7 and 3.4. Python 3.5+ is now + required. +* Remaining strings that only consist of whitespaces are not treated as + statements anymore. Code that ignored the last element from + sqlparse.split() should be updated accordingly since that function + now doesn't return an empty string as the last element in some + cases (issue496). + +Enhancements + +* Add WINDOW keyword (pr579 by ali-tny). +* Add RLIKE keyword (pr582 by wjones1). + +Bug Fixes + +* Improved parsing of IN(...) statements (issue566, pr567 by hurcy). +* Preserve line breaks when removing comments (issue484). +* Fix parsing error when using square bracket notation (issue583). +* Fix splitting when using DECLARE ... HANDLER (issue581). +* Fix splitting of statements using CASE ... WHEN (issue580). +* Improve formatting of type casts in parentheses. +* Stabilize formatting of invalid SQL statements. + + +Release 0.3.1 (Feb 29, 2020) +---------------------------- + +Enhancements + +* Add HQL keywords (pr475, by matwalk). +* Add support for time zone casts (issue489). +* Enhance formatting of AS keyword (issue507, by john-bodley). +* Stabilize grouping engine when parsing invalid SQL statements. + +Bug Fixes + +* Fix splitting of SQL with multiple statements inside + parentheses (issue485, pr486 by win39). +* Correctly identify NULLS FIRST / NULLS LAST as keywords (issue487). +* Fix splitting of SQL statements that contain dollar signs in + identifiers (issue491). +* Remove support for parsing double slash comments introduced in + 0.3.0 (issue456) as it had some side-effects with other dialects and + doesn't seem to be widely used (issue476). +* Restrict detection of alias names to objects that actually could + have an alias (issue455, adopted some parts of pr509 by john-bodley). +* Fix parsing of date/time literals (issue438, by vashek). +* Fix initialization of TokenList (issue499, pr505 by john-bodley). +* Fix parsing of LIKE (issue493, pr525 by dbczumar). +* Improve parsing of identifiers (pr527 by liulk). 
+
+
 Release 0.3.0 (Mar 11, 2019)
 ----------------------------
@@ -94,7 +232,7 @@ Internal Changes
 
 Release 0.2.1 (Aug 13, 2016)
---------------------------
+----------------------------
 
 Notable Changes
diff --git a/MANIFEST.in b/MANIFEST.in
deleted file mode 100644
index 8043b359..00000000
--- a/MANIFEST.in
+++ /dev/null
@@ -1,11 +0,0 @@
-recursive-include docs source/*
-include docs/sqlformat.1
-include docs/Makefile
-recursive-include tests *.py *.sql
-include LICENSE
-include TODO
-include AUTHORS
-include CHANGELOG
-include Makefile
-include setup.cfg
-include tox.ini
diff --git a/Makefile b/Makefile
index 0844e5bf..1657822e 100644
--- a/Makefile
+++ b/Makefile
@@ -22,5 +22,5 @@ clean:
 
 release:
 	@rm -rf dist/
-	python setup.py sdist upload --sign --identity E0B84F81
-	python setup.py bdist_wheel upload --sign --identity E0B84F81
+	python -m build
+	twine upload --sign --identity E0B84F81 dist/*
diff --git a/README.rst b/README.rst
index 3bc9670e..df4e7e36 100644
--- a/README.rst
+++ b/README.rst
@@ -1,67 +1,78 @@
 python-sqlparse - Parse SQL statements
 ======================================
 
-sqlparse is a non-validating SQL parser module for Python.
-
 |buildstatus|_
 |coverage|_
+|docs|_
+|packageversion|_
 
+.. docincludebegin
 
-Install
--------
-
-From pip, run::
-
-    $ pip install --upgrade sqlparse
-
-Consider using the ``--user`` option_.
+sqlparse is a non-validating SQL parser for Python.
+It provides support for parsing, splitting and formatting SQL statements.
 
-.. _option: https://pip.pypa.io/en/latest/user_guide/#user-installs
+The module is compatible with Python 3.5+ and released under the terms of the
+`New BSD license <https://opensource.org/licenses/BSD-3-Clause>`_.
 
-From the repository, run::
+Visit the project page at https://github.com/andialbrecht/sqlparse for
+further information about this project.
 
-    python setup.py install
 
-to install python-sqlparse on your system.
+Quick Start
+-----------
 
-python-sqlparse is compatible with Python 2.7 and Python 3 (>= 3.3).
+.. code-block:: sh
 
+   $ pip install sqlparse
 
-Run Tests
----------
+.. code-block:: python
 
-To run the test suite run::
+   >>> import sqlparse
 
-    tox
+   >>> # Split a string containing two SQL statements:
+   >>> raw = 'select * from foo; select * from bar;'
+   >>> statements = sqlparse.split(raw)
+   >>> statements
+   ['select * from foo;', 'select * from bar;']
 
-Note, you'll need tox installed, of course.
+   >>> # Format the first statement and print it out:
+   >>> first = statements[0]
+   >>> print(sqlparse.format(first, reindent=True, keyword_case='upper'))
+   SELECT *
+   FROM foo;
 
+   >>> # Parsing a SQL statement:
+   >>> parsed = sqlparse.parse('select * from foo')[0]
+   >>> parsed.tokens
+   [<DML 'select' at 0x...>, <Whitespace ' ' at 0x...>, <Wildcard '*' at 0x...>, ...]
 
 Links
 -----
 
-Project Page
-  https://github.com/andialbrecht/sqlparse
+Project page
+   https://github.com/andialbrecht/sqlparse
 
-Documentation
-  https://sqlparse.readthedocs.io/en/latest/
-
-Discussions
-  https://groups.google.com/forum/#!forum/sqlparse
+Bug tracker
+   https://github.com/andialbrecht/sqlparse/issues
 
-Issues/Bugs
-  https://github.com/andialbrecht/sqlparse/issues
+Documentation
+   https://sqlparse.readthedocs.io/
 
 Online Demo
-  https://sqlformat.org/
+   https://sqlformat.org/
 
-python-sqlparse is licensed under the BSD license.
+sqlparse is licensed under the BSD license.
 
 Parts of the code are based on pygments written by Georg Brandl and
 others. pygments-Homepage: http://pygments.org/
 
-.. |buildstatus| image:: https://secure.travis-ci.org/andialbrecht/sqlparse.png?branch=master
-.. _buildstatus: https://travis-ci.org/#!/andialbrecht/sqlparse
-.. |coverage| image:: https://coveralls.io/repos/andialbrecht/sqlparse/badge.svg?branch=master&service=github
-.. _coverage: https://coveralls.io/github/andialbrecht/sqlparse?branch=master
+.. |buildstatus| image:: https://github.com/andialbrecht/sqlparse/actions/workflows/python-app.yml/badge.svg
+.. _buildstatus: https://github.com/andialbrecht/sqlparse/actions/workflows/python-app.yml
+.. |coverage| image:: https://codecov.io/gh/andialbrecht/sqlparse/branch/master/graph/badge.svg
+.. _coverage: https://codecov.io/gh/andialbrecht/sqlparse
+.. |docs| image:: https://readthedocs.org/projects/sqlparse/badge/?version=latest
+.. _docs: https://sqlparse.readthedocs.io/en/latest/?badge=latest
+.. |packageversion| image:: https://img.shields.io/pypi/v/sqlparse?color=%2334D058&label=pypi%20package
+.. _packageversion: https://pypi.org/project/sqlparse
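The Quick Start in the README above stops at ``parsed.tokens``; a minimal sketch (not part of the diff) of how that parse tree can be inspected further, using only the public ``ttype``/``value`` attributes of sqlparse tokens (the commented output is illustrative, exact reprs vary):

    import sqlparse

    parsed = sqlparse.parse('select * from foo')[0]
    for token in parsed.tokens:
        # ttype is None for grouped tokens such as Identifier
        print(token.ttype, repr(token.value))
    # Token.Keyword.DML 'select'
    # Token.Text.Whitespace ' '
    # Token.Wildcard '*'
    # ...
    # None 'foo'   (an Identifier group)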
diff --git a/docs/source/api.rst b/docs/source/api.rst
index e9fcdaa3..40193d0b 100644
--- a/docs/source/api.rst
+++ b/docs/source/api.rst
@@ -46,6 +46,12 @@ The :meth:`~sqlparse.format` function accepts the following keyword arguments.
 ``reindent``
   If ``True`` the indentations of the statements are changed.
 
+``reindent_aligned``
+  If ``True`` the indentations of the statements are changed, and statements are aligned by keywords.
+
+``use_space_around_operators``
+  If ``True`` spaces are used around all operators.
+
 ``indent_tabs``
   If ``True`` tabs instead of spaces are used for indentation.
 
@@ -53,9 +59,14 @@
   The width of the indentation, defaults to 2.
 
 ``wrap_after``
-  The column limit for wrapping comma-separated lists. If unspecified, it
+  The column limit (in characters) for wrapping comma-separated lists. If unspecified, it
   puts every item in the list on its own line.
 
 ``output_format``
   If given the output is additionally formatted to be used as a variable
   in a programming language. Allowed values are "python" and "php".
+
+``comma_first``
+  If ``True`` comma-first notation for column names is used.
+
+
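The options documented in api.rst above map one-to-one onto keyword arguments of ``sqlparse.format``. A short, hedged illustration of the two options this diff adds (output whitespace is approximate and depends on the sqlparse version):

    import sqlparse

    sql = 'select a, sum(b) from foo where a=1 group by a'

    # use_space_around_operators: a=1 becomes a = 1
    print(sqlparse.format(sql, use_space_around_operators=True))

    # reindent_aligned: the statement is reindented and aligned by keywords
    print(sqlparse.format(sql, reindent_aligned=True))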
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 70bd69a5..a5be6409 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-#
 # python-sqlparse documentation build configuration file, created by
 # sphinx-quickstart on Thu Feb 26 08:19:28 2009.
 #
diff --git a/docs/source/extending.rst b/docs/source/extending.rst
new file mode 100644
index 00000000..0c10924b
--- /dev/null
+++ b/docs/source/extending.rst
@@ -0,0 +1,76 @@
+Extending :mod:`sqlparse`
+=========================
+
+.. module:: sqlparse
+   :synopsis: Extending parsing capability of sqlparse.
+
+The :mod:`sqlparse` module uses a SQL grammar that was tuned through usage and numerous
+PRs to fit a broad range of SQL syntaxes, but it cannot cater to every given case since
+some SQL dialects have adopted conflicting meanings of certain keywords. Sqlparse
+therefore exposes a mechanism to configure the fundamental keywords and regular
+expressions that parse the language as described below.
+
+If you find an adaptation that works for your specific use-case, please consider
+contributing it back to the community by opening a PR on
+`GitHub <https://github.com/andialbrecht/sqlparse>`_.
+
+Configuring the Lexer
+---------------------
+
+The lexer is a singleton class that breaks down the stream of characters into language
+tokens. It does this by using a sequence of regular expressions and keywords that are
+listed in the file ``sqlparse.keywords``. Instead of applying these fixed grammar
+definitions directly, the lexer is initialized by default through its
+``default_initialization()`` method. As an API user, you can adapt the Lexer
+configuration by applying your own configuration logic. To do so, start out by clearing
+previous configurations with ``.clear()``, then apply the SQL list with
+``.set_SQL_REGEX(SQL_REGEX)``, and apply keyword lists with ``.add_keywords(KEYWORDS)``.
+
+You can do so by re-using the expressions in ``sqlparse.keywords`` (see example below),
+leaving parts out, or by making up your own master list.
+
+See the expected types of the arguments by inspecting their structure in
+``sqlparse.keywords``.
+(For compatibility with Python 3.4, this library does not use type-hints.)
+
+The following example adds support for the expression ``ZORDER BY``, and adds ``BAR`` as
+a keyword to the lexer:
+
+.. code-block:: python
+
+    import re
+
+    import sqlparse
+    from sqlparse import keywords
+    from sqlparse.lexer import Lexer
+
+    # get the lexer singleton object to configure it
+    lex = Lexer.get_default_instance()
+
+    # Clear the default configurations.
+    # After this call, reg-exps and keyword dictionaries need to be loaded
+    # to make the lexer functional again.
+    lex.clear()
+
+    my_regex = (r"ZORDER\s+BY\b", sqlparse.tokens.Keyword)
+
+    # slice the default SQL_REGEX to inject the custom object
+    lex.set_SQL_REGEX(
+        keywords.SQL_REGEX[:38]
+        + [my_regex]
+        + keywords.SQL_REGEX[38:]
+    )
+
+    # add the default keyword dictionaries
+    lex.add_keywords(keywords.KEYWORDS_COMMON)
+    lex.add_keywords(keywords.KEYWORDS_ORACLE)
+    lex.add_keywords(keywords.KEYWORDS_PLPGSQL)
+    lex.add_keywords(keywords.KEYWORDS_HQL)
+    lex.add_keywords(keywords.KEYWORDS_MSACCESS)
+    lex.add_keywords(keywords.KEYWORDS)
+
+    # add a custom keyword dictionary (a dict mapping keyword to token type)
+    lex.add_keywords({'BAR': sqlparse.tokens.Keyword})
+
+    # no configuration is passed here. The lexer is used as a singleton.
+    sqlparse.parse("select * from foo zorder by bar;")
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 032318a5..e18d2b3c 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -6,33 +6,9 @@
 python-sqlparse
 ===============
 
-:mod:`sqlparse` is a non-validating SQL parser for Python.
-It provides support for parsing, splitting and formatting SQL statements.
-
-The module is compatible with Python 2.7 and Python 3 (>= 3.3)
-and released under the terms of the `New BSD license
-<https://opensource.org/licenses/BSD-3-Clause>`_.
-
-Visit the project page at https://github.com/andialbrecht/sqlparse for
-further information about this project.
-
-
-tl;dr
------
-
-.. code-block:: bash
-
-   $ pip install sqlparse
-   $ python
-   >>> import sqlparse
-   >>> print(sqlparse.format('select * from foo', reindent=True))
-   select *
-   from foo
-   >>> parsed = sqlparse.parse('select * from foo')[0]
-   >>> parsed.tokens
-   [<DML 'select' at 0x...>, <Whitespace ' ' at 0x...>, ...]
-   >>>
-
+.. include:: ../../README.rst
+   :start-after: docincludebegin
+   :end-before: Links
 
 Contents
 --------
@@ -44,7 +20,9 @@
    api
    analyzing
    ui
+   extending
    changes
+   license
    indices
 
@@ -59,3 +37,7 @@
 Bug tracker
    https://github.com/andialbrecht/sqlparse/issues
 
 Documentation
    https://sqlparse.readthedocs.io/
+
+Online Demo
+   https://sqlformat.org/
+
diff --git a/docs/source/intro.rst b/docs/source/intro.rst
index 1a9913b1..1d3c9498 100644
--- a/docs/source/intro.rst
+++ b/docs/source/intro.rst
@@ -6,7 +6,7 @@ Download & Installation
 -----------------------
 
 The latest released version can be obtained from the `Python Package
-Index (PyPI) <https://pypi.org/project/sqlparse/>`_. To extract the
+Index (PyPI) <https://pypi.org/project/sqlparse/>`_. To extract and install the
 module system-wide run
 
 ..
code-block:: bash @@ -48,7 +48,7 @@ SQL statements can be beautified by using the :meth:`~sqlparse.format` function. .. code-block:: python >>> sql = 'select * from foo where id in (select id from bar);' - >>> print sqlparse.format(sql, reindent=True, keyword_case='upper') + >>> print(sqlparse.format(sql, reindent=True, keyword_case='upper')) SELECT * FROM foo WHERE id IN @@ -120,7 +120,7 @@ To check out the latest sources of this module run to check out the latest sources from the repository. -:mod:`sqlparse` is currently tested under Python 2.7, >=3.3 and pypy. Tests are +:mod:`sqlparse` is currently tested under Python 3.5+ and PyPy. Tests are automatically run on each commit and for each pull request on Travis: https://travis-ci.org/andialbrecht/sqlparse @@ -135,9 +135,4 @@ above. Ideally a Python 2 and a Python 3 version should be tested locally. Please file bug reports and feature requests on the project site at -https://github.com/andialbrecht/sqlparse/issues/new or if you have -code to contribute upload it to https://codereview.appspot.com/ and -add albrecht.andi@googlemail.com as reviewer. - -For more information about the review tool and how to use it visit -it's project page: https://github.com/rietveld-codereview/rietveld +https://github.com/andialbrecht/sqlparse/issues/new. diff --git a/docs/source/license.rst b/docs/source/license.rst new file mode 100644 index 00000000..01f3963c --- /dev/null +++ b/docs/source/license.rst @@ -0,0 +1,4 @@ +License +======= + +.. include:: ../../LICENSE \ No newline at end of file diff --git a/examples/column_defs_lowlevel.py b/examples/column_defs_lowlevel.py index eb289927..11ff4f38 100644 --- a/examples/column_defs_lowlevel.py +++ b/examples/column_defs_lowlevel.py @@ -1,7 +1,6 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- # -# Copyright (C) 2009-2018 the sqlparse authors and contributors +# Copyright (C) 2009-2020 the sqlparse authors and contributors # # # This example is part of python-sqlparse and is released under @@ -17,18 +16,25 @@ def extract_definitions(token_list): # assumes that token_list is a parenthesis definitions = [] tmp = [] - # grab the first token, ignoring whitespace. 
idx=1 to skip open (
-    tidx, token = token_list.token_next(1)
-    while token and not token.match(sqlparse.tokens.Punctuation, ')'):
-        tmp.append(token)
-        # grab the next token, this time including whitespace
-        tidx, token = token_list.token_next(tidx, skip_ws=False)
-        # split on ",", except when on end of statement
-        if token and token.match(sqlparse.tokens.Punctuation, ','):
-            definitions.append(tmp)
+    par_level = 0
+    for token in token_list.flatten():
+        if token.is_whitespace:
+            continue
+        elif token.match(sqlparse.tokens.Punctuation, '('):
+            par_level += 1
+            continue
+        if token.match(sqlparse.tokens.Punctuation, ')'):
+            if par_level == 0:
+                break
+            else:
+                # leaving a nested parenthesis
+                par_level -= 1
+        elif token.match(sqlparse.tokens.Punctuation, ','):
+            if tmp:
+                definitions.append(tmp)
             tmp = []
-            tidx, token = token_list.token_next(tidx)
-    if tmp and isinstance(tmp[0], sqlparse.sql.Identifier):
+        else:
+            tmp.append(token)
+    if tmp:
         definitions.append(tmp)
     return definitions
 
@@ -46,5 +52,5 @@ def extract_definitions(token_list):
     columns = extract_definitions(par)
 
     for column in columns:
-        print('NAME: {name:10} DEFINITION: {definition}'.format(
-            name=column[0], definition=''.join(str(t) for t in column[1:])))
+        print('NAME: {name!s:12} DEFINITION: {definition}'.format(
+            name=column[0], definition=' '.join(str(t) for t in column[1:])))
diff --git a/examples/extract_table_names.py b/examples/extract_table_names.py
index a3ca3339..9837806f 100644
--- a/examples/extract_table_names.py
+++ b/examples/extract_table_names.py
@@ -1,7 +1,6 @@
 #!/usr/bin/env python
-# -*- coding: utf-8 -*-
 #
-# Copyright (C) 2009-2018 the sqlparse authors and contributors
+# Copyright (C) 2009-2020 the sqlparse authors and contributors
 #
 #
 # This example is part of python-sqlparse and is released under
@@ -32,10 +31,9 @@ def extract_from_part(parsed):
     for item in parsed.tokens:
         if from_seen:
             if is_subselect(item):
-                for x in extract_from_part(item):
-                    yield x
+                yield from extract_from_part(item)
             elif item.ttype is Keyword:
-                raise StopIteration
+                return
             else:
                 yield item
         elif item.ttype is Keyword and item.value.upper() == 'FROM':
@@ -67,4 +65,4 @@ def extract_tables(sql):
     """
     tables = ', '.join(extract_tables(sql))
-    print('Tables: {0}'.format(tables))
+    print('Tables: {}'.format(tables))
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 00000000..338a53ce
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,70 @@
+[build-system]
+requires = ["flit_core >=3.2,<4"]
+build-backend = "flit_core.buildapi"
+
+[project]
+name = "sqlparse"
+description = "A non-validating SQL parser."
+authors = [{name = "Andi Albrecht", email = "albrecht.andi@gmail.com"}]
+readme = "README.rst"
+dynamic = ["version"]
+classifiers = [
+    "Development Status :: 5 - Production/Stable",
+    "Intended Audience :: Developers",
+    "License :: OSI Approved :: BSD License",
+    "Operating System :: OS Independent",
+    "Programming Language :: Python",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3 :: Only",
+    "Programming Language :: Python :: 3.5",
+    "Programming Language :: Python :: 3.6",
+    "Programming Language :: Python :: 3.7",
+    "Programming Language :: Python :: 3.8",
+    "Programming Language :: Python :: 3.9",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: Implementation :: CPython",
+    "Programming Language :: Python :: Implementation :: PyPy",
+    "Topic :: Database",
+    "Topic :: Software Development",
+]
+requires-python = ">=3.5"
+
+[project.urls]
+Home = "https://github.com/andialbrecht/sqlparse"
+Documentation = "https://sqlparse.readthedocs.io/"
+"Release Notes" = "https://sqlparse.readthedocs.io/en/latest/changes/"
+Source = "https://github.com/andialbrecht/sqlparse"
+Tracker = "https://github.com/andialbrecht/sqlparse/issues"
+
+[project.scripts]
+sqlformat = "sqlparse.__main__:main"
+
+[project.optional-dependencies]
+dev = [
+    "flake8",
+    "build",
+]
+test = [
+    "pytest",
+    "pytest-cov",
+]
+doc = [
+    "sphinx",
+]
+
+[tool.flit.sdist]
+include = [
+    "docs/source/",
+    "docs/sqlformat.1",
+    "docs/Makefile",
+    "tests/*.py", "tests/files/*.sql",
+    "LICENSE",
+    "TODO",
+    "AUTHORS",
+    "CHANGELOG",
+    "Makefile",
+    "tox.ini",
+]
+
+[tool.coverage.run]
+omit = ["sqlparse/__main__.py"]
diff --git a/setup.cfg b/setup.cfg
deleted file mode 100644
index c4dee1df..00000000
--- a/setup.cfg
+++ /dev/null
@@ -1,20 +0,0 @@
-[bdist_wheel]
-universal = 1
-
-[metadata]
-license_file = LICENSE
-
-[tool:pytest]
-xfail_strict = True
-
-[flake8]
-exclude =
-    sqlparse/compat.py
-ignore =
-    W503,
-    E731
-
-[coverage:run]
-branch = False
-omit =
-    sqlparse/__main__.py
diff --git a/setup.py b/setup.py
deleted file mode 100644
index 345d0ce4..00000000
--- a/setup.py
+++ /dev/null
@@ -1,110 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-#
-# Copyright (C) 2009-2018 the sqlparse authors and contributors
-#
-#
-# This setup script is part of python-sqlparse and is released under
-# the BSD License: https://opensource.org/licenses/BSD-3-Clause
-
-import re
-
-from setuptools import setup, find_packages
-
-
-def get_version():
-    """Parse __init__.py for version number instead of importing the file."""
-    VERSIONFILE = 'sqlparse/__init__.py'
-    VSRE = r'^__version__ = [\'"]([^\'"]*)[\'"]'
-    with open(VERSIONFILE) as f:
-        verstrline = f.read()
-    mo = re.search(VSRE, verstrline, re.M)
-    if mo:
-        return mo.group(1)
-    raise RuntimeError('Unable to find version in {fn}'.format(fn=VERSIONFILE))
-
-
-LONG_DESCRIPTION = """
-``sqlparse`` is a non-validating SQL parser module.
-It provides support for parsing, splitting and formatting SQL statements.
-
-Visit the `project page <https://github.com/andialbrecht/sqlparse>`_ for
-additional information and documentation.
-
-**Example Usage**
-
-
-Splitting SQL statements::
-
-    >>> import sqlparse
-    >>> sqlparse.split('select * from foo; select * from bar;')
-    [u'select * from foo; ', u'select * from bar;']
-
-
-Formatting statements::
-
-    >>> sql = 'select * from foo where id in (select id from bar);'
-    >>> print sqlparse.format(sql, reindent=True, keyword_case='upper')
-    SELECT *
-    FROM foo
-    WHERE id IN
-      (SELECT id
-       FROM bar);
-
-
-Parsing::
-
-    >>> sql = 'select * from someschema.mytable where id = 1'
-    >>> res = sqlparse.parse(sql)
-    >>> res
-    (<Statement 'select...' at 0x...>,)
-    >>> stmt = res[0]
-    >>> str(stmt)  # converting it back to unicode
-    'select * from someschema.mytable where id = 1'
-    >>> # This is how the internal representation looks like:
-    >>> stmt.tokens
-    (<DML 'select' at 0x...>,
-     <Whitespace ' ' at 0x...>,
-     <Wildcard '*' at 0x...>,
-     <Whitespace ' ' at 0x...>,
-     <Keyword 'from' at 0x...>,
-     <Whitespace ' ' at 0x...>,
-     <Identifier 'somesc...' at 0x...>,
-     <Whitespace ' ' at 0x...>,
-     <Where 'where ...' at 0x...>)
-
-"""
-
-setup(
-    name='sqlparse',
-    version=get_version(),
-    author='Andi Albrecht',
-    author_email='albrecht.andi@gmail.com',
-    url='https://github.com/andialbrecht/sqlparse',
-    description='Non-validating SQL parser',
-    long_description=LONG_DESCRIPTION,
-    license='BSD',
-    python_requires=">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*",
-    classifiers=[
-        'Development Status :: 5 - Production/Stable',
-        'Intended Audience :: Developers',
-        'License :: OSI Approved :: BSD License',
-        'Operating System :: OS Independent',
-        'Programming Language :: Python',
-        'Programming Language :: Python :: 2',
-        'Programming Language :: Python :: 2.7',
-        'Programming Language :: Python :: 3',
-        'Programming Language :: Python :: 3.4',
-        'Programming Language :: Python :: 3.5',
-        'Programming Language :: Python :: 3.6',
-        'Programming Language :: Python :: 3.7',
-        'Topic :: Database',
-        'Topic :: Software Development',
-    ],
-    packages=find_packages(exclude=('tests',)),
-    entry_points={
-        'console_scripts': [
-            'sqlformat = sqlparse.__main__:main',
-        ]
-    },
-)
diff --git a/sqlparse/__init__.py b/sqlparse/__init__.py
index 69044d6a..122595b3 100644
--- a/sqlparse/__init__.py
+++ b/sqlparse/__init__.py
@@ -1,6 +1,5 @@
-# -*- coding: utf-8 -*-
 #
-# Copyright (C) 2009-2018 the sqlparse authors and contributors
+# Copyright (C) 2009-2020 the sqlparse authors and contributors
 #
 #
 # This module is part of python-sqlparse and is released under
@@ -16,9 +15,8 @@
 from sqlparse import filters
 from sqlparse import formatter
-from sqlparse.compat import text_type
 
-__version__ = '0.3.0'
+__version__ = '0.4.4'
 __all__ = ['engine', 'filters', 'formatter', 'sql', 'tokens', 'cli']
 
@@ -58,7 +56,7 @@ def format(sql, encoding=None, **options):
     options = formatter.validate_options(options)
     stack = formatter.build_filter_stack(stack, options)
     stack.postprocess.append(filters.SerializerUnicode())
-    return u''.join(stack.run(sql, encoding))
+    return ''.join(stack.run(sql, encoding))
 
 
 def split(sql, encoding=None):
@@ -69,4 +67,4 @@
     :returns: A list of strings.
""" stack = engine.FilterStack() - return [text_type(stmt).strip() for stmt in stack.run(sql, encoding)] + return [str(stmt).strip() for stmt in stack.run(sql, encoding)] diff --git a/sqlparse/__main__.py b/sqlparse/__main__.py index 867d75d5..2bf25132 100644 --- a/sqlparse/__main__.py +++ b/sqlparse/__main__.py @@ -1,7 +1,6 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- # -# Copyright (C) 2009-2018 the sqlparse authors and contributors +# Copyright (C) 2009-2020 the sqlparse authors and contributors # # # This module is part of python-sqlparse and is released under diff --git a/sqlparse/cli.py b/sqlparse/cli.py index 25555a59..7a8aacbf 100755 --- a/sqlparse/cli.py +++ b/sqlparse/cli.py @@ -1,7 +1,6 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- # -# Copyright (C) 2009-2018 the sqlparse authors and contributors +# Copyright (C) 2009-2020 the sqlparse authors and contributors # # # This module is part of python-sqlparse and is released under @@ -23,10 +22,8 @@ import argparse import sys from io import TextIOWrapper -from codecs import open, getreader import sqlparse -from sqlparse.compat import PY2 from sqlparse.exceptions import SQLParseError @@ -62,16 +59,16 @@ def create_parser(): metavar='CHOICE', dest='keyword_case', choices=_CASE_CHOICES, - help='change case of keywords, CHOICE is one of {0}'.format( - ', '.join('"{0}"'.format(x) for x in _CASE_CHOICES))) + help='change case of keywords, CHOICE is one of {}'.format( + ', '.join('"{}"'.format(x) for x in _CASE_CHOICES))) group.add_argument( '-i', '--identifiers', metavar='CHOICE', dest='identifier_case', choices=_CASE_CHOICES, - help='change case of identifiers, CHOICE is one of {0}'.format( - ', '.join('"{0}"'.format(x) for x in _CASE_CHOICES))) + help='change case of identifiers, CHOICE is one of {}'.format( + ', '.join('"{}"'.format(x) for x in _CASE_CHOICES))) group.add_argument( '-l', '--language', @@ -153,7 +150,7 @@ def create_parser(): def _error(msg): """Print msg and optionally exit with return code exit_.""" - sys.stderr.write(u'[ERROR] {0}\n'.format(msg)) + sys.stderr.write('[ERROR] {}\n'.format(msg)) return 1 @@ -162,29 +159,26 @@ def main(args=None): args = parser.parse_args(args) if args.filename == '-': # read from stdin - if PY2: - data = getreader(args.encoding)(sys.stdin).read() - else: - wrapper = TextIOWrapper(sys.stdin.buffer, encoding=args.encoding) - try: - data = wrapper.read() - finally: - wrapper.detach() + wrapper = TextIOWrapper(sys.stdin.buffer, encoding=args.encoding) + try: + data = wrapper.read() + finally: + wrapper.detach() else: try: - with open(args.filename, 'r', args.encoding) as f: + with open(args.filename, encoding=args.encoding) as f: data = ''.join(f.readlines()) - except IOError as e: + except OSError as e: return _error( - u'Failed to read {0}: {1}'.format(args.filename, e)) + 'Failed to read {}: {}'.format(args.filename, e)) close_stream = False if args.outfile: try: - stream = open(args.outfile, 'w', args.encoding) + stream = open(args.outfile, 'w', encoding=args.encoding) close_stream = True - except IOError as e: - return _error(u'Failed to open {0}: {1}'.format(args.outfile, e)) + except OSError as e: + return _error('Failed to open {}: {}'.format(args.outfile, e)) else: stream = sys.stdout @@ -192,7 +186,7 @@ def main(args=None): try: formatter_opts = sqlparse.formatter.validate_options(formatter_opts) except SQLParseError as e: - return _error(u'Invalid options: {0}'.format(e)) + return _error('Invalid options: {}'.format(e)) s = sqlparse.format(data, **formatter_opts) 
stream.write(s) diff --git a/sqlparse/compat.py b/sqlparse/compat.py deleted file mode 100644 index d2214bed..00000000 --- a/sqlparse/compat.py +++ /dev/null @@ -1,45 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (C) 2009-2018 the sqlparse authors and contributors -# -# -# This module is part of python-sqlparse and is released under -# the BSD License: https://opensource.org/licenses/BSD-3-Clause - -"""Python 2/3 compatibility. - -This module only exists to avoid a dependency on six -for very trivial stuff. We only need to take care of -string types, buffers and metaclasses. - -Parts of the code is copied directly from six: -https://bitbucket.org/gutworth/six -""" - -import sys -from io import TextIOBase - -PY2 = sys.version_info[0] == 2 -PY3 = sys.version_info[0] == 3 - - -if PY3: - def unicode_compatible(cls): - return cls - - text_type = str - string_types = (str,) - from io import StringIO - file_types = (StringIO, TextIOBase) - - -elif PY2: - def unicode_compatible(cls): - cls.__unicode__ = cls.__str__ - cls.__str__ = lambda x: x.__unicode__().encode('utf-8') - return cls - - text_type = unicode - string_types = (str, unicode,) - from StringIO import StringIO - file_types = (file, StringIO, TextIOBase) diff --git a/sqlparse/engine/__init__.py b/sqlparse/engine/__init__.py index 0b3f3eb5..6d54d514 100644 --- a/sqlparse/engine/__init__.py +++ b/sqlparse/engine/__init__.py @@ -1,6 +1,5 @@ -# -*- coding: utf-8 -*- # -# Copyright (C) 2009-2018 the sqlparse authors and contributors +# Copyright (C) 2009-2020 the sqlparse authors and contributors # # # This module is part of python-sqlparse and is released under diff --git a/sqlparse/engine/filter_stack.py b/sqlparse/engine/filter_stack.py index fc77fd64..9665a224 100644 --- a/sqlparse/engine/filter_stack.py +++ b/sqlparse/engine/filter_stack.py @@ -1,6 +1,5 @@ -# -*- coding: utf-8 -*- # -# Copyright (C) 2009-2018 the sqlparse authors and contributors +# Copyright (C) 2009-2020 the sqlparse authors and contributors # # # This module is part of python-sqlparse and is released under @@ -13,7 +12,7 @@ from sqlparse.engine.statement_splitter import StatementSplitter -class FilterStack(object): +class FilterStack: def __init__(self): self.preprocess = [] self.stmtprocess = [] diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py index afc91236..86d8fc64 100644 --- a/sqlparse/engine/grouping.py +++ b/sqlparse/engine/grouping.py @@ -1,6 +1,5 @@ -# -*- coding: utf-8 -*- # -# Copyright (C) 2009-2018 the sqlparse authors and contributors +# Copyright (C) 2009-2020 the sqlparse authors and contributors # # # This module is part of python-sqlparse and is released under @@ -88,6 +87,56 @@ def post(tlist, pidx, tidx, nidx): _group(tlist, sql.Identifier, match, valid_prev, valid_next, post) +def group_tzcasts(tlist): + def match(token): + return token.ttype == T.Keyword.TZCast + + def valid_prev(token): + return token is not None + + def valid_next(token): + return token is not None and ( + token.is_whitespace + or token.match(T.Keyword, 'AS') + or token.match(*sql.TypedLiteral.M_CLOSE) + ) + + def post(tlist, pidx, tidx, nidx): + return pidx, nidx + + _group(tlist, sql.Identifier, match, valid_prev, valid_next, post) + + +def group_typed_literal(tlist): + # definitely not complete, see e.g.: + # https://docs.microsoft.com/en-us/sql/odbc/reference/appendixes/interval-literal-syntax + # https://docs.microsoft.com/en-us/sql/odbc/reference/appendixes/interval-literals + # https://www.postgresql.org/docs/9.1/datatype-datetime.html + # 
https://www.postgresql.org/docs/9.1/functions-datetime.html + def match(token): + return imt(token, m=sql.TypedLiteral.M_OPEN) + + def match_to_extend(token): + return isinstance(token, sql.TypedLiteral) + + def valid_prev(token): + return token is not None + + def valid_next(token): + return token is not None and token.match(*sql.TypedLiteral.M_CLOSE) + + def valid_final(token): + return token is not None and token.match(*sql.TypedLiteral.M_EXTEND) + + def post(tlist, pidx, tidx, nidx): + return tidx, nidx + + _group(tlist, sql.TypedLiteral, match, valid_prev, valid_next, + post, extend=False) + _group(tlist, sql.TypedLiteral, match_to_extend, valid_prev, valid_final, + post, extend=True) + + def group_period(tlist): def match(token): return token.match(T.Punctuation, '.') @@ -121,7 +170,7 @@ def valid_prev(token): return token.normalized == 'NULL' or not token.is_keyword def valid_next(token): - ttypes = T.DML, T.DDL + ttypes = T.DML, T.DDL, T.CTE return not imt(token, t=ttypes) and token is not None def post(tlist, pidx, tidx, nidx): @@ -149,7 +198,7 @@ def post(tlist, pidx, tidx, nidx): def group_comparison(tlist): sqlcls = (sql.Parenthesis, sql.Function, sql.Identifier, - sql.Operation) + sql.Operation, sql.TypedLiteral) ttypes = T_NUMERICAL + T_STRING + T_NAME def match(token): @@ -204,13 +253,16 @@ def post(tlist, pidx, tidx, nidx): def group_operator(tlist): ttypes = T_NUMERICAL + T_STRING + T_NAME sqlcls = (sql.SquareBrackets, sql.Parenthesis, sql.Function, - sql.Identifier, sql.Operation) + sql.Identifier, sql.Operation, sql.TypedLiteral) def match(token): return imt(token, t=(T.Operator, T.Wildcard)) def valid(token): - return imt(token, i=sqlcls, t=ttypes) + return imt(token, i=sqlcls, t=ttypes) \ + or (token and token.match( + T.Keyword, + ('CURRENT_DATE', 'CURRENT_TIME', 'CURRENT_TIMESTAMP'))) def post(tlist, pidx, tidx, nidx): tlist[tidx].ttype = T.Operator @@ -289,12 +341,15 @@ def group_aliased(tlist): def group_functions(tlist): has_create = False has_table = False + has_as = False for tmp_token in tlist.tokens: - if tmp_token.value == 'CREATE': + if tmp_token.value.upper() == 'CREATE': has_create = True - if tmp_token.value == 'TABLE': + if tmp_token.value.upper() == 'TABLE': has_table = True - if has_create and has_table: + if tmp_token.value == 'AS': + has_as = True + if has_create and has_table and not has_as: return tidx, token = tlist.token_next_by(t=T.Name) @@ -358,6 +413,8 @@ def group(stmt): group_identifier, group_order, group_typecasts, + group_tzcasts, + group_typed_literal, group_operator, group_comparison, group_as, @@ -385,6 +442,8 @@ def _group(tlist, cls, match, pidx, prev_ = None, None for idx, token in enumerate(list(tlist)): tidx = idx - tidx_offset + if tidx < 0: # tidx shouldn't get negative + continue if token.is_whitespace: continue diff --git a/sqlparse/engine/statement_splitter.py b/sqlparse/engine/statement_splitter.py index 444b46a9..a991959a 100644 --- a/sqlparse/engine/statement_splitter.py +++ b/sqlparse/engine/statement_splitter.py @@ -1,6 +1,5 @@ -# -*- coding: utf-8 -*- # -# Copyright (C) 2009-2018 the sqlparse authors and contributors +# Copyright (C) 2009-2020 the sqlparse authors and contributors # # # This module is part of python-sqlparse and is released under @@ -9,7 +8,7 @@ from sqlparse import sql, tokens as T -class StatementSplitter(object): +class StatementSplitter: """Filter that split stream at individual statements""" def __init__(self): @@ -27,11 +26,13 @@ def _reset(self): def _change_splitlevel(self, ttype, value): """Get 
the new split level (increase, decrease or remain equal)""" - # ANSI - # if normal token return - # wouldn't parenthesis increase/decrease a level? - # no, inside a parenthesis can't start new statement - if ttype not in T.Keyword: + + # parenthesis increase/decrease a level + if ttype is T.Punctuation and value == '(': + return 1 + elif ttype is T.Punctuation and value == ')': + return -1 + elif ttype not in T.Keyword: # if normal token return return 0 # Everything after here is ttype = T.Keyword @@ -65,7 +66,7 @@ def _change_splitlevel(self, ttype, value): self._begin_depth = max(0, self._begin_depth - 1) return -1 - if (unified in ('IF', 'FOR', 'WHILE') + if (unified in ('IF', 'FOR', 'WHILE', 'CASE') and self._is_create and self._begin_depth > 0): return 1 @@ -102,5 +103,5 @@ def process(self, stream): self.consume_ws = True # Yield pending statement (if any) - if self.tokens: + if self.tokens and not all(t.is_whitespace for t in self.tokens): yield sql.Statement(self.tokens) diff --git a/sqlparse/exceptions.py b/sqlparse/exceptions.py index 01e60f7b..11285da0 100644 --- a/sqlparse/exceptions.py +++ b/sqlparse/exceptions.py @@ -1,6 +1,5 @@ -# -*- coding: utf-8 -*- # -# Copyright (C) 2009-2018 the sqlparse authors and contributors +# Copyright (C) 2009-2020 the sqlparse authors and contributors # # # This module is part of python-sqlparse and is released under diff --git a/sqlparse/filters/__init__.py b/sqlparse/filters/__init__.py index c60d84d7..5bd6b325 100644 --- a/sqlparse/filters/__init__.py +++ b/sqlparse/filters/__init__.py @@ -1,6 +1,5 @@ -# -*- coding: utf-8 -*- # -# Copyright (C) 2009-2018 the sqlparse authors and contributors +# Copyright (C) 2009-2020 the sqlparse authors and contributors # # # This module is part of python-sqlparse and is released under diff --git a/sqlparse/filters/aligned_indent.py b/sqlparse/filters/aligned_indent.py index d3433c94..dc609263 100644 --- a/sqlparse/filters/aligned_indent.py +++ b/sqlparse/filters/aligned_indent.py @@ -1,17 +1,15 @@ -# -*- coding: utf-8 -*- # -# Copyright (C) 2009-2018 the sqlparse authors and contributors +# Copyright (C) 2009-2020 the sqlparse authors and contributors # # # This module is part of python-sqlparse and is released under # the BSD License: https://opensource.org/licenses/BSD-3-Clause from sqlparse import sql, tokens as T -from sqlparse.compat import text_type from sqlparse.utils import offset, indent -class AlignedIndentFilter(object): +class AlignedIndentFilter: join_words = (r'((LEFT\s+|RIGHT\s+|FULL\s+)?' r'(INNER\s+|OUTER\s+|STRAIGHT\s+)?|' r'(CROSS\s+|NATURAL\s+)?)?JOIN\b') @@ -73,7 +71,7 @@ def _process_case(self, tlist): end_token = tlist.token_next_by(m=(T.Keyword, 'END'))[1] cases.append((None, [end_token])) - condition_width = [len(' '.join(map(text_type, cond))) if cond else 0 + condition_width = [len(' '.join(map(str, cond))) if cond else 0 for cond, _ in cases] max_cond_width = max(condition_width) @@ -82,8 +80,7 @@ def _process_case(self, tlist): stmt = cond[0] if cond else value[0] if i > 0: - tlist.insert_before(stmt, self.nl( - offset_ - len(text_type(stmt)))) + tlist.insert_before(stmt, self.nl(offset_ - len(str(stmt)))) if cond: ws = sql.Token(T.Whitespace, self.char * ( max_cond_width - condition_width[i])) @@ -105,12 +102,12 @@ def _split_kwds(self, tlist): # joins, group/order by are special case. 
only consider the first # word as aligner if ( - token.match(T.Keyword, self.join_words, regex=True) or - token.match(T.Keyword, self.by_words, regex=True) + token.match(T.Keyword, self.join_words, regex=True) + or token.match(T.Keyword, self.by_words, regex=True) ): token_indent = token.value.split()[0] else: - token_indent = text_type(token) + token_indent = str(token) tlist.insert_before(token, self.nl(token_indent)) tidx += 1 tidx, token = self._next_token(tlist, tidx) diff --git a/sqlparse/filters/others.py b/sqlparse/filters/others.py index 52b86170..6905f2d6 100644 --- a/sqlparse/filters/others.py +++ b/sqlparse/filters/others.py @@ -1,22 +1,36 @@ -# -*- coding: utf-8 -*- # -# Copyright (C) 2009-2018 the sqlparse authors and contributors +# Copyright (C) 2009-2020 the sqlparse authors and contributors # # # This module is part of python-sqlparse and is released under # the BSD License: https://opensource.org/licenses/BSD-3-Clause +import re + from sqlparse import sql, tokens as T from sqlparse.utils import split_unquoted_newlines -class StripCommentsFilter(object): +class StripCommentsFilter: + @staticmethod def _process(tlist): def get_next_comment(): # TODO(andi) Comment types should be unified, see related issue38 return tlist.token_next_by(i=sql.Comment, t=T.Comment) + def _get_insert_token(token): + """Returns either a whitespace or the line breaks from token.""" + # See issue484 why line breaks should be preserved. + # Note: The actual value for a line break is replaced by \n + # in SerializerUnicode which will be executed in the + # postprocessing state. + m = re.search(r'((\r|\n)+) *$', token.value) + if m is not None: + return sql.Token(T.Whitespace.Newline, m.groups()[0]) + else: + return sql.Token(T.Whitespace, ' ') + tidx, token = get_next_comment() while token: pidx, prev_ = tlist.token_prev(tidx, skip_ws=False) @@ -27,15 +41,12 @@ def get_next_comment(): or prev_.is_whitespace or prev_.match(T.Punctuation, '(') or next_.is_whitespace or next_.match(T.Punctuation, ')')): # Insert a whitespace to ensure the following SQL produces - # a valid SQL (see #425). For example: - # - # Before: select a--comment\nfrom foo - # After: select a from foo - if prev_ is not None and next_ is None: - tlist.tokens.insert(tidx, sql.Token(T.Whitespace, ' ')) + # a valid SQL (see #425). 
+ if prev_ is not None and not prev_.match(T.Punctuation, '('): + tlist.tokens.insert(tidx, _get_insert_token(token)) tlist.tokens.remove(token) else: - tlist.tokens[tidx] = sql.Token(T.Whitespace, ' ') + tlist.tokens[tidx] = _get_insert_token(token) tidx, token = get_next_comment() @@ -45,7 +56,7 @@ def process(self, stmt): return stmt -class StripWhitespaceFilter(object): +class StripWhitespaceFilter: def _stripws(self, tlist): func_name = '_stripws_{cls}'.format(cls=type(tlist).__name__) func = getattr(self, func_name.lower(), self._stripws_default) @@ -90,7 +101,7 @@ def process(self, stmt, depth=0): return stmt -class SpacesAroundOperatorsFilter(object): +class SpacesAroundOperatorsFilter: @staticmethod def _process(tlist): @@ -118,7 +129,7 @@ def process(self, stmt): # --------------------------- # postprocess -class SerializerUnicode(object): +class SerializerUnicode: @staticmethod def process(stmt): lines = split_unquoted_newlines(stmt) diff --git a/sqlparse/filters/output.py b/sqlparse/filters/output.py index 3fbc46d1..253537e0 100644 --- a/sqlparse/filters/output.py +++ b/sqlparse/filters/output.py @@ -1,16 +1,14 @@ -# -*- coding: utf-8 -*- # -# Copyright (C) 2009-2018 the sqlparse authors and contributors +# Copyright (C) 2009-2020 the sqlparse authors and contributors # # # This module is part of python-sqlparse and is released under # the BSD License: https://opensource.org/licenses/BSD-3-Clause from sqlparse import sql, tokens as T -from sqlparse.compat import text_type -class OutputFilter(object): +class OutputFilter: varname_prefix = '' def __init__(self, varname='sql'): @@ -23,11 +21,11 @@ def _process(self, stream, varname, has_nl): def process(self, stmt): self.count += 1 if self.count > 1: - varname = u'{f.varname}{f.count}'.format(f=self) + varname = '{f.varname}{f.count}'.format(f=self) else: varname = self.varname - has_nl = len(text_type(stmt).strip().splitlines()) > 1 + has_nl = len(str(stmt).strip().splitlines()) > 1 stmt.tokens = self._process(stmt.tokens, varname, has_nl) return stmt diff --git a/sqlparse/filters/reindent.py b/sqlparse/filters/reindent.py index acec8ca4..9fb232f0 100644 --- a/sqlparse/filters/reindent.py +++ b/sqlparse/filters/reindent.py @@ -1,17 +1,15 @@ -# -*- coding: utf-8 -*- # -# Copyright (C) 2009-2018 the sqlparse authors and contributors +# Copyright (C) 2009-2020 the sqlparse authors and contributors # # # This module is part of python-sqlparse and is released under # the BSD License: https://opensource.org/licenses/BSD-3-Clause from sqlparse import sql, tokens as T -from sqlparse.compat import text_type from sqlparse.utils import offset, indent -class ReindentFilter(object): +class ReindentFilter: def __init__(self, width=2, char=' ', wrap_after=0, n='\n', comma_first=False, indent_after_first=False, indent_columns=False): @@ -42,7 +40,7 @@ def leading_ws(self): return self.offset + self.indent * self.width def _get_offset(self, token): - raw = u''.join(map(text_type, self._flatten_up_to_token(token))) + raw = ''.join(map(str, self._flatten_up_to_token(token))) line = (raw or '\n').splitlines()[-1] # Now take current offset into account and return relative offset. 
return len(line) - len(self.char * self.leading_ws) @@ -71,7 +69,7 @@ def _split_kwds(self, tlist): tidx, token = self._next_token(tlist) while token: pidx, prev_ = tlist.token_prev(tidx, skip_ws=False) - uprev = text_type(prev_) + uprev = str(prev_) if prev_ and prev_.is_whitespace: del tlist.tokens[pidx] @@ -104,9 +102,10 @@ def _process(self, tlist): def _process_where(self, tlist): tidx, token = tlist.token_next_by(m=(T.Keyword, 'WHERE')) + if not token: + return # issue121, errors in statement fixed?? tlist.insert_before(tidx, self.nl()) - with indent(self): self._process_default(tlist) @@ -114,6 +113,8 @@ def _process_parenthesis(self, tlist): ttypes = T.Keyword.DML, T.Keyword.DDL _, is_dml_dll = tlist.token_next_by(t=ttypes) fidx, first = tlist.token_next_by(m=sql.Parenthesis.M_OPEN) + if first is None: + return with indent(self, 1 if is_dml_dll else 0): tlist.tokens.insert(0, self.nl()) if is_dml_dll else None @@ -234,7 +235,7 @@ def process(self, stmt): self._process(stmt) if self._last_stmt is not None: - nl = '\n' if text_type(self._last_stmt).endswith('\n') else '\n\n' + nl = '\n' if str(self._last_stmt).endswith('\n') else '\n\n' stmt.tokens.insert(0, sql.Token(T.Whitespace, nl)) self._last_stmt = stmt diff --git a/sqlparse/filters/right_margin.py b/sqlparse/filters/right_margin.py index 16581389..3e670562 100644 --- a/sqlparse/filters/right_margin.py +++ b/sqlparse/filters/right_margin.py @@ -1,6 +1,5 @@ -# -*- coding: utf-8 -*- # -# Copyright (C) 2009-2018 the sqlparse authors and contributors +# Copyright (C) 2009-2020 the sqlparse authors and contributors # # # This module is part of python-sqlparse and is released under @@ -9,11 +8,10 @@ import re from sqlparse import sql, tokens as T -from sqlparse.compat import text_type # FIXME: Doesn't work -class RightMarginFilter(object): +class RightMarginFilter: keep_together = ( # sql.TypeCast, sql.Identifier, sql.Alias, ) @@ -32,14 +30,14 @@ def _process(self, group, stream): elif token.is_group and type(token) not in self.keep_together: token.tokens = self._process(token, token.tokens) else: - val = text_type(token) + val = str(token) if len(self.line) + len(val) > self.width: match = re.search(r'^ +', self.line) if match is not None: indent = match.group() else: indent = '' - yield sql.Token(T.Whitespace, '\n{0}'.format(indent)) + yield sql.Token(T.Whitespace, '\n{}'.format(indent)) self.line = indent self.line += val yield token diff --git a/sqlparse/filters/tokens.py b/sqlparse/filters/tokens.py index 93182b19..cc00a844 100644 --- a/sqlparse/filters/tokens.py +++ b/sqlparse/filters/tokens.py @@ -1,21 +1,19 @@ -# -*- coding: utf-8 -*- # -# Copyright (C) 2009-2018 the sqlparse authors and contributors +# Copyright (C) 2009-2020 the sqlparse authors and contributors # # # This module is part of python-sqlparse and is released under # the BSD License: https://opensource.org/licenses/BSD-3-Clause from sqlparse import tokens as T -from sqlparse.compat import text_type -class _CaseFilter(object): +class _CaseFilter: ttype = None def __init__(self, case=None): case = case or 'upper' - self.convert = getattr(text_type, case) + self.convert = getattr(str, case) def process(self, stream): for ttype, value in stream: @@ -38,7 +36,7 @@ def process(self, stream): yield ttype, value -class TruncateStringFilter(object): +class TruncateStringFilter: def __init__(self, width, char): self.width = width self.char = char diff --git a/sqlparse/formatter.py b/sqlparse/formatter.py index 89627596..1d1871cf 100644 --- a/sqlparse/formatter.py +++ 
b/sqlparse/formatter.py
@@ -1,6 +1,5 @@
-# -*- coding: utf-8 -*-
 #
-# Copyright (C) 2009-2018 the sqlparse authors and contributors
+# Copyright (C) 2009-2020 the sqlparse authors and contributors
 #
 #
 # This module is part of python-sqlparse and is released under
@@ -17,32 +16,32 @@ def validate_options(options):
     kwcase = options.get('keyword_case')
     if kwcase not in [None, 'upper', 'lower', 'capitalize']:
         raise SQLParseError('Invalid value for keyword_case: '
-                            '{0!r}'.format(kwcase))
+                            '{!r}'.format(kwcase))
 
     idcase = options.get('identifier_case')
     if idcase not in [None, 'upper', 'lower', 'capitalize']:
         raise SQLParseError('Invalid value for identifier_case: '
-                            '{0!r}'.format(idcase))
+                            '{!r}'.format(idcase))
 
     ofrmt = options.get('output_format')
     if ofrmt not in [None, 'sql', 'python', 'php']:
         raise SQLParseError('Unknown output format: '
-                            '{0!r}'.format(ofrmt))
+                            '{!r}'.format(ofrmt))
 
     strip_comments = options.get('strip_comments', False)
     if strip_comments not in [True, False]:
         raise SQLParseError('Invalid value for strip_comments: '
-                            '{0!r}'.format(strip_comments))
+                            '{!r}'.format(strip_comments))
 
     space_around_operators = options.get('use_space_around_operators', False)
     if space_around_operators not in [True, False]:
         raise SQLParseError('Invalid value for use_space_around_operators: '
-                            '{0!r}'.format(space_around_operators))
+                            '{!r}'.format(space_around_operators))
 
     strip_ws = options.get('strip_whitespace', False)
     if strip_ws not in [True, False]:
         raise SQLParseError('Invalid value for strip_whitespace: '
-                            '{0!r}'.format(strip_ws))
+                            '{!r}'.format(strip_ws))
 
     truncate_strings = options.get('truncate_strings')
     if truncate_strings is not None:
@@ -50,17 +49,17 @@
             truncate_strings = int(truncate_strings)
         except (ValueError, TypeError):
             raise SQLParseError('Invalid value for truncate_strings: '
-                                '{0!r}'.format(truncate_strings))
+                                '{!r}'.format(truncate_strings))
         if truncate_strings <= 1:
             raise SQLParseError('Invalid value for truncate_strings: '
-                                '{0!r}'.format(truncate_strings))
+                                '{!r}'.format(truncate_strings))
         options['truncate_strings'] = truncate_strings
         options['truncate_char'] = options.get('truncate_char', '[...]')
 
     indent_columns = options.get('indent_columns', False)
     if indent_columns not in [True, False]:
         raise SQLParseError('Invalid value for indent_columns: '
-                            '{0!r}'.format(indent_columns))
+                            '{!r}'.format(indent_columns))
     elif indent_columns:
         options['reindent'] = True  # enforce reindent
     options['indent_columns'] = indent_columns
@@ -68,27 +67,27 @@
     reindent = options.get('reindent', False)
     if reindent not in [True, False]:
         raise SQLParseError('Invalid value for reindent: '
-                            '{0!r}'.format(reindent))
+                            '{!r}'.format(reindent))
     elif reindent:
         options['strip_whitespace'] = True
 
     reindent_aligned = options.get('reindent_aligned', False)
     if reindent_aligned not in [True, False]:
         raise SQLParseError('Invalid value for reindent_aligned: '
-                            '{0!r}'.format(reindent))
+                            '{!r}'.format(reindent_aligned))
     elif reindent_aligned:
         options['strip_whitespace'] = True
 
     indent_after_first = options.get('indent_after_first', False)
     if indent_after_first not in [True, False]:
         raise SQLParseError('Invalid value for indent_after_first: '
-                            '{0!r}'.format(indent_after_first))
+                            '{!r}'.format(indent_after_first))
     options['indent_after_first'] = indent_after_first
 
     indent_tabs = options.get('indent_tabs', False)
     if indent_tabs not in [True, False]:
         raise SQLParseError('Invalid value for indent_tabs: '
-                            '{0!r}'.format(indent_tabs))
+                            '{!r}'.format(indent_tabs))
     elif indent_tabs:
         options['indent_char'] = '\t'
     else:
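``validate_options`` above is what turns a bad keyword argument into a ``SQLParseError`` before any filter runs. A small sketch of the observable behavior through the public API, using the message format from the hunk above:

    import sqlparse
    from sqlparse.exceptions import SQLParseError

    try:
        sqlparse.format('select 1', keyword_case='mixed')
    except SQLParseError as e:
        print(e)  # Invalid value for keyword_case: 'mixed'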
'{!r}'.format(indent_tabs)) elif indent_tabs: options['indent_char'] = '\t' else: diff --git a/sqlparse/keywords.py b/sqlparse/keywords.py index 602051a0..b45f3e0f 100644 --- a/sqlparse/keywords.py +++ b/sqlparse/keywords.py @@ -1,97 +1,96 @@ -# -*- coding: utf-8 -*- # -# Copyright (C) 2009-2018 the sqlparse authors and contributors +# Copyright (C) 2009-2020 the sqlparse authors and contributors # # # This module is part of python-sqlparse and is released under # the BSD License: https://opensource.org/licenses/BSD-3-Clause -import re - from sqlparse import tokens - -def is_keyword(value): - val = value.upper() - return (KEYWORDS_COMMON.get(val) - or KEYWORDS_ORACLE.get(val) - or KEYWORDS_PLPGSQL.get(val) - or KEYWORDS.get(val, tokens.Name)), value - - -SQL_REGEX = { - 'root': [ - (r'(--|//|# )\+.*?(\r\n|\r|\n|$)', tokens.Comment.Single.Hint), - (r'/\*\+[\s\S]*?\*/', tokens.Comment.Multiline.Hint), - - (r'(--|//|# ).*?(\r\n|\r|\n|$)', tokens.Comment.Single), - (r'/\*[\s\S]*?\*/', tokens.Comment.Multiline), - - (r'(\r\n|\r|\n)', tokens.Newline), - (r'\s+?', tokens.Whitespace), - - (r':=', tokens.Assignment), - (r'::', tokens.Punctuation), - - (r'\*', tokens.Wildcard), - - (r"`(``|[^`])*`", tokens.Name), - (r"´(´´|[^´])*´", tokens.Name), - (r'(\$(?:[_A-ZÀ-Ü]\w*)?\$)[\s\S]*?\1', tokens.Literal), - - (r'\?', tokens.Name.Placeholder), - (r'%(\(\w+\))?s', tokens.Name.Placeholder), - (r'(?<!\w)[$:?]\w+', tokens.Name.Placeholder), - - (r'\\\w+', tokens.Command), - - # FIXME(andi): VALUES shouldn't be listed here - # see https://github.com/andialbrecht/sqlparse/pull/64 - # IN is special, it may be followed by a parenthesis, but - # is never a function, see issue183 - (r'(CASE|IN|VALUES|USING|FROM)\b', tokens.Keyword), - - (r'(@|##|#)[A-ZÀ-Ü]\w+', tokens.Name), - - # see issue #39 - # Spaces around period `schema . name` are valid identifier - # TODO: Spaces before period not implemented - (r'[A-ZÀ-Ü]\w*(?=\s*\.)', tokens.Name), # 'Name' . - # FIXME(atronah): never match, - # because `re.match` doesn't work with look-behind regexp feature - (r'(?<=\.)[A-ZÀ-Ü]\w*', tokens.Name), # .'Name' - (r'[A-ZÀ-Ü]\w*(?=\()', tokens.Name), # side effect: change kw to func - (r'-?0x[\dA-F]+', tokens.Number.Hexadecimal), - (r'-?\d*(\.\d+)?E-?\d+', tokens.Number.Float), - (r'(?![_A-ZÀ-Ü])-?(\d+(\.\d*)|\.\d+)(?![_A-ZÀ-Ü])', tokens.Number.Float), - (r'(?![_A-ZÀ-Ü])-?\d+(?![_A-ZÀ-Ü])', tokens.Number.Integer), - (r"'(''|\\\\|\\'|[^'])*'", tokens.String.Single), - # not a real string literal in ANSI SQL: - (r'"(""|\\\\|\\"|[^"])*"', tokens.String.Symbol), - (r'(""|".*?[^\\]")', tokens.String.Symbol), - # sqlite names can be escaped with [square brackets]. left bracket - # cannot be preceded by word character or a right bracket -- - # otherwise it's probably an array index - (r'(?<![\w\])])(\[[^\]\[]+\])', tokens.Name), - (r'((LEFT\s+|RIGHT\s+|FULL\s+)?(INNER\s+|OUTER\s+|STRAIGHT\s+)?|(CROSS\s+|NATURAL\s+)?)?JOIN\b', tokens.Keyword), - (r'END(\s+IF|\s+LOOP|\s+WHILE)?\b', tokens.Keyword), - (r'NOT\s+NULL\b', tokens.Keyword), - (r'UNION\s+ALL\b', tokens.Keyword), - (r'CREATE(\s+OR\s+REPLACE)?\b', tokens.Keyword.DDL), - (r'DOUBLE\s+PRECISION\b', tokens.Name.Builtin), - (r'GROUP\s+BY\b', tokens.Keyword), - (r'ORDER\s+BY\b', tokens.Keyword), - (r'(LATERAL\s+VIEW\s+)(EXPLODE|INLINE|PARSE_URL_TUPLE|POSEXPLODE|STACK)\b', tokens.Keyword), - (r'[0-9_A-ZÀ-Ü][_$#\w]*', is_keyword), - (r'[;:()\[\],\.]', tokens.Punctuation), - (r'[<>=~!]+', tokens.Operator.Comparison), - (r'[+/@#%^&|`?^-]+', tokens.Operator), - ]} - -FLAGS = re.IGNORECASE | re.UNICODE -SQL_REGEX = [(re.compile(rx, FLAGS).match, tt) for rx, tt in SQL_REGEX['root']] +# object() only supports "is" and is useful as a marker +# use this marker to specify that the given regex in SQL_REGEX +# shall be processed further through a lookup in the KEYWORDS dictionaries +PROCESS_AS_KEYWORD = object() + + +SQL_REGEX = [ + (r'(--|# )\+.*?(\r\n|\r|\n|$)', tokens.Comment.Single.Hint), + (r'/\*\+[\s\S]*?\*/', tokens.Comment.Multiline.Hint), + + (r'(--|# ).*?(\r\n|\r|\n|$)', tokens.Comment.Single), + (r'/\*[\s\S]*?\*/', tokens.Comment.Multiline), + + (r'(\r\n|\r|\n)', tokens.Newline), + (r'\s+?', tokens.Whitespace), + + (r':=', tokens.Assignment), + (r'::', tokens.Punctuation), + + (r'\*', tokens.Wildcard), + + (r"`(``|[^`])*`", tokens.Name), + (r"´(´´|[^´])*´", tokens.Name), + (r'((?<!\S)\$(?:[_A-ZÀ-Ü]\w*)?\$)[\s\S]*?\1', tokens.Literal), + + (r'\?', tokens.Name.Placeholder), + (r'%(\(\w+\))?s', tokens.Name.Placeholder), + (r'(?<!\w)[$:?]\w+', tokens.Name.Placeholder), + + (r'\\\w+', tokens.Command), + + # FIXME(andi): VALUES shouldn't be listed here + # see https://github.com/andialbrecht/sqlparse/pull/64 + # AS and IN are special, it may be followed by a parenthesis, but + # are never functions, see issue183 and issue507 + (r'(CASE|IN|VALUES|USING|FROM|AS)\b', PROCESS_AS_KEYWORD), + + (r'(@|##|#)[A-ZÀ-Ü]\w+', tokens.Name), + + # see issue #39 + # Spaces around period `schema . name` are valid identifier + # TODO: Spaces before period not implemented + (r'[A-ZÀ-Ü]\w*(?=\s*\.)', tokens.Name), # 'Name' . + # FIXME(atronah): never match, + # because `re.match` doesn't work with look-behind regexp feature + (r'(?<=\.)[A-ZÀ-Ü]\w*', tokens.Name), # .'Name' + (r'[A-ZÀ-Ü]\w*(?=\()', tokens.Name), # side effect: change kw to func + (r'-?0x[\dA-F]+', tokens.Number.Hexadecimal), + (r'-?\d+(\.\d+)?E-?\d+', tokens.Number.Float), + (r'(?![_A-ZÀ-Ü])-?(\d+(\.\d*)|\.\d+)(?![_A-ZÀ-Ü])', tokens.Number.Float), + (r'(?![_A-ZÀ-Ü])-?\d+(?![_A-ZÀ-Ü])', tokens.Number.Integer), + (r"'(''|\\\\|\\'|[^'])*'", tokens.String.Single), + # not a real string literal in ANSI SQL: + (r'"(""|\\\\|\\"|[^"])*"', tokens.String.Symbol), + (r'(""|".*?[^\\]")', tokens.String.Symbol), + # sqlite names can be escaped with [square brackets]. left bracket + # cannot be preceded by word character or a right bracket -- + # otherwise it's probably an array index + (r'(?<![\w\])])(\[[^\]\[]+\])', tokens.Name), + (r'((LEFT\s+|RIGHT\s+|FULL\s+)?(INNER\s+|OUTER\s+|STRAIGHT\s+)?|(CROSS\s+|NATURAL\s+)?)?JOIN\b', tokens.Keyword), + (r'END(\s+IF|\s+LOOP|\s+WHILE)?\b', tokens.Keyword), + (r'NOT\s+NULL\b', tokens.Keyword), + (r'NULLS\s+(FIRST|LAST)\b', tokens.Keyword), + (r'UNION\s+ALL\b', tokens.Keyword), + (r'CREATE(\s+OR\s+REPLACE)?\b', tokens.Keyword.DDL), + (r'DOUBLE\s+PRECISION\b', tokens.Name.Builtin), + (r'GROUP\s+BY\b', tokens.Keyword), + (r'ORDER\s+BY\b', tokens.Keyword), + (r'HANDLER\s+FOR\b', tokens.Keyword), + (r'(LATERAL\s+VIEW\s+)(EXPLODE|INLINE|PARSE_URL_TUPLE|POSEXPLODE|STACK)\b', tokens.Keyword), + (r"(AT|WITH')\s+TIME\s+ZONE\s+'[^']+'", tokens.Keyword.TZCast), + (r'(NOT\s+)?(LIKE|ILIKE|RLIKE)\b', tokens.Operator.Comparison), + (r'\w[$#\w]*', PROCESS_AS_KEYWORD), + (r'[;:()\[\],\.]', tokens.Punctuation), + (r'[<>=~!]+', tokens.Operator.Comparison), + (r'[+/@#%^&|^-]+', tokens.Operator), +] KEYWORDS = { 'ABORT': tokens.Keyword, @@ -191,7 +190,7 @@ def is_keyword(value): 'CONVERSION': tokens.Keyword, 'CONVERT': tokens.Keyword, 'COPY': tokens.Keyword, - 'CORRESPONTING': tokens.Keyword, + 'CORRESPONDING': tokens.Keyword, 'COUNT': tokens.Keyword, 'CREATEDB': tokens.Keyword, 'CREATEUSER': tokens.Keyword, @@ -235,6 +234,7 @@ def is_keyword(value): 'DISABLE': tokens.Keyword, 'DISCONNECT': tokens.Keyword, 'DISPATCH': tokens.Keyword, + 'DIV': tokens.Operator, 'DO': tokens.Keyword, 'DOMAIN': tokens.Keyword, 'DYNAMIC': tokens.Keyword, @@ -289,7 +289,6 @@ def is_keyword(value): 'GRANTED': tokens.Keyword, 'GROUPING': tokens.Keyword, - 'HANDLER': tokens.Keyword, 'HAVING': tokens.Keyword, 'HIERARCHY': tokens.Keyword, 'HOLD': tokens.Keyword, @@ -309,7 +308,7 @@ def is_keyword(value): 'INCREMENT': tokens.Keyword, 'INDEX': tokens.Keyword, - 'INDITCATOR': tokens.Keyword, + 'INDICATOR': tokens.Keyword, 'INFIX': tokens.Keyword, 'INHERITS': tokens.Keyword, 'INITIAL': tokens.Keyword, @@ -426,11 +425,11 @@ def is_keyword(value): 'PARAMETER': tokens.Keyword, 'PARAMETERS': tokens.Keyword, 'PARAMETER_MODE': tokens.Keyword, - 'PARAMATER_NAME':
tokens.Keyword, - 'PARAMATER_ORDINAL_POSITION': tokens.Keyword, + 'PARAMETER_NAME': tokens.Keyword, + 'PARAMETER_ORDINAL_POSITION': tokens.Keyword, 'PARAMETER_SPECIFIC_CATALOG': tokens.Keyword, 'PARAMETER_SPECIFIC_NAME': tokens.Keyword, - 'PARAMATER_SPECIFIC_SCHEMA': tokens.Keyword, + 'PARAMETER_SPECIFIC_SCHEMA': tokens.Keyword, 'PARTIAL': tokens.Keyword, 'PASCAL': tokens.Keyword, 'PCTFREE': tokens.Keyword, @@ -634,14 +633,20 @@ def is_keyword(value): 'DATE': tokens.Name.Builtin, 'DEC': tokens.Name.Builtin, 'DECIMAL': tokens.Name.Builtin, + 'FILE_TYPE': tokens.Name.Builtin, 'FLOAT': tokens.Name.Builtin, 'INT': tokens.Name.Builtin, 'INT8': tokens.Name.Builtin, 'INTEGER': tokens.Name.Builtin, 'INTERVAL': tokens.Name.Builtin, 'LONG': tokens.Name.Builtin, + 'NATURALN': tokens.Name.Builtin, + 'NVARCHAR': tokens.Name.Builtin, 'NUMBER': tokens.Name.Builtin, 'NUMERIC': tokens.Name.Builtin, + 'PLS_INTEGER': tokens.Name.Builtin, + 'POSITIVE': tokens.Name.Builtin, + 'POSITIVEN': tokens.Name.Builtin, 'REAL': tokens.Name.Builtin, 'ROWID': tokens.Name.Builtin, 'ROWLABEL': tokens.Name.Builtin, @@ -649,11 +654,18 @@ def is_keyword(value): 'SERIAL': tokens.Name.Builtin, 'SERIAL8': tokens.Name.Builtin, 'SIGNED': tokens.Name.Builtin, + 'SIGNTYPE': tokens.Name.Builtin, + 'SIMPLE_DOUBLE': tokens.Name.Builtin, + 'SIMPLE_FLOAT': tokens.Name.Builtin, + 'SIMPLE_INTEGER': tokens.Name.Builtin, 'SMALLINT': tokens.Name.Builtin, + 'SYS_REFCURSOR': tokens.Name.Builtin, 'SYSDATE': tokens.Name, 'TEXT': tokens.Name.Builtin, 'TINYINT': tokens.Name.Builtin, 'UNSIGNED': tokens.Name.Builtin, + 'UROWID': tokens.Name.Builtin, + 'UTL_FILE': tokens.Name.Builtin, 'VARCHAR': tokens.Name.Builtin, 'VARCHAR2': tokens.Name.Builtin, 'VARYING': tokens.Name.Builtin, @@ -727,6 +739,7 @@ def is_keyword(value): 'DOUBLE': tokens.Keyword, 'DUMP': tokens.Keyword, + 'ELSIF': tokens.Keyword, 'EVENTS': tokens.Keyword, 'EXCEPTIONS': tokens.Keyword, 'EXPLAIN': tokens.Keyword, @@ -815,6 +828,8 @@ def is_keyword(value): # PostgreSQL Syntax KEYWORDS_PLPGSQL = { + 'CONFLICT': tokens.Keyword, + 'WINDOW': tokens.Keyword, 'PARTITION': tokens.Keyword, 'OVER': tokens.Keyword, 'PERFORM': tokens.Keyword, @@ -822,6 +837,7 @@ def is_keyword(value): 'PLPGSQL': tokens.Keyword, 'INHERIT': tokens.Keyword, 'INDEXES': tokens.Keyword, + 'ON_ERROR_STOP': tokens.Keyword, 'BYTEA': tokens.Keyword, 'BIGSERIAL': tokens.Keyword, @@ -854,3 +870,92 @@ def is_keyword(value): 'IN': tokens.Keyword, 'LOOP': tokens.Keyword, } + +# Hive Syntax +KEYWORDS_HQL = { + 'EXPLODE': tokens.Keyword, + 'DIRECTORY': tokens.Keyword, + 'DISTRIBUTE': tokens.Keyword, + 'INCLUDE': tokens.Keyword, + 'LOCATE': tokens.Keyword, + 'OVERWRITE': tokens.Keyword, + 'POSEXPLODE': tokens.Keyword, + + 'ARRAY_CONTAINS': tokens.Keyword, + 'CMP': tokens.Keyword, + 'COLLECT_LIST': tokens.Keyword, + 'CONCAT': tokens.Keyword, + 'CONDITION': tokens.Keyword, + 'DATE_ADD': tokens.Keyword, + 'DATE_SUB': tokens.Keyword, + 'DECODE': tokens.Keyword, + 'DBMS_OUTPUT': tokens.Keyword, + 'ELEMENTS': tokens.Keyword, + 'EXCHANGE': tokens.Keyword, + 'EXTENDED': tokens.Keyword, + 'FLOOR': tokens.Keyword, + 'FOLLOWING': tokens.Keyword, + 'FROM_UNIXTIME': tokens.Keyword, + 'FTP': tokens.Keyword, + 'HOUR': tokens.Keyword, + 'INLINE': tokens.Keyword, + 'INSTR': tokens.Keyword, + 'LEN': tokens.Keyword, + 'MAP': tokens.Name.Builtin, + 'MAXELEMENT': tokens.Keyword, + 'MAXINDEX': tokens.Keyword, + 'MAX_PART_DATE': tokens.Keyword, + 'MAX_PART_INT': tokens.Keyword, + 'MAX_PART_STRING': tokens.Keyword, + 'MINELEMENT': tokens.Keyword, + 
'MININDEX': tokens.Keyword, + 'MIN_PART_DATE': tokens.Keyword, + 'MIN_PART_INT': tokens.Keyword, + 'MIN_PART_STRING': tokens.Keyword, + 'NOW': tokens.Keyword, + 'NVL': tokens.Keyword, + 'NVL2': tokens.Keyword, + 'PARSE_URL_TUPLE': tokens.Keyword, + 'PART_LOC': tokens.Keyword, + 'PART_COUNT': tokens.Keyword, + 'PART_COUNT_BY': tokens.Keyword, + 'PRINT': tokens.Keyword, + 'PUT_LINE': tokens.Keyword, + 'RANGE': tokens.Keyword, + 'REDUCE': tokens.Keyword, + 'REGEXP_REPLACE': tokens.Keyword, + 'RESIGNAL': tokens.Keyword, + 'RTRIM': tokens.Keyword, + 'SIGN': tokens.Keyword, + 'SIGNAL': tokens.Keyword, + 'SIN': tokens.Keyword, + 'SPLIT': tokens.Keyword, + 'SQRT': tokens.Keyword, + 'STACK': tokens.Keyword, + 'STR': tokens.Keyword, + 'STRING': tokens.Name.Builtin, + 'STRUCT': tokens.Name.Builtin, + 'SUBSTR': tokens.Keyword, + 'SUMMARY': tokens.Keyword, + 'TBLPROPERTIES': tokens.Keyword, + 'TIMESTAMP': tokens.Name.Builtin, + 'TIMESTAMP_ISO': tokens.Keyword, + 'TO_CHAR': tokens.Keyword, + 'TO_DATE': tokens.Keyword, + 'TO_TIMESTAMP': tokens.Keyword, + 'TRUNC': tokens.Keyword, + 'UNBOUNDED': tokens.Keyword, + 'UNIQUEJOIN': tokens.Keyword, + 'UNIX_TIMESTAMP': tokens.Keyword, + 'UTC_TIMESTAMP': tokens.Keyword, + 'VIEWS': tokens.Keyword, + + 'EXIT': tokens.Keyword, + 'BREAK': tokens.Keyword, + 'LEAVE': tokens.Keyword, +} + + +KEYWORDS_MSACCESS = { + 'DISTINCTROW': tokens.Keyword, +} diff --git a/sqlparse/lexer.py b/sqlparse/lexer.py index fd007a4c..9d25c9e6 100644 --- a/sqlparse/lexer.py +++ b/sqlparse/lexer.py @@ -1,31 +1,104 @@ -# -*- coding: utf-8 -*- # -# Copyright (C) 2009-2018 the sqlparse authors and contributors +# Copyright (C) 2009-2020 the sqlparse authors and contributors # # # This module is part of python-sqlparse and is released under # the BSD License: https://opensource.org/licenses/BSD-3-Clause """SQL Lexer""" +import re # This code is based on the SqlLexer in pygments. # http://pygments.org/ # It's separated from the rest of pygments to increase performance # and to allow some customizations. -from sqlparse import tokens -from sqlparse.keywords import SQL_REGEX -from sqlparse.compat import text_type, file_types +from io import TextIOBase + +from sqlparse import tokens, keywords from sqlparse.utils import consume -class Lexer(object): - """Lexer - Empty class. Leaving for backwards-compatibility - """ +class Lexer: + """The Lexer supports configurable syntax. + To add support for additional keywords, use the `add_keywords` method.""" + + _default_intance = None + + # Development notes: + # - This class is prepared to be able to support additional SQL dialects + # in the future by adding additional functions that take the place of + # the function default_initialization() + # - The lexer class uses an explicit singleton behavior with the + # instance-getter method get_default_instance(). This mechanism has + # the advantage that the call signature of the entry-points to the + # sqlparse library are not affected. Also, usage of sqlparse in third + # party code does not need to be adapted. On the other hand, singleton + # behavior is not thread safe, and the current implementation does not + # easily allow for multiple SQL dialects to be parsed in the same + # process. Such behavior can be supported in the future by passing a + # suitably initialized lexer object as an additional parameter to the + # entry-point functions (such as `parse`). Code will need to be written + # to pass down and utilize such an object. 
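As a usage illustration of the extension hook these notes describe (a minimal sketch only; 'BACON' is a throwaway keyword chosen because the tests later in this patch use it, and the method names are exactly the ones this patch introduces):

    from sqlparse import tokens
    from sqlparse.lexer import Lexer

    # Fetch the process-wide lexer instance used by sqlparse.parse().
    lex = Lexer.get_default_instance()

    # Register an extra keyword dictionary; dictionaries are consulted
    # in the order they were added.
    lex.add_keywords({'BACON': tokens.Keyword})

    # ... tokenize/format as usual ...

    # Restore the stock syntax so later callers are unaffected.
    lex.default_initialization()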
The current implementation + # is prepared to support this thread safe approach without the + # default_instance part needing to change interface. + + @classmethod + def get_default_instance(cls): + """Returns the lexer instance used internally + by the sqlparse core functions.""" + if cls._default_intance is None: + cls._default_intance = cls() + cls._default_intance.default_initialization() + return cls._default_intance + + def default_initialization(self): + """Initialize the lexer with default dictionaries. + Useful if you need to revert custom syntax settings.""" + self.clear() + self.set_SQL_REGEX(keywords.SQL_REGEX) + self.add_keywords(keywords.KEYWORDS_COMMON) + self.add_keywords(keywords.KEYWORDS_ORACLE) + self.add_keywords(keywords.KEYWORDS_PLPGSQL) + self.add_keywords(keywords.KEYWORDS_HQL) + self.add_keywords(keywords.KEYWORDS_MSACCESS) + self.add_keywords(keywords.KEYWORDS) + + def clear(self): + """Clear all syntax configurations. + Useful if you want to load a reduced set of syntax configurations. + After this call, regexps and keyword dictionaries need to be loaded + to make the lexer functional again.""" + self._SQL_REGEX = [] + self._keywords = [] + + def set_SQL_REGEX(self, SQL_REGEX): + """Set the list of regex that will parse the SQL.""" + FLAGS = re.IGNORECASE | re.UNICODE + self._SQL_REGEX = [ + (re.compile(rx, FLAGS).match, tt) + for rx, tt in SQL_REGEX + ] + + def add_keywords(self, keywords): + """Add keyword dictionaries. Keywords are looked up in the same order + that dictionaries were added.""" + self._keywords.append(keywords) + + def is_keyword(self, value): + """Checks for a keyword. + + If the given value is in one of the KEYWORDS_* dictionary + it's considered a keyword. Otherwise, tokens.Name is returned. + """ + val = value.upper() + for kwdict in self._keywords: + if val in kwdict: + return kwdict[val], value + else: + return tokens.Name, value - @staticmethod - def get_tokens(text, encoding=None): + def get_tokens(self, text, encoding=None): """ Return an iterable of (tokentype, value) pairs generated from `text`. If `unfiltered` is set to `True`, the filtering mechanism @@ -38,10 +111,10 @@ def get_tokens(text, encoding=None): ``stack`` is the initial stack (default: ``['root']``) """ - if isinstance(text, file_types): + if isinstance(text, TextIOBase): text = text.read() - if isinstance(text, text_type): + if isinstance(text, str): pass elif isinstance(text, bytes): if encoding: @@ -52,20 +125,20 @@ def get_tokens(text, encoding=None): except UnicodeDecodeError: text = text.decode('unicode-escape') else: - raise TypeError(u"Expected text or file-like object, got {!r}". + raise TypeError("Expected text or file-like object, got {!r}". format(type(text))) iterable = enumerate(text) for pos, char in iterable: - for rexmatch, action in SQL_REGEX: + for rexmatch, action in self._SQL_REGEX: m = rexmatch(text, pos) if not m: continue elif isinstance(action, tokens._TokenType): yield action, m.group() - elif callable(action): - yield action(m.group()) + elif action is keywords.PROCESS_AS_KEYWORD: + yield self.is_keyword(m.group()) consume(iterable, m.end() - pos - 1) break @@ -79,4 +152,4 @@ def tokenize(sql, encoding=None): Tokenize *sql* using the :class:`Lexer` and return a 2-tuple stream of ``(token type, value)`` items. 
""" - return Lexer().get_tokens(sql, encoding) + return Lexer.get_default_instance().get_tokens(sql, encoding) diff --git a/sqlparse/sql.py b/sqlparse/sql.py index a752555c..1ccfbdbe 100644 --- a/sqlparse/sql.py +++ b/sqlparse/sql.py @@ -1,23 +1,42 @@ -# -*- coding: utf-8 -*- # -# Copyright (C) 2009-2018 the sqlparse authors and contributors +# Copyright (C) 2009-2020 the sqlparse authors and contributors # # # This module is part of python-sqlparse and is released under # the BSD License: https://opensource.org/licenses/BSD-3-Clause """This module contains classes representing syntactical elements of SQL.""" -from __future__ import print_function import re from sqlparse import tokens as T -from sqlparse.compat import string_types, text_type, unicode_compatible from sqlparse.utils import imt, remove_quotes -@unicode_compatible -class Token(object): +class NameAliasMixin: + """Implements get_real_name and get_alias.""" + + def get_real_name(self): + """Returns the real name (object name) of this identifier.""" + # a.b + dot_idx, _ = self.token_next_by(m=(T.Punctuation, '.')) + return self._get_first_name(dot_idx, real_name=True) + + def get_alias(self): + """Returns the alias for this identifier or ``None``.""" + + # "name AS alias" + kw_idx, kw = self.token_next_by(m=(T.Keyword, 'AS')) + if kw is not None: + return self._get_first_name(kw_idx + 1, keywords=True) + + # "name alias" or "complicated column expression alias" + _, ws = self.token_next_by(t=T.Whitespace) + if len(self.tokens) > 2 and ws is not None: + return self._get_first_name(reverse=True) + + +class Token: """Base class for all other classes in this module. It represents a single token and has two instance attributes: @@ -29,7 +48,7 @@ class Token(object): 'is_group', 'is_whitespace') def __init__(self, ttype, value): - value = text_type(value) + value = str(value) self.value = value self.ttype = ttype self.parent = None @@ -49,15 +68,15 @@ def __repr__(self): cls = self._get_repr_name() value = self._get_repr_value() - q = u'"' if value.startswith("'") and value.endswith("'") else u"'" - return u"<{cls} {q}{value}{q} at 0x{id:2X}>".format( + q = '"' if value.startswith("'") and value.endswith("'") else "'" + return "<{cls} {q}{value}{q} at 0x{id:2X}>".format( id=id(self), **locals()) def _get_repr_name(self): return str(self.ttype).split('.')[-1] def _get_repr_value(self): - raw = text_type(self) + raw = str(self) if len(raw) > 7: raw = raw[:6] + '...' return re.sub(r'\s+', ' ', raw) @@ -82,7 +101,7 @@ def match(self, ttype, values, regex=False): if not type_matched or values is None: return type_matched - if isinstance(values, string_types): + if isinstance(values, str): values = (values,) if regex: @@ -127,7 +146,6 @@ def has_ancestor(self, other): return False -@unicode_compatible class TokenList(Token): """A group of tokens. 
@@ -139,12 +157,12 @@ class TokenList(Token): def __init__(self, tokens=None): self.tokens = tokens or [] - [setattr(token, 'parent', self) for token in tokens] - super(TokenList, self).__init__(None, text_type(self)) + [setattr(token, 'parent', self) for token in self.tokens] + super().__init__(None, str(self)) self.is_group = True def __str__(self): - return u''.join(token.value for token in self.flatten()) + return ''.join(token.value for token in self.flatten()) # weird bug # def __len__(self): @@ -167,14 +185,14 @@ def _pprint_tree(self, max_depth=None, depth=0, f=None, _pre=''): value = token._get_repr_value() last = idx == (token_count - 1) - pre = u'`- ' if last else u'|- ' + pre = '`- ' if last else '|- ' - q = u'"' if value.startswith("'") and value.endswith("'") else u"'" - print(u"{_pre}{pre}{idx} {cls} {q}{value}{q}" + q = '"' if value.startswith("'") and value.endswith("'") else "'" + print("{_pre}{pre}{idx} {cls} {q}{value}{q}" .format(**locals()), file=f) if token.is_group and (max_depth is None or depth < max_depth): - parent_pre = u' ' if last else u'| ' + parent_pre = ' ' if last else '| ' token._pprint_tree(max_depth, depth + 1, f, _pre + parent_pre) def get_token_at_offset(self, offset): @@ -193,8 +211,7 @@ def flatten(self): """ for token in self.tokens: if token.is_group: - for item in token.flatten(): - yield item + yield from token.flatten() else: yield token @@ -217,16 +234,16 @@ def _token_matching(self, funcs, start=0, end=None, reverse=False): if reverse: assert end is None - for idx in range(start - 2, -1, -1): - token = self.tokens[idx] - for func in funcs: - if func(token): - return idx, token + indexes = range(start - 2, -1, -1) else: - for idx, token in enumerate(self.tokens[start:end], start=start): - for func in funcs: - if func(token): - return idx, token + if end is None: + end = len(self.tokens) + indexes = range(start, end) + for idx in indexes: + token = self.tokens[idx] + for func in funcs: + if func(token): + return idx, token return None, None def token_first(self, skip_ws=True, skip_cm=False): @@ -239,15 +256,14 @@ def token_first(self, skip_ws=True, skip_cm=False): ignored too. """ # this on is inconsistent, using Comment instead of T.Comment... 
- funcs = lambda tk: not ((skip_ws and tk.is_whitespace) - or (skip_cm and imt(tk, - t=T.Comment, i=Comment))) - return self._token_matching(funcs)[1] + def matcher(tk): + return not ((skip_ws and tk.is_whitespace) + or (skip_cm and imt(tk, t=T.Comment, i=Comment))) + return self._token_matching(matcher)[1] def token_next_by(self, i=None, m=None, t=None, idx=-1, end=None): - funcs = lambda tk: imt(tk, i, m, t) idx += 1 - return self._token_matching(funcs, idx, end) + return self._token_matching(lambda tk: imt(tk, i, m, t), idx, end) def token_not_matching(self, funcs, idx): funcs = (funcs,) if not isinstance(funcs, (list, tuple)) else funcs @@ -277,10 +293,11 @@ def token_next(self, idx, skip_ws=True, skip_cm=False, _reverse=False): if idx is None: return None, None idx += 1 # alot of code usage current pre-compensates for this - funcs = lambda tk: not ((skip_ws and tk.is_whitespace) - or (skip_cm and imt(tk, - t=T.Comment, i=Comment))) - return self._token_matching(funcs, idx, reverse=_reverse) + + def matcher(tk): + return not ((skip_ws and tk.is_whitespace) + or (skip_cm and imt(tk, t=T.Comment, i=Comment))) + return self._token_matching(matcher, idx, reverse=_reverse) def token_index(self, token, start=0): """Return list index of token.""" @@ -305,7 +322,7 @@ def group_tokens(self, grp_cls, start, end, include_end=True, grp = start grp.tokens.extend(subtokens) del self.tokens[start_idx + 1:end_idx] - grp.value = text_type(start) + grp.value = str(start) else: subtokens = self.tokens[start_idx:end_idx] grp = grp_cls(subtokens) @@ -341,16 +358,7 @@ def has_alias(self): def get_alias(self): """Returns the alias for this identifier or ``None``.""" - - # "name AS alias" - kw_idx, kw = self.token_next_by(m=(T.Keyword, 'AS')) - if kw is not None: - return self._get_first_name(kw_idx + 1, keywords=True) - - # "name alias" or "complicated column expression alias" - _, ws = self.token_next_by(t=T.Whitespace) - if len(self.tokens) > 2 and ws is not None: - return self._get_first_name(reverse=True) + return None def get_name(self): """Returns the name of this identifier. @@ -363,9 +371,7 @@ def get_name(self): def get_real_name(self): """Returns the real name (object name) of this identifier.""" - # a.b - dot_idx, _ = self.token_next_by(m=(T.Punctuation, '.')) - return self._get_first_name(dot_idx, real_name=True) + return None def get_parent_name(self): """Return name of the parent object if any. @@ -407,33 +413,34 @@ def get_type(self): Whitespaces and comments at the beginning of the statement are ignored. """ - first_token = self.token_first(skip_cm=True) - if first_token is None: + token = self.token_first(skip_cm=True) + if token is None: # An "empty" statement that either has not tokens at all # or only whitespace tokens. return 'UNKNOWN' - elif first_token.ttype in (T.Keyword.DML, T.Keyword.DDL): - return first_token.normalized + elif token.ttype in (T.Keyword.DML, T.Keyword.DDL): + return token.normalized - elif first_token.ttype == T.Keyword.CTE: + elif token.ttype == T.Keyword.CTE: # The WITH keyword should be followed by either an Identifier or # an IdentifierList containing the CTE definitions; the actual # DML keyword (e.g. SELECT, INSERT) will follow next. 
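A quick sketch of the behavior this comment describes (the same expectation is exercised by the issue632 regression test near the end of this patch):

    import sqlparse

    # get_type() skips the WITH keyword and the CTE definitions and
    # reports the DML keyword that actually follows.
    stmt = sqlparse.parse('WITH foo AS (SELECT 1) SELECT * FROM foo;')[0]
    assert stmt.get_type() == 'SELECT'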
- fidx = self.token_index(first_token) - tidx, token = self.token_next(fidx, skip_ws=True) - if isinstance(token, (Identifier, IdentifierList)): - _, dml_keyword = self.token_next(tidx, skip_ws=True) + tidx = self.token_index(token) + while tidx is not None: + tidx, token = self.token_next(tidx, skip_ws=True) + if isinstance(token, (Identifier, IdentifierList)): + tidx, token = self.token_next(tidx, skip_ws=True) - if dml_keyword is not None \ - and dml_keyword.ttype == T.Keyword.DML: - return dml_keyword.normalized + if token is not None \ + and token.ttype == T.Keyword.DML: + return token.normalized # Hmm, probably invalid syntax, so return unknown. return 'UNKNOWN' -class Identifier(TokenList): +class Identifier(NameAliasMixin, TokenList): """Represents an identifier. Identifiers may have aliases or typecasts. @@ -477,6 +484,13 @@ def get_identifiers(self): yield token +class TypedLiteral(TokenList): + """A typed literal, such as "date '2001-09-28'" or "interval '2 hours'".""" + M_OPEN = [(T.Name.Builtin, None), (T.Keyword, "TIMESTAMP")] + M_CLOSE = T.String.Single, None + M_EXTEND = T.Keyword, ("DAY", "HOUR", "MINUTE", "MONTH", "SECOND", "YEAR") + + class Parenthesis(TokenList): """Tokens between parenthesis.""" M_OPEN = T.Punctuation, '(' @@ -599,7 +613,7 @@ def get_cases(self, skip_ws=False): return ret -class Function(TokenList): +class Function(NameAliasMixin, TokenList): """A function or procedure call.""" def get_parameters(self): diff --git a/sqlparse/tokens.py b/sqlparse/tokens.py index eefc0b49..d92bbdcf 100644 --- a/sqlparse/tokens.py +++ b/sqlparse/tokens.py @@ -1,6 +1,5 @@ -# -*- coding: utf-8 -*- # -# Copyright (C) 2009-2018 the sqlparse authors and contributors +# Copyright (C) 2009-2020 the sqlparse authors and contributors # # # This module is part of python-sqlparse and is released under diff --git a/sqlparse/utils.py b/sqlparse/utils.py index 3283274d..512f0385 100644 --- a/sqlparse/utils.py +++ b/sqlparse/utils.py @@ -1,6 +1,5 @@ -# -*- coding: utf-8 -*- # -# Copyright (C) 2009-2018 the sqlparse authors and contributors +# Copyright (C) 2009-2020 the sqlparse authors and contributors # # # This module is part of python-sqlparse and is released under @@ -10,7 +9,6 @@ import re from collections import deque from contextlib import contextmanager -from sqlparse.compat import text_type # This regular expression replaces the home-cooked parser that was here before. # It is much faster, but requires an extra post-processing step to get the @@ -40,7 +38,7 @@ def split_unquoted_newlines(stmt): Unlike str.splitlines(), this will ignore CR/LF/CR+LF if the requisite character is inside of a string.""" - text = text_type(stmt) + text = str(stmt) lines = SPLIT_REGEX.split(text) outputlines = [''] for line in lines: @@ -57,7 +55,7 @@ def remove_quotes(val): """Helper that removes surrounding quotes from strings.""" if val is None: return - if val[0] in ('"', "'") and val[0] == val[-1]: + if val[0] in ('"', "'", '`') and val[0] == val[-1]: val = val[1:-1] return val diff --git a/tests/conftest.py b/tests/conftest.py index f2473a43..939c481d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """Helpers for testing.""" import io @@ -35,7 +33,7 @@ def make_load_file(filename, encoding='utf-8'): # https://stackoverflow.com/questions/18011902/py-test-pass-a-parameter-to-a-fixture-function/33879151#33879151 # Syntax is noisy and requires specific variable names # And seems to be limited to only 1 argument. 
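Returning to the TypedLiteral group introduced above, a short sketch of how it surfaces to callers, patterned on the test_comparison_with_typed_literal case added later in this patch:

    import sqlparse
    from sqlparse import sql

    # DATE '...' is grouped into a single TypedLiteral node, so the
    # right-hand side of the comparison is one token.
    p = sqlparse.parse("foo = DATE 'bar.baz'")[0]
    comp = p.tokens[0]
    assert isinstance(comp, sql.Comparison)
    assert isinstance(comp.right, sql.TypedLiteral)
    assert comp.right.value == "DATE 'bar.baz'"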
- with io.open(filepath(filename), encoding=encoding) as f: + with open(filepath(filename), encoding=encoding) as f: return f.read().strip() return make_load_file @@ -44,6 +42,6 @@ def make_load_file(filename, encoding='utf-8'): @pytest.fixture() def get_stream(filepath): def make_stream(filename, encoding='utf-8'): - return io.open(filepath(filename), encoding=encoding) + return open(filepath(filename), encoding=encoding) return make_stream diff --git a/tests/files/casewhen_procedure.sql b/tests/files/casewhen_procedure.sql new file mode 100644 index 00000000..e590d49b --- /dev/null +++ b/tests/files/casewhen_procedure.sql @@ -0,0 +1,8 @@ +create procedure procName() +begin + select case when column = 'value' then column else 0 end; +end; +create procedure procName() +begin + select 1; +end; diff --git a/tests/files/mysql_handler.sql b/tests/files/mysql_handler.sql new file mode 100644 index 00000000..702374ef --- /dev/null +++ b/tests/files/mysql_handler.sql @@ -0,0 +1,10 @@ +create procedure proc1() +begin + declare handler for foo begin end; + select 1; +end; + +create procedure proc2() +begin + select 1; +end; diff --git a/tests/files/slashcomment.sql b/tests/files/slashcomment.sql deleted file mode 100644 index 90e3089d..00000000 --- a/tests/files/slashcomment.sql +++ /dev/null @@ -1,5 +0,0 @@ -select * from user; -//select * from host; -select * from user; -select * // foo; -from foo; diff --git a/tests/test_cli.py b/tests/test_cli.py index 5f1ea0e5..b681a60b 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - import subprocess import sys @@ -71,75 +69,54 @@ def test_stdout(filepath, load_file, capsys): def test_script(): # Call with the --help option as a basic sanity check. - cmd = "{0:s} -m sqlparse.cli --help".format(sys.executable) + cmd = "{:s} -m sqlparse.cli --help".format(sys.executable) assert subprocess.call(cmd.split()) == 0 -def test_encoding_utf8_stdout(filepath, load_file, capfd): - path = filepath('encoding_utf8.sql') - expected = load_file('encoding_utf8.sql', 'utf-8') - sys.stdout.encoding = 'utf-8' - sqlparse.cli.main([path]) - out, _ = capfd.readouterr() - assert out == expected - - -def test_encoding_utf8_output_file(filepath, load_file, tmpdir): - in_path = filepath('encoding_utf8.sql') - expected = load_file('encoding_utf8.sql', 'utf-8') - out_path = tmpdir.dirname + '/encoding_utf8.out.sql' - sqlparse.cli.main([in_path, '-o', out_path]) - out = load_file(out_path, 'utf-8') - assert out == expected - - -def test_encoding_gbk_stdout(filepath, load_file, capfd): - path = filepath('encoding_gbk.sql') - expected = load_file('encoding_gbk.sql', 'gbk') - sys.stdout.encoding = 'gbk' - sqlparse.cli.main([path, '--encoding', 'gbk']) +@pytest.mark.parametrize('fpath, encoding', ( + ('encoding_utf8.sql', 'utf-8'), + ('encoding_gbk.sql', 'gbk'), +)) +def test_encoding_stdout(fpath, encoding, filepath, load_file, capfd): + path = filepath(fpath) + expected = load_file(fpath, encoding) + sqlparse.cli.main([path, '--encoding', encoding]) out, _ = capfd.readouterr() assert out == expected -def test_encoding_gbk_output_file(filepath, load_file, tmpdir): - in_path = filepath('encoding_gbk.sql') - expected = load_file('encoding_gbk.sql', 'gbk') - out_path = tmpdir.dirname + '/encoding_gbk.out.sql' - sqlparse.cli.main([in_path, '--encoding', 'gbk', '-o', out_path]) - out = load_file(out_path, 'gbk') +@pytest.mark.parametrize('fpath, encoding', ( + ('encoding_utf8.sql', 'utf-8'), + ('encoding_gbk.sql', 'gbk'), +)) +def 
test_encoding_output_file(fpath, encoding, filepath, load_file, tmpdir): + in_path = filepath(fpath) + expected = load_file(fpath, encoding) + out_path = tmpdir.dirname + '/encoding_out.sql' + sqlparse.cli.main([in_path, '--encoding', encoding, '-o', out_path]) + out = load_file(out_path, encoding) assert out == expected -def test_encoding_stdin_utf8(filepath, load_file, capfd): - path = filepath('encoding_utf8.sql') - expected = load_file('encoding_utf8.sql', 'utf-8') +@pytest.mark.parametrize('fpath, encoding', ( + ('encoding_utf8.sql', 'utf-8'), + ('encoding_gbk.sql', 'gbk'), +)) +def test_encoding_stdin(fpath, encoding, filepath, load_file, capfd): + path = filepath(fpath) + expected = load_file(fpath, encoding) old_stdin = sys.stdin - with open(path, 'r') as f: + with open(path) as f: sys.stdin = f - sys.stdout.encoding = 'utf-8' - sqlparse.cli.main(['-']) + sqlparse.cli.main(['-', '--encoding', encoding]) sys.stdin = old_stdin out, _ = capfd.readouterr() assert out == expected -def test_encoding_stdin_gbk(filepath, load_file, capfd): - path = filepath('encoding_gbk.sql') - expected = load_file('encoding_gbk.sql', 'gbk') - old_stdin = sys.stdin - with open(path, 'r') as stream: - sys.stdin = stream - sys.stdout.encoding = 'gbk' - sqlparse.cli.main(['-', '--encoding', 'gbk']) - sys.stdin = old_stdin - out, _ = capfd.readouterr() - assert out == expected - - def test_encoding(filepath, capsys): path = filepath('test_cp1251.sql') - expected = u'insert into foo values (1); -- Песня про надежду\n' + expected = 'insert into foo values (1); -- Песня про надежду\n' sqlparse.cli.main([path, '--encoding=cp1251']) out, _ = capsys.readouterr() assert out == expected diff --git a/tests/test_format.py b/tests/test_format.py index 811e0833..70bb8055 100644 --- a/tests/test_format.py +++ b/tests/test_format.py @@ -1,12 +1,10 @@ -# -*- coding: utf-8 -*- - import pytest import sqlparse from sqlparse.exceptions import SQLParseError -class TestFormat(object): +class TestFormat: def test_keywordcase(self): sql = 'select * from bar; -- select foo\n' res = sqlparse.format(sql, keyword_case='upper') @@ -43,26 +41,26 @@ def test_identifiercase_quotes(self): def test_strip_comments_single(self): sql = 'select *-- statement starts here\nfrom foo' res = sqlparse.format(sql, strip_comments=True) - assert res == 'select * from foo' + assert res == 'select *\nfrom foo' sql = 'select * -- statement starts here\nfrom foo' res = sqlparse.format(sql, strip_comments=True) - assert res == 'select * from foo' + assert res == 'select *\nfrom foo' sql = 'select-- foo\nfrom -- bar\nwhere' res = sqlparse.format(sql, strip_comments=True) - assert res == 'select from where' + assert res == 'select\nfrom\nwhere' sql = 'select *-- statement starts here\n\nfrom foo' res = sqlparse.format(sql, strip_comments=True) - assert res == 'select * from foo' + assert res == 'select *\n\nfrom foo' sql = 'select * from foo-- statement starts here\nwhere' res = sqlparse.format(sql, strip_comments=True) - assert res == 'select * from foo where' + assert res == 'select * from foo\nwhere' sql = 'select a-- statement starts here\nfrom foo' res = sqlparse.format(sql, strip_comments=True) - assert res == 'select a from foo' + assert res == 'select a\nfrom foo' sql = '--comment\nselect a-- statement starts here\n' \ 'from foo--comment\nf' res = sqlparse.format(sql, strip_comments=True) - assert res == 'select a from foo f' + assert res == 'select a\nfrom foo\nf' def test_strip_comments_invalid_option(self): sql = 'select-- foo\nfrom -- bar\nwhere' @@ 
-86,6 +84,23 @@ def test_strip_comments_multi(self): res = sqlparse.format(sql, strip_comments=True) assert res == 'select (select 2)' + def test_strip_comments_preserves_linebreak(self): + sql = 'select * -- a comment\r\nfrom foo' + res = sqlparse.format(sql, strip_comments=True) + assert res == 'select *\nfrom foo' + sql = 'select * -- a comment\nfrom foo' + res = sqlparse.format(sql, strip_comments=True) + assert res == 'select *\nfrom foo' + sql = 'select * -- a comment\rfrom foo' + res = sqlparse.format(sql, strip_comments=True) + assert res == 'select *\nfrom foo' + sql = 'select * -- a comment\r\n\r\nfrom foo' + res = sqlparse.format(sql, strip_comments=True) + assert res == 'select *\n\nfrom foo' + sql = 'select * -- a comment\n\nfrom foo' + res = sqlparse.format(sql, strip_comments=True) + assert res == 'select *\n\nfrom foo' + def test_strip_ws(self): f = lambda sql: sqlparse.format(sql, strip_whitespace=True) s = 'select\n* from foo\n\twhere ( 1 = 2 )\n' @@ -123,7 +138,7 @@ def test_notransform_of_quoted_crlf(self): == "SELECT some_column LIKE 'value\\\\\\'\r' WHERE id = 1\n") -class TestFormatReindentAligned(object): +class TestFormatReindentAligned: @staticmethod def formatter(sql): return sqlparse.format(sql, reindent_aligned=True) @@ -294,7 +309,7 @@ def test_window_functions(self): ' from table']) -class TestSpacesAroundOperators(object): +class TestSpacesAroundOperators: @staticmethod def formatter(sql): return sqlparse.format(sql, use_space_around_operators=True) @@ -321,7 +336,7 @@ def test_wildcard_vs_mult(self): assert self.formatter(sql) == 'select a * b - c from table' -class TestFormatReindent(object): +class TestFormatReindent: def test_option(self): with pytest.raises(SQLParseError): sqlparse.format('foo', reindent=2) @@ -598,7 +613,7 @@ def test_insert_values(self): ' , (5, 6)']) -class TestOutputFormat(object): +class TestOutputFormat: def test_python(self): sql = 'select * from foo;' f = lambda sql: sqlparse.format(sql, output_format='python') @@ -663,7 +678,7 @@ def test_format_column_ordering(): def test_truncate_strings(): - sql = "update foo set value = '{0}';".format('x' * 1000) + sql = "update foo set value = '{}';".format('x' * 1000) formatted = sqlparse.format(sql, truncate_strings=10) assert formatted == "update foo set value = 'xxxxxxxxxx[...]';" formatted = sqlparse.format(sql, truncate_strings=3, truncate_char='YYY') diff --git a/tests/test_grouping.py b/tests/test_grouping.py index 63a01f2d..03d16c5d 100644 --- a/tests/test_grouping.py +++ b/tests/test_grouping.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - import pytest import sqlparse @@ -33,6 +31,40 @@ def test_grouping_assignment(s): assert isinstance(parsed.tokens[0], sql.Assignment) +@pytest.mark.parametrize('s', ["x > DATE '2020-01-01'", "x > TIMESTAMP '2020-01-01 00:00:00'"]) +def test_grouping_typed_literal(s): + parsed = sqlparse.parse(s)[0] + assert isinstance(parsed[0][4], sql.TypedLiteral) + + +@pytest.mark.parametrize('s, a, b', [ + ('select a from b where c < d + e', sql.Identifier, sql.Identifier), + ('select a from b where c < d + interval \'1 day\'', sql.Identifier, sql.TypedLiteral), + ('select a from b where c < d + interval \'6\' month', sql.Identifier, sql.TypedLiteral), + ('select a from b where c < current_timestamp - interval \'1 day\'', sql.Token, sql.TypedLiteral), +]) +def test_compare_expr(s, a, b): + parsed = sqlparse.parse(s)[0] + assert str(parsed) == s + assert isinstance(parsed.tokens[2], sql.Identifier) + assert isinstance(parsed.tokens[6], sql.Identifier) + assert 
isinstance(parsed.tokens[8], sql.Where) + assert len(parsed.tokens) == 9 + where = parsed.tokens[8] + assert isinstance(where.tokens[2], sql.Comparison) + assert len(where.tokens) == 3 + comparison = where.tokens[2] + assert isinstance(comparison.tokens[0], sql.Identifier) + assert comparison.tokens[2].ttype is T.Operator.Comparison + assert isinstance(comparison.tokens[4], sql.Operation) + assert len(comparison.tokens) == 5 + operation = comparison.tokens[4] + assert isinstance(operation.tokens[0], a) + assert operation.tokens[2].ttype is T.Operator + assert isinstance(operation.tokens[4], b) + assert len(operation.tokens) == 5 + + def test_grouping_identifiers(): s = 'select foo.bar from "myscheme"."table" where fail. order' parsed = sqlparse.parse(s)[0] @@ -127,6 +159,14 @@ def test_grouping_identifier_invalid_in_middle(): assert p[3].ttype == T.Whitespace assert str(p[2]) == 'foo.' +@pytest.mark.parametrize('s', ['foo as (select *)', 'foo as(select *)']) +def test_grouping_identifer_as(s): + # issue507 + p = sqlparse.parse(s)[0] + assert isinstance(p.tokens[0], sql.Identifier) + token = p.tokens[0].tokens[2] + assert token.ttype == T.Keyword + assert token.normalized == 'AS' def test_grouping_identifier_as_invalid(): # issue8 @@ -284,6 +324,11 @@ def test_grouping_alias_case(): assert p.tokens[0].get_alias() == 'foo' +def test_grouping_alias_ctas(): + p = sqlparse.parse('CREATE TABLE tbl1 AS SELECT coalesce(t1.col1, 0) AS col1 FROM t1')[0] + assert p.tokens[10].get_alias() == 'col1' + assert isinstance(p.tokens[10].tokens[0], sql.Function) + def test_grouping_subquery_no_parens(): # Not totally sure if this is the right approach... # When a THEN clause contains a subquery w/o parenthesis around it *and* @@ -294,9 +339,10 @@ def test_grouping_subquery_no_parens(): assert isinstance(p.tokens[0], sql.Case) -def test_grouping_alias_returns_none(): - # see issue185 - p = sqlparse.parse('foo.bar')[0] +@pytest.mark.parametrize('s', ['foo.bar', 'x, y', 'x > y', 'x / y']) +def test_grouping_alias_returns_none(s): + # see issue185 and issue445 + p = sqlparse.parse(s)[0] assert len(p.tokens) == 1 assert p.tokens[0].get_alias() is None @@ -348,10 +394,6 @@ def f(sql): assert f(' update foo').get_type() == 'UPDATE' assert f('\nupdate foo').get_type() == 'UPDATE' assert f('foo').get_type() == 'UNKNOWN' - # Statements that have a whitespace after the closing semicolon - # are parsed as two statements where later only consists of the - # trailing whitespace. 
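A compact companion to the alias tests above, showing the alias forms NameAliasMixin recognizes and the None case (the first and last inputs come from the surrounding tests; 'foo AS bar' is an extra illustrative input):

    import sqlparse

    # "name alias" without AS still yields an alias ...
    assert sqlparse.parse('1 foo')[0].tokens[0].get_alias() == 'foo'
    # ... as does the explicit "name AS alias" form.
    assert sqlparse.parse('foo AS bar')[0].tokens[0].get_alias() == 'bar'
    # A bare dotted name has no alias (issue185/issue445).
    assert sqlparse.parse('foo.bar')[0].tokens[0].get_alias() is None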
- assert f('\n').get_type() == 'UNKNOWN' def test_identifier_with_operators(): @@ -435,15 +477,54 @@ def test_comparison_with_parenthesis(): assert comp.right.ttype is T.Number.Integer -def test_comparison_with_strings(): +@pytest.mark.parametrize('operator', ( + '=', '!=', '>', '<', '<=', '>=', '~', '~~', '!~~', + 'LIKE', 'NOT LIKE', 'ILIKE', 'NOT ILIKE', +)) +def test_comparison_with_strings(operator): # issue148 - p = sqlparse.parse("foo = 'bar'")[0] + p = sqlparse.parse("foo {} 'bar'".format(operator))[0] assert len(p.tokens) == 1 assert isinstance(p.tokens[0], sql.Comparison) assert p.tokens[0].right.value == "'bar'" assert p.tokens[0].right.ttype == T.String.Single +def test_like_and_ilike_comparison(): + def validate_where_clause(where_clause, expected_tokens): + assert len(where_clause.tokens) == len(expected_tokens) + for where_token, expected_token in zip(where_clause, expected_tokens): + expected_ttype, expected_value = expected_token + if where_token.ttype is not None: + assert where_token.match(expected_ttype, expected_value, regex=True) + else: + # Certain tokens, such as comparison tokens, do not define a ttype that can be + # matched against. For these tokens, we ensure that the token instance is of + # the expected type and has a value conforming to specified regular expression + import re + assert (isinstance(where_token, expected_ttype) + and re.match(expected_value, where_token.value)) + + [p1] = sqlparse.parse("select * from mytable where mytable.mycolumn LIKE 'expr%' limit 5;") + [p1_where] = [token for token in p1 if isinstance(token, sql.Where)] + validate_where_clause(p1_where, [ + (T.Keyword, "where"), + (T.Whitespace, None), + (sql.Comparison, r"mytable.mycolumn LIKE.*"), + (T.Whitespace, None), + ]) + + [p2] = sqlparse.parse( + "select * from mytable where mycolumn NOT ILIKE '-expr' group by othercolumn;") + [p2_where] = [token for token in p2 if isinstance(token, sql.Where)] + validate_where_clause(p2_where, [ + (T.Keyword, "where"), + (T.Whitespace, None), + (sql.Comparison, r"mycolumn NOT ILIKE.*"), + (T.Whitespace, None), + ]) + + def test_comparison_with_functions(): # issue230 p = sqlparse.parse('foo = DATE(bar.baz)')[0] @@ -468,9 +549,20 @@ def test_comparison_with_functions(): assert p.tokens[0].right.value == 'bar.baz' +def test_comparison_with_typed_literal(): + p = sqlparse.parse("foo = DATE 'bar.baz'")[0] + assert len(p.tokens) == 1 + comp = p.tokens[0] + assert isinstance(comp, sql.Comparison) + assert len(comp.tokens) == 5 + assert comp.left.value == 'foo' + assert isinstance(comp.right, sql.TypedLiteral) + assert comp.right.value == "DATE 'bar.baz'" + + @pytest.mark.parametrize('start', ['FOR', 'FOREACH']) def test_forloops(start): - p = sqlparse.parse('{0} foo in bar LOOP foobar END LOOP'.format(start))[0] + p = sqlparse.parse('{} foo in bar LOOP foobar END LOOP'.format(start))[0] assert (len(p.tokens)) == 1 assert isinstance(p.tokens[0], sql.For) @@ -550,3 +642,15 @@ def test_aliased_literal_without_as(): p = sqlparse.parse('1 foo')[0].tokens assert len(p) == 1 assert p[0].get_alias() == 'foo' + + +def test_grouping_as_cte(): + p = sqlparse.parse('foo AS WITH apple AS 1, banana AS 2')[0].tokens + assert len(p) > 4 + assert p[0].get_alias() is None + assert p[2].value == 'AS' + assert p[4].value == 'WITH' + +def test_grouping_create_table(): + p = sqlparse.parse("create table db.tbl (a string)")[0].tokens + assert p[4].value == "db.tbl" diff --git a/tests/test_keywords.py b/tests/test_keywords.py index c197f367..b26e9b45 100644 --- 
a/tests/test_keywords.py +++ b/tests/test_keywords.py @@ -1,8 +1,7 @@ -# -*- coding: utf-8 -*- import pytest from sqlparse import tokens -from sqlparse.keywords import SQL_REGEX +from sqlparse.lexer import Lexer class TestSQLREGEX: @@ -10,5 +9,5 @@ class TestSQLREGEX: '1.', '-1.', '.1', '-.1']) def test_float_numbers(self, number): - ttype = next(tt for action, tt in SQL_REGEX if action(number)) + ttype = next(tt for action, tt in Lexer.get_default_instance()._SQL_REGEX if action(number)) assert tokens.Number.Float == ttype diff --git a/tests/test_parse.py b/tests/test_parse.py index f2a2bda6..5feef5a7 100644 --- a/tests/test_parse.py +++ b/tests/test_parse.py @@ -1,12 +1,11 @@ -# -*- coding: utf-8 -*- - """Tests sqlparse.parse().""" +from io import StringIO import pytest import sqlparse -from sqlparse import sql, tokens as T -from sqlparse.compat import StringIO, text_type +from sqlparse import sql, tokens as T, keywords +from sqlparse.lexer import Lexer def test_parse_tokenize(): @@ -102,6 +101,12 @@ def test_parse_square_brackets_notation_isnt_too_greedy(): assert t[0].tokens[-1].get_real_name() == '[bar]' +def test_parse_square_brackets_notation_isnt_too_greedy2(): + # see issue583 + t = sqlparse.parse('[(foo[i])]')[0].tokens + assert isinstance(t[0], sql.SquareBrackets) # not Identifier! + + def test_parse_keyword_like_identifier(): # see issue47 t = sqlparse.parse('foo.key')[0].tokens @@ -128,6 +133,12 @@ def test_parse_nested_function(): assert type(t[0]) is sql.Function +def test_parse_div_operator(): + p = sqlparse.parse('col1 DIV 5 AS div_col1')[0].tokens + assert p[0].tokens[0].tokens[2].ttype is T.Operator + assert p[0].get_alias() == 'div_col1' + + def test_quoted_identifier(): t = sqlparse.parse('select x.y as "z" from foo')[0].tokens assert isinstance(t[2], sql.Identifier) @@ -138,6 +149,7 @@ def test_quoted_identifier(): @pytest.mark.parametrize('name', [ 'foo', '_foo', # issue175 '1_data', # valid MySQL table name, see issue337 + '業者名稱', # valid at least for SQLite3, see issue641 ]) def test_valid_identifier_names(name): t = sqlparse.parse(name)[0].tokens @@ -184,11 +196,16 @@ def test_placeholder(ph): assert p[0].ttype is T.Name.Placeholder -@pytest.mark.parametrize('num', ['6.67428E-8', '1.988e33', '1e-12']) -def test_scientific_numbers(num): +@pytest.mark.parametrize('num, expected', [ + ('6.67428E-8', T.Number.Float), + ('1.988e33', T.Number.Float), + ('1e-12', T.Number.Float), + ('e1', None), +]) +def test_scientific_numbers(num, expected): p = sqlparse.parse(num)[0].tokens assert len(p) == 1 - assert p[0].ttype is T.Number.Float + assert p[0].ttype is expected def test_single_quotes_are_strings(): @@ -332,7 +349,8 @@ def test_pprint(): "| | `- 0 Name 'd0'", "| |- 10 Punctuation ','", "| |- 11 Whitespace ' '", - "| `- 12 Float 'e0'", + "| `- 12 Identifier 'e0'", + "| `- 0 Name 'e0'", "|- 3 Whitespace ' '", "|- 4 Keyword 'from'", "|- 5 Whitespace ' '", @@ -409,42 +427,42 @@ def test_dbldollar_as_literal(sql, is_literal): def test_non_ascii(): - _test_non_ascii = u"insert into test (id, name) values (1, 'тест');" + _test_non_ascii = "insert into test (id, name) values (1, 'тест');" s = _test_non_ascii stmts = sqlparse.parse(s) assert len(stmts) == 1 statement = stmts[0] - assert text_type(statement) == s + assert str(statement) == s assert statement._pprint_tree() is None s = _test_non_ascii.encode('utf-8') stmts = sqlparse.parse(s, 'utf-8') assert len(stmts) == 1 statement = stmts[0] - assert text_type(statement) == _test_non_ascii + assert str(statement) == 
_test_non_ascii assert statement._pprint_tree() is None def test_get_real_name(): # issue 369 - s = u"update a t set t.b=1" + s = "update a t set t.b=1" stmts = sqlparse.parse(s) assert len(stmts) == 1 - assert 'a' == stmts[0].get_real_name() - assert 't' == stmts[0].get_alias() + assert 'a' == stmts[0].tokens[2].get_real_name() + assert 't' == stmts[0].tokens[2].get_alias() def test_from_subquery(): # issue 446 - s = u'from(select 1)' + s = 'from(select 1)' stmts = sqlparse.parse(s) assert len(stmts) == 1 assert len(stmts[0].tokens) == 2 assert stmts[0].tokens[0].value == 'from' assert stmts[0].tokens[0].ttype == T.Keyword - s = u'from (select 1)' + s = 'from (select 1)' stmts = sqlparse.parse(s) assert len(stmts) == 1 assert len(stmts[0].tokens) == 3 @@ -472,3 +490,79 @@ def test_parenthesis(): T.Newline, T.Newline, T.Punctuation] + + +def test_configurable_keywords(): + sql = """select * from foo BACON SPAM EGGS;""" + tokens = sqlparse.parse(sql)[0] + + assert list( + (t.ttype, t.value) + for t in tokens + if t.ttype not in sqlparse.tokens.Whitespace + ) == [ + (sqlparse.tokens.Keyword.DML, "select"), + (sqlparse.tokens.Wildcard, "*"), + (sqlparse.tokens.Keyword, "from"), + (None, "foo BACON"), + (None, "SPAM EGGS"), + (sqlparse.tokens.Punctuation, ";"), + ] + + Lexer.get_default_instance().add_keywords( + { + "BACON": sqlparse.tokens.Name.Builtin, + "SPAM": sqlparse.tokens.Keyword, + "EGGS": sqlparse.tokens.Keyword, + } + ) + + tokens = sqlparse.parse(sql)[0] + + # reset the syntax for later tests. + Lexer.get_default_instance().default_initialization() + + assert list( + (t.ttype, t.value) + for t in tokens + if t.ttype not in sqlparse.tokens.Whitespace + ) == [ + (sqlparse.tokens.Keyword.DML, "select"), + (sqlparse.tokens.Wildcard, "*"), + (sqlparse.tokens.Keyword, "from"), + (None, "foo"), + (sqlparse.tokens.Name.Builtin, "BACON"), + (sqlparse.tokens.Keyword, "SPAM"), + (sqlparse.tokens.Keyword, "EGGS"), + (sqlparse.tokens.Punctuation, ";"), + ] + + +def test_configurable_regex(): + lex = Lexer.get_default_instance() + lex.clear() + + my_regex = (r"ZORDER\s+BY\b", sqlparse.tokens.Keyword) + + lex.set_SQL_REGEX( + keywords.SQL_REGEX[:38] + + [my_regex] + + keywords.SQL_REGEX[38:] + ) + lex.add_keywords(keywords.KEYWORDS_COMMON) + lex.add_keywords(keywords.KEYWORDS_ORACLE) + lex.add_keywords(keywords.KEYWORDS_PLPGSQL) + lex.add_keywords(keywords.KEYWORDS_HQL) + lex.add_keywords(keywords.KEYWORDS_MSACCESS) + lex.add_keywords(keywords.KEYWORDS) + + tokens = sqlparse.parse("select * from foo zorder by bar;")[0] + + # reset the syntax for later tests. 
+ Lexer.get_default_instance().default_initialization() + + assert list( + (t.ttype, t.value) + for t in tokens + if t.ttype not in sqlparse.tokens.Whitespace + )[4] == (sqlparse.tokens.Keyword, "zorder by") diff --git a/tests/test_regressions.py b/tests/test_regressions.py index 1d52ea78..bc8b7dd3 100644 --- a/tests/test_regressions.py +++ b/tests/test_regressions.py @@ -1,10 +1,7 @@ -# -*- coding: utf-8 -*- - import pytest import sqlparse from sqlparse import sql, tokens as T -from sqlparse.compat import PY2 def test_issue9(): @@ -20,9 +17,9 @@ def test_issue9(): def test_issue13(): - parsed = sqlparse.parse(("select 'one';\n" - "select 'two\\'';\n" - "select 'three';")) + parsed = sqlparse.parse("select 'one';\n" + "select 'two\\'';\n" + "select 'three';") assert len(parsed) == 3 assert str(parsed[1]).strip() == "select 'two\\'';" @@ -73,8 +70,8 @@ def test_issue39(): def test_issue40(): # make sure identifier lists in subselects are grouped - p = sqlparse.parse(('SELECT id, name FROM ' - '(SELECT id, name FROM bar) as foo'))[0] + p = sqlparse.parse('SELECT id, name FROM ' + '(SELECT id, name FROM bar) as foo')[0] assert len(p.tokens) == 7 assert p.tokens[2].__class__ == sql.IdentifierList assert p.tokens[-1].__class__ == sql.Identifier @@ -149,7 +146,7 @@ def test_issue83(): def test_comment_encoding_when_reindent(): # There was an UnicodeEncodeError in the reindent filter that # casted every comment followed by a keyword to str. - sql = u'select foo -- Comment containing Ümläuts\nfrom bar' + sql = 'select foo -- Comment containing Ümläuts\nfrom bar' formatted = sqlparse.format(sql, reindent=True) assert formatted == sql @@ -158,11 +155,9 @@ def test_parse_sql_with_binary(): # See https://github.com/andialbrecht/sqlparse/pull/88 # digest = '‚|ËêŠplL4¡h‘øN{' digest = '\x82|\xcb\x0e\xea\x8aplL4\xa1h\x91\xf8N{' - sql = "select * from foo where bar = '{0}'".format(digest) + sql = "select * from foo where bar = '{}'".format(digest) formatted = sqlparse.format(sql, reindent=True) - tformatted = "select *\nfrom foo\nwhere bar = '{0}'".format(digest) - if PY2: - tformatted = tformatted.decode('unicode-escape') + tformatted = "select *\nfrom foo\nwhere bar = '{}'".format(digest) assert formatted == tformatted @@ -180,7 +175,7 @@ def test_format_accepts_encoding(load_file): # issue20 sql = load_file('test_cp1251.sql', 'cp1251') formatted = sqlparse.format(sql, reindent=True, encoding='cp1251') - tformatted = u'insert into foo\nvalues (1); -- Песня про надежду' + tformatted = 'insert into foo\nvalues (1); -- Песня про надежду' assert formatted == tformatted @@ -275,7 +270,7 @@ def test_issue186_get_type(): def test_issue212_py2unicode(): - t1 = sql.Token(T.String, u'schöner ') + t1 = sql.Token(T.String, 'schöner ') t2 = sql.Token(T.String, 'bug') token_list = sql.TokenList([t1, t2]) assert str(token_list) == 'schöner bug' @@ -337,11 +332,9 @@ def test_issue315_utf8_by_default(): '\x9b\xb2.' 
'\xec\x82\xac\xeb\x9e\x91\xed\x95\xb4\xec\x9a\x94' ) - sql = "select * from foo where bar = '{0}'".format(digest) + sql = "select * from foo where bar = '{}'".format(digest) formatted = sqlparse.format(sql, reindent=True) - tformatted = "select *\nfrom foo\nwhere bar = '{0}'".format(digest) - if PY2: - tformatted = tformatted.decode('utf-8') + tformatted = "select *\nfrom foo\nwhere bar = '{}'".format(digest) assert formatted == tformatted @@ -373,3 +366,73 @@ def test_issue469_copy_as_psql_command(): '\\copy select * from foo', keyword_case='upper', identifier_case='capitalize') assert formatted == '\\copy SELECT * FROM Foo' + + +@pytest.mark.xfail(reason='Needs to be fixed') +def test_issue484_comments_and_newlines(): + formatted = sqlparse.format('\n'.join([ + 'Create table myTable', + '(', + ' myId TINYINT NOT NULL, --my special comment', + ' myName VARCHAR2(100) NOT NULL', + ')']), + strip_comments=True) + assert formatted == ('\n'.join([ + 'Create table myTable', + '(', + ' myId TINYINT NOT NULL,', + ' myName VARCHAR2(100) NOT NULL', + ')'])) + + +def test_issue485_split_multi(): + p_sql = '''CREATE OR REPLACE RULE ruled_tab_2rules AS ON INSERT +TO public.ruled_tab +DO instead ( +select 1; +select 2; +);''' + assert len(sqlparse.split(p_sql)) == 1 + + +def test_issue489_tzcasts(): + p = sqlparse.parse('select bar at time zone \'UTC\' as foo')[0] + assert p.tokens[-1].has_alias() is True + assert p.tokens[-1].get_alias() == 'foo' + + +def test_issue562_tzcasts(): + # Test that whitespace between 'from' and 'bar' is retained + formatted = sqlparse.format( + 'SELECT f(HOUR from bar AT TIME ZONE \'UTC\') from foo', reindent=True + ) + assert formatted == \ + 'SELECT f(HOUR\n from bar AT TIME ZONE \'UTC\')\nfrom foo' + + +def test_as_in_parentheses_indents(): + # did raise NoneType has no attribute is_group in _process_parentheses + formatted = sqlparse.format('(as foo)', reindent=True) + assert formatted == '(as foo)' + + +def test_format_invalid_where_clause(): + # did raise ValueError + formatted = sqlparse.format('where, foo', reindent=True) + assert formatted == 'where, foo' + + +def test_splitting_at_and_backticks_issue588(): + splitted = sqlparse.split( + 'grant foo to user1@`myhost`; grant bar to user1@`myhost`;') + assert len(splitted) == 2 + assert splitted[-1] == 'grant bar to user1@`myhost`;' + + +def test_comment_between_cte_clauses_issue632(): + p, = sqlparse.parse(""" + WITH foo AS (), + -- A comment before baz subquery + baz AS () + SELECT * FROM baz;""") + assert p.get_type() == "SELECT" diff --git a/tests/test_split.py b/tests/test_split.py index ccb84a89..e79750e8 100644 --- a/tests/test_split.py +++ b/tests/test_split.py @@ -1,13 +1,11 @@ -# -*- coding: utf-8 -*- - # Tests splitting functions. 
import types +from io import StringIO import pytest import sqlparse -from sqlparse.compat import StringIO, text_type def test_split_semicolon(): @@ -20,8 +18,8 @@ def test_split_semicolon(): def test_split_backslash(): - stmts = sqlparse.parse(r"select '\\'; select '\''; select '\\\'';") - assert len(stmts) == 3 + stmts = sqlparse.parse("select '\'; select '\'';") + assert len(stmts) == 2 @pytest.mark.parametrize('fn', ['function.sql', @@ -33,7 +31,7 @@ def test_split_create_function(load_file, fn): sql = load_file(fn) stmts = sqlparse.parse(sql) assert len(stmts) == 1 - assert text_type(stmts[0]) == sql + assert str(stmts[0]) == sql def test_split_dashcomments(load_file): @@ -52,22 +50,6 @@ def test_split_dashcomments_eol(s): assert len(stmts) == 1 -def test_split_slashcomments(load_file): - sql = load_file('slashcomment.sql') - stmts = sqlparse.parse(sql) - assert len(stmts) == 3 - assert ''.join(str(q) for q in stmts) == sql - - -@pytest.mark.parametrize('s', ['select foo; // comment\n', - 'select foo; // comment\r', - 'select foo; // comment\r\n', - 'select foo; // comment']) -def test_split_slashcomments_eol(s): - stmts = sqlparse.parse(s) - assert len(stmts) == 1 - - def test_split_begintag(load_file): sql = load_file('begintag.sql') stmts = sqlparse.parse(sql) @@ -90,12 +72,12 @@ def test_split_dropif(): def test_split_comment_with_umlaut(): - sql = (u'select * from foo;\n' - u'-- Testing an umlaut: ä\n' - u'select * from bar;') + sql = ('select * from foo;\n' + '-- Testing an umlaut: ä\n' + 'select * from bar;') stmts = sqlparse.parse(sql) assert len(stmts) == 2 - assert ''.join(text_type(q) for q in stmts) == sql + assert ''.join(str(q) for q in stmts) == sql def test_split_comment_end_of_line(): @@ -115,6 +97,12 @@ def test_split_casewhen(): assert len(stmts) == 2 +def test_split_casewhen_procedure(load_file): + # see issue580 + stmts = sqlparse.split(load_file('casewhen_procedure.sql')) + assert len(stmts) == 2 + + def test_split_cursor_declare(): sql = ('DECLARE CURSOR "foo" AS SELECT 1;\n' 'SELECT 2;') @@ -141,11 +129,11 @@ def test_split_stream(): def test_split_encoding_parsestream(): stream = StringIO("SELECT 1; SELECT 2;") stmts = list(sqlparse.parsestream(stream)) - assert isinstance(stmts[0].tokens[0].value, text_type) + assert isinstance(stmts[0].tokens[0].value, str) def test_split_unicode_parsestream(): - stream = StringIO(u'SELECT ö') + stream = StringIO('SELECT ö') stmts = list(sqlparse.parsestream(stream)) assert str(stmts[0]) == 'SELECT ö' @@ -157,6 +145,13 @@ def test_split_simple(): assert stmts[1] == 'select * from bar;' +def test_split_ignores_empty_newlines(): + stmts = sqlparse.split('select foo;\nselect bar;\n') + assert len(stmts) == 2 + assert stmts[0] == 'select foo;' + assert stmts[1] == 'select bar;' + + def test_split_quotes_with_new_line(): stmts = sqlparse.split('select "foo\nbar"') assert len(stmts) == 1 @@ -165,3 +160,9 @@ def test_split_quotes_with_new_line(): stmts = sqlparse.split("select 'foo\n\bar'") assert len(stmts) == 1 assert stmts[0] == "select 'foo\n\bar'" + + +def test_split_mysql_handler_for(load_file): + # see issue581 + stmts = sqlparse.split(load_file('mysql_handler.sql')) + assert len(stmts) == 2 diff --git a/tests/test_tokenize.py b/tests/test_tokenize.py index fcd1102b..af0ba163 100644 --- a/tests/test_tokenize.py +++ b/tests/test_tokenize.py @@ -1,13 +1,11 @@ -# -*- coding: utf-8 -*- - import types +from io import StringIO import pytest import sqlparse from sqlparse import lexer from sqlparse import sql, tokens as T -from 
sqlparse.compat import StringIO def test_tokenize_simple(): @@ -152,7 +150,7 @@ def test_stream_error(): 'INNER JOIN', 'LEFT INNER JOIN']) def test_parse_join(expr): - p = sqlparse.parse('{0} foo'.format(expr))[0] + p = sqlparse.parse('{} foo'.format(expr))[0] assert len(p.tokens) == 3 assert p.tokens[0].ttype is T.Keyword @@ -171,6 +169,13 @@ def test_parse_endifloop(s): assert p.tokens[0].ttype is T.Keyword +@pytest.mark.parametrize('s', ['NULLS FIRST', 'NULLS LAST']) +def test_parse_nulls(s): # issue487 + p = sqlparse.parse(s)[0] + assert len(p.tokens) == 1 + assert p.tokens[0].ttype is T.Keyword + + @pytest.mark.parametrize('s', [ 'foo', 'Foo', @@ -197,6 +202,40 @@ def test_parse_order_by(): assert p.tokens[0].ttype is T.Keyword +def test_parse_window_as(): + p = sqlparse.parse('WINDOW w AS')[0] + assert len(p.tokens) == 5 + assert p.tokens[0].ttype is T.Keyword + + +@pytest.mark.parametrize('s', ( + "LIKE", "ILIKE", "NOT LIKE", "NOT ILIKE", + "NOT LIKE", "NOT ILIKE", +)) +def test_like_and_ilike_parsed_as_comparisons(s): + p = sqlparse.parse(s)[0] + assert len(p.tokens) == 1 + assert p.tokens[0].ttype == T.Operator.Comparison + + +@pytest.mark.parametrize('s', ( + "LIKEaaa", "bILIKE", "aaILIKEbb", "NOTLIKE", "NOTILIKE", +)) +def test_near_like_and_ilike_parsed_appropriately(s): + p = sqlparse.parse(s)[0] + assert len(p.tokens) == 1 + assert isinstance(p.tokens[0], sql.Identifier) + + +@pytest.mark.parametrize('s', ( + 'AT TIME ZONE \'UTC\'', +)) +def test_parse_tzcast(s): + p = sqlparse.parse(s)[0] + assert len(p.tokens) == 1 + assert p.tokens[0].ttype == T.Keyword.TZCast + + def test_cli_commands(): p = sqlparse.parse('\\copy')[0] assert len(p.tokens) == 1 diff --git a/tests/test_utils.py b/tests/test_utils.py new file mode 100644 index 00000000..d020f3fa --- /dev/null +++ b/tests/test_utils.py @@ -0,0 +1,12 @@ +import pytest + +from sqlparse import utils + + +@pytest.mark.parametrize('value, expected', ( + [None, None], + ['\'foo\'', 'foo'], + ['"foo"', 'foo'], + ['`foo`', 'foo'])) +def test_remove_quotes(value, expected): + assert utils.remove_quotes(value) == expected diff --git a/tox.ini b/tox.ini index 2e5010a8..0087d50e 100644 --- a/tox.ini +++ b/tox.ini @@ -1,22 +1,17 @@ [tox] skip_missing_interpreters = True envlist = - py27 - py34 py35 py36 py37 py38 - pypy_54 + pypy3 flake8 [testenv] deps = pytest pytest-cov - pytest-travis-fold -passenv = - TRAVIS commands = sqlformat --version pytest --cov=sqlparse {posargs}
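Closing out, the utils change from this patch in miniature (mirroring the new tests/test_utils.py above):

    from sqlparse import utils

    # remove_quotes() now strips backticks in addition to single and
    # double quotes.
    assert utils.remove_quotes('`foo`') == 'foo'
    assert utils.remove_quotes('"foo"') == 'foo'
    assert utils.remove_quotes("'foo'") == 'foo'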