diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml new file mode 100644 index 00000000..1cde398b --- /dev/null +++ b/.github/workflows/codeql-analysis.yml @@ -0,0 +1,71 @@ +# For most projects, this workflow file will not need changing; you simply need +# to commit it to your repository. +# +# You may wish to alter this file to override the set of languages analyzed, +# or to provide custom queries or build logic. +# +# ******** NOTE ******** +# We have attempted to detect the languages in your repository. Please check +# the `language` matrix defined below to confirm you have the correct set of +# supported CodeQL languages. +# +name: "CodeQL" + +on: + push: + branches: [ master ] + pull_request: + # The branches below must be a subset of the branches above + branches: [ master ] + schedule: + - cron: '25 5 * * 1' + +jobs: + analyze: + name: Analyze + runs-on: ubuntu-latest + permissions: + actions: read + contents: read + security-events: write + + strategy: + fail-fast: false + matrix: + language: [ 'python' ] + # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python' ] + # Learn more: + # https://docs.github.com/en/free-pro-team@latest/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning#changing-the-languages-that-are-analyzed + + steps: + - name: Checkout repository + uses: actions/checkout@v3 + + # Initializes the CodeQL tools for scanning. + - name: Initialize CodeQL + uses: github/codeql-action/init@v2 + with: + languages: ${{ matrix.language }} + # If you wish to specify custom queries, you can do so here or in a config file. + # By default, queries listed here will override any specified in a config file. + # Prefix the list here with "+" to use these queries and those in the config file. + # queries: ./path/to/local/query, your-org/your-repo/queries@main + + # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). + # If this step fails, then you should remove it and run the build manually (see below) + - name: Autobuild + uses: github/codeql-action/autobuild@v2 + + # ℹ️ Command-line programs to run using the OS shell. 
+ # 📚 https://git.io/JvXDl + + # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines + # and modify them (or add more) to build your code if your project + # uses a compiled language + + #- run: | + # make bootstrap + # make release + + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@v2 diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml new file mode 100644 index 00000000..3033af97 --- /dev/null +++ b/.github/workflows/python-app.yml @@ -0,0 +1,38 @@ +# This workflow will install Python dependencies, run tests and lint with a single version of Python +# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions + +name: Python application + +on: + push: + branches: + - master + pull_request: + branches: [ master ] + schedule: + - cron: '0 12 * * *' + +jobs: + build: + + runs-on: ubuntu-20.04 # keep it on 20.04 to have Python 3.5 and 3.6 available + strategy: + matrix: + python-version: ["3.5", "3.6", "3.7", "3.8", "3.9", "3.10", "3.11", "3.12-dev"] + + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip flit + flit install --deps=develop + - name: Lint with flake8 + run: flake8 sqlparse --count --max-complexity=31 --show-source --statistics + - name: Test with pytest + run: pytest --cov=sqlparse + - name: Publish to codecov + uses: codecov/codecov-action@v3 diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 3e091594..00000000 --- a/.travis.yml +++ /dev/null @@ -1,26 +0,0 @@ -language: python -python: - - "2.7" - - "3.4" - - "3.5" - - "3.6" - - "nightly" - - "pypy" - - "pypy3" -# Enable 3.7 without globally enabling sudo and dist: xenial for other build jobs -# See https://github.com/travis-ci/travis-ci/issues/9815 -matrix: - include: - - python: 3.7 - dist: xenial - sudo: true - -install: - - pip install -Uq pytest pytest-cov codecov - - pytest --version - -script: - - pytest --cov=sqlparse - -after_success: - - codecov diff --git a/AUTHORS b/AUTHORS index b4ff0aff..1717adff 100644 --- a/AUTHORS +++ b/AUTHORS @@ -8,24 +8,34 @@ project: https://bitbucket.org/gutworth/six. 
Alphabetical list of contributors:
 
 * Adam Greenhall
+* Aki Ariga
 * Alexander Beedie
 * Alexey Malyshev
+* ali-tny
 * andrew deryabin
 * Andrew Tipton
 * atronah
 * casey
 * Cauê Beloni
+* Christian Clauss
 * circld
+* Corey Zumar
 * Cristian Orellana
 * Dag Wieers
+* Daniel Harding
 * Darik Gamble
 * Demetrio92
 * Dennis Taylor
+* Dvořák Václav
+* Erik Cederstrand
 * Florian Bauer
 * Fredy Wijaya
 * Gavin Wahl
+* hurcy
 * Ian Robertson
 * JacekPliszka
+* JavierPan
+* Jean-Martin Archer
 * Jesús Leganés Combarro "Piranna"
 * Johannes Hoff
 * John Bodley
@@ -33,10 +43,14 @@ Alphabetical list of contributors:
 * Josh Soref
 * Kevin Jing Qiu
 * koljonen
+* Likai Liu
+* Long Le Xich
+* mathilde.oustlant
 * Michael Schuller
 * Mike Amy
 * mulos
 * Oleg Broytman
+* osmnv <80402144+osmnv@users.noreply.github.com>
 * Patrick Schemitz
 * Pi Delport
 * Prudhvi Vatala
@@ -48,6 +62,7 @@ Alphabetical list of contributors:
 * Ryan Wooden
 * saaj
 * Shen Longxing
+* Simon Heisterkamp
 * Sjoerd Job Postmus
 * Soloman Weng
 * spigwitmer
@@ -59,4 +74,6 @@ Alphabetical list of contributors:
 * Ville Skyttä
 * vthriller
 * wayne.wuw
+* Will Jones
+* William Ivanski
 * Yago Riveiro
diff --git a/CHANGELOG b/CHANGELOG
index f8853362..a42577e1 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,3 +1,141 @@
+Release 0.4.4 (Apr 18, 2023)
+----------------------------
+
+Notable Changes
+
+* IMPORTANT: This release fixes a security vulnerability in the
+  parser where a regular expression vulnerable to ReDOS (Regular
+  Expression Denial of Service) was used. See the security advisory
+  for details: https://github.com/andialbrecht/sqlparse/security/advisories/GHSA-rrm6-wvj7-cwh2
+  The vulnerability was discovered by @erik-krogh from GitHub
+  Security Lab (GHSL). Thanks for reporting!
+
+Bug Fixes
+
+* Revert a change from 0.4.0 that changed IN to be a comparison (issue694).
+  The primary expectation is that IN is treated as a keyword and not as a
+  comparison operator. That also follows the definition of reserved keywords
+  for the major SQL syntax definitions.
+* Fix regular expressions for string parsing.
+
+Other
+
+* sqlparse now uses pyproject.toml instead of setup.cfg (issue685).
+
+
+Release 0.4.3 (Sep 23, 2022)
+----------------------------
+
+Enhancements
+
+* Add support for DIV operator (pr664, by chezou).
+* Add support for additional SPARK keywords (pr643, by mrmasterplan).
+* Avoid tokens copy (pr622, by living180).
+* Add REGEXP as a comparison (pr647, by PeterSandwich).
+* Add DISTINCTROW keyword for MS Access (issue677).
+* Improve parsing of CREATE TABLE AS SELECT (pr662, by chezou).
+
+Bug Fixes
+
+* Fix spelling of INDICATOR keyword (pr653, by ptld).
+* Fix formatting error in EXTRACT function (issue562, issue670, pr676, by ecederstrand).
+* Fix bad parsing of create table statements that use lower case (issue217, pr642, by mrmasterplan).
+* Handle backtick as valid quote char (issue628, pr629, by codenamelxl).
+* Allow any unicode character as valid identifier name (issue641).
+
+Other
+
+* Update github actions to test on Python 3.10 as well (pr661, by cclaus).
+
+
+Release 0.4.2 (Sep 10, 2021)
+----------------------------
+
+Notable Changes
+
+* IMPORTANT: This release fixes a security vulnerability in the
+  strip comments filter. In this filter a regular expression that was
+  vulnerable to ReDOS (Regular Expression Denial of Service) was
+  used.
See the security advisory for details: https://github.com/andialbrecht/sqlparse/security/advisories/GHSA-p5w8-wqhj-9hhf + The vulnerability was discovered by @erik-krogh and @yoff from + GitHub Security Lab (GHSL). Thanks for reporting! + +Enhancements + +* Add ELSIF as keyword (issue584). +* Add CONFLICT and ON_ERROR_STOP keywords (pr595, by j-martin). + +Bug Fixes + +* Fix parsing of backticks (issue588). +* Fix parsing of scientific number (issue399). + + +Release 0.4.1 (Oct 08, 2020) +---------------------------- + +Bug Fixes + +* Just removed a debug print statement, sorry... + + +Release 0.4.0 (Oct 07, 2020) +---------------------------- + +Notable Changes + +* Remove support for end-of-life Python 2.7 and 3.4. Python 3.5+ is now + required. +* Remaining strings that only consist of whitespaces are not treated as + statements anymore. Code that ignored the last element from + sqlparse.split() should be updated accordingly since that function + now doesn't return an empty string as the last element in some + cases (issue496). + +Enhancements + +* Add WINDOW keyword (pr579 by ali-tny). +* Add RLIKE keyword (pr582 by wjones1). + +Bug Fixes + +* Improved parsing of IN(...) statements (issue566, pr567 by hurcy). +* Preserve line breaks when removing comments (issue484). +* Fix parsing error when using square bracket notation (issue583). +* Fix splitting when using DECLARE ... HANDLER (issue581). +* Fix splitting of statements using CASE ... WHEN (issue580). +* Improve formatting of type casts in parentheses. +* Stabilize formatting of invalid SQL statements. + + +Release 0.3.1 (Feb 29, 2020) +---------------------------- + +Enhancements + +* Add HQL keywords (pr475, by matwalk). +* Add support for time zone casts (issue489). +* Enhance formatting of AS keyword (issue507, by john-bodley). +* Stabilize grouping engine when parsing invalid SQL statements. + +Bug Fixes + +* Fix splitting of SQL with multiple statements inside + parentheses (issue485, pr486 by win39). +* Correctly identify NULLS FIRST / NULLS LAST as keywords (issue487). +* Fix splitting of SQL statements that contain dollar signs in + identifiers (issue491). +* Remove support for parsing double slash comments introduced in + 0.3.0 (issue456) as it had some side-effects with other dialects and + doesn't seem to be widely used (issue476). +* Restrict detection of alias names to objects that actually could + have an alias (issue455, adopted some parts of pr509 by john-bodley). +* Fix parsing of date/time literals (issue438, by vashek). +* Fix initialization of TokenList (issue499, pr505 by john-bodley). +* Fix parsing of LIKE (issue493, pr525 by dbczumar). +* Improve parsing of identifiers (pr527 by liulk). 
+
+
 Release 0.3.0 (Mar 11, 2019)
 ----------------------------
@@ -94,7 +232,7 @@ Internal Changes
 
 Release 0.2.1 (Aug 13, 2016)
---------------------------
+----------------------------
 
 Notable Changes
diff --git a/MANIFEST.in b/MANIFEST.in
deleted file mode 100644
index 8043b359..00000000
--- a/MANIFEST.in
+++ /dev/null
@@ -1,11 +0,0 @@
-recursive-include docs source/*
-include docs/sqlformat.1
-include docs/Makefile
-recursive-include tests *.py *.sql
-include LICENSE
-include TODO
-include AUTHORS
-include CHANGELOG
-include Makefile
-include setup.cfg
-include tox.ini
diff --git a/Makefile b/Makefile
index 0844e5bf..1657822e 100644
--- a/Makefile
+++ b/Makefile
@@ -22,5 +22,5 @@ clean:
 
 release:
 	@rm -rf dist/
-	python setup.py sdist upload --sign --identity E0B84F81
-	python setup.py bdist_wheel upload --sign --identity E0B84F81
+	python -m build
+	twine upload --sign --identity E0B84F81 dist/*
diff --git a/README.rst b/README.rst
index 3bc9670e..df4e7e36 100644
--- a/README.rst
+++ b/README.rst
@@ -1,67 +1,78 @@
 python-sqlparse - Parse SQL statements
 ======================================
 
-sqlparse is a non-validating SQL parser module for Python.
-
 |buildstatus|_
 |coverage|_
+|docs|_
+|packageversion|_
 
+.. docincludebegin
 
-Install
--------
-
-From pip, run::
-
-    $ pip install --upgrade sqlparse
-
-Consider using the ``--user`` option_.
+sqlparse is a non-validating SQL parser for Python.
+It provides support for parsing, splitting and formatting SQL statements.
 
-.. _option: https://pip.pypa.io/en/latest/user_guide/#user-installs
+The module is compatible with Python 3.5+ and released under the terms of the
+`New BSD license <https://opensource.org/licenses/BSD-3-Clause>`_.
 
-From the repository, run::
+Visit the project page at https://github.com/andialbrecht/sqlparse for
+further information about this project.
 
-    python setup.py install
 
-to install python-sqlparse on your system.
+Quick Start
+-----------
 
-python-sqlparse is compatible with Python 2.7 and Python 3 (>= 3.3).
+.. code-block:: sh
 
+   $ pip install sqlparse
 
-Run Tests
----------
+.. code-block:: python
 
-To run the test suite run::
+   >>> import sqlparse
 
-    tox
+   >>> # Split a string containing two SQL statements:
+   >>> raw = 'select * from foo; select * from bar;'
+   >>> statements = sqlparse.split(raw)
+   >>> statements
+   ['select * from foo;', 'select * from bar;']
 
-Note, you'll need tox installed, of course.
+   >>> # Format the first statement and print it out:
+   >>> first = statements[0]
+   >>> print(sqlparse.format(first, reindent=True, keyword_case='upper'))
+   SELECT *
+   FROM foo;
 
+   >>> # Parsing a SQL statement:
+   >>> parsed = sqlparse.parse('select * from foo')[0]
+   >>> parsed.tokens
+   [<DML 'select' at 0x...>, <Whitespace ' ' at 0x...>, <Wildcard '*' at 0x...>, ...]
 
 Links
 -----
 
-Project Page
-  https://github.com/andialbrecht/sqlparse
+Project page
+   https://github.com/andialbrecht/sqlparse
 
-Documentation
-  https://sqlparse.readthedocs.io/en/latest/
-
-Discussions
-  https://groups.google.com/forum/#!forum/sqlparse
+Bug tracker
+   https://github.com/andialbrecht/sqlparse/issues
 
-Issues/Bugs
-  https://github.com/andialbrecht/sqlparse/issues
+Documentation
+   https://sqlparse.readthedocs.io/
 
 Online Demo
-  https://sqlformat.org/
+   https://sqlformat.org/
 
-python-sqlparse is licensed under the BSD license.
+sqlparse is licensed under the BSD license.
 
 Parts of the code are based on pygments written by Georg Brandl and
 others. pygments-Homepage: http://pygments.org/
 
-.. |buildstatus| image:: https://secure.travis-ci.org/andialbrecht/sqlparse.png?branch=master
-.. _buildstatus: https://travis-ci.org/#!/andialbrecht/sqlparse
-.. |coverage| image:: https://coveralls.io/repos/andialbrecht/sqlparse/badge.svg?branch=master&service=github
-.. _coverage: https://coveralls.io/github/andialbrecht/sqlparse?branch=master
+.. |buildstatus| image:: https://github.com/andialbrecht/sqlparse/actions/workflows/python-app.yml/badge.svg
+.. _buildstatus: https://github.com/andialbrecht/sqlparse/actions/workflows/python-app.yml
+.. |coverage| image:: https://codecov.io/gh/andialbrecht/sqlparse/branch/master/graph/badge.svg
+.. _coverage: https://codecov.io/gh/andialbrecht/sqlparse
+.. |docs| image:: https://readthedocs.org/projects/sqlparse/badge/?version=latest
+.. _docs: https://sqlparse.readthedocs.io/en/latest/?badge=latest
+.. |packageversion| image:: https://img.shields.io/pypi/v/sqlparse?color=%2334D058&label=pypi%20package
+.. _packageversion: https://pypi.org/project/sqlparse
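The Quick Start in the README above stops at ``parsed.tokens``; a minimal sketch (not part of the diff) of how that parse tree can be inspected further, using only the public ``ttype``/``value`` attributes of sqlparse tokens (the commented output is illustrative, exact reprs vary):

    import sqlparse

    parsed = sqlparse.parse('select * from foo')[0]
    for token in parsed.tokens:
        # ttype is None for grouped tokens such as Identifier
        print(token.ttype, repr(token.value))
    # Token.Keyword.DML 'select'
    # Token.Text.Whitespace ' '
    # Token.Wildcard '*'
    # ...
    # None 'foo'   (an Identifier group)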
diff --git a/docs/source/api.rst b/docs/source/api.rst
index e9fcdaa3..40193d0b 100644
--- a/docs/source/api.rst
+++ b/docs/source/api.rst
@@ -46,6 +46,12 @@ The :meth:`~sqlparse.format` function accepts the following keyword arguments.
 ``reindent``
   If ``True`` the indentations of the statements are changed.
 
+``reindent_aligned``
+  If ``True`` the indentations of the statements are changed, and statements are aligned by keywords.
+
+``use_space_around_operators``
+  If ``True`` spaces are used around all operators.
+
 ``indent_tabs``
   If ``True`` tabs instead of spaces are used for indentation.
 
@@ -53,9 +59,14 @@
   The width of the indentation, defaults to 2.
 
 ``wrap_after``
-  The column limit for wrapping comma-separated lists. If unspecified, it
+  The column limit (in characters) for wrapping comma-separated lists. If unspecified, it
   puts every item in the list on its own line.
 
 ``output_format``
   If given the output is additionally formatted to be used as a variable
   in a programming language. Allowed values are "python" and "php".
+
+``comma_first``
+  If ``True`` comma-first notation for column names is used.
+
+
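The options documented in api.rst above map one-to-one onto keyword arguments of ``sqlparse.format``. A short, hedged illustration of the two options this diff adds (output whitespace is approximate and depends on the sqlparse version):

    import sqlparse

    sql = 'select a, sum(b) from foo where a=1 group by a'

    # use_space_around_operators: a=1 becomes a = 1
    print(sqlparse.format(sql, use_space_around_operators=True))

    # reindent_aligned: the statement is reindented and aligned by keywords
    print(sqlparse.format(sql, reindent_aligned=True))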
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 70bd69a5..a5be6409 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-#
 # python-sqlparse documentation build configuration file, created by
 # sphinx-quickstart on Thu Feb 26 08:19:28 2009.
 #
diff --git a/docs/source/extending.rst b/docs/source/extending.rst
new file mode 100644
index 00000000..0c10924b
--- /dev/null
+++ b/docs/source/extending.rst
@@ -0,0 +1,76 @@
+Extending :mod:`sqlparse`
+=========================
+
+.. module:: sqlparse
+   :synopsis: Extending parsing capability of sqlparse.
+
+The :mod:`sqlparse` module uses a SQL grammar that was tuned through usage and numerous
+PRs to fit a broad range of SQL syntaxes, but it cannot cater to every given case since
+some SQL dialects have adopted conflicting meanings of certain keywords. Sqlparse
+therefore exposes a mechanism to configure the fundamental keywords and regular
+expressions that parse the language as described below.
+
+If you find an adaptation that works for your specific use-case, please consider
+contributing it back to the community by opening a PR on
+`GitHub <https://github.com/andialbrecht/sqlparse>`_.
+
+Configuring the Lexer
+---------------------
+
+The lexer is a singleton class that breaks down the stream of characters into language
+tokens. It does this by using a sequence of regular expressions and keywords that are
+listed in the file ``sqlparse.keywords``. Instead of applying these fixed grammar
+definitions directly, the lexer is initialized by default through its
+``default_initialization()`` method. As an API user, you can adapt the Lexer
+configuration by applying your own configuration logic. To do so, start out by clearing
+previous configurations with ``.clear()``, then apply the SQL list with
+``.set_SQL_REGEX(SQL_REGEX)``, and apply keyword lists with ``.add_keywords(KEYWORDS)``.
+
+You can do so by re-using the expressions in ``sqlparse.keywords`` (see example below),
+leaving parts out, or by making up your own master list.
+
+See the expected types of the arguments by inspecting their structure in
+``sqlparse.keywords``.
+(For compatibility with Python 3.4, this library does not use type-hints.)
+
+The following example adds support for the expression ``ZORDER BY``, and adds ``BAR`` as
+a keyword to the lexer:
+
+.. code-block:: python
+
+    import re
+
+    import sqlparse
+    from sqlparse import keywords
+    from sqlparse.lexer import Lexer
+
+    # get the lexer singleton object to configure it
+    lex = Lexer.get_default_instance()
+
+    # Clear the default configurations.
+    # After this call, reg-exps and keyword dictionaries need to be loaded
+    # to make the lexer functional again.
+    lex.clear()
+
+    my_regex = (r"ZORDER\s+BY\b", sqlparse.tokens.Keyword)
+
+    # slice the default SQL_REGEX to inject the custom object
+    lex.set_SQL_REGEX(
+        keywords.SQL_REGEX[:38]
+        + [my_regex]
+        + keywords.SQL_REGEX[38:]
+    )
+
+    # add the default keyword dictionaries
+    lex.add_keywords(keywords.KEYWORDS_COMMON)
+    lex.add_keywords(keywords.KEYWORDS_ORACLE)
+    lex.add_keywords(keywords.KEYWORDS_PLPGSQL)
+    lex.add_keywords(keywords.KEYWORDS_HQL)
+    lex.add_keywords(keywords.KEYWORDS_MSACCESS)
+    lex.add_keywords(keywords.KEYWORDS)
+
+    # add a custom keyword dictionary (a dict mapping keyword to token type)
+    lex.add_keywords({'BAR': sqlparse.tokens.Keyword})
+
+    # no configuration is passed here. The lexer is used as a singleton.
+    sqlparse.parse("select * from foo zorder by bar;")
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 032318a5..e18d2b3c 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -6,33 +6,9 @@
 python-sqlparse
 ===============
 
-:mod:`sqlparse` is a non-validating SQL parser for Python.
-It provides support for parsing, splitting and formatting SQL statements.
-
-The module is compatible with Python 2.7 and Python 3 (>= 3.3)
-and released under the terms of the `New BSD license
-<https://opensource.org/licenses/BSD-3-Clause>`_.
-
-Visit the project page at https://github.com/andialbrecht/sqlparse for
-further information about this project.
-
-
-tl;dr
------
-
-.. code-block:: bash
-
-   $ pip install sqlparse
-   $ python
-   >>> import sqlparse
-   >>> print(sqlparse.format('select * from foo', reindent=True))
-   select *
-   from foo
-   >>> parsed = sqlparse.parse('select * from foo')[0]
-   >>> parsed.tokens
-   [<DML 'select' at 0x...>, <Whitespace ' ' at 0x...>, ...]
-   >>>
-
+.. include:: ../../README.rst
+   :start-after: docincludebegin
+   :end-before: Links
 
 Contents
 --------
@@ -44,7 +20,9 @@
    api
    analyzing
    ui
+   extending
    changes
+   license
    indices
 
@@ -59,3 +37,7 @@
 Bug tracker
    https://github.com/andialbrecht/sqlparse/issues
 
 Documentation
    https://sqlparse.readthedocs.io/
+
+Online Demo
+   https://sqlformat.org/
+
diff --git a/docs/source/intro.rst b/docs/source/intro.rst
index 1a9913b1..1d3c9498 100644
--- a/docs/source/intro.rst
+++ b/docs/source/intro.rst
@@ -6,7 +6,7 @@ Download & Installation
 -----------------------
 
 The latest released version can be obtained from the `Python Package
-Index (PyPI) <https://pypi.org/project/sqlparse/>`_. To extract the
+Index (PyPI) <https://pypi.org/project/sqlparse/>`_. To extract and install the
 module system-wide run
 
 ..
code-block:: bash @@ -48,7 +48,7 @@ SQL statements can be beautified by using the :meth:`~sqlparse.format` function. .. code-block:: python >>> sql = 'select * from foo where id in (select id from bar);' - >>> print sqlparse.format(sql, reindent=True, keyword_case='upper') + >>> print(sqlparse.format(sql, reindent=True, keyword_case='upper')) SELECT * FROM foo WHERE id IN @@ -120,7 +120,7 @@ To check out the latest sources of this module run to check out the latest sources from the repository. -:mod:`sqlparse` is currently tested under Python 2.7, >=3.3 and pypy. Tests are +:mod:`sqlparse` is currently tested under Python 3.5+ and PyPy. Tests are automatically run on each commit and for each pull request on Travis: https://travis-ci.org/andialbrecht/sqlparse @@ -135,9 +135,4 @@ above. Ideally a Python 2 and a Python 3 version should be tested locally. Please file bug reports and feature requests on the project site at -https://github.com/andialbrecht/sqlparse/issues/new or if you have -code to contribute upload it to https://codereview.appspot.com/ and -add albrecht.andi@googlemail.com as reviewer. - -For more information about the review tool and how to use it visit -it's project page: https://github.com/rietveld-codereview/rietveld +https://github.com/andialbrecht/sqlparse/issues/new. diff --git a/docs/source/license.rst b/docs/source/license.rst new file mode 100644 index 00000000..01f3963c --- /dev/null +++ b/docs/source/license.rst @@ -0,0 +1,4 @@ +License +======= + +.. include:: ../../LICENSE \ No newline at end of file diff --git a/examples/column_defs_lowlevel.py b/examples/column_defs_lowlevel.py index eb289927..11ff4f38 100644 --- a/examples/column_defs_lowlevel.py +++ b/examples/column_defs_lowlevel.py @@ -1,7 +1,6 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- # -# Copyright (C) 2009-2018 the sqlparse authors and contributors +# Copyright (C) 2009-2020 the sqlparse authors and contributors # # # This example is part of python-sqlparse and is released under @@ -17,18 +16,25 @@ def extract_definitions(token_list): # assumes that token_list is a parenthesis definitions = [] tmp = [] - # grab the first token, ignoring whitespace. 
idx=1 to skip open (
-    tidx, token = token_list.token_next(1)
-    while token and not token.match(sqlparse.tokens.Punctuation, ')'):
-        tmp.append(token)
-        # grab the next token, this time including whitespace
-        tidx, token = token_list.token_next(tidx, skip_ws=False)
-        # split on ",", except when on end of statement
-        if token and token.match(sqlparse.tokens.Punctuation, ','):
-            definitions.append(tmp)
+    par_level = 0
+    for token in token_list.flatten():
+        if token.is_whitespace:
+            continue
+        elif token.match(sqlparse.tokens.Punctuation, '('):
+            par_level += 1
+            continue
+        if token.match(sqlparse.tokens.Punctuation, ')'):
+            if par_level == 0:
+                break
+            else:
+                # leaving a nested parenthesis
+                par_level -= 1
+        elif token.match(sqlparse.tokens.Punctuation, ','):
+            if tmp:
+                definitions.append(tmp)
             tmp = []
-            tidx, token = token_list.token_next(tidx)
-    if tmp and isinstance(tmp[0], sqlparse.sql.Identifier):
+        else:
+            tmp.append(token)
+    if tmp:
         definitions.append(tmp)
     return definitions
 
@@ -46,5 +52,5 @@ def extract_definitions(token_list):
     columns = extract_definitions(par)
 
     for column in columns:
-        print('NAME: {name:10} DEFINITION: {definition}'.format(
-            name=column[0], definition=''.join(str(t) for t in column[1:])))
+        print('NAME: {name!s:12} DEFINITION: {definition}'.format(
+            name=column[0], definition=' '.join(str(t) for t in column[1:])))
diff --git a/examples/extract_table_names.py b/examples/extract_table_names.py
index a3ca3339..9837806f 100644
--- a/examples/extract_table_names.py
+++ b/examples/extract_table_names.py
@@ -1,7 +1,6 @@
 #!/usr/bin/env python
-# -*- coding: utf-8 -*-
 #
-# Copyright (C) 2009-2018 the sqlparse authors and contributors
+# Copyright (C) 2009-2020 the sqlparse authors and contributors
 #
 #
 # This example is part of python-sqlparse and is released under
@@ -32,10 +31,9 @@ def extract_from_part(parsed):
     for item in parsed.tokens:
         if from_seen:
             if is_subselect(item):
-                for x in extract_from_part(item):
-                    yield x
+                yield from extract_from_part(item)
             elif item.ttype is Keyword:
-                raise StopIteration
+                return
             else:
                 yield item
         elif item.ttype is Keyword and item.value.upper() == 'FROM':
@@ -67,4 +65,4 @@ def extract_tables(sql):
     """
     tables = ', '.join(extract_tables(sql))
-    print('Tables: {0}'.format(tables))
+    print('Tables: {}'.format(tables))
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 00000000..338a53ce
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,70 @@
+[build-system]
+requires = ["flit_core >=3.2,<4"]
+build-backend = "flit_core.buildapi"
+
+[project]
+name = "sqlparse"
+description = "A non-validating SQL parser."
+authors = [{name = "Andi Albrecht", email = "albrecht.andi@gmail.com"}]
+readme = "README.rst"
+dynamic = ["version"]
+classifiers = [
+    "Development Status :: 5 - Production/Stable",
+    "Intended Audience :: Developers",
+    "License :: OSI Approved :: BSD License",
+    "Operating System :: OS Independent",
+    "Programming Language :: Python",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3 :: Only",
+    "Programming Language :: Python :: 3.5",
+    "Programming Language :: Python :: 3.6",
+    "Programming Language :: Python :: 3.7",
+    "Programming Language :: Python :: 3.8",
+    "Programming Language :: Python :: 3.9",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: Implementation :: CPython",
+    "Programming Language :: Python :: Implementation :: PyPy",
+    "Topic :: Database",
+    "Topic :: Software Development",
+]
+requires-python = ">=3.5"
+
+[project.urls]
+Home = "https://github.com/andialbrecht/sqlparse"
+Documentation = "https://sqlparse.readthedocs.io/"
+"Release Notes" = "https://sqlparse.readthedocs.io/en/latest/changes/"
+Source = "https://github.com/andialbrecht/sqlparse"
+Tracker = "https://github.com/andialbrecht/sqlparse/issues"
+
+[project.scripts]
+sqlformat = "sqlparse.__main__:main"
+
+[project.optional-dependencies]
+dev = [
+    "flake8",
+    "build",
+]
+test = [
+    "pytest",
+    "pytest-cov",
+]
+doc = [
+    "sphinx",
+]
+
+[tool.flit.sdist]
+include = [
+    "docs/source/",
+    "docs/sqlformat.1",
+    "docs/Makefile",
+    "tests/*.py", "tests/files/*.sql",
+    "LICENSE",
+    "TODO",
+    "AUTHORS",
+    "CHANGELOG",
+    "Makefile",
+    "tox.ini",
+]
+
+[tool.coverage.run]
+omit = ["sqlparse/__main__.py"]
diff --git a/setup.cfg b/setup.cfg
deleted file mode 100644
index c4dee1df..00000000
--- a/setup.cfg
+++ /dev/null
@@ -1,20 +0,0 @@
-[bdist_wheel]
-universal = 1
-
-[metadata]
-license_file = LICENSE
-
-[tool:pytest]
-xfail_strict = True
-
-[flake8]
-exclude =
-    sqlparse/compat.py
-ignore =
-    W503,
-    E731
-
-[coverage:run]
-branch = False
-omit =
-    sqlparse/__main__.py
diff --git a/setup.py b/setup.py
deleted file mode 100644
index 345d0ce4..00000000
--- a/setup.py
+++ /dev/null
@@ -1,110 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-#
-# Copyright (C) 2009-2018 the sqlparse authors and contributors
-#
-#
-# This setup script is part of python-sqlparse and is released under
-# the BSD License: https://opensource.org/licenses/BSD-3-Clause
-
-import re
-
-from setuptools import setup, find_packages
-
-
-def get_version():
-    """Parse __init__.py for version number instead of importing the file."""
-    VERSIONFILE = 'sqlparse/__init__.py'
-    VSRE = r'^__version__ = [\'"]([^\'"]*)[\'"]'
-    with open(VERSIONFILE) as f:
-        verstrline = f.read()
-    mo = re.search(VSRE, verstrline, re.M)
-    if mo:
-        return mo.group(1)
-    raise RuntimeError('Unable to find version in {fn}'.format(fn=VERSIONFILE))
-
-
-LONG_DESCRIPTION = """
-``sqlparse`` is a non-validating SQL parser module.
-It provides support for parsing, splitting and formatting SQL statements.
-
-Visit the `project page <https://github.com/andialbrecht/sqlparse>`_ for
-additional information and documentation.
-
-**Example Usage**
-
-
-Splitting SQL statements::
-
-    >>> import sqlparse
-    >>> sqlparse.split('select * from foo; select * from bar;')
-    [u'select * from foo; ', u'select * from bar;']
-
-
-Formatting statements::
-
-    >>> sql = 'select * from foo where id in (select id from bar);'
-    >>> print sqlparse.format(sql, reindent=True, keyword_case='upper')
-    SELECT *
-    FROM foo
-    WHERE id IN
-      (SELECT id
-       FROM bar);
-
-
-Parsing::
-
-    >>> sql = 'select * from someschema.mytable where id = 1'
-    >>> res = sqlparse.parse(sql)
-    >>> res
-    (<Statement 'select...' at 0x...>,)
-    >>> stmt = res[0]
-    >>> str(stmt)  # converting it back to unicode
-    'select * from someschema.mytable where id = 1'
-    >>> # This is how the internal representation looks like:
-    >>> stmt.tokens
-    (<DML 'select' at 0x...>,
-     <Whitespace ' ' at 0x...>,
-     <Wildcard '*' at 0x...>,
-     <Whitespace ' ' at 0x...>,
-     <Keyword 'from' at 0x...>,
-     <Whitespace ' ' at 0x...>,
-     <Identifier 'somesc...' at 0x...>,
-     <Whitespace ' ' at 0x...>,
-     <Where 'where ...' at 0x...>)
-
-"""
-
-setup(
-    name='sqlparse',
-    version=get_version(),
-    author='Andi Albrecht',
-    author_email='albrecht.andi@gmail.com',
-    url='https://github.com/andialbrecht/sqlparse',
-    description='Non-validating SQL parser',
-    long_description=LONG_DESCRIPTION,
-    license='BSD',
-    python_requires=">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*",
-    classifiers=[
-        'Development Status :: 5 - Production/Stable',
-        'Intended Audience :: Developers',
-        'License :: OSI Approved :: BSD License',
-        'Operating System :: OS Independent',
-        'Programming Language :: Python',
-        'Programming Language :: Python :: 2',
-        'Programming Language :: Python :: 2.7',
-        'Programming Language :: Python :: 3',
-        'Programming Language :: Python :: 3.4',
-        'Programming Language :: Python :: 3.5',
-        'Programming Language :: Python :: 3.6',
-        'Programming Language :: Python :: 3.7',
-        'Topic :: Database',
-        'Topic :: Software Development',
-    ],
-    packages=find_packages(exclude=('tests',)),
-    entry_points={
-        'console_scripts': [
-            'sqlformat = sqlparse.__main__:main',
-        ]
-    },
-)
diff --git a/sqlparse/__init__.py b/sqlparse/__init__.py
index 69044d6a..122595b3 100644
--- a/sqlparse/__init__.py
+++ b/sqlparse/__init__.py
@@ -1,6 +1,5 @@
-# -*- coding: utf-8 -*-
 #
-# Copyright (C) 2009-2018 the sqlparse authors and contributors
+# Copyright (C) 2009-2020 the sqlparse authors and contributors
 #
 #
 # This module is part of python-sqlparse and is released under
@@ -16,9 +15,8 @@
 from sqlparse import filters
 from sqlparse import formatter
-from sqlparse.compat import text_type
 
-__version__ = '0.3.0'
+__version__ = '0.4.4'
 __all__ = ['engine', 'filters', 'formatter', 'sql', 'tokens', 'cli']
 
@@ -58,7 +56,7 @@ def format(sql, encoding=None, **options):
     options = formatter.validate_options(options)
     stack = formatter.build_filter_stack(stack, options)
     stack.postprocess.append(filters.SerializerUnicode())
-    return u''.join(stack.run(sql, encoding))
+    return ''.join(stack.run(sql, encoding))
 
 
 def split(sql, encoding=None):
@@ -69,4 +67,4 @@
     :returns: A list of strings.
""" stack = engine.FilterStack() - return [text_type(stmt).strip() for stmt in stack.run(sql, encoding)] + return [str(stmt).strip() for stmt in stack.run(sql, encoding)] diff --git a/sqlparse/__main__.py b/sqlparse/__main__.py index 867d75d5..2bf25132 100644 --- a/sqlparse/__main__.py +++ b/sqlparse/__main__.py @@ -1,7 +1,6 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- # -# Copyright (C) 2009-2018 the sqlparse authors and contributors +# Copyright (C) 2009-2020 the sqlparse authors and contributors # # # This module is part of python-sqlparse and is released under diff --git a/sqlparse/cli.py b/sqlparse/cli.py index 25555a59..7a8aacbf 100755 --- a/sqlparse/cli.py +++ b/sqlparse/cli.py @@ -1,7 +1,6 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- # -# Copyright (C) 2009-2018 the sqlparse authors and contributors +# Copyright (C) 2009-2020 the sqlparse authors and contributors # # # This module is part of python-sqlparse and is released under @@ -23,10 +22,8 @@ import argparse import sys from io import TextIOWrapper -from codecs import open, getreader import sqlparse -from sqlparse.compat import PY2 from sqlparse.exceptions import SQLParseError @@ -62,16 +59,16 @@ def create_parser(): metavar='CHOICE', dest='keyword_case', choices=_CASE_CHOICES, - help='change case of keywords, CHOICE is one of {0}'.format( - ', '.join('"{0}"'.format(x) for x in _CASE_CHOICES))) + help='change case of keywords, CHOICE is one of {}'.format( + ', '.join('"{}"'.format(x) for x in _CASE_CHOICES))) group.add_argument( '-i', '--identifiers', metavar='CHOICE', dest='identifier_case', choices=_CASE_CHOICES, - help='change case of identifiers, CHOICE is one of {0}'.format( - ', '.join('"{0}"'.format(x) for x in _CASE_CHOICES))) + help='change case of identifiers, CHOICE is one of {}'.format( + ', '.join('"{}"'.format(x) for x in _CASE_CHOICES))) group.add_argument( '-l', '--language', @@ -153,7 +150,7 @@ def create_parser(): def _error(msg): """Print msg and optionally exit with return code exit_.""" - sys.stderr.write(u'[ERROR] {0}\n'.format(msg)) + sys.stderr.write('[ERROR] {}\n'.format(msg)) return 1 @@ -162,29 +159,26 @@ def main(args=None): args = parser.parse_args(args) if args.filename == '-': # read from stdin - if PY2: - data = getreader(args.encoding)(sys.stdin).read() - else: - wrapper = TextIOWrapper(sys.stdin.buffer, encoding=args.encoding) - try: - data = wrapper.read() - finally: - wrapper.detach() + wrapper = TextIOWrapper(sys.stdin.buffer, encoding=args.encoding) + try: + data = wrapper.read() + finally: + wrapper.detach() else: try: - with open(args.filename, 'r', args.encoding) as f: + with open(args.filename, encoding=args.encoding) as f: data = ''.join(f.readlines()) - except IOError as e: + except OSError as e: return _error( - u'Failed to read {0}: {1}'.format(args.filename, e)) + 'Failed to read {}: {}'.format(args.filename, e)) close_stream = False if args.outfile: try: - stream = open(args.outfile, 'w', args.encoding) + stream = open(args.outfile, 'w', encoding=args.encoding) close_stream = True - except IOError as e: - return _error(u'Failed to open {0}: {1}'.format(args.outfile, e)) + except OSError as e: + return _error('Failed to open {}: {}'.format(args.outfile, e)) else: stream = sys.stdout @@ -192,7 +186,7 @@ def main(args=None): try: formatter_opts = sqlparse.formatter.validate_options(formatter_opts) except SQLParseError as e: - return _error(u'Invalid options: {0}'.format(e)) + return _error('Invalid options: {}'.format(e)) s = sqlparse.format(data, **formatter_opts) 
stream.write(s) diff --git a/sqlparse/compat.py b/sqlparse/compat.py deleted file mode 100644 index d2214bed..00000000 --- a/sqlparse/compat.py +++ /dev/null @@ -1,45 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (C) 2009-2018 the sqlparse authors and contributors -# -# -# This module is part of python-sqlparse and is released under -# the BSD License: https://opensource.org/licenses/BSD-3-Clause - -"""Python 2/3 compatibility. - -This module only exists to avoid a dependency on six -for very trivial stuff. We only need to take care of -string types, buffers and metaclasses. - -Parts of the code is copied directly from six: -https://bitbucket.org/gutworth/six -""" - -import sys -from io import TextIOBase - -PY2 = sys.version_info[0] == 2 -PY3 = sys.version_info[0] == 3 - - -if PY3: - def unicode_compatible(cls): - return cls - - text_type = str - string_types = (str,) - from io import StringIO - file_types = (StringIO, TextIOBase) - - -elif PY2: - def unicode_compatible(cls): - cls.__unicode__ = cls.__str__ - cls.__str__ = lambda x: x.__unicode__().encode('utf-8') - return cls - - text_type = unicode - string_types = (str, unicode,) - from StringIO import StringIO - file_types = (file, StringIO, TextIOBase) diff --git a/sqlparse/engine/__init__.py b/sqlparse/engine/__init__.py index 0b3f3eb5..6d54d514 100644 --- a/sqlparse/engine/__init__.py +++ b/sqlparse/engine/__init__.py @@ -1,6 +1,5 @@ -# -*- coding: utf-8 -*- # -# Copyright (C) 2009-2018 the sqlparse authors and contributors +# Copyright (C) 2009-2020 the sqlparse authors and contributors # # # This module is part of python-sqlparse and is released under diff --git a/sqlparse/engine/filter_stack.py b/sqlparse/engine/filter_stack.py index fc77fd64..9665a224 100644 --- a/sqlparse/engine/filter_stack.py +++ b/sqlparse/engine/filter_stack.py @@ -1,6 +1,5 @@ -# -*- coding: utf-8 -*- # -# Copyright (C) 2009-2018 the sqlparse authors and contributors +# Copyright (C) 2009-2020 the sqlparse authors and contributors # # # This module is part of python-sqlparse and is released under @@ -13,7 +12,7 @@ from sqlparse.engine.statement_splitter import StatementSplitter -class FilterStack(object): +class FilterStack: def __init__(self): self.preprocess = [] self.stmtprocess = [] diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py index afc91236..86d8fc64 100644 --- a/sqlparse/engine/grouping.py +++ b/sqlparse/engine/grouping.py @@ -1,6 +1,5 @@ -# -*- coding: utf-8 -*- # -# Copyright (C) 2009-2018 the sqlparse authors and contributors +# Copyright (C) 2009-2020 the sqlparse authors and contributors # # # This module is part of python-sqlparse and is released under @@ -88,6 +87,56 @@ def post(tlist, pidx, tidx, nidx): _group(tlist, sql.Identifier, match, valid_prev, valid_next, post) +def group_tzcasts(tlist): + def match(token): + return token.ttype == T.Keyword.TZCast + + def valid_prev(token): + return token is not None + + def valid_next(token): + return token is not None and ( + token.is_whitespace + or token.match(T.Keyword, 'AS') + or token.match(*sql.TypedLiteral.M_CLOSE) + ) + + def post(tlist, pidx, tidx, nidx): + return pidx, nidx + + _group(tlist, sql.Identifier, match, valid_prev, valid_next, post) + + +def group_typed_literal(tlist): + # definitely not complete, see e.g.: + # https://docs.microsoft.com/en-us/sql/odbc/reference/appendixes/interval-literal-syntax + # https://docs.microsoft.com/en-us/sql/odbc/reference/appendixes/interval-literals + # https://www.postgresql.org/docs/9.1/datatype-datetime.html + # 
https://www.postgresql.org/docs/9.1/functions-datetime.html + def match(token): + return imt(token, m=sql.TypedLiteral.M_OPEN) + + def match_to_extend(token): + return isinstance(token, sql.TypedLiteral) + + def valid_prev(token): + return token is not None + + def valid_next(token): + return token is not None and token.match(*sql.TypedLiteral.M_CLOSE) + + def valid_final(token): + return token is not None and token.match(*sql.TypedLiteral.M_EXTEND) + + def post(tlist, pidx, tidx, nidx): + return tidx, nidx + + _group(tlist, sql.TypedLiteral, match, valid_prev, valid_next, + post, extend=False) + _group(tlist, sql.TypedLiteral, match_to_extend, valid_prev, valid_final, + post, extend=True) + + def group_period(tlist): def match(token): return token.match(T.Punctuation, '.') @@ -121,7 +170,7 @@ def valid_prev(token): return token.normalized == 'NULL' or not token.is_keyword def valid_next(token): - ttypes = T.DML, T.DDL + ttypes = T.DML, T.DDL, T.CTE return not imt(token, t=ttypes) and token is not None def post(tlist, pidx, tidx, nidx): @@ -149,7 +198,7 @@ def post(tlist, pidx, tidx, nidx): def group_comparison(tlist): sqlcls = (sql.Parenthesis, sql.Function, sql.Identifier, - sql.Operation) + sql.Operation, sql.TypedLiteral) ttypes = T_NUMERICAL + T_STRING + T_NAME def match(token): @@ -204,13 +253,16 @@ def post(tlist, pidx, tidx, nidx): def group_operator(tlist): ttypes = T_NUMERICAL + T_STRING + T_NAME sqlcls = (sql.SquareBrackets, sql.Parenthesis, sql.Function, - sql.Identifier, sql.Operation) + sql.Identifier, sql.Operation, sql.TypedLiteral) def match(token): return imt(token, t=(T.Operator, T.Wildcard)) def valid(token): - return imt(token, i=sqlcls, t=ttypes) + return imt(token, i=sqlcls, t=ttypes) \ + or (token and token.match( + T.Keyword, + ('CURRENT_DATE', 'CURRENT_TIME', 'CURRENT_TIMESTAMP'))) def post(tlist, pidx, tidx, nidx): tlist[tidx].ttype = T.Operator @@ -289,12 +341,15 @@ def group_aliased(tlist): def group_functions(tlist): has_create = False has_table = False + has_as = False for tmp_token in tlist.tokens: - if tmp_token.value == 'CREATE': + if tmp_token.value.upper() == 'CREATE': has_create = True - if tmp_token.value == 'TABLE': + if tmp_token.value.upper() == 'TABLE': has_table = True - if has_create and has_table: + if tmp_token.value == 'AS': + has_as = True + if has_create and has_table and not has_as: return tidx, token = tlist.token_next_by(t=T.Name) @@ -358,6 +413,8 @@ def group(stmt): group_identifier, group_order, group_typecasts, + group_tzcasts, + group_typed_literal, group_operator, group_comparison, group_as, @@ -385,6 +442,8 @@ def _group(tlist, cls, match, pidx, prev_ = None, None for idx, token in enumerate(list(tlist)): tidx = idx - tidx_offset + if tidx < 0: # tidx shouldn't get negative + continue if token.is_whitespace: continue diff --git a/sqlparse/engine/statement_splitter.py b/sqlparse/engine/statement_splitter.py index 444b46a9..a991959a 100644 --- a/sqlparse/engine/statement_splitter.py +++ b/sqlparse/engine/statement_splitter.py @@ -1,6 +1,5 @@ -# -*- coding: utf-8 -*- # -# Copyright (C) 2009-2018 the sqlparse authors and contributors +# Copyright (C) 2009-2020 the sqlparse authors and contributors # # # This module is part of python-sqlparse and is released under @@ -9,7 +8,7 @@ from sqlparse import sql, tokens as T -class StatementSplitter(object): +class StatementSplitter: """Filter that split stream at individual statements""" def __init__(self): @@ -27,11 +26,13 @@ def _reset(self): def _change_splitlevel(self, ttype, value): """Get 
the new split level (increase, decrease or remain equal)""" - # ANSI - # if normal token return - # wouldn't parenthesis increase/decrease a level? - # no, inside a parenthesis can't start new statement - if ttype not in T.Keyword: + + # parenthesis increase/decrease a level + if ttype is T.Punctuation and value == '(': + return 1 + elif ttype is T.Punctuation and value == ')': + return -1 + elif ttype not in T.Keyword: # if normal token return return 0 # Everything after here is ttype = T.Keyword @@ -65,7 +66,7 @@ def _change_splitlevel(self, ttype, value): self._begin_depth = max(0, self._begin_depth - 1) return -1 - if (unified in ('IF', 'FOR', 'WHILE') + if (unified in ('IF', 'FOR', 'WHILE', 'CASE') and self._is_create and self._begin_depth > 0): return 1 @@ -102,5 +103,5 @@ def process(self, stream): self.consume_ws = True # Yield pending statement (if any) - if self.tokens: + if self.tokens and not all(t.is_whitespace for t in self.tokens): yield sql.Statement(self.tokens) diff --git a/sqlparse/exceptions.py b/sqlparse/exceptions.py index 01e60f7b..11285da0 100644 --- a/sqlparse/exceptions.py +++ b/sqlparse/exceptions.py @@ -1,6 +1,5 @@ -# -*- coding: utf-8 -*- # -# Copyright (C) 2009-2018 the sqlparse authors and contributors +# Copyright (C) 2009-2020 the sqlparse authors and contributors # # # This module is part of python-sqlparse and is released under diff --git a/sqlparse/filters/__init__.py b/sqlparse/filters/__init__.py index c60d84d7..5bd6b325 100644 --- a/sqlparse/filters/__init__.py +++ b/sqlparse/filters/__init__.py @@ -1,6 +1,5 @@ -# -*- coding: utf-8 -*- # -# Copyright (C) 2009-2018 the sqlparse authors and contributors +# Copyright (C) 2009-2020 the sqlparse authors and contributors # # # This module is part of python-sqlparse and is released under diff --git a/sqlparse/filters/aligned_indent.py b/sqlparse/filters/aligned_indent.py index d3433c94..dc609263 100644 --- a/sqlparse/filters/aligned_indent.py +++ b/sqlparse/filters/aligned_indent.py @@ -1,17 +1,15 @@ -# -*- coding: utf-8 -*- # -# Copyright (C) 2009-2018 the sqlparse authors and contributors +# Copyright (C) 2009-2020 the sqlparse authors and contributors # # # This module is part of python-sqlparse and is released under # the BSD License: https://opensource.org/licenses/BSD-3-Clause from sqlparse import sql, tokens as T -from sqlparse.compat import text_type from sqlparse.utils import offset, indent -class AlignedIndentFilter(object): +class AlignedIndentFilter: join_words = (r'((LEFT\s+|RIGHT\s+|FULL\s+)?' r'(INNER\s+|OUTER\s+|STRAIGHT\s+)?|' r'(CROSS\s+|NATURAL\s+)?)?JOIN\b') @@ -73,7 +71,7 @@ def _process_case(self, tlist): end_token = tlist.token_next_by(m=(T.Keyword, 'END'))[1] cases.append((None, [end_token])) - condition_width = [len(' '.join(map(text_type, cond))) if cond else 0 + condition_width = [len(' '.join(map(str, cond))) if cond else 0 for cond, _ in cases] max_cond_width = max(condition_width) @@ -82,8 +80,7 @@ def _process_case(self, tlist): stmt = cond[0] if cond else value[0] if i > 0: - tlist.insert_before(stmt, self.nl( - offset_ - len(text_type(stmt)))) + tlist.insert_before(stmt, self.nl(offset_ - len(str(stmt)))) if cond: ws = sql.Token(T.Whitespace, self.char * ( max_cond_width - condition_width[i])) @@ -105,12 +102,12 @@ def _split_kwds(self, tlist): # joins, group/order by are special case. 
only consider the first # word as aligner if ( - token.match(T.Keyword, self.join_words, regex=True) or - token.match(T.Keyword, self.by_words, regex=True) + token.match(T.Keyword, self.join_words, regex=True) + or token.match(T.Keyword, self.by_words, regex=True) ): token_indent = token.value.split()[0] else: - token_indent = text_type(token) + token_indent = str(token) tlist.insert_before(token, self.nl(token_indent)) tidx += 1 tidx, token = self._next_token(tlist, tidx) diff --git a/sqlparse/filters/others.py b/sqlparse/filters/others.py index 52b86170..6905f2d6 100644 --- a/sqlparse/filters/others.py +++ b/sqlparse/filters/others.py @@ -1,22 +1,36 @@ -# -*- coding: utf-8 -*- # -# Copyright (C) 2009-2018 the sqlparse authors and contributors +# Copyright (C) 2009-2020 the sqlparse authors and contributors # # # This module is part of python-sqlparse and is released under # the BSD License: https://opensource.org/licenses/BSD-3-Clause +import re + from sqlparse import sql, tokens as T from sqlparse.utils import split_unquoted_newlines -class StripCommentsFilter(object): +class StripCommentsFilter: + @staticmethod def _process(tlist): def get_next_comment(): # TODO(andi) Comment types should be unified, see related issue38 return tlist.token_next_by(i=sql.Comment, t=T.Comment) + def _get_insert_token(token): + """Returns either a whitespace or the line breaks from token.""" + # See issue484 why line breaks should be preserved. + # Note: The actual value for a line break is replaced by \n + # in SerializerUnicode which will be executed in the + # postprocessing state. + m = re.search(r'((\r|\n)+) *$', token.value) + if m is not None: + return sql.Token(T.Whitespace.Newline, m.groups()[0]) + else: + return sql.Token(T.Whitespace, ' ') + tidx, token = get_next_comment() while token: pidx, prev_ = tlist.token_prev(tidx, skip_ws=False) @@ -27,15 +41,12 @@ def get_next_comment(): or prev_.is_whitespace or prev_.match(T.Punctuation, '(') or next_.is_whitespace or next_.match(T.Punctuation, ')')): # Insert a whitespace to ensure the following SQL produces - # a valid SQL (see #425). For example: - # - # Before: select a--comment\nfrom foo - # After: select a from foo - if prev_ is not None and next_ is None: - tlist.tokens.insert(tidx, sql.Token(T.Whitespace, ' ')) + # a valid SQL (see #425). 
+ if prev_ is not None and not prev_.match(T.Punctuation, '('): + tlist.tokens.insert(tidx, _get_insert_token(token)) tlist.tokens.remove(token) else: - tlist.tokens[tidx] = sql.Token(T.Whitespace, ' ') + tlist.tokens[tidx] = _get_insert_token(token) tidx, token = get_next_comment() @@ -45,7 +56,7 @@ def process(self, stmt): return stmt -class StripWhitespaceFilter(object): +class StripWhitespaceFilter: def _stripws(self, tlist): func_name = '_stripws_{cls}'.format(cls=type(tlist).__name__) func = getattr(self, func_name.lower(), self._stripws_default) @@ -90,7 +101,7 @@ def process(self, stmt, depth=0): return stmt -class SpacesAroundOperatorsFilter(object): +class SpacesAroundOperatorsFilter: @staticmethod def _process(tlist): @@ -118,7 +129,7 @@ def process(self, stmt): # --------------------------- # postprocess -class SerializerUnicode(object): +class SerializerUnicode: @staticmethod def process(stmt): lines = split_unquoted_newlines(stmt) diff --git a/sqlparse/filters/output.py b/sqlparse/filters/output.py index 3fbc46d1..253537e0 100644 --- a/sqlparse/filters/output.py +++ b/sqlparse/filters/output.py @@ -1,16 +1,14 @@ -# -*- coding: utf-8 -*- # -# Copyright (C) 2009-2018 the sqlparse authors and contributors +# Copyright (C) 2009-2020 the sqlparse authors and contributors # # # This module is part of python-sqlparse and is released under # the BSD License: https://opensource.org/licenses/BSD-3-Clause from sqlparse import sql, tokens as T -from sqlparse.compat import text_type -class OutputFilter(object): +class OutputFilter: varname_prefix = '' def __init__(self, varname='sql'): @@ -23,11 +21,11 @@ def _process(self, stream, varname, has_nl): def process(self, stmt): self.count += 1 if self.count > 1: - varname = u'{f.varname}{f.count}'.format(f=self) + varname = '{f.varname}{f.count}'.format(f=self) else: varname = self.varname - has_nl = len(text_type(stmt).strip().splitlines()) > 1 + has_nl = len(str(stmt).strip().splitlines()) > 1 stmt.tokens = self._process(stmt.tokens, varname, has_nl) return stmt diff --git a/sqlparse/filters/reindent.py b/sqlparse/filters/reindent.py index acec8ca4..9fb232f0 100644 --- a/sqlparse/filters/reindent.py +++ b/sqlparse/filters/reindent.py @@ -1,17 +1,15 @@ -# -*- coding: utf-8 -*- # -# Copyright (C) 2009-2018 the sqlparse authors and contributors +# Copyright (C) 2009-2020 the sqlparse authors and contributors # # # This module is part of python-sqlparse and is released under # the BSD License: https://opensource.org/licenses/BSD-3-Clause from sqlparse import sql, tokens as T -from sqlparse.compat import text_type from sqlparse.utils import offset, indent -class ReindentFilter(object): +class ReindentFilter: def __init__(self, width=2, char=' ', wrap_after=0, n='\n', comma_first=False, indent_after_first=False, indent_columns=False): @@ -42,7 +40,7 @@ def leading_ws(self): return self.offset + self.indent * self.width def _get_offset(self, token): - raw = u''.join(map(text_type, self._flatten_up_to_token(token))) + raw = ''.join(map(str, self._flatten_up_to_token(token))) line = (raw or '\n').splitlines()[-1] # Now take current offset into account and return relative offset. 
return len(line) - len(self.char * self.leading_ws) @@ -71,7 +69,7 @@ def _split_kwds(self, tlist): tidx, token = self._next_token(tlist) while token: pidx, prev_ = tlist.token_prev(tidx, skip_ws=False) - uprev = text_type(prev_) + uprev = str(prev_) if prev_ and prev_.is_whitespace: del tlist.tokens[pidx] @@ -104,9 +102,10 @@ def _process(self, tlist): def _process_where(self, tlist): tidx, token = tlist.token_next_by(m=(T.Keyword, 'WHERE')) + if not token: + return # issue121, errors in statement fixed?? tlist.insert_before(tidx, self.nl()) - with indent(self): self._process_default(tlist) @@ -114,6 +113,8 @@ def _process_parenthesis(self, tlist): ttypes = T.Keyword.DML, T.Keyword.DDL _, is_dml_dll = tlist.token_next_by(t=ttypes) fidx, first = tlist.token_next_by(m=sql.Parenthesis.M_OPEN) + if first is None: + return with indent(self, 1 if is_dml_dll else 0): tlist.tokens.insert(0, self.nl()) if is_dml_dll else None @@ -234,7 +235,7 @@ def process(self, stmt): self._process(stmt) if self._last_stmt is not None: - nl = '\n' if text_type(self._last_stmt).endswith('\n') else '\n\n' + nl = '\n' if str(self._last_stmt).endswith('\n') else '\n\n' stmt.tokens.insert(0, sql.Token(T.Whitespace, nl)) self._last_stmt = stmt diff --git a/sqlparse/filters/right_margin.py b/sqlparse/filters/right_margin.py index 16581389..3e670562 100644 --- a/sqlparse/filters/right_margin.py +++ b/sqlparse/filters/right_margin.py @@ -1,6 +1,5 @@ -# -*- coding: utf-8 -*- # -# Copyright (C) 2009-2018 the sqlparse authors and contributors +# Copyright (C) 2009-2020 the sqlparse authors and contributors # # # This module is part of python-sqlparse and is released under @@ -9,11 +8,10 @@ import re from sqlparse import sql, tokens as T -from sqlparse.compat import text_type # FIXME: Doesn't work -class RightMarginFilter(object): +class RightMarginFilter: keep_together = ( # sql.TypeCast, sql.Identifier, sql.Alias, ) @@ -32,14 +30,14 @@ def _process(self, group, stream): elif token.is_group and type(token) not in self.keep_together: token.tokens = self._process(token, token.tokens) else: - val = text_type(token) + val = str(token) if len(self.line) + len(val) > self.width: match = re.search(r'^ +', self.line) if match is not None: indent = match.group() else: indent = '' - yield sql.Token(T.Whitespace, '\n{0}'.format(indent)) + yield sql.Token(T.Whitespace, '\n{}'.format(indent)) self.line = indent self.line += val yield token diff --git a/sqlparse/filters/tokens.py b/sqlparse/filters/tokens.py index 93182b19..cc00a844 100644 --- a/sqlparse/filters/tokens.py +++ b/sqlparse/filters/tokens.py @@ -1,21 +1,19 @@ -# -*- coding: utf-8 -*- # -# Copyright (C) 2009-2018 the sqlparse authors and contributors +# Copyright (C) 2009-2020 the sqlparse authors and contributors # # # This module is part of python-sqlparse and is released under # the BSD License: https://opensource.org/licenses/BSD-3-Clause from sqlparse import tokens as T -from sqlparse.compat import text_type -class _CaseFilter(object): +class _CaseFilter: ttype = None def __init__(self, case=None): case = case or 'upper' - self.convert = getattr(text_type, case) + self.convert = getattr(str, case) def process(self, stream): for ttype, value in stream: @@ -38,7 +36,7 @@ def process(self, stream): yield ttype, value -class TruncateStringFilter(object): +class TruncateStringFilter: def __init__(self, width, char): self.width = width self.char = char diff --git a/sqlparse/formatter.py b/sqlparse/formatter.py index 89627596..1d1871cf 100644 --- a/sqlparse/formatter.py +++ 
b/sqlparse/formatter.py
@@ -1,6 +1,5 @@
-# -*- coding: utf-8 -*-
 #
-# Copyright (C) 2009-2018 the sqlparse authors and contributors
+# Copyright (C) 2009-2020 the sqlparse authors and contributors
 #
 #
 # This module is part of python-sqlparse and is released under
@@ -17,32 +16,32 @@ def validate_options(options):
     kwcase = options.get('keyword_case')
     if kwcase not in [None, 'upper', 'lower', 'capitalize']:
         raise SQLParseError('Invalid value for keyword_case: '
-                            '{0!r}'.format(kwcase))
+                            '{!r}'.format(kwcase))
 
     idcase = options.get('identifier_case')
     if idcase not in [None, 'upper', 'lower', 'capitalize']:
         raise SQLParseError('Invalid value for identifier_case: '
-                            '{0!r}'.format(idcase))
+                            '{!r}'.format(idcase))
 
     ofrmt = options.get('output_format')
     if ofrmt not in [None, 'sql', 'python', 'php']:
         raise SQLParseError('Unknown output format: '
-                            '{0!r}'.format(ofrmt))
+                            '{!r}'.format(ofrmt))
 
     strip_comments = options.get('strip_comments', False)
     if strip_comments not in [True, False]:
         raise SQLParseError('Invalid value for strip_comments: '
-                            '{0!r}'.format(strip_comments))
+                            '{!r}'.format(strip_comments))
 
     space_around_operators = options.get('use_space_around_operators', False)
     if space_around_operators not in [True, False]:
         raise SQLParseError('Invalid value for use_space_around_operators: '
-                            '{0!r}'.format(space_around_operators))
+                            '{!r}'.format(space_around_operators))
 
     strip_ws = options.get('strip_whitespace', False)
     if strip_ws not in [True, False]:
         raise SQLParseError('Invalid value for strip_whitespace: '
-                            '{0!r}'.format(strip_ws))
+                            '{!r}'.format(strip_ws))
 
     truncate_strings = options.get('truncate_strings')
     if truncate_strings is not None:
@@ -50,17 +49,17 @@
             truncate_strings = int(truncate_strings)
         except (ValueError, TypeError):
             raise SQLParseError('Invalid value for truncate_strings: '
-                                '{0!r}'.format(truncate_strings))
+                                '{!r}'.format(truncate_strings))
         if truncate_strings <= 1:
             raise SQLParseError('Invalid value for truncate_strings: '
-                                '{0!r}'.format(truncate_strings))
+                                '{!r}'.format(truncate_strings))
         options['truncate_strings'] = truncate_strings
         options['truncate_char'] = options.get('truncate_char', '[...]')
 
     indent_columns = options.get('indent_columns', False)
     if indent_columns not in [True, False]:
         raise SQLParseError('Invalid value for indent_columns: '
-                            '{0!r}'.format(indent_columns))
+                            '{!r}'.format(indent_columns))
     elif indent_columns:
         options['reindent'] = True  # enforce reindent
     options['indent_columns'] = indent_columns
@@ -68,27 +67,27 @@
     reindent = options.get('reindent', False)
     if reindent not in [True, False]:
         raise SQLParseError('Invalid value for reindent: '
-                            '{0!r}'.format(reindent))
+                            '{!r}'.format(reindent))
     elif reindent:
         options['strip_whitespace'] = True
 
     reindent_aligned = options.get('reindent_aligned', False)
     if reindent_aligned not in [True, False]:
         raise SQLParseError('Invalid value for reindent_aligned: '
-                            '{0!r}'.format(reindent))
+                            '{!r}'.format(reindent_aligned))
     elif reindent_aligned:
         options['strip_whitespace'] = True
 
     indent_after_first = options.get('indent_after_first', False)
     if indent_after_first not in [True, False]:
         raise SQLParseError('Invalid value for indent_after_first: '
-                            '{0!r}'.format(indent_after_first))
+                            '{!r}'.format(indent_after_first))
     options['indent_after_first'] = indent_after_first
 
     indent_tabs = options.get('indent_tabs', False)
     if indent_tabs not in [True, False]:
         raise SQLParseError('Invalid value for indent_tabs: '
-                            '{0!r}'.format(indent_tabs))
+                            '{!r}'.format(indent_tabs))
     elif indent_tabs:
         options['indent_char'] = '\t'
     else:
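``validate_options`` above is what turns a bad keyword argument into a ``SQLParseError`` before any filter runs. A small sketch of the observable behavior through the public API, using the message format from the hunk above:

    import sqlparse
    from sqlparse.exceptions import SQLParseError

    try:
        sqlparse.format('select 1', keyword_case='mixed')
    except SQLParseError as e:
        print(e)  # Invalid value for keyword_case: 'mixed'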
'{!r}'.format(indent_tabs)) elif indent_tabs: options['indent_char'] = '\t' else: diff --git a/sqlparse/keywords.py b/sqlparse/keywords.py index 602051a0..b45f3e0f 100644 --- a/sqlparse/keywords.py +++ b/sqlparse/keywords.py @@ -1,97 +1,96 @@ -# -*- coding: utf-8 -*- # -# Copyright (C) 2009-2018 the sqlparse authors and contributors +# Copyright (C) 2009-2020 the sqlparse authors and contributors # # # This module is part of python-sqlparse and is released under # the BSD License: https://opensource.org/licenses/BSD-3-Clause -import re - from sqlparse import tokens - -def is_keyword(value): - val = value.upper() - return (KEYWORDS_COMMON.get(val) - or KEYWORDS_ORACLE.get(val) - or KEYWORDS_PLPGSQL.get(val) - or KEYWORDS.get(val, tokens.Name)), value - - -SQL_REGEX = { - 'root': [ - (r'(--|//|# )\+.*?(\r\n|\r|\n|$)', tokens.Comment.Single.Hint), - (r'/\*\+[\s\S]*?\*/', tokens.Comment.Multiline.Hint), - - (r'(--|//|# ).*?(\r\n|\r|\n|$)', tokens.Comment.Single), - (r'/\*[\s\S]*?\*/', tokens.Comment.Multiline), - - (r'(\r\n|\r|\n)', tokens.Newline), - (r'\s+?', tokens.Whitespace), - - (r':=', tokens.Assignment), - (r'::', tokens.Punctuation), - - (r'\*', tokens.Wildcard), - - (r"`(``|[^`])*`", tokens.Name), - (r"´(´´|[^´])*´", tokens.Name), - (r'(\$(?:[_A-ZÀ-Ü]\w*)?\$)[\s\S]*?\1', tokens.Literal), - - (r'\?', tokens.Name.Placeholder), - (r'%(\(\w+\))?s', tokens.Name.Placeholder), - (r'(?<!\w)[$:?]\w+', tokens.Name.Placeholder), - - (r'\\\w+', tokens.Command), - - # FIXME(andi): VALUES shouldn't be listed here - # see https://github.com/andialbrecht/sqlparse/pull/64 - # IN is special, it may be followed by a parenthesis, but - # is never a function, see issue183 - (r'(CASE|IN|VALUES|USING|FROM)\b', tokens.Keyword), - - (r'(@|##|#)[A-ZÀ-Ü]\w+', tokens.Name), - - # see issue #39 - # Spaces around period `schema . name` are valid identifier - # TODO: Spaces before period not implemented - (r'[A-ZÀ-Ü]\w*(?=\s*\.)', tokens.Name), # 'Name' . - # FIXME(atronah): never match, - # because `re.match` doesn't work with look-behind regexp feature - (r'(?<=\.)[A-ZÀ-Ü]\w*', tokens.Name), # .'Name' - (r'[A-ZÀ-Ü]\w*(?=\()', tokens.Name), # side effect: change kw to func - (r'-?0x[\dA-F]+', tokens.Number.Hexadecimal), - (r'-?\d*(\.\d+)?E-?\d+', tokens.Number.Float), - (r'(?![_A-ZÀ-Ü])-?(\d+(\.\d*)|\.\d+)(?![_A-ZÀ-Ü])', tokens.Number.Float), - (r'(?![_A-ZÀ-Ü])-?\d+(?![_A-ZÀ-Ü])', tokens.Number.Integer), - (r"'(''|\\\\|\\'|[^'])*'", tokens.String.Single), - # not a real string literal in ANSI SQL: - (r'"(""|\\\\|\\"|[^"])*"', tokens.String.Symbol), - (r'(""|".*?[^\\]")', tokens.String.Symbol), - # sqlite names can be escaped with [square brackets]. left bracket - # cannot be preceded by word character or a right bracket -- - # otherwise it's probably an array index - (r'(?<![\w\])])(\[[^\]\[]+\])', tokens.Name), - (r'((LEFT\s+|RIGHT\s+|FULL\s+)?(INNER\s+|OUTER\s+|STRAIGHT\s+)?|(CROSS\s+|NATURAL\s+)?)?JOIN\b', tokens.Keyword), - (r'END(\s+IF|\s+LOOP|\s+WHILE)?\b', tokens.Keyword), - (r'NOT\s+NULL\b', tokens.Keyword), - (r'UNION\s+ALL\b', tokens.Keyword), - (r'CREATE(\s+OR\s+REPLACE)?\b', tokens.Keyword.DDL), - (r'DOUBLE\s+PRECISION\b', tokens.Name.Builtin), - (r'GROUP\s+BY\b', tokens.Keyword), - (r'ORDER\s+BY\b', tokens.Keyword), - (r'(LATERAL\s+VIEW\s+)(EXPLODE|INLINE|PARSE_URL_TUPLE|POSEXPLODE|STACK)\b', tokens.Keyword), - (r'[0-9_A-ZÀ-Ü][_$#\w]*', is_keyword), - (r'[;:()\[\],\.]', tokens.Punctuation), - (r'[<>=~!]+', tokens.Operator.Comparison), - (r'[+/@#%^&|`?^-]+', tokens.Operator), - ]} - -FLAGS = re.IGNORECASE | re.UNICODE -SQL_REGEX = [(re.compile(rx, FLAGS).match, tt) for rx, tt in SQL_REGEX['root']] +# object() only supports "is" and is useful as a marker +# use this marker to specify that the given regex in SQL_REGEX +# shall be processed further through a lookup in the KEYWORDS dictionaries +PROCESS_AS_KEYWORD = object() + + +SQL_REGEX = [ + (r'(--|# )\+.*?(\r\n|\r|\n|$)', tokens.Comment.Single.Hint), + (r'/\*\+[\s\S]*?\*/', tokens.Comment.Multiline.Hint), + + (r'(--|# ).*?(\r\n|\r|\n|$)', tokens.Comment.Single), + (r'/\*[\s\S]*?\*/', tokens.Comment.Multiline), + + (r'(\r\n|\r|\n)', tokens.Newline), + (r'\s+?', tokens.Whitespace), + + (r':=', tokens.Assignment), + (r'::', tokens.Punctuation), + + (r'\*', tokens.Wildcard), + + (r"`(``|[^`])*`", tokens.Name), + (r"´(´´|[^´])*´", tokens.Name), + (r'((?<!\S)\$(?:[_A-ZÀ-Ü]\w*)?\$)[\s\S]*?\1', tokens.Literal), + + (r'\?', tokens.Name.Placeholder), + (r'%(\(\w+\))?s', tokens.Name.Placeholder), + (r'(?<!\w)[$:?]\w+', tokens.Name.Placeholder), + + (r'\\\w+', tokens.Command), + + # FIXME(andi): VALUES shouldn't be listed here + # see https://github.com/andialbrecht/sqlparse/pull/64 + # AS and IN are special, it may be followed by a parenthesis, but + # are never functions, see issue183 and issue507 + (r'(CASE|IN|VALUES|USING|FROM|AS)\b', PROCESS_AS_KEYWORD), + + (r'(@|##|#)[A-ZÀ-Ü]\w+', tokens.Name), + + # see issue #39 + # Spaces around period `schema . name` are valid identifier + # TODO: Spaces before period not implemented + (r'[A-ZÀ-Ü]\w*(?=\s*\.)', tokens.Name), # 'Name' . + # FIXME(atronah): never match, + # because `re.match` doesn't work with look-behind regexp feature + (r'(?<=\.)[A-ZÀ-Ü]\w*', tokens.Name), # .'Name' + (r'[A-ZÀ-Ü]\w*(?=\()', tokens.Name), # side effect: change kw to func + (r'-?0x[\dA-F]+', tokens.Number.Hexadecimal), + (r'-?\d+(\.\d+)?E-?\d+', tokens.Number.Float), + (r'(?![_A-ZÀ-Ü])-?(\d+(\.\d*)|\.\d+)(?![_A-ZÀ-Ü])', tokens.Number.Float), + (r'(?![_A-ZÀ-Ü])-?\d+(?![_A-ZÀ-Ü])', tokens.Number.Integer), + (r"'(''|\\\\|\\'|[^'])*'", tokens.String.Single), + # not a real string literal in ANSI SQL: + (r'"(""|\\\\|\\"|[^"])*"', tokens.String.Symbol), + (r'(""|".*?[^\\]")', tokens.String.Symbol), + # sqlite names can be escaped with [square brackets]. left bracket + # cannot be preceded by word character or a right bracket -- + # otherwise it's probably an array index + (r'(?<![\w\])])(\[[^\]\[]+\])', tokens.Name), + (r'((LEFT\s+|RIGHT\s+|FULL\s+)?(INNER\s+|OUTER\s+|STRAIGHT\s+)?|(CROSS\s+|NATURAL\s+)?)?JOIN\b', tokens.Keyword), + (r'END(\s+IF|\s+LOOP|\s+WHILE)?\b', tokens.Keyword), + (r'NOT\s+NULL\b', tokens.Keyword), + (r'NULLS\s+(FIRST|LAST)\b', tokens.Keyword), + (r'UNION\s+ALL\b', tokens.Keyword), + (r'CREATE(\s+OR\s+REPLACE)?\b', tokens.Keyword.DDL), + (r'DOUBLE\s+PRECISION\b', tokens.Name.Builtin), + (r'GROUP\s+BY\b', tokens.Keyword), + (r'ORDER\s+BY\b', tokens.Keyword), + (r'HANDLER\s+FOR\b', tokens.Keyword), + (r'(LATERAL\s+VIEW\s+)(EXPLODE|INLINE|PARSE_URL_TUPLE|POSEXPLODE|STACK)\b', tokens.Keyword), + (r"(AT|WITH')\s+TIME\s+ZONE\s+'[^']+'", tokens.Keyword.TZCast), + (r'(NOT\s+)?(LIKE|ILIKE|RLIKE)\b', tokens.Operator.Comparison), + (r'\w[$#\w]*', PROCESS_AS_KEYWORD), + (r'[;:()\[\],\.]', tokens.Punctuation), + (r'[<>=~!]+', tokens.Operator.Comparison), + (r'[+/@#%^&|^-]+', tokens.Operator), +] KEYWORDS = { 'ABORT': tokens.Keyword, @@ -191,7 +190,7 @@ def is_keyword(value): 'CONVERSION': tokens.Keyword, 'CONVERT': tokens.Keyword, 'COPY': tokens.Keyword, - 'CORRESPONTING': tokens.Keyword, + 'CORRESPONDING': tokens.Keyword, 'COUNT': tokens.Keyword, 'CREATEDB': tokens.Keyword, 'CREATEUSER': tokens.Keyword, @@ -235,6 +234,7 @@ def is_keyword(value): 'DISABLE': tokens.Keyword, 'DISCONNECT': tokens.Keyword, 'DISPATCH': tokens.Keyword, + 'DIV': tokens.Operator, 'DO': tokens.Keyword, 'DOMAIN': tokens.Keyword, 'DYNAMIC': tokens.Keyword, @@ -289,7 +289,6 @@ def is_keyword(value): 'GRANTED': tokens.Keyword, 'GROUPING': tokens.Keyword, - 'HANDLER': tokens.Keyword, 'HAVING': tokens.Keyword, 'HIERARCHY': tokens.Keyword, 'HOLD': tokens.Keyword, @@ -309,7 +308,7 @@ def is_keyword(value): 'INCREMENT': tokens.Keyword, 'INDEX': tokens.Keyword, - 'INDITCATOR': tokens.Keyword, + 'INDICATOR': tokens.Keyword, 'INFIX': tokens.Keyword, 'INHERITS': tokens.Keyword, 'INITIAL': tokens.Keyword, @@ -426,11 +425,11 @@ def is_keyword(value): 'PARAMETER': tokens.Keyword, 'PARAMETERS': tokens.Keyword, 'PARAMETER_MODE': tokens.Keyword, - 'PARAMATER_NAME':
tokens.Keyword, - 'PARAMATER_ORDINAL_POSITION': tokens.Keyword, + 'PARAMETER_NAME': tokens.Keyword, + 'PARAMETER_ORDINAL_POSITION': tokens.Keyword, 'PARAMETER_SPECIFIC_CATALOG': tokens.Keyword, 'PARAMETER_SPECIFIC_NAME': tokens.Keyword, - 'PARAMATER_SPECIFIC_SCHEMA': tokens.Keyword, + 'PARAMETER_SPECIFIC_SCHEMA': tokens.Keyword, 'PARTIAL': tokens.Keyword, 'PASCAL': tokens.Keyword, 'PCTFREE': tokens.Keyword, @@ -634,14 +633,20 @@ def is_keyword(value): 'DATE': tokens.Name.Builtin, 'DEC': tokens.Name.Builtin, 'DECIMAL': tokens.Name.Builtin, + 'FILE_TYPE': tokens.Name.Builtin, 'FLOAT': tokens.Name.Builtin, 'INT': tokens.Name.Builtin, 'INT8': tokens.Name.Builtin, 'INTEGER': tokens.Name.Builtin, 'INTERVAL': tokens.Name.Builtin, 'LONG': tokens.Name.Builtin, + 'NATURALN': tokens.Name.Builtin, + 'NVARCHAR': tokens.Name.Builtin, 'NUMBER': tokens.Name.Builtin, 'NUMERIC': tokens.Name.Builtin, + 'PLS_INTEGER': tokens.Name.Builtin, + 'POSITIVE': tokens.Name.Builtin, + 'POSITIVEN': tokens.Name.Builtin, 'REAL': tokens.Name.Builtin, 'ROWID': tokens.Name.Builtin, 'ROWLABEL': tokens.Name.Builtin, @@ -649,11 +654,18 @@ def is_keyword(value): 'SERIAL': tokens.Name.Builtin, 'SERIAL8': tokens.Name.Builtin, 'SIGNED': tokens.Name.Builtin, + 'SIGNTYPE': tokens.Name.Builtin, + 'SIMPLE_DOUBLE': tokens.Name.Builtin, + 'SIMPLE_FLOAT': tokens.Name.Builtin, + 'SIMPLE_INTEGER': tokens.Name.Builtin, 'SMALLINT': tokens.Name.Builtin, + 'SYS_REFCURSOR': tokens.Name.Builtin, 'SYSDATE': tokens.Name, 'TEXT': tokens.Name.Builtin, 'TINYINT': tokens.Name.Builtin, 'UNSIGNED': tokens.Name.Builtin, + 'UROWID': tokens.Name.Builtin, + 'UTL_FILE': tokens.Name.Builtin, 'VARCHAR': tokens.Name.Builtin, 'VARCHAR2': tokens.Name.Builtin, 'VARYING': tokens.Name.Builtin, @@ -727,6 +739,7 @@ def is_keyword(value): 'DOUBLE': tokens.Keyword, 'DUMP': tokens.Keyword, + 'ELSIF': tokens.Keyword, 'EVENTS': tokens.Keyword, 'EXCEPTIONS': tokens.Keyword, 'EXPLAIN': tokens.Keyword, @@ -815,6 +828,8 @@ def is_keyword(value): # PostgreSQL Syntax KEYWORDS_PLPGSQL = { + 'CONFLICT': tokens.Keyword, + 'WINDOW': tokens.Keyword, 'PARTITION': tokens.Keyword, 'OVER': tokens.Keyword, 'PERFORM': tokens.Keyword, @@ -822,6 +837,7 @@ def is_keyword(value): 'PLPGSQL': tokens.Keyword, 'INHERIT': tokens.Keyword, 'INDEXES': tokens.Keyword, + 'ON_ERROR_STOP': tokens.Keyword, 'BYTEA': tokens.Keyword, 'BIGSERIAL': tokens.Keyword, @@ -854,3 +870,92 @@ def is_keyword(value): 'IN': tokens.Keyword, 'LOOP': tokens.Keyword, } + +# Hive Syntax +KEYWORDS_HQL = { + 'EXPLODE': tokens.Keyword, + 'DIRECTORY': tokens.Keyword, + 'DISTRIBUTE': tokens.Keyword, + 'INCLUDE': tokens.Keyword, + 'LOCATE': tokens.Keyword, + 'OVERWRITE': tokens.Keyword, + 'POSEXPLODE': tokens.Keyword, + + 'ARRAY_CONTAINS': tokens.Keyword, + 'CMP': tokens.Keyword, + 'COLLECT_LIST': tokens.Keyword, + 'CONCAT': tokens.Keyword, + 'CONDITION': tokens.Keyword, + 'DATE_ADD': tokens.Keyword, + 'DATE_SUB': tokens.Keyword, + 'DECODE': tokens.Keyword, + 'DBMS_OUTPUT': tokens.Keyword, + 'ELEMENTS': tokens.Keyword, + 'EXCHANGE': tokens.Keyword, + 'EXTENDED': tokens.Keyword, + 'FLOOR': tokens.Keyword, + 'FOLLOWING': tokens.Keyword, + 'FROM_UNIXTIME': tokens.Keyword, + 'FTP': tokens.Keyword, + 'HOUR': tokens.Keyword, + 'INLINE': tokens.Keyword, + 'INSTR': tokens.Keyword, + 'LEN': tokens.Keyword, + 'MAP': tokens.Name.Builtin, + 'MAXELEMENT': tokens.Keyword, + 'MAXINDEX': tokens.Keyword, + 'MAX_PART_DATE': tokens.Keyword, + 'MAX_PART_INT': tokens.Keyword, + 'MAX_PART_STRING': tokens.Keyword, + 'MINELEMENT': tokens.Keyword, + 
'MININDEX': tokens.Keyword, + 'MIN_PART_DATE': tokens.Keyword, + 'MIN_PART_INT': tokens.Keyword, + 'MIN_PART_STRING': tokens.Keyword, + 'NOW': tokens.Keyword, + 'NVL': tokens.Keyword, + 'NVL2': tokens.Keyword, + 'PARSE_URL_TUPLE': tokens.Keyword, + 'PART_LOC': tokens.Keyword, + 'PART_COUNT': tokens.Keyword, + 'PART_COUNT_BY': tokens.Keyword, + 'PRINT': tokens.Keyword, + 'PUT_LINE': tokens.Keyword, + 'RANGE': tokens.Keyword, + 'REDUCE': tokens.Keyword, + 'REGEXP_REPLACE': tokens.Keyword, + 'RESIGNAL': tokens.Keyword, + 'RTRIM': tokens.Keyword, + 'SIGN': tokens.Keyword, + 'SIGNAL': tokens.Keyword, + 'SIN': tokens.Keyword, + 'SPLIT': tokens.Keyword, + 'SQRT': tokens.Keyword, + 'STACK': tokens.Keyword, + 'STR': tokens.Keyword, + 'STRING': tokens.Name.Builtin, + 'STRUCT': tokens.Name.Builtin, + 'SUBSTR': tokens.Keyword, + 'SUMMARY': tokens.Keyword, + 'TBLPROPERTIES': tokens.Keyword, + 'TIMESTAMP': tokens.Name.Builtin, + 'TIMESTAMP_ISO': tokens.Keyword, + 'TO_CHAR': tokens.Keyword, + 'TO_DATE': tokens.Keyword, + 'TO_TIMESTAMP': tokens.Keyword, + 'TRUNC': tokens.Keyword, + 'UNBOUNDED': tokens.Keyword, + 'UNIQUEJOIN': tokens.Keyword, + 'UNIX_TIMESTAMP': tokens.Keyword, + 'UTC_TIMESTAMP': tokens.Keyword, + 'VIEWS': tokens.Keyword, + + 'EXIT': tokens.Keyword, + 'BREAK': tokens.Keyword, + 'LEAVE': tokens.Keyword, +} + + +KEYWORDS_MSACCESS = { + 'DISTINCTROW': tokens.Keyword, +} diff --git a/sqlparse/lexer.py b/sqlparse/lexer.py index fd007a4c..9d25c9e6 100644 --- a/sqlparse/lexer.py +++ b/sqlparse/lexer.py @@ -1,31 +1,104 @@ -# -*- coding: utf-8 -*- # -# Copyright (C) 2009-2018 the sqlparse authors and contributors +# Copyright (C) 2009-2020 the sqlparse authors and contributors # # # This module is part of python-sqlparse and is released under # the BSD License: https://opensource.org/licenses/BSD-3-Clause """SQL Lexer""" +import re # This code is based on the SqlLexer in pygments. # http://pygments.org/ # It's separated from the rest of pygments to increase performance # and to allow some customizations. -from sqlparse import tokens -from sqlparse.keywords import SQL_REGEX -from sqlparse.compat import text_type, file_types +from io import TextIOBase + +from sqlparse import tokens, keywords from sqlparse.utils import consume -class Lexer(object): - """Lexer - Empty class. Leaving for backwards-compatibility - """ +class Lexer: + """The Lexer supports configurable syntax. + To add support for additional keywords, use the `add_keywords` method.""" + + _default_intance = None + + # Development notes: + # - This class is prepared to be able to support additional SQL dialects + # in the future by adding additional functions that take the place of + # the function default_initialization() + # - The lexer class uses an explicit singleton behavior with the + # instance-getter method get_default_instance(). This mechanism has + # the advantage that the call signature of the entry-points to the + # sqlparse library are not affected. Also, usage of sqlparse in third + # party code does not need to be adapted. On the other hand, singleton + # behavior is not thread safe, and the current implementation does not + # easily allow for multiple SQL dialects to be parsed in the same + # process. Such behavior can be supported in the future by passing a + # suitably initialized lexer object as an additional parameter to the + # entry-point functions (such as `parse`). Code will need to be written + # to pass down and utilize such an object. 
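As a usage illustration of the extension hook these notes describe (a minimal sketch only; 'BACON' is a throwaway keyword chosen because the tests later in this patch use it, and the method names are exactly the ones this patch introduces):

    from sqlparse import tokens
    from sqlparse.lexer import Lexer

    # Fetch the process-wide lexer instance used by sqlparse.parse().
    lex = Lexer.get_default_instance()

    # Register an extra keyword dictionary; dictionaries are consulted
    # in the order they were added.
    lex.add_keywords({'BACON': tokens.Keyword})

    # ... tokenize/format as usual ...

    # Restore the stock syntax so later callers are unaffected.
    lex.default_initialization()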
The current implementation + # is prepared to support this thread safe approach without the + # default_instance part needing to change interface. + + @classmethod + def get_default_instance(cls): + """Returns the lexer instance used internally + by the sqlparse core functions.""" + if cls._default_intance is None: + cls._default_intance = cls() + cls._default_intance.default_initialization() + return cls._default_intance + + def default_initialization(self): + """Initialize the lexer with default dictionaries. + Useful if you need to revert custom syntax settings.""" + self.clear() + self.set_SQL_REGEX(keywords.SQL_REGEX) + self.add_keywords(keywords.KEYWORDS_COMMON) + self.add_keywords(keywords.KEYWORDS_ORACLE) + self.add_keywords(keywords.KEYWORDS_PLPGSQL) + self.add_keywords(keywords.KEYWORDS_HQL) + self.add_keywords(keywords.KEYWORDS_MSACCESS) + self.add_keywords(keywords.KEYWORDS) + + def clear(self): + """Clear all syntax configurations. + Useful if you want to load a reduced set of syntax configurations. + After this call, regexps and keyword dictionaries need to be loaded + to make the lexer functional again.""" + self._SQL_REGEX = [] + self._keywords = [] + + def set_SQL_REGEX(self, SQL_REGEX): + """Set the list of regex that will parse the SQL.""" + FLAGS = re.IGNORECASE | re.UNICODE + self._SQL_REGEX = [ + (re.compile(rx, FLAGS).match, tt) + for rx, tt in SQL_REGEX + ] + + def add_keywords(self, keywords): + """Add keyword dictionaries. Keywords are looked up in the same order + that dictionaries were added.""" + self._keywords.append(keywords) + + def is_keyword(self, value): + """Checks for a keyword. + + If the given value is in one of the KEYWORDS_* dictionary + it's considered a keyword. Otherwise, tokens.Name is returned. + """ + val = value.upper() + for kwdict in self._keywords: + if val in kwdict: + return kwdict[val], value + else: + return tokens.Name, value - @staticmethod - def get_tokens(text, encoding=None): + def get_tokens(self, text, encoding=None): """ Return an iterable of (tokentype, value) pairs generated from `text`. If `unfiltered` is set to `True`, the filtering mechanism @@ -38,10 +111,10 @@ def get_tokens(text, encoding=None): ``stack`` is the initial stack (default: ``['root']``) """ - if isinstance(text, file_types): + if isinstance(text, TextIOBase): text = text.read() - if isinstance(text, text_type): + if isinstance(text, str): pass elif isinstance(text, bytes): if encoding: @@ -52,20 +125,20 @@ def get_tokens(text, encoding=None): except UnicodeDecodeError: text = text.decode('unicode-escape') else: - raise TypeError(u"Expected text or file-like object, got {!r}". + raise TypeError("Expected text or file-like object, got {!r}". format(type(text))) iterable = enumerate(text) for pos, char in iterable: - for rexmatch, action in SQL_REGEX: + for rexmatch, action in self._SQL_REGEX: m = rexmatch(text, pos) if not m: continue elif isinstance(action, tokens._TokenType): yield action, m.group() - elif callable(action): - yield action(m.group()) + elif action is keywords.PROCESS_AS_KEYWORD: + yield self.is_keyword(m.group()) consume(iterable, m.end() - pos - 1) break @@ -79,4 +152,4 @@ def tokenize(sql, encoding=None): Tokenize *sql* using the :class:`Lexer` and return a 2-tuple stream of ``(token type, value)`` items. 
""" - return Lexer().get_tokens(sql, encoding) + return Lexer.get_default_instance().get_tokens(sql, encoding) diff --git a/sqlparse/sql.py b/sqlparse/sql.py index a752555c..1ccfbdbe 100644 --- a/sqlparse/sql.py +++ b/sqlparse/sql.py @@ -1,23 +1,42 @@ -# -*- coding: utf-8 -*- # -# Copyright (C) 2009-2018 the sqlparse authors and contributors +# Copyright (C) 2009-2020 the sqlparse authors and contributors # # # This module is part of python-sqlparse and is released under # the BSD License: https://opensource.org/licenses/BSD-3-Clause """This module contains classes representing syntactical elements of SQL.""" -from __future__ import print_function import re from sqlparse import tokens as T -from sqlparse.compat import string_types, text_type, unicode_compatible from sqlparse.utils import imt, remove_quotes -@unicode_compatible -class Token(object): +class NameAliasMixin: + """Implements get_real_name and get_alias.""" + + def get_real_name(self): + """Returns the real name (object name) of this identifier.""" + # a.b + dot_idx, _ = self.token_next_by(m=(T.Punctuation, '.')) + return self._get_first_name(dot_idx, real_name=True) + + def get_alias(self): + """Returns the alias for this identifier or ``None``.""" + + # "name AS alias" + kw_idx, kw = self.token_next_by(m=(T.Keyword, 'AS')) + if kw is not None: + return self._get_first_name(kw_idx + 1, keywords=True) + + # "name alias" or "complicated column expression alias" + _, ws = self.token_next_by(t=T.Whitespace) + if len(self.tokens) > 2 and ws is not None: + return self._get_first_name(reverse=True) + + +class Token: """Base class for all other classes in this module. It represents a single token and has two instance attributes: @@ -29,7 +48,7 @@ class Token(object): 'is_group', 'is_whitespace') def __init__(self, ttype, value): - value = text_type(value) + value = str(value) self.value = value self.ttype = ttype self.parent = None @@ -49,15 +68,15 @@ def __repr__(self): cls = self._get_repr_name() value = self._get_repr_value() - q = u'"' if value.startswith("'") and value.endswith("'") else u"'" - return u"<{cls} {q}{value}{q} at 0x{id:2X}>".format( + q = '"' if value.startswith("'") and value.endswith("'") else "'" + return "<{cls} {q}{value}{q} at 0x{id:2X}>".format( id=id(self), **locals()) def _get_repr_name(self): return str(self.ttype).split('.')[-1] def _get_repr_value(self): - raw = text_type(self) + raw = str(self) if len(raw) > 7: raw = raw[:6] + '...' return re.sub(r'\s+', ' ', raw) @@ -82,7 +101,7 @@ def match(self, ttype, values, regex=False): if not type_matched or values is None: return type_matched - if isinstance(values, string_types): + if isinstance(values, str): values = (values,) if regex: @@ -127,7 +146,6 @@ def has_ancestor(self, other): return False -@unicode_compatible class TokenList(Token): """A group of tokens. 
@@ -139,12 +157,12 @@ class TokenList(Token): def __init__(self, tokens=None): self.tokens = tokens or [] - [setattr(token, 'parent', self) for token in tokens] - super(TokenList, self).__init__(None, text_type(self)) + [setattr(token, 'parent', self) for token in self.tokens] + super().__init__(None, str(self)) self.is_group = True def __str__(self): - return u''.join(token.value for token in self.flatten()) + return ''.join(token.value for token in self.flatten()) # weird bug # def __len__(self): @@ -167,14 +185,14 @@ def _pprint_tree(self, max_depth=None, depth=0, f=None, _pre=''): value = token._get_repr_value() last = idx == (token_count - 1) - pre = u'`- ' if last else u'|- ' + pre = '`- ' if last else '|- ' - q = u'"' if value.startswith("'") and value.endswith("'") else u"'" - print(u"{_pre}{pre}{idx} {cls} {q}{value}{q}" + q = '"' if value.startswith("'") and value.endswith("'") else "'" + print("{_pre}{pre}{idx} {cls} {q}{value}{q}" .format(**locals()), file=f) if token.is_group and (max_depth is None or depth < max_depth): - parent_pre = u' ' if last else u'| ' + parent_pre = ' ' if last else '| ' token._pprint_tree(max_depth, depth + 1, f, _pre + parent_pre) def get_token_at_offset(self, offset): @@ -193,8 +211,7 @@ def flatten(self): """ for token in self.tokens: if token.is_group: - for item in token.flatten(): - yield item + yield from token.flatten() else: yield token @@ -217,16 +234,16 @@ def _token_matching(self, funcs, start=0, end=None, reverse=False): if reverse: assert end is None - for idx in range(start - 2, -1, -1): - token = self.tokens[idx] - for func in funcs: - if func(token): - return idx, token + indexes = range(start - 2, -1, -1) else: - for idx, token in enumerate(self.tokens[start:end], start=start): - for func in funcs: - if func(token): - return idx, token + if end is None: + end = len(self.tokens) + indexes = range(start, end) + for idx in indexes: + token = self.tokens[idx] + for func in funcs: + if func(token): + return idx, token return None, None def token_first(self, skip_ws=True, skip_cm=False): @@ -239,15 +256,14 @@ def token_first(self, skip_ws=True, skip_cm=False): ignored too. """ # this on is inconsistent, using Comment instead of T.Comment... 
- funcs = lambda tk: not ((skip_ws and tk.is_whitespace) - or (skip_cm and imt(tk, - t=T.Comment, i=Comment))) - return self._token_matching(funcs)[1] + def matcher(tk): + return not ((skip_ws and tk.is_whitespace) + or (skip_cm and imt(tk, t=T.Comment, i=Comment))) + return self._token_matching(matcher)[1] def token_next_by(self, i=None, m=None, t=None, idx=-1, end=None): - funcs = lambda tk: imt(tk, i, m, t) idx += 1 - return self._token_matching(funcs, idx, end) + return self._token_matching(lambda tk: imt(tk, i, m, t), idx, end) def token_not_matching(self, funcs, idx): funcs = (funcs,) if not isinstance(funcs, (list, tuple)) else funcs @@ -277,10 +293,11 @@ def token_next(self, idx, skip_ws=True, skip_cm=False, _reverse=False): if idx is None: return None, None idx += 1 # alot of code usage current pre-compensates for this - funcs = lambda tk: not ((skip_ws and tk.is_whitespace) - or (skip_cm and imt(tk, - t=T.Comment, i=Comment))) - return self._token_matching(funcs, idx, reverse=_reverse) + + def matcher(tk): + return not ((skip_ws and tk.is_whitespace) + or (skip_cm and imt(tk, t=T.Comment, i=Comment))) + return self._token_matching(matcher, idx, reverse=_reverse) def token_index(self, token, start=0): """Return list index of token.""" @@ -305,7 +322,7 @@ def group_tokens(self, grp_cls, start, end, include_end=True, grp = start grp.tokens.extend(subtokens) del self.tokens[start_idx + 1:end_idx] - grp.value = text_type(start) + grp.value = str(start) else: subtokens = self.tokens[start_idx:end_idx] grp = grp_cls(subtokens) @@ -341,16 +358,7 @@ def has_alias(self): def get_alias(self): """Returns the alias for this identifier or ``None``.""" - - # "name AS alias" - kw_idx, kw = self.token_next_by(m=(T.Keyword, 'AS')) - if kw is not None: - return self._get_first_name(kw_idx + 1, keywords=True) - - # "name alias" or "complicated column expression alias" - _, ws = self.token_next_by(t=T.Whitespace) - if len(self.tokens) > 2 and ws is not None: - return self._get_first_name(reverse=True) + return None def get_name(self): """Returns the name of this identifier. @@ -363,9 +371,7 @@ def get_name(self): def get_real_name(self): """Returns the real name (object name) of this identifier.""" - # a.b - dot_idx, _ = self.token_next_by(m=(T.Punctuation, '.')) - return self._get_first_name(dot_idx, real_name=True) + return None def get_parent_name(self): """Return name of the parent object if any. @@ -407,33 +413,34 @@ def get_type(self): Whitespaces and comments at the beginning of the statement are ignored. """ - first_token = self.token_first(skip_cm=True) - if first_token is None: + token = self.token_first(skip_cm=True) + if token is None: # An "empty" statement that either has not tokens at all # or only whitespace tokens. return 'UNKNOWN' - elif first_token.ttype in (T.Keyword.DML, T.Keyword.DDL): - return first_token.normalized + elif token.ttype in (T.Keyword.DML, T.Keyword.DDL): + return token.normalized - elif first_token.ttype == T.Keyword.CTE: + elif token.ttype == T.Keyword.CTE: # The WITH keyword should be followed by either an Identifier or # an IdentifierList containing the CTE definitions; the actual # DML keyword (e.g. SELECT, INSERT) will follow next. 
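A quick sketch of the behavior this comment describes (the same expectation is exercised by the issue632 regression test near the end of this patch):

    import sqlparse

    # get_type() skips the WITH keyword and the CTE definitions and
    # reports the DML keyword that actually follows.
    stmt = sqlparse.parse('WITH foo AS (SELECT 1) SELECT * FROM foo;')[0]
    assert stmt.get_type() == 'SELECT'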
- fidx = self.token_index(first_token) - tidx, token = self.token_next(fidx, skip_ws=True) - if isinstance(token, (Identifier, IdentifierList)): - _, dml_keyword = self.token_next(tidx, skip_ws=True) + tidx = self.token_index(token) + while tidx is not None: + tidx, token = self.token_next(tidx, skip_ws=True) + if isinstance(token, (Identifier, IdentifierList)): + tidx, token = self.token_next(tidx, skip_ws=True) - if dml_keyword is not None \ - and dml_keyword.ttype == T.Keyword.DML: - return dml_keyword.normalized + if token is not None \ + and token.ttype == T.Keyword.DML: + return token.normalized # Hmm, probably invalid syntax, so return unknown. return 'UNKNOWN' -class Identifier(TokenList): +class Identifier(NameAliasMixin, TokenList): """Represents an identifier. Identifiers may have aliases or typecasts. @@ -477,6 +484,13 @@ def get_identifiers(self): yield token +class TypedLiteral(TokenList): + """A typed literal, such as "date '2001-09-28'" or "interval '2 hours'".""" + M_OPEN = [(T.Name.Builtin, None), (T.Keyword, "TIMESTAMP")] + M_CLOSE = T.String.Single, None + M_EXTEND = T.Keyword, ("DAY", "HOUR", "MINUTE", "MONTH", "SECOND", "YEAR") + + class Parenthesis(TokenList): """Tokens between parenthesis.""" M_OPEN = T.Punctuation, '(' @@ -599,7 +613,7 @@ def get_cases(self, skip_ws=False): return ret -class Function(TokenList): +class Function(NameAliasMixin, TokenList): """A function or procedure call.""" def get_parameters(self): diff --git a/sqlparse/tokens.py b/sqlparse/tokens.py index eefc0b49..d92bbdcf 100644 --- a/sqlparse/tokens.py +++ b/sqlparse/tokens.py @@ -1,6 +1,5 @@ -# -*- coding: utf-8 -*- # -# Copyright (C) 2009-2018 the sqlparse authors and contributors +# Copyright (C) 2009-2020 the sqlparse authors and contributors # # # This module is part of python-sqlparse and is released under diff --git a/sqlparse/utils.py b/sqlparse/utils.py index 3283274d..512f0385 100644 --- a/sqlparse/utils.py +++ b/sqlparse/utils.py @@ -1,6 +1,5 @@ -# -*- coding: utf-8 -*- # -# Copyright (C) 2009-2018 the sqlparse authors and contributors +# Copyright (C) 2009-2020 the sqlparse authors and contributors # # # This module is part of python-sqlparse and is released under @@ -10,7 +9,6 @@ import re from collections import deque from contextlib import contextmanager -from sqlparse.compat import text_type # This regular expression replaces the home-cooked parser that was here before. # It is much faster, but requires an extra post-processing step to get the @@ -40,7 +38,7 @@ def split_unquoted_newlines(stmt): Unlike str.splitlines(), this will ignore CR/LF/CR+LF if the requisite character is inside of a string.""" - text = text_type(stmt) + text = str(stmt) lines = SPLIT_REGEX.split(text) outputlines = [''] for line in lines: @@ -57,7 +55,7 @@ def remove_quotes(val): """Helper that removes surrounding quotes from strings.""" if val is None: return - if val[0] in ('"', "'") and val[0] == val[-1]: + if val[0] in ('"', "'", '`') and val[0] == val[-1]: val = val[1:-1] return val diff --git a/tests/conftest.py b/tests/conftest.py index f2473a43..939c481d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """Helpers for testing.""" import io @@ -35,7 +33,7 @@ def make_load_file(filename, encoding='utf-8'): # https://stackoverflow.com/questions/18011902/py-test-pass-a-parameter-to-a-fixture-function/33879151#33879151 # Syntax is noisy and requires specific variable names # And seems to be limited to only 1 argument. 
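Returning to the TypedLiteral group introduced above, a short sketch of how it surfaces to callers, patterned on the test_comparison_with_typed_literal case added later in this patch:

    import sqlparse
    from sqlparse import sql

    # DATE '...' is grouped into a single TypedLiteral node, so the
    # right-hand side of the comparison is one token.
    p = sqlparse.parse("foo = DATE 'bar.baz'")[0]
    comp = p.tokens[0]
    assert isinstance(comp, sql.Comparison)
    assert isinstance(comp.right, sql.TypedLiteral)
    assert comp.right.value == "DATE 'bar.baz'"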
- with io.open(filepath(filename), encoding=encoding) as f: + with open(filepath(filename), encoding=encoding) as f: return f.read().strip() return make_load_file @@ -44,6 +42,6 @@ def make_load_file(filename, encoding='utf-8'): @pytest.fixture() def get_stream(filepath): def make_stream(filename, encoding='utf-8'): - return io.open(filepath(filename), encoding=encoding) + return open(filepath(filename), encoding=encoding) return make_stream diff --git a/tests/files/casewhen_procedure.sql b/tests/files/casewhen_procedure.sql new file mode 100644 index 00000000..e590d49b --- /dev/null +++ b/tests/files/casewhen_procedure.sql @@ -0,0 +1,8 @@ +create procedure procName() +begin + select case when column = 'value' then column else 0 end; +end; +create procedure procName() +begin + select 1; +end; diff --git a/tests/files/mysql_handler.sql b/tests/files/mysql_handler.sql new file mode 100644 index 00000000..702374ef --- /dev/null +++ b/tests/files/mysql_handler.sql @@ -0,0 +1,10 @@ +create procedure proc1() +begin + declare handler for foo begin end; + select 1; +end; + +create procedure proc2() +begin + select 1; +end; diff --git a/tests/files/slashcomment.sql b/tests/files/slashcomment.sql deleted file mode 100644 index 90e3089d..00000000 --- a/tests/files/slashcomment.sql +++ /dev/null @@ -1,5 +0,0 @@ -select * from user; -//select * from host; -select * from user; -select * // foo; -from foo; diff --git a/tests/test_cli.py b/tests/test_cli.py index 5f1ea0e5..b681a60b 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - import subprocess import sys @@ -71,75 +69,54 @@ def test_stdout(filepath, load_file, capsys): def test_script(): # Call with the --help option as a basic sanity check. - cmd = "{0:s} -m sqlparse.cli --help".format(sys.executable) + cmd = "{:s} -m sqlparse.cli --help".format(sys.executable) assert subprocess.call(cmd.split()) == 0 -def test_encoding_utf8_stdout(filepath, load_file, capfd): - path = filepath('encoding_utf8.sql') - expected = load_file('encoding_utf8.sql', 'utf-8') - sys.stdout.encoding = 'utf-8' - sqlparse.cli.main([path]) - out, _ = capfd.readouterr() - assert out == expected - - -def test_encoding_utf8_output_file(filepath, load_file, tmpdir): - in_path = filepath('encoding_utf8.sql') - expected = load_file('encoding_utf8.sql', 'utf-8') - out_path = tmpdir.dirname + '/encoding_utf8.out.sql' - sqlparse.cli.main([in_path, '-o', out_path]) - out = load_file(out_path, 'utf-8') - assert out == expected - - -def test_encoding_gbk_stdout(filepath, load_file, capfd): - path = filepath('encoding_gbk.sql') - expected = load_file('encoding_gbk.sql', 'gbk') - sys.stdout.encoding = 'gbk' - sqlparse.cli.main([path, '--encoding', 'gbk']) +@pytest.mark.parametrize('fpath, encoding', ( + ('encoding_utf8.sql', 'utf-8'), + ('encoding_gbk.sql', 'gbk'), +)) +def test_encoding_stdout(fpath, encoding, filepath, load_file, capfd): + path = filepath(fpath) + expected = load_file(fpath, encoding) + sqlparse.cli.main([path, '--encoding', encoding]) out, _ = capfd.readouterr() assert out == expected -def test_encoding_gbk_output_file(filepath, load_file, tmpdir): - in_path = filepath('encoding_gbk.sql') - expected = load_file('encoding_gbk.sql', 'gbk') - out_path = tmpdir.dirname + '/encoding_gbk.out.sql' - sqlparse.cli.main([in_path, '--encoding', 'gbk', '-o', out_path]) - out = load_file(out_path, 'gbk') +@pytest.mark.parametrize('fpath, encoding', ( + ('encoding_utf8.sql', 'utf-8'), + ('encoding_gbk.sql', 'gbk'), +)) +def 
test_encoding_output_file(fpath, encoding, filepath, load_file, tmpdir): + in_path = filepath(fpath) + expected = load_file(fpath, encoding) + out_path = tmpdir.dirname + '/encoding_out.sql' + sqlparse.cli.main([in_path, '--encoding', encoding, '-o', out_path]) + out = load_file(out_path, encoding) assert out == expected -def test_encoding_stdin_utf8(filepath, load_file, capfd): - path = filepath('encoding_utf8.sql') - expected = load_file('encoding_utf8.sql', 'utf-8') +@pytest.mark.parametrize('fpath, encoding', ( + ('encoding_utf8.sql', 'utf-8'), + ('encoding_gbk.sql', 'gbk'), +)) +def test_encoding_stdin(fpath, encoding, filepath, load_file, capfd): + path = filepath(fpath) + expected = load_file(fpath, encoding) old_stdin = sys.stdin - with open(path, 'r') as f: + with open(path) as f: sys.stdin = f - sys.stdout.encoding = 'utf-8' - sqlparse.cli.main(['-']) + sqlparse.cli.main(['-', '--encoding', encoding]) sys.stdin = old_stdin out, _ = capfd.readouterr() assert out == expected -def test_encoding_stdin_gbk(filepath, load_file, capfd): - path = filepath('encoding_gbk.sql') - expected = load_file('encoding_gbk.sql', 'gbk') - old_stdin = sys.stdin - with open(path, 'r') as stream: - sys.stdin = stream - sys.stdout.encoding = 'gbk' - sqlparse.cli.main(['-', '--encoding', 'gbk']) - sys.stdin = old_stdin - out, _ = capfd.readouterr() - assert out == expected - - def test_encoding(filepath, capsys): path = filepath('test_cp1251.sql') - expected = u'insert into foo values (1); -- Песня про надежду\n' + expected = 'insert into foo values (1); -- Песня про надежду\n' sqlparse.cli.main([path, '--encoding=cp1251']) out, _ = capsys.readouterr() assert out == expected diff --git a/tests/test_format.py b/tests/test_format.py index 811e0833..70bb8055 100644 --- a/tests/test_format.py +++ b/tests/test_format.py @@ -1,12 +1,10 @@ -# -*- coding: utf-8 -*- - import pytest import sqlparse from sqlparse.exceptions import SQLParseError -class TestFormat(object): +class TestFormat: def test_keywordcase(self): sql = 'select * from bar; -- select foo\n' res = sqlparse.format(sql, keyword_case='upper') @@ -43,26 +41,26 @@ def test_identifiercase_quotes(self): def test_strip_comments_single(self): sql = 'select *-- statement starts here\nfrom foo' res = sqlparse.format(sql, strip_comments=True) - assert res == 'select * from foo' + assert res == 'select *\nfrom foo' sql = 'select * -- statement starts here\nfrom foo' res = sqlparse.format(sql, strip_comments=True) - assert res == 'select * from foo' + assert res == 'select *\nfrom foo' sql = 'select-- foo\nfrom -- bar\nwhere' res = sqlparse.format(sql, strip_comments=True) - assert res == 'select from where' + assert res == 'select\nfrom\nwhere' sql = 'select *-- statement starts here\n\nfrom foo' res = sqlparse.format(sql, strip_comments=True) - assert res == 'select * from foo' + assert res == 'select *\n\nfrom foo' sql = 'select * from foo-- statement starts here\nwhere' res = sqlparse.format(sql, strip_comments=True) - assert res == 'select * from foo where' + assert res == 'select * from foo\nwhere' sql = 'select a-- statement starts here\nfrom foo' res = sqlparse.format(sql, strip_comments=True) - assert res == 'select a from foo' + assert res == 'select a\nfrom foo' sql = '--comment\nselect a-- statement starts here\n' \ 'from foo--comment\nf' res = sqlparse.format(sql, strip_comments=True) - assert res == 'select a from foo f' + assert res == 'select a\nfrom foo\nf' def test_strip_comments_invalid_option(self): sql = 'select-- foo\nfrom -- bar\nwhere' @@ 
-86,6 +84,23 @@ def test_strip_comments_multi(self): res = sqlparse.format(sql, strip_comments=True) assert res == 'select (select 2)' + def test_strip_comments_preserves_linebreak(self): + sql = 'select * -- a comment\r\nfrom foo' + res = sqlparse.format(sql, strip_comments=True) + assert res == 'select *\nfrom foo' + sql = 'select * -- a comment\nfrom foo' + res = sqlparse.format(sql, strip_comments=True) + assert res == 'select *\nfrom foo' + sql = 'select * -- a comment\rfrom foo' + res = sqlparse.format(sql, strip_comments=True) + assert res == 'select *\nfrom foo' + sql = 'select * -- a comment\r\n\r\nfrom foo' + res = sqlparse.format(sql, strip_comments=True) + assert res == 'select *\n\nfrom foo' + sql = 'select * -- a comment\n\nfrom foo' + res = sqlparse.format(sql, strip_comments=True) + assert res == 'select *\n\nfrom foo' + def test_strip_ws(self): f = lambda sql: sqlparse.format(sql, strip_whitespace=True) s = 'select\n* from foo\n\twhere ( 1 = 2 )\n' @@ -123,7 +138,7 @@ def test_notransform_of_quoted_crlf(self): == "SELECT some_column LIKE 'value\\\\\\'\r' WHERE id = 1\n") -class TestFormatReindentAligned(object): +class TestFormatReindentAligned: @staticmethod def formatter(sql): return sqlparse.format(sql, reindent_aligned=True) @@ -294,7 +309,7 @@ def test_window_functions(self): ' from table']) -class TestSpacesAroundOperators(object): +class TestSpacesAroundOperators: @staticmethod def formatter(sql): return sqlparse.format(sql, use_space_around_operators=True) @@ -321,7 +336,7 @@ def test_wildcard_vs_mult(self): assert self.formatter(sql) == 'select a * b - c from table' -class TestFormatReindent(object): +class TestFormatReindent: def test_option(self): with pytest.raises(SQLParseError): sqlparse.format('foo', reindent=2) @@ -598,7 +613,7 @@ def test_insert_values(self): ' , (5, 6)']) -class TestOutputFormat(object): +class TestOutputFormat: def test_python(self): sql = 'select * from foo;' f = lambda sql: sqlparse.format(sql, output_format='python') @@ -663,7 +678,7 @@ def test_format_column_ordering(): def test_truncate_strings(): - sql = "update foo set value = '{0}';".format('x' * 1000) + sql = "update foo set value = '{}';".format('x' * 1000) formatted = sqlparse.format(sql, truncate_strings=10) assert formatted == "update foo set value = 'xxxxxxxxxx[...]';" formatted = sqlparse.format(sql, truncate_strings=3, truncate_char='YYY') diff --git a/tests/test_grouping.py b/tests/test_grouping.py index 63a01f2d..03d16c5d 100644 --- a/tests/test_grouping.py +++ b/tests/test_grouping.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - import pytest import sqlparse @@ -33,6 +31,40 @@ def test_grouping_assignment(s): assert isinstance(parsed.tokens[0], sql.Assignment) +@pytest.mark.parametrize('s', ["x > DATE '2020-01-01'", "x > TIMESTAMP '2020-01-01 00:00:00'"]) +def test_grouping_typed_literal(s): + parsed = sqlparse.parse(s)[0] + assert isinstance(parsed[0][4], sql.TypedLiteral) + + +@pytest.mark.parametrize('s, a, b', [ + ('select a from b where c < d + e', sql.Identifier, sql.Identifier), + ('select a from b where c < d + interval \'1 day\'', sql.Identifier, sql.TypedLiteral), + ('select a from b where c < d + interval \'6\' month', sql.Identifier, sql.TypedLiteral), + ('select a from b where c < current_timestamp - interval \'1 day\'', sql.Token, sql.TypedLiteral), +]) +def test_compare_expr(s, a, b): + parsed = sqlparse.parse(s)[0] + assert str(parsed) == s + assert isinstance(parsed.tokens[2], sql.Identifier) + assert isinstance(parsed.tokens[6], sql.Identifier) + assert 
isinstance(parsed.tokens[8], sql.Where) + assert len(parsed.tokens) == 9 + where = parsed.tokens[8] + assert isinstance(where.tokens[2], sql.Comparison) + assert len(where.tokens) == 3 + comparison = where.tokens[2] + assert isinstance(comparison.tokens[0], sql.Identifier) + assert comparison.tokens[2].ttype is T.Operator.Comparison + assert isinstance(comparison.tokens[4], sql.Operation) + assert len(comparison.tokens) == 5 + operation = comparison.tokens[4] + assert isinstance(operation.tokens[0], a) + assert operation.tokens[2].ttype is T.Operator + assert isinstance(operation.tokens[4], b) + assert len(operation.tokens) == 5 + + def test_grouping_identifiers(): s = 'select foo.bar from "myscheme"."table" where fail. order' parsed = sqlparse.parse(s)[0] @@ -127,6 +159,14 @@ def test_grouping_identifier_invalid_in_middle(): assert p[3].ttype == T.Whitespace assert str(p[2]) == 'foo.' +@pytest.mark.parametrize('s', ['foo as (select *)', 'foo as(select *)']) +def test_grouping_identifer_as(s): + # issue507 + p = sqlparse.parse(s)[0] + assert isinstance(p.tokens[0], sql.Identifier) + token = p.tokens[0].tokens[2] + assert token.ttype == T.Keyword + assert token.normalized == 'AS' def test_grouping_identifier_as_invalid(): # issue8 @@ -284,6 +324,11 @@ def test_grouping_alias_case(): assert p.tokens[0].get_alias() == 'foo' +def test_grouping_alias_ctas(): + p = sqlparse.parse('CREATE TABLE tbl1 AS SELECT coalesce(t1.col1, 0) AS col1 FROM t1')[0] + assert p.tokens[10].get_alias() == 'col1' + assert isinstance(p.tokens[10].tokens[0], sql.Function) + def test_grouping_subquery_no_parens(): # Not totally sure if this is the right approach... # When a THEN clause contains a subquery w/o parenthesis around it *and* @@ -294,9 +339,10 @@ def test_grouping_subquery_no_parens(): assert isinstance(p.tokens[0], sql.Case) -def test_grouping_alias_returns_none(): - # see issue185 - p = sqlparse.parse('foo.bar')[0] +@pytest.mark.parametrize('s', ['foo.bar', 'x, y', 'x > y', 'x / y']) +def test_grouping_alias_returns_none(s): + # see issue185 and issue445 + p = sqlparse.parse(s)[0] assert len(p.tokens) == 1 assert p.tokens[0].get_alias() is None @@ -348,10 +394,6 @@ def f(sql): assert f(' update foo').get_type() == 'UPDATE' assert f('\nupdate foo').get_type() == 'UPDATE' assert f('foo').get_type() == 'UNKNOWN' - # Statements that have a whitespace after the closing semicolon - # are parsed as two statements where later only consists of the - # trailing whitespace. 
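A compact companion to the alias tests above, showing the alias forms NameAliasMixin recognizes and the None case (the first and last inputs come from the surrounding tests; 'foo AS bar' is an extra illustrative input):

    import sqlparse

    # "name alias" without AS still yields an alias ...
    assert sqlparse.parse('1 foo')[0].tokens[0].get_alias() == 'foo'
    # ... as does the explicit "name AS alias" form.
    assert sqlparse.parse('foo AS bar')[0].tokens[0].get_alias() == 'bar'
    # A bare dotted name has no alias (issue185/issue445).
    assert sqlparse.parse('foo.bar')[0].tokens[0].get_alias() is None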
- assert f('\n').get_type() == 'UNKNOWN' def test_identifier_with_operators(): @@ -435,15 +477,54 @@ def test_comparison_with_parenthesis(): assert comp.right.ttype is T.Number.Integer -def test_comparison_with_strings(): +@pytest.mark.parametrize('operator', ( + '=', '!=', '>', '<', '<=', '>=', '~', '~~', '!~~', + 'LIKE', 'NOT LIKE', 'ILIKE', 'NOT ILIKE', +)) +def test_comparison_with_strings(operator): # issue148 - p = sqlparse.parse("foo = 'bar'")[0] + p = sqlparse.parse("foo {} 'bar'".format(operator))[0] assert len(p.tokens) == 1 assert isinstance(p.tokens[0], sql.Comparison) assert p.tokens[0].right.value == "'bar'" assert p.tokens[0].right.ttype == T.String.Single +def test_like_and_ilike_comparison(): + def validate_where_clause(where_clause, expected_tokens): + assert len(where_clause.tokens) == len(expected_tokens) + for where_token, expected_token in zip(where_clause, expected_tokens): + expected_ttype, expected_value = expected_token + if where_token.ttype is not None: + assert where_token.match(expected_ttype, expected_value, regex=True) + else: + # Certain tokens, such as comparison tokens, do not define a ttype that can be + # matched against. For these tokens, we ensure that the token instance is of + # the expected type and has a value conforming to specified regular expression + import re + assert (isinstance(where_token, expected_ttype) + and re.match(expected_value, where_token.value)) + + [p1] = sqlparse.parse("select * from mytable where mytable.mycolumn LIKE 'expr%' limit 5;") + [p1_where] = [token for token in p1 if isinstance(token, sql.Where)] + validate_where_clause(p1_where, [ + (T.Keyword, "where"), + (T.Whitespace, None), + (sql.Comparison, r"mytable.mycolumn LIKE.*"), + (T.Whitespace, None), + ]) + + [p2] = sqlparse.parse( + "select * from mytable where mycolumn NOT ILIKE '-expr' group by othercolumn;") + [p2_where] = [token for token in p2 if isinstance(token, sql.Where)] + validate_where_clause(p2_where, [ + (T.Keyword, "where"), + (T.Whitespace, None), + (sql.Comparison, r"mycolumn NOT ILIKE.*"), + (T.Whitespace, None), + ]) + + def test_comparison_with_functions(): # issue230 p = sqlparse.parse('foo = DATE(bar.baz)')[0] @@ -468,9 +549,20 @@ def test_comparison_with_functions(): assert p.tokens[0].right.value == 'bar.baz' +def test_comparison_with_typed_literal(): + p = sqlparse.parse("foo = DATE 'bar.baz'")[0] + assert len(p.tokens) == 1 + comp = p.tokens[0] + assert isinstance(comp, sql.Comparison) + assert len(comp.tokens) == 5 + assert comp.left.value == 'foo' + assert isinstance(comp.right, sql.TypedLiteral) + assert comp.right.value == "DATE 'bar.baz'" + + @pytest.mark.parametrize('start', ['FOR', 'FOREACH']) def test_forloops(start): - p = sqlparse.parse('{0} foo in bar LOOP foobar END LOOP'.format(start))[0] + p = sqlparse.parse('{} foo in bar LOOP foobar END LOOP'.format(start))[0] assert (len(p.tokens)) == 1 assert isinstance(p.tokens[0], sql.For) @@ -550,3 +642,15 @@ def test_aliased_literal_without_as(): p = sqlparse.parse('1 foo')[0].tokens assert len(p) == 1 assert p[0].get_alias() == 'foo' + + +def test_grouping_as_cte(): + p = sqlparse.parse('foo AS WITH apple AS 1, banana AS 2')[0].tokens + assert len(p) > 4 + assert p[0].get_alias() is None + assert p[2].value == 'AS' + assert p[4].value == 'WITH' + +def test_grouping_create_table(): + p = sqlparse.parse("create table db.tbl (a string)")[0].tokens + assert p[4].value == "db.tbl" diff --git a/tests/test_keywords.py b/tests/test_keywords.py index c197f367..b26e9b45 100644 --- 
a/tests/test_keywords.py +++ b/tests/test_keywords.py @@ -1,8 +1,7 @@ -# -*- coding: utf-8 -*- import pytest from sqlparse import tokens -from sqlparse.keywords import SQL_REGEX +from sqlparse.lexer import Lexer class TestSQLREGEX: @@ -10,5 +9,5 @@ class TestSQLREGEX: '1.', '-1.', '.1', '-.1']) def test_float_numbers(self, number): - ttype = next(tt for action, tt in SQL_REGEX if action(number)) + ttype = next(tt for action, tt in Lexer.get_default_instance()._SQL_REGEX if action(number)) assert tokens.Number.Float == ttype diff --git a/tests/test_parse.py b/tests/test_parse.py index f2a2bda6..5feef5a7 100644 --- a/tests/test_parse.py +++ b/tests/test_parse.py @@ -1,12 +1,11 @@ -# -*- coding: utf-8 -*- - """Tests sqlparse.parse().""" +from io import StringIO import pytest import sqlparse -from sqlparse import sql, tokens as T -from sqlparse.compat import StringIO, text_type +from sqlparse import sql, tokens as T, keywords +from sqlparse.lexer import Lexer def test_parse_tokenize(): @@ -102,6 +101,12 @@ def test_parse_square_brackets_notation_isnt_too_greedy(): assert t[0].tokens[-1].get_real_name() == '[bar]' +def test_parse_square_brackets_notation_isnt_too_greedy2(): + # see issue583 + t = sqlparse.parse('[(foo[i])]')[0].tokens + assert isinstance(t[0], sql.SquareBrackets) # not Identifier! + + def test_parse_keyword_like_identifier(): # see issue47 t = sqlparse.parse('foo.key')[0].tokens @@ -128,6 +133,12 @@ def test_parse_nested_function(): assert type(t[0]) is sql.Function +def test_parse_div_operator(): + p = sqlparse.parse('col1 DIV 5 AS div_col1')[0].tokens + assert p[0].tokens[0].tokens[2].ttype is T.Operator + assert p[0].get_alias() == 'div_col1' + + def test_quoted_identifier(): t = sqlparse.parse('select x.y as "z" from foo')[0].tokens assert isinstance(t[2], sql.Identifier) @@ -138,6 +149,7 @@ def test_quoted_identifier(): @pytest.mark.parametrize('name', [ 'foo', '_foo', # issue175 '1_data', # valid MySQL table name, see issue337 + '業者名稱', # valid at least for SQLite3, see issue641 ]) def test_valid_identifier_names(name): t = sqlparse.parse(name)[0].tokens @@ -184,11 +196,16 @@ def test_placeholder(ph): assert p[0].ttype is T.Name.Placeholder -@pytest.mark.parametrize('num', ['6.67428E-8', '1.988e33', '1e-12']) -def test_scientific_numbers(num): +@pytest.mark.parametrize('num, expected', [ + ('6.67428E-8', T.Number.Float), + ('1.988e33', T.Number.Float), + ('1e-12', T.Number.Float), + ('e1', None), +]) +def test_scientific_numbers(num, expected): p = sqlparse.parse(num)[0].tokens assert len(p) == 1 - assert p[0].ttype is T.Number.Float + assert p[0].ttype is expected def test_single_quotes_are_strings(): @@ -332,7 +349,8 @@ def test_pprint(): "| | `- 0 Name 'd0'", "| |- 10 Punctuation ','", "| |- 11 Whitespace ' '", - "| `- 12 Float 'e0'", + "| `- 12 Identifier 'e0'", + "| `- 0 Name 'e0'", "|- 3 Whitespace ' '", "|- 4 Keyword 'from'", "|- 5 Whitespace ' '", @@ -409,42 +427,42 @@ def test_dbldollar_as_literal(sql, is_literal): def test_non_ascii(): - _test_non_ascii = u"insert into test (id, name) values (1, 'тест');" + _test_non_ascii = "insert into test (id, name) values (1, 'тест');" s = _test_non_ascii stmts = sqlparse.parse(s) assert len(stmts) == 1 statement = stmts[0] - assert text_type(statement) == s + assert str(statement) == s assert statement._pprint_tree() is None s = _test_non_ascii.encode('utf-8') stmts = sqlparse.parse(s, 'utf-8') assert len(stmts) == 1 statement = stmts[0] - assert text_type(statement) == _test_non_ascii + assert str(statement) == 
_test_non_ascii assert statement._pprint_tree() is None def test_get_real_name(): # issue 369 - s = u"update a t set t.b=1" + s = "update a t set t.b=1" stmts = sqlparse.parse(s) assert len(stmts) == 1 - assert 'a' == stmts[0].get_real_name() - assert 't' == stmts[0].get_alias() + assert 'a' == stmts[0].tokens[2].get_real_name() + assert 't' == stmts[0].tokens[2].get_alias() def test_from_subquery(): # issue 446 - s = u'from(select 1)' + s = 'from(select 1)' stmts = sqlparse.parse(s) assert len(stmts) == 1 assert len(stmts[0].tokens) == 2 assert stmts[0].tokens[0].value == 'from' assert stmts[0].tokens[0].ttype == T.Keyword - s = u'from (select 1)' + s = 'from (select 1)' stmts = sqlparse.parse(s) assert len(stmts) == 1 assert len(stmts[0].tokens) == 3 @@ -472,3 +490,79 @@ def test_parenthesis(): T.Newline, T.Newline, T.Punctuation] + + +def test_configurable_keywords(): + sql = """select * from foo BACON SPAM EGGS;""" + tokens = sqlparse.parse(sql)[0] + + assert list( + (t.ttype, t.value) + for t in tokens + if t.ttype not in sqlparse.tokens.Whitespace + ) == [ + (sqlparse.tokens.Keyword.DML, "select"), + (sqlparse.tokens.Wildcard, "*"), + (sqlparse.tokens.Keyword, "from"), + (None, "foo BACON"), + (None, "SPAM EGGS"), + (sqlparse.tokens.Punctuation, ";"), + ] + + Lexer.get_default_instance().add_keywords( + { + "BACON": sqlparse.tokens.Name.Builtin, + "SPAM": sqlparse.tokens.Keyword, + "EGGS": sqlparse.tokens.Keyword, + } + ) + + tokens = sqlparse.parse(sql)[0] + + # reset the syntax for later tests. + Lexer.get_default_instance().default_initialization() + + assert list( + (t.ttype, t.value) + for t in tokens + if t.ttype not in sqlparse.tokens.Whitespace + ) == [ + (sqlparse.tokens.Keyword.DML, "select"), + (sqlparse.tokens.Wildcard, "*"), + (sqlparse.tokens.Keyword, "from"), + (None, "foo"), + (sqlparse.tokens.Name.Builtin, "BACON"), + (sqlparse.tokens.Keyword, "SPAM"), + (sqlparse.tokens.Keyword, "EGGS"), + (sqlparse.tokens.Punctuation, ";"), + ] + + +def test_configurable_regex(): + lex = Lexer.get_default_instance() + lex.clear() + + my_regex = (r"ZORDER\s+BY\b", sqlparse.tokens.Keyword) + + lex.set_SQL_REGEX( + keywords.SQL_REGEX[:38] + + [my_regex] + + keywords.SQL_REGEX[38:] + ) + lex.add_keywords(keywords.KEYWORDS_COMMON) + lex.add_keywords(keywords.KEYWORDS_ORACLE) + lex.add_keywords(keywords.KEYWORDS_PLPGSQL) + lex.add_keywords(keywords.KEYWORDS_HQL) + lex.add_keywords(keywords.KEYWORDS_MSACCESS) + lex.add_keywords(keywords.KEYWORDS) + + tokens = sqlparse.parse("select * from foo zorder by bar;")[0] + + # reset the syntax for later tests. 
+ Lexer.get_default_instance().default_initialization() + + assert list( + (t.ttype, t.value) + for t in tokens + if t.ttype not in sqlparse.tokens.Whitespace + )[4] == (sqlparse.tokens.Keyword, "zorder by") diff --git a/tests/test_regressions.py b/tests/test_regressions.py index 1d52ea78..bc8b7dd3 100644 --- a/tests/test_regressions.py +++ b/tests/test_regressions.py @@ -1,10 +1,7 @@ -# -*- coding: utf-8 -*- - import pytest import sqlparse from sqlparse import sql, tokens as T -from sqlparse.compat import PY2 def test_issue9(): @@ -20,9 +17,9 @@ def test_issue9(): def test_issue13(): - parsed = sqlparse.parse(("select 'one';\n" - "select 'two\\'';\n" - "select 'three';")) + parsed = sqlparse.parse("select 'one';\n" + "select 'two\\'';\n" + "select 'three';") assert len(parsed) == 3 assert str(parsed[1]).strip() == "select 'two\\'';" @@ -73,8 +70,8 @@ def test_issue39(): def test_issue40(): # make sure identifier lists in subselects are grouped - p = sqlparse.parse(('SELECT id, name FROM ' - '(SELECT id, name FROM bar) as foo'))[0] + p = sqlparse.parse('SELECT id, name FROM ' + '(SELECT id, name FROM bar) as foo')[0] assert len(p.tokens) == 7 assert p.tokens[2].__class__ == sql.IdentifierList assert p.tokens[-1].__class__ == sql.Identifier @@ -149,7 +146,7 @@ def test_issue83(): def test_comment_encoding_when_reindent(): # There was an UnicodeEncodeError in the reindent filter that # casted every comment followed by a keyword to str. - sql = u'select foo -- Comment containing Ümläuts\nfrom bar' + sql = 'select foo -- Comment containing Ümläuts\nfrom bar' formatted = sqlparse.format(sql, reindent=True) assert formatted == sql @@ -158,11 +155,9 @@ def test_parse_sql_with_binary(): # See https://github.com/andialbrecht/sqlparse/pull/88 # digest = '‚|ËêŠplL4¡h‘øN{' digest = '\x82|\xcb\x0e\xea\x8aplL4\xa1h\x91\xf8N{' - sql = "select * from foo where bar = '{0}'".format(digest) + sql = "select * from foo where bar = '{}'".format(digest) formatted = sqlparse.format(sql, reindent=True) - tformatted = "select *\nfrom foo\nwhere bar = '{0}'".format(digest) - if PY2: - tformatted = tformatted.decode('unicode-escape') + tformatted = "select *\nfrom foo\nwhere bar = '{}'".format(digest) assert formatted == tformatted @@ -180,7 +175,7 @@ def test_format_accepts_encoding(load_file): # issue20 sql = load_file('test_cp1251.sql', 'cp1251') formatted = sqlparse.format(sql, reindent=True, encoding='cp1251') - tformatted = u'insert into foo\nvalues (1); -- Песня про надежду' + tformatted = 'insert into foo\nvalues (1); -- Песня про надежду' assert formatted == tformatted @@ -275,7 +270,7 @@ def test_issue186_get_type(): def test_issue212_py2unicode(): - t1 = sql.Token(T.String, u'schöner ') + t1 = sql.Token(T.String, 'schöner ') t2 = sql.Token(T.String, 'bug') token_list = sql.TokenList([t1, t2]) assert str(token_list) == 'schöner bug' @@ -337,11 +332,9 @@ def test_issue315_utf8_by_default(): '\x9b\xb2.' 
'\xec\x82\xac\xeb\x9e\x91\xed\x95\xb4\xec\x9a\x94' ) - sql = "select * from foo where bar = '{0}'".format(digest) + sql = "select * from foo where bar = '{}'".format(digest) formatted = sqlparse.format(sql, reindent=True) - tformatted = "select *\nfrom foo\nwhere bar = '{0}'".format(digest) - if PY2: - tformatted = tformatted.decode('utf-8') + tformatted = "select *\nfrom foo\nwhere bar = '{}'".format(digest) assert formatted == tformatted @@ -373,3 +366,73 @@ def test_issue469_copy_as_psql_command(): '\\copy select * from foo', keyword_case='upper', identifier_case='capitalize') assert formatted == '\\copy SELECT * FROM Foo' + + +@pytest.mark.xfail(reason='Needs to be fixed') +def test_issue484_comments_and_newlines(): + formatted = sqlparse.format('\n'.join([ + 'Create table myTable', + '(', + ' myId TINYINT NOT NULL, --my special comment', + ' myName VARCHAR2(100) NOT NULL', + ')']), + strip_comments=True) + assert formatted == ('\n'.join([ + 'Create table myTable', + '(', + ' myId TINYINT NOT NULL,', + ' myName VARCHAR2(100) NOT NULL', + ')'])) + + +def test_issue485_split_multi(): + p_sql = '''CREATE OR REPLACE RULE ruled_tab_2rules AS ON INSERT +TO public.ruled_tab +DO instead ( +select 1; +select 2; +);''' + assert len(sqlparse.split(p_sql)) == 1 + + +def test_issue489_tzcasts(): + p = sqlparse.parse('select bar at time zone \'UTC\' as foo')[0] + assert p.tokens[-1].has_alias() is True + assert p.tokens[-1].get_alias() == 'foo' + + +def test_issue562_tzcasts(): + # Test that whitespace between 'from' and 'bar' is retained + formatted = sqlparse.format( + 'SELECT f(HOUR from bar AT TIME ZONE \'UTC\') from foo', reindent=True + ) + assert formatted == \ + 'SELECT f(HOUR\n from bar AT TIME ZONE \'UTC\')\nfrom foo' + + +def test_as_in_parentheses_indents(): + # did raise NoneType has no attribute is_group in _process_parentheses + formatted = sqlparse.format('(as foo)', reindent=True) + assert formatted == '(as foo)' + + +def test_format_invalid_where_clause(): + # did raise ValueError + formatted = sqlparse.format('where, foo', reindent=True) + assert formatted == 'where, foo' + + +def test_splitting_at_and_backticks_issue588(): + splitted = sqlparse.split( + 'grant foo to user1@`myhost`; grant bar to user1@`myhost`;') + assert len(splitted) == 2 + assert splitted[-1] == 'grant bar to user1@`myhost`;' + + +def test_comment_between_cte_clauses_issue632(): + p, = sqlparse.parse(""" + WITH foo AS (), + -- A comment before baz subquery + baz AS () + SELECT * FROM baz;""") + assert p.get_type() == "SELECT" diff --git a/tests/test_split.py b/tests/test_split.py index ccb84a89..e79750e8 100644 --- a/tests/test_split.py +++ b/tests/test_split.py @@ -1,13 +1,11 @@ -# -*- coding: utf-8 -*- - # Tests splitting functions. 
import types +from io import StringIO import pytest import sqlparse -from sqlparse.compat import StringIO, text_type def test_split_semicolon(): @@ -20,8 +18,8 @@ def test_split_semicolon(): def test_split_backslash(): - stmts = sqlparse.parse(r"select '\\'; select '\''; select '\\\'';") - assert len(stmts) == 3 + stmts = sqlparse.parse("select '\'; select '\'';") + assert len(stmts) == 2 @pytest.mark.parametrize('fn', ['function.sql', @@ -33,7 +31,7 @@ def test_split_create_function(load_file, fn): sql = load_file(fn) stmts = sqlparse.parse(sql) assert len(stmts) == 1 - assert text_type(stmts[0]) == sql + assert str(stmts[0]) == sql def test_split_dashcomments(load_file): @@ -52,22 +50,6 @@ def test_split_dashcomments_eol(s): assert len(stmts) == 1 -def test_split_slashcomments(load_file): - sql = load_file('slashcomment.sql') - stmts = sqlparse.parse(sql) - assert len(stmts) == 3 - assert ''.join(str(q) for q in stmts) == sql - - -@pytest.mark.parametrize('s', ['select foo; // comment\n', - 'select foo; // comment\r', - 'select foo; // comment\r\n', - 'select foo; // comment']) -def test_split_slashcomments_eol(s): - stmts = sqlparse.parse(s) - assert len(stmts) == 1 - - def test_split_begintag(load_file): sql = load_file('begintag.sql') stmts = sqlparse.parse(sql) @@ -90,12 +72,12 @@ def test_split_dropif(): def test_split_comment_with_umlaut(): - sql = (u'select * from foo;\n' - u'-- Testing an umlaut: ä\n' - u'select * from bar;') + sql = ('select * from foo;\n' + '-- Testing an umlaut: ä\n' + 'select * from bar;') stmts = sqlparse.parse(sql) assert len(stmts) == 2 - assert ''.join(text_type(q) for q in stmts) == sql + assert ''.join(str(q) for q in stmts) == sql def test_split_comment_end_of_line(): @@ -115,6 +97,12 @@ def test_split_casewhen(): assert len(stmts) == 2 +def test_split_casewhen_procedure(load_file): + # see issue580 + stmts = sqlparse.split(load_file('casewhen_procedure.sql')) + assert len(stmts) == 2 + + def test_split_cursor_declare(): sql = ('DECLARE CURSOR "foo" AS SELECT 1;\n' 'SELECT 2;') @@ -141,11 +129,11 @@ def test_split_stream(): def test_split_encoding_parsestream(): stream = StringIO("SELECT 1; SELECT 2;") stmts = list(sqlparse.parsestream(stream)) - assert isinstance(stmts[0].tokens[0].value, text_type) + assert isinstance(stmts[0].tokens[0].value, str) def test_split_unicode_parsestream(): - stream = StringIO(u'SELECT ö') + stream = StringIO('SELECT ö') stmts = list(sqlparse.parsestream(stream)) assert str(stmts[0]) == 'SELECT ö' @@ -157,6 +145,13 @@ def test_split_simple(): assert stmts[1] == 'select * from bar;' +def test_split_ignores_empty_newlines(): + stmts = sqlparse.split('select foo;\nselect bar;\n') + assert len(stmts) == 2 + assert stmts[0] == 'select foo;' + assert stmts[1] == 'select bar;' + + def test_split_quotes_with_new_line(): stmts = sqlparse.split('select "foo\nbar"') assert len(stmts) == 1 @@ -165,3 +160,9 @@ def test_split_quotes_with_new_line(): stmts = sqlparse.split("select 'foo\n\bar'") assert len(stmts) == 1 assert stmts[0] == "select 'foo\n\bar'" + + +def test_split_mysql_handler_for(load_file): + # see issue581 + stmts = sqlparse.split(load_file('mysql_handler.sql')) + assert len(stmts) == 2 diff --git a/tests/test_tokenize.py b/tests/test_tokenize.py index fcd1102b..af0ba163 100644 --- a/tests/test_tokenize.py +++ b/tests/test_tokenize.py @@ -1,13 +1,11 @@ -# -*- coding: utf-8 -*- - import types +from io import StringIO import pytest import sqlparse from sqlparse import lexer from sqlparse import sql, tokens as T -from 
sqlparse.compat import StringIO def test_tokenize_simple(): @@ -152,7 +150,7 @@ def test_stream_error(): 'INNER JOIN', 'LEFT INNER JOIN']) def test_parse_join(expr): - p = sqlparse.parse('{0} foo'.format(expr))[0] + p = sqlparse.parse('{} foo'.format(expr))[0] assert len(p.tokens) == 3 assert p.tokens[0].ttype is T.Keyword @@ -171,6 +169,13 @@ def test_parse_endifloop(s): assert p.tokens[0].ttype is T.Keyword +@pytest.mark.parametrize('s', ['NULLS FIRST', 'NULLS LAST']) +def test_parse_nulls(s): # issue487 + p = sqlparse.parse(s)[0] + assert len(p.tokens) == 1 + assert p.tokens[0].ttype is T.Keyword + + @pytest.mark.parametrize('s', [ 'foo', 'Foo', @@ -197,6 +202,40 @@ def test_parse_order_by(): assert p.tokens[0].ttype is T.Keyword +def test_parse_window_as(): + p = sqlparse.parse('WINDOW w AS')[0] + assert len(p.tokens) == 5 + assert p.tokens[0].ttype is T.Keyword + + +@pytest.mark.parametrize('s', ( + "LIKE", "ILIKE", "NOT LIKE", "NOT ILIKE", + "NOT LIKE", "NOT ILIKE", +)) +def test_like_and_ilike_parsed_as_comparisons(s): + p = sqlparse.parse(s)[0] + assert len(p.tokens) == 1 + assert p.tokens[0].ttype == T.Operator.Comparison + + +@pytest.mark.parametrize('s', ( + "LIKEaaa", "bILIKE", "aaILIKEbb", "NOTLIKE", "NOTILIKE", +)) +def test_near_like_and_ilike_parsed_appropriately(s): + p = sqlparse.parse(s)[0] + assert len(p.tokens) == 1 + assert isinstance(p.tokens[0], sql.Identifier) + + +@pytest.mark.parametrize('s', ( + 'AT TIME ZONE \'UTC\'', +)) +def test_parse_tzcast(s): + p = sqlparse.parse(s)[0] + assert len(p.tokens) == 1 + assert p.tokens[0].ttype == T.Keyword.TZCast + + def test_cli_commands(): p = sqlparse.parse('\\copy')[0] assert len(p.tokens) == 1 diff --git a/tests/test_utils.py b/tests/test_utils.py new file mode 100644 index 00000000..d020f3fa --- /dev/null +++ b/tests/test_utils.py @@ -0,0 +1,12 @@ +import pytest + +from sqlparse import utils + + +@pytest.mark.parametrize('value, expected', ( + [None, None], + ['\'foo\'', 'foo'], + ['"foo"', 'foo'], + ['`foo`', 'foo'])) +def test_remove_quotes(value, expected): + assert utils.remove_quotes(value) == expected diff --git a/tox.ini b/tox.ini index 2e5010a8..0087d50e 100644 --- a/tox.ini +++ b/tox.ini @@ -1,22 +1,17 @@ [tox] skip_missing_interpreters = True envlist = - py27 - py34 py35 py36 py37 py38 - pypy_54 + pypy3 flake8 [testenv] deps = pytest pytest-cov - pytest-travis-fold -passenv = - TRAVIS commands = sqlformat --version pytest --cov=sqlparse {posargs}
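Closing out, the utils change from this patch in miniature (mirroring the new tests/test_utils.py above):

    from sqlparse import utils

    # remove_quotes() now strips backticks in addition to single and
    # double quotes.
    assert utils.remove_quotes('`foo`') == 'foo'
    assert utils.remove_quotes('"foo"') == 'foo'
    assert utils.remove_quotes("'foo'") == 'foo'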