From 440e58b27d1d9427bfdfec5e62844998041f49e4 Mon Sep 17 00:00:00 2001 From: Matt Robenolt Date: Mon, 21 Dec 2015 15:32:52 -0800 Subject: [PATCH 1/9] Pre-compile yaml file into py files This is a tremendous speedup vs parsing JSON Fixes #23 --- .gitignore | 3 +- MANIFEST.in | 1 - Makefile | 22 ++- setup.py | 115 ++++++++++++---- ua_parser/user_agent_parser.py | 207 +++++++++++++--------------- ua_parser/user_agent_parser_test.py | 2 +- 6 files changed, 198 insertions(+), 152 deletions(-) diff --git a/.gitignore b/.gitignore index dfac3ca..841470f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,9 @@ *.pyc *.egg-info/ .eggs/ +.cache/ build/ dist/ tmp/ regexes.yaml -regexes.json +_regexes.py diff --git a/MANIFEST.in b/MANIFEST.in index 89e6544..bb3ec5f 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,2 +1 @@ include README.md -include ua_parser/regexes.json diff --git a/Makefile b/Makefile index 52f7ab1..85e7b3d 100644 --- a/Makefile +++ b/Makefile @@ -1,24 +1,18 @@ -all: prep test +all: test -prep: - #git submodule update --init - #sudo apt-get install python-yaml - -test: - @#test ! -d tmp && mkdir tmp - @export PYTHONPATH=tmp && python setup.py develop -d tmp +test: clean + @mkdir -p tmp + @PYTHONPATH=tmp python setup.py develop -d tmp @# run all tests - @python ua_parser/user_agent_parser_test.py + @PYTHONPATH=tmp python ua_parser/user_agent_parser_test.py @# run a single test - @#python ua_parser/user_agent_parser_test.py ParseTest.testStringsDeviceBrandModel + @#PYTHONPATH=tmp python ua_parser/user_agent_parser_test.py ParseTest.testStringsDeviceBrandModel clean: - @rm -f ua_parser/user_agent_parser.pyc\ - ua_parser/regexes.yaml\ - ua_parser/regexes.json + @find . 
-name '*.pyc' -delete @rm -rf tmp\ ua_parser.egg-info\ dist\ build -.PHONY: all prep test clean +.PHONY: all test clean diff --git a/setup.py b/setup.py index 78f69a9..e8d9b84 100644 --- a/setup.py +++ b/setup.py @@ -1,38 +1,107 @@ #!/usr/bin/env python +import os +from distutils import log +from distutils.core import Command from setuptools import setup from setuptools.command.develop import develop as _develop from setuptools.command.sdist import sdist as _sdist -def install_regexes(): - print('Copying regexes.yaml to package directory...') - import os - cwd = os.path.abspath(os.path.dirname(__file__)) - yaml_src = os.path.join(cwd, 'uap-core', 'regexes.yaml') - if not os.path.exists(yaml_src): - raise RuntimeError( - 'Unable to find regexes.yaml, should be at %r' % yaml_src) +class build_regexes(Command): + description = 'build supporting regular expressions from uap-core' + user_options = [ + ('work-path=', 'w', + 'The working directory for source files. Defaults to .'), + ] - print('Converting regexes.yaml to regexes.json...') - import json - import yaml - json_dest = os.path.join(cwd, 'ua_parser', 'regexes.json') - with open(yaml_src, 'rb') as fp: - regexes = yaml.safe_load(fp) - with open(json_dest, "w") as f: - json.dump(regexes, f) + def initialize_options(self): + self.work_path = None + + def finalize_options(self): + if self.work_path is None: + self.work_path = os.path.realpath(os.path.join(os.path.dirname(__file__))) + + def run(self): + work_path = self.work_path + if os.path.exists(os.path.join(work_path, '.git')): + from subprocess import check_output + log.info("initializing git submodules") + check_output(['git', 'submodule', 'init'], cwd=work_path) + check_output(['git', 'submodule', 'update'], cwd=work_path) + + yaml_src = os.path.join(work_path, 'uap-core', 'regexes.yaml') + if not os.path.exists(yaml_src): + raise RuntimeError( + 'Unable to find regexes.yaml, should be at %r' % yaml_src) + + log.info('Converting regexes.yaml to 
_regexes.py...') + import yaml + py_dest = os.path.join(work_path, 'ua_parser', '_regexes.py') + with open(yaml_src, 'rb') as fp: + regexes = yaml.safe_load(fp) + with open(py_dest, 'wb') as fp: + fp.write('############################################\n') + fp.write('# NOTICE: This file is autogenerated from #\n') + fp.write('# regexes.yaml. Do not edit by hand, #\n') + fp.write('# instead, re-run `setup.py build_regexes` #\n') + fp.write('############################################\n') + fp.write('\n') + fp.write('from .user_agent_parser import (\n') + fp.write(' UserAgentParser, DeviceParser, OSParser,\n') + fp.write(')\n') + fp.write('\n') + fp.write('__all__ = (\n') + fp.write(' \'USER_AGENT_PARSERS\', \'DEVICE_PARSERS\', \'OS_PARSERS\',\n') + fp.write(')\n') + fp.write('\n') + fp.write('USER_AGENT_PARSERS = [\n') + for device_parser in regexes['user_agent_parsers']: + fp.write(' UserAgentParser(\n') + fp.write(' %r,\n' % device_parser['regex']) + fp.write(' %r,\n' % device_parser.get('family_replacement')) + fp.write(' %r,\n' % device_parser.get('v1_replacement')) + fp.write(' %r,\n' % device_parser.get('v2_replacement')) + fp.write(' ),\n') + fp.write(']\n') + fp.write('\n') + fp.write('DEVICE_PARSERS = [\n') + for device_parser in regexes['device_parsers']: + fp.write(' DeviceParser(\n') + fp.write(' %r,\n' % device_parser['regex']) + fp.write(' %r,\n' % device_parser.get('regex_flag')) + fp.write(' %r,\n' % device_parser.get('device_replacement')) + fp.write(' %r,\n' % device_parser.get('brand_replacement')) + fp.write(' %r,\n' % device_parser.get('model_replacement')) + fp.write(' ),\n') + fp.write(']\n') + fp.write('\n') + fp.write('OS_PARSERS = [\n') + for device_parser in regexes['os_parsers']: + fp.write(' OSParser(\n') + fp.write(' %r,\n' % device_parser['regex']) + fp.write(' %r,\n' % device_parser.get('os_replacement')) + fp.write(' %r,\n' % device_parser.get('os_v1_replacement')) + fp.write(' %r,\n' % device_parser.get('os_v2_replacement')) + 
fp.write(' ),\n') + fp.write(']\n') + fp.write('\n') class develop(_develop): def run(self): - install_regexes() + self.run_command('build_regexes') _develop.run(self) class sdist(_sdist): - def run(self): - install_regexes() - _sdist.run(self) + sub_commands = _sdist.sub_commands + [('build_regexes', None)] + + +cmdclass = { + 'develop': develop, + 'sdist': sdist, + 'build_regexes': build_regexes, +} setup( @@ -47,13 +116,9 @@ def run(self): zip_safe=False, url='https://github.com/ua-parser/uap-python', include_package_data=True, - package_data={'ua_parser': ['regexes.json']}, setup_requires=['pyyaml'], install_requires=[], - cmdclass={ - 'develop': develop, - 'sdist': sdist, - }, + cmdclass=cmdclass, classifiers=[ 'Development Status :: 4 - Beta', 'Environment :: Web Environment', diff --git a/ua_parser/user_agent_parser.py b/ua_parser/user_agent_parser.py index 5b2215a..85f4500 100644 --- a/ua_parser/user_agent_parser.py +++ b/ua_parser/user_agent_parser.py @@ -21,15 +21,6 @@ import os import re -# pip may copy regexes.yaml to different places depending on the OS. 
-# For example, on Mac pip copies regexes.yaml to the folder where -# user_agent_parser.py lives where as Fedora leaves regexes.yaml to "data" dir -# See https://github.com/tobie/ua-parser/issues/209 for the complete discussion - -ROOT_DIR = os.path.abspath(os.path.dirname(__file__)) -DATA_DIR = os.path.abspath(os.path.join(ROOT_DIR, '..', 'data')) -regex_dir = ROOT_DIR if os.path.exists(os.path.join(ROOT_DIR, 'regexes.yaml')) else DATA_DIR - class UserAgentParser(object): def __init__(self, pattern, family_replacement=None, v1_replacement=None, v2_replacement=None): @@ -455,107 +446,103 @@ def GetFilters(user_agent_string, js_user_agent_string=None, # Build the list of user agent parsers from YAML -UA_PARSER_YAML = os.getenv("UA_PARSER_YAML") -regexes = None - -if not UA_PARSER_YAML: - try: - from pkg_resources import resource_filename - json_path = resource_filename(__name__, 'regexes.json') - except ImportError: - json_path = os.path.join(ROOT_DIR, 'regexes.json') -else: +UA_PARSER_YAML = os.environ.get("UA_PARSER_YAML") +if UA_PARSER_YAML: # This will raise an ImportError if missing, obviously since it's no # longer a requirement import yaml - - with open(UA_PARSER_YAML) as yamlFile: - regexes = yaml.safe_load(yamlFile) - - -# If UA_PARSER_YAML is not specified, load regexes from regexes.json -if regexes is None: - import json - - with open(json_path) as fp: - regexes = json.load(fp) - - -USER_AGENT_PARSERS = [] -for _ua_parser in regexes['user_agent_parsers']: - _regex = _ua_parser['regex'] - - _family_replacement = None - if 'family_replacement' in _ua_parser: - _family_replacement = _ua_parser['family_replacement'] - - _v1_replacement = None - if 'v1_replacement' in _ua_parser: - _v1_replacement = _ua_parser['v1_replacement'] - - _v2_replacement = None - if 'v2_replacement' in _ua_parser: - _v2_replacement = _ua_parser['v2_replacement'] - - USER_AGENT_PARSERS.append(UserAgentParser(_regex, - _family_replacement, - _v1_replacement, - _v2_replacement)) - 
-OS_PARSERS = [] -for _os_parser in regexes['os_parsers']: - _regex = _os_parser['regex'] - - _os_replacement = None - if 'os_replacement' in _os_parser: - _os_replacement = _os_parser['os_replacement'] - - _os_v1_replacement = None - if 'os_v1_replacement' in _os_parser: - _os_v1_replacement = _os_parser['os_v1_replacement'] - - _os_v2_replacement = None - if 'os_v2_replacement' in _os_parser: - _os_v2_replacement = _os_parser['os_v2_replacement'] - - _os_v3_replacement = None - if 'os_v3_replacement' in _os_parser: - _os_v3_replacement = _os_parser['os_v3_replacement'] - - _os_v4_replacement = None - if 'os_v4_replacement' in _os_parser: - _os_v4_replacement = _os_parser['os_v4_replacement'] - - OS_PARSERS.append(OSParser(_regex, - _os_replacement, - _os_v1_replacement, - _os_v2_replacement, - _os_v3_replacement, - _os_v4_replacement)) - - -DEVICE_PARSERS = [] -for _device_parser in regexes['device_parsers']: - _regex = _device_parser['regex'] - - _regex_flag = None - if 'regex_flag' in _device_parser: - _regex_flag = _device_parser['regex_flag'] - - _device_replacement = None - if 'device_replacement' in _device_parser: - _device_replacement = _device_parser['device_replacement'] - - _brand_replacement = None - if 'brand_replacement' in _device_parser: - _brand_replacement = _device_parser['brand_replacement'] - - _model_replacement = None - if 'model_replacement' in _device_parser: - _model_replacement = _device_parser['model_replacement'] - - DEVICE_PARSERS.append(DeviceParser(_regex, - _regex_flag, - _device_replacement, - _brand_replacement, - _model_replacement)) + try: + # Try and use libyaml bindings if available since faster + from yaml import CSafeLoader as SafeLoader + except ImportError: + from yaml import SafeLoader + + with open(UA_PARSER_YAML) as fp: + regexes = yaml.load(fp, Loader=SafeLoader) + + USER_AGENT_PARSERS = [] + for _ua_parser in regexes['user_agent_parsers']: + _regex = _ua_parser['regex'] + + _family_replacement = None + if 
'family_replacement' in _ua_parser: + _family_replacement = _ua_parser['family_replacement'] + + _v1_replacement = None + if 'v1_replacement' in _ua_parser: + _v1_replacement = _ua_parser['v1_replacement'] + + _v2_replacement = None + if 'v2_replacement' in _ua_parser: + _v2_replacement = _ua_parser['v2_replacement'] + + USER_AGENT_PARSERS.append(UserAgentParser(_regex, + _family_replacement, + _v1_replacement, + _v2_replacement)) + + OS_PARSERS = [] + for _os_parser in regexes['os_parsers']: + _regex = _os_parser['regex'] + + _os_replacement = None + if 'os_replacement' in _os_parser: + _os_replacement = _os_parser['os_replacement'] + + _os_v1_replacement = None + if 'os_v1_replacement' in _os_parser: + _os_v1_replacement = _os_parser['os_v1_replacement'] + + _os_v2_replacement = None + if 'os_v2_replacement' in _os_parser: + _os_v2_replacement = _os_parser['os_v2_replacement'] + + _os_v3_replacement = None + if 'os_v3_replacement' in _os_parser: + _os_v3_replacement = _os_parser['os_v3_replacement'] + + _os_v4_replacement = None + if 'os_v4_replacement' in _os_parser: + _os_v4_replacement = _os_parser['os_v4_replacement'] + + OS_PARSERS.append(OSParser(_regex, + _os_replacement, + _os_v1_replacement, + _os_v2_replacement, + _os_v3_replacement, + _os_v4_replacement)) + + DEVICE_PARSERS = [] + for _device_parser in regexes['device_parsers']: + _regex = _device_parser['regex'] + + _regex_flag = None + if 'regex_flag' in _device_parser: + _regex_flag = _device_parser['regex_flag'] + + _device_replacement = None + if 'device_replacement' in _device_parser: + _device_replacement = _device_parser['device_replacement'] + + _brand_replacement = None + if 'brand_replacement' in _device_parser: + _brand_replacement = _device_parser['brand_replacement'] + + _model_replacement = None + if 'model_replacement' in _device_parser: + _model_replacement = _device_parser['model_replacement'] + + DEVICE_PARSERS.append(DeviceParser(_regex, + _regex_flag, + _device_replacement, + 
_brand_replacement, + _model_replacement)) + + # Clean our our temporary vars explicitly + # so they can't be reused or imported + del regexes + del yaml + del SafeLoader +else: + # Just load our pre-compiled versions + from ._regexes import USER_AGENT_PARSERS, DEVICE_PARSERS, OS_PARSERS diff --git a/ua_parser/user_agent_parser_test.py b/ua_parser/user_agent_parser_test.py index 60ab8f8..05b533a 100644 --- a/ua_parser/user_agent_parser_test.py +++ b/ua_parser/user_agent_parser_test.py @@ -32,7 +32,7 @@ import unittest import yaml -import user_agent_parser +from ua_parser import user_agent_parser TEST_RESOURCES_DIR = os.path.join(os.path.abspath(os.path.dirname(__file__)), '../uap-core') From 9b0b09537acd1325facb657a6314ca713042fd28 Mon Sep 17 00:00:00 2001 From: Matt Robenolt Date: Mon, 21 Dec 2015 15:40:38 -0800 Subject: [PATCH 2/9] Explicitly write bytes for py3 compat --- setup.py | 84 ++++++++++++++++++++++++++++---------------------------- 1 file changed, 42 insertions(+), 42 deletions(-) diff --git a/setup.py b/setup.py index e8d9b84..41f7d97 100644 --- a/setup.py +++ b/setup.py @@ -40,51 +40,51 @@ def run(self): with open(yaml_src, 'rb') as fp: regexes = yaml.safe_load(fp) with open(py_dest, 'wb') as fp: - fp.write('############################################\n') - fp.write('# NOTICE: This file is autogenerated from #\n') - fp.write('# regexes.yaml. 
Do not edit by hand, #\n') - fp.write('# instead, re-run `setup.py build_regexes` #\n') - fp.write('############################################\n') - fp.write('\n') - fp.write('from .user_agent_parser import (\n') - fp.write(' UserAgentParser, DeviceParser, OSParser,\n') - fp.write(')\n') - fp.write('\n') - fp.write('__all__ = (\n') - fp.write(' \'USER_AGENT_PARSERS\', \'DEVICE_PARSERS\', \'OS_PARSERS\',\n') - fp.write(')\n') - fp.write('\n') - fp.write('USER_AGENT_PARSERS = [\n') + fp.write(b'############################################\n') + fp.write(b'# NOTICE: This file is autogenerated from #\n') + fp.write(b'# regexes.yaml. Do not edit by hand, #\n') + fp.write(b'# instead, re-run `setup.py build_regexes` #\n') + fp.write(b'############################################\n') + fp.write(b'\n') + fp.write(b'from .user_agent_parser import (\n') + fp.write(b' UserAgentParser, DeviceParser, OSParser,\n') + fp.write(b')\n') + fp.write(b'\n') + fp.write(b'__all__ = (\n') + fp.write(b' \'USER_AGENT_PARSERS\', \'DEVICE_PARSERS\', \'OS_PARSERS\',\n') + fp.write(b')\n') + fp.write(b'\n') + fp.write(b'USER_AGENT_PARSERS = [\n') for device_parser in regexes['user_agent_parsers']: - fp.write(' UserAgentParser(\n') - fp.write(' %r,\n' % device_parser['regex']) - fp.write(' %r,\n' % device_parser.get('family_replacement')) - fp.write(' %r,\n' % device_parser.get('v1_replacement')) - fp.write(' %r,\n' % device_parser.get('v2_replacement')) - fp.write(' ),\n') - fp.write(']\n') - fp.write('\n') - fp.write('DEVICE_PARSERS = [\n') + fp.write(b' UserAgentParser(\n') + fp.write(b' %r,\n' % device_parser['regex']) + fp.write(b' %r,\n' % device_parser.get('family_replacement')) + fp.write(b' %r,\n' % device_parser.get('v1_replacement')) + fp.write(b' %r,\n' % device_parser.get('v2_replacement')) + fp.write(b' ),\n') + fp.write(b']\n') + fp.write(b'\n') + fp.write(b'DEVICE_PARSERS = [\n') for device_parser in regexes['device_parsers']: - fp.write(' DeviceParser(\n') - fp.write(' %r,\n' 
% device_parser['regex']) - fp.write(' %r,\n' % device_parser.get('regex_flag')) - fp.write(' %r,\n' % device_parser.get('device_replacement')) - fp.write(' %r,\n' % device_parser.get('brand_replacement')) - fp.write(' %r,\n' % device_parser.get('model_replacement')) - fp.write(' ),\n') - fp.write(']\n') - fp.write('\n') - fp.write('OS_PARSERS = [\n') + fp.write(b' DeviceParser(\n') + fp.write(b' %r,\n' % device_parser['regex']) + fp.write(b' %r,\n' % device_parser.get('regex_flag')) + fp.write(b' %r,\n' % device_parser.get('device_replacement')) + fp.write(b' %r,\n' % device_parser.get('brand_replacement')) + fp.write(b' %r,\n' % device_parser.get('model_replacement')) + fp.write(b' ),\n') + fp.write(b']\n') + fp.write(b'\n') + fp.write(b'OS_PARSERS = [\n') for device_parser in regexes['os_parsers']: - fp.write(' OSParser(\n') - fp.write(' %r,\n' % device_parser['regex']) - fp.write(' %r,\n' % device_parser.get('os_replacement')) - fp.write(' %r,\n' % device_parser.get('os_v1_replacement')) - fp.write(' %r,\n' % device_parser.get('os_v2_replacement')) - fp.write(' ),\n') - fp.write(']\n') - fp.write('\n') + fp.write(b' OSParser(\n') + fp.write(b' %r,\n' % device_parser['regex']) + fp.write(b' %r,\n' % device_parser.get('os_replacement')) + fp.write(b' %r,\n' % device_parser.get('os_v1_replacement')) + fp.write(b' %r,\n' % device_parser.get('os_v2_replacement')) + fp.write(b' ),\n') + fp.write(b']\n') + fp.write(b'\n') class develop(_develop): From 0b493c35cff96cb15ff8149f15e9856cf59ca8a2 Mon Sep 17 00:00:00 2001 From: Matt Robenolt Date: Mon, 21 Dec 2015 15:44:41 -0800 Subject: [PATCH 3/9] Future imports --- setup.py | 1 + ua_parser/user_agent_parser.py | 2 ++ ua_parser/user_agent_parser_test.py | 2 +- 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 41f7d97..e7eec7b 100644 --- a/setup.py +++ b/setup.py @@ -46,6 +46,7 @@ def run(self): fp.write(b'# instead, re-run `setup.py build_regexes` #\n') 
fp.write(b'############################################\n') fp.write(b'\n') + fp.write(b'from __future__ import absolute_import\n') fp.write(b'from .user_agent_parser import (\n') fp.write(b' UserAgentParser, DeviceParser, OSParser,\n') fp.write(b')\n') diff --git a/ua_parser/user_agent_parser.py b/ua_parser/user_agent_parser.py index 85f4500..6e4696f 100644 --- a/ua_parser/user_agent_parser.py +++ b/ua_parser/user_agent_parser.py @@ -16,6 +16,8 @@ """Python implementation of the UA parser.""" +from __future__ import absolute_import + __author__ = 'Lindsey Simon ' import os diff --git a/ua_parser/user_agent_parser_test.py b/ua_parser/user_agent_parser_test.py index 05b533a..6eefe70 100644 --- a/ua_parser/user_agent_parser_test.py +++ b/ua_parser/user_agent_parser_test.py @@ -23,7 +23,7 @@ """ -from __future__ import unicode_literals +from __future__ import unicode_literals, absolute_import __author__ = 'slamm@google.com (Stephen Lamm)' From 82663c6905e6567d655eec4ea5e250189399097f Mon Sep 17 00:00:00 2001 From: Matt Robenolt Date: Mon, 21 Dec 2015 15:45:20 -0800 Subject: [PATCH 4/9] Ignore .tox folder --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 841470f..1f95550 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ *.egg-info/ .eggs/ .cache/ +.tox/ build/ dist/ tmp/ From 2949b32f8a54e989d89b292a8146cc34f195d613 Mon Sep 17 00:00:00 2001 From: Matt Robenolt Date: Mon, 21 Dec 2015 15:48:47 -0800 Subject: [PATCH 5/9] Run setup.py develop in tox --- tox.ini | 1 + 1 file changed, 1 insertion(+) diff --git a/tox.ini b/tox.ini index 65950fe..83d0fc6 100644 --- a/tox.ini +++ b/tox.ini @@ -5,4 +5,5 @@ envlist = py26, py27, pypy, py31, py32, py33, py34, py35, docs, pep8, py3pep8 deps = pyyaml commands = + python setup.py develop python ua_parser/user_agent_parser_test.py From 906d3cd832ad733e44bdadb5cabf640bad4a2c51 Mon Sep 17 00:00:00 2001 From: Matt Robenolt Date: Mon, 21 Dec 2015 15:51:40 -0800 Subject: [PATCH 6/9] 
user_agent_parser isn't an entrypoint This didn't work because it didn't have a __main__ anyways --- ua_parser/user_agent_parser.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/ua_parser/user_agent_parser.py b/ua_parser/user_agent_parser.py index 6e4696f..9b2d02c 100644 --- a/ua_parser/user_agent_parser.py +++ b/ua_parser/user_agent_parser.py @@ -1,5 +1,3 @@ -#!/usr/bin/env python -# # Copyright 2009 Google Inc. # # Licensed under the Apache License, Version 2.0 (the 'License') From db65832b7679ecec15975cff20d33e2a5a7ee93b Mon Sep 17 00:00:00 2001 From: Matt Robenolt Date: Mon, 21 Dec 2015 16:02:55 -0800 Subject: [PATCH 7/9] Add missing os versions --- setup.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/setup.py b/setup.py index e7eec7b..b684527 100644 --- a/setup.py +++ b/setup.py @@ -83,6 +83,8 @@ def run(self): fp.write(b' %r,\n' % device_parser.get('os_replacement')) fp.write(b' %r,\n' % device_parser.get('os_v1_replacement')) fp.write(b' %r,\n' % device_parser.get('os_v2_replacement')) + fp.write(b' %r,\n' % device_parser.get('os_v3_replacement')) + fp.write(b' %r,\n' % device_parser.get('os_v4_replacement')) fp.write(b' ),\n') fp.write(b']\n') fp.write(b'\n') From 8621957e3bca57d550cf3631d4a38930e17eed50 Mon Sep 17 00:00:00 2001 From: Matt Robenolt Date: Mon, 21 Dec 2015 16:07:37 -0800 Subject: [PATCH 8/9] bytes all the way down --- setup.py | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/setup.py b/setup.py index b684527..42a6ebf 100644 --- a/setup.py +++ b/setup.py @@ -34,6 +34,11 @@ def run(self): raise RuntimeError( 'Unable to find regexes.yaml, should be at %r' % yaml_src) + def force_bytes(text): + if text is None: + return text + return text.encode('utf8') + log.info('Converting regexes.yaml to _regexes.py...') import yaml py_dest = os.path.join(work_path, 'ua_parser', '_regexes.py') @@ -58,33 +63,33 @@ def run(self): fp.write(b'USER_AGENT_PARSERS = [\n') for device_parser in 
regexes['user_agent_parsers']: fp.write(b' UserAgentParser(\n') - fp.write(b' %r,\n' % device_parser['regex']) - fp.write(b' %r,\n' % device_parser.get('family_replacement')) - fp.write(b' %r,\n' % device_parser.get('v1_replacement')) - fp.write(b' %r,\n' % device_parser.get('v2_replacement')) + fp.write(force_bytes(' %r,\n' % device_parser['regex'])) + fp.write(force_bytes(' %r,\n' % device_parser.get('family_replacement'))) + fp.write(force_bytes(' %r,\n' % device_parser.get('v1_replacement'))) + fp.write(force_bytes(' %r,\n' % device_parser.get('v2_replacement'))) fp.write(b' ),\n') fp.write(b']\n') fp.write(b'\n') fp.write(b'DEVICE_PARSERS = [\n') for device_parser in regexes['device_parsers']: fp.write(b' DeviceParser(\n') - fp.write(b' %r,\n' % device_parser['regex']) - fp.write(b' %r,\n' % device_parser.get('regex_flag')) - fp.write(b' %r,\n' % device_parser.get('device_replacement')) - fp.write(b' %r,\n' % device_parser.get('brand_replacement')) - fp.write(b' %r,\n' % device_parser.get('model_replacement')) + fp.write(force_bytes(' %r,\n' % device_parser['regex'])) + fp.write(force_bytes(' %r,\n' % device_parser.get('regex_flag'))) + fp.write(force_bytes(' %r,\n' % device_parser.get('device_replacement'))) + fp.write(force_bytes(' %r,\n' % device_parser.get('brand_replacement'))) + fp.write(force_bytes(' %r,\n' % device_parser.get('model_replacement'))) fp.write(b' ),\n') fp.write(b']\n') fp.write(b'\n') fp.write(b'OS_PARSERS = [\n') for device_parser in regexes['os_parsers']: fp.write(b' OSParser(\n') - fp.write(b' %r,\n' % device_parser['regex']) - fp.write(b' %r,\n' % device_parser.get('os_replacement')) - fp.write(b' %r,\n' % device_parser.get('os_v1_replacement')) - fp.write(b' %r,\n' % device_parser.get('os_v2_replacement')) - fp.write(b' %r,\n' % device_parser.get('os_v3_replacement')) - fp.write(b' %r,\n' % device_parser.get('os_v4_replacement')) + fp.write(force_bytes(' %r,\n' % device_parser['regex'])) + fp.write(force_bytes(' %r,\n' % 
device_parser.get('os_replacement'))) + fp.write(force_bytes(' %r,\n' % device_parser.get('os_v1_replacement'))) + fp.write(force_bytes(' %r,\n' % device_parser.get('os_v2_replacement'))) + fp.write(force_bytes(' %r,\n' % device_parser.get('os_v3_replacement'))) + fp.write(force_bytes(' %r,\n' % device_parser.get('os_v4_replacement'))) fp.write(b' ),\n') fp.write(b']\n') fp.write(b'\n') From c8b8cbfb1a1e2a926520bf122267413ce2c654d1 Mon Sep 17 00:00:00 2001 From: Matt Robenolt Date: Mon, 21 Dec 2015 16:27:17 -0800 Subject: [PATCH 9/9] Fix for py26 --- setup.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index 42a6ebf..da4773e 100644 --- a/setup.py +++ b/setup.py @@ -7,6 +7,14 @@ from setuptools.command.sdist import sdist as _sdist +def check_output(*args, **kwargs): + from subprocess import Popen + proc = Popen(*args, **kwargs) + output, _ = proc.communicate() + rv = proc.poll() + assert rv == 0, output + + class build_regexes(Command): description = 'build supporting regular expressions from uap-core' user_options = [ @@ -24,8 +32,7 @@ def finalize_options(self): def run(self): work_path = self.work_path if os.path.exists(os.path.join(work_path, '.git')): - from subprocess import check_output - log.info("initializing git submodules") + log.info('initializing git submodules') check_output(['git', 'submodule', 'init'], cwd=work_path) check_output(['git', 'submodule', 'update'], cwd=work_path) @@ -39,9 +46,10 @@ def force_bytes(text): return text return text.encode('utf8') - log.info('Converting regexes.yaml to _regexes.py...') import yaml py_dest = os.path.join(work_path, 'ua_parser', '_regexes.py') + + log.info('Compiling regexes.yaml -> _regexes.py') with open(yaml_src, 'rb') as fp: regexes = yaml.safe_load(fp) with open(py_dest, 'wb') as fp: