diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..6d61431 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,4 @@ +[submodule "uap-core"] + path = uap-core + url = https://github.com/ua-parser/uap-core.git + branch = master diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..14c203f --- /dev/null +++ b/.travis.yml @@ -0,0 +1,16 @@ +language: python +python: + - "2.6" + - "2.7" + - "3.2" + - "3.3" + - "3.4" + +before_install: + - cp regexes.yaml py/ua_parser/regexes.yaml + +install: + - pip install . + +script: + - python ua_parser/user_agent_parser_test.py diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..ae741a8 --- /dev/null +++ b/Makefile @@ -0,0 +1,24 @@ +PWD = $(shell pwd) + +all: prep test + +prep: + #git submodule update --init + #sudo apt-get install python-yaml + +test: + @#test ! -d tmp && mkdir tmp + @export PYTHONPATH=tmp && python setup.py develop -d tmp + @# run all tests + @python ua_parser/user_agent_parser_test.py + @# run a single test + @#python ua_parser/user_agent_parser_test.py ParseTest.testStringsDeviceBrandModel + +clean: + @rm ua_parser/user_agent_parser.pyc\ + ua_parser/regexes.yaml\ + ua_parser/regexes.json + @rm -rf tmp\ + ua_parser.egg-info + +.PHONY: all clean diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..1e1f7df --- /dev/null +++ b/setup.py @@ -0,0 +1,68 @@ +#!/usr/bin/env python +from setuptools import setup +from setuptools.command.develop import develop as _develop +from setuptools.command.sdist import sdist as _sdist + + +def install_regexes(): + print('Copying regexes.yaml to package directory...') + import os + import shutil + cwd = os.path.abspath(os.path.dirname(__file__)) + yaml_src = os.path.join(cwd, 'uap-core', 'regexes.yaml') + if not os.path.exists(yaml_src): + raise RuntimeError( + 'Unable to find regexes.yaml, should be at %r' % yaml_src) + yaml_dest = os.path.join(cwd, 'ua_parser', 'regexes.yaml') + shutil.copy2(yaml_src, yaml_dest) + + print('Converting regexes.yaml to regexes.json...') + import json + import yaml + json_dest = yaml_dest.replace('.yaml', '.json') + regexes = yaml.load(open(yaml_dest)) + with open(json_dest, "w") as f: + json.dump(regexes, f) + + +class develop(_develop): + def run(self): + install_regexes() + _develop.run(self) + + +class sdist(_sdist): + def run(self): + install_regexes() + _sdist.run(self) + + +setup( + name='ua-parser', + version='0.4.0', + description="Python port of Browserscope's user agent parser", + author='PBS', + author_email='no-reply@pbs.org', + packages=['ua_parser'], + package_dir={'': '.'}, + license='LICENSE.txt', + zip_safe=False, + url='https://github.com/ua-parser/uap-python', + include_package_data=True, + package_data={'ua_parser': ['regexes.yaml', 'regexes.json']}, + install_requires=['pyyaml'], + cmdclass={ + 'develop': develop, + 'sdist': sdist, + }, + classifiers=[ + 'Development Status :: 4 - Beta', + 'Environment :: Web Environment', + 'Intended Audience :: Developers', + 'Operating System :: OS Independent', + 'License :: OSI Approved :: Apache Software License', + 'Programming Language :: Python', + 'Topic :: Internet :: WWW/HTTP', + 'Topic :: Software Development :: Libraries :: Python Modules', + ], +) diff --git a/ua_parser/user_agent_parser.py b/ua_parser/user_agent_parser.py index eb6f6fe..ca7954b 100644 --- a/ua_parser/user_agent_parser.py +++ b/ua_parser/user_agent_parser.py @@ -139,7 +139,7 @@ def Parse(self, user_agent_string): class DeviceParser(object): - def __init__(self, pattern, device_replacement=None): + def __init__(self, pattern, regex_flag=None, device_replacement=None, brand_replacement=None, model_replacement=None): """Initialize UserAgentParser. Args: @@ -147,8 +147,13 @@ def __init__(self, pattern, device_replacement=None): device_replacement: a string to override the matched device (optional) """ self.pattern = pattern - self.user_agent_re = re.compile(self.pattern) + if regex_flag == 'i': + self.user_agent_re = re.compile(self.pattern, re.IGNORECASE) + else: + self.user_agent_re = re.compile(self.pattern) self.device_replacement = device_replacement + self.brand_replacement = brand_replacement + self.model_replacement = model_replacement def MatchSpans(self, user_agent_string): match_spans = [] @@ -158,19 +163,38 @@ def MatchSpans(self, user_agent_string): for group_index in range(1, match.lastindex + 1)] return match_spans + def MultiReplace(self, string, match): + def _repl(m): + index = int(m.group(1)) - 1 + group = match.groups() + if index < len(group): + return group[index] + return '' + + _string = re.sub(r'\$(\d)', _repl, string) + _string = re.sub(r'^\s+|\s+$', '', _string) + if _string == '': + return None + return _string + def Parse(self, user_agent_string): - device = None + device, brand, model = None, None, None match = self.user_agent_re.search(user_agent_string) - if match: + if match: if self.device_replacement: - if re.search(r'\$1', self.device_replacement): - device = re.sub(r'\$1', match.group(1), self.device_replacement) - else: - device = self.device_replacement + device = self.MultiReplace(self.device_replacement, match) else: device = match.group(1) - return device + if self.brand_replacement: + brand = self.MultiReplace(self.brand_replacement, match) + + if self.model_replacement: + model = self.MultiReplace(self.model_replacement, match) + elif len(match.groups()) > 0: + model = match.group(1) + + return device, brand, model def Parse(user_agent_string, **jsParseBits): @@ -264,7 +288,7 @@ def ParseDevice(user_agent_string): A dictionary containing parsed bits. """ for deviceParser in DEVICE_PARSERS: - device = deviceParser.Parse(user_agent_string) + device, brand, model = deviceParser.Parse(user_agent_string) if device: break @@ -272,7 +296,9 @@ def ParseDevice(user_agent_string): device = 'Other' return { - 'family': device + 'family': device, + 'brand': brand, + 'model': model } @@ -483,9 +509,25 @@ def GetFilters(user_agent_string, js_user_agent_string=None, for _device_parser in regexes['device_parsers']: _regex = _device_parser['regex'] + _regex_flag = None + if 'regex_flag' in _device_parser: + _regex_flag = _device_parser['regex_flag'] + _device_replacement = None if 'device_replacement' in _device_parser: _device_replacement = _device_parser['device_replacement'] - DEVICE_PARSERS.append(DeviceParser(_regex, _device_replacement)) + _brand_replacement = None + if 'brand_replacement' in _device_parser: + _brand_replacement = _device_parser['brand_replacement'] + + _model_replacement = None + if 'model_replacement' in _device_parser: + _model_replacement = _device_parser['model_replacement'] + + DEVICE_PARSERS.append(DeviceParser(_regex, + _regex_flag, + _device_replacement, + _brand_replacement, + _model_replacement)) diff --git a/ua_parser/user_agent_parser_test.py b/ua_parser/user_agent_parser_test.py index de0c42d..8bbf754 100644 --- a/ua_parser/user_agent_parser_test.py +++ b/ua_parser/user_agent_parser_test.py @@ -33,29 +33,33 @@ import user_agent_parser TEST_RESOURCES_DIR = os.path.join(os.path.abspath(os.path.dirname(__file__)), - '../../test_resources') + '../uap-core') class ParseTest(unittest.TestCase): def testBrowserscopeStrings(self): self.runUserAgentTestsFromYAML(os.path.join( - TEST_RESOURCES_DIR, 'test_user_agent_parser.yaml')) + TEST_RESOURCES_DIR, 'tests/test_ua.yaml')) def testBrowserscopeStringsOS(self): self.runOSTestsFromYAML(os.path.join( - TEST_RESOURCES_DIR, 'test_user_agent_parser_os.yaml')) + TEST_RESOURCES_DIR, 'tests/test_os.yaml')) def testStringsOS(self): self.runOSTestsFromYAML(os.path.join( - TEST_RESOURCES_DIR, 'additional_os_tests.yaml')) + TEST_RESOURCES_DIR, 'test_resources/additional_os_tests.yaml')) def testStringsDevice(self): self.runDeviceTestsFromYAML(os.path.join( - TEST_RESOURCES_DIR, 'test_device.yaml')) + TEST_RESOURCES_DIR, 'tests/test_device.yaml')) + + def testStringsDeviceBrandModel(self): + self.runDeviceTestsFromYAML(os.path.join( + TEST_RESOURCES_DIR, 'tests/test_device_brandmodel.yaml')) def testMozillaStrings(self): self.runUserAgentTestsFromYAML(os.path.join( - TEST_RESOURCES_DIR, 'firefox_user_agent_strings.yaml')) + TEST_RESOURCES_DIR, 'test_resources/firefox_user_agent_strings.yaml')) # NOTE: The YAML file used here is one output by makePGTSComparisonYAML() # below, as opposed to the pgts_browser_list-orig.yaml file. The -orig @@ -65,13 +69,15 @@ def testMozillaStrings(self): # reconcile the differences between the two YAML files. def testPGTSStrings(self): self.runUserAgentTestsFromYAML(os.path.join( - TEST_RESOURCES_DIR, 'pgts_browser_list.yaml')) + TEST_RESOURCES_DIR, 'test_resources/pgts_browser_list.yaml')) def testParseAll(self): user_agent_string = 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.4; fr; rv:1.9.1.5) Gecko/20091102 Firefox/3.5.5,gzip(gfe),gzip(gfe)' expected = { 'device': { - 'family': 'Other' + 'family': 'Other', + 'brand': None, + 'model': None }, 'os': { 'family': 'Mac OS X', @@ -198,15 +204,21 @@ def runDeviceTestsFromYAML(self, file_name): # The expected results expected = { - 'family': test_case['family'] + 'family': test_case['family'], + 'brand': test_case['brand'], + 'model': test_case['model'] } result = user_agent_parser.ParseDevice(user_agent_string, **kwds) self.assertEqual(result, expected, - u"UA: {0}\n expected<{1}> != actual<{2}>".format( + u"UA: {0}\n expected<{1} {2} {3}> != actual<{4} {5} {6}>".format( user_agent_string, expected['family'], - result['family'])) + expected['brand'], + expected['model'], + result['family'], + result['brand'], + result['model'])) class GetFiltersTest(unittest.TestCase): diff --git a/uap-core b/uap-core new file mode 160000 index 0000000..52335a8 --- /dev/null +++ b/uap-core @@ -0,0 +1 @@ +Subproject commit 52335a8ebc40c4c273ebfa80abf950461d8d6707