|
| 1 | +from __future__ import annotations |
| 2 | + |
| 3 | +import io |
| 4 | +import os |
| 5 | +import re |
| 6 | +from typing import List, Tuple, Union |
| 7 | + |
| 8 | +import re2 # type: ignore |
| 9 | + |
| 10 | +from .core import ( |
| 11 | + Parser as BaseParser, |
| 12 | + PartialParseResult, |
| 13 | + Device, |
| 14 | + Domain, |
| 15 | + OS, |
| 16 | + UserAgent, |
| 17 | + Matchers, |
| 18 | + UserAgentMatcher, |
| 19 | + OSMatcher, |
| 20 | + DeviceMatcher, |
| 21 | +) |
| 22 | + |
| 23 | + |
# Options shared by every re2.Set built in this module.
RE_OPTS = re2.Options()
# As of uap-core 0.18 the devices set needs at least 28MB, which is more
# than the 8MB default, so the budget is raised to 32MB.
RE_OPTS.max_mem = 32 * 1024 * 1024
# Error logging might write directly to stdout? Not great, so suppress it.
RE_OPTS.log_errors = False
| 30 | + |
| 31 | + |
class Parser(BaseParser):
    """Parser that pre-filters each domain's matchers through a ``re2.Set``.

    For each of the three domains (user agent, OS, device) the patterns
    of the matchers are added to a set in list order, so an index
    reported by ``Set.Match`` is used directly as an index into the
    corresponding matcher list. A call asks the set which patterns match
    and then runs only the matcher with the lowest index.
    """

    ua: re2.Set
    user_agent_parsers: List[UserAgentMatcher]
    os: re2.Set
    os_parsers: List[OSMatcher]
    devices: re2.Set
    device_parsers: List[DeviceMatcher]

    def __init__(
        self,
        matchers: Matchers,
    ) -> None:
        """Store the matcher lists and compile one pattern set per domain.

        :param matchers: the user agent, OS and device matcher lists, in
            that order.
        """
        self.user_agent_parsers, self.os_parsers, self.device_parsers = matchers

        self.ua = self._compile_set(self.user_agent_parsers)
        self.os = self._compile_set(self.os_parsers)
        self.devices = self._compile_set(self.device_parsers)

    @staticmethod
    def _compile_set(
        matchers: Union[
            List[UserAgentMatcher], List[OSMatcher], List[DeviceMatcher]
        ],
    ) -> re2.Set:
        """Build and compile a search set from the patterns of ``matchers``.

        Patterns are added in list order so set indices line up with
        positions in ``matchers``.
        """
        compiled = re2.Set.SearchSet(RE_OPTS)
        for matcher in matchers:
            pattern = matcher.regex.pattern
            # Prepend the i global flag if IGNORECASE is set, so the set
            # agrees with the matcher's own regex. Assumes no pattern
            # uses global flags, but since they're not supported in JS
            # that seems safe.
            if matcher.regex.flags & re.IGNORECASE:
                pattern = "(?i)" + pattern
            compiled.Add(pattern)
        compiled.Compile()
        return compiled

    def __call__(self, ua: str, domains: Domain, /) -> PartialParseResult:
        """Parse ``ua`` for the requested ``domains``.

        A domain which was not requested, or whose set reports no match,
        is left as ``None`` in the result.
        """
        # ``os_info`` rather than ``os`` to avoid shadowing the ``os``
        # module imported at the top of the file.
        user_agent = os_info = device = None
        if Domain.USER_AGENT in domains:
            if matches := self.ua.Match(ua):
                # The lowest index is the first matcher in list order.
                user_agent = self.user_agent_parsers[min(matches)](ua)
        if Domain.OS in domains:
            if matches := self.os.Match(ua):
                os_info = self.os_parsers[min(matches)](ua)
        if Domain.DEVICE in domains:
            if matches := self.devices.Match(ua):
                device = self.device_parsers[min(matches)](ua)
        return PartialParseResult(
            domains=domains,
            string=ua,
            user_agent=user_agent,
            os=os_info,
            device=device,
        )
0 commit comments