diff --git a/.env.sample b/.env.sample new file mode 100644 index 00000000..1f6bf5b0 --- /dev/null +++ b/.env.sample @@ -0,0 +1,4 @@ +# Rename to .env file and replace the contents with proper write key and dataplane url +# Do not add .env file to git +TEST_WRITE_KEY= +TEST_DATA_PLANE_URL= diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 00000000..23cfbceb --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,41 @@ +--- +name: Bug report +about: Create a report to help us improve +title: "BUG : " +labels: bug, open source +assignees: itsdebs +--- + +**Describe the bug** +Please provide the following information: + +1. A clear and concise description of what the bug is +2. Share the event payload +3. Offer a minimal viable example to reproduce the issue +4. Include the error's stack trace +5. Mention the date when the issue began + +**To Reproduce** +Steps to reproduce the behaviour: + +1. Initialise Python SDK +2. Make events '....' +3. See the error + +**Expected behavior** +A clear and concise description of what you expected to happen. + +**Screenshots** +If applicable, add screenshots to help explain your problem. + +**Version of the _Python_ SDK** +Please mention the version of the Rudder Python SDK you are using (e.g., Python SDK v1.0.0). + +**Version of the _Python_** +Please mention the version of the Python you are using (e.g., Python v1.0.0). + +**SDK initialisation snippet** +Share the code snippet used for initializing the Python SDK. + +**Check for Correct Usage of _writeKey_ and _dataPlaneUrl_** +Confirm that the correct `writeKey` and `dataPlaneUrl` are utilized during SDK initialization. 
diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 00000000..32f214da --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,12 @@ +version: 2 +updates: + - package-ecosystem: pip + directory: "/" + schedule: + interval: weekly + open-pull-requests-limit: 10 + + - package-ecosystem: 'github-actions' + directory: '/' + schedule: + interval: 'weekly' diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml new file mode 100644 index 00000000..db9a6ea4 --- /dev/null +++ b/.github/workflows/build.yml @@ -0,0 +1,41 @@ +name: Lint and Test +on: + pull_request: + branches: ['master'] + types: ['opened', 'reopened', 'synchronize'] + +jobs: + build: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Install dependencies + run: | + pip install --upgrade pip + pip install -r requirements.txt + + - name: Lint + run: | + # stop the build if there are Python syntax errors or undefined names + flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + + # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide + flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + + - name: Test + run: | + python -m pytest + env: + TEST_WRITE_KEY: ${{ secrets.TEST_WRITE_KEY }} + TEST_DATA_PLANE_URL: ${{ secrets.TEST_DATA_PLANE_URL }} diff --git a/.github/workflows/check_pr_title.yml b/.github/workflows/check_pr_title.yml new file mode 100644 index 00000000..5c76e4c9 --- /dev/null +++ b/.github/workflows/check_pr_title.yml @@ -0,0 +1,14 @@ +name: Check PR Title + +on: + pull_request: + branches: ['master'] + types: ['opened', 'reopened', 'edited', 'synchronize'] + +jobs: + check_pr_title: + name: Check PR title + runs-on: [self-hosted, Linux, X64] + steps: + - name: Check PR title + uses: rudderlabs/github-action-check-pr-title@v1.0.11 diff --git a/.github/workflows/housekeeping.yaml b/.github/workflows/housekeeping.yaml new file mode 100644 index 00000000..a6803be7 --- /dev/null +++ b/.github/workflows/housekeeping.yaml @@ -0,0 +1,41 @@ +name: Handle Stale PRs and Branches + +on: + schedule: + - cron: '1 0 * * *' # every day at 00:01 + +jobs: + prs: + name: Clean up stale PRs + runs-on: [self-hosted, Linux, X64] + + permissions: + issues: write + pull-requests: write + + steps: + - uses: actions/stale@v9 + with: + repo-token: ${{ secrets.GITHUB_TOKEN }} + operations-per-run: 200 + stale-pr-message: "Hello! This PR has been open for 20 days without any activity. Therefore, it's considered as stale and is scheduled to be closed in 10 days. If you're still working on this, please remove the 'Stale' label or add a comment to keep it open. Thanks for your contribution!" 
+ days-before-pr-stale: 20 + days-before-pr-close: 10 + stale-pr-label: 'Stale' + + branches: + name: Clean up stale branches + runs-on: [self-hosted, Linux, X64] + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Run delete-old-branches-action + uses: beatlabs/delete-old-branches-action@v0.0.10 + with: + repo_token: ${{ secrets.GITHUB_TOKEN }} + date: '2 months ago' + dry_run: false + delete_tags: false + extra_protected_branch_regex: ^(master)$ + exclude_open_pr_branches: true diff --git a/.github/workflows/slack-notify.yml b/.github/workflows/slack-notify.yml new file mode 100644 index 00000000..7320f215 --- /dev/null +++ b/.github/workflows/slack-notify.yml @@ -0,0 +1,41 @@ +name: Notify Slack Channel + +on: + release: + types: [created] + +jobs: + deploy-tag: + name: Notify Slack + runs-on: ubuntu-latest + steps: + - name: Send message to Slack channel + id: slack + uses: slackapi/slack-github-action@v1.23.0 + env: + SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }} + PROJECT_NAME: 'Python SDK' + with: + channel-id: "${{ secrets.SLACK_RELEASE_CHANNEL_ID }}" + payload: | + { + "blocks": [ + { + "type": "header", + "text": { + "type": "plain_text", + "text": "New release: ${{ env.PROJECT_NAME }}" + } + }, + { + "type": "divider" + }, + { + "type": "section", + "text": { + "type": "mrkdwn", + "text": "*Release: <${{ github.event.release.html_url }}|${{ github.event.release.tag_name }}>*" + } + } + ] + } \ No newline at end of file diff --git a/.gitignore b/.gitignore index 5cf10c46..8d7d98ff 100644 --- a/.gitignore +++ b/.gitignore @@ -6,5 +6,10 @@ dist MANIFEST build .eggs -[metadata] -**/.DS_Store \ No newline at end of file +*.bat +.vscode/ +.idea/ +.python-version +**/.DS_Store +.env +venv diff --git a/.pylintrc b/.pylintrc index 8f7fd04a..568c4cc2 100644 --- a/.pylintrc +++ b/.pylintrc @@ -1,15 +1,10 @@ [MASTER] -# A comma-separated list of package or module names from where C extensions may -# be loaded. 
Extensions are loading into the active Python interpreter and may -# run arbitrary code -extension-pkg-whitelist= - -# Add files or directories to the blacklist. They should be base names, not +# Add files or directories to the ignore list. They should be base names, not # paths. ignore=CVS -# Add files or directories matching the regex patterns to the blacklist. The +# Add files or directories matching the regex patterns to the denylist. The # regex matches against base names, not paths. ignore-patterns= @@ -54,7 +49,9 @@ confidence= # --enable=similarities". If you want to run only the classes checker, but have # no Warning level messages displayed, use"--disable=all --enable=classes # --disable=W" -disable=print-statement, +disable=too-many-public-methods, + no-else-return, + print-statement, invalid-name, global-statement, too-many-arguments, @@ -309,7 +306,7 @@ init-import=no # List of qualified module names which can have objects that can redefine # builtins. -redefining-builtins-modules=six.moves,past.builtins,future.builtins,io,builtins +redefining-builtins-modules=past.builtins,future.builtins,io,builtins [FORMAT] @@ -471,7 +468,7 @@ max-bool-expr=5 max-branches=12 # Maximum number of locals for function / method body -max-locals=15 +max-locals=20 # Maximum number of parents for a class (see R0901). max-parents=7 diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 00000000..599a5e3d --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,34 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +## [2.0.0] +## Features +- Update SDK with latest code. 
+- Update tests with latest dependencies +## Breaking Changes +- Updated package name to rudderstack.analytics +- Added optional gzip capabilities +- Batch size reduced to 500KB +- Max message size reduced to 32KB +- flush_at is now renamed to upload_size +- flush_interval renamed to upload_interval +- Removed support for python 3.6 + +## [2.0.1] +## Fixes +- Default gzip value is set to True + +## [2.0.2] +## Fixes +- Fixed dataPlaneUrl setter issue [#20](https://github.com/rudderlabs/rudder-sdk-python/issues/20) + +## [2.1.0] +## Fixes +- Fixed versions of dependencies. Moved to use flexible dependencies. +- Updated License + +## [2.1.1] +## Fixes +- Fixed versions of dotenv dependency. Moved to use a higher upper limit (2.0.0) +- Updated License diff --git a/LICENSE.md b/LICENSE.md index 2abc92cb..b97c71ae 100644 --- a/LICENSE.md +++ b/LICENSE.md @@ -1,6 +1,28 @@ MIT License -Copyright (c) 2021 RudderStack +Copyright (c) 2021 Segment (segment.com) + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +MIT License + +Copyright (c) 2020 RudderStack Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/Makefile b/Makefile new file mode 100644 index 00000000..eb98792f --- /dev/null +++ b/Makefile @@ -0,0 +1,11 @@ +install: + pip install --editable .[test] + +test: + pylint --rcfile=.pylintrc --reports=y --exit-zero analytics | tee pylint.out + flake8 --max-complexity=10 --statistics analytics > flake8.out || true + +release: + python setup.py sdist bdist_wheel + twine upload dist/* + diff --git a/README.md b/README.md index 93a26b8a..9505336f 100644 --- a/README.md +++ b/README.md @@ -15,10 +15,14 @@ pip install rudder-sdk-python ## Initializing the RudderStack Client ```python -import rudder_analytics +import rudderstack.analytics as analytics -rudder_analytics.write_key = <SOURCE_WRITE_KEY> -rudder_analytics.data_plane_url = <DATA_PLANE_URL> +analytics.write_key = <SOURCE_WRITE_KEY> +analytics.on_error = <FAILURE CALLBACK> +analytics.debug = <True or False> +analytics.dataPlaneUrl = <RUDDERSTACK_DATA_PLANE_URL> + +analytics.gzip = <True or False> ``` ## Sending Events @@ -29,6 +33,12 @@ Once the RudderStack client is initialized, you can use it to send your customer rudder_analytics.track('developer_user_id', 'Simple Track Event', { 'key1': 'val1' }) +analytics.track('user_id', 'Simple Track Event', anonymous_id='anonymousId', + properties={ + 'key1': 'val1' +}, context={ + 'key1': 'val1' +}) ``` For more information on the supported calls, refer to the 
[**documentation**](https://docs.rudderstack.com/stream-sources/rudderstack-sdk-integration-guides/rudderstack-python-sdk#sending-events-from-rudderstack). diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 00000000..722d6b84 --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,19 @@ +# Security Reporting + +Thank you in advance for helping us keep RudderStack secure! + +Please report any security issues or vulnerabilities to [security@rudderstack.com](mailto:security@rudderstack.com), before proceeding to post them publicly as issues on GitHub or any other public channel like the RudderStack community Slack. These issues might also affect other users, and security vulnerabilities need to be handled quickly and sometimes privately. + +We will triage the issue, contact you for further instructions and make sure to take any necessary measures as needed. + +## Supported versions + +We will fix any security bugs for the latest major.minor version of the SDK. + +| Version | Supported | +| :-------| :---------| +| Latest 1.x | ✅ | +| Older 1.x | ❌ | +| < 1.0 | ❌ | + +We may fix the vulnerabilities in the older versions depending on the severity of the issue and the age of the release, but we are only committing to the latest version released. 
diff --git a/example.py b/example.py deleted file mode 100644 index 0c13388a..00000000 --- a/example.py +++ /dev/null @@ -1,17 +0,0 @@ -import logging -import rudder_analytics - -logging.basicConfig() -logging.getLogger('rudder').setLevel('DEBUG') - -rudder_analytics.write_key = "1arY3oVVVTHxHWjYXjx6dFWRSze" -rudder_analytics.data_plane_url = "https://1b38a868.ngrok.io" -rudder_analytics.debug = True - -rudder_analytics.track('user_id', 'simple_track', { - 'key1' : 'val1' -}) - -rudder_analytics.identify('user_id', { - 'key1': 'val1' -}) diff --git a/example_context_library_integration.py b/example_context_library_integration.py new file mode 100644 index 00000000..e4291d6d --- /dev/null +++ b/example_context_library_integration.py @@ -0,0 +1,97 @@ +import logging +import rudderstack.analytics as analytics +from rudderstack.analytics.get_env import TEST_DATA_PLANE_URL +from rudderstack.analytics.get_env import TEST_WRITE_KEY + +logging.basicConfig() +logging.getLogger('rudder').setLevel('DEBUG') + +analytics.write_key = TEST_WRITE_KEY +analytics.host = "some_random_url" # Do not use this +analytics.dataPlaneUrl = TEST_DATA_PLANE_URL +analytics.debug = True +properties = { + "library": { + "application": 'Rudder Desktop', + "version": '1.1.0', + "platform": 'osx' + } +} + +traits = { + "firstname": 'First', + "lastname": 'Last', + "Role": 'Jedi', + "age": 25 +} + +context = { + "screen": { + "width": 852, + "height": 393, + "density": 3 + }, + "os": { + "name": 'macOS', + "version": '11' + }, + "locale": 'en-US' +} + +context_with_library = { + "screen": { + "width": 852, + "height": 393, + "density": 3 + }, + "os": { + "name": 'macOS', + "version": '11' + }, + "locale": 'en-US', + 'library': { + 'name': 'analytics-random-sdk', + 'version': '1.0.0.beta.1' + } +} + +user_id = '123456' +anonymous_id = 'uid' + +analytics.track( + user_id= user_id, + event= 'Test Track', + anonymous_id= anonymous_id, + properties= properties +) + +analytics.screen( + user_id= user_id, + 
name= 'Test Screen', + anonymous_id= anonymous_id, + properties= properties +) + +analytics.identify( + user_id= '654321', + traits= traits +) + +analytics.group( + user_id= user_id, + group_id= "uid", + anonymous_id= anonymous_id, + traits= traits +) + +analytics.alias( + user_id= user_id, + previous_id= '654321' +) + +analytics.page( + user_id= user_id, + name= 'Test Page', + anonymous_id= anonymous_id, + properties= properties +) \ No newline at end of file diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 00000000..838d15b5 --- /dev/null +++ b/pytest.ini @@ -0,0 +1,4 @@ +[pytest] +pythonpath = . rudderstack/analytics +python_files = + test_*.py \ No newline at end of file diff --git a/requirements.in b/requirements.in new file mode 100644 index 00000000..cf75b967 --- /dev/null +++ b/requirements.in @@ -0,0 +1,10 @@ +backoff==2.2.1 +deprecation==2.1.0 +mock==5.1.0 +monotonic==1.6 +packaging==24.1 +pytest==8.3.3 +requests==2.32.3 +flake8==7.1.1 +python-dateutil==2.9.0 +python-dotenv==1.0.1 diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 00000000..89d9beb5 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,49 @@ +# +# This file is autogenerated by pip-compile with Python 3.12 +# by the following command: +# +# pip-compile requirements.in +# +backoff==2.2.1 + # via -r requirements.in +certifi==2024.8.30 + # via requests +charset-normalizer==3.3.2 + # via requests +deprecation==2.1.0 + # via -r requirements.in +flake8==7.1.1 + # via -r requirements.in +idna==3.10 + # via requests +iniconfig==2.0.0 + # via pytest +mccabe==0.7.0 + # via flake8 +mock==5.1.0 + # via -r requirements.in +monotonic==1.6 + # via -r requirements.in +packaging==24.1 + # via + # -r requirements.in + # deprecation + # pytest +pluggy==1.5.0 + # via pytest +pycodestyle==2.12.1 + # via flake8 +pyflakes==3.2.0 + # via flake8 +pytest==8.3.3 + # via -r requirements.in +python-dateutil==2.9.0 + # via -r requirements.in +python-dotenv==1.0.1 + # via -r 
requirements.in +requests==2.32.3 + # via -r requirements.in +six==1.16.0 + # via python-dateutil +urllib3==2.2.3 + # via requests diff --git a/rudder_analytics/__init__.py b/rudder_analytics/__init__.py deleted file mode 100644 index 725c3f15..00000000 --- a/rudder_analytics/__init__.py +++ /dev/null @@ -1,72 +0,0 @@ -from rudder_analytics.version import VERSION -from rudder_analytics.client import Client - -__version__ = VERSION - -"""Settings.""" -write_key = None -data_plane_url = None -on_error = None -debug = False -send = True -sync_mode = False - -default_client = None - - -def track(*args, **kwargs): - """Send a track call.""" - _proxy('track', *args, **kwargs) - - -def identify(*args, **kwargs): - """Send a identify call.""" - _proxy('identify', *args, **kwargs) - - -def group(*args, **kwargs): - """Send a group call.""" - _proxy('group', *args, **kwargs) - - -def alias(*args, **kwargs): - """Send a alias call.""" - _proxy('alias', *args, **kwargs) - - -def page(*args, **kwargs): - """Send a page call.""" - _proxy('page', *args, **kwargs) - - -def screen(*args, **kwargs): - """Send a screen call.""" - _proxy('screen', *args, **kwargs) - - -def flush(): - """Tell the client to flush.""" - _proxy('flush') - - -def join(): - """Block program until the client clears the queue""" - _proxy('join') - - -def shutdown(): - """Flush all messages and cleanly shutdown the client""" - _proxy('flush') - _proxy('join') - - -def _proxy(method, *args, **kwargs): - """Create an analytics client if one doesn't exist and send to it.""" - global default_client - if not default_client: - default_client = Client(write_key, host=data_plane_url, debug=debug, - on_error=on_error, send=send, - sync_mode=sync_mode) - - fn = getattr(default_client, method) - fn(*args, **kwargs) diff --git a/rudder_analytics/test/__init__.py b/rudder_analytics/test/__init__.py deleted file mode 100644 index 4a920f8e..00000000 --- a/rudder_analytics/test/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ 
-import unittest -import pkgutil -import logging -import sys - - -def all_names(): - for _, modname, _ in pkgutil.iter_modules(__path__): - yield 'analytics.test.' + modname - - -def all(): - logging.basicConfig(stream=sys.stderr) - return unittest.defaultTestLoader.loadTestsFromNames(all_names()) diff --git a/rudder_analytics/version.py b/rudder_analytics/version.py deleted file mode 100644 index 1e5a6058..00000000 --- a/rudder_analytics/version.py +++ /dev/null @@ -1 +0,0 @@ -VERSION = '1.0.0' diff --git a/rudderstack/analytics/__init__.py b/rudderstack/analytics/__init__.py new file mode 100644 index 00000000..9faf32bb --- /dev/null +++ b/rudderstack/analytics/__init__.py @@ -0,0 +1,113 @@ + +import warnings +from rudderstack.analytics.version import VERSION +from rudderstack.analytics.client import Client +import deprecation +__version__ = VERSION + +"""Settings.""" +write_key = Client.DefaultConfig.write_key + +@property +@deprecation.deprecated(deprecated_in="2.0", + current_version=__version__, + details="Use the dataPlaneUrl property instead") +def host(self): + warnings.warn('The use of host is deprecated. Use dataPlaneUrl instead', DeprecationWarning) + return host + +@host.setter +@deprecation.deprecated(deprecated_in="2.0", + current_version=__version__, + details="Use the dataPlaneUrl property instead") +def host(self, value: str): + warnings.warn('The use of host is deprecated. 
Use dataPlaneUrl instead', DeprecationWarning) + self.host = value + +host = Client.DefaultConfig.host + +@property +def dataPlaneUrl(self): + return dataPlaneUrl + +@dataPlaneUrl.setter +def dataPlaneUrl(self, value: str): + self.host = value + +on_error = Client.DefaultConfig.on_error +debug = Client.DefaultConfig.debug +send = Client.DefaultConfig.send +sync_mode = Client.DefaultConfig.sync_mode +max_queue_size = Client.DefaultConfig.max_queue_size +gzip = Client.DefaultConfig.gzip +timeout = Client.DefaultConfig.timeout +upload_interval = Client.DefaultConfig.upload_interval +upload_size = Client.DefaultConfig.upload_size +max_retries = Client.DefaultConfig.max_retries + +default_client = None + + +def track(*args, **kwargs): + """Send a track call.""" + _proxy('track', *args, **kwargs) + + +def identify(*args, **kwargs): + """Send a identify call.""" + _proxy('identify', *args, **kwargs) + + +def group(*args, **kwargs): + """Send a group call.""" + _proxy('group', *args, **kwargs) + + +def alias(*args, **kwargs): + """Send a alias call.""" + _proxy('alias', *args, **kwargs) + + +def page(*args, **kwargs): + """Send a page call.""" + _proxy('page', *args, **kwargs) + + +def screen(*args, **kwargs): + """Send a screen call.""" + _proxy('screen', *args, **kwargs) + + +def flush(): + """Tell the client to flush.""" + _proxy('flush') + + +def join(): + """Block program until the client clears the queue""" + _proxy('join') + + +def shutdown(): + """Flush all messages and cleanly shutdown the client""" + _proxy('flush') + _proxy('join') + + +def _proxy(method, *args, **kwargs): + """Create an analytics client if one doesn't exist and send to it.""" + global default_client + if not default_client: + if isinstance(dataPlaneUrl,str) and dataPlaneUrl != "": + finalDataplaneUrl = dataPlaneUrl + else: + finalDataplaneUrl = host + + default_client = Client(write_key, host=finalDataplaneUrl, debug=debug, + max_queue_size=max_queue_size, + send=send, on_error=on_error, + 
gzip=gzip, max_retries=max_retries, + sync_mode=sync_mode, timeout=timeout) + + fn = getattr(default_client, method) + fn(*args, **kwargs) diff --git a/rudder_analytics/client.py b/rudderstack/analytics/client.py similarity index 73% rename from rudder_analytics/client.py rename to rudderstack/analytics/client.py index dad15e0f..53bb21f1 100644 --- a/rudder_analytics/client.py +++ b/rudderstack/analytics/client.py @@ -1,35 +1,58 @@ -from datetime import datetime +from datetime import datetime, timezone from uuid import uuid4 import logging import numbers import atexit +import json from dateutil.tz import tzutc -from six import string_types +from rudderstack.analytics.get_env import TEST_DATA_PLANE_URL -from rudder_analytics.utils import guess_timezone, clean -from rudder_analytics.consumer import Consumer -from rudder_analytics.request import post -from rudder_analytics.version import VERSION +from rudderstack.analytics.utils import guess_timezone, clean +from rudderstack.analytics.consumer import Consumer, MAX_MSG_SIZE +from rudderstack.analytics.request import post, DatetimeSerializer +from rudderstack.analytics.version import VERSION -try: - import queue -except ImportError: - import Queue as queue +import queue - -ID_TYPES = (numbers.Number, string_types) +ID_TYPES = (numbers.Number, str) +CHANNEL = 'server' class Client(object): - """Create a new Rudder client.""" - log = logging.getLogger('rudder') - anonymoys_id = str(uuid4()) - - def __init__(self, write_key=None, host='https://hosted.rudderlabs.com', debug=False, - max_queue_size=10000, send=True, on_error=None, flush_at=100, - flush_interval=0.5, max_retries=3, sync_mode=False, - timeout=15, thread=1): - require('write_key', write_key, string_types) + class DefaultConfig(object): + write_key = None + host = TEST_DATA_PLANE_URL + on_error = None + debug = False + send = True + sync_mode = False + max_queue_size = 10000 + gzip = True + timeout = 15 + max_retries = 10 + proxies = None + thread = 1 + 
upload_interval = 0.5 + upload_size = 100 + + """Create a new rudderstack client.""" + log = logging.getLogger('rudderstack') + def __init__(self, + write_key=DefaultConfig.write_key, + host=DefaultConfig.host, + debug=DefaultConfig.debug, + max_queue_size=DefaultConfig.max_queue_size, + send=DefaultConfig.send, + on_error=DefaultConfig.on_error, + gzip=DefaultConfig.gzip, + max_retries=DefaultConfig.max_retries, + sync_mode=DefaultConfig.sync_mode, + timeout=DefaultConfig.timeout, + proxies=DefaultConfig.proxies, + thread=DefaultConfig.thread, + upload_size=DefaultConfig.upload_size, + upload_interval=DefaultConfig.upload_interval,): + require('write_key', write_key, str) self.queue = queue.Queue(max_queue_size) self.write_key = write_key @@ -38,7 +61,9 @@ def __init__(self, write_key=None, host='https://hosted.rudderlabs.com', debug=F self.send = send self.sync_mode = sync_mode self.host = host + self.gzip = gzip self.timeout = timeout + self.proxies = proxies if debug: self.log.setLevel(logging.DEBUG) @@ -54,12 +79,13 @@ def __init__(self, write_key=None, host='https://hosted.rudderlabs.com', debug=F # to call flush(). 
if send: atexit.register(self.join) - for n in range(thread): + for _ in range(thread): self.consumers = [] consumer = Consumer( self.queue, write_key, host=host, on_error=on_error, - flush_at=flush_at, flush_interval=flush_interval, - retries=max_retries, timeout=timeout, + upload_size=upload_size, upload_interval=upload_interval, + gzip=gzip, retries=max_retries, timeout=timeout, + proxies=proxies, ) self.consumers.append(consumer) @@ -71,13 +97,12 @@ def identify(self, user_id=None, traits=None, context=None, timestamp=None, anonymous_id=None, integrations=None, message_id=None): traits = traits or {} context = context or {} + # putting traits inside context + context['traits'] = traits integrations = integrations or {} require('user_id or anonymous_id', user_id or anonymous_id, ID_TYPES) require('traits', traits, dict) - if traits != None: - context['traits'] = traits.copy() - msg = { 'integrations': integrations, 'anonymousId': anonymous_id, @@ -85,7 +110,7 @@ def identify(self, user_id=None, traits=None, context=None, timestamp=None, 'context': context, 'type': 'identify', 'userId': user_id, - 'traits': traits, + # 'traits': traits, #traits not needed at root level 'messageId': message_id, } @@ -99,7 +124,7 @@ def track(self, user_id=None, event=None, properties=None, context=None, integrations = integrations or {} require('user_id or anonymous_id', user_id or anonymous_id, ID_TYPES) require('properties', properties, dict) - require('event', event, string_types) + require('event', event, str) msg = { 'integrations': integrations, @@ -168,9 +193,9 @@ def page(self, user_id=None, category=None, name=None, properties=None, require('properties', properties, dict) if name: - require('name', name, string_types) + require('name', name, str) if category: - require('category', category, string_types) + require('category', category, str) msg = { 'integrations': integrations, @@ -197,9 +222,9 @@ def screen(self, user_id=None, category=None, name=None, properties=None, 
require('properties', properties, dict) if name: - require('name', name, string_types) + require('name', name, str) if category: - require('category', category, string_types) + require('category', category, str) msg = { 'integrations': integrations, @@ -215,55 +240,52 @@ def screen(self, user_id=None, category=None, name=None, properties=None, } return self._enqueue(msg) - + def _enqueue(self, msg): """Push a new `msg` onto the queue, return `(success, msg)`""" timestamp = msg['timestamp'] if timestamp is None: - timestamp = datetime.utcnow().replace(tzinfo=tzutc()) + timestamp = datetime.now(timezone.utc).replace(tzinfo=tzutc()) message_id = msg.get('messageId') if message_id is None: message_id = uuid4() + # default integrations should be "All": True + if msg['integrations'] == {} : + msg['integrations'] = {"All" : True} + require('integrations', msg['integrations'], dict) - require('type', msg['type'], string_types) + require('type', msg['type'], str) require('timestamp', timestamp, datetime) require('context', msg['context'], dict) - # add anonymousId to the message if not passed - msg['anonymousId'] = msg['anonymousId'] or self.anonymoys_id - - # copy the userId to context.traits - if msg['userId'] != None: - if 'traits' in msg['context'].keys(): - msg['context']['traits']['userId'] = msg['userId'] - else : - msg['context']['traits'] = {'userId': msg['userId']} - - msg['context']['traits']['anonymousId'] = msg['anonymousId'] - # add common timestamp = guess_timezone(timestamp) msg['timestamp'] = timestamp.isoformat() msg['messageId'] = stringify_id(message_id) msg['context']['library'] = { - 'name': 'rudder-analytics-python', + 'name': 'analytics-python', 'version': VERSION } - msg['userId'] = stringify_id(msg.get('userId', None)) msg['anonymousId'] = stringify_id(msg.get('anonymousId', None)) - + msg['channel'] = CHANNEL msg = clean(msg) self.log.debug('queueing: %s', msg) + # Check message size. 
+ msg_size = len(json.dumps(msg, cls=DatetimeSerializer).encode()) + if msg_size > MAX_MSG_SIZE: + raise RuntimeError(f'Message exceeds {str(int(MAX_MSG_SIZE / 1024))}kb limit. ({str(msg)})') + # if send is False, return msg as if it was successfully queued if not self.send: return True, msg if self.sync_mode: self.log.debug('enqueued with blocking %s.', msg['type']) - post(self.write_key, self.host, timeout=self.timeout, batch=[msg]) + post(self.write_key, self.host, gzip=self.gzip, + timeout=self.timeout, proxies=self.proxies, batch=[msg]) return True, msg @@ -272,7 +294,7 @@ def _enqueue(self, msg): self.log.debug('enqueued %s.', msg['type']) return True, msg except queue.Full: - self.log.warning('rudder-analytics-python queue is full') + self.log.warning('analytics-python queue is full') return False, msg def flush(self): @@ -311,6 +333,6 @@ def require(name, field, data_type): def stringify_id(val): if val is None: return None - if isinstance(val, string_types): + if isinstance(val, str): return val return str(val) diff --git a/rudder_analytics/consumer.py b/rudderstack/analytics/consumer.py similarity index 74% rename from rudder_analytics/consumer.py rename to rudderstack/analytics/consumer.py index 64648f0a..ce8f6ff9 100644 --- a/rudder_analytics/consumer.py +++ b/rudderstack/analytics/consumer.py @@ -4,36 +4,34 @@ import backoff import json -from rudder_analytics.request import post, APIError, DatetimeSerializer +from rudderstack.analytics.request import post, APIError, DatetimeSerializer -try: - from queue import Empty -except ImportError: - from Queue import Empty +from queue import Empty -MAX_MSG_SIZE = 32 << 10 - -# Our servers only accept batches less than 500KB. Here limit is set slightly +# Our servers only accept payloads within this limit. Here limit is set slightly # lower to leave space for extra data that will be added later, eg. "sentAt". 
-BATCH_SIZE_LIMIT = 475000 +MAX_MSG_SIZE = 4 << 10 << 10 ## 4 mb +BATCH_SIZE_LIMIT = 4 << 10 << 10 ## 4 mb class Consumer(Thread): """Consumes the messages from the client's queue.""" - log = logging.getLogger('rudder') + log = logging.getLogger('rudderstack') - def __init__(self, queue, write_key, flush_at=100, host=None, - on_error=None, flush_interval=0.5, retries=10, timeout=15): + def __init__(self, queue, write_key, upload_size=100, host=None, + on_error=None, upload_interval=0.5, gzip=True, retries=10, + timeout=15, proxies=None): """Create a consumer thread.""" Thread.__init__(self) # Make consumer a daemon thread so that it doesn't block program exit self.daemon = True - self.flush_at = flush_at - self.flush_interval = flush_interval + self.upload_size = upload_size + self.upload_interval = upload_interval self.write_key = write_key self.host = host self.on_error = on_error self.queue = queue + self.gzip = gzip # It's important to set running in the constructor: if we are asked to # pause immediately after construction, we might set running to True in # run() *after* we set it to False in pause... 
and keep running @@ -41,6 +39,7 @@ def __init__(self, queue, write_key, flush_at=100, host=None, self.running = True self.retries = retries self.timeout = timeout + self.proxies = proxies def run(self): """Runs the consumer.""" @@ -71,9 +70,9 @@ def upload(self): self.on_error(e, batch) finally: # mark items as acknowledged from queue - for item in batch: + for _ in batch: self.queue.task_done() - return success + return success def next(self): """Return the next batch of items to upload.""" @@ -83,18 +82,18 @@ def next(self): start_time = monotonic.monotonic() total_size = 0 - while len(items) < self.flush_at: + while len(items) < self.upload_size: elapsed = monotonic.monotonic() - start_time - if elapsed >= self.flush_interval: + if elapsed >= self.upload_interval: break try: item = queue.get( - block=True, timeout=self.flush_interval - elapsed) + block=True, timeout=self.upload_interval - elapsed) item_size = len(json.dumps( item, cls=DatetimeSerializer).encode()) if item_size > MAX_MSG_SIZE: self.log.error( - 'Item exceeds 32kb limit, dropping. (%s)', str(item)) + 'Item exceeds 4mb limit, dropping. 
(%s)', str(item)) continue items.append(item) total_size += item_size @@ -104,6 +103,8 @@ def next(self): break except Empty: break + except Exception as e: + self.log.exception('Exception: %s', e) return items @@ -126,6 +127,7 @@ def fatal_exception(exc): max_tries=self.retries + 1, giveup=fatal_exception) def send_request(): - post(self.write_key, self.host, timeout=self.timeout, batch=batch) + post(self.write_key, self.host, gzip=self.gzip, + timeout=self.timeout, batch=batch, proxies=self.proxies) send_request() diff --git a/rudderstack/analytics/get_env.py b/rudderstack/analytics/get_env.py new file mode 100644 index 00000000..813a9b3a --- /dev/null +++ b/rudderstack/analytics/get_env.py @@ -0,0 +1,7 @@ +import os +from dotenv import load_dotenv + +load_dotenv() + +TEST_WRITE_KEY = os.getenv('TEST_WRITE_KEY') +TEST_DATA_PLANE_URL = os.getenv('TEST_DATA_PLANE_URL') diff --git a/rudder_analytics/request.py b/rudderstack/analytics/request.py similarity index 54% rename from rudder_analytics/request.py rename to rudderstack/analytics/request.py index b1df847c..67b4f249 100644 --- a/rudder_analytics/request.py +++ b/rudderstack/analytics/request.py @@ -1,29 +1,44 @@ -from datetime import date, datetime -from dateutil.tz import tzutc +from datetime import date, datetime, timezone +from io import BytesIO +from gzip import GzipFile import logging import json +from dateutil.tz import tzutc from requests.auth import HTTPBasicAuth from requests import sessions -from rudder_analytics.version import VERSION -from rudder_analytics.utils import remove_trailing_slash +from rudderstack.analytics.version import VERSION +from rudderstack.analytics.utils import remove_trailing_slash _session = sessions.Session() -def post(write_key, host=None, timeout=15, **kwargs): +def post(write_key, host=None, gzip=True, timeout=15, proxies=None, **kwargs): """Post the `kwargs` to the API""" - log = logging.getLogger('rudder') + log = logging.getLogger('rudderstack') body = kwargs - 
body["sentAt"] = datetime.utcnow().replace(tzinfo=tzutc()).isoformat() - url = remove_trailing_slash(host or 'https://hosted.rudderlabs.com') + '/v1/batch' + body["sentAt"] = datetime.now(timezone.utc).replace(tzinfo=tzutc()).isoformat() + url = remove_trailing_slash(host or 'https://api.rudderstack.com') + '/v1/batch' auth = HTTPBasicAuth(write_key, '') data = json.dumps(body, cls=DatetimeSerializer) log.debug('making request: %s', data) headers = { 'Content-Type': 'application/json', - 'User-Agent': 'rudderstack-python/' + VERSION + 'User-Agent': 'analytics-python/' + VERSION } + if gzip: + headers['Content-Encoding'] = 'gzip' + data = _gzip_json(data) + + kwargs = { + "data": data, + "auth": auth, + "headers": headers, + "timeout": 15, + } + + if proxies: + kwargs['proxies'] = proxies res = _session.post(url, data=data, auth=auth, headers=headers, timeout=timeout) @@ -39,6 +54,13 @@ def post(write_key, host=None, timeout=15, **kwargs): except ValueError: raise APIError(res.status_code, 'unknown', res.text) +def _gzip_json(data): + buf = BytesIO() + with GzipFile(fileobj=buf, mode='w') as gz: + # 'data' was produced by json.dumps(), + # whose default encoding is utf-8. 
+ gz.write(data.encode('utf-8')) + return buf.getvalue() class APIError(Exception): @@ -48,7 +70,7 @@ def __init__(self, status, code, message): self.code = code def __str__(self): - msg = "[Rudder] {0}: {1} ({2})" + msg = "[rudderstack] {0}: {1} ({2})" return msg.format(self.code, self.message, self.status) diff --git a/rudderstack/analytics/test/__init__.py b/rudderstack/analytics/test/__init__.py new file mode 100644 index 00000000..c7cfb63e --- /dev/null +++ b/rudderstack/analytics/test/__init__.py @@ -0,0 +1,82 @@ +import unittest +import pkgutil +import logging +import sys +import rudderstack.analytics as analytics + +from rudderstack.analytics.client import Client + + +def all_names(): + for _, modname, _ in pkgutil.iter_modules(__path__): + yield 'rudderstack.analytics.test.test_' + modname + + +def all(): + logging.basicConfig(stream=sys.stderr) + return unittest.defaultTestLoader.loadTestsFromNames(all_names()) + + +class TestInit(unittest.TestCase): + def test_writeKey(self): + self.assertIsNone(analytics.default_client) + analytics.flush() + self.assertEqual(analytics.default_client.write_key, 'test-init') + + def test_debug(self): + self.assertIsNone(analytics.default_client) + analytics.debug = True + analytics.flush() + self.assertTrue(analytics.default_client.debug) + analytics.default_client = None + analytics.debug = False + analytics.flush() + self.assertFalse(analytics.default_client.debug) + + def test_gzip(self): + self.assertIsNone(analytics.default_client) + analytics.gzip = True + analytics.flush() + self.assertTrue(analytics.default_client.gzip) + analytics.default_client = None + analytics.gzip = False + analytics.flush() + self.assertFalse(analytics.default_client.gzip) + + def test_host(self): + self.assertIsNone(analytics.default_client) + analytics.host = 'https://test-host/v1/batch' + analytics.flush() + self.assertEqual(analytics.default_client.host, 'https://test-host/v1/batch') + + def test_max_queue_size(self): + 
self.assertIsNone(analytics.default_client) + analytics.max_queue_size = 1337 + analytics.flush() + self.assertEqual(analytics.default_client.queue.maxsize, 1337) + + def test_max_retries(self): + self.assertIsNone(analytics.default_client) + client = Client('testsecret', max_retries=42) + for consumer in client.consumers: + self.assertEqual(consumer.retries, 42) + + def test_sync_mode(self): + self.assertIsNone(analytics.default_client) + analytics.sync_mode = True + analytics.flush() + self.assertTrue(analytics.default_client.sync_mode) + analytics.default_client = None + analytics.sync_mode = False + analytics.flush() + self.assertFalse(analytics.default_client.sync_mode) + + def test_timeout(self): + self.assertIsNone(analytics.default_client) + analytics.timeout = 1.234 + analytics.flush() + self.assertEqual(analytics.default_client.timeout, 1.234) + + def setUp(self): + analytics.write_key = 'test-init' + analytics.default_client = None diff --git a/rudder_analytics/test/client.py b/rudderstack/analytics/test/test_client.py similarity index 82% rename from rudder_analytics/test/client.py rename to rudderstack/analytics/test/test_client.py index 1f6ae097..90fec57b 100644 --- a/rudder_analytics/test/client.py +++ b/rudderstack/analytics/test/test_client.py @@ -1,22 +1,23 @@ from datetime import date, datetime import unittest -import six -import mock import time +from unittest import mock +from rudderstack.analytics.test.test_constants import TEST_PROXY -from analytics.version import VERSION -from analytics.client import Client - +from version import VERSION +from client import Client +from get_env import TEST_WRITE_KEY class TestClient(unittest.TestCase): + def fail(self, e, batch): """Mark the failure handler""" self.failed = True def setUp(self): self.failed = False - self.client = Client('testsecret', on_error=self.fail) + self.client = Client(TEST_WRITE_KEY, on_error=self.fail) def test_requires_write_key(self): self.assertRaises(AssertionError, Client) @@ 
-80,7 +81,7 @@ def test_advanced_track(self): self.assertEqual(msg['event'], 'python test event') self.assertEqual(msg['anonymousId'], 'anonymousId') self.assertEqual(msg['context']['library'], { - 'name': 'rudder-analytics-python', + 'name': 'analytics-python', 'version': VERSION }) self.assertEqual(msg['messageId'], 'messageId') @@ -94,7 +95,7 @@ def test_basic_identify(self): self.assertTrue(success) self.assertFalse(self.failed) - self.assertEqual(msg['traits'], {'trait': 'value'}) + self.assertEqual(msg['context']['traits'], {'trait': 'value'}) self.assertTrue(isinstance(msg['timestamp'], str)) self.assertTrue(isinstance(msg['messageId'], str)) self.assertEqual(msg['userId'], 'userId') @@ -112,10 +113,10 @@ def test_advanced_identify(self): self.assertEqual(msg['timestamp'], '2014-09-03T00:00:00+00:00') self.assertEqual(msg['integrations'], {'Amplitude': True}) self.assertEqual(msg['context']['ip'], '192.168.0.1') - self.assertEqual(msg['traits'], {'trait': 'value'}) + self.assertEqual(msg['context']['traits'], {'trait': 'value'}) self.assertEqual(msg['anonymousId'], 'anonymousId') self.assertEqual(msg['context']['library'], { - 'name': 'rudder-analytics-python', + 'name': 'analytics-python', 'version': VERSION }) self.assertTrue(isinstance(msg['timestamp'], str)) @@ -149,7 +150,7 @@ def test_advanced_group(self): self.assertEqual(msg['traits'], {'trait': 'value'}) self.assertEqual(msg['anonymousId'], 'anonymousId') self.assertEqual(msg['context']['library'], { - 'name': 'rudder-analytics-python', + 'name': 'analytics-python', 'version': VERSION }) self.assertTrue(isinstance(msg['timestamp'], str)) @@ -191,7 +192,7 @@ def test_advanced_page(self): self.assertEqual(msg['properties'], {'property': 'value'}) self.assertEqual(msg['anonymousId'], 'anonymousId') self.assertEqual(msg['context']['library'], { - 'name': 'rudder-analytics-python', + 'name': 'analytics-python', 'version': VERSION }) self.assertEqual(msg['category'], 'category') @@ -225,7 +226,7 @@ def 
test_advanced_screen(self): self.assertEqual(msg['properties'], {'property': 'value'}) self.assertEqual(msg['anonymousId'], 'anonymousId') self.assertEqual(msg['context']['library'], { - 'name': 'rudder-analytics-python', + 'name': 'analytics-python', 'version': VERSION }) self.assertTrue(isinstance(msg['timestamp'], str)) @@ -238,8 +239,8 @@ def test_advanced_screen(self): def test_flush(self): client = self.client # set up the consumer with more requests than a single batch will allow - for i in range(1000): - success, msg = client.identify('userId', {'trait': 'value'}) + for _ in range(1000): + _, _ = client.identify('userId', {'trait': 'value'}) # We can't reliably assert that the queue is non-empty here; that's # a race condition. We do our best to load it up though. client.flush() @@ -249,8 +250,8 @@ def test_flush(self): def test_shutdown(self): client = self.client # set up the consumer with more requests than a single batch will allow - for i in range(1000): - success, msg = client.identify('userId', {'trait': 'value'}) + for _ in range(1000): + _, _ = client.identify('userId', {'trait': 'value'}) client.shutdown() # we expect two things after shutdown: # 1. 
client queue is empty @@ -260,33 +261,33 @@ def test_shutdown(self): self.assertFalse(consumer.is_alive()) def test_synchronous(self): - client = Client('testsecret', sync_mode=True) + client = Client(TEST_WRITE_KEY, sync_mode=True) - success, message = client.identify('userId') + success, _ = client.identify('userId') self.assertFalse(client.consumers) self.assertTrue(client.queue.empty()) self.assertTrue(success) def test_overflow(self): - client = Client('testsecret', max_queue_size=1) + client = Client(TEST_WRITE_KEY, max_queue_size=1) # Ensure consumer thread is no longer uploading client.join() - for i in range(10): + for _ in range(10): client.identify('userId') - success, msg = client.identify('userId') + success, _ = client.identify('userId') # Make sure we are informed that the queue is at capacity self.assertFalse(success) - def test_success_on_invalid_write_key(self): + def test_failure_on_invalid_write_key(self): client = Client('bad_key', on_error=self.fail) client.track('userId', 'event') client.flush() - self.assertFalse(self.failed) + self.assertTrue(self.failed) def test_unicode(self): - Client(six.u('unicode_key')) + Client('unicode_key') def test_numeric_user_id(self): self.client.track(1234, 'python event') @@ -308,30 +309,47 @@ def test_identify_with_date_object(self): self.assertTrue(success) self.assertFalse(self.failed) - self.assertEqual(msg['traits'], {'birthdate': date(1981, 2, 2)}) + self.assertEqual(msg['context']['traits'], {'birthdate': date(1981, 2, 2)}) - def test_user_defined_flush_at(self): - client = Client('testsecret', on_error=self.fail, - flush_at=10, flush_interval=3) + def assert_gzip_enabled_by_default(self): + client = Client(TEST_WRITE_KEY, on_error=self.fail) + self.assertTrue(client.gzip) + def test_gzip(self): + client = Client(TEST_WRITE_KEY, on_error=self.fail, gzip=True) + for _ in range(10): + client.identify('userId', {'trait': 'value'}) + client.flush() + self.assertFalse(self.failed) + + + def 
test_user_defined_upload_size(self): + client = Client(write_key = TEST_WRITE_KEY, on_error=self.fail, + upload_size=10, upload_interval=3) + print("write_key", TEST_WRITE_KEY) def mock_post_fn(*args, **kwargs): - self.assertEquals(len(kwargs['batch']), 10) + self.assertEqual(len(kwargs['batch']), 10) # the post function should be called 2 times, with a batch size of 10 # each time. - with mock.patch('analytics.consumer.post', side_effect=mock_post_fn) \ + with mock.patch('rudderstack.analytics.consumer.post', side_effect=mock_post_fn) \ as mock_post: for _ in range(20): client.identify('userId', {'trait': 'value'}) time.sleep(1) - self.assertEquals(mock_post.call_count, 2) + self.assertEqual(mock_post.call_count, 2) def test_user_defined_timeout(self): - client = Client('testsecret', timeout=10) + client = Client(TEST_WRITE_KEY, timeout=10) for consumer in client.consumers: - self.assertEquals(consumer.timeout, 10) + self.assertEqual(consumer.timeout, 10) def test_default_timeout_15(self): - client = Client('testsecret') + client = Client(TEST_WRITE_KEY) for consumer in client.consumers: - self.assertEquals(consumer.timeout, 15) + self.assertEqual(consumer.timeout, 15) + + def test_proxies(self): + client = Client(TEST_WRITE_KEY, proxies=TEST_PROXY) + success, msg = client.identify('userId', {'trait': 'value'}) + self.assertTrue(success) diff --git a/rudderstack/analytics/test/test_constants.py b/rudderstack/analytics/test/test_constants.py new file mode 100644 index 00000000..699836c9 --- /dev/null +++ b/rudderstack/analytics/test/test_constants.py @@ -0,0 +1 @@ +TEST_PROXY = '203.243.63.16:80' \ No newline at end of file diff --git a/rudder_analytics/test/consumer.py b/rudderstack/analytics/test/test_consumer.py similarity index 63% rename from rudder_analytics/test/consumer.py rename to rudderstack/analytics/test/test_consumer.py index 739c2c58..17083d76 100644 --- a/rudder_analytics/test/consumer.py +++ b/rudderstack/analytics/test/test_consumer.py @@ -1,15 
+1,18 @@ import unittest -import mock +from unittest import mock import time import json +from rudderstack.analytics.test.test_constants import TEST_PROXY + try: from queue import Queue except ImportError: from Queue import Queue -from analytics.consumer import Consumer, MAX_MSG_SIZE -from analytics.request import APIError +from rudderstack.analytics.consumer import Consumer, MAX_MSG_SIZE +from rudderstack.analytics.request import APIError +from rudderstack.analytics.get_env import TEST_WRITE_KEY, TEST_DATA_PLANE_URL class TestConsumer(unittest.TestCase): @@ -23,12 +26,12 @@ def test_next(self): def test_next_limit(self): q = Queue() - flush_at = 50 - consumer = Consumer(q, '', flush_at) + upload_size = 50 + consumer = Consumer(q, '', upload_size) for i in range(10000): q.put(i) next = consumer.next() - self.assertEqual(next, list(range(flush_at))) + self.assertEqual(next, list(range(upload_size))) def test_dropping_oversize_msg(self): q = Queue() @@ -41,7 +44,8 @@ def test_dropping_oversize_msg(self): def test_upload(self): q = Queue() - consumer = Consumer(q, 'testsecret') + consumer = Consumer(q, host=TEST_DATA_PLANE_URL, + write_key=TEST_WRITE_KEY) track = { 'type': 'track', 'event': 'python event', @@ -51,15 +55,17 @@ def test_upload(self): success = consumer.upload() self.assertTrue(success) - def test_flush_interval(self): + def test_upload_interval(self): # Put _n_ items in the queue, pausing a little bit more than - # _flush_interval_ after each one. + # _upload_interval_ after each one. # The consumer should upload _n_ times. 
q = Queue() - flush_interval = 0.3 - consumer = Consumer(q, 'testsecret', flush_at=10, - flush_interval=flush_interval) - with mock.patch('analytics.consumer.post') as mock_post: + + upload_interval = 0.3 + consumer = Consumer(q, host=TEST_DATA_PLANE_URL, + write_key=TEST_WRITE_KEY, upload_size=10, + upload_interval=upload_interval) + with mock.patch('rudderstack.analytics.consumer.post') as mock_post: consumer.start() for i in range(0, 3): track = { @@ -68,31 +74,35 @@ def test_flush_interval(self): 'userId': 'userId' } q.put(track) - time.sleep(flush_interval * 1.1) + time.sleep(upload_interval * 1.1) self.assertEqual(mock_post.call_count, 3) def test_multiple_uploads_per_interval(self): - # Put _flush_at*2_ items in the queue at once, then pause for - # _flush_interval_. The consumer should upload 2 times. + # Put _upload_size*2_ items in the queue at once, then pause for + # _upload_interval_. The consumer should upload 2 times. q = Queue() - flush_interval = 0.5 - flush_at = 10 - consumer = Consumer(q, 'testsecret', flush_at=flush_at, - flush_interval=flush_interval) - with mock.patch('analytics.consumer.post') as mock_post: + upload_interval = 0.5 + upload_size = 10 + consumer = Consumer(q,host=TEST_DATA_PLANE_URL, + write_key=TEST_WRITE_KEY, upload_size=upload_size, + upload_interval=upload_interval) + with mock.patch('rudderstack.analytics.consumer.post') as mock_post: consumer.start() - for i in range(0, flush_at * 2): + for i in range(0, upload_size * 2): track = { 'type': 'track', 'event': 'python event %d' % i, 'userId': 'userId' } q.put(track) - time.sleep(flush_interval * 1.1) + time.sleep(upload_interval * 1.1) self.assertEqual(mock_post.call_count, 2) - def test_request(self): - consumer = Consumer(None, 'testsecret') + + @classmethod + def test_request(cls): + consumer = Consumer(None, host=TEST_DATA_PLANE_URL, + write_key=TEST_WRITE_KEY) track = { 'type': 'track', 'event': 'python event', @@ -109,7 +119,7 @@ def mock_post(*args, **kwargs): raise 
expected_exception mock_post.call_count = 0 - with mock.patch('analytics.consumer.post', + with mock.patch('rudderstack.analytics.consumer.post', mock.Mock(side_effect=mock_post)): track = { 'type': 'track', @@ -117,7 +127,7 @@ def mock_post(*args, **kwargs): 'userId': 'userId' } # request() should succeed if the number of exceptions raised is - # less than the retries paramater. + # less than the retries parameter. if exception_count <= consumer.retries: consumer.request([track]) else: @@ -135,21 +145,25 @@ def mock_post(*args, **kwargs): def test_request_retry(self): # we should retry on general errors - consumer = Consumer(None, 'testsecret') + consumer = Consumer(None, host=TEST_DATA_PLANE_URL, + write_key=TEST_WRITE_KEY,) self._test_request_retry(consumer, Exception('generic exception'), 2) # we should retry on server errors - consumer = Consumer(None, 'testsecret') + consumer = Consumer(None,host=TEST_DATA_PLANE_URL, + write_key=TEST_WRITE_KEY,) self._test_request_retry(consumer, APIError( 500, 'code', 'Internal Server Error'), 2) # we should retry on HTTP 429 errors - consumer = Consumer(None, 'testsecret') + consumer = Consumer(None, host=TEST_DATA_PLANE_URL, + write_key=TEST_WRITE_KEY,) self._test_request_retry(consumer, APIError( 429, 'code', 'Too Many Requests'), 2) # we should NOT retry on other client errors - consumer = Consumer(None, 'testsecret') + consumer = Consumer(None,host=TEST_DATA_PLANE_URL, + write_key=TEST_WRITE_KEY,) api_error = APIError(400, 'code', 'Client Errors') try: self._test_request_retry(consumer, api_error, 1) @@ -159,19 +173,22 @@ def test_request_retry(self): self.fail('request() should not retry on client errors') # test for number of exceptions raise > retries value - consumer = Consumer(None, 'testsecret', retries=3) + consumer = Consumer(None, host=TEST_DATA_PLANE_URL, + write_key=TEST_WRITE_KEY, retries=3) self._test_request_retry(consumer, APIError( 500, 'code', 'Internal Server Error'), 3) def test_pause(self): - 
consumer = Consumer(None, 'testsecret') + consumer = Consumer(None, host=TEST_DATA_PLANE_URL, + write_key=TEST_WRITE_KEY,) consumer.pause() self.assertFalse(consumer.running) def test_max_batch_size(self): q = Queue() consumer = Consumer( - q, 'testsecret', flush_at=100000, flush_interval=3) + q,host=TEST_DATA_PLANE_URL, + write_key=TEST_WRITE_KEY, upload_size=100000, upload_interval=3) track = { 'type': 'track', 'event': 'python event', @@ -179,20 +196,31 @@ def test_max_batch_size(self): } msg_size = len(json.dumps(track).encode()) # number of messages in a maximum-size batch - n_msgs = int(475000 / msg_size) + n_msgs = int((4 << 10 << 10) / msg_size) def mock_post_fn(_, data, **kwargs): res = mock.Mock() res.status_code = 200 - self.assertTrue(len(data.encode()) < 500000, + self.assertTrue(len(data) < 500000, 'batch size (%d) exceeds 500KB limit' - % len(data.encode())) + % len(data)) return res - with mock.patch('analytics.request._session.post', + with mock.patch('rudderstack.analytics.request._session.post', side_effect=mock_post_fn) as mock_post: consumer.start() for _ in range(0, n_msgs + 2): q.put(track) q.join() - self.assertEquals(mock_post.call_count, 2) + self.assertEqual(mock_post.call_count, 2) + + @classmethod + def test_proxies(cls): + consumer = Consumer(None, host=TEST_DATA_PLANE_URL, + write_key=TEST_WRITE_KEY, proxies=TEST_PROXY) + track = { + 'type': 'track', + 'event': 'python event', + 'userId': 'userId' + } + consumer.request([track]) diff --git a/rudder_analytics/test/module.py b/rudderstack/analytics/test/test_module.py similarity index 88% rename from rudder_analytics/test/module.py rename to rudderstack/analytics/test/test_module.py index 6fcff6c8..8243b560 100644 --- a/rudder_analytics/test/module.py +++ b/rudderstack/analytics/test/test_module.py @@ -1,7 +1,7 @@ import unittest -import analytics - +import rudderstack.analytics as analytics +from rudderstack.analytics.get_env import TEST_WRITE_KEY class TestModule(unittest.TestCase): 
@@ -10,7 +10,7 @@ def failed(self): def setUp(self): self.failed = False - analytics.write_key = 'testsecret' + analytics.write_key = TEST_WRITE_KEY analytics.on_error = self.failed def test_no_write_key(self): diff --git a/rudder_analytics/test/request.py b/rudderstack/analytics/test/test_request.py similarity index 51% rename from rudder_analytics/test/request.py rename to rudderstack/analytics/test/test_request.py index 6eda11c2..3c28e57d 100644 --- a/rudder_analytics/test/request.py +++ b/rudderstack/analytics/test/test_request.py @@ -3,13 +3,14 @@ import json import requests -from analytics.request import post, DatetimeSerializer - +from rudderstack.analytics.request import _gzip_json, post, DatetimeSerializer +from rudderstack.analytics.get_env import TEST_WRITE_KEY, TEST_DATA_PLANE_URL +from rudderstack.analytics.test.test_constants import TEST_PROXY class TestRequests(unittest.TestCase): def test_valid_request(self): - res = post('testsecret', batch=[{ + res = post(TEST_WRITE_KEY,host=TEST_DATA_PLANE_URL, batch=[{ 'userId': 'userId', 'event': 'python event', 'type': 'track' @@ -17,12 +18,12 @@ def test_valid_request(self): self.assertEqual(res.status_code, 200) def test_invalid_request_error(self): - self.assertRaises(Exception, post, 'testsecret', + self.assertRaises(Exception, post, 'TEST_WRITE_KEY', 'https://hosted.rudderlabs.com', False, '[{]') def test_invalid_host(self): - self.assertRaises(Exception, post, 'testsecret', - 'hosted.rudderlabs.com/', batch=[]) + self.assertRaises(Exception, post, TEST_WRITE_KEY, + 'https://invalid_host/', batch=[]) def test_datetime_serialization(self): data = {'created': datetime(2012, 3, 4, 5, 6, 7, 891011)} @@ -37,7 +38,7 @@ def test_date_serialization(self): self.assertEqual(result, expected) def test_should_not_timeout(self): - res = post('testsecret', batch=[{ + res = post(TEST_WRITE_KEY,host=TEST_DATA_PLANE_URL, batch=[{ 'userId': 'userId', 'event': 'python event', 'type': 'track' @@ -46,8 +47,31 @@ def 
test_should_not_timeout(self): def test_should_timeout(self): with self.assertRaises(requests.ReadTimeout): - post('testsecret', batch=[{ + post(TEST_WRITE_KEY,host=TEST_DATA_PLANE_URL, + batch=[{ 'userId': 'userId', 'event': 'python event', 'type': 'track' }], timeout=0.0001) + + def test_gzip_size_reduction(self): + body = [{ + 'userId': 'userId', + 'event': 'python event', + 'type': 'track' + },{ + 'userId': 'userId', + 'event': 'python event', + 'type': 'track' + }] + data = json.dumps(body, cls=DatetimeSerializer) + self.assertTrue(len(data) > len(_gzip_json(data = data))) + + def test_proxies(self): + res = post(TEST_WRITE_KEY,host=TEST_DATA_PLANE_URL, batch=[{ + 'userId': 'userId', + 'event': 'python event', + 'type': 'track', + 'proxies': TEST_PROXY + }]) + self.assertEqual(res.status_code, 200) diff --git a/rudder_analytics/test/utils.py b/rudderstack/analytics/test/test_utils.py similarity index 84% rename from rudder_analytics/test/utils.py rename to rudderstack/analytics/test/test_utils.py index 0627ecd2..fae2b1ec 100644 --- a/rudder_analytics/test/utils.py +++ b/rudderstack/analytics/test/test_utils.py @@ -1,11 +1,10 @@ -from datetime import date, datetime, timedelta +from datetime import date, datetime, timedelta, timezone from decimal import Decimal import unittest from dateutil.tz import tzutc -import six -from analytics import utils +from rudderstack.analytics import utils class TestUtils(unittest.TestCase): @@ -25,7 +24,7 @@ def test_timezone_utils(self): def test_clean(self): simple = { 'decimal': Decimal('0.142857'), - 'unicode': six.u('woo'), + 'unicode': 'woo', 'date': datetime.now(), 'long': 200000000, 'integer': 1, @@ -52,22 +51,18 @@ def test_clean(self): def test_clean_with_dates(self): dict_with_dates = { 'birthdate': date(1980, 1, 1), - 'registration': datetime.utcnow(), + 'registration': datetime.now(timezone.utc), } self.assertEqual(dict_with_dates, utils.clean(dict_with_dates)) - def test_bytes(self): - if six.PY3: - item = bytes(10) 
- else: - item = bytearray(10) - + @classmethod + def test_bytes(cls): + item = bytes(10) utils.clean(item) def test_clean_fn(self): cleaned = utils.clean({'fn': lambda x: x, 'number': 4}) self.assertEqual(cleaned['number'], 4) - # TODO: fixme, different behavior on python 2 and 3 if 'fn' in cleaned: self.assertEqual(cleaned['fn'], None) diff --git a/rudder_analytics/utils.py b/rudderstack/analytics/utils.py similarity index 87% rename from rudder_analytics/utils.py rename to rudderstack/analytics/utils.py index 4ffe56ec..30d8509a 100644 --- a/rudder_analytics/utils.py +++ b/rudderstack/analytics/utils.py @@ -1,12 +1,12 @@ -from dateutil.tz import tzlocal, tzutc -from datetime import date, datetime -from decimal import Decimal +from enum import Enum import logging import numbers -import six +from decimal import Decimal +from datetime import date, datetime +from dateutil.tz import tzlocal, tzutc -log = logging.getLogger('rudder') +log = logging.getLogger('rudderstack') def is_naive(dt): @@ -31,9 +31,8 @@ def guess_timezone(dt): # this was created using datetime.datetime.now() # so we are in the local timezone return dt.replace(tzinfo=tzlocal()) - else: - # at this point, the best we can do is guess UTC - return dt.replace(tzinfo=tzutc()) + # at this point, the best we can do is guess UTC + return dt.replace(tzinfo=tzutc()) return dt @@ -47,13 +46,15 @@ def remove_trailing_slash(host): def clean(item): if isinstance(item, Decimal): return float(item) - elif isinstance(item, (six.string_types, bool, numbers.Number, datetime, + elif isinstance(item, (str, bool, numbers.Number, datetime, date, type(None))): return item elif isinstance(item, (set, list, tuple)): return _clean_list(item) elif isinstance(item, dict): return _clean_dict(item) + elif isinstance(item, Enum): + return clean(item.value) else: return _coerce_unicode(item) @@ -64,7 +65,7 @@ def _clean_list(list_): def _clean_dict(dict_): data = {} - for k, v in six.iteritems(dict_): + for k, v in dict_.items(): 
try: data[k] = clean(v) except TypeError: diff --git a/rudderstack/analytics/version.py b/rudderstack/analytics/version.py new file mode 100644 index 00000000..625b2b95 --- /dev/null +++ b/rudderstack/analytics/version.py @@ -0,0 +1 @@ +VERSION = '2.1.4' diff --git a/setup.cfg b/setup.cfg index 0499089e..ad388d24 100644 --- a/setup.cfg +++ b/setup.cfg @@ -2,4 +2,4 @@ universal = 1 [metadata] -description-file = README.md \ No newline at end of file +description_file = README.md diff --git a/setup.py b/setup.py index 12d29be1..75bbe62f 100644 --- a/setup.py +++ b/setup.py @@ -1,35 +1,25 @@ -import os import sys - -try: - from setuptools import setup -except ImportError: - from distutils.core import setup +from setuptools import setup +from pathlib import Path # Don't import rudder_analytics module here, since deps may not be installed -sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'rudder_analytics')) -from version import VERSION +sys.path.insert(0, str(Path(__file__).resolve().parent / 'rudderstack' / 'analytics')) +from rudderstack.analytics.version import VERSION -long_description = ''' -RudderStack is a platform for collecting, storing and routing customer event data to dozens -of tools. RudderStack is open-source, can run in your cloud environment -(AWS, GCP, Azure or even your data-centre) and provides a powerful transformation -framework to process your event data on the fly. 
-''' +long_description = (Path(__file__).resolve().parent / 'README.md').read_text(encoding='utf-8') install_requires = [ - "requests>=2.7,<3.0", - "six>=1.4", - "monotonic>=1.5", - "backoff==1.6.0", - "python-dateutil>2.1" + "requests>=2.32.3,<3.0", + "monotonic>=1.6,<2.0", + "backoff>=2.2.1,<3.0", + "python-dateutil>=2.2,<3.0", + "python-dotenv>=1.0.1,<2.0.0", + "deprecation>=2.1.0,<3.0.0", ] tests_require = [ - "mock==2.0.0", - "pylint==1.9.3", - "flake8==3.7.9", - "coverage==4.5.4" + "mock==5.1.0", + "flake8==7.1.1", ] setup( @@ -37,11 +27,11 @@ version=VERSION, url='https://github.com/rudderlabs/rudder-sdk-python', author='RudderStack', - author_email='arnab@rudderlabs.com', + author_email='sdk@rudderstack.com', maintainer='RudderStack', - maintainer_email='arnab@rudderlabs.com', - test_suite='rudder_analytics.test.all', - packages=['rudder_analytics', 'rudder_analytics.test'], + maintainer_email='sdk@rudderstack.com', + packages=['rudderstack.analytics', 'rudderstack.analytics.test'], + python_requires='>=3.8.0', license='MIT License', install_requires=install_requires, keywords=['rudder', 'rudderstack', 'analytics'], @@ -50,22 +40,21 @@ }, description='RudderStack is an open-source Segment alternative written in Go, built for the enterprise.', long_description=long_description, + long_description_content_type='text/markdown', classifiers=[ "Development Status :: 5 - Production/Stable", "Intended Audience :: Developers", "License :: OSI Approved :: MIT License", "Operating System :: OS Independent", "Programming Language :: Python", - "Programming Language :: Python :: 2", - "Programming Language :: Python :: 2.6", - "Programming Language :: Python :: 2.7", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.2", - "Programming Language :: Python :: 3.3", - "Programming Language :: Python :: 3.4", - "Programming Language :: Python :: 3.5", - "Programming Language :: Python :: 3.6", - "Programming Language :: Python :: 3.7", 
"Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Topic :: Software Development :: Libraries :: Python Modules", ], + zip_safe=False, + include_package_data=True, ) diff --git a/setup_env.sh b/setup_env.sh new file mode 100755 index 00000000..9946ce6b --- /dev/null +++ b/setup_env.sh @@ -0,0 +1,15 @@ +# setup_env.sh +# Clean up temporarary directories +rm -rf build +rm -rf dist +rm -rf rudder_sdk_python.egg-info + +# Ensure pip is updated +pip install --upgrade pip + +# Install extra dependencies +# Use specific versions to avoid conflicts +pip install pip-tools==7.4.1 setuptools==75.1.0 wheel==0.44.0 twine==5.1.1 + +# Install project dependencies +pip install -r requirements.txt diff --git a/simulator.py b/simulator.py new file mode 100644 index 00000000..76f4374d --- /dev/null +++ b/simulator.py @@ -0,0 +1,119 @@ +import logging +import argparse +import json +import rudderstack.analytics as analytics + +__name__ = 'simulator.py' +__version__ = '0.0.1' +__description__ = 'scripting simulator' + + +def json_hash(str): + if str: + return json.loads(str) + +# python3nsimulator.py -type=<type> --writeKey=<rudderstackWriteKey> [options] + + +parser = argparse.ArgumentParser(description='send a Rudderstack message') + +parser.add_argument('--writeKey', help='the Rudderstack writeKey') +parser.add_argument('--dataPlaneUrl', help='The Rudderstack dataplane url') + +def true_or_false(arg): + ua = str(arg).upper() + if 'TRUE'.startswith(ua): + return True + elif 'FALSE'.startswith(ua): + return False + else: + print("Enter True or False for gzip. Considering False. Invalid gzip value: " + arg) + return False #error condition maybe? + +parser.add_argument('--gzip', default=False, type=true_or_false, help='Pass true to enable gzip compression, else false. 
Default is false') + +parser.add_argument('--type', help='The Rudderstack message type') + +parser.add_argument('--userId', help='the user id to send the event as') +parser.add_argument( + '--anonymousId', help='the anonymous user id to send the event as') +parser.add_argument( + '--context', help='additional context for the event (JSON-encoded)') + +parser.add_argument('--event', help='the event name to send with the event') +parser.add_argument( + '--properties', help='the event properties to send (JSON-encoded)') + +parser.add_argument( + '--name', help='name of the screen or page to send with the message') + +parser.add_argument( + '--traits', help='the identify/group traits to send (JSON-encoded)') + +parser.add_argument('--groupId', help='the group id') + +options = parser.parse_args() + + + +def failed(status, msg): + raise Exception(msg) + + +def track(): + analytics.track(options.userId, options.event, anonymous_id=options.anonymousId, + properties=json_hash(options.properties), context=json_hash(options.context)) + + +def page(): + analytics.page(options.userId, name=options.name, anonymous_id=options.anonymousId, + properties=json_hash(options.properties), context=json_hash(options.context)) + + +def screen(): + analytics.screen(options.userId, name=options.name, anonymous_id=options.anonymousId, + properties=json_hash(options.properties), context=json_hash(options.context)) + + +def identify(): + analytics.identify(options.userId, anonymous_id=options.anonymousId, + traits=json_hash(options.traits), context=json_hash(options.context)) + + +def group(): + analytics.group(options.userId, options.groupId, json_hash(options.traits), + json_hash(options.context), anonymous_id=options.anonymousId) + + +def unknown(): + print() + + +analytics.write_key = options.writeKey +analytics.on_error = failed +analytics.debug = True +analytics.dataPlaneUrl = options.dataPlaneUrl + +analytics.gzip = options.gzip + +log = logging.getLogger('rudderstack') +ch = 
logging.StreamHandler() +ch.setLevel(logging.DEBUG) +log.addHandler(ch) + +switcher = { + "track": track, + "page": page, + "screen": screen, + "identify": identify, + "group": group +} + +func = switcher.get(options.type) +if func: + func() + analytics.shutdown() +else: + print("Invalid Message Type " + options.type) + + diff --git a/test.py b/test.py new file mode 100644 index 00000000..c3f323c1 --- /dev/null +++ b/test.py @@ -0,0 +1,5 @@ +import unittest +loader = unittest.TestLoader() +tests = loader.discover('rudderstack/analytics/') +testRunner = unittest.runner.TextTestRunner() +testRunner.run(tests) \ No newline at end of file