diff --git a/.gitignore b/.gitignore index e75beff..4d0f3f4 100644 --- a/.gitignore +++ b/.gitignore @@ -66,3 +66,12 @@ profile # AppCode .idea + +# dist +/dist/ + +# MANIFEST +MANIFEST + +#local +local.properties diff --git a/LICENSE b/LICENSE new file mode 100755 index 0000000..0d8fbcb --- /dev/null +++ b/LICENSE @@ -0,0 +1,28 @@ +软件名称:神策分析 SDK +版本号:所有版本 +许可协议版本:1.0 + +1. 商业许可协议(用于商业用途需购买许可) +任何商业用途必须获得商业许可。 + +商业许可协议条款: + +- 商业用途:任何直接或间接产生收入的用途都需要购买商业许可。 +- 付款条款:在使用本软件用于商业用途之前,您必须支付全额许可费用。具体的付款方式将在双方联系后提供。 +- 商业支持:购买商业许可后,您将获得一年的技术支持和软件更新服务。 +- 禁止再许可:商业用户不得再许可、转售或转让本软件。每份商业许可仅适用于单一实体或公司。 +- 源代码访问:购买商业许可的用户将获得本软件的代码访问权限,并可根据业务需求进行内部修改。但不得公开发布或再分发修改后的版本。 +- 使用范围限制:商业许可仅限于购买者的内部使用,不得与第三方共享或用于为第三方提供服务。任何超出许可范围的使用行为均需额外授权,并可能产生额外费用。 +- 联系信息:如需购买商业许可,请联系 dv@sensorsdata.com。 +- 知识产权声明:本软件的版权归神策网络科技(北京)有限公司所有。购买商业许可仅授予您使用权,所有权仍归属本公司。 +- 终止条款: 如果您未支付相关费用或违反本协议的任何条款,商业许可将自动终止。您必须立即停止所有商业用途,并销毁或删除所有软件副本。 + +2. 附加授权规则条款 +授权规则条款: + +- 功能限制:未经本软件作者的明确书面许可,您不得移除、绕过或规避本软件中的任何功能限制或试用限制。 +- 商标使用:未经授权,您不得在宣传、市场推广或销售产品时使用本软件的名称、商标或品牌标识。任何商标使用必须得到明确的书面许可。 +- 修改条款:本协议的条款可能会不时更新,用户有责任定期检查最新版本。任何重大更改将通过项目主页或电子邮件通知用户。 + +3. 联系方式 +如需更多信息或申请商业许可,请联系 dv@sensorsdata.com。 diff --git a/LICENSE.txt b/LICENSE.txt deleted file mode 100644 index 8b48d57..0000000 --- a/LICENSE.txt +++ /dev/null @@ -1,13 +0,0 @@ -Copyright 2015 SensorsData Inc. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
diff --git a/README.md b/README.md index 0a2cfad..06fb0cc 100644 --- a/README.md +++ b/README.md @@ -1,32 +1,39 @@ -# Sensors Analytics + -This is the official Python SDK for Sensors Analytics. +[![License](https://img.shields.io/github/license/sensorsdata/sa-sdk-python.svg)](https://github.com/sensorsdata/sa-sdk-python/blob/master/LICENSE) +[![GitHub release](https://img.shields.io/github/tag/sensorsdata/sa-sdk-python.svg?label=release)](https://github.com/sensorsdata/sa-sdk-python/releases) +[![GitHub release date](https://img.shields.io/github/release-date/sensorsdata/sa-sdk-python.svg)](https://github.com/sensorsdata/sa-sdk-python/releases) -## Easy Installation +## 神策简介 -You can get Sensors Analytics SDK using pip. +[**神策数据**](https://www.sensorsdata.cn/) +(Sensors Data),隶属于神策网络科技(北京)有限公司,是一家专业的大数据分析服务公司,大数据分析行业开拓者,为客户提供深度用户行为分析平台、以及专业的咨询服务和行业解决方案,致力于帮助客户实现数据驱动。神策数据立足大数据及用户行为分析的技术与实践前沿,业务现已覆盖以互联网、金融、零售快消、高科技、制造等为代表的十多个主要行业、并可支持企业多个职能部门。公司总部在北京,并在上海、深圳、合肥、武汉等地拥有本地化的服务团队,覆盖东区及南区市场;公司拥有专业的服务团队,为客户提供一对一的客户服务。公司在大数据领域积累的核心关键技术,包括在海量数据采集、存储、清洗、分析挖掘、可视化、智能应用、安全与隐私保护等领域。 [**More**](https://www.sensorsdata.cn/about/aboutus.html) -``` - pip install SensorsAnalyticsSDK -``` +## SDK 简介 -Once the SDK is successfully installed, use the Sensors Analytics SDK likes: +SensorsAnalytics SDK 是国内第一家开源商用版用户行为采集 SDK,目前支持代码埋点、全埋点、App 点击图、可视化全埋点等。目前已累计有 1500 多家付费客户,2500+ 的 App 集成使用,作为 App 数据采集利器,致力于帮助客户挖掘更多的商业价值,为其精准运营和业务支撑提供了可靠的数据来源。其采集全面而灵活、性能良好,并一直保持稳定的迭代,经受住了时间和客户的考验。 -```python - import sensorsanalytics +## 神策埋点 SDK 官网 +如需了解神策埋点 SDK 的更多商业授权信息,请访问[神策埋点 SDK 官网](https://jssdk.debugbox.sensorsdata.cn/)获取更多详细信息。 - // Gets the url of Sensors Analytics in the home page. 
- SA_SERVER_URL = 'YOUR_SERVER_URL' +## 联系我们 +若您有商业合作或产品集成需求,请通过下面的渠道联系我们获取专业服务与支持。 - // Initialized the Sensors Analytics SDK with Default Consumer - consumer = sensorsanalytics.DefaultConsumer(SA_SERVER_URL) - sa = sensorsanalytics.SensorsAnalytics(consumer) +| 加微信号:skycode008,或扫码添加联系人 | 扫码关注「神策埋点 SDK」公众号 ![gzh](https://github.com/sensorsdata/sa-sdk-android/blob/master/gzh.jpeg) | +| ------ | ------ | - // Track the event 'ServerStart' - sa.track("ABCDEFG1234567", "ServerStart") -``` +## 基本要求 +SDK 兼容 Python 2.6+ 和 Python 3.X,不依赖第三方库。 -## To learn more +## 集成文档 -See our [full manual](http://www.sensorsdata.cn/manual/python_sdk.html) +请参考神策官网 [Python SDK 集成文档](https://manual.sensorsdata.cn/sa/latest/tech_sdk_server_python-1573931.html)。 + +## 新书推荐 + +| [《数据驱动:从方法到实践》](https://item.jd.com/12322322.html) | [《Android 全埋点解决方案》](https://item.jd.com/12574672.html) | [《iOS 全埋点解决方案》](https://item.jd.com/12867068.html) +| ------ | ------ | ------ | + +## License +[License 协议](https://github.com/sensorsdata/sa-sdk-python/blob/master/LICENSE) diff --git a/README.txt b/README.txt deleted file mode 100644 index 4fecb8c..0000000 --- a/README.txt +++ /dev/null @@ -1,14 +0,0 @@ -===================== -Sensors Analytics SDK -===================== - -Sensors Analytics SDK -===================== - -This is the official Python SDK for Sensors Analytics. - -To Learn More -------------- - -See our `full manual `_. 
- diff --git a/sensorsanalytics/sdk.py b/sensorsanalytics/sdk.py index 71d9310..75ab62b 100644 --- a/sensorsanalytics/sdk.py +++ b/sensorsanalytics/sdk.py @@ -13,6 +13,7 @@ import threading import time import traceback +from enum import Enum try: from urllib.parse import urlparse @@ -25,10 +26,13 @@ import urllib2 import urllib -SDK_VERSION = '1.7.5' +SDK_VERSION = '1.11.0' +batch_consumer_lock = threading.RLock() try: isinstance("", basestring) + + def is_str(s): return isinstance(s, basestring) except NameError: @@ -36,6 +40,8 @@ def is_str(s): return isinstance(s, str) try: isinstance(1, long) + + def is_int(n): return isinstance(n, int) or isinstance(n, long) except NameError: @@ -78,56 +84,60 @@ class SensorsAnalyticsDebugException(Exception): if os.name == 'nt': # pragma: no cover import msvcrt + def lock(file_): try: savepos = file_.tell() - + file_.seek(0) try: msvcrt.locking(file_.fileno(), msvcrt.LK_LOCK, 1) except IOError as e: - raise SensorsAnalyticsFileLockException(e) + raise SensorsAnalyticsFileLockException(e) finally: if savepos: file_.seek(savepos) except IOError as e: - raise SensorsAnalyticsFileLockException(e) + raise SensorsAnalyticsFileLockException(e) + def unlock(file_): try: savepos = file_.tell() if savepos: file_.seek(0) - + try: msvcrt.locking(file_.fileno(), msvcrt.LK_UNLCK, 1) except IOError as e: - raise SensorsAnalyticsFileLockException(e) + raise SensorsAnalyticsFileLockException(e) finally: if savepos: file_.seek(savepos) except IOError as e: - raise SensorsAnalyticsFileLockException(e) + raise SensorsAnalyticsFileLockException(e) elif os.name == 'posix': # pragma: no cover import fcntl + def lock(file_): try: fcntl.flock(file_.fileno(), fcntl.LOCK_EX) except IOError as e: - raise SensorsAnalyticsFileLockException(e) + raise SensorsAnalyticsFileLockException(e) + def unlock(file_): fcntl.flock(file_.fileno(), fcntl.LOCK_UN) else: - raise SensorsAnalyticsFileLockException("SensorsAnalytics SDK is defined for NT and POSIX system.") 
+ raise SensorsAnalyticsFileLockException("SensorsAnalytics SDK is defined for NT and POSIX system.") class SAFileLock(object): - + def __init__(self, file_handler): self._file_handler = file_handler @@ -138,12 +148,15 @@ def __enter__(self): def __exit__(self, t, v, tb): unlock(self._file_handler) + class SensorsAnalytics(object): """ 使用一个 SensorsAnalytics 的实例来进行数据发送。 """ - NAME_PATTERN = re.compile(r"^((?!^distinct_id$|^original_id$|^time$|^properties$|^id$|^first_id$|^second_id$|^users$|^events$|^event$|^user_id$|^date$|^datetime$)[a-zA-Z_$][a-zA-Z\d_$]{0,99})$", re.I) + NAME_PATTERN = re.compile( + r"^((?!^distinct_id$|^original_id$|^time$|^properties$|^id$|^first_id$|^second_id$|^users$|^events$|^event$|^event_id$|^device_id$|^user_id$|^date$|^datetime$|^user_group|^user_tag)[a-zA-Z_$][a-zA-Z\d_$]{0,99})$", + re.I) class DatetimeSerializer(json.JSONEncoder): """ @@ -154,8 +167,8 @@ def default(self, obj): if isinstance(obj, datetime.datetime): head_fmt = "%Y-%m-%d %H:%M:%S" return "{main_part}.{ms_part}".format( - main_part=obj.strftime(head_fmt), - ms_part=int(obj.microsecond/1000)) + main_part=obj.strftime(head_fmt), + ms_part=int(obj.microsecond / 1000)) elif isinstance(obj, datetime.date): fmt = '%Y-%m-%d' return obj.strftime(fmt) @@ -179,7 +192,7 @@ def __init__(self, consumer=None, project_name=None, enable_time_free=False): self._default_project_name = project_name self._enable_time_free = enable_time_free self._super_properties = {} - self.clear_super_properties(); + self.clear_super_properties() @staticmethod def _now(): @@ -189,7 +202,7 @@ def _now(): def _json_dumps(data): return json.dumps(data, separators=(',', ':'), cls=SensorsAnalytics.DatetimeSerializer) - def register_super_properties(self, super_properties): + def register_super_properties(self, super_properties): """ 设置每个事件都带有的一些公共属性,当 track 的 properties 和 super properties 有相同的 key 时,将采用 track 的 @@ -213,12 +226,13 @@ def track(self, distinct_id, event_name, properties=None, 
is_login_id=False): :param distinct_id: 用户的唯一标识 :param event_name: 事件名称 :param properties: 事件的属性 + :param is_login_id 是否是登陆 ID """ - all_properties = self._super_properties.copy() + all_properties = self._super_properties.copy() if properties: all_properties.update(properties) self._track_event('track', event_name, distinct_id, None, all_properties, is_login_id) - + def track_signup(self, distinct_id, original_id, properties=None): """ 这个接口是一个较为复杂的功能,请在使用前先阅读相关说明:http://www.sensorsdata.cn/manual/track_signup.html, @@ -233,13 +247,48 @@ def track_signup(self, distinct_id, original_id, properties=None): raise SensorsAnalyticsIllegalDataException("property [original_id] must not be empty") if len(str(original_id)) > 255: raise SensorsAnalyticsIllegalDataException("the max length of property [original_id] is 255") - - all_properties = self._super_properties.copy() + + all_properties = self._super_properties.copy() if properties: all_properties.update(properties) - + self._track_event('track_signup', '$SignUp', distinct_id, original_id, all_properties, False) + @staticmethod + def _normalize_properties(data): + if "properties" in data and data["properties"] is not None: + for key, value in data["properties"].items(): + SensorsAnalytics._assert_key(key) + SensorsAnalytics._assert_value(value, key) + + @staticmethod + def _assert_key(key): + if not is_str(key): + raise SensorsAnalyticsIllegalDataException("property key must be a str. [key=%s]" % str(key)) + if len(key) > 255: + raise SensorsAnalyticsIllegalDataException( + "the max length of property key is 256. [key=%s]" % str(key)) + if not SensorsAnalytics.NAME_PATTERN.match(key): + raise SensorsAnalyticsIllegalDataException( + "property key must be a valid variable name. [key=%s]" % str(key)) + + @staticmethod + def _assert_value(value, key=None): + if is_str(value) and len(value) > 8191: + raise SensorsAnalyticsIllegalDataException( + "the max length of property key is 8192. 
[key=%s]" % str(key)) + + if not is_str(value) and not is_int(value) and not isinstance(value, float) \ + and not isinstance(value, datetime.datetime) and not isinstance(value, datetime.date) \ + and not isinstance(value, list) and value is not None: + raise SensorsAnalyticsIllegalDataException( + "property value must be a str/int/float/datetime/date/list. [value=%s]" % type(value)) + if isinstance(value, list): + for lvalue in value: + if not is_str(lvalue): + raise SensorsAnalyticsIllegalDataException( + "[list] property's value must be a str. [value=%s]" % type(lvalue)) + @staticmethod def _normalize_data(data): # 检查 distinct_id @@ -256,7 +305,7 @@ def _normalize_data(data): ts = int(data['time']) ts_num = len(str(ts)) if ts_num < 10 or ts_num > 13: - raise SensorsAnalyticsIllegalDataException("property [time] must be a timestamp in microseconds") + raise SensorsAnalyticsIllegalDataException("property [time] must be a timestamp in microseconds") if ts_num == 10: ts *= 1000 @@ -264,47 +313,26 @@ def _normalize_data(data): # 检查 Event Name if 'event' in data and not SensorsAnalytics.NAME_PATTERN.match(data['event']): - raise SensorsAnalyticsIllegalDataException("event name must be a valid variable name. [name=%s]" % data['event']) + raise SensorsAnalyticsIllegalDataException( + "event name must be a valid variable name. [name=%s]" % data['event']) # 检查 Event Name if 'project' in data and not SensorsAnalytics.NAME_PATTERN.match(data['project']): - raise SensorsAnalyticsIllegalDataException("project name must be a valid variable name. [project=%s]" % data['project']) + raise SensorsAnalyticsIllegalDataException( + "project name must be a valid variable name. [project=%s]" % data['project']) # 检查 properties - if "properties" in data and data["properties"] is not None: - for key, value in data["properties"].items(): - if not is_str(key): - raise SensorsAnalyticsIllegalDataException("property key must be a str. 
[key=%s]" % str(key)) - if len(key) > 255: - raise SensorsAnalyticsIllegalDataException("the max length of property key is 256. [key=%s]" % str(key)) - if not SensorsAnalytics.NAME_PATTERN.match(key): - raise SensorsAnalyticsIllegalDataException( - "property key must be a valid variable name. [key=%s]" % str(key)) - - if is_str(value) and len(value) > 8191: - raise SensorsAnalyticsIllegalDataException("the max length of property key is 8192. [key=%s]" % str(key)) - - if not is_str(value) and not is_int(value) and not isinstance(value, float)\ - and not isinstance(value, datetime.datetime) and not isinstance(value, datetime.date)\ - and not isinstance(value, list) and value is not None: - raise SensorsAnalyticsIllegalDataException( - "property value must be a str/int/float/datetime/date/list. [value=%s]" % type(value)) - if isinstance(value, list): - for lvalue in value: - if not is_str(lvalue): - raise SensorsAnalyticsIllegalDataException( - "[list] property's value must be a str. [value=%s]" % type(lvalue)) - + SensorsAnalytics._normalize_properties(data) return data def _get_lib_properties(self): lib_properties = { - '$lib' : 'python', - '$lib_version' : SDK_VERSION, - '$lib_method' : 'code', + '$lib': 'python', + '$lib_version': SDK_VERSION, + '$lib_method': 'code', } - if '$app_version' in self._super_properties: + if '$app_version' in self._super_properties: lib_properties['$app_version'] = self._super_properties['$app_version'] try: @@ -315,21 +343,22 @@ def _get_lib_properties(self): try: file_name = trace[-4][0] line_number = trace[-4][1] - + if trace[-4][2].startswith('<'): function_name = '' else: function_name = trace[-4][2] - + try: if len(trace) > 4 and trace[-5][3]: - class_name = trace[-5][3].split('(')[0] + class_name = trace[-5][3].split('(')[0] else: class_name = '' except: - print(trace.format()) + print(trace.format()) - lib_properties['$lib_detail'] = '%s##%s##%s##%s' % (class_name, function_name, file_name, line_number) + 
lib_properties['$lib_detail'] = '%s##%s##%s##%s' % ( + class_name, function_name, file_name, line_number) except: pass @@ -349,7 +378,6 @@ def _get_common_properties(self): return common_properties - @staticmethod def _extract_user_time(properties): """ @@ -361,12 +389,35 @@ def _extract_user_time(properties): return t return None + @staticmethod + def _extract_token(properties): + """ + 如果用户传入了 $token 字段,则在 properties 外层加上token,并删除 $token 字段 + """ + if properties is not None and '$token' in properties: + t = properties['$token'] + del (properties['$token']) + return t + return None + + @staticmethod + def _extract_project(properties): + """ + 如果用户传入了 $project 字段,则在 properties 外层加上 project,并删除 $project 字段 + """ + if properties is not None and '$project' in properties: + t = properties['$project'] + del (properties['$project']) + return t + return None + def profile_set(self, distinct_id, profiles, is_login_id=False): """ 直接设置一个用户的 Profile,如果已存在则覆盖 :param distinct_id: 用户的唯一标识 :param profiles: 用户属性 + :param is_login_id: distinct_id 是否是登陆 id """ return self._track_event('profile_set', None, distinct_id, None, profiles, is_login_id) @@ -376,8 +427,9 @@ def profile_set_once(self, distinct_id, profiles, is_login_id=False): :param distinct_id: 用户的唯一标识 :param profiles: 用户属性 + :param is_login_id: distinct_id 是否是登陆 id """ - return self._track_event('profile_set_once', None, distinct_id, None, profiles, is_login_id) + return self._track_event('profile_set_once', None, distinct_id, None, profiles, is_login_id) def profile_increment(self, distinct_id, profiles, is_login_id=False): """ @@ -385,7 +437,14 @@ def profile_increment(self, distinct_id, profiles, is_login_id=False): :param distinct_id: 用户的唯一标识 :param profiles: 用户属性 + :param is_login_id: distinct_id 是否是登陆 id """ + if isinstance(profiles, dict): + for key, value in profiles.items(): + if not is_int(value): + raise SensorsAnalyticsIllegalDataException("property value must be Number. 
[key=%s]" % str(key)) + else: + raise SensorsAnalyticsIllegalDataException("profiles must be dict type.") return self._track_event('profile_increment', None, distinct_id, None, profiles, is_login_id) def profile_append(self, distinct_id, profiles, is_login_id=False): @@ -394,7 +453,14 @@ def profile_append(self, distinct_id, profiles, is_login_id=False): :param distinct_id: 用户的唯一标识 :param profiles: 用户属性 + :param is_login_id: distinct_id 是否是登陆 id """ + if isinstance(profiles, dict): + for key, value in profiles.items(): + if not isinstance(value,list): + raise SensorsAnalyticsIllegalDataException("property value must be list. [key=%s]" % str(key)) + else: + raise SensorsAnalyticsIllegalDataException("profiles must be dict type.") return self._track_event('profile_append', None, distinct_id, None, profiles, is_login_id) def profile_unset(self, distinct_id, profile_keys, is_login_id=False): @@ -403,6 +469,7 @@ def profile_unset(self, distinct_id, profile_keys, is_login_id=False): :param distinct_id: 用户的唯一标识 :param profile_keys: 用户属性键值列表 + :param is_login_id: distinct_id 是否是登陆 id """ if isinstance(profile_keys, list): profile_keys = dict((key, True) for key in profile_keys) @@ -413,11 +480,74 @@ def profile_delete(self, distinct_id, is_login_id=False): 删除整个用户的信息。 :param distinct_id: 用户的唯一标识 + :param is_login_id: distinct_id 是否是登陆 id """ return self._track_event('profile_delete', None, distinct_id, None, {}, is_login_id) - def _track_event(self, event_type, event_name, distinct_id, original_id, properties, is_login_id): + def item_set(self, item_type, item_id, properties=None): + """ + 直接设置一个物品,如果已存在则覆盖。 + + :param item_type: 物品类型 + :param item_id: 物品的唯一标识 + :param properties: 物品属性 + """ + return self._track_item('item_set', item_type, item_id, properties) + + def item_delete(self, item_type, item_id, properties=None): + """ + 删除一个物品。 + + :param item_type: 物品类型 + :param item_id: 物品的唯一标识 + :param properties: 物品属性 + """ + return self._track_item('item_delete', item_type, 
item_id, properties) + + @staticmethod + def _normalize_item_data(data): + # 检查 item_type + if not SensorsAnalytics.NAME_PATTERN.match(data['item_type']): + raise SensorsAnalyticsIllegalDataException( + "item_type must be a valid variable name. [key=%s]" % str(data['item_type'])) + + # 检查 item_id + if data['item_id'] is None or len(str(data['item_id'])) == 0: + raise SensorsAnalyticsIllegalDataException("item_id must not be empty") + if len(str(data['item_id'])) > 255: + raise SensorsAnalyticsIllegalDataException("the max length of item_id is 255") + # 检查 properties + SensorsAnalytics._normalize_properties(data) + return data + + def _track_item(self, event_type, item_type, item_id, properties): + if properties is None: + properties = {} + data = { + 'type': event_type, + 'time': self._now(), + 'lib': self._get_lib_properties(), + 'item_type': item_type, + 'item_id': item_id, + } + + if self._default_project_name is not None: + data['project'] = self._default_project_name + + if properties and '$project' in properties and len(str(properties['$project'])) != 0: + data['project'] = properties['$project'] + properties.pop('$project') + + data['properties'] = properties + + data = self._normalize_item_data(data) + self._json_dumps(data) + self._consumer.send(self._json_dumps(data)) + + def _track_event(self, event_type, event_name, distinct_id, original_id, properties, is_login_id, *identities): event_time = self._extract_user_time(properties) or self._now() + event_token = self._extract_token(properties) + event_project = self._extract_project(properties) data = { 'type': event_type, @@ -426,24 +556,48 @@ def _track_event(self, event_type, event_name, distinct_id, original_id, propert 'properties': properties, 'lib': self._get_lib_properties(), } + + if identities: + identities_data = dict() + for identity in identities: + identities_data[identity.key] = identity.value + data["identities"] = identities_data + if self._default_project_name is not None: data['project'] 
= self._default_project_name - if event_type == "track" or event_type == "track_signup": + if event_type == EventType.TRACK.value or event_type == EventType.TRACK_SIGNUP.value \ + or event_type == EventType.TRACK_ID_BIND.value \ + or event_type == EventType.TRACK_ID_UNBIND.value: data["event"] = event_name - if event_type == "track_signup": + if event_type == EventType.TRACK_SIGNUP.value: data["original_id"] = original_id if self._enable_time_free: data["time_free"] = True - if is_login_id: + if is_login_id or self._is_identity_has_login_id(*identities): properties["$is_login_id"] = True + if event_token is not None: + data["token"] = event_token + + if event_project is not None: + data["project"] = event_project + data = self._normalize_data(data) self._consumer.send(self._json_dumps(data)) + @staticmethod + def _is_identity_has_login_id(*identities): + if not identities: + return False + for identity in identities: + if identity.key == SensorsAnalyticsIdentity.LOGIN_ID: + return True + return False + def flush(self): """ 对于不立即发送数据的 Consumer,调用此接口应当立即进行已有数据的发送。 @@ -457,6 +611,159 @@ def close(self): """ self._consumer.close() + @staticmethod + def _check_identity_type(*identities): + if not identities: + raise SensorsAnalyticsIllegalDataException("Identity (or list) can not be none or empty") + key_repeat_map = {} + duplicate_keys = set() + for identity in identities: + if not isinstance(identity, SensorsAnalyticsIdentity): + raise SensorsAnalyticsIllegalDataException("Identity type must be SensorsAnalyticsIdentity") + SensorsAnalytics._assert_key(identity.key) + if not is_str(identity.value): + raise SensorsAnalyticsIllegalDataException("identity value must be a str. [key=%s]" % str(identity.key)) + if not len(identity.value.strip()): + raise SensorsAnalyticsIllegalDataException("identity value is empty. [key=%s]" % str(identity.key)) + if len(identity.value) > 255: + raise SensorsAnalyticsIllegalDataException( + "the max length of property value is 256. 
[key=%s]" % str(identity.key)) + count = key_repeat_map.get(identity.key, 0) + count += 1 + key_repeat_map[identity.key] = count + if count > 1: + duplicate_keys.add(identity.key) + if duplicate_keys: + raise SensorsAnalyticsIllegalDataException("Identity has duplicate key. [key=%s]" % str(duplicate_keys)) + + @staticmethod + def _get_distinct_id(*identities): + if not identities: + return None + distinct_id = "%s+%s" % (identities[0].key, identities[0].value) + for identity in identities: + if SensorsAnalyticsIdentity.LOGIN_ID == identity.key: + distinct_id = identity.value + break + return distinct_id + + def bind(self, first_identity, second_identity, *other_identities): + """ + 绑定用户标识。至少需要提供两个用户标识信息。identity 的数据类型为 SensorsAnalyticsIdentity + + :param first_identity 待绑定的用户标识, + :param second_identity 待绑定的用户标识 + :param other_identities 其他需要绑定的用户标识 + """ + SensorsAnalytics._check_identity_type(first_identity, second_identity, *other_identities) + all_properties = self._super_properties.copy() + self._track_event(EventType.TRACK_ID_BIND.value, "$BindID", + SensorsAnalytics._get_distinct_id(first_identity, second_identity, *other_identities), + None, all_properties, None, first_identity, second_identity, *other_identities) + + def unbind(self, identity): + """ + 解绑用户标识 + :param identity SensorsAnalyticsIdentity + """ + SensorsAnalytics._check_identity_type(identity) + # if identity.key == SensorsAnalyticsIdentity.LOGIN_ID: + # raise SensorsAnalyticsIllegalDataException("Can not unbind login id.") + all_properties = self._super_properties.copy() + self._track_event(EventType.TRACK_ID_UNBIND.value, "$UnbindID", SensorsAnalytics._get_distinct_id(identity), + None, all_properties, None, identity) + + def track_by_id(self, event_name, properties, *identities): + """ + 使用用户标识 3.0 进行事件埋点 + :param event_name 事件名 + :param properties 事件属性,数据类型为 dict + :param identities 用户标识 + """ + SensorsAnalytics._check_identity_type(*identities) + all_properties = 
self._super_properties.copy() + if properties: + if not isinstance(properties, dict): + raise SensorsAnalyticsIllegalDataException("properties must be a dict type.") + all_properties.update(properties) + self._track_event(EventType.TRACK.value, event_name, + SensorsAnalytics._get_distinct_id(*identities), + None, all_properties, None, *identities) + + def profile_set_by_id(self, properties, *identities): + """ + 设置用户的属性。如果要设置的 properties 的 key,之前在这个用户的 profile 中已经存在,则覆盖,否则,新创建 + :param properties 用户属性 + :param identities 用户标识,类型是 SensorsAnalyticsIdentity + """ + self._profile_options_by_id(EventType.PROFILE_SET, properties, *identities) + + def profile_set_once_by_id(self, properties, *identities): + """ + 首次设置用户的属性。与 profile_set_by_id 接口不同的是:如果被设置的用户属性已存在,则这条记录会被忽略而不会覆盖已有数据,如果属性不存在则会自动创建 + :param properties 用户属性 + :param identities 用户标识,类型是 SensorsAnalyticsIdentity + """ + self._profile_options_by_id(EventType.PROFILE_SET_ONCE, properties, *identities) + + def profile_unset_by_id(self, profile_keys, *identities): + """ + 删除用户某一个属性 + :param profile_keys: 用户属性键值列表 + :param identities 用户标识,类型是 SensorsAnalyticsIdentity + """ + if isinstance(profile_keys, list): + profile_keys = dict((key, True) for key in profile_keys) + else: + raise SensorsAnalyticsIllegalDataException("profile_keys must be a list.") + self._profile_options_by_id(EventType.PROFILE_UNSET, profile_keys, *identities) + + def profile_append_by_id(self, profiles, *identities): + """ + 追加一个用户的某一个或者多个集合类型的 Profile。 + :param profiles 用户属性,其 key 必须是 str 类型,value 必须是 str 集合类型 + :param identities 用户标识,类型是 SensorsAnalyticsIdentity + """ + if isinstance(profiles, dict): + for key, value in profiles.items(): + if not isinstance(value, list): + raise SensorsAnalyticsIllegalDataException("property value must be list. 
[key=%s]" % str(key)) + else: + raise SensorsAnalyticsIllegalDataException("profiles must be dict type.") + self._profile_options_by_id(EventType.PROFILE_APPEND, profiles, *identities) + + def profile_delete_by_id(self, *identities): + """ + 删除用户的所有属性 + :param identities 用户标识,类型是 SensorsAnalyticsIdentity + """ + self._profile_options_by_id(EventType.PROFILE_DELETE, {}, *identities) + + def profile_increment_by_id(self, profiles, *identities): + """ + 为用户的一个或多个数值类型的属性累加一个数值,若该属性不存在,则创建它并设置默认值为 0。属性取值只接受 Number类型。 + :param profiles 用户属性,类型是 dict,value 必须是 Number 类型 + :param identities 用户标识,可以是 identity、list、tuple + """ + if isinstance(profiles, dict): + for key, value in profiles.items(): + if not is_int(value): + raise SensorsAnalyticsIllegalDataException("property value must be Number. [key=%s]" % str(key)) + else: + raise SensorsAnalyticsIllegalDataException("profiles must be dict type.") + self._profile_options_by_id(EventType.PROFILE_INCREMENT, profiles, *identities) + + def _profile_options_by_id(self, event_type, properties, *identities): + if not (event_type == EventType.PROFILE_DELETE): + if not properties: + raise SensorsAnalyticsIllegalDataException("Properties can not be None or Empty") + if not isinstance(properties, dict): + raise SensorsAnalyticsIllegalDataException("Properties type must be dict") + SensorsAnalytics._check_identity_type(*identities) + self._track_event(event_type.value, None, SensorsAnalytics._get_distinct_id(*identities), None, properties, + None, *identities) + + class DefaultConsumer(object): """ 默认的 Consumer实现,逐条、同步的发送数据给接收服务器。 @@ -467,7 +774,7 @@ def __init__(self, url_prefix, request_timeout=None): 初始化 Consumer。 :param url_prefix: 服务器的 URL 地址。 - :param request_timeout: 请求的超时时间,单位毫秒。 + :param request_timeout: 请求的超时时间,单位为秒。 """ self._url_prefix = url_prefix self._request_timeout = request_timeout @@ -531,7 +838,7 @@ def __init__(self, url_prefix, max_size=50, request_timeout=None): :param url_prefix: 服务器的 URL 地址。 :param max_size: 
批量发送的阈值。 - :param request_timeout: 请求服务器的超时时间,单位毫秒。 + :param request_timeout: 请求服务器的超时时间,单位为秒。 :return: """ super(BatchConsumer, self).__init__(url_prefix, request_timeout) @@ -539,21 +846,23 @@ def __init__(self, url_prefix, max_size=50, request_timeout=None): self._max_size = min(50, max_size) def send(self, json_message): - self._buffers.append(json_message) - if len(self._buffers) >= self._max_size: - self.flush() + with batch_consumer_lock: + self._buffers.append(json_message) + if len(self._buffers) >= self._max_size: + self.flush() def flush(self): """ 用户可以主动调用 flush 接口,以便在需要的时候立即进行数据发送。 """ - while self._buffers: - msg_list = self._buffers[:self._max_size] - self._do_request({ - 'data_list': self._encode_msg_list(msg_list), - 'gzip': 1 - }) - self._buffers = self._buffers[self._max_size:] + with batch_consumer_lock: + while self._buffers: + msg_list = self._buffers[:self._max_size] + self._do_request({ + 'data_list': self._encode_msg_list(msg_list), + 'gzip': 1 + }) + self._buffers = self._buffers[self._max_size:] def close(self): """ @@ -610,7 +919,7 @@ def __init__(self, url_prefix, flush_max_time=3, flush_size=20, :param flush_size: 队列缓存的阈值,超过此值将立即进行发送。 :param max_batch_size: 单个请求发送的最大大小。 :param max_size: 整个缓存队列的最大大小。 - :param request_timeout: 请求的超时时间,单位毫秒。 + :param request_timeout: 请求的超时时间,单位为秒。 """ super(AsyncBatchConsumer, self).__init__(url_prefix, request_timeout) @@ -687,13 +996,16 @@ def __init__(self, url_prefix, write_data=True, request_timeout=None): 初始化Consumer :param url_prefix: 服务器提供的用于Debug的API的URL地址,特别注意,它与导入数据的API并不是同一个 :param write_data: 发送过去的数据,是真正写入,还是仅仅进行检查 - :param request_timeout:请求的超时时间,单位毫秒 + :param request_timeout:请求的超时时间,单位为秒 :return: """ debug_url = urlparse(url_prefix) ## 将 URI Path 替换成 Debug 模式的 '/debug' - debug_url = debug_url._replace(path = '/debug') - + url_path = debug_url.path + index = url_path.rfind('/') + debug_url_path = url_path[0:index] + '/debug' + debug_url = debug_url._replace(path=debug_url_path) + 
self._debug_url_prefix = debug_url.geturl() self._request_timeout = request_timeout self._debug_write_data = write_data @@ -719,7 +1031,7 @@ def _do_request(self, data): encoded_data = urllib.urlencode(data).encode('utf8') try: request = urllib2.Request(self._debug_url_prefix, encoded_data) - if not self._debug_write_data: # 说明只检查,不真正写入数据 + if not self._debug_write_data: # 说明只检查,不真正写入数据 request.add_header('Dry-Run', 'true') if self._request_timeout is not None: response = urllib2.urlopen(request, timeout=self._request_timeout) @@ -796,6 +1108,7 @@ def flush(self): def close(self): self.logger.handlers[0].close() + class ConcurrentLoggingConsumer(object): """ 将数据输出到指定路径,并按天切割,支持多进程并行输出到同一个文件名 @@ -811,7 +1124,7 @@ def close(self): self._file.close() def isValid(self, filename): - return self._filename == filename + return self._filename == filename def write(self, messages): with SAFileLock(self._file): @@ -822,7 +1135,7 @@ def write(self, messages): @classmethod def construct_filename(cls, prefix): - return prefix + '.' + datetime.datetime.now().strftime('%Y-%m-%d') + return prefix + '.' 
+ datetime.datetime.now().strftime('%Y-%m-%d') def __init__(self, prefix, bufferSize=8192): self._prefix = prefix @@ -846,7 +1159,7 @@ def send(self, msg): if len(self._buffer) > self._bufferSize: messages = self._buffer - filename = ConcurrentLoggingConsumer.construct_filename(self._prefix) + filename = ConcurrentLoggingConsumer.construct_filename(self._prefix) if not self._writer.isValid(filename): self._writer.close() self._writer = ConcurrentLoggingConsumer.ConcurrentFileWriter(filename) @@ -854,7 +1167,7 @@ def send(self, msg): self._buffer = [] self._mutex.put(1) - + if messages: self._writer.write(messages) @@ -866,7 +1179,7 @@ def flush(self): if len(self._buffer) > 0: messages = self._buffer - filename = ConcurrentLoggingConsumer.construct_filename(self._prefix) + filename = ConcurrentLoggingConsumer.construct_filename(self._prefix) if not self._writer.isValid(filename): self._writer.close() self._writer = ConcurrentLoggingConsumer.ConcurrentFileWriter(filename) @@ -874,10 +1187,43 @@ def flush(self): self._buffer = [] self._mutex.put(1) - + if messages: self._writer.write(messages) def close(self): self.flush() self._writer.close() + + +# ID-Mapping 3 业务逻辑 +class SensorsAnalyticsIdentity(object): + LOGIN_ID = "$identity_login_id" + """ + 用户登录 id + """ + MOBILE = "$identity_mobile" + """ + 手机号 + """ + EMAIL = "$identity_email" + """ + 邮箱 + """ + + def __init__(self, key, value): + self.key = key + self.value = value + + +class EventType(Enum): + TRACK = "track" + TRACK_SIGNUP = "track_signup" + TRACK_ID_BIND = "track_id_bind" + TRACK_ID_UNBIND = "track_id_unbind" + PROFILE_SET = "profile_set" + PROFILE_SET_ONCE = "profile_set_once" + PROFILE_UNSET = "profile_unset" + PROFILE_APPEND = "profile_append" + PROFILE_DELETE = "profile_delete" + PROFILE_INCREMENT = "profile_increment" diff --git a/sensorsanalytics/test_sdk.py b/sensorsanalytics/test_sdk.py index 3783a04..a5cd4f2 100644 --- a/sensorsanalytics/test_sdk.py +++ b/sensorsanalytics/test_sdk.py @@ -6,8 
+6,8 @@ from sdk import * -TEST_URL_PREFIX = 'http://10.10.11.209:8006/sa?token=bbb' -TEST_DEBUG_URL_PREFIX = 'http://10.10.11.209:8006/debug?token=bbb' +TEST_URL_PREFIX = 'https://sdk-test.datasink.sensorsdata.cn/sa?project=yuejianzhong&token=95c73ae661f85aa0' +TEST_DEBUG_URL_PREFIX = 'https://sdk-test.datasink.sensorsdata.cn/sa?project=yuejianzhong&token=95c73ae661f85aa0' class NormalTest(unittest.TestCase): @@ -53,11 +53,14 @@ def clear_msg_counter(self): def testDebug(self): consumer = DebugConsumer(TEST_DEBUG_URL_PREFIX, False) sa = SensorsAnalytics(consumer) - sa.track(1234, 'Test', {'From': 'Baidu'}, is_login_id=True) + sa.track(1234, 'Test', {'From1': 'Baidu'}, is_login_id=True) consumer = DebugConsumer(TEST_DEBUG_URL_PREFIX, True) sa = SensorsAnalytics(consumer) - sa.track(1234, 'Test', {'From': 456}) - sa.track(1234, 'Test', {'From': 'Baidu'}) + sa.track(1234, 'Test', {'From2': 456}) + sa.track(1234, 'Test', {'From1': 'Baidu'}) + sa.track(1234, 'Test', {'From1': 'Baidu', '$project': "yuejianzhong"}) + sa.track(1234, 'Test', {'From1': 'Baidu', '$token': "dhuw393jdcioj39"}) + sa.track(1234, 'Test', {'From1': 'Baidu', '$token': "dhuw393jdcioj39",'$project': "yuejianzhong"}) def testNormal(self): consumer = DefaultConsumer(TEST_URL_PREFIX) @@ -124,7 +127,7 @@ def testDefaultConsumer(self): sa.track('1234', 'Test', {'From': 'Baidu'}) sa.track_signup('1234', 'abcd', {'Channel': 'Hongbao'}) sa.profile_delete('1234') - sa.profile_append('1234', {'Gender': 'Male'}) + sa.profile_append('1234', {'Gender': ['Male']}) sa.profile_increment('1234', {'CardNum': 1}) sa.profile_set('1234', {'City': '北京'}) sa.profile_unset('1234', ['City']) @@ -138,7 +141,7 @@ def testBatchConsumer(self): sa.track('1234', 'Test', {'From': 'Baidu'}) sa.track_signup('1234', 'abcd', {'Channel': 'Hongbao'}) sa.profile_delete('1234') - sa.profile_append('1234', {'Gender': 'Male'}) + sa.profile_append('1234', {'Gender': ['Male']}) self.assertEqual(self.msg_counter, 0) sa.profile_increment('1234', 
{'CardNum': 1}) self.assertEqual(self.msg_counter, 5) @@ -158,7 +161,7 @@ def testAsyncBatchConsumer(self): sa.track('1234', 'Test', {'From': 'Baidu'}) sa.track_signup('1234', 'abcd', {'Channel': 'Hongbao'}) sa.profile_delete('1234') - sa.profile_append('1234', {'Gender': 'Male'}) + sa.profile_append('1234', {'Gender': ["male", "femal"]}) self.assertEqual(self.msg_counter, 0) sa.profile_increment('1234', {'CardNum': 1}) time.sleep(0.1) @@ -177,6 +180,34 @@ def testAsyncBatchConsumer(self): time.sleep(0.1) self.assertEqual(self.msg_counter, 9) + def testIDM3(self): + consumer = DefaultConsumer(TEST_URL_PREFIX) + # consumer._do_request = self.mock_request + sa = SensorsAnalytics(consumer) + + # sa.bind(SensorsAnalyticsIdentity("s1", "sv1"), SensorsAnalyticsIdentity("s2", "sv2"), SensorsAnalyticsIdentity("s3", "sv3")) + # sa.bind(SensorsAnalyticsIdentity("s1", "sv1"), SensorsAnalyticsIdentity("s2", "sv2"), SensorsAnalyticsIdentity(SensorsAnalyticsIdentity.LOGIN_ID, "sv3")) + # sa.unbind(SensorsAnalyticsIdentity(SensorsAnalyticsIdentity.EMAIL, "sv1")) + + # sa.track_by_id("hello", None, SensorsAnalyticsIdentity(SensorsAnalyticsIdentity.LOGIN_ID, "sv1"), SensorsAnalyticsIdentity("s2", "sv2")) + + # sa.profile_set_by_id({"p1": "v1"}, SensorsAnalyticsIdentity("s1", "sv1")) + # sa.profile_unset_by_id(["k1", "k2"], SensorsAnalyticsIdentity("s1", "sv1")) + + # sa.profile_append("sss", {"k1": "ss"}, False) + + # sa.profile_append_by_id({"k1": ["a1", "a2", "a3"]}, SensorsAnalyticsIdentity("s1", "sv1"), + # SensorsAnalyticsIdentity("s2", "sv2")) + + # sa.profile_delete_by_id(SensorsAnalyticsIdentity("s1", "sv1"), + # SensorsAnalyticsIdentity("s2", "sv2")) + + # sa.profile_increment_by_id({"age": "123"}, SensorsAnalyticsIdentity("s1", "sv1")) + + sa.flush() + time.sleep(2) + pass + if __name__ == '__main__': unittest.main() diff --git a/setup.py b/setup.py index 436a138..828babb 100644 --- a/setup.py +++ b/setup.py @@ -1,13 +1,16 @@ -from distutils.core import setup +import 
setuptools -setup( - name='SensorsAnalyticsSDK', - version='1.7.5', - author='Yuhan ZOU', - author_email='zouyuhan@sensorsdata.cn', - url='http://www.sensorsdata.cn', - license='LICENSE.txt', - packages=['sensorsanalytics'], - description='This is the official Python SDK for Sensors Analytics.', - long_description=open('README.txt').read(), -) +# 读取项目的readme介绍 +with open("README.md", "r") as fh: + long_description = fh.read() +setuptools.setup( + name="SensorsAnalyticsSDK", + version="1.11.0", + author="Jianzhong YUE", # 项目作者 + author_email="yuejianzhong@sensorsdata.cn", + description="This is the official Python SDK for Sensors Analytics.", + long_description=long_description, + long_description_content_type="text/markdown", + url="https://github.com/sensorsdata/sa-sdk-python", + packages=setuptools.find_packages(), +) \ No newline at end of file