diff --git a/meltano.yml b/meltano.yml index b5ca9781..c3fde50a 100644 --- a/meltano.yml +++ b/meltano.yml @@ -19,6 +19,7 @@ plugins: kind: string - name: auth_token kind: password + sensitive: true - name: additional_auth_tokens kind: array - name: auth_app_keys @@ -37,6 +38,8 @@ plugins: kind: array - name: user_ids kind: array + - name: issues_check_transfer + kind: array - name: stream_options.milestones.state kind: options options: diff --git a/poetry.lock b/poetry.lock index 7103b5f4..18688130 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.5 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.0.0 and should not be changed by hand. [[package]] name = "appdirs" @@ -6,6 +6,7 @@ version = "1.4.4" description = "A small Python module for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." optional = false python-versions = "*" +groups = ["main"] files = [ {file = "appdirs-1.4.4-py2.py3-none-any.whl", hash = "sha256:a841dacd6b99318a741b166adb07e19ee71a274450e68237b4650ca1055ab128"}, {file = "appdirs-1.4.4.tar.gz", hash = "sha256:7d5d0167b2b1ba821647616af46a749d1c653740dd0d2415100fe26e27afdf41"}, @@ -17,6 +18,7 @@ version = "25.1.0" description = "Classes Without Boilerplate" optional = false python-versions = ">=3.8" +groups = ["main", "dev"] files = [ {file = "attrs-25.1.0-py3-none-any.whl", hash = "sha256:c75a69e28a550a7e93789579c22aa26b0f5b83b75dc4e08fe092980051e1090a"}, {file = "attrs-25.1.0.tar.gz", hash = "sha256:1c97078a80c814273a76b2a298a932eb681c87415c11dee0a6921de7f1b02c3e"}, @@ -36,6 +38,8 @@ version = "2.2.1" description = "Function decoration for backoff and retry" optional = false python-versions = ">=3.7,<4.0" +groups = ["main"] +markers = "python_version < \"4\"" files = [ {file = "backoff-2.2.1-py3-none-any.whl", hash = "sha256:63579f9a0628e06278f7e47b7d7d5b6ce20dc65c5e96a6f3ca99a6adca0396e8"}, {file = "backoff-2.2.1.tar.gz", hash = "sha256:03f829f5bb1923180821643f8753b0502c3b682293992485b0eef2807afa5cba"}, @@ -47,6 +51,8 @@ version = "2.0.3" description = "Backport of Python 3.11's datetime.fromisoformat" optional = false python-versions = ">3" +groups = ["main"] +markers = "python_version < \"3.11\"" files = [ {file = "backports_datetime_fromisoformat-2.0.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:5f681f638f10588fa3c101ee9ae2b63d3734713202ddfcfb6ec6cea0778a29d4"}, {file = "backports_datetime_fromisoformat-2.0.3-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:cd681460e9142f1249408e5aee6d178c6d89b49e06d44913c8fdfb6defda8d1c"}, @@ -103,6 +109,7 @@ version = "4.12.3" description = "Screen-scraping library" optional = false python-versions = ">=3.6.0" +groups = ["main"] files = [ {file = "beautifulsoup4-4.12.3-py3-none-any.whl", hash = "sha256:b80878c9f40111313e55da8ba20bdba06d8fa3969fc68304167741bbf9e082ed"}, {file = "beautifulsoup4-4.12.3.tar.gz", hash = "sha256:74e3d1928edc070d21748185c46e3fb33490f22f52a3addee9aee0f4f7781051"}, @@ -124,6 +131,7 @@ version = "24.1.2" description = "Composable complex class support for attrs and dataclasses." optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "cattrs-24.1.2-py3-none-any.whl", hash = "sha256:67c7495b760168d931a10233f979b28dc04daf853b30752246f4f8471c6d68d0"}, {file = "cattrs-24.1.2.tar.gz", hash = "sha256:8028cfe1ff5382df59dd36474a86e02d817b06eaf8af84555441bac915d2ef85"}, @@ -150,6 +158,7 @@ version = "2024.12.14" description = "Python package for providing Mozilla's CA Bundle." optional = false python-versions = ">=3.6" +groups = ["main", "dev"] files = [ {file = "certifi-2024.12.14-py3-none-any.whl", hash = "sha256:1275f7a45be9464efc1173084eaa30f866fe2e47d389406136d332ed4967ec56"}, {file = "certifi-2024.12.14.tar.gz", hash = "sha256:b650d30f370c2b724812bee08008be0c4163b163ddaec3f2546c1caf65f191db"}, @@ -161,6 +170,8 @@ version = "1.17.1" description = "Foreign Function Interface for Python calling C code." optional = false python-versions = ">=3.8" +groups = ["main"] +markers = "python_full_version > \"3.9.0\" and python_full_version != \"3.9.1\" and platform_python_implementation != \"PyPy\"" files = [ {file = "cffi-1.17.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:df8b1c11f177bc2313ec4b2d46baec87a5f3e71fc8b45dab2ee7cae86d9aba14"}, {file = "cffi-1.17.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8f2cdc858323644ab277e9bb925ad72ae0e67f69e804f4898c070998d50b1a67"}, @@ -240,6 +251,7 @@ version = "3.4.1" description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." optional = false python-versions = ">=3.7" +groups = ["main", "dev"] files = [ {file = "charset_normalizer-3.4.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:91b36a978b5ae0ee86c394f5a54d6ef44db1de0815eb43de826d41d21e4af3de"}, {file = "charset_normalizer-3.4.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7461baadb4dc00fd9e0acbe254e3d7d2112e7f92ced2adc96e54ef6501c5f176"}, @@ -341,6 +353,7 @@ version = "8.1.8" description = "Composable command line interface toolkit" optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = "click-8.1.8-py3-none-any.whl", hash = "sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2"}, {file = "click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a"}, @@ -355,10 +368,12 @@ version = "0.4.6" description = "Cross-platform colored terminal text." optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +groups = ["main", "dev"] files = [ {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, ] +markers = {main = "platform_system == \"Windows\"", dev = "sys_platform == \"win32\""} [[package]] name = "cryptography" @@ -366,6 +381,8 @@ version = "44.0.0" description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." optional = false python-versions = "!=3.9.0,!=3.9.1,>=3.7" +groups = ["main"] +markers = "python_full_version > \"3.9.0\" and python_full_version != \"3.9.1\"" files = [ {file = "cryptography-44.0.0-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:84111ad4ff3f6253820e6d3e58be2cc2a00adb29335d4cacb5ab4d4d34f2a123"}, {file = "cryptography-44.0.0-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b15492a11f9e1b62ba9d73c210e2416724633167de94607ec6069ef724fad092"}, @@ -415,6 +432,8 @@ version = "1.2.2" description = "Backport of PEP 654 (exception groups)" optional = false python-versions = ">=3.7" +groups = ["dev"] +markers = "python_version < \"3.11\"" files = [ {file = "exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b"}, {file = "exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc"}, @@ -429,6 +448,7 @@ version = "2.4.16" description = "Python's filesystem abstraction layer" optional = false python-versions = "*" +groups = ["main"] files = [ {file = "fs-2.4.16-py2.py3-none-any.whl", hash = "sha256:660064febbccda264ae0b6bace80a8d1be9e089e0a5eb2427b7d517f9a91545c"}, {file = "fs-2.4.16.tar.gz", hash = "sha256:ae97c7d51213f4b70b6a958292530289090de3a7e15841e108fbe144f069d313"}, @@ -448,6 +468,7 @@ version = "2024.12.0" description = "File-system specification" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "fsspec-2024.12.0-py3-none-any.whl", hash = "sha256:b520aed47ad9804237ff878b504267a3b0b441e97508bd6d2d8774e3db85cee2"}, {file = "fsspec-2024.12.0.tar.gz", hash = "sha256:670700c977ed2fb51e0d9f9253177ed20cbde4a3e5c0283cc5385b5870c8533f"}, @@ -487,6 +508,8 @@ version = "3.1.1" description = "Lightweight in-process concurrent programming" optional = false python-versions = ">=3.7" +groups = ["main"] +markers = "python_version < \"3.14\" and (platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\")" files = [ {file = "greenlet-3.1.1-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:0bbae94a29c9e5c7e4a2b7f0aae5c17e8e90acbfd3bf6270eeba60c39fce3563"}, {file = "greenlet-3.1.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0fde093fb93f35ca72a556cf72c92ea3ebfda3d79fc35bb19fbe685853869a83"}, @@ -573,6 +596,7 @@ version = "3.10" description = "Internationalized Domain Names in Applications (IDNA)" optional = false python-versions = ">=3.6" +groups = ["main", "dev"] files = [ {file = "idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3"}, {file = "idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9"}, @@ -587,6 +611,8 @@ version = "8.6.1" description = "Read metadata from Python packages" optional = false python-versions = ">=3.9" +groups = ["main"] +markers = "python_version < \"3.12\"" files = [ {file = "importlib_metadata-8.6.1-py3-none-any.whl", hash = "sha256:02a89390c1e15fdfdc0d7c6b25cb3e62650d0494005c97d6f148bf5b9787525e"}, {file = "importlib_metadata-8.6.1.tar.gz", hash = "sha256:310b41d755445d74569f993ccfc22838295d9fe005425094fad953d7f15c8580"}, @@ -610,6 +636,8 @@ version = "6.5.2" description = "Read resources from Python packages" optional = false python-versions = ">=3.9" +groups = ["main"] +markers = "python_version < \"3.10\"" files = [ {file = "importlib_resources-6.5.2-py3-none-any.whl", hash = "sha256:789cfdc3ed28c78b67a06acb8126751ced69a3d5f79c095a98298cd8a760ccec"}, {file = "importlib_resources-6.5.2.tar.gz", hash = "sha256:185f87adef5bcc288449d98fb4fba07cea78bc036455dd44c5fc4a2fe78fed2c"}, @@ -632,6 +660,7 @@ version = "0.5.1" description = "A port of Ruby on Rails inflector to Python" optional = false python-versions = ">=3.5" +groups = ["main"] files = [ {file = "inflection-0.5.1-py2.py3-none-any.whl", hash = "sha256:f38b2b640938a4f35ade69ac3d053042959b62a0f1076a5bbaa1b9526605a8a2"}, {file = "inflection-0.5.1.tar.gz", hash = "sha256:1a29730d366e996aaacffb2f1f1cb9593dc38e2ddd30c91250c6dde09ea9b417"}, @@ -643,6 +672,7 @@ version = "2.0.0" description = "brain-dead simple config-ini parsing" optional = false python-versions = ">=3.7" +groups = ["dev"] files = [ {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, @@ -654,6 +684,7 @@ version = "1.4.2" description = "Lightweight pipelining with Python functions" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "joblib-1.4.2-py3-none-any.whl", hash = "sha256:06d478d5674cbc267e7496a410ee875abd68e4340feff4490bcb7afb88060ae6"}, {file = "joblib-1.4.2.tar.gz", hash = "sha256:2382c5816b2636fbd20a09e0f4e9dad4736765fdfb7dca582943b9c1366b3f0e"}, @@ -665,6 +696,7 @@ version = "1.7.0" description = "A final implementation of JSONPath for Python that aims to be standard compliant, including arithmetic and binary comparison operators and providing clear AST for metaprogramming." optional = false python-versions = "*" +groups = ["main"] files = [ {file = "jsonpath-ng-1.7.0.tar.gz", hash = "sha256:f6f5f7fd4e5ff79c785f1573b394043b39849fb2bb47bcead935d12b00beab3c"}, {file = "jsonpath_ng-1.7.0-py2-none-any.whl", hash = "sha256:898c93fc173f0c336784a3fa63d7434297544b7198124a68f9a3ef9597b0ae6e"}, @@ -680,6 +712,7 @@ version = "4.23.0" description = "An implementation of JSON Schema validation for Python" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "jsonschema-4.23.0-py3-none-any.whl", hash = "sha256:fbadb6f8b144a8f8cf9f0b89ba94501d143e50411a1278633f56a7acf7fd5566"}, {file = "jsonschema-4.23.0.tar.gz", hash = "sha256:d71497fef26351a33265337fa77ffeb82423f3ea21283cd9467bb03999266bc4"}, @@ -701,6 +734,7 @@ version = "2024.10.1" description = "The JSON Schema meta-schemas and vocabularies, exposed as a Registry" optional = false python-versions = ">=3.9" +groups = ["main"] files = [ {file = "jsonschema_specifications-2024.10.1-py3-none-any.whl", hash = "sha256:a09a0680616357d9a0ecf05c12ad234479f549239d0f5b55f3deea67475da9bf"}, {file = "jsonschema_specifications-2024.10.1.tar.gz", hash = "sha256:0f38b83639958ce1152d02a7f062902c41c8fd20d558b0c34344292d417ae272"}, @@ -715,6 +749,7 @@ version = "0.2.25" description = "Python functions for working with deeply nested documents (lists and dicts)" optional = false python-versions = "*" +groups = ["main"] files = [ {file = "nested-lookup-0.2.25.tar.gz", hash = "sha256:6fa832748c90381f2291d850809e32492519ee5f253d6a5acbc29d937eca02e8"}, ] @@ -728,6 +763,7 @@ version = "24.2" description = "Core utilities for Python packages" optional = false python-versions = ">=3.8" +groups = ["main", "dev"] files = [ {file = "packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759"}, {file = "packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f"}, @@ -739,6 +775,7 @@ version = "4.3.6" description = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`." optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "platformdirs-4.3.6-py3-none-any.whl", hash = "sha256:73e575e1408ab8103900836b97580d5307456908a03e92031bab39e4554cc3fb"}, {file = "platformdirs-4.3.6.tar.gz", hash = "sha256:357fb2acbc885b0419afd3ce3ed34564c13c9b95c89360cd9563f73aa5e2b907"}, @@ -755,6 +792,7 @@ version = "1.5.0" description = "plugin and hook calling mechanisms for python" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669"}, {file = "pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1"}, @@ -770,6 +808,7 @@ version = "3.11" description = "Python Lex & Yacc" optional = false python-versions = "*" +groups = ["main"] files = [ {file = "ply-3.11-py2.py3-none-any.whl", hash = "sha256:096f9b8350b65ebd2fd1346b12452efe5b9607f7482813ffca50c22722a807ce"}, {file = "ply-3.11.tar.gz", hash = "sha256:00c7c1aaa88358b9c765b6d3000c6eec0ba42abca5351b095321aef446081da3"}, @@ -781,6 +820,8 @@ version = "2.22" description = "C parser in Python" optional = false python-versions = ">=3.8" +groups = ["main"] +markers = "python_full_version > \"3.9.0\" and python_full_version != \"3.9.1\" and platform_python_implementation != \"PyPy\"" files = [ {file = "pycparser-2.22-py3-none-any.whl", hash = "sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc"}, {file = "pycparser-2.22.tar.gz", hash = "sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6"}, @@ -792,6 +833,7 @@ version = "2.10.1" description = "JSON Web Token implementation in Python" optional = false python-versions = ">=3.9" +groups = ["main"] files = [ {file = "PyJWT-2.10.1-py3-none-any.whl", hash = "sha256:dcdd193e30abefd5debf142f9adfcdd2b58004e644f25406ffaebd50bd98dacb"}, {file = "pyjwt-2.10.1.tar.gz", hash = "sha256:3cc5772eb20009233caf06e9d8a0577824723b44e6648ee0a2aedb6cf9381953"}, @@ -809,6 +851,7 @@ version = "8.3.4" description = "pytest: simple powerful testing with Python" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "pytest-8.3.4-py3-none-any.whl", hash = "sha256:50e16d954148559c9a74109af1eaf0c945ba2d8f30f0a3d3335edde19788b6f6"}, {file = "pytest-8.3.4.tar.gz", hash = "sha256:965370d062bce11e73868e0335abac31b4d3de0e82f4007408d242b4f8610761"}, @@ -831,6 +874,7 @@ version = "2.9.0.post0" description = "Extensions to the standard Python datetime module" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +groups = ["main"] files = [ {file = "python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3"}, {file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"}, @@ -845,6 +889,7 @@ version = "1.0.1" description = "Read key-value pairs from a .env file and set them as environment variables" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "python-dotenv-1.0.1.tar.gz", hash = "sha256:e324ee90a023d808f1959c46bcbc04446a10ced277783dc6ee09987c37ec10ca"}, {file = "python_dotenv-1.0.1-py3-none-any.whl", hash = "sha256:f7b63ef50f1b690dddf550d03497b66d609393b40b564ed0d674909a68ebf16a"}, @@ -853,12 +898,25 @@ files = [ [package.extras] cli = ["click (>=5.0)"] +[[package]] +name = "pytz" +version = "2025.1" +description = "World timezone definitions, modern and historical" +optional = false +python-versions = "*" +groups = ["main"] +files = [ + {file = "pytz-2025.1-py2.py3-none-any.whl", hash = "sha256:89dd22dca55b46eac6eda23b2d72721bf1bdfef212645d81513ef5d03038de57"}, + {file = "pytz-2025.1.tar.gz", hash = "sha256:c2db42be2a2518b28e65f9207c4d05e6ff547d1efa4086469ef855e4ab70178e"}, +] + [[package]] name = "pyyaml" version = "6.0.2" description = "YAML parser and emitter for Python" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "PyYAML-6.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0a9a2848a5b7feac301353437eb7d5957887edbf81d56e903999a75a3d743086"}, {file = "PyYAML-6.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:29717114e51c84ddfba879543fb232a6ed60086602313ca38cce623c1d62cfbf"}, @@ -921,6 +979,7 @@ version = "0.36.2" description = "JSON Referencing + Python" optional = false python-versions = ">=3.9" +groups = ["main"] files = [ {file = "referencing-0.36.2-py3-none-any.whl", hash = "sha256:e8699adbbf8b5c7de96d8ffa0eb5c158b3beafce084968e2ea8bb08c6794dcd0"}, {file = "referencing-0.36.2.tar.gz", hash = "sha256:df2e89862cd09deabbdba16944cc3f10feb6b3e6f18e902f7cc25609a34775aa"}, @@ -937,6 +996,7 @@ version = "2.32.3" description = "Python HTTP for Humans." optional = false python-versions = ">=3.8" +groups = ["main", "dev"] files = [ {file = "requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6"}, {file = "requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760"}, @@ -958,6 +1018,7 @@ version = "1.2.1" description = "A persistent cache for python requests" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "requests_cache-1.2.1-py3-none-any.whl", hash = "sha256:1285151cddf5331067baa82598afe2d47c7495a1334bfe7a7d329b43e9fd3603"}, {file = "requests_cache-1.2.1.tar.gz", hash = "sha256:68abc986fdc5b8d0911318fbb5f7c80eebcd4d01bfacc6685ecf8876052511d1"}, @@ -988,6 +1049,7 @@ version = "0.22.3" description = "Python bindings to Rust's persistent data structures (rpds)" optional = false python-versions = ">=3.9" +groups = ["main"] files = [ {file = "rpds_py-0.22.3-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:6c7b99ca52c2c1752b544e310101b98a659b720b21db00e65edca34483259967"}, {file = "rpds_py-0.22.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:be2eb3f2495ba669d2a985f9b426c1797b7d48d6963899276d22f23e33d47e37"}, @@ -1100,6 +1162,7 @@ version = "70.3.0" description = "Easily download, build, install, upgrade, and uninstall Python packages" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "setuptools-70.3.0-py3-none-any.whl", hash = "sha256:fe384da74336c398e0d956d1cae0669bc02eed936cdb1d49b57de1990dc11ffc"}, {file = "setuptools-70.3.0.tar.gz", hash = "sha256:f171bab1dfbc86b132997f26a119f6056a57950d058587841a0082e8830f9dc5"}, @@ -1115,6 +1178,7 @@ version = "1.0.3" description = "A simple, safe single expression evaluator library." optional = false python-versions = ">=3.9" +groups = ["main"] files = [ {file = "simpleeval-1.0.3-py3-none-any.whl", hash = "sha256:e3bdbb8c82c26297c9a153902d0fd1858a6c3774bf53ff4f134788c3f2035c38"}, {file = "simpleeval-1.0.3.tar.gz", hash = "sha256:67bbf246040ac3b57c29cf048657b9cf31d4e7b9d6659684daa08ca8f1e45829"}, @@ -1126,6 +1190,7 @@ version = "3.19.3" description = "Simple, fast, extensible JSON encoder/decoder for Python" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.5" +groups = ["main"] files = [ {file = "simplejson-3.19.3-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:f39caec26007a2d0efab6b8b1d74873ede9351962707afab622cc2285dd26ed0"}, {file = "simplejson-3.19.3-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:83c87706265ae3028e8460d08b05f30254c569772e859e5ba61fe8af2c883468"}, @@ -1245,6 +1310,8 @@ version = "0.44.0" description = "A framework for building Singer taps" optional = false python-versions = ">=3.9" +groups = ["main"] +markers = "python_version >= \"3.10\"" files = [ {file = "singer_sdk-0.44.0-py3-none-any.whl", hash = "sha256:15d896edae6e16c2614e5211f52546a63ea58cd88578e61efc259dfefc032972"}, {file = "singer_sdk-0.44.0.tar.gz", hash = "sha256:050ab063188a0d6f6989456d0d3d9c52cb6da706950ec6f8d8ad1a60ecae7a1b"}, @@ -1272,10 +1339,7 @@ simpleeval = ">=0.9.13,<1.0.1 || >1.0.1" simplejson = ">=3.17.6" sqlalchemy = ">=1.4,<3.0" typing-extensions = ">=4.5.0" -urllib3 = [ - {version = "<2", markers = "python_version < \"3.10\""}, - {version = "*", markers = "python_version >= \"3.10\""}, -] +urllib3 = {version = "*", markers = "python_version >= \"3.10\""} [package.extras] docs = ["furo (>=2024.5.6)", "myst-parser (>=3)", "pytest (>=7.2.1)", "sphinx (>=7)", "sphinx-copybutton (>=0.5.2)", "sphinx-inline-tabs (>=2023.4.21)", "sphinx-notfound-page (>=1.0.0)", "sphinx-reredirects (>=0.1.5)"] @@ -1286,12 +1350,58 @@ s3 = ["fs-s3fs (>=1.1.1)", "s3fs (>=2024.9.0)"] ssh = ["paramiko (>=3.3.0)"] testing = ["pytest (>=7.2.1)"] +[[package]] +name = "singer-sdk" +version = "0.44.3" +description = "A framework for building Singer taps" +optional = false +python-versions = ">=3.9" +groups = ["main"] +markers = "python_version < \"3.10\"" +files = [ + {file = "singer_sdk-0.44.3-py3-none-any.whl", hash = "sha256:25cdde6552750485aaa0c98f5c5ba6bdd7e4c69e982c7d4c2631c6df3e64b1ae"}, + {file = "singer_sdk-0.44.3.tar.gz", hash = "sha256:f2d7eacb4a05270613dc7c02cdcef383f3146e859a14dbdb7c207c90466c7701"}, +] + +[package.dependencies] +backoff = {version = ">=2.0.0", markers = "python_version < \"4\""} +backports-datetime-fromisoformat = {version = ">=2.0.1", markers = "python_version < \"3.11\""} +click = ">=8.0,<9.0" +fs = ">=2.4.16" +fsspec = ">=2024.9.0" +importlib-metadata = {version = "<9.0.0", markers = "python_version < \"3.12\""} +importlib-resources = {version = ">=5.12.0,<6.2.0 || >6.2.0,<6.3.0 || >6.3.0,<6.3.1 || >6.3.1", markers = "python_version < \"3.10\""} +inflection = ">=0.5.1" +joblib = ">=1.3.0" +jsonpath-ng = ">=1.5.3" +jsonschema = ">=4.16.0" +packaging = ">=23.1" +python-dotenv = ">=0.20" +pyyaml = ">=6.0" +referencing = ">=0.30.0" +requests = ">=2.25.1" +setuptools = "<=70.3.0" +simpleeval = ">=0.9.13,<1.0.1 || >1.0.1" +simplejson = ">=3.17.6" +sqlalchemy = ">=1.4,<3.0" +typing-extensions = ">=4.5.0" + +[package.extras] +docs = ["furo (>=2024.5.6)", "myst-parser (>=3)", "pytest (>=7.2.1)", "sphinx (>=7)", "sphinx-copybutton (>=0.5.2)", "sphinx-inline-tabs (>=2023.4.21)", "sphinx-notfound-page (>=1.0.0)", "sphinx-reredirects (>=0.1.5)"] +faker = ["faker (>=22.5)"] +jwt = ["cryptography (>=3.4.6)", "pyjwt (>=2.4,<3.0)"] +parquet = ["numpy (>=1.22)", "pyarrow (>=13)"] +s3 = ["fs-s3fs (>=1.1.1)", "s3fs (>=2024.9.0)"] +ssh = ["paramiko (>=3.3.0)"] +testing = ["pytest (>=7.2.1)"] + [[package]] name = "six" version = "1.17.0" description = "Python 2 and 3 compatibility utilities" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +groups = ["main", "dev"] files = [ {file = "six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274"}, {file = "six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81"}, @@ -1303,6 +1413,7 @@ version = "2.6" description = "A modern CSS selector implementation for Beautiful Soup." optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "soupsieve-2.6-py3-none-any.whl", hash = "sha256:e72c4ff06e4fb6e4b5a9f0f55fe6e81514581fca1515028625d0f299c602ccc9"}, {file = "soupsieve-2.6.tar.gz", hash = "sha256:e2e68417777af359ec65daac1057404a3c8a5455bb8abc36f1a9866ab1a51abb"}, @@ -1314,6 +1425,7 @@ version = "2.0.37" description = "Database Abstraction Library" optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = "SQLAlchemy-2.0.37-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:da36c3b0e891808a7542c5c89f224520b9a16c7f5e4d6a1156955605e54aef0e"}, {file = "SQLAlchemy-2.0.37-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e7402ff96e2b073a98ef6d6142796426d705addd27b9d26c3b32dbaa06d7d069"}, @@ -1409,6 +1521,8 @@ version = "2.2.1" description = "A lil' TOML parser" optional = false python-versions = ">=3.8" +groups = ["dev"] +markers = "python_version < \"3.11\"" files = [ {file = "tomli-2.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249"}, {file = "tomli-2.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6"}, @@ -1450,6 +1564,7 @@ version = "4.12.0.20241020" description = "Typing stubs for beautifulsoup4" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "types-beautifulsoup4-4.12.0.20241020.tar.gz", hash = "sha256:158370d08d0cd448bd11b132a50ff5279237a5d4b5837beba074de152a513059"}, {file = "types_beautifulsoup4-4.12.0.20241020-py3-none-any.whl", hash = "sha256:c95e66ce15a4f5f0835f7fbc5cd886321ae8294f977c495424eaf4225307fd30"}, @@ -1464,6 +1579,7 @@ version = "1.1.11.20241018" description = "Typing stubs for html5lib" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "types-html5lib-1.1.11.20241018.tar.gz", hash = "sha256:98042555ff78d9e3a51c77c918b1041acbb7eb6c405408d8a9e150ff5beccafa"}, {file = "types_html5lib-1.1.11.20241018-py3-none-any.whl", hash = "sha256:3f1e064d9ed2c289001ae6392c84c93833abb0816165c6ff0abfc304a779f403"}, @@ -1475,31 +1591,19 @@ version = "2.9.0.20241206" description = "Typing stubs for python-dateutil" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "types_python_dateutil-2.9.0.20241206-py3-none-any.whl", hash = "sha256:e248a4bc70a486d3e3ec84d0dc30eec3a5f979d6e7ee4123ae043eedbb987f53"}, {file = "types_python_dateutil-2.9.0.20241206.tar.gz", hash = "sha256:18f493414c26ffba692a72369fea7a154c502646301ebfe3d56a04b3767284cb"}, ] -[[package]] -name = "types-requests" -version = "2.31.0.6" -description = "Typing stubs for requests" -optional = false -python-versions = ">=3.7" -files = [ - {file = "types-requests-2.31.0.6.tar.gz", hash = "sha256:cd74ce3b53c461f1228a9b783929ac73a666658f223e28ed29753771477b3bd0"}, - {file = "types_requests-2.31.0.6-py3-none-any.whl", hash = "sha256:a2db9cb228a81da8348b49ad6db3f5519452dd20a9c1e1a868c83c5fe88fd1a9"}, -] - -[package.dependencies] -types-urllib3 = "*" - [[package]] name = "types-requests" version = "2.32.0.20241016" description = "Typing stubs for requests" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "types-requests-2.32.0.20241016.tar.gz", hash = "sha256:0d9cad2f27515d0e3e3da7134a1b6f28fb97129d86b867f24d9c726452634d95"}, {file = "types_requests-2.32.0.20241016-py3-none-any.whl", hash = "sha256:4195d62d6d3e043a4eaaf08ff8a62184584d2e8684e9d2aa178c7915a7da3747"}, @@ -1514,32 +1618,24 @@ version = "3.19.0.20241221" description = "Typing stubs for simplejson" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "types_simplejson-3.19.0.20241221-py3-none-any.whl", hash = "sha256:179dfaef8c357156c781fa47cfdfcd953a7953fc375dfe9ab19a20054a828980"}, {file = "types_simplejson-3.19.0.20241221.tar.gz", hash = "sha256:114af9db0f49ad15755d2b6ad8e6fd04b5a493815e2fc1e011729d4650defc70"}, ] -[[package]] -name = "types-urllib3" -version = "1.26.25.14" -description = "Typing stubs for urllib3" -optional = false -python-versions = "*" -files = [ - {file = "types-urllib3-1.26.25.14.tar.gz", hash = "sha256:229b7f577c951b8c1b92c1bc2b2fdb0b49847bd2af6d1cc2a2e3dd340f3bda8f"}, - {file = "types_urllib3-1.26.25.14-py3-none-any.whl", hash = "sha256:9683bbb7fb72e32bfe9d2be6e04875fbe1b3eeec3cbb4ea231435aa7fd6b4f0e"}, -] - [[package]] name = "typing-extensions" version = "4.12.2" description = "Backported and Experimental Type Hints for Python 3.8+" optional = false python-versions = ">=3.8" +groups = ["main", "dev"] files = [ {file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"}, {file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"}, ] +markers = {dev = "python_version < \"3.11\""} [[package]] name = "url-normalize" @@ -1547,6 +1643,7 @@ version = "1.4.3" description = "URL normalization for Python" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" +groups = ["dev"] files = [ {file = "url-normalize-1.4.3.tar.gz", hash = "sha256:d23d3a070ac52a67b83a1c59a0e68f8608d1cd538783b401bc9de2c0fac999b2"}, {file = "url_normalize-1.4.3-py2.py3-none-any.whl", hash = "sha256:ec3c301f04e5bb676d333a7fa162fa977ad2ca04b7e652bfc9fac4e405728eed"}, @@ -1555,28 +1652,13 @@ files = [ [package.dependencies] six = "*" -[[package]] -name = "urllib3" -version = "1.26.20" -description = "HTTP library with thread-safe connection pooling, file post, and more." -optional = false -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" -files = [ - {file = "urllib3-1.26.20-py2.py3-none-any.whl", hash = "sha256:0ed14ccfbf1c30a9072c7ca157e4319b70d65f623e91e7b32fadb2853431016e"}, - {file = "urllib3-1.26.20.tar.gz", hash = "sha256:40c2dc0c681e47eb8f90e7e27bf6ff7df2e677421fd46756da1161c39ca70d32"}, -] - -[package.extras] -brotli = ["brotli (==1.0.9)", "brotli (>=1.0.9)", "brotlicffi (>=0.8.0)", "brotlipy (>=0.6.0)"] -secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress", "pyOpenSSL (>=0.14)", "urllib3-secure-extra"] -socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] - [[package]] name = "urllib3" version = "2.3.0" description = "HTTP library with thread-safe connection pooling, file post, and more." optional = false python-versions = ">=3.9" +groups = ["main", "dev"] files = [ {file = "urllib3-2.3.0-py3-none-any.whl", hash = "sha256:1cee9ad369867bfdbbb48b7dd50374c0967a0bb7710050facf0dd6911440e3df"}, {file = "urllib3-2.3.0.tar.gz", hash = "sha256:f8c5449b3cf0861679ce7e0503c7b44b5ec981bec0d1d3795a07f1ba96f0204d"}, @@ -1594,6 +1676,8 @@ version = "3.21.0" description = "Backport of pathlib-compatible object wrapper for zip files" optional = false python-versions = ">=3.9" +groups = ["main"] +markers = "python_version < \"3.12\"" files = [ {file = "zipp-3.21.0-py3-none-any.whl", hash = "sha256:ac1bbe05fd2991f160ebce24ffbac5f6d11d83dc90891255885223d42b3cd931"}, {file = "zipp-3.21.0.tar.gz", hash = "sha256:2c9958f6430a2040341a52eb608ed6dd93ef4392e02ffe219417c1b28b5dd1f4"}, @@ -1608,6 +1692,6 @@ test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", type = ["pytest-mypy"] [metadata] -lock-version = "2.0" +lock-version = "2.1" python-versions = ">=3.9" -content-hash = "0f44403abfb5026eb350efeb24bae9360d5c1e735d247b4426ec80c18b428cc8" +content-hash = "88fcc1afffeecf6f6a8399f78f16074b8c6632dea218af1f8e470459a5c93f0f" diff --git a/pyproject.toml b/pyproject.toml index a9738670..de251421 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,6 +42,7 @@ requests = "~=2.32.3" # For local SDK dev: # singer-sdk = {path = "../singer-sdk", develop = true} singer-sdk = "~=0.44.0" +pytz = "^2025.1" [tool.poetry.group.dev.dependencies] pytest = ">=7.3.1" diff --git a/tap_github/authenticator.py b/tap_github/authenticator.py index 4e5f512e..21c5e065 100644 --- a/tap_github/authenticator.py +++ b/tap_github/authenticator.py @@ -353,10 +353,6 @@ def get_next_auth_token(self) -> None: self.logger.info("Switching to fresh auth token") return - raise RuntimeError( - "All GitHub tokens have hit their rate limit. Stopping here." - ) - def update_rate_limit( self, response_headers: requests.models.CaseInsensitiveDict ) -> None: diff --git a/tap_github/client.py b/tap_github/client.py index b4ff6a03..9c06a688 100644 --- a/tap_github/client.py +++ b/tap_github/client.py @@ -5,9 +5,10 @@ import email.utils import inspect import random +import backoff import time from types import FrameType -from typing import TYPE_CHECKING, Any, ClassVar, cast +from typing import TYPE_CHECKING, Any, ClassVar, cast, Generator from urllib.parse import parse_qs, urlparse from dateutil.parser import parse @@ -35,11 +36,15 @@ class GitHubRestStream(RESTStream): DEFAULT_API_BASE_URL = "https://api.github.com" LOG_REQUEST_METRIC_URLS = True + # GitHub is missing the "since" parameter on a few endpoints # set this parameter to True if your stream needs to navigate data in descending order # noqa: E501 # and try to exit early on its own. # This only has effect on streams whose `replication_key` is `updated_at`. use_fake_since_parameter = False + + # Some endpoints can benefit from cursor pagination. + use_cursor_paging = False _authenticator: GitHubTokenAuthenticator | None = None @@ -128,14 +133,27 @@ def get_next_page_token( # Use header links returned by the GitHub API. parsed_url = urlparse(response.links["next"]["url"]) - captured_page_value_list = parse_qs(parsed_url.query).get("page") - next_page_string = ( - captured_page_value_list[0] if captured_page_value_list else None - ) - if next_page_string and next_page_string.isdigit(): - return int(next_page_string) + + if self.use_cursor_paging: + captured_after = parse_qs(parsed_url.query).get("after") + self.logger.warning( + f"Captured after cursor: {captured_after} for url: {response.links['next']['url']} - Previous token: {previous_token}" # noqa: E501 + ) - return (previous_token or 1) + 1 + if captured_after and captured_after != previous_token: + return captured_after[0] + # If no "after" cursor is found or the same token was found, + # we return None to stop pagination. + return None + else: + captured_page_value_list = parse_qs(parsed_url.query).get("page") + next_page_string = ( + captured_page_value_list[0] if captured_page_value_list else None + ) + if next_page_string and next_page_string.isdigit(): + return int(next_page_string) + + return (previous_token or 1) + 1 def get_url_params( self, @@ -145,7 +163,11 @@ def get_url_params( """Return a dictionary of values to be used in URL parameterization.""" params: dict = {"per_page": self.MAX_PER_PAGE} if next_page_token: - params["page"] = next_page_token + if self.use_cursor_paging: + # If using cursor pagination, we need to pass the next page token as a parameter. + params["after"] = next_page_token + else: + params["page"] = next_page_token if self.replication_key == "updated_at": params["sort"] = "updated" @@ -214,9 +236,10 @@ def validate_response(self, response: requests.Response) -> None: response.status_code == 403 and "rate limit exceeded" in str(response.content).lower() ): + # Wait about 15 minutes and retry + time.sleep((60 * 15) + 30 * random.random()) # Update token self.authenticator.get_next_auth_token() - # Raise an error to force a retry with the new token. raise RetriableAPIError(msg, response) # Retry on secondary rate limit @@ -231,9 +254,10 @@ def validate_response(self, response: requests.Response) -> None: # The GitHub API randomly returns 401 Unauthorized errors, so we try again. if ( response.status_code == 401 - # if the token is invalid, we are also told about it - and "bad credentials" not in str(response.content).lower() ): + if "bad credentials" in str(response.content).lower(): + # Update token + self.authenticator.get_next_auth_token() raise RetriableAPIError(msg, response) # all other errors are fatal @@ -305,10 +329,77 @@ def calculate_sync_cost( """Return the cost of the last REST API call.""" return {"rest": 1, "graphql": 0, "search": 0} + def backoff_max_tries(self) -> int: # noqa: PLR6301 + """The number of attempts before giving up when retrying requests. + """ + return 15 + + + def backoff_wait_generator(self) -> Generator[float, None, None]: # noqa: PLR6301 + """The wait generator used by the backoff decorator on request failure. + + Using higher factor (8) and longer max_value (300) to provide more spacing between + retry attempts to better handle rate limits. + """ + return backoff.expo(factor=8, max_value=300) class GitHubGraphqlStream(GraphQLStream, GitHubRestStream): """GitHub Graphql stream class.""" + tolerated_graphql_error_types = ["NOT_FOUND", "FORBIDDEN"] + + def check_rate_limits(self, headers: dict = None) -> None: + """Check rate limits from response headers and pause if we're near the limit. + + GitHub GraphQL API provides rate limit information in response headers. + This method checks those headers and adds a delay if needed. + + Args: + headers: Headers from a previous response. If not provided, + function will return without checking. + """ + if not headers: + return + + try: + # Extract rate limit information from headers + limit = int(headers.get("X-RateLimit-Limit", "5000")) + remaining = int(headers.get("X-RateLimit-Remaining", "5000")) + reset_time = headers.get("X-RateLimit-Reset", "") + used = int(headers.get("X-RateLimit-Used", "0")) + resource = headers.get("X-RateLimit-Resource", "graphql") + + # Calculate reset time in human-readable format + reset_datetime = "" + if reset_time: + try: + import datetime + reset_datetime = datetime.datetime.fromtimestamp(int(reset_time)).strftime("%Y-%m-%d %H:%M:%S") + except Exception: + reset_datetime = reset_time # Use raw value if conversion fails + + # Log the current rate limit status + self.logger.info( + f"Rate limit status for {resource}: " + f"{remaining}/{limit} remaining, {used} used. " + f"Resets at {reset_datetime}" + ) + + # If less than 10% of points remain, pause to avoid hitting limits + if remaining < (limit * 0.1): + self.logger.warning( + f"Rate limit for {resource} is getting low: " + f"{remaining}/{limit} remaining. Resets at {reset_datetime}" + ) + # Calculate a wait time with random jitter to avoid all clients hitting at once + wait_time = 60 + random.uniform(0, 30) + self.logger.info(f"Waiting for {wait_time:.2f} seconds before continuing...") + time.sleep(wait_time) + except Exception as e: + # If checking rate limits fails, log but continue + self.logger.warning(f"Failed to check rate limits from headers: {e}") + # Don't raise the exception as this is just a precautionary check + @property def url_base(self) -> str: return f'{self.config.get("api_url_base", self.DEFAULT_API_BASE_URL)}/graphql' @@ -328,6 +419,9 @@ def parse_response(self, response: requests.Response) -> Iterable[dict]: .. _requests.Response: https://docs.python-requests.org/en/latest/api/#requests.Response """ + # Check rate limits from response headers + self.check_rate_limits(response.headers) + resp_json = response.json() yield from extract_jsonpath(self.query_jsonpath, input=resp_json) @@ -393,6 +487,12 @@ def get_next_page_token( next_page_cursor = next( cursor for cursor in next_page_end_cursor_results if cursor is not None ) + + # Prevent pagination loops - if cursor is the same as before, stop pagination + if previous_token and next_page_key in previous_token and previous_token[next_page_key] == next_page_cursor: + self.logger.warning(f"Identical pagination token detected: {next_page_cursor}. Stopping pagination.") + return None + next_page_cursors[next_page_key] = next_page_cursor return next_page_cursors @@ -450,4 +550,25 @@ def validate_response(self, response: requests.Response) -> None: rj = response.json() if "errors" in rj: msg = rj["errors"] - raise FatalAPIError(f"Graphql error: {msg}", response) + + # First check for rate limiting errors specifically + for error in rj["errors"]: + if error.get("type") == "RATE_LIMITED": + self.logger.warning(f"Rate limited: {error['message']}. Waiting and will retry...") + # Wait for 60 seconds + some random jitter (up to 30 seconds) + wait_time = 60 + random.uniform(0, 30) + self.logger.info(f"Waiting for {wait_time:.2f} seconds before retrying...") + time.sleep(wait_time) + # Update token to possibly use a different one + self.authenticator.get_next_auth_token() + raise RetriableAPIError(f"GraphQL rate limit exceeded: {error['message']}", response) + + # Then check for other errors + for error in rj["errors"]: + if error.get("type") in self.tolerated_graphql_error_types: + self.logger.info( + f"Tolerated Graphql Error: {error['message']} for path: {response.url}" + ) + else: + raise FatalAPIError(f"Graphql error: {msg}", response) + diff --git a/tap_github/issue_transfer_streams.py b/tap_github/issue_transfer_streams.py new file mode 100644 index 00000000..c873b8ec --- /dev/null +++ b/tap_github/issue_transfer_streams.py @@ -0,0 +1,74 @@ +"""Repository Stream types classes for tap-github.""" + +from typing import Any, Dict, Iterable, List, Optional, Tuple +from urllib.parse import parse_qs, urlparse + +import requests +from dateutil.parser import parse +from singer_sdk import typing as th # JSON Schema typing helpers +from singer_sdk.exceptions import FatalAPIError +from singer_sdk.helpers.jsonpath import extract_jsonpath + +from tap_github.client import GitHubGraphqlStream, GitHubRestStream +from tap_github.schema_objects import ( + label_object, + milestone_object, + reactions_object, + user_object, +) + + +class IssueTransfersStream(GitHubRestStream): + """Defines 'IssueTransfers' stream which returns Issues that have been transferred to another repository.""" + + name = "issue_transfers" + path = "/repos/{org}/{repo}/issues/{issue_number}" + primary_keys = ["id"] + # Do not fail if the issue has been transferred from a deleted repository + tolerated_http_errors = [404] + + @property + def partitions(self) -> Optional[List[Dict]]: + """Return a list of partitions.""" + if "issues_check_transfer" in self.config: + issues_check = [] + for issue in self.config["issues_check_transfer"]: + issue_data = issue.split("|") + issues_check.append( + { + "org": issue_data[0], + "repo": issue_data[1], + "issue_number": issue_data[2], + } + ) + return issues_check + return None + + def get_url_params( + self, context: Optional[Dict], next_page_token: Optional[Any] + ) -> Dict[str, Any]: + """Return a dictionary of values to be used in URL parameterization.""" + assert context is not None, f"Context cannot be empty for '{self.name}' stream." + params = super().get_url_params(context, next_page_token) + return params + + def post_process(self, row: dict, context: Optional[Dict] = None) -> dict: + row = super().post_process(row, context) + + requested_url = f"https://api.github.com/repos/{context['org']}/{context['repo']}/issues/{context['issue_number']}" + if requested_url == row["url"]: + return None + outData = { + "org": context["org"], + "repo": context["repo"], + "issue_number": int(context["issue_number"]), + "transferred_to_url": row["url"], + } + return outData + + schema = th.PropertiesList( + th.Property("repo", th.StringType), + th.Property("org", th.StringType), + th.Property("issue_number", th.IntegerType), + th.Property("transferred_to_url", th.StringType), + ).to_dict() diff --git a/tap_github/organization_streams.py b/tap_github/organization_streams.py index 66950b1f..f01cdf74 100644 --- a/tap_github/organization_streams.py +++ b/tap_github/organization_streams.py @@ -4,9 +4,11 @@ from typing import TYPE_CHECKING, Any, ClassVar +import requests + from singer_sdk import typing as th # JSON Schema typing helpers -from tap_github.client import GitHubRestStream +from tap_github.client import GitHubGraphqlStream, GitHubRestStream if TYPE_CHECKING: from collections.abc import Iterable @@ -182,3 +184,459 @@ class TeamRolesStream(GitHubRestStream): th.Property("role", th.StringType), th.Property("state", th.StringType), ).to_dict() + +class OrganizationProjectsV2Stream(GitHubGraphqlStream): + """Defines the 'projects_v2' stream for GitHub Projects at org level.""" + + name = "projects_v2" + parent_stream_type = OrganizationStream + ignore_parent_replication_key = True + state_partitioning_keys: ClassVar[list[str]] = ["org"] + primary_keys: ClassVar[list[str]] = ["id"] + replication_key = "updatedAt" + + def get_url_params( + self, + context: dict | None, + next_page_token: Any | None, # noqa: ANN401 + ) -> dict[str, Any]: + """Return a dictionary of values to be used in URL parameterization.""" + return super().get_url_params(context, next_page_token) + + def post_process(self, row: dict, context: dict | None = None) -> dict: + """Process organization project record.""" + row = super().post_process(row, context) + self.logger.debug(f"Processing organization project: {row.get('id')} - {row.get('title')}") + return row + + def get_child_context(self, record: dict, context: dict | None) -> dict: + """Return a context dictionary for child streams.""" + if context is None: + context = {} + + self.logger.debug(f"Creating child context from project record: {record.get('id')} - {record.get('title')}") + return { + "org": record.get("org", context.get("org")), + "project_id": record.get("id") + } + + @property + def query_jsonpath(self) -> str: + """Return the jsonpath for the query results based on configuration.""" + return "$.data.organization.projectsV2.nodes[*]" + + @property + def query(self) -> str: + """Return the GraphQL query for Organization ProjectsV2.""" + return """ + query($org: String!, $per_page: Int!, $nextPageCursor_0: String) { + organization(login: $org) { + projectsV2(first: $per_page, after: $nextPageCursor_0) { + nodes { + id + databaseId + number + title + shortDescription + url + public + closed + createdAt + updatedAt + creator { + login + __typename + } + } + pageInfo { + hasNextPage_0: hasNextPage + endCursor_0: endCursor + startCursor_0: startCursor + } + } + } + } + """ + + schema = th.PropertiesList( + # Parent keys + th.Property("org", th.StringType), + # Rest + th.Property("id", th.StringType), + th.Property("databaseId", th.IntegerType), + th.Property("number", th.IntegerType), + th.Property("title", th.StringType), + th.Property("shortDescription", th.StringType), + th.Property("url", th.StringType), + th.Property("public", th.BooleanType), + th.Property("closed", th.BooleanType), + th.Property("createdAt", th.DateTimeType), + th.Property("updatedAt", th.DateTimeType), + th.Property( + "creator", + th.ObjectType( + th.Property("login", th.StringType), + th.Property("__typename", th.StringType), + ), + ), + ).to_dict() + + +class ProjectV2ItemsStream(GitHubGraphqlStream): + """Defines the 'project_v2_items' stream for GitHub Project items.""" + + MAX_PER_PAGE = 20 + name = "project_v2_items" + parent_stream_type = OrganizationProjectsV2Stream + ignore_parent_replication_key = True + primary_keys: ClassVar[list[str]] = ["id"] + replication_key = "updatedAt" + state_partitioning_keys: ClassVar[list[str]] = ["org", "project_id"] + + query_jsonpath = "$.data.node.items.nodes[*]" + + def get_child_context(self, record: dict, context: dict | None) -> dict: + """Return a context dictionary for child streams.""" + if context is None: + context = {} + + return { + "org": record.get("org", context.get("org")), + "project_id": record.get("id") + } + + @property + def query(self) -> str: + """Return the GraphQL query for ProjectV2Items.""" + return """ + query($project_id: ID!, $per_page: Int!, $nextPageCursor_0: String) { + node(id: $project_id) { + ... on ProjectV2 { + items(first: $per_page, after: $nextPageCursor_0) { + nodes { + id + type + fieldValues(first: 5) { + nodes { + ... on ProjectV2ItemFieldTextValue { + text + field { + ... on ProjectV2FieldCommon { + name + id + } + } + } + ... on ProjectV2ItemFieldDateValue { + date + field { + ... on ProjectV2FieldCommon { + name + id + } + } + } + ... on ProjectV2ItemFieldSingleSelectValue { + name + field { + ... on ProjectV2FieldCommon { + name + id + } + } + } + ... on ProjectV2ItemFieldNumberValue { + number + field { + ... on ProjectV2FieldCommon { + name + id + } + } + } + ... on ProjectV2ItemFieldIterationValue { + title + startDate + duration + field { + ... on ProjectV2FieldCommon { + name + id + } + } + } + ... on ProjectV2ItemFieldLabelValue { + labels(first: 5) { + nodes { + name + color + } + } + field { + ... on ProjectV2FieldCommon { + name + id + } + } + } + ... on ProjectV2ItemFieldMilestoneValue { + milestone { + title + dueOn + } + field { + ... on ProjectV2FieldCommon { + name + id + } + } + } + ... on ProjectV2ItemFieldPullRequestValue { + pullRequests(first: 5) { + nodes { + title + number + } + } + field { + ... on ProjectV2FieldCommon { + name + id + } + } + } + ... on ProjectV2ItemFieldRepositoryValue { + repository { + name + owner { + login + } + } + field { + ... on ProjectV2FieldCommon { + name + id + } + } + } + ... on ProjectV2ItemFieldUserValue { + users(first: 5) { + nodes { + login + } + } + field { + ... on ProjectV2FieldCommon { + name + id + } + } + } + } + } + content { + ... on Issue { + id + number + title + url + state + repository { + id + databaseId + name + owner { + login + } + } + } + ... on PullRequest { + id + number + title + url + state + repository { + id + databaseId + name + owner { + login + } + } + } + ... on DraftIssue { + id + title + } + } + createdAt + updatedAt + } + pageInfo { + hasNextPage_0: hasNextPage + endCursor_0: endCursor + startCursor_0: startCursor + } + } + } + } + } + """ + + def get_url_params( + self, + context: dict | None, + next_page_token: Any | None, # noqa: ANN401 + ) -> dict[str, Any]: + """Return a dictionary of values to be used in URL parameterization.""" + params = super().get_url_params(context, next_page_token) + + if context is None: + self.logger.error("Context is None when trying to get project_id for API call") + raise ValueError("Context is required for ProjectV2ItemsStream") + + if "project_id" not in context: + self.logger.error(f"Missing project_id in context: {context}") + raise ValueError("project_id is required in context for ProjectV2ItemsStream") + + params["project_id"] = context["project_id"] + return params + + def get_next_page_token( + self, + response: requests.Response, + previous_token: Any | None, # noqa: ANN401 + ) -> Any | None: # noqa: ANN401 + """Return a token for identifying next page or None if no more pages.""" + next_token = super().get_next_page_token(response, previous_token) + + # Extra handling for pagination issues + if next_token == previous_token: + self.logger.warning(f"Identical pagination token detected: {next_token}. Stopping pagination.") + return None + + return next_token + + def post_process(self, row: dict, context: dict | None = None) -> dict: + """Process ProjectV2 item records.""" + assert context is not None, f"Context cannot be empty for '{self.name}' stream." + row = super().post_process(row, context) + + if "project_id" in context: + row["project_id"] = context["project_id"] + else: + self.logger.warning(f"Missing project_id in context during post_process: {context}") + + # Extract repository ID if available + if row.get("content") and row["content"].get("repository"): + # Use the numeric database ID for consistency with other streams + if "databaseId" in row["content"]["repository"]: + row["repo_id"] = row["content"]["repository"]["databaseId"] + + # Also add repository name and owner for convenience + if "name" in row["content"]["repository"]: + row["repo"] = row["content"]["repository"]["name"] + + if "owner" in row["content"]["repository"] and "login" in row["content"]["repository"]["owner"]: + row["repo_owner"] = row["content"]["repository"]["owner"]["login"] + + # Process field values into a more usable structure + if row.get("fieldValues") and row["fieldValues"].get("nodes"): + # Create a fields object to hold field values in a more accessible format + row["fields"] = {} + + for field_value in row["fieldValues"]["nodes"]: + if not field_value: + # Skip empty field values + continue + + if field_value.get("field") and field_value["field"].get("name"): + field_name = field_value["field"]["name"] + + # Extract the appropriate value based on field type + if "text" in field_value and field_value["text"] is not None: + row["fields"][field_name] = field_value["text"] + elif "date" in field_value and field_value["date"] is not None: + row["fields"][field_name] = field_value["date"] + elif "name" in field_value and field_value["name"] is not None: + row["fields"][field_name] = field_value["name"] + elif "number" in field_value and field_value["number"] is not None: + row["fields"][field_name] = field_value["number"] + elif "title" in field_value and field_value["title"] is not None: + # For iteration fields, we combine relevant info + if "startDate" in field_value and "duration" in field_value: + row["fields"][field_name] = { + "title": field_value["title"], + "startDate": field_value["startDate"], + "duration": field_value["duration"] + } + else: + row["fields"][field_name] = field_value["title"] + elif "labels" in field_value and field_value["labels"] is not None: + if field_value["labels"].get("nodes"): + row["fields"][field_name] = [ + node["name"] for node in field_value["labels"]["nodes"] + ] + elif "milestone" in field_value and field_value["milestone"] is not None: + row["fields"][field_name] = field_value["milestone"]["title"] + elif "pullRequests" in field_value and field_value["pullRequests"] is not None: + if field_value["pullRequests"].get("nodes"): + row["fields"][field_name] = [ + {"number": pr["number"], "title": pr["title"]} + for pr in field_value["pullRequests"]["nodes"] + ] + elif "repository" in field_value and field_value["repository"] is not None: + if field_value["repository"].get("name") and field_value["repository"].get("owner"): + row["fields"][field_name] = f"{field_value['repository']['owner']['login']}/{field_value['repository']['name']}" + elif "users" in field_value and field_value["users"] is not None: + if field_value["users"].get("nodes"): + row["fields"][field_name] = [ + user["login"] for user in field_value["users"]["nodes"] + ] + + # Remove the raw fieldValues object from the output + del row["fieldValues"] + + return row + + schema = th.PropertiesList( + # Parent keys + th.Property("org", th.StringType), + th.Property("project_id", th.StringType), + # Repository info extracted from content + th.Property("repo_id", th.IntegerType), + th.Property("repo", th.StringType), + th.Property("repo_owner", th.StringType), + # Rest + th.Property("id", th.StringType), + th.Property("type", th.StringType), + # Field values as a structured object with all field values + th.Property("fields", th.ObjectType(additional_properties=True)), + th.Property( + "content", + th.ObjectType( + th.Property("id", th.StringType), + th.Property("number", th.IntegerType), + th.Property("title", th.StringType), + th.Property("url", th.StringType), + th.Property("state", th.StringType), + th.Property( + "repository", + th.ObjectType( + th.Property("id", th.StringType), + th.Property("databaseId", th.IntegerType), + th.Property("name", th.StringType), + th.Property( + "owner", + th.ObjectType( + th.Property("login", th.StringType), + ), + ), + ), + ), + ), + ), + th.Property("createdAt", th.DateTimeType), + th.Property("updatedAt", th.DateTimeType), + ).to_dict() \ No newline at end of file diff --git a/tap_github/repository_streams.py b/tap_github/repository_streams.py index b2658637..4a3ffbb8 100644 --- a/tap_github/repository_streams.py +++ b/tap_github/repository_streams.py @@ -5,6 +5,8 @@ from typing import TYPE_CHECKING, Any, ClassVar from urllib.parse import parse_qs, urlparse +import datetime +import pytz from dateutil.parser import parse from singer_sdk import typing as th # JSON Schema typing helpers from singer_sdk.exceptions import FatalAPIError @@ -840,6 +842,7 @@ class IssuesStream(GitHubRestStream): parent_stream_type = RepositoryStream ignore_parent_replication_key = True state_partitioning_keys: ClassVar[list[str]] = ["repo", "org"] + use_cursor_paging = True def get_url_params( self, @@ -909,7 +912,22 @@ def post_process(self, row: dict, context: dict | None = None) -> dict: th.Property("author_association", th.StringType), th.Property("body", th.StringType), th.Property("type", th.StringType), + th.Property("active_lock_reason", th.StringType), + th.Property("draft", th.BooleanType), + th.Property("state_reason", th.StringType), + th.Property( + "performed_via_github_app", + th.ObjectType( + th.Property("id", th.IntegerType), + th.Property("node_id", th.StringType), + th.Property("slug", th.StringType), + th.Property("name", th.StringType), + th.Property("external_url", th.StringType), + th.Property("html_url", th.StringType), + ), + ), th.Property("user", user_object), + th.Property("closed_by", user_object), th.Property( "labels", th.ArrayType(label_object), @@ -1235,6 +1253,8 @@ def post_process(self, row: dict, context: dict | None = None) -> dict: if row["title"] is not None: row["title"] = row["title"].replace("\x00", "") + + # replace +1/-1 emojis to avoid downstream column name errors. if "reactions" in row: row["reactions"]["plus_one"] = row["reactions"].pop("+1", None) @@ -1249,12 +1269,18 @@ def get_child_context(self, record: dict, context: dict | None) -> dict: "repo_id": context["repo_id"], "pull_number": record["number"], "pull_id": record["id"], + "created_at": record["created_at"], + "closed_at": record["closed_at"], + "node_id": record["node_id"], } return { "pull_number": record["number"], "org": record["base"]["user"]["login"], "repo": record["base"]["repo"]["name"], "repo_id": record["base"]["repo"]["id"], + "created_at": record["created_at"], + "closed_at": record["closed_at"], + "node_id": record["node_id"], } schema = th.PropertiesList( @@ -1440,6 +1466,7 @@ def post_process(self, row: dict, context: dict[str, str] | None = None) -> dict if context is not None and "pull_number" in context: row["pull_number"] = context["pull_number"] return row + class PullRequestDiffsStream(GitHubRestStream): @@ -1509,43 +1536,108 @@ def post_process(self, row: dict, context: dict[str, str] | None = None) -> dict th.Property("diff", th.StringType), ).to_dict() - -class ReviewsStream(GitHubRestStream): +class ReviewsStream(GitHubGraphqlStream): name = "reviews" - path = "/repos/{org}/{repo}/pulls/{pull_number}/reviews" - primary_keys: ClassVar[list[str]] = ["id"] - parent_stream_type = PullRequestsStream - ignore_parent_replication_key = False + query_jsonpath = "$.data.repository.pullRequests.nodes.[*].reviews.nodes.[*]" + primary_keys: ClassVar[list[str]] = ["org", "repo", "pull_number", "id"] + parent_stream_type = RepositoryStream state_partitioning_keys: ClassVar[list[str]] = ["repo", "org"] + ignore_parent_replication_key = True + + @property + def query(self) -> str: + """Return dynamic GraphQL query.""" + # Graphql id is equivalent to REST node_id. To keep the tap consistent, we rename "id" to "node_id". # noqa: E501 + # Will get only the first 100 reviews per PR, should be enough for most cases. + return """ + query repositoryReviews($repo: String! $org: String! $nextPageCursor_0: String) { + repository(owner:$org name:$repo) { + pullRequests(first:100 orderBy:{field:UPDATED_AT, direction:DESC} after: $nextPageCursor_0){ + pageInfo { + hasNextPage_0: hasNextPage + startCursor_0: startCursor + endCursor_0: endCursor + } + nodes{ + reviews (first:100){ + nodes { + node_id: id + id: fullDatabaseId + body + state + url + submitted_at: submittedAt + commit{ + id + } + author{ + login + avatar_url: avatarUrl + html_url: url + } + author_association: authorAssociation + pullRequest{ + number + } + } + } + } + } + } + rateLimit { + cost + } + } + """ # noqa: E501 + + + def post_process(self, row: dict, context: dict | None = None) -> dict: + """ + Add a user_id top-level field to be used as state replication key. + """ + row = super().post_process(row, context) + row["id"] = int(row["id"]) + if context is not None: + # Get PR ID from context + row["org"] = context["org"] + row["repo"] = context["repo"] + row["repo_id"] = context["repo_id"] + row["pull_number"] = row['pullRequest']['number'] + return row schema = th.PropertiesList( - # Parent keys - th.Property("pull_number", th.IntegerType), - th.Property("org", th.StringType), + # Parent Keys th.Property("repo", th.StringType), th.Property("repo_id", th.IntegerType), - # Rest - th.Property("id", th.IntegerType), + th.Property("org", th.StringType), + th.Property("pull_number", th.IntegerType), + # Review Info th.Property("node_id", th.StringType), - th.Property("user", user_object), + th.Property("id", th.IntegerType), th.Property("body", th.StringType), th.Property("state", th.StringType), - th.Property("html_url", th.StringType), - th.Property("pull_request_url", th.StringType), - th.Property( - "_links", - th.ObjectType( - th.Property("html", th.ObjectType(th.Property("href", th.StringType))), - th.Property( - "pull_request", th.ObjectType(th.Property("href", th.StringType)) - ), - ), - ), - th.Property("submitted_at", th.DateTimeType), - th.Property("commit_id", th.StringType), + th.Property("url", th.StringType), + th.Property("commit", th.ObjectType( + th.Property("id", th.StringType), + )), + th.Property("author", user_object), th.Property("author_association", th.StringType), + th.Property("submitted_at", th.DateTimeType), ).to_dict() - + + + def get_records(self, context: dict | None = None) -> Iterable[dict[str, Any]]: + """Filter out PRs that are closed for at least 7 days and have been synced to reduce API costs""" + threshold_closed = datetime.datetime.now(pytz.UTC) - datetime.timedelta(days = 7) + + if (context + and 'closed_at' in context + and self.get_starting_timestamp(context) > parse(self.config['start_date']).replace(tzinfo=pytz.UTC) + and (context['closed_at'] is None or parse(context['closed_at']) < threshold_closed)): + self.logger.debug(f"PR Closed and synced. Skipping '{self.name}' for PR '{context['repo']}/{context['pull_number']}'.") + return [] + + return super().get_records(context) class ReviewCommentsStream(GitHubRestStream): name = "review_comments" @@ -1906,112 +1998,6 @@ def parse_response(self, response: requests.Response) -> Iterable[dict]: th.Property("site_admin", th.BooleanType), ).to_dict() - -class ProjectsStream(GitHubRestStream): - name = "projects" - path = "/repos/{org}/{repo}/projects" - ignore_parent_replication_key = True - replication_key = "updated_at" - primary_keys: ClassVar[list[str]] = ["id"] - parent_stream_type = RepositoryStream - state_partitioning_keys: ClassVar[list[str]] = ["repo", "org"] - - def get_child_context(self, record: dict, context: dict | None) -> dict: - return { - "project_id": record["id"], - "repo_id": context["repo_id"] if context else None, - "org": context["org"] if context else None, - "repo": context["repo"] if context else None, - } - - schema = th.PropertiesList( - # Parent keys - th.Property("repo", th.StringType), - th.Property("org", th.StringType), - th.Property("repo_id", th.IntegerType), - # Rest - th.Property("owner_url", th.StringType), - th.Property("url", th.StringType), - th.Property("html_url", th.StringType), - th.Property("columns_url", th.StringType), - th.Property("id", th.IntegerType), - th.Property("node_id", th.StringType), - th.Property("name", th.StringType), - th.Property("body", th.StringType), - th.Property("number", th.IntegerType), - th.Property("state", th.StringType), - th.Property("creator", user_object), - th.Property("created_at", th.DateTimeType), - th.Property("updated_at", th.DateTimeType), - ).to_dict() - - -class ProjectColumnsStream(GitHubRestStream): - name = "project_columns" - path = "/projects/{project_id}/columns" - ignore_parent_replication_key = True - replication_key = "updated_at" - primary_keys: ClassVar[list[str]] = ["id"] - parent_stream_type = ProjectsStream - state_partitioning_keys: ClassVar[list[str]] = ["project_id", "repo", "org"] - - def get_child_context(self, record: dict, context: dict | None) -> dict: - return { - "column_id": record["id"], - "repo_id": context["repo_id"] if context else None, - "org": context["org"] if context else None, - "repo": context["repo"] if context else None, - } - - schema = th.PropertiesList( - # Parent Keys - th.Property("repo", th.StringType), - th.Property("org", th.StringType), - th.Property("repo_id", th.IntegerType), - th.Property("project_id", th.IntegerType), - # Rest - th.Property("url", th.StringType), - th.Property("project_url", th.StringType), - th.Property("cards_url", th.StringType), - th.Property("id", th.IntegerType), - th.Property("node_id", th.StringType), - th.Property("name", th.StringType), - th.Property("created_at", th.DateTimeType), - th.Property("updated_at", th.DateTimeType), - ).to_dict() - - -class ProjectCardsStream(GitHubRestStream): - name = "project_cards" - path = "/projects/columns/{column_id}/cards" - ignore_parent_replication_key = True - replication_key = "updated_at" - primary_keys: ClassVar[list[str]] = ["id"] - parent_stream_type = ProjectColumnsStream - state_partitioning_keys: ClassVar[list[str]] = ["project_id", "repo", "org"] - - schema = th.PropertiesList( - # Parent Keys - th.Property("repo", th.StringType), - th.Property("org", th.StringType), - th.Property("repo_id", th.IntegerType), - th.Property("project_id", th.IntegerType), - th.Property("column_id", th.IntegerType), - # Properties - th.Property("url", th.StringType), - th.Property("id", th.IntegerType), - th.Property("node_id", th.StringType), - th.Property("note", th.StringType), - th.Property("creator", user_object), - th.Property("created_at", th.DateTimeType), - th.Property("updated_at", th.DateTimeType), - th.Property("archived", th.BooleanType), - th.Property("column_url", th.StringType), - th.Property("content_url", th.StringType), - th.Property("project_url", th.StringType), - ).to_dict() - - class WorkflowsStream(GitHubRestStream): """Defines 'workflows' stream.""" @@ -2051,11 +2037,13 @@ def parse_response(self, response: requests.Response) -> Iterable[dict]: class WorkflowRunsStream(GitHubRestStream): """Defines 'workflow_runs' stream.""" + + MAX_PER_PAGE=1000 name = "workflow_runs" path = "/repos/{org}/{repo}/actions/runs" primary_keys: ClassVar[list[str]] = ["id"] - replication_key = None + replication_key = "created_at" parent_stream_type = RepositoryStream ignore_parent_replication_key = False state_partitioning_keys: ClassVar[list[str]] = ["repo", "org"] @@ -2100,6 +2088,19 @@ class WorkflowRunsStream(GitHubRestStream): th.Property("workflow_url", th.StringType), ).to_dict() + def get_url_params( + self, + context: dict | None, + next_page_token: Any | None, # noqa: ANN401 + ) -> dict[str, Any]: + params: dict = {"per_page": self.MAX_PER_PAGE} + if next_page_token: + params["page"] = next_page_token + params["status"] = "completed" + since = self.get_starting_timestamp(context) + params['created'] = f'>{since.isoformat(sep="T")}' + return params + def parse_response(self, response: requests.Response) -> Iterable[dict]: """Parse the response and return an iterator of result rows.""" yield from extract_jsonpath(self.records_jsonpath, input=response.json()) @@ -2128,8 +2129,9 @@ class WorkflowRunJobsStream(GitHubRestStream): primary_keys: ClassVar[list[str]] = ["id"] parent_stream_type = WorkflowRunsStream ignore_parent_replication_key = False - state_partitioning_keys: ClassVar[list[str]] = ["repo", "org", "run_id"] + state_partitioning_keys: ClassVar[list[str]] = [] # No state partitioning keys records_jsonpath = "$.jobs[*]" + tolerated_http_errors = [500] # Sometimes with workflow runs, GitHub returns a 500 error when there are no jobs. schema = th.PropertiesList( # Parent keys @@ -2177,6 +2179,8 @@ def __init__(self, *args, **kwargs) -> None: # noqa: ANN002, ANN003 def parse_response(self, response: requests.Response) -> Iterable[dict]: """Parse the response and return an iterator of result rows.""" + if response.status_code in self.tolerated_http_errors: + return yield from extract_jsonpath(self.records_jsonpath, input=response.json()) def get_url_params( @@ -2460,7 +2464,6 @@ class TrafficClonesStream(TrafficRestStream): ignore_parent_replication_key = True state_partitioning_keys: ClassVar[list[str]] = ["repo", "org"] records_jsonpath = "$.clones[*]" - selected_by_default = False schema = th.PropertiesList( # Parent keys @@ -2485,7 +2488,6 @@ class TrafficReferralPathsStream(TrafficRestStream): ignore_parent_replication_key = True state_partitioning_keys: ClassVar[list[str]] = ["repo", "org"] records_jsonpath = "[*]" - selected_by_default = False schema = th.PropertiesList( # Parent keys @@ -2511,7 +2513,6 @@ class TrafficReferrersStream(TrafficRestStream): ignore_parent_replication_key = True state_partitioning_keys: ClassVar[list[str]] = ["repo", "org"] records_jsonpath = "[*]" - selected_by_default = False schema = th.PropertiesList( # Parent keys @@ -2536,7 +2537,6 @@ class TrafficPageViewsStream(TrafficRestStream): ignore_parent_replication_key = True state_partitioning_keys: ClassVar[list[str]] = ["repo", "org"] records_jsonpath = "$.views[*]" - selected_by_default = False schema = th.PropertiesList( # Parent keys diff --git a/tap_github/streams.py b/tap_github/streams.py index f72861d9..33cf52b9 100644 --- a/tap_github/streams.py +++ b/tap_github/streams.py @@ -8,6 +8,8 @@ TeamMembersStream, TeamRolesStream, TeamsStream, + OrganizationProjectsV2Stream, + ProjectV2ItemsStream, ) from tap_github.repository_streams import ( AnonymousContributorsStream, @@ -27,9 +29,6 @@ LabelsStream, LanguagesStream, MilestonesStream, - ProjectCardsStream, - ProjectColumnsStream, - ProjectsStream, PullRequestCommits, PullRequestDiffsStream, PullRequestsStream, @@ -51,6 +50,7 @@ WorkflowsStream, ) from tap_github.user_streams import StarredStream, UserContributedToStream, UserStream +from tap_github.issue_transfer_streams import IssueTransfersStream if TYPE_CHECKING: from singer_sdk.streams.core import Stream @@ -87,9 +87,6 @@ def __init__(self, valid_queries: set[str], streams: list[type[Stream]]) -> None LabelsStream, LanguagesStream, MilestonesStream, - ProjectCardsStream, - ProjectColumnsStream, - ProjectsStream, PullRequestCommits, PullRequestDiffsStream, PullRequestsStream, @@ -122,7 +119,18 @@ def __init__(self, valid_queries: set[str], streams: list[type[Stream]]) -> None ) ORGANIZATIONS = ( {"organizations"}, - [OrganizationStream, TeamMembersStream, TeamRolesStream, TeamsStream], + [ + OrganizationStream, + TeamMembersStream, + TeamRolesStream, + TeamsStream, + OrganizationProjectsV2Stream, + ProjectV2ItemsStream, + ], + ) + ISSUES_TRANSFER = ( + {"issues_check_transfer"}, + [IssueTransfersStream], ) @classmethod diff --git a/tap_github/tap.py b/tap_github/tap.py index 97f7c601..f34598ee 100644 --- a/tap_github/tap.py +++ b/tap_github/tap.py @@ -124,10 +124,15 @@ def logger(cls: type[TapGitHub]) -> logging.Logger: # noqa: N805 def discover_streams(self) -> list[Stream]: """Return a list of discovered streams for each query.""" + specified_options = { + key: value + for key, value in self.config.items() + if not value is None and len(value) > 0 + } # If the config is empty, assume we are running --help or --capabilities. if ( self.config - and len(Streams.all_valid_queries().intersection(self.config)) != 1 + and len(Streams.all_valid_queries().intersection(specified_options)) != 1 ): raise ValueError( "This tap requires one and only one of the following path options: " @@ -135,8 +140,8 @@ def discover_streams(self) -> list[Stream]: ) streams = [] for stream_type in Streams: - if (not self.config) or len( - stream_type.valid_queries.intersection(self.config) + if (not specified_options) or len( + stream_type.valid_queries.intersection(specified_options) ) > 0: streams += [ StreamClass(tap=self) for StreamClass in stream_type.streams diff --git a/tap_github/user_streams.py b/tap_github/user_streams.py index 4433663e..46021fcb 100644 --- a/tap_github/user_streams.py +++ b/tap_github/user_streams.py @@ -22,6 +22,7 @@ class UserStream(GitHubRestStream): name = "users" replication_key = "updated_at" + tolerated_http_errors = [404, 403] @property def path(self) -> str: # type: ignore