From 398690f8caa4c56c54e615d54032e49d6e10c9df Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Wed, 10 Sep 2025 21:18:26 -0700 Subject: [PATCH 01/95] feat: update object detection test cases. --- tests/test_object_detection.py | 158 +++++++++++++++++++++++++++------ 1 file changed, 130 insertions(+), 28 deletions(-) diff --git a/tests/test_object_detection.py b/tests/test_object_detection.py index 521189c..031459f 100644 --- a/tests/test_object_detection.py +++ b/tests/test_object_detection.py @@ -1,43 +1,145 @@ -from unittest.mock import MagicMock -import unittest +import requests from jigsawstack.exceptions import JigsawStackError import jigsawstack import pytest -import asyncio import logging - +from dotenv import load_dotenv +load_dotenv() +import os logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) -jigsaw = jigsawstack.JigsawStack() -async_jigsaw = jigsawstack.AsyncJigsawStack() +jigsaw = jigsawstack.JigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) +async_jigsaw = jigsawstack.AsyncJigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) + +IMAGE_URL = "https://rogilvkqloanxtvjfrkm.supabase.co/storage/v1/object/public/demo/Collabo%201080x842.jpg" -def test_object_detection_response(): - try: - result = jigsaw.vision.object_detection( - { - "url": "https://rogilvkqloanxtvjfrkm.supabase.co/storage/v1/object/public/demo/Collabo%201080x842.jpg" - } - ) - print(result) - assert result["success"] == True - except JigsawStackError as e: - pytest.fail(f"Unexpected JigsawStackError: {e}") +TEST_CASES = [ + { + "name": "with_url_only", + "params": { + "url": IMAGE_URL + }, + "blob": None, + "options": None, + }, + { + "name": "with_blob_only", + "params": None, + "blob": IMAGE_URL, + "options": None, + }, + { + "name": "annotated_image_true", + "blob": IMAGE_URL, + "options": { + "annotated_image": True + }, + }, + { + "name": "with_annotated_image_false", + "blob": IMAGE_URL, + "options": { + "annotated_image": False + }, + }, + { + "name": "with_blob_both_features", + "blob": IMAGE_URL, + "options": { + "features": ["object_detection", "gui"], + "annotated_image": True, + "return_type": "url" + }, + }, + { + "name": "with_blob_gui_features", + "blob": IMAGE_URL, + "options": { + "features": ["gui"], + "annotated_image": False + }, + }, + { + "name": "with_blob_object_detection_features", + "blob": IMAGE_URL, + "options": { + "features": ["object_detection"], + "annotated_image": True, + "return_type": "base64" + }, + }, + { + "name": "with_prompts", + "blob": IMAGE_URL, + "options": { + "prompts": ["castle", "tree"], + "annotated_image": True, + }, + }, + { + "name": "with_all_options", + "blob": IMAGE_URL, + "options": { + "features": ["object_detection", "gui"], + "prompts": ["car", "road", "tree"], + "annotated_image": True, + "return_type": "base64", + "return_masks": False, + }, + }, + ] -def test_object_detection_response_async(): - async def _test(): - client = jigsawstack.AsyncJigsawStack() +class TestObjectDetectionSync: + """Test synchronous object detection methods""" + + sync_test_cases = TEST_CASES + + @pytest.mark.parametrize("test_case", sync_test_cases, ids=[tc["name"] for tc in sync_test_cases]) + def test_object_detection(self, test_case): + """Test synchronous object detection with various inputs""" try: - result = await client.vision.object_detection( - { - "url": "https://rogilvkqloanxtvjfrkm.supabase.co/storage/v1/object/public/demo/Collabo%201080x842.jpg" - } - ) - print(result) + if test_case.get("blob"): + # Download blob content + blob_content = requests.get(test_case["blob"]).content + result = jigsaw.vision.object_detection( + blob_content, + test_case.get("options", {}) + ) + else: + # Use params directly + result = jigsaw.vision.object_detection(test_case["params"]) + + print(f"Test {test_case['name']}: {result}") assert result["success"] == True except JigsawStackError as e: - pytest.fail(f"Unexpected JigsawStackError: {e}") + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") + + +class TestObjectDetectionAsync: + """Test asynchronous object detection methods""" - asyncio.run(_test()) + async_test_cases = TEST_CASES + + @pytest.mark.parametrize("test_case", async_test_cases, ids=[tc["name"] for tc in async_test_cases]) + @pytest.mark.asyncio + async def test_object_detection_async(self, test_case): + """Test asynchronous object detection with various inputs""" + try: + if test_case.get("blob"): + # Download blob content + blob_content = requests.get(test_case["blob"]).content + result = await async_jigsaw.vision.object_detection( + blob_content, + test_case.get("options", {}) + ) + else: + # Use params directly + result = await async_jigsaw.vision.object_detection(test_case["params"]) + + print(f"Test {test_case['name']}: {result}") + assert result["success"] == True + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") \ No newline at end of file From fc18d4e0f787bff26037bf061b2095982bd433d4 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Wed, 10 Sep 2025 21:23:56 -0700 Subject: [PATCH 02/95] feat: updating testcases for file_store api. --- tests/test_file_store.py | 177 +++++++++++++++++++++++++++++---------- 1 file changed, 132 insertions(+), 45 deletions(-) diff --git a/tests/test_file_store.py b/tests/test_file_store.py index daef198..c44090b 100644 --- a/tests/test_file_store.py +++ b/tests/test_file_store.py @@ -1,64 +1,151 @@ -from unittest.mock import MagicMock -import unittest +import requests from jigsawstack.exceptions import JigsawStackError -from jigsawstack import JigsawStack - +import jigsawstack import pytest +import logging +from dotenv import load_dotenv +import os +import uuid -# flake8: noqa +load_dotenv() +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) -client = JigsawStack() +jigsaw = jigsawstack.JigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) +async_jigsaw = jigsawstack.AsyncJigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) +TEXT_FILE_CONTENT = b"This is a test file content for JigsawStack storage" +JSON_FILE_CONTENT = b'{"test": "data", "key": "value"}' +BINARY_FILE_CONTENT = requests.get("https://rogilvkqloanxtvjfrkm.supabase.co/storage/v1/object/public/demo/Collabo%201080x842.jpg").content -@pytest.mark.skip(reason="Skipping TestStoreAPI class for now") -class TestStoreAPI(unittest.TestCase): - def test_upload_success_response(self) -> None: - # Sample file content as bytes - file_content = b"This is a test file content" - options = { - "key": "test-file.txt", +TEST_CASES_UPLOAD = [ + { + "name": "upload_text_file_with_key", + "file": TEXT_FILE_CONTENT, + "options": { + "key": "sample_file.txt", "content_type": "text/plain", "overwrite": True, + }, + }, + { + "name": "upload_image_with_temp_url", + "file": BINARY_FILE_CONTENT, + "options": { + "key": f"test_image.jpg", + "content_type": "image/jpeg", + "overwrite": True, "temp_public_url": True, - } - try: - result = client.store.upload(file_content, options) - assert result["success"] == True - except JigsawStackError as e: - assert e.message == "Failed to parse API response. Please try again." + }, + }, + { + "name": "upload_binary_file", + "file": BINARY_FILE_CONTENT, + "options": { + "overwrite": True, + }, + }, + { + "name": "upload_file_no_options", + "file": TEXT_FILE_CONTENT, + "options": None, + }, +] - def test_get_success_response(self) -> None: - key = "test-file.txt" + +class TestFileStoreSync: + """Test synchronous file store operations""" + + uploaded_keys = [] # Track uploaded files for cleanup + + @pytest.mark.parametrize("test_case", TEST_CASES_UPLOAD, ids=[tc["name"] for tc in TEST_CASES_UPLOAD]) + def test_file_upload(self, test_case): + """Test synchronous file upload with various options""" try: - result = client.store.get(key) - # For file retrieval, we expect the actual file content - assert result is not None + result = jigsaw.store.upload(test_case["file"], test_case["options"]) + + print(f"Upload test {test_case['name']}: {result}") + assert result.get("key") is not None + assert result.get("url") is not None + assert result.get("size") > 0 + + # Check temp_public_url if requested + if test_case.get("options") and test_case["options"].get("temp_public_url"): + assert result.get("temp_public_url") is not None + + # Store key for cleanup + self.uploaded_keys.append(result["key"]) + except JigsawStackError as e: - assert e.message == "Failed to parse API response. Please try again." - - def test_delete_success_response(self) -> None: - key = "test-file.txt" + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") + + def test_file_get(self): + """Test synchronous file retrieval""" + # First upload a file to retrieve + test_key = f"test-get-{uuid.uuid4().hex[:8]}.txt" try: - result = client.store.delete(key) - assert result["success"] == True + upload_result = jigsaw.store.upload( + TEXT_FILE_CONTENT, + {"key": test_key, "content_type": "text/plain"} + ) + + # Now retrieve it + file_content = jigsaw.store.get(upload_result["key"]) + assert file_content is not None + print(f"Retrieved file with key {upload_result['key']}") + + # Cleanup + self.uploaded_keys.append(upload_result["key"]) + except JigsawStackError as e: - assert e.message == "Failed to parse API response. Please try again." + pytest.fail(f"Unexpected JigsawStackError in file get: {e}") - def test_upload_without_options_success_response(self) -> None: - # Test upload without optional parameters - file_content = b"This is another test file content" + +class TestFileStoreAsync: + """Test asynchronous file store operations""" + + uploaded_keys = [] # Track uploaded files for cleanup + + @pytest.mark.parametrize("test_case", TEST_CASES_UPLOAD, ids=[tc["name"] for tc in TEST_CASES_UPLOAD]) + @pytest.mark.asyncio + async def test_file_upload_async(self, test_case): + """Test asynchronous file upload with various options""" try: - result = client.store.upload(file_content) - assert result["success"] == True + result = await async_jigsaw.store.upload(test_case["file"], test_case["options"]) + + print(f"Async upload test {test_case['name']}: {result}") + assert result.get("key") is not None + assert result.get("url") is not None + assert result.get("size") > 0 + + # Check temp_public_url if requested + if test_case.get("options") and test_case["options"].get("temp_public_url"): + assert result.get("temp_public_url") is not None + + # Store key for cleanup + self.uploaded_keys.append(result["key"]) + except JigsawStackError as e: - assert e.message == "Failed to parse API response. Please try again." - - def test_upload_with_partial_options_success_response(self) -> None: - # Test upload with partial options - file_content = b"This is a test file with partial options" - options = {"key": "partial-test-file.txt", "overwrite": False} + pytest.fail(f"Unexpected JigsawStackError in async {test_case['name']}: {e}") + + @pytest.mark.asyncio + async def test_file_get_async(self): + """Test asynchronous file retrieval""" + # First upload a file to retrieve + test_key = f"test-async-get-{uuid.uuid4().hex[:8]}.txt" try: - result = client.store.upload(file_content, options) - assert result["success"] == True + upload_result = await async_jigsaw.store.upload( + TEXT_FILE_CONTENT, + {"key": test_key, "content_type": "text/plain"} + ) + + # Now retrieve it + file_content = await async_jigsaw.store.get(upload_result["key"]) + assert file_content is not None + print(f"Async retrieved file with key {upload_result['key']}") + + # Cleanup + self.uploaded_keys.append(upload_result["key"]) + except JigsawStackError as e: - assert e.message == "Failed to parse API response. Please try again." + pytest.fail(f"Unexpected JigsawStackError in async file get: {e}") \ No newline at end of file From 7792bb1efa59ec48416eb7e69d67c1aaaf8b2c8f Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Wed, 10 Sep 2025 21:36:22 -0700 Subject: [PATCH 03/95] test: dropping test cases for geo service (merged/deprecated with v3 in April) --- tests/test_geo.py | 38 -------------------------------------- 1 file changed, 38 deletions(-) delete mode 100644 tests/test_geo.py diff --git a/tests/test_geo.py b/tests/test_geo.py deleted file mode 100644 index e97e3fb..0000000 --- a/tests/test_geo.py +++ /dev/null @@ -1,38 +0,0 @@ -from unittest.mock import MagicMock -import unittest -from jigsawstack.exceptions import JigsawStackError -from jigsawstack import AsyncJigsawStack -import pytest -import asyncio -import logging - -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) - - -@pytest.mark.skip(reason="Skipping TestWebAPI class for now") -def test_async_country_response(): - async def _test(): - client = AsyncJigsawStack() - try: - result = await client.geo.country({"country_code": "SGP"}) - logger.info(result) - assert result["success"] == True - except JigsawStackError as e: - pytest.fail(f"Unexpected JigsawStackError: {e}") - - asyncio.run(_test()) - - -@pytest.mark.skip(reason="Skipping TestWebAPI class for now") -def test_async_search_response(): - async def _test(): - client = AsyncJigsawStack() - try: - result = await client.geo.search({"search_value": "Nigeria"}) - logger.info(result) - assert result["success"] == True - except JigsawStackError as e: - pytest.fail(f"Unexpected JigsawStackError: {e}") - - asyncio.run(_test()) From c57091356df64f6a2ee2df7583378bb51177cde9 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Wed, 10 Sep 2025 21:39:43 -0700 Subject: [PATCH 04/95] refactor: del custom_types dot py, utlized by text to speech. --- jigsawstack/custom_typing.py | 574 ----------------------------------- 1 file changed, 574 deletions(-) delete mode 100644 jigsawstack/custom_typing.py diff --git a/jigsawstack/custom_typing.py b/jigsawstack/custom_typing.py deleted file mode 100644 index e77adde..0000000 --- a/jigsawstack/custom_typing.py +++ /dev/null @@ -1,574 +0,0 @@ -from typing import Literal - -SupportedAccents = Literal[ - "af-ZA-female-1", - "af-ZA-male-1", - "am-ET-female-1", - "am-ET-male-1", - "ar-AE-female-1", - "ar-AE-male-1", - "ar-BH-female-1", - "ar-BH-male-1", - "ar-DZ-female-1", - "ar-DZ-male-1", - "ar-EG-female-1", - "ar-EG-male-1", - "ar-IQ-female-1", - "ar-IQ-male-1", - "ar-JO-female-1", - "ar-JO-male-1", - "ar-KW-female-1", - "ar-KW-male-1", - "ar-LB-female-1", - "ar-LB-male-1", - "ar-LY-female-1", - "ar-LY-male-1", - "ar-MA-female-1", - "ar-MA-male-1", - "ar-OM-female-1", - "ar-OM-male-1", - "ar-QA-female-1", - "ar-QA-male-1", - "ar-SA-female-1", - "ar-SA-male-1", - "ar-SY-female-1", - "ar-SY-male-1", - "ar-TN-female-1", - "ar-TN-male-1", - "ar-YE-female-1", - "ar-YE-male-1", - "as-IN-male-1", - "as-IN-female-1", - "az-AZ-female-1", - "az-AZ-male-1", - "bg-BG-female-1", - "bg-BG-male-1", - "bn-BD-female-1", - "bn-BD-male-1", - "bn-IN-female-1", - "bn-IN-male-1", - "bs-BA-female-1", - "bs-BA-male-1", - "ca-ES-female-1", - "ca-ES-male-1", - "ca-ES-female-2", - "cs-CZ-female-1", - "cs-CZ-male-1", - "cy-GB-female-1", - "cy-GB-male-1", - "da-DK-female-1", - "da-DK-male-1", - "de-AT-female-1", - "de-AT-male-1", - "de-CH-female-1", - "de-CH-male-1", - "de-DE-female-1", - "de-DE-male-1", - "de-DE-female-2", - "de-DE-male-2", - "de-DE-male-3", - "de-DE-female-3", - "de-DE-male-4", - "de-DE-male-5", - "de-DE-female-4", - "de-DE-male-6", - "de-DE-male-7", - "de-DE-female-5", - "de-DE-male-8", - "de-DE-female-6", - "de-DE-female-7", - "de-DE-male-9", - "de-DE-female-8", - "de-DE-female-9", - "de-DE-female-10", - "el-GR-female-2", - "el-GR-male-2", - "en-AU-female-2", - "en-AU-male-2", - "en-AU-female-3", - "en-AU-female-4", - "en-AU-male-3", - "en-AU-male-4", - "en-AU-female-5", - "en-AU-female-6", - "en-AU-female-7", - "en-AU-male-5", - "en-AU-female-8", - "en-AU-male-6", - "en-AU-male-7", - "en-AU-female-9", - "en-CA-female-2", - "en-CA-male-2", - "en-GB-female-2", - "en-GB-male-2", - "en-GB-female-3", - "en-GB-female-4", - "en-GB-male-3", - "en-GB-female-5", - "en-GB-male-4", - "en-GB-male-5", - "en-GB-female-6", - "en-GB-female-7", - "en-GB-male-6", - "en-GB-male-7", - "en-GB-female-8", - "en-GB-male-8", - "en-GB-female-9", - "en-GB-female-10", - "en-GB-male-9", - "en-GB-male-10", - "en-GB-female-11", - "en-HK-female-1", - "en-HK-male-1", - "en-IE-female-3", - "en-IE-male-3", - "en-IN-female-3", - "en-IN-male-3", - "en-IN-male-4", - "en-IN-female-4", - "en-IN-female-5", - "en-IN-female-6", - "en-IN-male-5", - "en-IN-male-6", - "en-KE-female-1", - "en-KE-male-1", - "en-NG-female-1", - "en-NG-male-1", - "en-NZ-female-1", - "en-NZ-male-1", - "en-PH-female-1", - "en-PH-male-1", - "en-SG-female-1", - "en-SG-male-1", - "en-TZ-female-1", - "en-TZ-male-1", - "en-US-female-3", - "en-US-female-4", - "en-US-male-3", - "en-US-male-4", - "en-US-female-5", - "en-US-female-6", - "en-US-male-5", - "en-US-male-6", - "en-US-female-7", - "en-US-male-7", - "en-US-female-8", - "en-US-male-8", - "en-US-female-9", - "en-US-male-9", - "en-US-female-10", - "en-US-male-10", - "en-US-female-11", - "en-US-male-11", - "en-US-female-12", - "en-US-male-12", - "en-US-female-13", - "en-US-female-14", - "en-US-female-15", - "en-US-female-16", - "en-US-male-13", - "en-US-male-14", - "en-US-female-17", - "en-US-female-18", - "en-US-male-15", - "en-US-male-16", - "en-US-female-19", - "en-US-female-20", - "en-US-female-21", - "en-US-female-22", - "en-US-male-17", - "en-US-male-18", - "en-US-male-19", - "en-US-male-20", - "en-US-male-21", - "en-US-female-23", - "en-US-male-22", - "en-US-male-23", - "en-US-neutral-1", - "en-US-male-24", - "en-US-male-25", - "en-US-male-26", - "en-US-male-27", - "en-US-female-24", - "en-US-female-25", - "en-US-female-26", - "en-US-female-27", - "en-US-male-28", - "en-US-female-28", - "en-US-female-29", - "en-US-female-30", - "en-US-male-29", - "en-US-male-30", - "en-ZA-female-1", - "en-ZA-male-1", - "es-AR-female-1", - "es-AR-male-1", - "es-BO-female-1", - "es-BO-male-1", - "es-CL-female-1", - "es-CL-male-1", - "es-CO-female-1", - "es-CO-male-1", - "es-CR-female-1", - "es-CR-male-1", - "es-CU-female-1", - "es-CU-male-1", - "es-DO-female-1", - "es-DO-male-1", - "es-EC-female-1", - "es-EC-male-1", - "es-ES-female-9", - "es-ES-male-10", - "es-ES-female-10", - "es-ES-male-11", - "es-ES-male-12", - "es-ES-male-13", - "es-ES-female-11", - "es-ES-female-12", - "es-ES-female-13", - "es-ES-female-14", - "es-ES-male-14", - "es-ES-male-15", - "es-ES-male-16", - "es-ES-female-15", - "es-ES-female-16", - "es-ES-female-17", - "es-ES-female-18", - "es-ES-female-19", - "es-ES-female-20", - "es-ES-female-21", - "es-ES-male-17", - "es-ES-male-18", - "es-ES-female-22", - "es-ES-female-23", - "es-GQ-female-1", - "es-GQ-male-1", - "es-GT-female-1", - "es-GT-male-1", - "es-HN-female-1", - "es-HN-male-1", - "es-MX-female-12", - "es-MX-male-11", - "es-MX-female-13", - "es-MX-female-14", - "es-MX-female-15", - "es-MX-male-12", - "es-MX-male-13", - "es-MX-female-16", - "es-MX-male-14", - "es-MX-male-15", - "es-MX-female-17", - "es-MX-female-18", - "es-MX-male-16", - "es-MX-female-19", - "es-MX-male-17", - "es-NI-female-1", - "es-NI-male-1", - "es-PA-female-1", - "es-PA-male-1", - "es-PE-female-1", - "es-PE-male-1", - "es-PR-female-1", - "es-PR-male-1", - "es-PY-female-1", - "es-PY-male-1", - "es-SV-female-1", - "es-SV-male-1", - "es-US-female-1", - "es-US-male-1", - "es-UY-female-1", - "es-UY-male-1", - "es-VE-female-1", - "es-VE-male-1", - "et-EE-female-11", - "et-EE-male-10", - "eu-ES-female-11", - "eu-ES-male-10", - "fa-IR-female-11", - "fa-IR-male-10", - "fi-FI-female-12", - "fi-FI-male-11", - "fi-FI-female-13", - "fil-PH-female-11", - "fil-PH-male-10", - "fr-BE-female-12", - "fr-BE-male-11", - "fr-CA-female-12", - "fr-CA-male-11", - "fr-CA-male-12", - "fr-CA-male-13", - "fr-CH-female-12", - "fr-CH-male-11", - "fr-FR-female-12", - "fr-FR-male-11", - "fr-FR-male-12", - "fr-FR-female-13", - "fr-FR-female-14", - "fr-FR-male-13", - "fr-FR-female-15", - "fr-FR-female-16", - "fr-FR-female-17", - "fr-FR-male-14", - "fr-FR-female-18", - "fr-FR-male-15", - "fr-FR-male-16", - "fr-FR-male-17", - "fr-FR-female-19", - "fr-FR-female-20", - "fr-FR-male-18", - "fr-FR-female-21", - "fr-FR-male-19", - "fr-FR-male-20", - "ga-IE-female-12", - "ga-IE-male-12", - "gl-ES-female-12", - "gl-ES-male-12", - "gu-IN-female-1", - "gu-IN-male-1", - "he-IL-female-12", - "he-IL-male-12", - "hi-IN-female-13", - "hi-IN-male-13", - "hi-IN-male-14", - "hi-IN-female-14", - "hi-IN-female-15", - "hi-IN-male-15", - "hi-IN-male-16", - "hr-HR-female-12", - "hr-HR-male-12", - "hu-HU-female-13", - "hu-HU-male-13", - "hy-AM-female-12", - "hy-AM-male-12", - "id-ID-female-13", - "id-ID-male-13", - "is-IS-female-12", - "is-IS-male-12", - "it-IT-female-13", - "it-IT-female-14", - "it-IT-male-13", - "it-IT-male-14", - "it-IT-male-15", - "it-IT-male-16", - "it-IT-female-15", - "it-IT-female-16", - "it-IT-male-17", - "it-IT-male-18", - "it-IT-female-17", - "it-IT-female-18", - "it-IT-male-19", - "it-IT-female-19", - "it-IT-female-20", - "it-IT-male-20", - "it-IT-male-21", - "it-IT-male-22", - "it-IT-male-23", - "it-IT-male-24", - "it-IT-female-21", - "it-IT-female-22", - "it-IT-male-25", - "it-IT-male-26", - "iu-Cans-CA-female-1", - "iu-Cans-CA-male-1", - "iu-Latn-CA-female-1", - "iu-Latn-CA-male-1", - "ja-JP-female-14", - "ja-JP-male-16", - "ja-JP-female-15", - "ja-JP-male-17", - "ja-JP-female-16", - "ja-JP-male-18", - "ja-JP-female-17", - "ja-JP-male-19", - "ja-JP-male-20", - "jv-ID-female-13", - "jv-ID-male-16", - "ka-GE-female-13", - "ka-GE-male-16", - "kk-KZ-female-13", - "kk-KZ-male-16", - "km-KH-female-13", - "km-KH-male-16", - "kn-IN-female-13", - "kn-IN-male-16", - "ko-KR-female-14", - "ko-KR-male-17", - "ko-KR-male-18", - "ko-KR-male-19", - "ko-KR-male-20", - "ko-KR-female-15", - "ko-KR-female-16", - "ko-KR-female-17", - "ko-KR-female-18", - "ko-KR-male-21", - "ko-KR-male-22", - "lo-LA-female-13", - "lo-LA-male-17", - "lt-LT-female-13", - "lt-LT-male-17", - "lv-LV-female-13", - "lv-LV-male-17", - "mk-MK-female-13", - "mk-MK-male-17", - "ml-IN-female-13", - "ml-IN-male-17", - "mn-MN-female-13", - "mn-MN-male-17", - "mr-IN-female-1", - "mr-IN-male-1", - "ms-MY-female-13", - "ms-MY-male-17", - "mt-MT-female-13", - "mt-MT-male-17", - "my-MM-female-13", - "my-MM-male-17", - "nb-NO-female-14", - "nb-NO-male-18", - "nb-NO-female-15", - "ne-NP-female-13", - "ne-NP-male-17", - "nl-BE-female-14", - "nl-BE-male-18", - "nl-NL-female-14", - "nl-NL-male-18", - "nl-NL-female-15", - "or-IN-female-1", - "or-IN-male-1", - "pa-IN-male-1", - "pa-IN-female-1", - "pl-PL-female-14", - "pl-PL-male-18", - "pl-PL-female-15", - "ps-AF-female-13", - "ps-AF-male-17", - "pt-BR-female-14", - "pt-BR-male-18", - "pt-BR-female-15", - "pt-BR-male-19", - "pt-BR-female-16", - "pt-BR-male-20", - "pt-BR-female-17", - "pt-BR-male-21", - "pt-BR-male-22", - "pt-BR-female-18", - "pt-BR-female-19", - "pt-BR-female-20", - "pt-BR-male-23", - "pt-BR-female-21", - "pt-BR-male-24", - "pt-BR-female-22", - "pt-BR-male-25", - "pt-BR-male-26", - "pt-BR-female-23", - "pt-BR-female-24", - "pt-PT-female-15", - "pt-PT-male-19", - "pt-PT-female-16", - "ro-RO-female-14", - "ro-RO-male-18", - "ru-RU-female-15", - "ru-RU-male-19", - "ru-RU-female-16", - "si-LK-female-14", - "si-LK-male-18", - "sk-SK-female-14", - "sk-SK-male-18", - "sl-SI-female-14", - "sl-SI-male-18", - "so-SO-female-14", - "so-SO-male-18", - "sq-AL-female-14", - "sq-AL-male-18", - "sr-Latn-RS-male-1", - "sr-Latn-RS-female-1", - "sr-RS-female-14", - "sr-RS-male-18", - "su-ID-female-14", - "su-ID-male-18", - "sv-SE-female-15", - "sv-SE-male-19", - "sv-SE-female-16", - "sw-KE-female-14", - "sw-KE-male-18", - "sw-TZ-female-1", - "sw-TZ-male-1", - "ta-IN-female-14", - "ta-IN-male-18", - "ta-LK-female-1", - "ta-LK-male-1", - "ta-MY-female-1", - "ta-MY-male-1", - "ta-SG-female-1", - "ta-SG-male-1", - "te-IN-female-14", - "te-IN-male-18", - "th-TH-female-15", - "th-TH-male-19", - "th-TH-female-16", - "tr-TR-female-15", - "tr-TR-male-19", - "uk-UA-female-14", - "uk-UA-male-18", - "ur-IN-female-1", - "ur-IN-male-1", - "ur-PK-female-14", - "ur-PK-male-18", - "uz-UZ-female-14", - "uz-UZ-male-18", - "vi-VN-female-14", - "vi-VN-male-18", - "wuu-CN-female-1", - "wuu-CN-male-1", - "yue-CN-female-1", - "yue-CN-male-1", - "zh-CN-female-15", - "zh-CN-male-19", - "zh-CN-male-20", - "zh-CN-female-16", - "zh-CN-male-21", - "zh-CN-female-17", - "zh-CN-female-18", - "zh-CN-female-19", - "zh-CN-female-20", - "zh-CN-female-21", - "zh-CN-female-22", - "zh-CN-female-23", - "zh-CN-female-24", - "zh-CN-female-25", - "zh-CN-female-26", - "zh-CN-female-27", - "zh-CN-female-28", - "zh-CN-female-29", - "zh-CN-female-30", - "zh-CN-female-31", - "zh-CN-female-32", - "zh-CN-female-33", - "zh-CN-female-34", - "zh-CN-male-22", - "zh-CN-male-23", - "zh-CN-male-24", - "zh-CN-male-25", - "zh-CN-male-26", - "zh-CN-male-27", - "zh-CN-male-28", - "zh-CN-male-29", - "zh-CN-male-30", - "zh-CN-male-31", - "zh-CN-male-32", - "zh-CN-male-33", - "zh-CN-guangxi-male-1", - "zh-CN-henan-male-1", - "zh-CN-liaoning-female-2", - "zh-CN-liaoning-male-1", - "zh-CN-shaanxi-female-2", - "zh-CN-shandong-male-1", - "zh-CN-sichuan-male-1", - "zh-HK-female-18", - "zh-HK-male-22", - "zh-HK-female-19", - "zh-TW-female-19", - "zh-TW-male-22", - "zh-TW-female-20", - "zu-ZA-female-17", - "zu-ZA-male-21", -] From 43fc3c57a0b3ba8bba248a4ec4f65a2de570e689 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Wed, 10 Sep 2025 22:23:40 -0700 Subject: [PATCH 05/95] fix: updating response types, and formatting for image_generation endpoint. --- jigsawstack/image_generation.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/jigsawstack/image_generation.py b/jigsawstack/image_generation.py index b868ada..d615b6d 100644 --- a/jigsawstack/image_generation.py +++ b/jigsawstack/image_generation.py @@ -1,9 +1,8 @@ -from typing import Any, Dict, List, Union, cast +from typing import Any, Dict, Union, cast from typing_extensions import NotRequired, TypedDict, Literal, Required from .request import Request, RequestConfig from .async_request import AsyncRequest -from typing import List, Union from ._config import ClientConfig @@ -77,9 +76,9 @@ class ImageGenerationResponse(TypedDict): """ Indicates whether the image generation was successful. """ - image: bytes + url: NotRequired[str] """ - The generated image as a blob. + The generated image as a URL or base64 string. """ @@ -103,7 +102,7 @@ def __init__( def image_generation( self, params: ImageGenerationParams - ) -> ImageGenerationResponse: + ) -> Union[ImageGenerationResponse, bytes]: path = "/ai/image_generation" resp = Request( config=self.config, @@ -134,7 +133,7 @@ def __init__( async def image_generation( self, params: ImageGenerationParams - ) -> ImageGenerationResponse: + ) -> Union[ImageGenerationResponse, bytes]: path = "/ai/image_generation" resp = await AsyncRequest( config=self.config, From 3421bf44cc7e5b6eccecaa9518818ee11cf0ec78 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Wed, 10 Sep 2025 22:24:28 -0700 Subject: [PATCH 06/95] test: updating testcases for image-generation service. --- tests/test_image_generation.py | 247 ++++++++++++++++++++++++++++----- 1 file changed, 209 insertions(+), 38 deletions(-) diff --git a/tests/test_image_generation.py b/tests/test_image_generation.py index 6cf275a..fe2dc79 100644 --- a/tests/test_image_generation.py +++ b/tests/test_image_generation.py @@ -1,57 +1,228 @@ -from unittest.mock import MagicMock -import unittest +import requests from jigsawstack.exceptions import JigsawStackError import jigsawstack import pytest -import asyncio import logging -import io +from dotenv import load_dotenv +import os +import base64 +load_dotenv() logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) -jigsaw = jigsawstack.JigsawStack() -async_jigsaw = jigsawstack.AsyncJigsawStack() +jigsaw = jigsawstack.JigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) +async_jigsaw = jigsawstack.AsyncJigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) +IMAGE_URL = "https://images.unsplash.com/photo-1494588024300-e9df7ff98d78?q=80&w=1284&auto=format&fit=crop&ixlib=rb-4.1.0&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D" +FILE_STORE_KEY = jigsaw.store.upload(requests.get(IMAGE_URL).content, { + "filename": "test_image.jpg", + "content_type": "image/jpeg", + "overwrite": True + }) -def test_image_generation_response(): - async def _test(): - client = jigsawstack.AsyncJigsawStack() +TEST_CASES = [ + { + "name": "basic_generation_with_prompt", + "params": { + "prompt": "A beautiful mountain landscape at sunset", + }, + }, + { + "name": "with_aspect_ratio", + "params": { + "prompt": "A serene lake with mountains in the background", + "aspect_ratio": "16:9" + }, + }, + { + "name": "with_custom_dimensions", + "params": { + "prompt": "A futuristic city skyline", + "width": 1024, + "height": 768 + }, + }, + { + "name": "with_output_format_png", + "params": { + "prompt": "A colorful abstract painting", + "output_format": "png" + }, + }, + { + "name": "with_advanced_config", + "params": { + "prompt": "A realistic portrait of a person", + "advance_config": { + "negative_prompt": "blurry, low quality, distorted", + "guidance": 7, + "seed": 42 + } + }, + }, + { + "name": "with_steps", + "params": { + "prompt": "A detailed botanical illustration", + "steps": 30, + "aspect_ratio": "3:4", + "return_type": "base64" + }, + }, + { + "name": "with_return_type_url", + "params": { + "prompt": "A vintage car on a desert road", + "return_type": "url" + }, + }, + { + "name": "with_return_type_base64", + "params": { + "prompt": "A fantasy castle on a hill", + "return_type": "base64" + } + }, + { + "name": "with_all_options", + "params": { + "prompt": "An intricate steampunk clockwork mechanism", + "aspect_ratio": "4:3", + "steps": 25, + "output_format": "png", + "advance_config": { + "negative_prompt": "simple, plain, boring", + "guidance": 8, + "seed": 12345 + }, + "return_type": "base64" + }, + }, +] + +# Test cases for image-to-image generation (using existing images as input) +IMAGE_TO_IMAGE_TEST_CASES = [ + { + "name": "with_url", + "params": { + "prompt": "Add snow effects to this image", + "url": IMAGE_URL, + "return_type": "base64" + }, + }, + { + "name": "with_file_store_key", + "params": { + "prompt": "Apply a cyberpunk style to this image", + "file_store_key": FILE_STORE_KEY, + }, + } +] + + +class TestImageGenerationSync: + """Test synchronous image generation methods""" + + @pytest.mark.parametrize("test_case", TEST_CASES, ids=[tc["name"] for tc in TEST_CASES]) + def test_image_generation(self, test_case): + """Test synchronous image generation with various parameters""" try: - result = await client.image_generation( - { - "prompt": "A beautiful mountain landscape at sunset", - "aspect_ratio": "16:9", - } - ) - # Just check if we got some data back + result = jigsaw.image_generation(test_case["params"]) + + print(type(result)) + + if isinstance(result, dict): + print(result) + # Check response structure assert result is not None - assert len(result) > 0 - except JigsawStackError as e: - pytest.fail(f"Unexpected JigsawStackError: {e}") - asyncio.run(_test()) + if type(result) is dict: + # Check for image data based on return_type + if test_case["params"].get("return_type") == "url": + assert result.get("url") is not None + assert requests.get(result["url"]).status_code == 200 + assert isinstance(result["url"], str) + elif test_case["params"].get("return_type") == "base64": + assert result.get("url") is not None + elif test_case["params"].get("return_type") == "url": + assert result.get("url") is not None + assert requests.get(result["url"]).status_code == 200 + else: + assert isinstance(result, bytes) + + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") + + @pytest.mark.parametrize("test_case", IMAGE_TO_IMAGE_TEST_CASES[:1], ids=[tc["name"] for tc in IMAGE_TO_IMAGE_TEST_CASES[:1]]) + def test_image_to_image_generation(self, test_case): + """Test image-to-image generation with URL input""" + try: + + result = jigsaw.image_generation(test_case["params"]) + + print(f"Test {test_case['name']}: Generated image from input") + assert result is not None + + if type(result) is dict: + assert result.get("success") == True + assert result.get("url") is not None + elif type(result) is bytes: + assert isinstance(result, bytes) + else: + pytest.fail(f"Unexpected result type in {test_case['name']}: {type(result)}") + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") -def test_image_generation_with_advanced_config(): - async def _test(): - client = jigsawstack.AsyncJigsawStack() + +class TestImageGenerationAsync: + """Test asynchronous image generation methods""" + + @pytest.mark.parametrize("test_case", TEST_CASES, ids=[tc["name"] for tc in TEST_CASES]) + @pytest.mark.asyncio + async def test_image_generation_async(self, test_case): + """Test asynchronous image generation with various parameters""" try: - result = await client.image_generation( - { - "prompt": "A beautiful mountain landscape at sunset", - "output_format": "png", - "advance_config": { - "negative_prompt": "blurry, low quality", - "guidance": 7, - "seed": 42, - }, - } - ) - # Just check if we got some data back + result = await async_jigsaw.image_generation(test_case["params"]) + + print(f"Async test {test_case['name']}: Generated image") + + # Check response structure assert result is not None - assert len(result) > 0 + if type(result) is dict: + # Check for image data based on return_type + if test_case["params"].get("return_type") == "url": + assert result.get("url") is not None + assert requests.get(result["url"]).status_code == 200 + assert isinstance(result["url"], str) + assert result["url"].startswith("http") + elif test_case["params"].get("return_type") == "base64": + assert result.get("url") is not None + elif test_case["params"].get("return_type") == "url": + assert result.get("url") is not None + assert requests.get(result["url"]).status_code == 200 + else: + assert isinstance(result, bytes) + except JigsawStackError as e: - pytest.fail(f"Unexpected JigsawStackError: {e}") + pytest.fail(f"Unexpected JigsawStackError in async {test_case['name']}: {e}") + + @pytest.mark.parametrize("test_case", IMAGE_TO_IMAGE_TEST_CASES[:1], ids=[tc["name"] for tc in IMAGE_TO_IMAGE_TEST_CASES[:1]]) + @pytest.mark.asyncio + async def test_image_to_image_generation_async(self, test_case): + """Test asynchronous image-to-image generation with URL input""" + try: + result = await async_jigsaw.image_generation(test_case["params"]) - asyncio.run(_test()) + assert result is not None + if type(result) is dict: + assert result.get("success") == True + assert result.get("url") is not None + elif type(result) is bytes: + assert isinstance(result, bytes) + else: + pytest.fail(f"Unexpected result type in {test_case['name']}: {type(result)}") + + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in async {test_case['name']}: {e}") \ No newline at end of file From 2b7f91ecd0a73648bdd49a40e11fdee8f693333c Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 07:18:22 -0700 Subject: [PATCH 07/95] test: adding new test cases for STT. --- tests/test_audio.py | 262 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 262 insertions(+) create mode 100644 tests/test_audio.py diff --git a/tests/test_audio.py b/tests/test_audio.py new file mode 100644 index 0000000..96169d2 --- /dev/null +++ b/tests/test_audio.py @@ -0,0 +1,262 @@ +import requests +from jigsawstack.exceptions import JigsawStackError +import jigsawstack +import pytest +import logging +from dotenv import load_dotenv +import os + +load_dotenv() +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +jigsaw = jigsawstack.JigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) +async_jigsaw = jigsawstack.AsyncJigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) + +# Sample audio URLs for testing +AUDIO_URL = AUDIO_URL_LONG = "https://jigsawstack.com/preview/stt-example.wav" + + +TEST_CASES = [ + { + "name": "with_url_only", + "params": { + "url": AUDIO_URL + }, + "blob": None, + "options": None, + }, + { + "name": "with_url_and_language", + "params": { + "url": AUDIO_URL, + "language": "en" + }, + "blob": None, + "options": None, + }, + { + "name": "with_url_auto_detect_language", + "params": { + "url": AUDIO_URL, + "language": "auto" + }, + "blob": None, + "options": None, + }, + { + "name": "with_url_and_translate", + "params": { + "url": AUDIO_URL, + "translate": True + }, + "blob": None, + "options": None, + }, + { + "name": "with_blob_only", + "params": None, + "blob": AUDIO_URL, + "options": None, + }, + { + "name": "with_blob_and_language", + "params": None, + "blob": AUDIO_URL, + "options": { + "language": "en" + }, + }, + { + "name": "with_blob_auto_detect", + "params": None, + "blob": AUDIO_URL, + "options": { + "language": "auto" + }, + }, + { + "name": "with_blob_and_translate", + "params": None, + "blob": AUDIO_URL, + "options": { + "translate": True, + "language": "en" + }, + }, + { + "name": "with_by_speaker", + "params": { + "url": AUDIO_URL_LONG, + "by_speaker": True + }, + "blob": None, + "options": None, + }, + { + "name": "with_chunk_settings", + "params": { + "url": AUDIO_URL, + "batch_size": 5, + "chunk_duration": 15 + }, + "blob": None, + "options": None, + }, + { + "name": "with_all_options", + "params": None, + "blob": AUDIO_URL_LONG, + "options": { + "language": "auto", + "translate": False, + "by_speaker": True, + "batch_size": 10, + "chunk_duration": 15 + }, + }, +] + +# Test cases with webhook (separate as they return different response) +WEBHOOK_TEST_CASES = [ + { + "name": "with_webhook_url", + "params": { + "url": AUDIO_URL, + "webhook_url": "https://webhook.site/test-webhook" + }, + "blob": None, + "options": None, + }, + { + "name": "with_blob_and_webhook", + "params": None, + "blob": AUDIO_URL, + "options": { + "webhook_url": "https://webhook.site/test-webhook", + "language": "en" + }, + }, +] + + +class TestAudioSync: + """Test synchronous audio speech-to-text methods""" + + @pytest.mark.parametrize("test_case", TEST_CASES, ids=[tc["name"] for tc in TEST_CASES]) + def test_speech_to_text(self, test_case): + """Test synchronous speech-to-text with various inputs""" + try: + if test_case.get("blob"): + # Download audio content + blob_content = requests.get(test_case["blob"]).content + result = jigsaw.audio.speech_to_text( + blob_content, + test_case.get("options", {}) + ) + else: + # Use params directly + result = jigsaw.audio.speech_to_text(test_case["params"]) + # Verify response structure + assert result["success"] == True + assert result.get("text", None) is not None and isinstance(result["text"], str) + + # Check for chunks + if result.get("chunks", None): + assert isinstance(result["chunks"], list) + + # Check for speaker diarization if requested + if result.get("speakers", None): + assert isinstance(result["speakers"], list) + + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") + + @pytest.mark.parametrize("test_case", WEBHOOK_TEST_CASES, ids=[tc["name"] for tc in WEBHOOK_TEST_CASES]) + def test_speech_to_text_webhook(self, test_case): + """Test synchronous speech-to-text with webhook""" + try: + if test_case.get("blob"): + # Download audio content + blob_content = requests.get(test_case["blob"]).content + result = jigsaw.audio.speech_to_text( + blob_content, + test_case.get("options", {}) + ) + else: + # Use params directly + result = jigsaw.audio.speech_to_text(test_case["params"]) + + print(f"Test {test_case['name']}: Webhook response") + + # Verify webhook response structure + assert result["success"] == True + assert result.get("status") in ["processing", "error"] + assert "id" in result + assert isinstance(result["id"], str) + + except JigsawStackError as e: + # Webhook URLs might fail if invalid + print(f"Expected possible error for webhook test {test_case['name']}: {e}") + + +class TestAudioAsync: + """Test asynchronous audio speech-to-text methods""" + + @pytest.mark.parametrize("test_case", TEST_CASES, ids=[tc["name"] for tc in TEST_CASES]) + @pytest.mark.asyncio + async def test_speech_to_text_async(self, test_case): + """Test asynchronous speech-to-text with various inputs""" + try: + if test_case.get("blob"): + # Download audio content + blob_content = requests.get(test_case["blob"]).content + result = await async_jigsaw.audio.speech_to_text( + blob_content, + test_case.get("options", {}) + ) + else: + # Use params directly + result = await async_jigsaw.audio.speech_to_text(test_case["params"]) + + # Verify response structure + assert result["success"] == True + assert result.get("text", None) is not None and isinstance(result["text"], str) + + # Check for chunks + if result.get("chunks", None): + assert isinstance(result["chunks"], list) + + # Check for speaker diarization if requested + if result.get("speakers", None): + assert isinstance(result["speakers"], list) + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in async {test_case['name']}: {e}") + + @pytest.mark.parametrize("test_case", WEBHOOK_TEST_CASES, ids=[tc["name"] for tc in WEBHOOK_TEST_CASES]) + @pytest.mark.asyncio + async def test_speech_to_text_webhook_async(self, test_case): + """Test asynchronous speech-to-text with webhook""" + try: + if test_case.get("blob"): + # Download audio content + blob_content = requests.get(test_case["blob"]).content + result = await async_jigsaw.audio.speech_to_text( + blob_content, + test_case.get("options", {}) + ) + else: + # Use params directly + result = await async_jigsaw.audio.speech_to_text(test_case["params"]) + + print(f"Async test {test_case['name']}: Webhook response") + + # Verify webhook response structure + assert result["success"] == True + assert result.get("status") in ["processing", "error"] + assert "id" in result + assert isinstance(result["id"], str) + + except JigsawStackError as e: + # Webhook URLs might fail if invalid + print(f"Expected possible error for async webhook test {test_case['name']}: {e}") \ No newline at end of file From 17eb2de96fde5d548448b5c91f7824b2d4fd4f60 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 08:19:58 -0700 Subject: [PATCH 08/95] fix: formatting, unused imports and param encoding. --- jigsawstack/audio.py | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/jigsawstack/audio.py b/jigsawstack/audio.py index 2046c58..cb4f199 100644 --- a/jigsawstack/audio.py +++ b/jigsawstack/audio.py @@ -3,9 +3,7 @@ from .request import Request, RequestConfig from .async_request import AsyncRequest, AsyncRequestConfig from ._config import ClientConfig -from typing import Any, Dict, List, cast -from typing_extensions import NotRequired, TypedDict, Literal -from .custom_typing import SupportedAccents +from typing_extensions import Literal from .helpers import build_path from ._types import BaseResponse @@ -80,22 +78,21 @@ def speech_to_text( blob: Union[SpeechToTextParams, bytes], options: Optional[SpeechToTextParams] = None, ) -> Union[SpeechToTextResponse, SpeechToTextWebhookResponse]: + options = options or {} + path = "/ai/transcribe" + content_type = options.get("content_type", "application/octet-stream") + headers = {"Content-Type": content_type} if isinstance( blob, dict ): # If params is provided as a dict, we assume it's the first argument resp = Request( config=self.config, - path="/ai/transcribe", + path=path, params=cast(Dict[Any, Any], blob), verb="post", ).perform_with_content() return resp - options = options or {} - path = build_path(base_path="/ai/transcribe", params=options) - content_type = options.get("content_type", "application/octet-stream") - headers = {"Content-Type": content_type} - resp = Request( config=self.config, path=path, @@ -137,20 +134,19 @@ async def speech_to_text( blob: Union[SpeechToTextParams, bytes], options: Optional[SpeechToTextParams] = None, ) -> Union[SpeechToTextResponse, SpeechToTextWebhookResponse]: + options = options or {} + path = "/ai/transcribe" + content_type = options.get("content_type", "application/octet-stream") + headers = {"Content-Type": content_type} if isinstance(blob, dict): resp = await AsyncRequest( config=self.config, - path="/ai/transcribe", + path=path, params=cast(Dict[Any, Any], blob), verb="post", ).perform_with_content() return resp - options = options or {} - path = build_path(base_path="/ai/transcribe", params=options) - content_type = options.get("content_type", "application/octet-stream") - headers = {"Content-Type": content_type} - resp = await AsyncRequest( config=self.config, path=path, From 88388918b6944f6d8fe323b451d32ef07f38d24e Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 08:21:42 -0700 Subject: [PATCH 09/95] fix: form requests for multipart blob + params async req. --- jigsawstack/async_request.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/jigsawstack/async_request.py b/jigsawstack/async_request.py index 033b39b..b87ca1b 100644 --- a/jigsawstack/async_request.py +++ b/jigsawstack/async_request.py @@ -3,6 +3,7 @@ from typing_extensions import Literal, TypeVar from .exceptions import NoContentError, raise_for_code_and_type import json +from io import BytesIO RequestVerb = Literal["get", "post", "put", "patch", "delete"] @@ -243,12 +244,26 @@ async def make_request( ) else: if data is not None: + form_data = aiohttp.FormData() + form_data.add_field('file', BytesIO(data), content_type=headers.get("Content-Type", "application/octet-stream"), filename="file") + + if self.params and isinstance(self.params, dict): + for key, value in self.params.items(): + if isinstance(value, bool): + form_data.add_field(key, str(value).lower()) + elif isinstance(value, (list, dict, tuple, int, float)): + form_data.add_field(key, json.dumps(value)) + else: + form_data.add_field(key, str(value)) + + multipart_headers = headers.copy() + multipart_headers.pop('Content-Type', None) + return await session.request( verb, url, - data=data, - params=converted_params, # Use converted params - headers=headers, + data=form_data, + headers=multipart_headers, ) else: return await session.request( From 05dcdc97b4475e51de1aa998932e6acdd94b3ce0 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 08:23:01 -0700 Subject: [PATCH 10/95] fix: param encoding for vision endpoints. --- jigsawstack/vision.py | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/jigsawstack/vision.py b/jigsawstack/vision.py index 4bb6ff5..49191af 100644 --- a/jigsawstack/vision.py +++ b/jigsawstack/vision.py @@ -3,7 +3,6 @@ from .request import Request, RequestConfig from .async_request import AsyncRequest, AsyncRequestConfig from ._config import ClientConfig -from .helpers import build_path from ._types import BaseResponse @@ -190,6 +189,8 @@ def vocr( blob: Union[VOCRParams, bytes], options: VOCRParams = None, ) -> OCRResponse: + path = "/vocr" + options = options or {} if isinstance( blob, dict ): # If params is provided as a dict, we assume it's the first argument @@ -201,8 +202,6 @@ def vocr( ).perform_with_content() return resp - options = options or {} - path = build_path(base_path="/vocr", params=options) content_type = options.get("content_type", "application/octet-stream") headers = {"Content-Type": content_type} @@ -230,17 +229,17 @@ def object_detection( blob: Union[ObjectDetectionParams, bytes], options: ObjectDetectionParams = None, ) -> ObjectDetectionResponse: + path = "/object_detection" + options = options or {} if isinstance(blob, dict): resp = Request( config=self.config, - path="/object_detection", + path=path, params=cast(Dict[Any, Any], blob), verb="post", ).perform_with_content() return resp - - options = options or {} - path = build_path(base_path="/object_detection", params=options) + content_type = options.get("content_type", "application/octet-stream") headers = {"Content-Type": content_type} @@ -281,17 +280,17 @@ async def vocr( blob: Union[VOCRParams, bytes], options: VOCRParams = None, ) -> OCRResponse: + path = "/vocr" + options = options or {} if isinstance(blob, dict): resp = await AsyncRequest( config=self.config, - path="/vocr", + path=path, params=cast(Dict[Any, Any], blob), verb="post", ).perform_with_content() return resp - options = options or {} - path = build_path(base_path="/vocr", params=options) content_type = options.get("content_type", "application/octet-stream") headers = {"Content-Type": content_type} @@ -319,19 +318,19 @@ async def object_detection( blob: Union[ObjectDetectionParams, bytes], options: ObjectDetectionParams = None, ) -> ObjectDetectionResponse: + path = "/object_detection" + options = options or {} if isinstance( blob, dict ): # If params is provided as a dict, we assume it's the first argument resp = await AsyncRequest( config=self.config, - path="/object_detection", + path=path, params=cast(Dict[Any, Any], blob), verb="post", ).perform_with_content() return resp - options = options or {} - path = build_path(base_path="/object_detection", params=options) content_type = options.get("content_type", "application/octet-stream") headers = {"Content-Type": content_type} From 6819aedfde0226c83ba4ace4cf79a25f11b63510 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 08:28:07 -0700 Subject: [PATCH 11/95] test: defining test cases for STT with format changes. --- tests/test_audio.py | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/tests/test_audio.py b/tests/test_audio.py index 96169d2..1345621 100644 --- a/tests/test_audio.py +++ b/tests/test_audio.py @@ -158,7 +158,7 @@ def test_speech_to_text(self, test_case): # Use params directly result = jigsaw.audio.speech_to_text(test_case["params"]) # Verify response structure - assert result["success"] == True + assert result["success"] assert result.get("text", None) is not None and isinstance(result["text"], str) # Check for chunks @@ -186,14 +186,8 @@ def test_speech_to_text_webhook(self, test_case): else: # Use params directly result = jigsaw.audio.speech_to_text(test_case["params"]) - - print(f"Test {test_case['name']}: Webhook response") - # Verify webhook response structure - assert result["success"] == True - assert result.get("status") in ["processing", "error"] - assert "id" in result - assert isinstance(result["id"], str) + assert result["success"] except JigsawStackError as e: # Webhook URLs might fail if invalid @@ -220,7 +214,7 @@ async def test_speech_to_text_async(self, test_case): result = await async_jigsaw.audio.speech_to_text(test_case["params"]) # Verify response structure - assert result["success"] == True + assert result["success"] assert result.get("text", None) is not None and isinstance(result["text"], str) # Check for chunks @@ -252,11 +246,8 @@ async def test_speech_to_text_webhook_async(self, test_case): print(f"Async test {test_case['name']}: Webhook response") # Verify webhook response structure - assert result["success"] == True - assert result.get("status") in ["processing", "error"] - assert "id" in result - assert isinstance(result["id"], str) - + assert result["success"] + except JigsawStackError as e: # Webhook URLs might fail if invalid print(f"Expected possible error for async webhook test {test_case['name']}: {e}") \ No newline at end of file From 7c8c46768fa7d444ee91de334de9d7e9d822afe7 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 13:10:43 -0700 Subject: [PATCH 12/95] test: defining test cases for classification endpoint. --- tests/test_classification.py | 190 ++++++++++++++++++++++++----------- 1 file changed, 134 insertions(+), 56 deletions(-) diff --git a/tests/test_classification.py b/tests/test_classification.py index 6c301c5..98ce725 100644 --- a/tests/test_classification.py +++ b/tests/test_classification.py @@ -1,75 +1,118 @@ from jigsawstack.exceptions import JigsawStackError -from jigsawstack import JigsawStack - +import jigsawstack import pytest +import logging +from dotenv import load_dotenv +import os -# flake8: noqa +load_dotenv() -client = JigsawStack() +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) +jigsaw = jigsawstack.JigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) +async_jigsaw = jigsawstack.AsyncJigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) -@pytest.mark.parametrize( - "dataset,labels", - [ - ( - [ +TEST_CASES = [ + { + "name": "text_classification_programming", + "params": { + "dataset": [ {"type": "text", "value": "I love programming"}, {"type": "text", "value": "I love reading books"}, {"type": "text", "value": "I love watching movies"}, {"type": "text", "value": "I love playing games"}, ], - [ + "labels": [ {"type": "text", "value": "programming"}, {"type": "text", "value": "reading"}, {"type": "text", "value": "watching"}, {"type": "text", "value": "playing"}, ], - ), - ( - [ + }, + }, + { + "name": "text_classification_sentiment", + "params": { + "dataset": [ {"type": "text", "value": "This is awesome!"}, {"type": "text", "value": "I hate this product"}, {"type": "text", "value": "It's okay, nothing special"}, ], - [ + "labels": [ {"type": "text", "value": "positive"}, {"type": "text", "value": "negative"}, {"type": "text", "value": "neutral"}, ], - ), - ( - [ + }, + }, + { + "name": "text_classification_weather", + "params": { + "dataset": [ {"type": "text", "value": "The weather is sunny today"}, {"type": "text", "value": "It's raining heavily outside"}, {"type": "text", "value": "Snow is falling gently"}, ], - [ + "labels": [ {"type": "text", "value": "sunny"}, {"type": "text", "value": "rainy"}, {"type": "text", "value": "snowy"}, ], - ), - ], -) -def test_classification_text_success_response(dataset, labels) -> None: - params = { - "dataset": dataset, - "labels": labels, - } - try: - result = client.classification.text(params) - print(result) - assert result["success"] == True - except JigsawStackError as e: - print(str(e)) - assert e.message == "Failed to parse API response. Please try again." - - -@pytest.mark.parametrize( - "dataset,labels", - [ - ( - [ + }, + }, + { + "name": "image_classification_fruits", + "params": { + "dataset": [ + { + "type": "image", + "value": "https://as2.ftcdn.net/v2/jpg/02/24/11/57/1000_F_224115780_2ssvcCoTfQrx68Qsl5NxtVIDFWKtAgq2.jpg", + }, + { + "type": "image", + "value": "https://t3.ftcdn.net/jpg/02/95/44/22/240_F_295442295_OXsXOmLmqBUfZreTnGo9PREuAPSLQhff.jpg", + }, + { + "type": "image", + "value": "https://as1.ftcdn.net/v2/jpg/05/54/94/46/1000_F_554944613_okdr3fBwcE9kTOgbLp4BrtVi8zcKFWdP.jpg", + }, + ], + "labels": [ + {"type": "text", "value": "banana"}, + { + "type": "image", + "value": "https://upload.wikimedia.org/wikipedia/commons/8/8a/Banana-Single.jpg", + }, + {"type": "text", "value": "kisses"}, + ], + }, + }, + { + "name": "text_classification_multiple_labels", + "params": { + "dataset": [ + { + "type": "text", + "value": "Python is a great programming language for data science", + }, + { + "type": "text", + "value": "JavaScript is essential for web development", + }, + ], + "labels": [ + {"type": "text", "value": "programming"}, + {"type": "text", "value": "data science"}, + {"type": "text", "value": "web development"}, + ], + "multiple_labels": True, + }, + }, + { + "name": "image_classification_with_multiple_labels", + "params": { + "dataset": [ { "type": "image", "value": "https://as2.ftcdn.net/v2/jpg/02/24/11/57/1000_F_224115780_2ssvcCoTfQrx68Qsl5NxtVIDFWKtAgq2.jpg", @@ -83,7 +126,7 @@ def test_classification_text_success_response(dataset, labels) -> None: "value": "https://as1.ftcdn.net/v2/jpg/05/54/94/46/1000_F_554944613_okdr3fBwcE9kTOgbLp4BrtVi8zcKFWdP.jpg", }, ], - [ + "labels": [ {"type": "text", "value": "banana"}, { "type": "image", @@ -91,18 +134,53 @@ def test_classification_text_success_response(dataset, labels) -> None: }, {"type": "text", "value": "kisses"}, ], - ), - ], -) -def test_classification_image_success_response(dataset, labels) -> None: - params = { - "dataset": dataset, - "labels": labels, - } - try: - result = client.classification.image(params) - print(result) - assert result["success"] == True - except JigsawStackError as e: - print(str(e)) - assert e.message == "Failed to parse API response. Please try again." + }, + }, +] + + +class TestClassificationSync: + """Test synchronous classification methods""" + + sync_test_cases = TEST_CASES + + @pytest.mark.parametrize( + "test_case", sync_test_cases, ids=[tc["name"] for tc in sync_test_cases] + ) + def test_classification(self, test_case): + """Test synchronous classification with various inputs""" + try: + result = jigsaw.classification(test_case["params"]) + assert result["success"] == True + assert "predictions" in result + if test_case.get("multiple_labels"): + # Ensure predictions are lists when multiple_labels is True + for prediction in result["predictions"]: + assert isinstance(prediction, list) + + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") + + +class TestClassificationAsync: + """Test asynchronous classification methods""" + + async_test_cases = TEST_CASES + + @pytest.mark.parametrize( + "test_case", async_test_cases, ids=[tc["name"] for tc in async_test_cases] + ) + @pytest.mark.asyncio + async def test_classification_async(self, test_case): + """Test asynchronous classification with various inputs""" + try: + result = await async_jigsaw.classification(test_case["params"]) + assert result["success"] == True + assert "predictions" in result + + if test_case.get("multiple_labels"): + # Ensure predictions are lists when multiple_labels is True + for prediction in result["predictions"]: + assert isinstance(prediction, list) + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") From 199eb27bfb0c0979cfffe3c06bc39edc373c802e Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 13:13:25 -0700 Subject: [PATCH 13/95] tests: formatting previously defined test cases. --- tests/test_classification.py | 4 +- tests/test_object_detection.py | 164 ++++++++++++++++----------------- 2 files changed, 82 insertions(+), 86 deletions(-) diff --git a/tests/test_classification.py b/tests/test_classification.py index 98ce725..a5cf66c 100644 --- a/tests/test_classification.py +++ b/tests/test_classification.py @@ -151,7 +151,7 @@ def test_classification(self, test_case): """Test synchronous classification with various inputs""" try: result = jigsaw.classification(test_case["params"]) - assert result["success"] == True + assert result["success"] assert "predictions" in result if test_case.get("multiple_labels"): # Ensure predictions are lists when multiple_labels is True @@ -175,7 +175,7 @@ async def test_classification_async(self, test_case): """Test asynchronous classification with various inputs""" try: result = await async_jigsaw.classification(test_case["params"]) - assert result["success"] == True + assert result["success"] assert "predictions" in result if test_case.get("multiple_labels"): diff --git a/tests/test_object_detection.py b/tests/test_object_detection.py index 031459f..8c0f409 100644 --- a/tests/test_object_detection.py +++ b/tests/test_object_detection.py @@ -4,8 +4,11 @@ import pytest import logging from dotenv import load_dotenv -load_dotenv() import os + +load_dotenv() + + logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) @@ -16,88 +19,81 @@ IMAGE_URL = "https://rogilvkqloanxtvjfrkm.supabase.co/storage/v1/object/public/demo/Collabo%201080x842.jpg" TEST_CASES = [ - { - "name": "with_url_only", - "params": { - "url": IMAGE_URL - }, - "blob": None, - "options": None, - }, - { - "name": "with_blob_only", - "params": None, - "blob": IMAGE_URL, - "options": None, - }, - { - "name": "annotated_image_true", - "blob": IMAGE_URL, - "options": { - "annotated_image": True - }, - }, - { - "name": "with_annotated_image_false", - "blob": IMAGE_URL, - "options": { - "annotated_image": False - }, + { + "name": "with_url_only", + "params": {"url": IMAGE_URL}, + "blob": None, + "options": None, + }, + { + "name": "with_blob_only", + "params": None, + "blob": IMAGE_URL, + "options": None, + }, + { + "name": "annotated_image_true", + "blob": IMAGE_URL, + "options": {"annotated_image": True}, + }, + { + "name": "with_annotated_image_false", + "blob": IMAGE_URL, + "options": {"annotated_image": False}, + }, + { + "name": "with_blob_both_features", + "blob": IMAGE_URL, + "options": { + "features": ["object_detection", "gui"], + "annotated_image": True, + "return_type": "url", }, - { - "name": "with_blob_both_features", - "blob": IMAGE_URL, - "options": { - "features": ["object_detection", "gui"], - "annotated_image": True, - "return_type": "url" - }, + }, + { + "name": "with_blob_gui_features", + "blob": IMAGE_URL, + "options": {"features": ["gui"], "annotated_image": False}, + }, + { + "name": "with_blob_object_detection_features", + "blob": IMAGE_URL, + "options": { + "features": ["object_detection"], + "annotated_image": True, + "return_type": "base64", }, - { - "name": "with_blob_gui_features", - "blob": IMAGE_URL, - "options": { - "features": ["gui"], - "annotated_image": False - }, + }, + { + "name": "with_prompts", + "blob": IMAGE_URL, + "options": { + "prompts": ["castle", "tree"], + "annotated_image": True, }, - { - "name": "with_blob_object_detection_features", - "blob": IMAGE_URL, - "options": { - "features": ["object_detection"], - "annotated_image": True, - "return_type": "base64" - }, + }, + { + "name": "with_all_options", + "blob": IMAGE_URL, + "options": { + "features": ["object_detection", "gui"], + "prompts": ["car", "road", "tree"], + "annotated_image": True, + "return_type": "base64", + "return_masks": False, }, - { - "name": "with_prompts", - "blob": IMAGE_URL, - "options": { - "prompts": ["castle", "tree"], - "annotated_image": True, - }, - }, - { - "name": "with_all_options", - "blob": IMAGE_URL, - "options": { - "features": ["object_detection", "gui"], - "prompts": ["car", "road", "tree"], - "annotated_image": True, - "return_type": "base64", - "return_masks": False, - }, - }, - ] + }, +] class TestObjectDetectionSync: """Test synchronous object detection methods""" - + sync_test_cases = TEST_CASES - - @pytest.mark.parametrize("test_case", sync_test_cases, ids=[tc["name"] for tc in sync_test_cases]) + + @pytest.mark.parametrize( + "test_case", sync_test_cases, ids=[tc["name"] for tc in sync_test_cases] + ) def test_object_detection(self, test_case): """Test synchronous object detection with various inputs""" try: @@ -105,15 +101,14 @@ def test_object_detection(self, test_case): # Download blob content blob_content = requests.get(test_case["blob"]).content result = jigsaw.vision.object_detection( - blob_content, - test_case.get("options", {}) + blob_content, test_case.get("options", {}) ) else: # Use params directly result = jigsaw.vision.object_detection(test_case["params"]) - + print(f"Test {test_case['name']}: {result}") - assert result["success"] == True + assert result["success"] except JigsawStackError as e: pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") @@ -123,7 +118,9 @@ class TestObjectDetectionAsync: async_test_cases = TEST_CASES - @pytest.mark.parametrize("test_case", async_test_cases, ids=[tc["name"] for tc in async_test_cases]) + @pytest.mark.parametrize( + "test_case", async_test_cases, ids=[tc["name"] for tc in async_test_cases] + ) @pytest.mark.asyncio async def test_object_detection_async(self, test_case): """Test asynchronous object detection with various inputs""" @@ -132,14 +129,13 @@ async def test_object_detection_async(self, test_case): # Download blob content blob_content = requests.get(test_case["blob"]).content result = await async_jigsaw.vision.object_detection( - blob_content, - test_case.get("options", {}) + blob_content, test_case.get("options", {}) ) else: # Use params directly result = await async_jigsaw.vision.object_detection(test_case["params"]) - + print(f"Test {test_case['name']}: {result}") - assert result["success"] == True + assert result["success"] except JigsawStackError as e: - pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") \ No newline at end of file + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") From b695d3648518bf4759baf085e0adfcc4483efcc6 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 13:15:18 -0700 Subject: [PATCH 14/95] chore: clean up redudant test file, actual store test file is test_file_store. --- tests/test_store.py | 51 --------------------------------------------- 1 file changed, 51 deletions(-) delete mode 100644 tests/test_store.py diff --git a/tests/test_store.py b/tests/test_store.py deleted file mode 100644 index 4d59ac7..0000000 --- a/tests/test_store.py +++ /dev/null @@ -1,51 +0,0 @@ -from unittest.mock import MagicMock -import unittest -from jigsawstack.exceptions import JigsawStackError -from jigsawstack import AsyncJigsawStack -import pytest -import asyncio -import logging - -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) - - -@pytest.mark.skip(reason="Skipping TestWebAPI class for now") -class TestAsyncFileOperations: - """ - Test class for async file operations. - Add your file operation tests here. - """ - - def test_async_file_upload(self): - # Template for future file upload tests - pass - - def test_async_file_retrieval(self): - # Template for future file retrieval tests - pass - - def test_async_file_deletion(self): - # Template for future file deletion tests - pass - - -# Example file upload test -# Uncomment and modify as needed -""" -def test_async_file_upload_example(): - async def _test(): - client = AsyncJigsawStack() - try: - file_content = b"test file content" - result = await client.store.upload( - file_content, - {"filename": "test.txt", "overwrite": True} - ) - logger.info(result) - assert result["success"] == True - except JigsawStackError as e: - pytest.fail(f"Unexpected JigsawStackError: {e}") - - asyncio.run(_test()) -""" From 621d7ba30d56d94e0c0637f0673fea973a38358f Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 16:24:41 -0700 Subject: [PATCH 15/95] fix: naming convention and drop deprecated overflow mode chunk. --- jigsawstack/{embeddingV2.py => embedding_v2.py} | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) rename jigsawstack/{embeddingV2.py => embedding_v2.py} (97%) diff --git a/jigsawstack/embeddingV2.py b/jigsawstack/embedding_v2.py similarity index 97% rename from jigsawstack/embeddingV2.py rename to jigsawstack/embedding_v2.py index d7559bb..64c7d11 100644 --- a/jigsawstack/embeddingV2.py +++ b/jigsawstack/embedding_v2.py @@ -2,7 +2,6 @@ from typing_extensions import NotRequired, TypedDict from .request import Request, RequestConfig from .async_request import AsyncRequest -from typing import List, Union from ._config import ClientConfig from .helpers import build_path from .embedding import Chunk @@ -14,7 +13,7 @@ class EmbeddingV2Params(TypedDict): type: Literal["text", "text-other", "image", "audio", "pdf"] url: NotRequired[str] file_store_key: NotRequired[str] - token_overflow_mode: NotRequired[Literal["truncate", "chunk", "error"]] = "chunk" + token_overflow_mode: NotRequired[Literal["truncate", "error"]] speaker_fingerprint: NotRequired[bool] From 91f8e0fdfaf49c13792cf02b3dabd1b5d2ea2a27 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 16:33:00 -0700 Subject: [PATCH 16/95] test: defining test cases for embedding v1 & v2 --- tests/test_embedding.py | 327 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 327 insertions(+) create mode 100644 tests/test_embedding.py diff --git a/tests/test_embedding.py b/tests/test_embedding.py new file mode 100644 index 0000000..0106751 --- /dev/null +++ b/tests/test_embedding.py @@ -0,0 +1,327 @@ +import requests +from jigsawstack.exceptions import JigsawStackError +import jigsawstack +import pytest +import logging +from dotenv import load_dotenv +import os + +load_dotenv() + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +jigsaw = jigsawstack.JigsawStack( + api_url="http://localhost:3000/api/", api_key=os.getenv("JIGSAWSTACK_API_KEY") +) +async_jigsaw = jigsawstack.AsyncJigsawStack( + api_url="http://localhost:3000/api/", api_key=os.getenv("JIGSAWSTACK_API_KEY") +) + +SAMPLE_TEXT = "The quick brown fox jumps over the lazy dog. This is a sample text for embedding generation." +SAMPLE_IMAGE_URL = "https://images.unsplash.com/photo-1542931287-023b922fa89b?q=80&w=2574&auto=format&fit=crop&ixlib=rb-4.1.0&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D" +SAMPLE_AUDIO_URL = "https://jigsawstack.com/preview/stt-example.wav" +SAMPLE_PDF_URL = ( + "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf" +) + +# Test cases for Embedding V1 +EMBEDDING_V1_TEST_CASES = [ + { + "name": "text_embedding_basic", + "params": { + "type": "text", + "text": SAMPLE_TEXT, + }, + }, + { + "name": "text_embedding_with_truncate", + "params": { + "type": "text", + "text": SAMPLE_TEXT * 100, # Long text to test truncation + "token_overflow_mode": "truncate", + }, + }, + { + "name": "text_embedding_with_error_mode", + "params": { + "type": "text", + "text": SAMPLE_TEXT, + "token_overflow_mode": "error", + }, + }, + { + "name": "image_embedding_from_url", + "params": { + "type": "image", + "url": SAMPLE_IMAGE_URL, + }, + }, + { + "name": "audio_embedding_from_url", + "params": { + "type": "audio", + "url": SAMPLE_AUDIO_URL, + }, + }, + { + "name": "pdf_embedding_from_url", + "params": { + "type": "pdf", + "url": SAMPLE_PDF_URL, + }, + }, + { + "name": "text_other_type", + "params": { + "type": "text-other", + "text": "This is a different text type for embedding", + }, + }, +] + +# Test cases for Embedding V2 +EMBEDDING_V2_TEST_CASES = [ + { + "name": "text_embedding_v2_basic", + "params": { + "type": "text", + "text": SAMPLE_TEXT, + }, + }, + { + "name": "text_embedding_v2_with_error", + "params": { + "type": "text", + "text": SAMPLE_TEXT * 100, # Long text to test chunking + "token_overflow_mode": "error", + }, + }, + { + "name": "text_embedding_v2_with_truncate", + "params": { + "type": "text", + "text": SAMPLE_TEXT * 100, + "token_overflow_mode": "truncate", + }, + }, + { + "name": "text_embedding_v2_with_error_mode", + "params": { + "type": "text", + "text": SAMPLE_TEXT, + "token_overflow_mode": "error", + }, + }, + { + "name": "image_embedding_v2_from_url", + "params": { + "type": "image", + "url": SAMPLE_IMAGE_URL, + }, + }, + { + "name": "audio_embedding_v2_basic", + "params": { + "type": "audio", + "url": SAMPLE_AUDIO_URL, + }, + }, + { + "name": "audio_embedding_v2_with_speaker_fingerprint", + "params": { + "type": "audio", + "url": SAMPLE_AUDIO_URL, + "speaker_fingerprint": True, + }, + }, + { + "name": "pdf_embedding_v2_from_url", + "params": { + "type": "pdf", + "url": SAMPLE_PDF_URL, + }, + }, +] + +# Test cases for blob inputs +BLOB_TEST_CASES = [ + { + "name": "image_blob_embedding", + "blob_url": SAMPLE_IMAGE_URL, + "options": { + "type": "image", + }, + }, + { + "name": "pdf_blob_embedding", + "blob_url": SAMPLE_PDF_URL, + "options": { + "type": "pdf", + }, + }, +] + + +class TestEmbeddingV1Sync: + """Test synchronous Embedding V1 methods""" + + sync_test_cases = EMBEDDING_V1_TEST_CASES + + @pytest.mark.parametrize( + "test_case", sync_test_cases, ids=[tc["name"] for tc in sync_test_cases] + ) + def test_embedding_v1(self, test_case): + """Test synchronous embedding v1 with various inputs""" + try: + result = jigsaw.embedding(test_case["params"]) + assert result["success"] + assert "embeddings" in result + assert isinstance(result["embeddings"], list) + if "chunks" in result: + assert isinstance(result["chunks"], list) + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") + + @pytest.mark.parametrize( + "test_case", BLOB_TEST_CASES, ids=[tc["name"] for tc in BLOB_TEST_CASES] + ) + def test_embedding_v1_blob(self, test_case): + """Test synchronous embedding v1 with blob inputs""" + try: + # Download blob content + blob_content = requests.get(test_case["blob_url"]).content + result = jigsaw.embedding(blob_content, test_case["options"]) + assert result["success"] + assert "embeddings" in result + assert isinstance(result["embeddings"], list) + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") + + +class TestEmbeddingV1Async: + """Test asynchronous Embedding V1 methods""" + + async_test_cases = EMBEDDING_V1_TEST_CASES + + @pytest.mark.parametrize( + "test_case", async_test_cases, ids=[tc["name"] for tc in async_test_cases] + ) + @pytest.mark.asyncio + async def test_embedding_v1_async(self, test_case): + """Test asynchronous embedding v1 with various inputs""" + try: + result = await async_jigsaw.embedding(test_case["params"]) + assert result["success"] + assert "embeddings" in result + assert isinstance(result["embeddings"], list) + if "chunks" in result: + assert isinstance(result["chunks"], list) + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") + + @pytest.mark.parametrize( + "test_case", BLOB_TEST_CASES, ids=[tc["name"] for tc in BLOB_TEST_CASES] + ) + @pytest.mark.asyncio + async def test_embedding_v1_blob_async(self, test_case): + """Test asynchronous embedding v1 with blob inputs""" + try: + # Download blob content + blob_content = requests.get(test_case["blob_url"]).content + result = await async_jigsaw.embedding(blob_content, test_case["options"]) + assert result["success"] + assert "embeddings" in result + assert isinstance(result["embeddings"], list) + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") + + +class TestEmbeddingV2Sync: + """Test synchronous Embedding V2 methods""" + + sync_test_cases = EMBEDDING_V2_TEST_CASES + + @pytest.mark.parametrize( + "test_case", sync_test_cases, ids=[tc["name"] for tc in sync_test_cases] + ) + def test_embedding_v2(self, test_case): + """Test synchronous embedding v2 with various inputs""" + try: + result = jigsaw.embeddingV2(test_case["params"]) + assert result["success"] + assert "embeddings" in result + assert isinstance(result["embeddings"], list) + + # Check for chunks when chunking mode is used + if test_case["params"].get("token_overflow_mode") == "error": + assert "chunks" in result + assert isinstance(result["chunks"], list) + + # Check for speaker embeddings when speaker fingerprint is requested + if test_case["params"].get("speaker_fingerprint"): + assert "speaker_embeddings" in result + assert isinstance(result["speaker_embeddings"], list) + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") + + @pytest.mark.parametrize( + "test_case", BLOB_TEST_CASES, ids=[tc["name"] for tc in BLOB_TEST_CASES] + ) + def test_embedding_v2_blob(self, test_case): + """Test synchronous embedding v2 with blob inputs""" + try: + # Download blob content + blob_content = requests.get(test_case["blob_url"]).content + result = jigsaw.embeddingV2(blob_content, test_case["options"]) + assert result["success"] + assert "embeddings" in result + assert isinstance(result["embeddings"], list) + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") + + +class TestEmbeddingV2Async: + """Test asynchronous Embedding V2 methods""" + + async_test_cases = EMBEDDING_V2_TEST_CASES + + @pytest.mark.parametrize( + "test_case", async_test_cases, ids=[tc["name"] for tc in async_test_cases] + ) + @pytest.mark.asyncio + async def test_embedding_v2_async(self, test_case): + """Test asynchronous embedding v2 with various inputs""" + try: + result = await async_jigsaw.embeddingV2(test_case["params"]) + assert result["success"] + assert "embeddings" in result + assert isinstance(result["embeddings"], list) + + # Check for chunks when chunking mode is used + if test_case["params"].get("token_overflow_mode") == "error": + assert "chunks" in result + assert isinstance(result["chunks"], list) + + # Check for speaker embeddings when speaker fingerprint is requested + if test_case["params"].get("speaker_fingerprint"): + assert "speaker_embeddings" in result + assert isinstance(result["speaker_embeddings"], list) + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") + + @pytest.mark.parametrize( + "test_case", BLOB_TEST_CASES, ids=[tc["name"] for tc in BLOB_TEST_CASES] + ) + @pytest.mark.asyncio + async def test_embedding_v2_blob_async(self, test_case): + """Test asynchronous embedding v2 with blob inputs""" + try: + # Download blob content + blob_content = requests.get(test_case["blob_url"]).content + result = await async_jigsaw.embeddingV2(blob_content, test_case["options"]) + assert result["success"] + assert "embeddings" in result + assert isinstance(result["embeddings"], list) + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") From a629fa25f5344522a850a04adf162b9ae31f3b40 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 16:45:26 -0700 Subject: [PATCH 17/95] feat: format fixes and new testcases for sentiment. --- jigsawstack/sentiment.py | 3 +- tests/test_sentiment.py | 145 +++++++++++++++++++++++++++++++++++---- 2 files changed, 134 insertions(+), 14 deletions(-) diff --git a/jigsawstack/sentiment.py b/jigsawstack/sentiment.py index 8970110..805dd80 100644 --- a/jigsawstack/sentiment.py +++ b/jigsawstack/sentiment.py @@ -1,8 +1,7 @@ from typing import Any, Dict, List, Union, cast -from typing_extensions import NotRequired, TypedDict +from typing_extensions import TypedDict from .request import Request, RequestConfig from .async_request import AsyncRequest -from typing import List, Union from ._config import ClientConfig from ._types import BaseResponse diff --git a/tests/test_sentiment.py b/tests/test_sentiment.py index cd3c602..e46e44f 100644 --- a/tests/test_sentiment.py +++ b/tests/test_sentiment.py @@ -1,21 +1,142 @@ -from unittest.mock import MagicMock -import unittest from jigsawstack.exceptions import JigsawStackError import jigsawstack - import pytest +import logging +from dotenv import load_dotenv +import os + +load_dotenv() + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +jigsaw = jigsawstack.JigsawStack( + api_url="http://localhost:3000/api/", api_key=os.getenv("JIGSAWSTACK_API_KEY") +) +async_jigsaw = jigsawstack.AsyncJigsawStack( + api_url="http://localhost:3000/api/", api_key=os.getenv("JIGSAWSTACK_API_KEY") +) + +TEST_CASES = [ + { + "name": "positive_sentiment_excited", + "params": { + "text": "I am so excited about this new product! It's absolutely amazing and I can't wait to use it every day." + }, + }, + { + "name": "negative_sentiment_disappointed", + "params": { + "text": "I'm really disappointed with this purchase. The quality is terrible and it broke after just one day." + }, + }, + { + "name": "neutral_sentiment_factual", + "params": { + "text": "The meeting is scheduled for 3 PM tomorrow in conference room B." + }, + }, + { + "name": "mixed_sentiment_paragraph", + "params": { + "text": "The product arrived on time which was great. However, the packaging was damaged. The item itself works fine, but the instructions were confusing." + }, + }, + { + "name": "positive_sentiment_love", + "params": { + "text": "I absolutely love this! Best purchase I've made all year. Highly recommend to everyone!" + }, + }, + { + "name": "negative_sentiment_angry", + "params": { + "text": "This is unacceptable! I want a refund immediately. Worst customer service ever!" + }, + }, + { + "name": "single_sentence_positive", + "params": {"text": "This made my day!"}, + }, + { + "name": "single_sentence_negative", + "params": {"text": "I hate this."}, + }, + { + "name": "complex_multi_sentence", + "params": { + "text": "The first part of the movie was boring and I almost fell asleep. But then it got really exciting! The ending was spectacular and now it's one of my favorites." + }, + }, + { + "name": "question_sentiment", + "params": { + "text": "Why is this product so amazing? I can't believe how well it works!" + }, + }, +] -# flake8: noqa -client = jigsawstack.JigsawStack() +class TestSentimentSync: + """Test synchronous sentiment analysis methods""" + sync_test_cases = TEST_CASES -@pytest.mark.skip(reason="Skipping TestWebAPI class for now") -class TestSentimentAPI(unittest.TestCase): - def test_sentiment_response_success(self) -> None: - params = {"text": "I am so excited"} + @pytest.mark.parametrize( + "test_case", sync_test_cases, ids=[tc["name"] for tc in sync_test_cases] + ) + def test_sentiment_analysis(self, test_case): + """Test synchronous sentiment analysis with various inputs""" try: - result = client.sentiment(params) - assert result["success"] == True + result = jigsaw.sentiment(test_case["params"]) + + assert result["success"] + assert "sentiment" in result + assert "emotion" in result["sentiment"] + assert "sentiment" in result["sentiment"] + assert "score" in result["sentiment"] + + # Check if sentences analysis is included + if "sentences" in result["sentiment"]: + assert isinstance(result["sentiment"]["sentences"], list) + for sentence in result["sentiment"]["sentences"]: + assert "text" in sentence + assert "sentiment" in sentence + assert "emotion" in sentence + assert "score" in sentence + + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") + + +class TestSentimentAsync: + """Test asynchronous sentiment analysis methods""" + + async_test_cases = TEST_CASES + + @pytest.mark.parametrize( + "test_case", async_test_cases, ids=[tc["name"] for tc in async_test_cases] + ) + @pytest.mark.asyncio + async def test_sentiment_analysis_async(self, test_case): + """Test asynchronous sentiment analysis with various inputs""" + try: + result = await async_jigsaw.sentiment(test_case["params"]) + + assert result["success"] + assert "sentiment" in result + assert "emotion" in result["sentiment"] + assert "sentiment" in result["sentiment"] + assert "score" in result["sentiment"] + + # Check if sentences analysis is included + if "sentences" in result["sentiment"]: + assert isinstance(result["sentiment"]["sentences"], list) + for sentence in result["sentiment"]["sentences"]: + assert "text" in sentence + assert "sentiment" in sentence + assert "emotion" in sentence + assert "score" in sentence + except JigsawStackError as e: - assert e.message == "Failed to parse API response. Please try again." + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") From 5ea1c3d0e9c14e58896e58676546bb83995a3814 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 16:46:33 -0700 Subject: [PATCH 18/95] chore: deleting outdated vision test file. --- tests/test_vision.py | 28 ---------------------------- 1 file changed, 28 deletions(-) delete mode 100644 tests/test_vision.py diff --git a/tests/test_vision.py b/tests/test_vision.py deleted file mode 100644 index 7d8fcf0..0000000 --- a/tests/test_vision.py +++ /dev/null @@ -1,28 +0,0 @@ -from unittest.mock import MagicMock -import unittest -from jigsawstack.exceptions import JigsawStackError -from jigsawstack import AsyncJigsawStack -import pytest -import asyncio -import logging - -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) - - -def test_async_vocr_response(): - async def _test(): - client = AsyncJigsawStack() - try: - result = await client.vision.vocr( - { - "url": "https://rogilvkqloanxtvjfrkm.supabase.co/storage/v1/object/public/demo/Collabo%201080x842.jpg?t=2024-03-22T09%3A22%3A48.442Z", - "prompt": ["Hello"], - } - ) - - assert result["success"] == True - except JigsawStackError as e: - pytest.fail(f"Unexpected JigsawStackError: {e}") - - asyncio.run(_test()) From 0e8bdb392bf5f25c3d81503f24aee413260bad6e Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 16:50:36 -0700 Subject: [PATCH 19/95] feat: updated formating for JigsawStack module. --- jigsawstack/__init__.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/jigsawstack/__init__.py b/jigsawstack/__init__.py index e860936..2c5d775 100644 --- a/jigsawstack/__init__.py +++ b/jigsawstack/__init__.py @@ -16,7 +16,7 @@ from .image_generation import ImageGeneration, AsyncImageGeneration from .classification import Classification, AsyncClassification from .prompt_engine import PromptEngine, AsyncPromptEngine -from .embeddingV2 import EmbeddingV2, AsyncEmbeddingV2 +from .embedding_v2 import EmbeddingV2, AsyncEmbeddingV2 class JigsawStack: @@ -51,7 +51,7 @@ def __init__( if api_url is None: api_url = os.environ.get("JIGSAWSTACK_API_URL") if api_url is None: - api_url = f"https://api.jigsawstack.com/" + api_url = "https://api.jigsawstack.com/" self.api_key = api_key self.api_url = api_url @@ -171,7 +171,7 @@ def __init__( if api_url is None: api_url = os.environ.get("JIGSAWSTACK_API_URL") if api_url is None: - api_url = f"https://api.jigsawstack.com/" + api_url = "https://api.jigsawstack.com/" self.api_key = api_key self.api_url = api_url From 24fc7913cb1b96237e9f511a14183e50c4cdb270 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 16:51:23 -0700 Subject: [PATCH 20/95] feat: updated formatting for async_request.py --- jigsawstack/async_request.py | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/jigsawstack/async_request.py b/jigsawstack/async_request.py index b87ca1b..4f90a2c 100644 --- a/jigsawstack/async_request.py +++ b/jigsawstack/async_request.py @@ -245,20 +245,23 @@ async def make_request( else: if data is not None: form_data = aiohttp.FormData() - form_data.add_field('file', BytesIO(data), content_type=headers.get("Content-Type", "application/octet-stream"), filename="file") - + form_data.add_field( + "file", + BytesIO(data), + content_type=headers.get( + "Content-Type", "application/octet-stream" + ), + filename="file", + ) + if self.params and isinstance(self.params, dict): - for key, value in self.params.items(): - if isinstance(value, bool): - form_data.add_field(key, str(value).lower()) - elif isinstance(value, (list, dict, tuple, int, float)): - form_data.add_field(key, json.dumps(value)) - else: - form_data.add_field(key, str(value)) - + form_data.add_field( + "body", json.dumps(self.params), content_type="application/json" + ) + multipart_headers = headers.copy() - multipart_headers.pop('Content-Type', None) - + multipart_headers.pop("Content-Type", None) + return await session.request( verb, url, From b7bc5befdfea05916123add9b669750a4950e25c Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 16:52:14 -0700 Subject: [PATCH 21/95] chore: deleting outdated async test cases for embedding. --- tests/test_embedding_async.py | 23 ----------------------- 1 file changed, 23 deletions(-) delete mode 100644 tests/test_embedding_async.py diff --git a/tests/test_embedding_async.py b/tests/test_embedding_async.py deleted file mode 100644 index bf2e1e6..0000000 --- a/tests/test_embedding_async.py +++ /dev/null @@ -1,23 +0,0 @@ -from unittest.mock import MagicMock -import unittest -from jigsawstack.exceptions import JigsawStackError -from jigsawstack import AsyncJigsawStack -import pytest -import asyncio -import logging - -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) - - -def test_async_embedding_generation_response(): - async def _test(): - client = AsyncJigsawStack() - try: - result = await client.embedding({"text": "Hello, World!", "type": "text"}) - logger.info(result) - assert result["success"] == True - except JigsawStackError as e: - pytest.fail(f"Unexpected JigsawStackError: {e}") - - asyncio.run(_test()) From 3974f32f835324adc35c55693ed08946c76afc1e Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 16:52:56 -0700 Subject: [PATCH 22/95] feat: updating demo url across jigsawstack-python. --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index edf4020..e13c6bf 100644 --- a/README.md +++ b/README.md @@ -68,7 +68,7 @@ VOCR: ```py params = { - "url": "https://rogilvkqloanxtvjfrkm.supabase.co/storage/v1/object/public/demo/Collabo%201080x842.jpg?t=2024-03-22T09%3A22%3A48.442Z" + "url": "https://images.unsplash.com/photo-1542931287-023b922fa89b?q=80&w=2574&auto=format&fit=crop&ixlib=rb-4.1.0&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D?t=2024-03-22T09%3A22%3A48.442Z" } result = jigsaw.vision.vocr(params) ``` From efcff9c1c7b03a28b07f61b980c8970ce77a588a Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 17:27:31 -0700 Subject: [PATCH 23/95] feat: updating type to accept float for prediciton, introducing new test cases for prediction endpoint. --- jigsawstack/prediction.py | 9 +- tests/test_prediction.py | 191 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 197 insertions(+), 3 deletions(-) create mode 100644 tests/test_prediction.py diff --git a/jigsawstack/prediction.py b/jigsawstack/prediction.py index d24168b..84bfbf9 100644 --- a/jigsawstack/prediction.py +++ b/jigsawstack/prediction.py @@ -1,15 +1,14 @@ from typing import Any, Dict, List, Union, cast -from typing_extensions import NotRequired, TypedDict +from typing_extensions import TypedDict from .request import Request, RequestConfig from .async_request import AsyncRequest -from typing import List, Union from ._config import ClientConfig from ._types import BaseResponse class Dataset(TypedDict): - value: Union[int, str] + value: Union[int, float, str] """ The value of the dataset. """ @@ -32,6 +31,10 @@ class PredictionParams(TypedDict): class PredictionResponse(BaseResponse): + steps: int + """ + The number of steps predicted. + """ prediction: List[Dataset] """ The predictions made on the dataset. diff --git a/tests/test_prediction.py b/tests/test_prediction.py new file mode 100644 index 0000000..6069140 --- /dev/null +++ b/tests/test_prediction.py @@ -0,0 +1,191 @@ +from jigsawstack.exceptions import JigsawStackError +import jigsawstack +import pytest +import logging +from dotenv import load_dotenv +import os +from datetime import datetime, timedelta + +load_dotenv() + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +jigsaw = jigsawstack.JigsawStack( + api_url="http://localhost:3000/api/", api_key=os.getenv("JIGSAWSTACK_API_KEY") +) +async_jigsaw = jigsawstack.AsyncJigsawStack( + api_url="http://localhost:3000/api/", api_key=os.getenv("JIGSAWSTACK_API_KEY") +) + + +def generate_dates(start_date, num_days): + dates = [] + for i in range(num_days): + date = start_date + timedelta(days=i) + dates.append(date.strftime("%Y-%m-%d %H:%M:%S")) + return dates + + +start = datetime(2024, 1, 1) +dates = generate_dates(start, 30) +dates = [str(date) for date in dates] + +TEST_CASES = [ + { + "name": "linear_growth_pattern", + "params": { + "dataset": [{"date": dates[i], "value": 100 + (i * 10)} for i in range(10)], + "steps": 5, + }, + }, + { + "name": "exponential_growth_pattern", + "params": { + "dataset": [{"date": dates[i], "value": 100 * (1.1**i)} for i in range(10)], + "steps": 3, + }, + }, + { + "name": "seasonal_pattern", + "params": { + "dataset": [ + {"date": dates[i], "value": 100 + (50 * (i % 7))} for i in range(21) + ], + "steps": 7, + }, + }, + { + "name": "single_step_prediction", + "params": { + "dataset": [{"date": dates[i], "value": 200 + (i * 5)} for i in range(15)], + "steps": 1, + }, + }, + { + "name": "large_dataset_prediction", + "params": { + "dataset": [ + {"date": dates[i], "value": 1000 + (i * 20)} for i in range(30) + ], + "steps": 10, + }, + }, + { + "name": "declining_trend", + "params": { + "dataset": [{"date": dates[i], "value": 500 - (i * 10)} for i in range(10)], + "steps": 5, + }, + }, + { + "name": "volatile_data", + "params": { + "dataset": [ + {"date": dates[0], "value": 100}, + {"date": dates[1], "value": 150}, + {"date": dates[2], "value": 80}, + {"date": dates[3], "value": 200}, + {"date": dates[4], "value": 120}, + {"date": dates[5], "value": 180}, + {"date": dates[6], "value": 90}, + {"date": dates[7], "value": 160}, + ], + "steps": 4, + }, + }, + { + "name": "constant_values", + "params": { + "dataset": [{"date": dates[i], "value": 100} for i in range(10)], + "steps": 3, + }, + }, + { + "name": "string_values_prediction", + "params": { + "dataset": [ + {"date": dates[0], "value": "33.4"}, + {"date": dates[1], "value": "33.6"}, + {"date": dates[2], "value": "33.6"}, + {"date": dates[3], "value": "33.0"}, + {"date": dates[4], "value": "265.0"}, + {"date": dates[5], "value": "80"}, + {"date": dates[6], "value": "90.45"}, + ], + "steps": 3, + }, + }, + { + "name": "minimal_dataset", + "params": { + "dataset": [ + {"date": dates[0], "value": 50}, + {"date": dates[1], "value": 60}, + {"date": dates[2], "value": 70}, + {"date": dates[3], "value": 80}, + {"date": dates[4], "value": 90}, + ], + "steps": 2, + }, + }, +] + + +class TestPredictionSync: + """Test synchronous prediction methods""" + + sync_test_cases = TEST_CASES + + @pytest.mark.parametrize( + "test_case", sync_test_cases, ids=[tc["name"] for tc in sync_test_cases] + ) + def test_prediction(self, test_case): + """Test synchronous prediction with various inputs""" + try: + result = jigsaw.prediction(test_case["params"]) + + assert result["success"] + assert "prediction" in result + assert isinstance(result["prediction"], list) + + # Verify the number of predictions matches the requested steps + assert len(result["prediction"]) == test_case["params"]["steps"] + + # Verify each prediction has the required fields + for prediction in result["prediction"]: + assert "date" in prediction + assert "value" in prediction + + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") + + +class TestPredictionAsync: + """Test asynchronous prediction methods""" + + async_test_cases = TEST_CASES + + @pytest.mark.parametrize( + "test_case", async_test_cases, ids=[tc["name"] for tc in async_test_cases] + ) + @pytest.mark.asyncio + async def test_prediction_async(self, test_case): + """Test asynchronous prediction with various inputs""" + try: + result = await async_jigsaw.prediction(test_case["params"]) + + assert result["success"] + assert "prediction" in result + assert isinstance(result["prediction"], list) + + # Verify the number of predictions matches the requested steps + assert len(result["prediction"]) == test_case["params"]["steps"] + + # Verify each prediction has the required fields + for prediction in result["prediction"]: + assert "date" in prediction + assert "value" in prediction + + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") From 2a47fd0c411a54e4bb227804334a00eebc1362cf Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 17:37:49 -0700 Subject: [PATCH 24/95] test: defining new test cases for summary endpoint. --- tests/test_summary.py | 189 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 189 insertions(+) create mode 100644 tests/test_summary.py diff --git a/tests/test_summary.py b/tests/test_summary.py new file mode 100644 index 0000000..46b5229 --- /dev/null +++ b/tests/test_summary.py @@ -0,0 +1,189 @@ +from jigsawstack.exceptions import JigsawStackError +import jigsawstack +import pytest +import logging +from dotenv import load_dotenv +import os + +load_dotenv() + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +jigsaw = jigsawstack.JigsawStack( + api_url="http://localhost:3000/api/", api_key=os.getenv("JIGSAWSTACK_API_KEY") +) +async_jigsaw = jigsawstack.AsyncJigsawStack( + api_url="http://localhost:3000/api/", api_key=os.getenv("JIGSAWSTACK_API_KEY") +) + +LONG_TEXT = """ +Artificial Intelligence (AI) has become one of the most transformative technologies of the 21st century. +From healthcare to finance, transportation to entertainment, AI is reshaping industries and changing the way we live and work. +Machine learning algorithms can now diagnose diseases with remarkable accuracy, predict market trends, and even create art. +Natural language processing has enabled computers to understand and generate human language, leading to the development of sophisticated chatbots and virtual assistants. +Computer vision systems can identify objects, faces, and activities in images and videos with superhuman precision. +However, the rapid advancement of AI also raises important ethical questions about privacy, job displacement, and the potential for bias in algorithmic decision-making. +As we continue to develop more powerful AI systems, it's crucial that we consider their societal impact and work to ensure that the benefits of AI are distributed equitably. +The future of AI holds immense promise, but it will require careful planning, regulation, and collaboration between technologists, policymakers, and society at large to realize its full potential while mitigating its risks. +""" + +ARTICLE_URL = "https://en.wikipedia.org/wiki/Artificial_intelligence" +PDF_URL = "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf" + +TEST_CASES = [ + { + "name": "text_summary_default", + "params": { + "text": LONG_TEXT, + }, + }, + { + "name": "text_summary_with_text_type", + "params": { + "text": LONG_TEXT, + "type": "text", + }, + }, + { + "name": "text_summary_with_points_type", + "params": { + "text": LONG_TEXT, + "type": "points", + }, + }, + { + "name": "text_summary_with_max_points", + "params": { + "text": LONG_TEXT, + "type": "points", + "max_points": 5, + }, + }, + { + "name": "text_summary_with_max_characters", + "params": { + "text": LONG_TEXT, + "type": "text", + "max_characters": 200, + }, + }, + { + "name": "short_text_summary", + "params": { + "text": "This is a short text that doesn't need much summarization.", + }, + }, + { + "name": "url_summary_default", + "params": { + "url": ARTICLE_URL, + }, + }, + { + "name": "url_summary_with_text_type", + "params": { + "url": ARTICLE_URL, + "type": "text", + }, + }, + { + "name": "url_summary_with_points_type", + "params": { + "url": ARTICLE_URL, + "type": "points", + "max_points": 7, + }, + }, + { + "name": "pdf_url_summary", + "params": { + "url": PDF_URL, + "type": "text", + }, + }, + { + "name": "complex_text_with_points_and_limit", + "params": { + "text": LONG_TEXT * 3, # Triple the text for more content + "type": "points", + "max_points": 10, + }, + }, + { + "name": "technical_text_summary", + "params": { + "text": """ + Machine learning is a subset of artificial intelligence that focuses on the development of algorithms and statistical models that enable computer systems to improve their performance on a specific task through experience. + Deep learning, a subfield of machine learning, uses artificial neural networks with multiple layers to progressively extract higher-level features from raw input. + Supervised learning involves training models on labeled data, while unsupervised learning discovers patterns in unlabeled data. + Reinforcement learning enables agents to learn optimal behaviors through trial and error interactions with an environment. + """, + "type": "points", + "max_points": 4, + }, + }, +] + + +class TestSummarySync: + """Test synchronous summary methods""" + + sync_test_cases = TEST_CASES + + @pytest.mark.parametrize( + "test_case", sync_test_cases, ids=[tc["name"] for tc in sync_test_cases] + ) + def test_summary(self, test_case): + """Test synchronous summary with various inputs""" + try: + result = jigsaw.summary(test_case["params"]) + + assert result["success"] + assert "summary" in result + + if test_case["params"].get("type") == "points": + assert isinstance(result["summary"], list) + if "max_points" in test_case["params"]: + assert len(result["summary"]) <= test_case["params"]["max_points"] + else: + assert isinstance(result["summary"], str) + if "max_characters" in test_case["params"]: + assert ( + len(result["summary"]) <= test_case["params"]["max_characters"] + ) + + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") + + +class TestSummaryAsync: + """Test asynchronous summary methods""" + + async_test_cases = TEST_CASES + + @pytest.mark.parametrize( + "test_case", async_test_cases, ids=[tc["name"] for tc in async_test_cases] + ) + @pytest.mark.asyncio + async def test_summary_async(self, test_case): + """Test asynchronous summary with various inputs""" + try: + result = await async_jigsaw.summary(test_case["params"]) + + assert result["success"] + assert "summary" in result + + if test_case["params"].get("type") == "points": + assert isinstance(result["summary"], list) + if "max_points" in test_case["params"]: + assert len(result["summary"]) <= test_case["params"]["max_points"] + else: + assert isinstance(result["summary"], str) + if "max_characters" in test_case["params"]: + assert ( + len(result["summary"]) <= test_case["params"]["max_characters"] + ) + + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") From 7771416c08b171c8242c0fd1aeb0dad17687f204 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 17:47:59 -0700 Subject: [PATCH 25/95] feat: defining version for requirements.txt --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 0a1a976..351d200 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 @@ requests>=2.31.0 typing_extensions -aiohttp \ No newline at end of file +aiohttp>=3.12.15 \ No newline at end of file From 27cb92dff0dd2c6e982cbc70a4de493386586386 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 17:52:19 -0700 Subject: [PATCH 26/95] test: defining test cases for text_2_sql service. --- tests/test_sql.py | 272 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 272 insertions(+) create mode 100644 tests/test_sql.py diff --git a/tests/test_sql.py b/tests/test_sql.py new file mode 100644 index 0000000..397b855 --- /dev/null +++ b/tests/test_sql.py @@ -0,0 +1,272 @@ +from jigsawstack.exceptions import JigsawStackError +import jigsawstack +import pytest +import logging +from dotenv import load_dotenv +import os + +load_dotenv() + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +jigsaw = jigsawstack.JigsawStack( + api_url="http://localhost:3000/api/", api_key=os.getenv("JIGSAWSTACK_API_KEY") +) +async_jigsaw = jigsawstack.AsyncJigsawStack( + api_url="http://localhost:3000/api/", api_key=os.getenv("JIGSAWSTACK_API_KEY") +) + +# Sample schemas for different databases +MYSQL_SCHEMA = """ +CREATE TABLE users ( + id INT PRIMARY KEY AUTO_INCREMENT, + username VARCHAR(255) NOT NULL, + email VARCHAR(255) UNIQUE NOT NULL, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP +); + +CREATE TABLE orders ( + id INT PRIMARY KEY AUTO_INCREMENT, + user_id INT, + product_name VARCHAR(255), + quantity INT, + price DECIMAL(10, 2), + order_date DATE, + FOREIGN KEY (user_id) REFERENCES users(id) +); +""" + +POSTGRESQL_SCHEMA = """ +CREATE TABLE employees ( + id SERIAL PRIMARY KEY, + name VARCHAR(100) NOT NULL, + department VARCHAR(50), + salary NUMERIC(10, 2), + hire_date DATE, + is_active BOOLEAN DEFAULT true +); + +CREATE TABLE departments ( + id SERIAL PRIMARY KEY, + name VARCHAR(50) UNIQUE NOT NULL, + budget NUMERIC(12, 2), + manager_id INTEGER REFERENCES employees(id) +); +""" + +SQLITE_SCHEMA = """ +CREATE TABLE products ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + name TEXT NOT NULL, + category TEXT, + price REAL, + stock_quantity INTEGER DEFAULT 0 +); + +CREATE TABLE sales ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + product_id INTEGER, + quantity INTEGER, + sale_date TEXT, + total_amount REAL, + FOREIGN KEY (product_id) REFERENCES products(id) +); +""" + +TEST_CASES = [ + { + "name": "mysql_simple_select", + "params": { + "prompt": "Get all users from the users table", + "sql_schema": MYSQL_SCHEMA, + "database": "mysql", + }, + }, + { + "name": "mysql_join_query", + "params": { + "prompt": "Get all orders with user information for orders placed in the last 30 days", + "sql_schema": MYSQL_SCHEMA, + "database": "mysql", + }, + }, + { + "name": "mysql_aggregate_query", + "params": { + "prompt": "Calculate the total revenue per user", + "sql_schema": MYSQL_SCHEMA, + "database": "mysql", + }, + }, + { + "name": "postgresql_simple_select", + "params": { + "prompt": "Find all active employees", + "sql_schema": POSTGRESQL_SCHEMA, + "database": "postgresql", + }, + }, + { + "name": "postgresql_complex_join", + "params": { + "prompt": "Get all departments with their manager names and department budgets greater than 100000", + "sql_schema": POSTGRESQL_SCHEMA, + "database": "postgresql", + }, + }, + { + "name": "postgresql_window_function", + "params": { + "prompt": "Rank employees by salary within each department", + "sql_schema": POSTGRESQL_SCHEMA, + "database": "postgresql", + }, + }, + { + "name": "sqlite_simple_query", + "params": { + "prompt": "List all products in the electronics category", + "sql_schema": SQLITE_SCHEMA, + "database": "sqlite", + }, + }, + { + "name": "sqlite_aggregate_with_group", + "params": { + "prompt": "Calculate total sales amount for each product", + "sql_schema": SQLITE_SCHEMA, + "database": "sqlite", + }, + }, + { + "name": "default_database_type", + "params": { + "prompt": "Select all records from users table where email contains 'example.com'", + "sql_schema": MYSQL_SCHEMA, + # No database specified, should use default + }, + }, + { + "name": "complex_multi_table_query", + "params": { + "prompt": "Find users who have placed more than 5 orders with total value exceeding 1000", + "sql_schema": MYSQL_SCHEMA, + "database": "mysql", + }, + }, + { + "name": "insert_query", + "params": { + "prompt": "Insert a new user with username 'john_doe' and email 'john@example.com'", + "sql_schema": MYSQL_SCHEMA, + "database": "mysql", + }, + }, + { + "name": "update_query", + "params": { + "prompt": "Update the salary of all employees in the IT department by 10%", + "sql_schema": POSTGRESQL_SCHEMA, + "database": "postgresql", + }, + }, + { + "name": "delete_query", + "params": { + "prompt": "Delete all products with zero stock quantity", + "sql_schema": SQLITE_SCHEMA, + "database": "sqlite", + }, + }, + { + "name": "subquery_example", + "params": { + "prompt": "Find all users who have never placed an order", + "sql_schema": MYSQL_SCHEMA, + "database": "mysql", + }, + }, + { + "name": "date_filtering", + "params": { + "prompt": "Get all employees hired in the last year", + "sql_schema": POSTGRESQL_SCHEMA, + "database": "postgresql", + }, + }, +] + + +class TestSQLSync: + """Test synchronous SQL text-to-sql methods""" + + sync_test_cases = TEST_CASES + + @pytest.mark.parametrize( + "test_case", sync_test_cases, ids=[tc["name"] for tc in sync_test_cases] + ) + def test_text_to_sql(self, test_case): + """Test synchronous text-to-sql with various inputs""" + try: + result = jigsaw.text_to_sql(test_case["params"]) + + assert result["success"] + assert "sql" in result + assert isinstance(result["sql"], str) + assert len(result["sql"]) > 0 + + # Basic SQL validation - check if it contains SQL keywords + sql_lower = result["sql"].lower() + sql_keywords = [ + "select", + "insert", + "update", + "delete", + "create", + "alter", + "drop", + ] + assert any(keyword in sql_lower for keyword in sql_keywords), ( + "Generated SQL should contain valid SQL keywords" + ) + + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") + + +class TestSQLAsync: + """Test asynchronous SQL text-to-sql methods""" + + async_test_cases = TEST_CASES + + @pytest.mark.parametrize( + "test_case", async_test_cases, ids=[tc["name"] for tc in async_test_cases] + ) + @pytest.mark.asyncio + async def test_text_to_sql_async(self, test_case): + """Test asynchronous text-to-sql with various inputs""" + try: + result = await async_jigsaw.text_to_sql(test_case["params"]) + + assert result["success"] + assert "sql" in result + assert isinstance(result["sql"], str) + assert len(result["sql"]) > 0 + + sql_lower = result["sql"].lower() + sql_keywords = [ + "select", + "insert", + "update", + "delete", + "create", + "alter", + "drop", + ] + assert any(keyword in sql_lower for keyword in sql_keywords), ( + "Generated SQL should contain valid SQL keywords" + ) + + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") From 974f46fba58674d950d34c2e995ed189e9108080 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 17:54:16 -0700 Subject: [PATCH 27/95] feat: updating formating for sql dot py --- jigsawstack/sql.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/jigsawstack/sql.py b/jigsawstack/sql.py index d2dfc3b..efac7be 100644 --- a/jigsawstack/sql.py +++ b/jigsawstack/sql.py @@ -1,8 +1,7 @@ -from typing import Any, Dict, List, Union, cast, Literal +from typing import Any, Dict, Union, cast, Literal from typing_extensions import NotRequired, TypedDict from .request import Request, RequestConfig from .async_request import AsyncRequest -from typing import List, Union from ._config import ClientConfig from ._types import BaseResponse From c1b6df1a900730b5eb00964de30e047085c190fe Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 18:03:20 -0700 Subject: [PATCH 28/95] test: defining test cases for validation services. --- tests/test_validate.py | 457 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 431 insertions(+), 26 deletions(-) diff --git a/tests/test_validate.py b/tests/test_validate.py index 51b8d3d..f6219b1 100644 --- a/tests/test_validate.py +++ b/tests/test_validate.py @@ -1,42 +1,447 @@ -from unittest.mock import MagicMock -import unittest +import requests from jigsawstack.exceptions import JigsawStackError -from jigsawstack import AsyncJigsawStack +import jigsawstack import pytest -import asyncio import logging +from dotenv import load_dotenv +import os + +load_dotenv() logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) +jigsaw = jigsawstack.JigsawStack( + api_url="http://localhost:3000/api/", api_key=os.getenv("JIGSAWSTACK_API_KEY") +) +async_jigsaw = jigsawstack.AsyncJigsawStack( + api_url="http://localhost:3000/api/", api_key=os.getenv("JIGSAWSTACK_API_KEY") +) + +# Sample URLs for NSFW testing +SAFE_IMAGE_URL = ( + "https://images.unsplash.com/photo-1506905925346-21bda4d32df4?q=80&w=2070" +) +POTENTIALLY_NSFW_URL = "https://images.unsplash.com/photo-1512310604669-443f26c35f52?q=80&w=868&auto=format&fit=crop&ixlib=rb-4.1.0&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D" + +SPAM_CHECK_TEST_CASES = [ + { + "name": "single_text_not_spam", + "params": { + "text": "I had a great experience with your product. The customer service was excellent!" + }, + }, + { + "name": "single_text_potential_spam", + "params": { + "text": "CLICK HERE NOW!!! FREE MONEY!!! Win $1000000 instantly! No credit card required! Act NOW!" + }, + }, + { + "name": "multiple_texts_mixed", + "params": { + "text": [ + "Thank you for your email. I'll get back to you soon.", + "BUY NOW! LIMITED TIME OFFER! 90% OFF EVERYTHING!", + "The meeting is scheduled for 3 PM tomorrow.", + ] + }, + }, + { + "name": "professional_email", + "params": { + "text": "Dear John, I hope this email finds you well. I wanted to follow up on our discussion from yesterday." + }, + }, + { + "name": "marketing_spam", + "params": { + "text": "Congratulations! You've been selected as our lucky winner! Claim your prize now at this link: bit.ly/win" + }, + }, +] + +# Spell Check Test Cases +SPELL_CHECK_TEST_CASES = [ + { + "name": "text_with_no_errors", + "params": {"text": "The quick brown fox jumps over the lazy dog."}, + }, + { + "name": "text_with_spelling_errors", + "params": {"text": "Thiss sentense has severel speling erors in it."}, + }, + { + "name": "text_with_language_code", + "params": {"text": "I recieved the pacakge yesterday.", "language_code": "en"}, + }, + { + "name": "mixed_correct_and_incorrect", + "params": { + "text": "The weather is beatiful today, but tommorow might be diferent." + }, + }, + { + "name": "technical_text", + "params": {"text": "The algorythm processes the datbase queries eficiently."}, + }, +] + +# Profanity Test Cases +PROFANITY_TEST_CASES = [ + { + "name": "clean_text", + "params": {"text": "This is a perfectly clean and professional message."}, + }, + { + "name": "text_with_profanity", + "params": { + "text": "This fucking thing is not working properly.", + "censor_replacement": "****", + }, + }, + { + "name": "text_with_custom_censor", + "params": { + "text": "What the fuck is going on here?", + "censor_replacement": "[CENSORED]", + }, + }, + { + "name": "mixed_clean_and_profane", + "params": {"text": "The weather is nice but this damn traffic is terrible."}, + }, + { + "name": "no_censor_replacement", + "params": {"text": "This text might contain some inappropriate words."}, + }, +] -@pytest.mark.skip(reason="Skipping TestWebAPI class for now") -def test_async_spam_check_response(): - async def _test(): - client = AsyncJigsawStack() +# NSFW Test Cases +NSFW_TEST_CASES = [ + { + "name": "safe_image_url", + "params": {"url": SAFE_IMAGE_URL}, + }, + { + "name": "landscape_image_url", + "params": {"url": POTENTIALLY_NSFW_URL}, + }, +] + +# NSFW Blob Test Cases +NSFW_BLOB_TEST_CASES = [ + { + "name": "safe_image_blob", + "blob_url": SAFE_IMAGE_URL, + "options": {}, + }, +] + + +class TestSpamCheckSync: + """Test synchronous spam check methods""" + + @pytest.mark.parametrize( + "test_case", + SPAM_CHECK_TEST_CASES, + ids=[tc["name"] for tc in SPAM_CHECK_TEST_CASES], + ) + def test_spam_check(self, test_case): + """Test synchronous spam check with various inputs""" try: - result = await client.validate.spamcheck({"text": "I am happy!"}) - logger.info(result) - assert result["success"] == True + result = jigsaw.validate.spamcheck(test_case["params"]) + + assert result["success"] + assert "check" in result + + # Check structure based on input type + if isinstance(test_case["params"]["text"], list): + assert isinstance(result["check"], list) + for check in result["check"]: + assert "is_spam" in check + assert "score" in check + assert isinstance(check["is_spam"], bool) + assert 0 <= check["score"] <= 1 + else: + assert "is_spam" in result["check"] + assert "score" in result["check"] + assert isinstance(result["check"]["is_spam"], bool) + assert 0 <= result["check"]["score"] <= 1 + except JigsawStackError as e: - pytest.fail(f"Unexpected JigsawStackError: {e}") + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") - asyncio.run(_test()) +class TestSpellCheckSync: + """Test synchronous spell check methods""" -@pytest.mark.skip(reason="Skipping TestWebAPI class for now") -def test_async_spell_check_response(): - async def _test(): - client = AsyncJigsawStack() + @pytest.mark.parametrize( + "test_case", + SPELL_CHECK_TEST_CASES, + ids=[tc["name"] for tc in SPELL_CHECK_TEST_CASES], + ) + def test_spell_check(self, test_case): + """Test synchronous spell check with various inputs""" try: - result = await client.validate.spellcheck( - { - "text": "All the world's a stage, and all the men and women merely players. They have their exits and their entrances; And one man in his time plays many parts" - } - ) - logger.info(result) - assert result["success"] == True + result = jigsaw.validate.spellcheck(test_case["params"]) + + assert result["success"] + assert "misspellings_found" in result + assert "misspellings" in result + assert "auto_correct_text" in result + assert isinstance(result["misspellings_found"], bool) + assert isinstance(result["misspellings"], list) + assert isinstance(result["auto_correct_text"], str) + + # Check misspellings structure + for misspelling in result["misspellings"]: + assert "word" in misspelling + assert "startIndex" in misspelling + assert "endIndex" in misspelling + assert "expected" in misspelling + assert "auto_corrected" in misspelling + assert isinstance(misspelling["expected"], list) + assert isinstance(misspelling["auto_corrected"], bool) + + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") + + +class TestProfanitySync: + """Test synchronous profanity check methods""" + + @pytest.mark.parametrize( + "test_case", + PROFANITY_TEST_CASES, + ids=[tc["name"] for tc in PROFANITY_TEST_CASES], + ) + def test_profanity_check(self, test_case): + """Test synchronous profanity check with various inputs""" + try: + result = jigsaw.validate.profanity(test_case["params"]) + + assert result["success"] + assert "clean_text" in result + assert "profanities" in result + assert "profanities_found" in result + assert isinstance(result["profanities_found"], bool) + assert isinstance(result["profanities"], list) + assert isinstance(result["clean_text"], str) + + # Check profanities structure + for profanity in result["profanities"]: + assert "profanity" in profanity + assert "startIndex" in profanity + assert "endIndex" in profanity + except JigsawStackError as e: - pytest.fail(f"Unexpected JigsawStackError: {e}") + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") + + +class TestNSFWSync: + """Test synchronous NSFW check methods""" + + @pytest.mark.parametrize( + "test_case", NSFW_TEST_CASES, ids=[tc["name"] for tc in NSFW_TEST_CASES] + ) + def test_nsfw_check(self, test_case): + """Test synchronous NSFW check with various inputs""" + try: + result = jigsaw.validate.nsfw(test_case["params"]) + + assert result["success"] + assert "nsfw" in result + assert "nudity" in result + assert "gore" in result + assert "nsfw_score" in result + assert "nudity_score" in result + assert "gore_score" in result + + assert isinstance(result["nsfw"], bool) + assert isinstance(result["nudity"], bool) + assert isinstance(result["gore"], bool) + assert 0 <= result["nsfw_score"] <= 1 + assert 0 <= result["nudity_score"] <= 1 + assert 0 <= result["gore_score"] <= 1 + + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") + + @pytest.mark.parametrize( + "test_case", + NSFW_BLOB_TEST_CASES, + ids=[tc["name"] for tc in NSFW_BLOB_TEST_CASES], + ) + def test_nsfw_check_blob(self, test_case): + """Test synchronous NSFW check with blob inputs""" + try: + # Download blob content + blob_content = requests.get(test_case["blob_url"]).content + result = jigsaw.validate.nsfw(blob_content, test_case["options"]) + + assert result["success"] + assert "nsfw" in result + assert "nudity" in result + assert "gore" in result + assert "nsfw_score" in result + assert "nudity_score" in result + assert "gore_score" in result + + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") + - asyncio.run(_test()) +# Async Test Classes + + +class TestSpamCheckAsync: + """Test asynchronous spam check methods""" + + @pytest.mark.parametrize( + "test_case", + SPAM_CHECK_TEST_CASES, + ids=[tc["name"] for tc in SPAM_CHECK_TEST_CASES], + ) + @pytest.mark.asyncio + async def test_spam_check_async(self, test_case): + """Test asynchronous spam check with various inputs""" + try: + result = await async_jigsaw.validate.spamcheck(test_case["params"]) + + assert result["success"] + assert "check" in result + + # Check structure based on input type + if isinstance(test_case["params"]["text"], list): + assert isinstance(result["check"], list) + for check in result["check"]: + assert "is_spam" in check + assert "score" in check + assert isinstance(check["is_spam"], bool) + assert 0 <= check["score"] <= 1 + else: + assert "is_spam" in result["check"] + assert "score" in result["check"] + assert isinstance(result["check"]["is_spam"], bool) + assert 0 <= result["check"]["score"] <= 1 + + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") + + +class TestSpellCheckAsync: + """Test asynchronous spell check methods""" + + @pytest.mark.parametrize( + "test_case", + SPELL_CHECK_TEST_CASES, + ids=[tc["name"] for tc in SPELL_CHECK_TEST_CASES], + ) + @pytest.mark.asyncio + async def test_spell_check_async(self, test_case): + """Test asynchronous spell check with various inputs""" + try: + result = await async_jigsaw.validate.spellcheck(test_case["params"]) + + assert result["success"] + assert "misspellings_found" in result + assert "misspellings" in result + assert "auto_correct_text" in result + assert isinstance(result["misspellings_found"], bool) + assert isinstance(result["misspellings"], list) + assert isinstance(result["auto_correct_text"], str) + + # Check misspellings structure + for misspelling in result["misspellings"]: + assert "word" in misspelling + assert "startIndex" in misspelling + assert "endIndex" in misspelling + assert "expected" in misspelling + assert "auto_corrected" in misspelling + + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") + + +class TestProfanityAsync: + """Test asynchronous profanity check methods""" + + @pytest.mark.parametrize( + "test_case", + PROFANITY_TEST_CASES, + ids=[tc["name"] for tc in PROFANITY_TEST_CASES], + ) + @pytest.mark.asyncio + async def test_profanity_check_async(self, test_case): + """Test asynchronous profanity check with various inputs""" + try: + result = await async_jigsaw.validate.profanity(test_case["params"]) + + assert result["success"] + assert "clean_text" in result + assert "profanities" in result + assert "profanities_found" in result + assert isinstance(result["profanities_found"], bool) + assert isinstance(result["profanities"], list) + assert isinstance(result["clean_text"], str) + + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") + + +class TestNSFWAsync: + """Test asynchronous NSFW check methods""" + + @pytest.mark.parametrize( + "test_case", NSFW_TEST_CASES, ids=[tc["name"] for tc in NSFW_TEST_CASES] + ) + @pytest.mark.asyncio + async def test_nsfw_check_async(self, test_case): + """Test asynchronous NSFW check with various inputs""" + try: + result = await async_jigsaw.validate.nsfw(test_case["params"]) + + assert result["success"] + assert "nsfw" in result + assert "nudity" in result + assert "gore" in result + assert "nsfw_score" in result + assert "nudity_score" in result + assert "gore_score" in result + + assert isinstance(result["nsfw"], bool) + assert isinstance(result["nudity"], bool) + assert isinstance(result["gore"], bool) + assert 0 <= result["nsfw_score"] <= 1 + assert 0 <= result["nudity_score"] <= 1 + assert 0 <= result["gore_score"] <= 1 + + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") + + @pytest.mark.parametrize( + "test_case", + NSFW_BLOB_TEST_CASES, + ids=[tc["name"] for tc in NSFW_BLOB_TEST_CASES], + ) + @pytest.mark.asyncio + async def test_nsfw_check_blob_async(self, test_case): + """Test asynchronous NSFW check with blob inputs""" + try: + # Download blob content + blob_content = requests.get(test_case["blob_url"]).content + result = await async_jigsaw.validate.nsfw( + blob_content, test_case["options"] + ) + + assert result["success"] + assert "nsfw" in result + assert "nudity" in result + assert "gore" in result + assert "nsfw_score" in result + assert "nudity_score" in result + assert "gore_score" in result + + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") From 0a71df1b311ce5192609a115c967f10433172513 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 18:05:52 -0700 Subject: [PATCH 29/95] feat: formatting validate dot py. --- jigsawstack/validate.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/jigsawstack/validate.py b/jigsawstack/validate.py index 1d4f715..3565ac9 100644 --- a/jigsawstack/validate.py +++ b/jigsawstack/validate.py @@ -3,8 +3,6 @@ from .request import Request, RequestConfig from .async_request import AsyncRequest, AsyncRequestConfig from ._config import ClientConfig -from typing import Any, Dict, List, cast -from typing_extensions import NotRequired, TypedDict, Union, Optional from .helpers import build_path from ._types import BaseResponse From ba568f0f92d7e2423c639d14ada98e5ab8fbb6e2 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 18:37:45 -0700 Subject: [PATCH 30/95] feat: formatting search dot py --- jigsawstack/search.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/jigsawstack/search.py b/jigsawstack/search.py index 4b10884..3b80bca 100644 --- a/jigsawstack/search.py +++ b/jigsawstack/search.py @@ -247,7 +247,7 @@ def search(self, params: SearchParams) -> SearchResponse: "spell_check": spell_check, } - path = f"/web/search" + path = "/web/search" resp = Request( config=self.config, path=path, @@ -269,7 +269,7 @@ def suggestions(self, params: SearchSuggestionsParams) -> SearchSuggestionsRespo return resp def deep_research(self, params: DeepResearchParams) -> DeepResearchResponse: - path = f"/web/deep_research" + path = "/web/deep_research" resp = Request( config=self.config, path=path, @@ -296,7 +296,7 @@ def __init__( ) async def search(self, params: SearchParams) -> SearchResponse: - path = f"/web/search" + path = "/web/search" query = params["query"] ai_overview = params.get("ai_overview", "True") safe_search = params.get("safe_search", "moderate") @@ -331,7 +331,7 @@ async def suggestions( return resp async def deep_research(self, params: DeepResearchParams) -> DeepResearchResponse: - path = f"/web/deep_research" + path = "/web/deep_research" resp = await AsyncRequest( config=self.config, path=path, From b5e2577b37040c7f023a83ea4df5c453cf24a79c Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 19:40:47 -0700 Subject: [PATCH 31/95] test:defining new test cases for web search. --- tests/test_web.py | 555 +++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 525 insertions(+), 30 deletions(-) diff --git a/tests/test_web.py b/tests/test_web.py index 5191fca..565d1e5 100644 --- a/tests/test_web.py +++ b/tests/test_web.py @@ -1,45 +1,540 @@ -from unittest.mock import MagicMock -import unittest from jigsawstack.exceptions import JigsawStackError -from jigsawstack import JigsawStack - +import jigsawstack import pytest +import logging +from dotenv import load_dotenv +import os + +load_dotenv() + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +jigsaw = jigsawstack.JigsawStack( + api_url="http://localhost:3000/api/", api_key=os.getenv("JIGSAWSTACK_API_KEY") +) +async_jigsaw = jigsawstack.AsyncJigsawStack( + api_url="http://localhost:3000/api/", api_key=os.getenv("JIGSAWSTACK_API_KEY") +) + +URL = "https://jigsawstack.com" + + +# AI Scrape Test Cases +AI_SCRAPE_TEST_CASES = [ + { + "name": "scrape_with_element_prompts", + "params": { + "url": URL, + "element_prompts": ["title", "main content", "navigation links"], + }, + }, + { + "name": "scrape_with_selectors", + "params": { + "url": URL, + "selectors": ["h1", "p", "a"], + }, + }, + { + "name": "scrape_with_features", + "params": { + "url": URL, + "features": ["meta", "link"], + }, + }, + { + "name": "scrape_with_root_element", + "params": { + "url": URL, + "element_prompts": ["content"], + "root_element_selector": "main", + }, + }, + { + "name": "scrape_with_wait_for_selector", + "params": { + "url": URL, + "element_prompts": ["dynamic content"], + "wait_for": {"mode": "selector", "value": ".loaded-content"}, + }, + }, + { + "name": "scrape_with_wait_for_timeout", + "params": { + "url": URL, + "element_prompts": ["content"], + "wait_for": {"mode": "timeout", "value": 3000}, + }, + }, + { + "name": "scrape_mobile_view", + "params": { + "url": URL, + "element_prompts": ["mobile menu"], + "is_mobile": True, + }, + }, + { + "name": "scrape_with_cookies", + "params": { + "url": URL, + "element_prompts": ["user data"], + "cookies": [ + {"name": "session", "value": "test123", "domain": "example.com"} + ], + }, + }, + { + "name": "scrape_with_advance_config", + "params": { + "url": URL, + "element_prompts": ["content"], + "advance_config": {"console": True, "network": True, "cookies": True}, + }, + }, +] + +# HTML to Any Test Cases +HTML_TO_ANY_TEST_CASES = [ + { + "name": "html_to_pdf_url", + "params": { + "url": URL, + "type": "pdf", + "return_type": "url", + }, + }, + { + "name": "html_to_png_base64", + "params": { + "url": URL, + "type": "png", + "return_type": "base64", + }, + }, + { + "name": "html_to_jpeg_binary", + "params": { + "url": URL, + "type": "jpeg", + "return_type": "binary", + }, + }, + { + "name": "html_string_to_pdf", + "params": { + "html": "

Test Document

This is a test.

", + "type": "pdf", + "return_type": "url", + }, + }, + { + "name": "html_to_pdf_with_options", + "params": { + "url": URL, + "type": "pdf", + "return_type": "url", + "pdf_display_header_footer": True, + "pdf_print_background": True, + }, + }, + { + "name": "html_to_png_full_page", + "params": { + "url": URL, + "type": "png", + "full_page": True, + "return_type": "url", + }, + }, + { + "name": "html_to_webp_custom_size", + "params": { + "url": URL, + "type": "webp", + "width": 1920, + "height": 1080, + "return_type": "base64", + }, + }, + { + "name": "html_to_png_mobile", + "params": { + "url": URL, + "type": "png", + "is_mobile": True, + "return_type": "url", + }, + }, + { + "name": "html_to_png_dark_mode", + "params": { + "url": URL, + "type": "png", + "dark_mode": True, + "return_type": "url", + }, + }, +] + +# Search Test Cases +SEARCH_TEST_CASES = [ + { + "name": "basic_search", + "params": { + "query": "artificial intelligence news", + }, + }, + { + "name": "search_with_max_results", + "params": { + "query": "python programming", + "max_results": 5, + }, + }, + { + "name": "search_specific_site", + "params": { + "query": "documentation site:github.com", + }, + }, + { + "name": "search_ai_mode", + "params": { + "query": "explain quantum computing", + "ai": True, + }, + }, +] + +# Search Suggestions Test Cases +SEARCH_SUGGESTIONS_TEST_CASES = [ + { + "name": "basic_suggestions", + "params": { + "query": "machine learn", + }, + }, + { + "name": "programming_suggestions", + "params": { + "query": "python tutor", + }, + }, + { + "name": "partial_query_suggestions", + "params": { + "query": "artifi", + }, + }, +] + +# Deep Research Test Cases +DEEP_RESEARCH_TEST_CASES = [ + { + "name": "basic_deep_research", + "params": { + "query": "climate change effects", + }, + }, + { + "name": "technical_deep_research", + "params": { + "query": "quantum computing applications in cryptography", + }, + }, + { + "name": "deep_research_with_depth", + "params": { + "query": "renewable energy sources", + "depth": 3, + }, + }, +] + + +class TestAIScrapeSync: + """Test synchronous AI scrape methods""" + + @pytest.mark.parametrize( + "test_case", + AI_SCRAPE_TEST_CASES, + ids=[tc["name"] for tc in AI_SCRAPE_TEST_CASES], + ) + def test_ai_scrape(self, test_case): + """Test synchronous AI scrape with various inputs""" + try: + result = jigsaw.web.ai_scrape(test_case["params"]) + + assert result["success"] + assert "data" in result + assert isinstance(result["data"], list) + + # Check for optional features + if "meta" in test_case["params"].get("features", []): + assert "meta" in result + if "link" in test_case["params"].get("features", []): + assert "link" in result + assert isinstance(result["link"], list) + + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") + + +class TestHTMLToAnySync: + """Test synchronous HTML to Any methods""" + + @pytest.mark.parametrize( + "test_case", + HTML_TO_ANY_TEST_CASES, + ids=[tc["name"] for tc in HTML_TO_ANY_TEST_CASES], + ) + def test_html_to_any(self, test_case): + """Test synchronous HTML to Any with various inputs""" + try: + result = jigsaw.web.html_to_any(test_case["params"]) + + return_type = test_case["params"].get("return_type", "url") + + if return_type == "binary": + assert isinstance(result, bytes) + assert len(result) > 0 + else: + assert result["success"] + assert "url" in result + assert isinstance(result["url"], str) + + if return_type == "base64": + # Check if it's a valid base64 string + assert result["url"].startswith("data:") + + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") + + +class TestSearchSync: + """Test synchronous search methods""" + + @pytest.mark.parametrize( + "test_case", SEARCH_TEST_CASES, ids=[tc["name"] for tc in SEARCH_TEST_CASES] + ) + def test_search(self, test_case): + """Test synchronous search with various inputs""" + try: + result = jigsaw.web.search(test_case["params"]) + + assert result["success"] + assert "results" in result + assert isinstance(result["results"], list) + + if test_case["params"].get("max_results"): + assert len(result["results"]) <= test_case["params"]["max_results"] + + # Check result structure + for item in result["results"]: + assert "title" in item + assert "url" in item + assert "description" in item + + # Check AI mode response + if test_case["params"].get("ai"): + assert "ai_overview" in result + + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") + + +class TestSearchSuggestionsSync: + """Test synchronous search suggestions methods""" + + @pytest.mark.parametrize( + "test_case", + SEARCH_SUGGESTIONS_TEST_CASES, + ids=[tc["name"] for tc in SEARCH_SUGGESTIONS_TEST_CASES], + ) + def test_search_suggestions(self, test_case): + """Test synchronous search suggestions with various inputs""" + try: + result = jigsaw.web.search_suggestions(test_case["params"]) -# flake8: noqa + assert result["success"] + assert "suggestions" in result + assert isinstance(result["suggestions"], list) + assert len(result["suggestions"]) > 0 -client = JigsawStack() + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") + + +class TestDeepResearchSync: + """Test synchronous deep research methods""" + + @pytest.mark.parametrize( + "test_case", + DEEP_RESEARCH_TEST_CASES, + ids=[tc["name"] for tc in DEEP_RESEARCH_TEST_CASES], + ) + def test_deep_research(self, test_case): + """Test synchronous deep research with various inputs""" + try: + result = jigsaw.web.deep_research(test_case["params"]) + + assert result["success"] + assert "report" in result + assert isinstance(result["report"], str) + assert len(result["report"]) > 0 + + # Check for sources + if "sources" in result: + assert isinstance(result["sources"], list) + + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") + + +# Async Test Classes + + +class TestAIScrapeAsync: + """Test asynchronous AI scrape methods""" + + @pytest.mark.parametrize( + "test_case", + AI_SCRAPE_TEST_CASES, + ids=[tc["name"] for tc in AI_SCRAPE_TEST_CASES], + ) + @pytest.mark.asyncio + async def test_ai_scrape_async(self, test_case): + """Test asynchronous AI scrape with various inputs""" + try: + result = await async_jigsaw.web.ai_scrape(test_case["params"]) + + assert result["success"] + assert "data" in result + assert isinstance(result["data"], list) + + # Check for optional features + if "meta" in test_case["params"].get("features", []): + assert "meta" in result + if "link" in test_case["params"].get("features", []): + assert "link" in result + assert isinstance(result["link"], list) + + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") + + +class TestHTMLToAnyAsync: + """Test asynchronous HTML to Any methods""" + @pytest.mark.parametrize( + "test_case", + HTML_TO_ANY_TEST_CASES, + ids=[tc["name"] for tc in HTML_TO_ANY_TEST_CASES], + ) + @pytest.mark.asyncio + async def test_html_to_any_async(self, test_case): + """Test asynchronous HTML to Any with various inputs""" + try: + result = await async_jigsaw.web.html_to_any(test_case["params"]) + + return_type = test_case["params"].get("return_type", "url") + + if return_type == "binary": + assert isinstance(result, bytes) + assert len(result) > 0 + else: + assert result["success"] + assert "url" in result + assert isinstance(result["url"], str) + + if return_type == "base64": + # Check if it's a valid base64 string + assert result["url"].startswith("data:") + + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") -@pytest.mark.skip(reason="Skipping TestWebAPI class for now") -class TestWebAPI(unittest.TestCase): - def test_ai_scrape_success_response(self) -> None: - params = { - "url": "https://supabase.com/pricing", - "element_prompts": ["Plan title", "Plan price"], - } + +class TestSearchAsync: + """Test asynchronous search methods""" + + @pytest.mark.parametrize( + "test_case", SEARCH_TEST_CASES, ids=[tc["name"] for tc in SEARCH_TEST_CASES] + ) + @pytest.mark.asyncio + async def test_search_async(self, test_case): + """Test asynchronous search with various inputs""" try: - result = client.file.upload(params) - assert result["success"] == True + result = await async_jigsaw.web.search(test_case["params"]) + + assert result["success"] + assert "results" in result + assert isinstance(result["results"], list) + + if test_case["params"].get("max_results"): + assert len(result["results"]) <= test_case["params"]["max_results"] + + # Check result structure + for item in result["results"]: + assert "title" in item + assert "url" in item + assert "description" in item + + # Check AI mode response + if test_case["params"].get("ai"): + assert "ai_overview" in result + except JigsawStackError as e: - assert e.message == "Failed to parse API response. Please try again." + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") + - def test_scrape_success_response(self) -> None: - params = { - "url": "https://supabase.com/pricing", - } +class TestSearchSuggestionsAsync: + """Test asynchronous search suggestions methods""" + + @pytest.mark.parametrize( + "test_case", + SEARCH_SUGGESTIONS_TEST_CASES, + ids=[tc["name"] for tc in SEARCH_SUGGESTIONS_TEST_CASES], + ) + @pytest.mark.asyncio + async def test_search_suggestions_async(self, test_case): + """Test asynchronous search suggestions with various inputs""" try: - result = client.web.scrape(params) - assert result["success"] == True + result = await async_jigsaw.web.search_suggestions(test_case["params"]) + + assert result["success"] + assert "suggestions" in result + assert isinstance(result["suggestions"], list) + assert len(result["suggestions"]) > 0 + except JigsawStackError as e: - assert e.message == "Failed to parse API response. Please try again." + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") + - def test_dns_success_response(self) -> None: +class TestDeepResearchAsync: + """Test asynchronous deep research methods""" - params = { - "url": "https://supabase.com/pricing", - } + @pytest.mark.parametrize( + "test_case", + DEEP_RESEARCH_TEST_CASES, + ids=[tc["name"] for tc in DEEP_RESEARCH_TEST_CASES], + ) + @pytest.mark.asyncio + async def test_deep_research_async(self, test_case): + """Test asynchronous deep research with various inputs""" try: - result = client.web.dns(params) - assert result["success"] == True + result = await async_jigsaw.web.deep_research(test_case["params"]) + + assert result["success"] + assert "report" in result + assert isinstance(result["report"], str) + assert len(result["report"]) > 0 + + # Check for sources + if "sources" in result: + assert isinstance(result["sources"], list) + except JigsawStackError as e: - assert e.message == "Failed to parse API response. Please try again." \ No newline at end of file + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") From 7b213f4c47a8d5240219676afad4fbb44c60c36b Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 20:02:22 -0700 Subject: [PATCH 32/95] fix: update type for image translation. --- jigsawstack/translate.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/jigsawstack/translate.py b/jigsawstack/translate.py index 14d225a..b9fef14 100644 --- a/jigsawstack/translate.py +++ b/jigsawstack/translate.py @@ -50,10 +50,10 @@ class TranslateResponse(BaseResponse): """ -class TranslateImageResponse(TypedDict): - image: bytes +class TranslateImageResponse(BaseResponse): + url: str """ - The image data that was translated. + The URL or base64 of the translated image. """ @@ -83,17 +83,17 @@ def text(self, params: TranslateParams) -> TranslateResponse: return resp @overload - def image(self, params: TranslateImageParams) -> TranslateImageResponse: ... + def image(self, params: TranslateImageParams) -> Union[TranslateImageResponse, bytes]: ... @overload def image( self, blob: bytes, options: TranslateImageParams = None - ) -> TranslateImageParams: ... + ) -> Union[TranslateImageResponse, bytes]: ... def image( self, blob: Union[TranslateImageParams, bytes], options: TranslateImageParams = None, - ) -> TranslateImageResponse: + ) -> Union[TranslateImageResponse, bytes]: if isinstance( blob, dict ): # If params is provided as a dict, we assume it's the first argument From b9d2ac1d0d1cc1dd34e944c829febede073c09ae Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 20:03:14 -0700 Subject: [PATCH 33/95] fix: formatting for translate --- jigsawstack/translate.py | 1 - 1 file changed, 1 deletion(-) diff --git a/jigsawstack/translate.py b/jigsawstack/translate.py index b9fef14..4bb51a6 100644 --- a/jigsawstack/translate.py +++ b/jigsawstack/translate.py @@ -2,7 +2,6 @@ from typing_extensions import NotRequired, TypedDict, Literal from .request import Request, RequestConfig from .async_request import AsyncRequest -from typing import List, Union from ._config import ClientConfig from .helpers import build_path from ._types import BaseResponse From 33c895e64743695349e603cc8566653817cb7ea0 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 20:03:44 -0700 Subject: [PATCH 34/95] test: defining test cases for text and image translation. --- tests/test_translate.py | 238 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 238 insertions(+) create mode 100644 tests/test_translate.py diff --git a/tests/test_translate.py b/tests/test_translate.py new file mode 100644 index 0000000..7c903c7 --- /dev/null +++ b/tests/test_translate.py @@ -0,0 +1,238 @@ +import requests +from jigsawstack.exceptions import JigsawStackError +import jigsawstack +import pytest +import logging +from dotenv import load_dotenv +import os + +load_dotenv() + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +jigsaw = jigsawstack.JigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) +async_jigsaw = jigsawstack.AsyncJigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) + +# Sample image URL for translation tests +IMAGE_URL = "https://images.unsplash.com/photo-1580679137870-86ef9f9a03d6?q=80&w=2574&auto=format&fit=crop&ixlib=rb-4.1.0&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D" + +# Text translation test cases +TEXT_TEST_CASES = [ + { + "name": "translate_single_text_to_spanish", + "params": { + "text": "Hello, how are you?", + "target_language": "es", + }, + }, + { + "name": "translate_single_text_with_current_language", + "params": { + "text": "Bonjour, comment allez-vous?", + "current_language": "fr", + "target_language": "en", + }, + }, + { + "name": "translate_multiple_texts", + "params": { + "text": ["Hello world", "Good morning", "Thank you"], + "target_language": "fr", + }, + }, + { + "name": "translate_to_german", + "params": { + "text": "The weather is beautiful today", + "target_language": "de", + }, + }, + { + "name": "translate_to_japanese", + "params": { + "text": "Welcome to our website", + "target_language": "ja", + }, + }, + { + "name": "translate_multiple_with_source_language", + "params": { + "text": ["Ciao", "Grazie", "Arrivederci"], + "current_language": "it", + "target_language": "en", + }, + }, +] + +# Image translation test cases +IMAGE_TEST_CASES = [ + { + "name": "translate_image_with_url", + "params": { + "url": IMAGE_URL, + "target_language": "es", + }, + "blob": None, + "options": None, + }, + { + "name": "translate_image_with_blob", + "params": None, + "blob": IMAGE_URL, + "options": { + "target_language": "fr", + }, + }, + { + "name": "translate_image_with_url_return_base64", + "params": { + "url": IMAGE_URL, + "target_language": "de", + "return_type": "base64", + }, + "blob": None, + "options": None, + }, + { + "name": "translate_image_with_blob_return_url", + "params": None, + "blob": IMAGE_URL, + "options": { + "target_language": "ja", + "return_type": "url", + }, + }, + { + "name": "translate_image_with_blob_return_binary", + "params": None, + "blob": IMAGE_URL, + "options": { + "target_language": "zh", + "return_type": "binary", + }, + }, + { + "name": "translate_image_to_italian", + "params": { + "url": IMAGE_URL, + "target_language": "it", + }, + "blob": None, + "options": None, + }, +] + + +class TestTranslateTextSync: + """Test synchronous text translation methods""" + + sync_test_cases = TEXT_TEST_CASES + + @pytest.mark.parametrize( + "test_case", sync_test_cases, ids=[tc["name"] for tc in sync_test_cases] + ) + def test_translate_text(self, test_case): + """Test synchronous text translation with various inputs""" + try: + result = jigsaw.translate.text(test_case["params"]) + assert result["success"] + assert "translated_text" in result + + # Check if the response structure matches the input + if isinstance(test_case["params"]["text"], list): + assert isinstance(result["translated_text"], list) + assert len(result["translated_text"]) == len(test_case["params"]["text"]) + else: + assert isinstance(result["translated_text"], str) + + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") + + +class TestTranslateTextAsync: + """Test asynchronous text translation methods""" + + async_test_cases = TEXT_TEST_CASES + + @pytest.mark.parametrize( + "test_case", async_test_cases, ids=[tc["name"] for tc in async_test_cases] + ) + @pytest.mark.asyncio + async def test_translate_text_async(self, test_case): + """Test asynchronous text translation with various inputs""" + try: + result = await async_jigsaw.translate.text(test_case["params"]) + assert result["success"] + assert "translated_text" in result + + # Check if the response structure matches the input + if isinstance(test_case["params"]["text"], list): + assert isinstance(result["translated_text"], list) + assert len(result["translated_text"]) == len(test_case["params"]["text"]) + else: + assert isinstance(result["translated_text"], str) + + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") + + +class TestTranslateImageSync: + """Test synchronous image translation methods""" + + sync_test_cases = IMAGE_TEST_CASES + + @pytest.mark.parametrize( + "test_case", sync_test_cases, ids=[tc["name"] for tc in sync_test_cases] + ) + def test_translate_image(self, test_case): + """Test synchronous image translation with various inputs""" + try: + if test_case.get("blob"): + # Download blob content + blob_content = requests.get(test_case["blob"]).content + result = jigsaw.translate.image( + blob_content, test_case.get("options", {}) + ) + else: + # Use params directly + result = jigsaw.translate.image(test_case["params"]) + assert result is not None + if isinstance(result, dict): + assert "url" in result + else: + assert isinstance(result, bytes) + + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") + + +class TestTranslateImageAsync: + """Test asynchronous image translation methods""" + + async_test_cases = IMAGE_TEST_CASES + + @pytest.mark.parametrize( + "test_case", async_test_cases, ids=[tc["name"] for tc in async_test_cases] + ) + @pytest.mark.asyncio + async def test_translate_image_async(self, test_case): + """Test asynchronous image translation with various inputs""" + try: + if test_case.get("blob"): + # Download blob content + blob_content = requests.get(test_case["blob"]).content + result = await async_jigsaw.translate.image( + blob_content, test_case.get("options", {}) + ) + else: + # Use params directly + result = await async_jigsaw.translate.image(test_case["params"]) + assert result is not None + if isinstance(result, dict): + assert "url" in result + else: + assert isinstance(result, bytes) + + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") \ No newline at end of file From b12157816909f114b904830785a9adb08349eb76 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 20:04:54 -0700 Subject: [PATCH 35/95] fix: update type for image translation. --- jigsawstack/translate.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/jigsawstack/translate.py b/jigsawstack/translate.py index 4bb51a6..0b95ef0 100644 --- a/jigsawstack/translate.py +++ b/jigsawstack/translate.py @@ -146,17 +146,17 @@ async def text(self, params: TranslateParams) -> TranslateResponse: return resp @overload - async def image(self, params: TranslateImageParams) -> TranslateImageResponse: ... + async def image(self, params: TranslateImageParams) -> Union[TranslateImageResponse, bytes]: ... @overload async def image( self, blob: bytes, options: TranslateImageParams = None - ) -> TranslateImageParams: ... + ) -> Union[TranslateImageResponse, bytes]: ... async def image( self, blob: Union[TranslateImageParams, bytes], options: TranslateImageParams = None, - ) -> TranslateImageResponse: + ) -> Union[TranslateImageResponse, bytes]: if isinstance(blob, dict): resp = await AsyncRequest( config=self.config, From 1f743da90f56ea4908a920472b7c9174bb87eaac Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 20:28:04 -0700 Subject: [PATCH 36/95] fix: formatting for the files. --- jigsawstack/audio.py | 1 - jigsawstack/embedding.py | 1 - jigsawstack/geo.py | 406 ----------------------------------- jigsawstack/prompt_engine.py | 1 - jigsawstack/store.py | 3 +- jigsawstack/summary.py | 1 - 6 files changed, 1 insertion(+), 412 deletions(-) delete mode 100644 jigsawstack/geo.py diff --git a/jigsawstack/audio.py b/jigsawstack/audio.py index cb4f199..9e6dfd0 100644 --- a/jigsawstack/audio.py +++ b/jigsawstack/audio.py @@ -4,7 +4,6 @@ from .async_request import AsyncRequest, AsyncRequestConfig from ._config import ClientConfig from typing_extensions import Literal -from .helpers import build_path from ._types import BaseResponse diff --git a/jigsawstack/embedding.py b/jigsawstack/embedding.py index 70a8359..e37e856 100644 --- a/jigsawstack/embedding.py +++ b/jigsawstack/embedding.py @@ -2,7 +2,6 @@ from typing_extensions import NotRequired, TypedDict from .request import Request, RequestConfig from .async_request import AsyncRequest -from typing import List, Union from ._config import ClientConfig from .helpers import build_path from ._types import BaseResponse diff --git a/jigsawstack/geo.py b/jigsawstack/geo.py deleted file mode 100644 index cd182ba..0000000 --- a/jigsawstack/geo.py +++ /dev/null @@ -1,406 +0,0 @@ -from typing import Any, Dict, List, Union, cast -from typing_extensions import NotRequired, TypedDict -from .request import Request, RequestConfig -from .async_request import AsyncRequestConfig, AsyncRequest -from typing import List, Union -from ._config import ClientConfig - - -class BaseResponse: - success: bool - - -class GeoParams(TypedDict): - search_value: str - lat: str - lng: str - country_code: str - proximity_lat: str - proximity_lng: str - types: str - city_code: str - state_code: str - limit: int - - -class GeoSearchParams(TypedDict): - search_value: str - country_code: NotRequired[str] = None - proximity_lat: NotRequired[str] = None - proximity_lng: NotRequired[str] = None - types: NotRequired[str] = None - - -class Geoloc(TypedDict): - type: str - coordinates: List[float] - - -class Region(TypedDict): - name: str - region_code: str - region_code_full: str - - -class Country(TypedDict): - name: str - country_code: str - country_code_alpha_3: str - - -class GeoSearchResult(TypedDict): - type: str - full_address: str - name: str - place_formatted: str - postcode: str - place: str - region: Region - country: Country - language: str - geoloc: Geoloc - poi_category: List[str] - additional_properties: Dict[str, any] - - -class CityResult(TypedDict): - state_code: str - name: str - city_code: str - state: "StateResult" - - -class CountryResult(TypedDict): - country_code: str - name: str - iso2: str - iso3: str - capital: str - phone_code: str - region: str - subregion: str - currency_code: str - geoloc: Geoloc - currency_name: str - currency_symbol: str - tld: str - native: str - emoji: str - emojiU: str - latitude: float - longitude: float - - -class StateResult(TypedDict): - state_code: str - name: str - country_code: str - country: CountryResult - - -class GeoSearchResponse(BaseResponse): - data: List[GeoSearchResult] - - -class GeocodeParams(TypedDict): - search_value: str - lat: str - lng: str - country_code: str - proximity_lat: str - proximity_lng: str - types: str - limit: int - - -class GeoCityParams(TypedDict): - country_code: str - city_code: str - state_code: str - search_value: str - lat: str - lng: str - limit: int - - -class GeoCityResponse(BaseResponse): - city: List[CityResult] - - -class GeoCountryParams(TypedDict): - country_code: str - city_code: str - search_value: str - lat: str - lng: str - limit: int - currency_code: str - - -class GeoCountryResponse(BaseResponse): - country: List[CountryResult] - - -class GeoStateParams(TypedDict): - country_code: str - state_code: str - search_value: str - lat: str - lng: str - limit: int - - -class GeoStateResponse(BaseResponse): - state: List[StateResult] - - -class GeoDistanceParams(TypedDict): - unit: NotRequired[str] = None # "K" or "N" - lat1: str - lng1: str - lat2: str - lng2: str - - -class GeoDistanceResponse(BaseResponse): - distance: float - - -class GeoTimezoneParams(TypedDict): - lat: str - lng: str - city_code: NotRequired[str] = None - country_code: NotRequired[str] = None - - -class GeoTimezoneResponse(BaseResponse): - timezone: Dict[str, any] - - -class GeohashParams(TypedDict): - lat: str - lng: str - precision: int - - -class GeohashResponse(BaseResponse): - geohash: str - - -class GeohashDecodeResponse(BaseResponse): - latitude: float - longitude: float - - -class Geo(ClientConfig): - config: RequestConfig - - def __init__( - self, - api_key: str, - api_url: str, - disable_request_logging: Union[bool, None] = False, - ): - super().__init__(api_key, api_url, disable_request_logging) - self.config = RequestConfig( - api_url=api_url, - api_key=api_key, - disable_request_logging=disable_request_logging, - ) - - def search(self, params: GeoSearchParams) -> GeoSearchResponse: - path = "/geo/search" - resp = Request( - config=self.config, - path=path, - params=cast(Dict[Any, Any], params), - verb="get", - ).perform_with_content() - return resp - - def geocode(self, params: GeocodeParams) -> GeohashDecodeResponse: - path = "/geo/geocode" - resp = Request( - config=self.config, - path=path, - params=cast(Dict[Any, Any], params), - verb="get", - ).perform_with_content() - return resp - - def city(self, params: GeoCityParams) -> GeoCityResponse: - path = "/geo/city" - resp = Request( - config=self.config, - path=path, - params=cast(Dict[Any, Any], params), - verb="get", - ).perform_with_content() - return resp - - def country(self, params: GeoCountryParams) -> GeoCountryResponse: - path = "/geo/country" - resp = Request( - config=self.config, - path=path, - params=cast(Dict[Any, Any], params), - verb="get", - ).perform_with_content() - return resp - - def state(self, params: GeoStateParams) -> GeoStateResponse: - path = "/geo/state" - resp = Request( - config=self.config, - path=path, - params=cast(Dict[Any, Any], params), - verb="get", - ).perform_with_content() - return resp - - def distance(self, params: GeoDistanceParams) -> GeoDistanceResponse: - path = "/geo/distance" - resp = Request( - config=self.config, - path=path, - params=cast(Dict[Any, Any], params), - verb="get", - ).perform_with_content() - return resp - - def timezone(self, params: GeoTimezoneParams) -> GeoTimezoneResponse: - path = "/geo/timezone" - resp = Request( - config=self.config, - path=path, - params=cast(Dict[Any, Any], params), - verb="get", - ).perform_with_content() - return resp - - def geohash(self, params: GeohashParams) -> GeohashResponse: - path = "/geo/geohash" - resp = Request( - config=self.config, - path=path, - params=cast(Dict[Any, Any], params), - verb="get", - ).perform_with_content() - return resp - - def geohash(self, key: str) -> GeohashDecodeResponse: - path = f"/geo/geohash/decode/{key}" - resp = Request( - config=self.config, - path=path, - params=cast(Dict[Any, Any], params={}), - verb="get", - ).perform_with_content() - return resp - - -class AsyncGeo(ClientConfig): - config: AsyncRequestConfig - - def __init__( - self, - api_key: str, - api_url: str, - disable_request_logging: Union[bool, None] = False, - ): - super().__init__(api_key, api_url, disable_request_logging) - self.config = AsyncRequestConfig( - api_url=api_url, - api_key=api_key, - disable_request_logging=disable_request_logging, - ) - - async def search(self, params: GeoSearchParams) -> GeoSearchResponse: - path = "/geo/search" - resp = await AsyncRequest( - config=self.config, - path=path, - params=cast(Dict[Any, Any], params), - verb="get", - ).perform_with_content() - return resp - - async def geocode(self, params: GeocodeParams) -> GeohashDecodeResponse: - path = "/geo/geocode" - resp = await AsyncRequest( - config=self.config, - path=path, - params=cast(Dict[Any, Any], params), - verb="get", - ).perform_with_content() - return resp - - async def city(self, params: GeoCityParams) -> GeoCityResponse: - path = "/geo/city" - resp = await AsyncRequest( - config=self.config, - path=path, - params=cast(Dict[Any, Any], params), - verb="get", - ).perform_with_content() - return resp - - async def country(self, params: GeoCountryParams) -> GeoCountryResponse: - path = "/geo/country" - resp = await AsyncRequest( - config=self.config, - path=path, - params=cast(Dict[Any, Any], params), - verb="get", - ).perform_with_content() - return resp - - async def state(self, params: GeoStateParams) -> GeoStateResponse: - path = "/geo/state" - resp = await AsyncRequest( - config=self.config, - path=path, - params=cast(Dict[Any, Any], params), - verb="get", - ).perform_with_content() - return resp - - async def distance(self, params: GeoDistanceParams) -> GeoDistanceResponse: - path = "/geo/distance" - resp = await AsyncRequest( - config=self.config, - path=path, - params=cast(Dict[Any, Any], params), - verb="get", - ).perform_with_content() - return resp - - async def timezone(self, params: GeoTimezoneParams) -> GeoTimezoneResponse: - path = "/geo/timezone" - resp = await AsyncRequest( - config=self.config, - path=path, - params=cast(Dict[Any, Any], params), - verb="get", - ).perform_with_content() - return resp - - async def geohash(self, params: GeohashParams) -> GeohashResponse: - path = "/geo/geohash" - resp = await AsyncRequest( - config=self.config, - path=path, - params=cast(Dict[Any, Any], params), - verb="get", - ).perform_with_content() - return resp - - async def geohash(self, key: str) -> GeohashDecodeResponse: - path = f"/geo/geohash/decode/{key}" - resp = await AsyncRequest( - config=self.config, - path=path, - params=cast(Dict[Any, Any], params={}), - verb="get", - ).perform_with_content() - return resp diff --git a/jigsawstack/prompt_engine.py b/jigsawstack/prompt_engine.py index 378e9b3..62416e7 100644 --- a/jigsawstack/prompt_engine.py +++ b/jigsawstack/prompt_engine.py @@ -2,7 +2,6 @@ from typing_extensions import NotRequired, TypedDict from .request import Request, RequestConfig from .async_request import AsyncRequest -from typing import List, Union from ._config import ClientConfig from .helpers import build_path diff --git a/jigsawstack/store.py b/jigsawstack/store.py index 72bf191..878a767 100644 --- a/jigsawstack/store.py +++ b/jigsawstack/store.py @@ -1,10 +1,9 @@ -from typing import Any, Dict, List, Union, cast +from typing import Any, Union from typing_extensions import NotRequired, TypedDict from .request import Request, RequestConfig from .async_request import AsyncRequest, AsyncRequestConfig from ._config import ClientConfig from .helpers import build_path -from .exceptions import JigsawStackError class FileDeleteResponse(TypedDict): diff --git a/jigsawstack/summary.py b/jigsawstack/summary.py index 898c42b..898312d 100644 --- a/jigsawstack/summary.py +++ b/jigsawstack/summary.py @@ -2,7 +2,6 @@ from typing_extensions import NotRequired, TypedDict from .request import Request, RequestConfig from .async_request import AsyncRequest -from typing import List, Union from ._config import ClientConfig from ._types import BaseResponse From 6706742a06f1b5167bf95038f35d8ea122fb37a5 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 20:29:09 -0700 Subject: [PATCH 37/95] fix: dropping url used for local testing. --- tests/test_async_web.py | 36 --------------------------- tests/test_embedding.py | 8 ++---- tests/test_prediction.py | 9 +++---- tests/test_search.py | 53 ---------------------------------------- tests/test_sentiment.py | 8 ++---- tests/test_sql.py | 8 ++---- tests/test_summary.py | 8 ++---- tests/test_validate.py | 8 ++---- 8 files changed, 13 insertions(+), 125 deletions(-) delete mode 100644 tests/test_async_web.py delete mode 100644 tests/test_search.py diff --git a/tests/test_async_web.py b/tests/test_async_web.py deleted file mode 100644 index 99899a8..0000000 --- a/tests/test_async_web.py +++ /dev/null @@ -1,36 +0,0 @@ -from unittest.mock import MagicMock -import unittest -from jigsawstack.exceptions import JigsawStackError -from jigsawstack import AsyncJigsawStack -import pytest -import asyncio -import logging - -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) - - -def test_async_web_search_response(): - async def _test(): - client = AsyncJigsawStack() - try: - result = await client.web.search({"query": "JigsawStack fund raising"}) - # logger.info(result) - assert result["success"] == True - except JigsawStackError as e: - pytest.fail(f"Unexpected JigsawStackError: {e}") - - asyncio.run(_test()) - - -def test_async_web_search_suggestion_response(): - async def _test(): - client = AsyncJigsawStack() - try: - result = await client.web.search_suggestion({"query": "Lagos"}) - logger.info(result) - assert result["success"] == True - except JigsawStackError as e: - pytest.fail(f"Unexpected JigsawStackError: {e}") - - asyncio.run(_test()) diff --git a/tests/test_embedding.py b/tests/test_embedding.py index 0106751..c5b08f5 100644 --- a/tests/test_embedding.py +++ b/tests/test_embedding.py @@ -11,12 +11,8 @@ logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) -jigsaw = jigsawstack.JigsawStack( - api_url="http://localhost:3000/api/", api_key=os.getenv("JIGSAWSTACK_API_KEY") -) -async_jigsaw = jigsawstack.AsyncJigsawStack( - api_url="http://localhost:3000/api/", api_key=os.getenv("JIGSAWSTACK_API_KEY") -) +jigsaw = jigsawstack.JigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) +async_jigsaw = jigsawstack.AsyncJigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) SAMPLE_TEXT = "The quick brown fox jumps over the lazy dog. This is a sample text for embedding generation." SAMPLE_IMAGE_URL = "https://images.unsplash.com/photo-1542931287-023b922fa89b?q=80&w=2574&auto=format&fit=crop&ixlib=rb-4.1.0&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D" diff --git a/tests/test_prediction.py b/tests/test_prediction.py index 6069140..48ba6f7 100644 --- a/tests/test_prediction.py +++ b/tests/test_prediction.py @@ -11,12 +11,9 @@ logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) -jigsaw = jigsawstack.JigsawStack( - api_url="http://localhost:3000/api/", api_key=os.getenv("JIGSAWSTACK_API_KEY") -) -async_jigsaw = jigsawstack.AsyncJigsawStack( - api_url="http://localhost:3000/api/", api_key=os.getenv("JIGSAWSTACK_API_KEY") -) +jigsaw = jigsawstack.JigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) +async_jigsaw = jigsawstack.AsyncJigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) + def generate_dates(start_date, num_days): diff --git a/tests/test_search.py b/tests/test_search.py deleted file mode 100644 index 1ee28f0..0000000 --- a/tests/test_search.py +++ /dev/null @@ -1,53 +0,0 @@ -from unittest.mock import MagicMock -import unittest -from jigsawstack.exceptions import JigsawStackError -import jigsawstack -import pytest -import asyncio -import logging - -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) - -jigsaw = jigsawstack.JigsawStack() -async_jigsaw = jigsawstack.AsyncJigsawStack() - - -def test_search_suggestion_response(): - try: - result = jigsaw.web.search({"query": "Where is San Francisco"}) - assert result["success"] == True - except JigsawStackError as e: - pytest.fail(f"Unexpected JigsawStackError: {e}") - - -def test_ai_search_response(): - try: - result = jigsaw.web.search({"query": "Where is San Francisco"}) - assert result["success"] == True - except JigsawStackError as e: - pytest.fail(f"Unexpected JigsawStackError: {e}") - - -def test_search_suggestion_response_async(): - async def _test(): - client = jigsawstack.AsyncJigsawStack() - try: - result = await client.web.search({"query": "Where is San Francisco"}) - assert result["success"] == True - except JigsawStackError as e: - pytest.fail(f"Unexpected JigsawStackError: {e}") - - asyncio.run(_test()) - - -def test_ai_search_response_async(): - async def _test(): - client = jigsawstack.AsyncJigsawStack() - try: - result = await client.web.search({"query": "Where is San Francisco"}) - assert result["success"] == True - except JigsawStackError as e: - pytest.fail(f"Unexpected JigsawStackError: {e}") - - asyncio.run(_test()) diff --git a/tests/test_sentiment.py b/tests/test_sentiment.py index e46e44f..8967562 100644 --- a/tests/test_sentiment.py +++ b/tests/test_sentiment.py @@ -10,12 +10,8 @@ logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) -jigsaw = jigsawstack.JigsawStack( - api_url="http://localhost:3000/api/", api_key=os.getenv("JIGSAWSTACK_API_KEY") -) -async_jigsaw = jigsawstack.AsyncJigsawStack( - api_url="http://localhost:3000/api/", api_key=os.getenv("JIGSAWSTACK_API_KEY") -) +jigsaw = jigsawstack.JigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) +async_jigsaw = jigsawstack.AsyncJigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) TEST_CASES = [ { diff --git a/tests/test_sql.py b/tests/test_sql.py index 397b855..71de82b 100644 --- a/tests/test_sql.py +++ b/tests/test_sql.py @@ -10,12 +10,8 @@ logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) -jigsaw = jigsawstack.JigsawStack( - api_url="http://localhost:3000/api/", api_key=os.getenv("JIGSAWSTACK_API_KEY") -) -async_jigsaw = jigsawstack.AsyncJigsawStack( - api_url="http://localhost:3000/api/", api_key=os.getenv("JIGSAWSTACK_API_KEY") -) +jigsaw = jigsawstack.JigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) +async_jigsaw = jigsawstack.AsyncJigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) # Sample schemas for different databases MYSQL_SCHEMA = """ diff --git a/tests/test_summary.py b/tests/test_summary.py index 46b5229..12125de 100644 --- a/tests/test_summary.py +++ b/tests/test_summary.py @@ -10,12 +10,8 @@ logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) -jigsaw = jigsawstack.JigsawStack( - api_url="http://localhost:3000/api/", api_key=os.getenv("JIGSAWSTACK_API_KEY") -) -async_jigsaw = jigsawstack.AsyncJigsawStack( - api_url="http://localhost:3000/api/", api_key=os.getenv("JIGSAWSTACK_API_KEY") -) +jigsaw = jigsawstack.JigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) +async_jigsaw = jigsawstack.AsyncJigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) LONG_TEXT = """ Artificial Intelligence (AI) has become one of the most transformative technologies of the 21st century. diff --git a/tests/test_validate.py b/tests/test_validate.py index f6219b1..9ad90bc 100644 --- a/tests/test_validate.py +++ b/tests/test_validate.py @@ -11,12 +11,8 @@ logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) -jigsaw = jigsawstack.JigsawStack( - api_url="http://localhost:3000/api/", api_key=os.getenv("JIGSAWSTACK_API_KEY") -) -async_jigsaw = jigsawstack.AsyncJigsawStack( - api_url="http://localhost:3000/api/", api_key=os.getenv("JIGSAWSTACK_API_KEY") -) +jigsaw = jigsawstack.JigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) +async_jigsaw = jigsawstack.AsyncJigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) # Sample URLs for NSFW testing SAFE_IMAGE_URL = ( From 408afeb9d92ccade67503329bece0c0e7b42e0aa Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 21:39:06 -0700 Subject: [PATCH 38/95] fix: formatting for test cases. --- tests/test_file_store.py | 2 +- tests/test_image_generation.py | 5 ++-- tests/test_web.py | 42 ++++++++++------------------------ 3 files changed, 15 insertions(+), 34 deletions(-) diff --git a/tests/test_file_store.py b/tests/test_file_store.py index c44090b..e48cc15 100644 --- a/tests/test_file_store.py +++ b/tests/test_file_store.py @@ -32,7 +32,7 @@ "name": "upload_image_with_temp_url", "file": BINARY_FILE_CONTENT, "options": { - "key": f"test_image.jpg", + "key": "test_image.jpg", "content_type": "image/jpeg", "overwrite": True, "temp_public_url": True, diff --git a/tests/test_image_generation.py b/tests/test_image_generation.py index fe2dc79..052fd6c 100644 --- a/tests/test_image_generation.py +++ b/tests/test_image_generation.py @@ -5,7 +5,6 @@ import logging from dotenv import load_dotenv import os -import base64 load_dotenv() logging.basicConfig(level=logging.INFO) @@ -166,7 +165,7 @@ def test_image_to_image_generation(self, test_case): assert result is not None if type(result) is dict: - assert result.get("success") == True + assert result.get("success") assert result.get("url") is not None elif type(result) is bytes: assert isinstance(result, bytes) @@ -217,7 +216,7 @@ async def test_image_to_image_generation_async(self, test_case): assert result is not None if type(result) is dict: - assert result.get("success") == True + assert result.get("success") assert result.get("url") is not None elif type(result) is bytes: assert isinstance(result, bytes) diff --git a/tests/test_web.py b/tests/test_web.py index 565d1e5..dda97c5 100644 --- a/tests/test_web.py +++ b/tests/test_web.py @@ -10,12 +10,8 @@ logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) -jigsaw = jigsawstack.JigsawStack( - api_url="http://localhost:3000/api/", api_key=os.getenv("JIGSAWSTACK_API_KEY") -) -async_jigsaw = jigsawstack.AsyncJigsawStack( - api_url="http://localhost:3000/api/", api_key=os.getenv("JIGSAWSTACK_API_KEY") -) +jigsaw = jigsawstack.JigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) +async_jigsaw = jigsawstack.AsyncJigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) URL = "https://jigsawstack.com" @@ -40,6 +36,7 @@ "name": "scrape_with_features", "params": { "url": URL, + "element_prompts": ["title"], "features": ["meta", "link"], }, }, @@ -51,14 +48,6 @@ "root_element_selector": "main", }, }, - { - "name": "scrape_with_wait_for_selector", - "params": { - "url": URL, - "element_prompts": ["dynamic content"], - "wait_for": {"mode": "selector", "value": ".loaded-content"}, - }, - }, { "name": "scrape_with_wait_for_timeout", "params": { @@ -186,13 +175,6 @@ "query": "artificial intelligence news", }, }, - { - "name": "search_with_max_results", - "params": { - "query": "python programming", - "max_results": 5, - }, - }, { "name": "search_specific_site", "params": { @@ -203,7 +185,7 @@ "name": "search_ai_mode", "params": { "query": "explain quantum computing", - "ai": True, + "ai_overview": True, }, }, ] @@ -248,7 +230,7 @@ "name": "deep_research_with_depth", "params": { "query": "renewable energy sources", - "depth": 3, + "depth": 2, }, }, ] @@ -381,9 +363,9 @@ def test_deep_research(self, test_case): result = jigsaw.web.deep_research(test_case["params"]) assert result["success"] - assert "report" in result - assert isinstance(result["report"], str) - assert len(result["report"]) > 0 + assert "results" in result + assert isinstance(result["results"], str) + assert len(result["results"]) > 0 # Check for sources if "sources" in result: @@ -483,7 +465,7 @@ async def test_search_async(self, test_case): assert "description" in item # Check AI mode response - if test_case["params"].get("ai"): + if test_case["params"].get("ai_overview"): assert "ai_overview" in result except JigsawStackError as e: @@ -528,9 +510,9 @@ async def test_deep_research_async(self, test_case): result = await async_jigsaw.web.deep_research(test_case["params"]) assert result["success"] - assert "report" in result - assert isinstance(result["report"], str) - assert len(result["report"]) > 0 + assert "results" in result + assert isinstance(result["results"], str) + assert len(result["results"]) > 0 # Check for sources if "sources" in result: From 7de35498bbc0bf4cc06f269b4a937ec6e1dab8ee Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 21:39:41 -0700 Subject: [PATCH 39/95] chore: rebase linter to ruff --- biome.json | 37 ------------------------------------- 1 file changed, 37 deletions(-) delete mode 100644 biome.json diff --git a/biome.json b/biome.json deleted file mode 100644 index 5ad6df5..0000000 --- a/biome.json +++ /dev/null @@ -1,37 +0,0 @@ -{ - "$schema": "https://biomejs.dev/schemas/1.9.4/schema.json", - "files": { - "ignoreUnknown": false, - "ignore": [] - }, - "formatter": { - "enabled": true, - "useEditorconfig": true, - "formatWithErrors": false, - "indentStyle": "space", - "indentWidth": 2, - "lineEnding": "lf", - "lineWidth": 150, - "attributePosition": "auto", - "bracketSpacing": true - }, - "organizeImports": { - "enabled": true - }, - "linter": { - "enabled": false - }, - "javascript": { - "formatter": { - "jsxQuoteStyle": "double", - "quoteProperties": "asNeeded", - "trailingCommas": "es5", - "semicolons": "always", - "arrowParentheses": "always", - "bracketSameLine": false, - "quoteStyle": "double", - "attributePosition": "auto", - "bracketSpacing": true - } - } -} From 3f920f8bcd5584447ae4e156bc464e53f8303e00 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 22:12:43 -0700 Subject: [PATCH 40/95] chore: setting up ci for formatting checks and testing. --- .github/ruff.toml | 24 ++++++++ .github/workflows/ci.yml | 117 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 141 insertions(+) create mode 100644 .github/ruff.toml create mode 100644 .github/workflows/ci.yml diff --git a/.github/ruff.toml b/.github/ruff.toml new file mode 100644 index 0000000..e431c02 --- /dev/null +++ b/.github/ruff.toml @@ -0,0 +1,24 @@ +# Ruff configuration for CI/CD +line-length = 88 +target-version = "py37" + +[lint] +select = [ + "E", # pycodestyle errors + "W", # pycodestyle warnings + "F", # pyflakes + "I", # isort + "B", # flake8-bugbear + "C4", # flake8-comprehensions + "UP", # pyupgrade +] +ignore = [ + "E501", # line too long (handled by formatter) + "B008", # do not perform function calls in argument defaults +] + +[format] +quote-style = "double" +indent-style = "space" +skip-magic-trailing-comma = false +line-ending = "auto" \ No newline at end of file diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..12b1e5c --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,117 @@ +name: CI + +on: + push: + branches: [ main, develop ] + pull_request: + branches: [ main, develop ] + +jobs: + ruff-format-check: + name: Ruff Format Check - ${{ matrix.file }} + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + file: + - __init__.py + - _config.py + - _types.py + - async_request.py + - audio.py + - classification.py + - embedding_v2.py + - embedding.py + - exceptions.py + - helpers.py + - image_generation.py + - prediction.py + - prompt_engine.py + - request.py + - search.py + - sentiment.py + - sql.py + - store.py + - summary.py + - translate.py + - validate.py + - vision.py + - web.py + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.12' + + - name: Install ruff + run: pip install ruff + + - name: Check formatting for ${{ matrix.file }} + run: | + ruff check jigsawstack/${{ matrix.file }} --select I,F,E,W + ruff format --check jigsawstack/${{ matrix.file }} + + test: + name: Test - ${{ matrix.test-file }} + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + test-file: + - test_audio.py + - test_classification.py + - test_embedding.py + - test_file_store.py + - test_geo.py + - test_image_generation.py + - test_object_detection.py + - test_prediction.py + - test_sentiment.py + - test_sql.py + - test_summary.py + - test_translate.py + - test_validate.py + - test_vision.py + - test_web.py + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.12' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + pip install pytest pytest-asyncio pytest-cov + pip install -e . + + - name: Run test ${{ matrix.test-file }} + env: + JIGSAWSTACK_API_KEY: ${{ secrets.JIGSAWSTACK_API_KEY }} + run: | + pytest tests/${{ matrix.test-file }} -v + continue-on-error: true + + - name: Check if critical tests passed + if: contains(matrix.test-file, 'test_') && !contains(matrix.test-file, 'skip') + run: | + pytest tests/${{ matrix.test-file }} -v -m "not skip" + + all-checks-passed: + name: All Checks Passed + needs: [ruff-format-check, test] + runs-on: ubuntu-latest + if: always() + steps: + - name: Check if all jobs passed + run: | + if [[ "${{ needs.ruff-format-check.result }}" != "success" || "${{ needs.test.result }}" != "success" ]]; then + echo "One or more checks failed" + exit 1 + fi + echo "All checks passed successfully!" \ No newline at end of file From 592c5648afb4375b30800c54ef22035acdb90003 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 22:23:23 -0700 Subject: [PATCH 41/95] chore: track success count for ci/cd merge. --- .github/workflows/ci.yml | 57 +++++++++++++++++++++++++++++++++------- 1 file changed, 47 insertions(+), 10 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 12b1e5c..da6881a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -64,7 +64,6 @@ jobs: - test_classification.py - test_embedding.py - test_file_store.py - - test_geo.py - test_image_generation.py - test_object_detection.py - test_prediction.py @@ -75,6 +74,8 @@ jobs: - test_validate.py - test_vision.py - test_web.py + outputs: + test-result: ${{ steps.test-run.outcome }} steps: - uses: actions/checkout@v4 @@ -91,16 +92,29 @@ jobs: pip install -e . - name: Run test ${{ matrix.test-file }} + id: test-run env: JIGSAWSTACK_API_KEY: ${{ secrets.JIGSAWSTACK_API_KEY }} run: | - pytest tests/${{ matrix.test-file }} -v - continue-on-error: true + pytest tests/${{ matrix.test-file }} -v --json-report --json-report-file=report.json - - name: Check if critical tests passed - if: contains(matrix.test-file, 'test_') && !contains(matrix.test-file, 'skip') + - name: Count passed tests + id: count-tests + if: always() run: | - pytest tests/${{ matrix.test-file }} -v -m "not skip" + if [ -f report.json ]; then + PASSED=$(python -c "import json; data=json.load(open('report.json')); print(data.get('summary', {}).get('passed', 0))") + echo "passed-count=$PASSED" >> $GITHUB_OUTPUT + else + echo "passed-count=0" >> $GITHUB_OUTPUT + fi + + - name: Upload test results + if: always() + uses: actions/upload-artifact@v3 + with: + name: test-results-${{ matrix.test-file }} + path: report.json all-checks-passed: name: All Checks Passed @@ -108,10 +122,33 @@ jobs: runs-on: ubuntu-latest if: always() steps: - - name: Check if all jobs passed + - name: Download all test results + uses: actions/download-artifact@v3 + with: + path: test-results + + - name: Count total passed tests run: | - if [[ "${{ needs.ruff-format-check.result }}" != "success" || "${{ needs.test.result }}" != "success" ]]; then - echo "One or more checks failed" + TOTAL_PASSED=0 + for file in test-results/*/report.json; do + if [ -f "$file" ]; then + PASSED=$(python -c "import json; data=json.load(open('$file')); print(data.get('summary', {}).get('passed', 0))") + TOTAL_PASSED=$((TOTAL_PASSED + PASSED)) + fi + done + + echo "Total passed tests: $TOTAL_PASSED" + + if [ $TOTAL_PASSED -lt 327 ]; then + echo "❌ Insufficient tests passed: $TOTAL_PASSED/327" exit 1 + else + echo "✅ Required tests passed: $TOTAL_PASSED/327" fi - echo "All checks passed successfully!" \ No newline at end of file + + - name: Check if ruff passed + run: | + if [[ "${{ needs.ruff-format-check.result }}" != "success" ]]; then + echo "Ruff format check failed" + exit 1 + fi \ No newline at end of file From 083874aed8f4202ecb5aff0596251a9092258777 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 22:25:23 -0700 Subject: [PATCH 42/95] chore: loosen ruff constraints. --- .github/ruff.toml | 7 ------- 1 file changed, 7 deletions(-) diff --git a/.github/ruff.toml b/.github/ruff.toml index e431c02..6b655a6 100644 --- a/.github/ruff.toml +++ b/.github/ruff.toml @@ -13,12 +13,5 @@ select = [ "UP", # pyupgrade ] ignore = [ - "E501", # line too long (handled by formatter) "B008", # do not perform function calls in argument defaults ] - -[format] -quote-style = "double" -indent-style = "space" -skip-magic-trailing-comma = false -line-ending = "auto" \ No newline at end of file From 302d7ad2bb3659998753a29215a42787ec9104b5 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 22:29:06 -0700 Subject: [PATCH 43/95] fix: artifacts to use v4 since v3 upload and download artifacts were depricated by GitHub earlier this year. --- .github/workflows/ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index da6881a..2d0f6df 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -111,7 +111,7 @@ jobs: - name: Upload test results if: always() - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: test-results-${{ matrix.test-file }} path: report.json @@ -123,7 +123,7 @@ jobs: if: always() steps: - name: Download all test results - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: path: test-results From be5c5a7ab8693429536fdb1c98a2554d1bd6d306 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 22:34:18 -0700 Subject: [PATCH 44/95] fix: formatting with ruff. --- jigsawstack/__init__.py | 33 ++++++++++---------- jigsawstack/async_request.py | 8 +++-- jigsawstack/audio.py | 11 ++++--- jigsawstack/classification.py | 8 +++-- jigsawstack/embedding.py | 10 +++--- jigsawstack/embedding_v2.py | 10 +++--- jigsawstack/image_generation.py | 7 +++-- jigsawstack/prediction.py | 5 +-- jigsawstack/prompt_engine.py | 8 +++-- jigsawstack/request.py | 6 ++-- jigsawstack/search.py | 10 +++--- jigsawstack/sentiment.py | 6 ++-- jigsawstack/sql.py | 8 +++-- jigsawstack/store.py | 6 ++-- jigsawstack/summary.py | 8 +++-- jigsawstack/translate.py | 10 +++--- jigsawstack/validate.py | 8 +++-- jigsawstack/vision.py | 12 +++++--- jigsawstack/web.py | 17 ++++++----- tests/test_audio.py | 38 ++++++++++++----------- tests/test_classification.py | 10 +++--- tests/test_embedding.py | 12 +++++--- tests/test_file_store.py | 54 +++++++++++++++++---------------- tests/test_image_generation.py | 36 +++++++++++----------- tests/test_object_detection.py | 12 +++++--- tests/test_prediction.py | 10 +++--- tests/test_sentiment.py | 10 +++--- tests/test_sql.py | 10 +++--- tests/test_summary.py | 10 +++--- tests/test_translate.py | 22 ++++++++------ tests/test_validate.py | 12 +++++--- 31 files changed, 242 insertions(+), 185 deletions(-) diff --git a/jigsawstack/__init__.py b/jigsawstack/__init__.py index 2c5d775..091f775 100644 --- a/jigsawstack/__init__.py +++ b/jigsawstack/__init__.py @@ -1,22 +1,23 @@ -from typing import Union, Dict import os -from .audio import Audio, AsyncAudio -from .vision import Vision, AsyncVision +from typing import Dict, Union + +from .audio import AsyncAudio, Audio +from .classification import AsyncClassification, Classification +from .embedding import AsyncEmbedding, Embedding +from .embedding_v2 import AsyncEmbeddingV2, EmbeddingV2 +from .exceptions import JigsawStackError +from .image_generation import AsyncImageGeneration, ImageGeneration +from .prediction import AsyncPrediction, Prediction +from .prompt_engine import AsyncPromptEngine, PromptEngine from .search import Search -from .prediction import Prediction, AsyncPrediction +from .sentiment import AsyncSentiment, Sentiment from .sql import SQL, AsyncSQL -from .store import Store, AsyncStore -from .translate import Translate, AsyncTranslate -from .web import Web, AsyncWeb -from .sentiment import Sentiment, AsyncSentiment -from .validate import Validate, AsyncValidate -from .summary import Summary, AsyncSummary -from .embedding import Embedding, AsyncEmbedding -from .exceptions import JigsawStackError -from .image_generation import ImageGeneration, AsyncImageGeneration -from .classification import Classification, AsyncClassification -from .prompt_engine import PromptEngine, AsyncPromptEngine -from .embedding_v2 import EmbeddingV2, AsyncEmbeddingV2 +from .store import AsyncStore, Store +from .summary import AsyncSummary, Summary +from .translate import AsyncTranslate, Translate +from .validate import AsyncValidate, Validate +from .vision import AsyncVision, Vision +from .web import AsyncWeb, Web class JigsawStack: diff --git a/jigsawstack/async_request.py b/jigsawstack/async_request.py index 4f90a2c..8d7bfb1 100644 --- a/jigsawstack/async_request.py +++ b/jigsawstack/async_request.py @@ -1,9 +1,11 @@ -from typing import Any, Dict, Generic, List, Union, cast, TypedDict, AsyncGenerator +import json +from io import BytesIO +from typing import Any, AsyncGenerator, Dict, Generic, List, TypedDict, Union, cast + import aiohttp from typing_extensions import Literal, TypeVar + from .exceptions import NoContentError, raise_for_code_and_type -import json -from io import BytesIO RequestVerb = Literal["get", "post", "put", "patch", "delete"] diff --git a/jigsawstack/audio.py b/jigsawstack/audio.py index 9e6dfd0..cadfd25 100644 --- a/jigsawstack/audio.py +++ b/jigsawstack/audio.py @@ -1,10 +1,11 @@ -from typing import Any, Dict, List, cast, Union, Optional, overload -from typing_extensions import NotRequired, TypedDict -from .request import Request, RequestConfig -from .async_request import AsyncRequest, AsyncRequestConfig +from typing import Any, Dict, List, Optional, Union, cast, overload + +from typing_extensions import Literal, NotRequired, TypedDict + from ._config import ClientConfig -from typing_extensions import Literal from ._types import BaseResponse +from .async_request import AsyncRequest, AsyncRequestConfig +from .request import Request, RequestConfig class SpeechToTextParams(TypedDict): diff --git a/jigsawstack/classification.py b/jigsawstack/classification.py index a53ed87..45407e9 100644 --- a/jigsawstack/classification.py +++ b/jigsawstack/classification.py @@ -1,9 +1,11 @@ from typing import Any, Dict, List, Union, cast -from typing_extensions import NotRequired, TypedDict, Literal -from .request import Request, RequestConfig -from .async_request import AsyncRequest, AsyncRequestConfig + +from typing_extensions import Literal, NotRequired, TypedDict + from ._config import ClientConfig from ._types import BaseResponse +from .async_request import AsyncRequest, AsyncRequestConfig +from .request import Request, RequestConfig class DatasetItem(TypedDict): diff --git a/jigsawstack/embedding.py b/jigsawstack/embedding.py index e37e856..4957cde 100644 --- a/jigsawstack/embedding.py +++ b/jigsawstack/embedding.py @@ -1,10 +1,12 @@ -from typing import Any, Dict, List, Union, cast, Literal, overload +from typing import Any, Dict, List, Literal, Union, cast, overload + from typing_extensions import NotRequired, TypedDict -from .request import Request, RequestConfig -from .async_request import AsyncRequest + from ._config import ClientConfig -from .helpers import build_path from ._types import BaseResponse +from .async_request import AsyncRequest +from .helpers import build_path +from .request import Request, RequestConfig class EmbeddingParams(TypedDict): diff --git a/jigsawstack/embedding_v2.py b/jigsawstack/embedding_v2.py index 64c7d11..e944ee4 100644 --- a/jigsawstack/embedding_v2.py +++ b/jigsawstack/embedding_v2.py @@ -1,10 +1,12 @@ -from typing import Any, Dict, List, Union, cast, Literal, overload +from typing import Any, Dict, List, Literal, Union, cast, overload + from typing_extensions import NotRequired, TypedDict -from .request import Request, RequestConfig -from .async_request import AsyncRequest + from ._config import ClientConfig -from .helpers import build_path +from .async_request import AsyncRequest from .embedding import Chunk +from .helpers import build_path +from .request import Request, RequestConfig class EmbeddingV2Params(TypedDict): diff --git a/jigsawstack/image_generation.py b/jigsawstack/image_generation.py index d615b6d..525b653 100644 --- a/jigsawstack/image_generation.py +++ b/jigsawstack/image_generation.py @@ -1,9 +1,10 @@ from typing import Any, Dict, Union, cast -from typing_extensions import NotRequired, TypedDict, Literal, Required -from .request import Request, RequestConfig -from .async_request import AsyncRequest + +from typing_extensions import Literal, NotRequired, Required, TypedDict from ._config import ClientConfig +from .async_request import AsyncRequest +from .request import Request, RequestConfig class AdvanceConfig(TypedDict): diff --git a/jigsawstack/prediction.py b/jigsawstack/prediction.py index 84bfbf9..8517bda 100644 --- a/jigsawstack/prediction.py +++ b/jigsawstack/prediction.py @@ -1,10 +1,11 @@ from typing import Any, Dict, List, Union, cast + from typing_extensions import TypedDict -from .request import Request, RequestConfig -from .async_request import AsyncRequest from ._config import ClientConfig from ._types import BaseResponse +from .async_request import AsyncRequest +from .request import Request, RequestConfig class Dataset(TypedDict): diff --git a/jigsawstack/prompt_engine.py b/jigsawstack/prompt_engine.py index 62416e7..6411c66 100644 --- a/jigsawstack/prompt_engine.py +++ b/jigsawstack/prompt_engine.py @@ -1,9 +1,11 @@ -from typing import Any, Dict, List, Union, cast, Generator, Literal +from typing import Any, Dict, Generator, List, Literal, Union, cast + from typing_extensions import NotRequired, TypedDict -from .request import Request, RequestConfig -from .async_request import AsyncRequest + from ._config import ClientConfig +from .async_request import AsyncRequest from .helpers import build_path +from .request import Request, RequestConfig class PromptEngineResult(TypedDict): diff --git a/jigsawstack/request.py b/jigsawstack/request.py index 68ac675..84d9f3d 100644 --- a/jigsawstack/request.py +++ b/jigsawstack/request.py @@ -1,8 +1,10 @@ -from typing import Any, Dict, Generic, List, Union, cast, TypedDict, Generator +import json +from typing import Any, Dict, Generator, Generic, List, TypedDict, Union, cast + import requests from typing_extensions import Literal, TypeVar + from .exceptions import NoContentError, raise_for_code_and_type -import json RequestVerb = Literal["get", "post", "put", "patch", "delete"] diff --git a/jigsawstack/search.py b/jigsawstack/search.py index 3b80bca..a607a1c 100644 --- a/jigsawstack/search.py +++ b/jigsawstack/search.py @@ -1,9 +1,11 @@ -from typing import Any, Dict, List, Union, cast, Literal -from typing_extensions import NotRequired, TypedDict, Optional -from .request import Request, RequestConfig -from .async_request import AsyncRequest, AsyncRequestConfig +from typing import Any, Dict, List, Literal, Optional, Union, cast + +from typing_extensions import NotRequired, TypedDict + from ._config import ClientConfig from ._types import BaseResponse +from .async_request import AsyncRequest, AsyncRequestConfig +from .request import Request, RequestConfig class RelatedIndex(TypedDict): diff --git a/jigsawstack/sentiment.py b/jigsawstack/sentiment.py index 805dd80..ef5e9df 100644 --- a/jigsawstack/sentiment.py +++ b/jigsawstack/sentiment.py @@ -1,9 +1,11 @@ from typing import Any, Dict, List, Union, cast + from typing_extensions import TypedDict -from .request import Request, RequestConfig -from .async_request import AsyncRequest + from ._config import ClientConfig from ._types import BaseResponse +from .async_request import AsyncRequest +from .request import Request, RequestConfig class SentimentParams(TypedDict): diff --git a/jigsawstack/sql.py b/jigsawstack/sql.py index efac7be..b895485 100644 --- a/jigsawstack/sql.py +++ b/jigsawstack/sql.py @@ -1,9 +1,11 @@ -from typing import Any, Dict, Union, cast, Literal +from typing import Any, Dict, Literal, Union, cast + from typing_extensions import NotRequired, TypedDict -from .request import Request, RequestConfig -from .async_request import AsyncRequest + from ._config import ClientConfig from ._types import BaseResponse +from .async_request import AsyncRequest +from .request import Request, RequestConfig class SQLParams(TypedDict): diff --git a/jigsawstack/store.py b/jigsawstack/store.py index 878a767..1fe5f33 100644 --- a/jigsawstack/store.py +++ b/jigsawstack/store.py @@ -1,9 +1,11 @@ from typing import Any, Union + from typing_extensions import NotRequired, TypedDict -from .request import Request, RequestConfig -from .async_request import AsyncRequest, AsyncRequestConfig + from ._config import ClientConfig +from .async_request import AsyncRequest, AsyncRequestConfig from .helpers import build_path +from .request import Request, RequestConfig class FileDeleteResponse(TypedDict): diff --git a/jigsawstack/summary.py b/jigsawstack/summary.py index 898312d..0d19b39 100644 --- a/jigsawstack/summary.py +++ b/jigsawstack/summary.py @@ -1,9 +1,11 @@ -from typing import Any, Dict, List, Union, cast, Literal +from typing import Any, Dict, List, Literal, Union, cast + from typing_extensions import NotRequired, TypedDict -from .request import Request, RequestConfig -from .async_request import AsyncRequest + from ._config import ClientConfig from ._types import BaseResponse +from .async_request import AsyncRequest +from .request import Request, RequestConfig class SummaryParams(TypedDict): diff --git a/jigsawstack/translate.py b/jigsawstack/translate.py index 0b95ef0..63b7fa5 100644 --- a/jigsawstack/translate.py +++ b/jigsawstack/translate.py @@ -1,10 +1,12 @@ from typing import Any, Dict, List, Union, cast, overload -from typing_extensions import NotRequired, TypedDict, Literal -from .request import Request, RequestConfig -from .async_request import AsyncRequest + +from typing_extensions import Literal, NotRequired, TypedDict + from ._config import ClientConfig -from .helpers import build_path from ._types import BaseResponse +from .async_request import AsyncRequest +from .helpers import build_path +from .request import Request, RequestConfig class TranslateImageParams(TypedDict): diff --git a/jigsawstack/validate.py b/jigsawstack/validate.py index 3565ac9..fc57c3c 100644 --- a/jigsawstack/validate.py +++ b/jigsawstack/validate.py @@ -1,10 +1,12 @@ from typing import Any, Dict, List, Union, cast, overload + from typing_extensions import NotRequired, TypedDict -from .request import Request, RequestConfig -from .async_request import AsyncRequest, AsyncRequestConfig + from ._config import ClientConfig -from .helpers import build_path from ._types import BaseResponse +from .async_request import AsyncRequest, AsyncRequestConfig +from .helpers import build_path +from .request import Request, RequestConfig class Spam(TypedDict): diff --git a/jigsawstack/vision.py b/jigsawstack/vision.py index 49191af..452291b 100644 --- a/jigsawstack/vision.py +++ b/jigsawstack/vision.py @@ -1,9 +1,11 @@ -from typing import Any, Dict, List, Union, cast, Optional, overload -from typing_extensions import NotRequired, TypedDict, Literal -from .request import Request, RequestConfig -from .async_request import AsyncRequest, AsyncRequestConfig +from typing import Any, Dict, List, Optional, Union, cast, overload + +from typing_extensions import Literal, NotRequired, TypedDict + from ._config import ClientConfig from ._types import BaseResponse +from .async_request import AsyncRequest, AsyncRequestConfig +from .request import Request, RequestConfig class Point(TypedDict): @@ -239,7 +241,7 @@ def object_detection( verb="post", ).perform_with_content() return resp - + content_type = options.get("content_type", "application/octet-stream") headers = {"Content-Type": content_type} diff --git a/jigsawstack/web.py b/jigsawstack/web.py index 58d9307..5ebf206 100644 --- a/jigsawstack/web.py +++ b/jigsawstack/web.py @@ -1,20 +1,21 @@ -from typing import Any, Dict, List, Union, Optional, cast, Literal, overload +from typing import Any, Dict, List, Literal, Optional, Union, cast, overload + from typing_extensions import NotRequired, TypedDict -from .request import Request, RequestConfig -from .async_request import AsyncRequest, AsyncRequestConfig from ._config import ClientConfig +from ._types import BaseResponse +from .async_request import AsyncRequest, AsyncRequestConfig +from .request import Request, RequestConfig from .search import ( + AsyncSearch, + DeepResearchParams, + DeepResearchResponse, Search, SearchParams, + SearchResponse, SearchSuggestionsParams, SearchSuggestionsResponse, - SearchResponse, - AsyncSearch, - DeepResearchParams, - DeepResearchResponse, ) -from ._types import BaseResponse class GotoOptions(TypedDict): diff --git a/tests/test_audio.py b/tests/test_audio.py index 1345621..3ba499e 100644 --- a/tests/test_audio.py +++ b/tests/test_audio.py @@ -1,11 +1,13 @@ -import requests -from jigsawstack.exceptions import JigsawStackError -import jigsawstack -import pytest import logging -from dotenv import load_dotenv import os +import pytest +import requests +from dotenv import load_dotenv + +import jigsawstack +from jigsawstack.exceptions import JigsawStackError + load_dotenv() logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) @@ -142,7 +144,7 @@ class TestAudioSync: """Test synchronous audio speech-to-text methods""" - + @pytest.mark.parametrize("test_case", TEST_CASES, ids=[tc["name"] for tc in TEST_CASES]) def test_speech_to_text(self, test_case): """Test synchronous speech-to-text with various inputs""" @@ -160,18 +162,18 @@ def test_speech_to_text(self, test_case): # Verify response structure assert result["success"] assert result.get("text", None) is not None and isinstance(result["text"], str) - + # Check for chunks if result.get("chunks", None): assert isinstance(result["chunks"], list) - + # Check for speaker diarization if requested if result.get("speakers", None): assert isinstance(result["speakers"], list) except JigsawStackError as e: pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") - + @pytest.mark.parametrize("test_case", WEBHOOK_TEST_CASES, ids=[tc["name"] for tc in WEBHOOK_TEST_CASES]) def test_speech_to_text_webhook(self, test_case): """Test synchronous speech-to-text with webhook""" @@ -188,7 +190,7 @@ def test_speech_to_text_webhook(self, test_case): result = jigsaw.audio.speech_to_text(test_case["params"]) # Verify webhook response structure assert result["success"] - + except JigsawStackError as e: # Webhook URLs might fail if invalid print(f"Expected possible error for webhook test {test_case['name']}: {e}") @@ -196,7 +198,7 @@ def test_speech_to_text_webhook(self, test_case): class TestAudioAsync: """Test asynchronous audio speech-to-text methods""" - + @pytest.mark.parametrize("test_case", TEST_CASES, ids=[tc["name"] for tc in TEST_CASES]) @pytest.mark.asyncio async def test_speech_to_text_async(self, test_case): @@ -212,21 +214,21 @@ async def test_speech_to_text_async(self, test_case): else: # Use params directly result = await async_jigsaw.audio.speech_to_text(test_case["params"]) - + # Verify response structure assert result["success"] assert result.get("text", None) is not None and isinstance(result["text"], str) - + # Check for chunks if result.get("chunks", None): assert isinstance(result["chunks"], list) - + # Check for speaker diarization if requested if result.get("speakers", None): assert isinstance(result["speakers"], list) except JigsawStackError as e: pytest.fail(f"Unexpected JigsawStackError in async {test_case['name']}: {e}") - + @pytest.mark.parametrize("test_case", WEBHOOK_TEST_CASES, ids=[tc["name"] for tc in WEBHOOK_TEST_CASES]) @pytest.mark.asyncio async def test_speech_to_text_webhook_async(self, test_case): @@ -242,12 +244,12 @@ async def test_speech_to_text_webhook_async(self, test_case): else: # Use params directly result = await async_jigsaw.audio.speech_to_text(test_case["params"]) - + print(f"Async test {test_case['name']}: Webhook response") - + # Verify webhook response structure assert result["success"] except JigsawStackError as e: # Webhook URLs might fail if invalid - print(f"Expected possible error for async webhook test {test_case['name']}: {e}") \ No newline at end of file + print(f"Expected possible error for async webhook test {test_case['name']}: {e}") diff --git a/tests/test_classification.py b/tests/test_classification.py index a5cf66c..dba924a 100644 --- a/tests/test_classification.py +++ b/tests/test_classification.py @@ -1,10 +1,12 @@ -from jigsawstack.exceptions import JigsawStackError -import jigsawstack -import pytest import logging -from dotenv import load_dotenv import os +import pytest +from dotenv import load_dotenv + +import jigsawstack +from jigsawstack.exceptions import JigsawStackError + load_dotenv() logging.basicConfig(level=logging.INFO) diff --git a/tests/test_embedding.py b/tests/test_embedding.py index c5b08f5..4464ae4 100644 --- a/tests/test_embedding.py +++ b/tests/test_embedding.py @@ -1,11 +1,13 @@ -import requests -from jigsawstack.exceptions import JigsawStackError -import jigsawstack -import pytest import logging -from dotenv import load_dotenv import os +import pytest +import requests +from dotenv import load_dotenv + +import jigsawstack +from jigsawstack.exceptions import JigsawStackError + load_dotenv() logging.basicConfig(level=logging.INFO) diff --git a/tests/test_file_store.py b/tests/test_file_store.py index e48cc15..8cee658 100644 --- a/tests/test_file_store.py +++ b/tests/test_file_store.py @@ -1,12 +1,14 @@ -import requests -from jigsawstack.exceptions import JigsawStackError -import jigsawstack -import pytest import logging -from dotenv import load_dotenv import os import uuid +import pytest +import requests +from dotenv import load_dotenv + +import jigsawstack +from jigsawstack.exceptions import JigsawStackError + load_dotenv() logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) @@ -55,30 +57,30 @@ class TestFileStoreSync: """Test synchronous file store operations""" - + uploaded_keys = [] # Track uploaded files for cleanup - + @pytest.mark.parametrize("test_case", TEST_CASES_UPLOAD, ids=[tc["name"] for tc in TEST_CASES_UPLOAD]) def test_file_upload(self, test_case): """Test synchronous file upload with various options""" try: result = jigsaw.store.upload(test_case["file"], test_case["options"]) - + print(f"Upload test {test_case['name']}: {result}") assert result.get("key") is not None assert result.get("url") is not None assert result.get("size") > 0 - + # Check temp_public_url if requested if test_case.get("options") and test_case["options"].get("temp_public_url"): assert result.get("temp_public_url") is not None - + # Store key for cleanup self.uploaded_keys.append(result["key"]) - + except JigsawStackError as e: pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") - + def test_file_get(self): """Test synchronous file retrieval""" # First upload a file to retrieve @@ -88,46 +90,46 @@ def test_file_get(self): TEXT_FILE_CONTENT, {"key": test_key, "content_type": "text/plain"} ) - + # Now retrieve it file_content = jigsaw.store.get(upload_result["key"]) assert file_content is not None print(f"Retrieved file with key {upload_result['key']}") - + # Cleanup self.uploaded_keys.append(upload_result["key"]) - + except JigsawStackError as e: pytest.fail(f"Unexpected JigsawStackError in file get: {e}") class TestFileStoreAsync: """Test asynchronous file store operations""" - + uploaded_keys = [] # Track uploaded files for cleanup - + @pytest.mark.parametrize("test_case", TEST_CASES_UPLOAD, ids=[tc["name"] for tc in TEST_CASES_UPLOAD]) @pytest.mark.asyncio async def test_file_upload_async(self, test_case): """Test asynchronous file upload with various options""" try: result = await async_jigsaw.store.upload(test_case["file"], test_case["options"]) - + print(f"Async upload test {test_case['name']}: {result}") assert result.get("key") is not None assert result.get("url") is not None assert result.get("size") > 0 - + # Check temp_public_url if requested if test_case.get("options") and test_case["options"].get("temp_public_url"): assert result.get("temp_public_url") is not None - + # Store key for cleanup self.uploaded_keys.append(result["key"]) - + except JigsawStackError as e: pytest.fail(f"Unexpected JigsawStackError in async {test_case['name']}: {e}") - + @pytest.mark.asyncio async def test_file_get_async(self): """Test asynchronous file retrieval""" @@ -138,14 +140,14 @@ async def test_file_get_async(self): TEXT_FILE_CONTENT, {"key": test_key, "content_type": "text/plain"} ) - + # Now retrieve it file_content = await async_jigsaw.store.get(upload_result["key"]) assert file_content is not None print(f"Async retrieved file with key {upload_result['key']}") - + # Cleanup self.uploaded_keys.append(upload_result["key"]) - + except JigsawStackError as e: - pytest.fail(f"Unexpected JigsawStackError in async file get: {e}") \ No newline at end of file + pytest.fail(f"Unexpected JigsawStackError in async file get: {e}") diff --git a/tests/test_image_generation.py b/tests/test_image_generation.py index 052fd6c..1a4342e 100644 --- a/tests/test_image_generation.py +++ b/tests/test_image_generation.py @@ -1,11 +1,13 @@ -import requests -from jigsawstack.exceptions import JigsawStackError -import jigsawstack -import pytest import logging -from dotenv import load_dotenv import os +import pytest +import requests +from dotenv import load_dotenv + +import jigsawstack +from jigsawstack.exceptions import JigsawStackError + load_dotenv() logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) @@ -122,7 +124,7 @@ class TestImageGenerationSync: """Test synchronous image generation methods""" - + @pytest.mark.parametrize("test_case", TEST_CASES, ids=[tc["name"] for tc in TEST_CASES]) def test_image_generation(self, test_case): """Test synchronous image generation with various parameters""" @@ -150,20 +152,20 @@ def test_image_generation(self, test_case): assert requests.get(result["url"]).status_code == 200 else: assert isinstance(result, bytes) - + except JigsawStackError as e: pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") - + @pytest.mark.parametrize("test_case", IMAGE_TO_IMAGE_TEST_CASES[:1], ids=[tc["name"] for tc in IMAGE_TO_IMAGE_TEST_CASES[:1]]) def test_image_to_image_generation(self, test_case): """Test image-to-image generation with URL input""" try: - + result = jigsaw.image_generation(test_case["params"]) - + print(f"Test {test_case['name']}: Generated image from input") assert result is not None - + if type(result) is dict: assert result.get("success") assert result.get("url") is not None @@ -177,16 +179,16 @@ def test_image_to_image_generation(self, test_case): class TestImageGenerationAsync: """Test asynchronous image generation methods""" - + @pytest.mark.parametrize("test_case", TEST_CASES, ids=[tc["name"] for tc in TEST_CASES]) @pytest.mark.asyncio async def test_image_generation_async(self, test_case): """Test asynchronous image generation with various parameters""" try: result = await async_jigsaw.image_generation(test_case["params"]) - + print(f"Async test {test_case['name']}: Generated image") - + # Check response structure assert result is not None if type(result) is dict: @@ -203,10 +205,10 @@ async def test_image_generation_async(self, test_case): assert requests.get(result["url"]).status_code == 200 else: assert isinstance(result, bytes) - + except JigsawStackError as e: pytest.fail(f"Unexpected JigsawStackError in async {test_case['name']}: {e}") - + @pytest.mark.parametrize("test_case", IMAGE_TO_IMAGE_TEST_CASES[:1], ids=[tc["name"] for tc in IMAGE_TO_IMAGE_TEST_CASES[:1]]) @pytest.mark.asyncio async def test_image_to_image_generation_async(self, test_case): @@ -224,4 +226,4 @@ async def test_image_to_image_generation_async(self, test_case): pytest.fail(f"Unexpected result type in {test_case['name']}: {type(result)}") except JigsawStackError as e: - pytest.fail(f"Unexpected JigsawStackError in async {test_case['name']}: {e}") \ No newline at end of file + pytest.fail(f"Unexpected JigsawStackError in async {test_case['name']}: {e}") diff --git a/tests/test_object_detection.py b/tests/test_object_detection.py index 8c0f409..b6d8d78 100644 --- a/tests/test_object_detection.py +++ b/tests/test_object_detection.py @@ -1,11 +1,13 @@ -import requests -from jigsawstack.exceptions import JigsawStackError -import jigsawstack -import pytest import logging -from dotenv import load_dotenv import os +import pytest +import requests +from dotenv import load_dotenv + +import jigsawstack +from jigsawstack.exceptions import JigsawStackError + load_dotenv() diff --git a/tests/test_prediction.py b/tests/test_prediction.py index 48ba6f7..f38b016 100644 --- a/tests/test_prediction.py +++ b/tests/test_prediction.py @@ -1,11 +1,13 @@ -from jigsawstack.exceptions import JigsawStackError -import jigsawstack -import pytest import logging -from dotenv import load_dotenv import os from datetime import datetime, timedelta +import pytest +from dotenv import load_dotenv + +import jigsawstack +from jigsawstack.exceptions import JigsawStackError + load_dotenv() logging.basicConfig(level=logging.INFO) diff --git a/tests/test_sentiment.py b/tests/test_sentiment.py index 8967562..c184ec1 100644 --- a/tests/test_sentiment.py +++ b/tests/test_sentiment.py @@ -1,10 +1,12 @@ -from jigsawstack.exceptions import JigsawStackError -import jigsawstack -import pytest import logging -from dotenv import load_dotenv import os +import pytest +from dotenv import load_dotenv + +import jigsawstack +from jigsawstack.exceptions import JigsawStackError + load_dotenv() logging.basicConfig(level=logging.INFO) diff --git a/tests/test_sql.py b/tests/test_sql.py index 71de82b..822ae18 100644 --- a/tests/test_sql.py +++ b/tests/test_sql.py @@ -1,10 +1,12 @@ -from jigsawstack.exceptions import JigsawStackError -import jigsawstack -import pytest import logging -from dotenv import load_dotenv import os +import pytest +from dotenv import load_dotenv + +import jigsawstack +from jigsawstack.exceptions import JigsawStackError + load_dotenv() logging.basicConfig(level=logging.INFO) diff --git a/tests/test_summary.py b/tests/test_summary.py index 12125de..5d66335 100644 --- a/tests/test_summary.py +++ b/tests/test_summary.py @@ -1,10 +1,12 @@ -from jigsawstack.exceptions import JigsawStackError -import jigsawstack -import pytest import logging -from dotenv import load_dotenv import os +import pytest +from dotenv import load_dotenv + +import jigsawstack +from jigsawstack.exceptions import JigsawStackError + load_dotenv() logging.basicConfig(level=logging.INFO) diff --git a/tests/test_translate.py b/tests/test_translate.py index 7c903c7..f556fca 100644 --- a/tests/test_translate.py +++ b/tests/test_translate.py @@ -1,11 +1,13 @@ -import requests -from jigsawstack.exceptions import JigsawStackError -import jigsawstack -import pytest import logging -from dotenv import load_dotenv import os +import pytest +import requests +from dotenv import load_dotenv + +import jigsawstack +from jigsawstack.exceptions import JigsawStackError + load_dotenv() logging.basicConfig(level=logging.INFO) @@ -138,14 +140,14 @@ def test_translate_text(self, test_case): result = jigsaw.translate.text(test_case["params"]) assert result["success"] assert "translated_text" in result - + # Check if the response structure matches the input if isinstance(test_case["params"]["text"], list): assert isinstance(result["translated_text"], list) assert len(result["translated_text"]) == len(test_case["params"]["text"]) else: assert isinstance(result["translated_text"], str) - + except JigsawStackError as e: pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") @@ -165,14 +167,14 @@ async def test_translate_text_async(self, test_case): result = await async_jigsaw.translate.text(test_case["params"]) assert result["success"] assert "translated_text" in result - + # Check if the response structure matches the input if isinstance(test_case["params"]["text"], list): assert isinstance(result["translated_text"], list) assert len(result["translated_text"]) == len(test_case["params"]["text"]) else: assert isinstance(result["translated_text"], str) - + except JigsawStackError as e: pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") @@ -235,4 +237,4 @@ async def test_translate_image_async(self, test_case): assert isinstance(result, bytes) except JigsawStackError as e: - pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") \ No newline at end of file + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") diff --git a/tests/test_validate.py b/tests/test_validate.py index 9ad90bc..c1c0311 100644 --- a/tests/test_validate.py +++ b/tests/test_validate.py @@ -1,11 +1,13 @@ -import requests -from jigsawstack.exceptions import JigsawStackError -import jigsawstack -import pytest import logging -from dotenv import load_dotenv import os +import pytest +import requests +from dotenv import load_dotenv + +import jigsawstack +from jigsawstack.exceptions import JigsawStackError + load_dotenv() logging.basicConfig(level=logging.INFO) From 5ac36799e308bbb81b26af0bdc9d02c6f48e55b0 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 22:39:30 -0700 Subject: [PATCH 45/95] fix: formatting errors and improper initializations. --- .github/ruff.toml | 1 + jigsawstack/async_request.py | 4 ++-- jigsawstack/prediction.py | 2 +- jigsawstack/request.py | 4 ++-- tests/test_summary.py | 20 ++++++++++---------- 5 files changed, 16 insertions(+), 15 deletions(-) diff --git a/.github/ruff.toml b/.github/ruff.toml index 6b655a6..8f2e22f 100644 --- a/.github/ruff.toml +++ b/.github/ruff.toml @@ -13,5 +13,6 @@ select = [ "UP", # pyupgrade ] ignore = [ + "E501", # ignore line too long. "B008", # do not perform function calls in argument defaults ] diff --git a/jigsawstack/async_request.py b/jigsawstack/async_request.py index 8d7bfb1..ee8a802 100644 --- a/jigsawstack/async_request.py +++ b/jigsawstack/async_request.py @@ -25,7 +25,7 @@ def __init__( path: str, params: Union[Dict[Any, Any], List[Dict[Any, Any]]], verb: RequestVerb, - headers: Dict[str, str] = {"Content-Type": "application/json"}, + headers: Dict[str, str] = None, data: Union[bytes, None] = None, stream: Union[bool, None] = False, ): @@ -35,7 +35,7 @@ def __init__( self.api_url = config.get("api_url") self.api_key = config.get("api_key") self.data = data - self.headers = headers + self.headers = headers or {"Content-Type": "application/json"} self.disable_request_logging = config.get("disable_request_logging") self.stream = stream diff --git a/jigsawstack/prediction.py b/jigsawstack/prediction.py index 8517bda..ec571a4 100644 --- a/jigsawstack/prediction.py +++ b/jigsawstack/prediction.py @@ -27,7 +27,7 @@ class PredictionParams(TypedDict): """ steps: int """ - The number of predictions to make. The defualt is 5. + The number of predictions to make. The default is 5. """ diff --git a/jigsawstack/request.py b/jigsawstack/request.py index 84d9f3d..ea373a6 100644 --- a/jigsawstack/request.py +++ b/jigsawstack/request.py @@ -25,7 +25,7 @@ def __init__( path: str, params: Union[Dict[Any, Any], List[Dict[Any, Any]]], verb: RequestVerb, - headers: Dict[str, str] = {"Content-Type": "application/json"}, + headers: Dict[str, str] = None, data: Union[bytes, None] = None, stream: Union[bool, None] = False, ): @@ -35,7 +35,7 @@ def __init__( self.api_url = config.get("api_url") self.api_key = config.get("api_key") self.data = data - self.headers = headers + self.headers = headers or {"Content-Type": "application/json"} self.disable_request_logging = config.get("disable_request_logging") self.stream = stream diff --git a/tests/test_summary.py b/tests/test_summary.py index 5d66335..e2fb763 100644 --- a/tests/test_summary.py +++ b/tests/test_summary.py @@ -16,13 +16,13 @@ async_jigsaw = jigsawstack.AsyncJigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) LONG_TEXT = """ -Artificial Intelligence (AI) has become one of the most transformative technologies of the 21st century. -From healthcare to finance, transportation to entertainment, AI is reshaping industries and changing the way we live and work. -Machine learning algorithms can now diagnose diseases with remarkable accuracy, predict market trends, and even create art. -Natural language processing has enabled computers to understand and generate human language, leading to the development of sophisticated chatbots and virtual assistants. -Computer vision systems can identify objects, faces, and activities in images and videos with superhuman precision. -However, the rapid advancement of AI also raises important ethical questions about privacy, job displacement, and the potential for bias in algorithmic decision-making. -As we continue to develop more powerful AI systems, it's crucial that we consider their societal impact and work to ensure that the benefits of AI are distributed equitably. +Artificial Intelligence (AI) has become one of the most transformative technologies of the 21st century. +From healthcare to finance, transportation to entertainment, AI is reshaping industries and changing the way we live and work. +Machine learning algorithms can now diagnose diseases with remarkable accuracy, predict market trends, and even create art. +Natural language processing has enabled computers to understand and generate human language, leading to the development of sophisticated chatbots and virtual assistants. +Computer vision systems can identify objects, faces, and activities in images and videos with superhuman precision. +However, the rapid advancement of AI also raises important ethical questions about privacy, job displacement, and the potential for bias in algorithmic decision-making. +As we continue to develop more powerful AI systems, it's crucial that we consider their societal impact and work to ensure that the benefits of AI are distributed equitably. The future of AI holds immense promise, but it will require careful planning, regulation, and collaboration between technologists, policymakers, and society at large to realize its full potential while mitigating its risks. """ @@ -112,9 +112,9 @@ "name": "technical_text_summary", "params": { "text": """ - Machine learning is a subset of artificial intelligence that focuses on the development of algorithms and statistical models that enable computer systems to improve their performance on a specific task through experience. - Deep learning, a subfield of machine learning, uses artificial neural networks with multiple layers to progressively extract higher-level features from raw input. - Supervised learning involves training models on labeled data, while unsupervised learning discovers patterns in unlabeled data. + Machine learning is a subset of artificial intelligence that focuses on the development of algorithms and statistical models that enable computer systems to improve their performance on a specific task through experience. + Deep learning, a subfield of machine learning, uses artificial neural networks with multiple layers to progressively extract higher-level features from raw input. + Supervised learning involves training models on labeled data, while unsupervised learning discovers patterns in unlabeled data. Reinforcement learning enables agents to learn optimal behaviors through trial and error interactions with an environment. """, "type": "points", From dfe3154c96f96a0aa3625d65da6ae583f75b42db Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 22:41:46 -0700 Subject: [PATCH 46/95] chore: update string max lenght --- .github/ruff.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/ruff.toml b/.github/ruff.toml index 8f2e22f..a401c08 100644 --- a/.github/ruff.toml +++ b/.github/ruff.toml @@ -1,5 +1,5 @@ # Ruff configuration for CI/CD -line-length = 88 +line-length = 100 target-version = "py37" [lint] @@ -13,6 +13,6 @@ select = [ "UP", # pyupgrade ] ignore = [ - "E501", # ignore line too long. + "E501", "B008", # do not perform function calls in argument defaults ] From a76d141feec2a5a0c83051bdf04801f5380fa06e Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 22:44:05 -0700 Subject: [PATCH 47/95] chore: update ruff toml --- .github/ruff.toml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/ruff.toml b/.github/ruff.toml index a401c08..3922be1 100644 --- a/.github/ruff.toml +++ b/.github/ruff.toml @@ -4,7 +4,6 @@ target-version = "py37" [lint] select = [ - "E", # pycodestyle errors "W", # pycodestyle warnings "F", # pyflakes "I", # isort @@ -13,6 +12,5 @@ select = [ "UP", # pyupgrade ] ignore = [ - "E501", "B008", # do not perform function calls in argument defaults ] From 026c3d958a687a4eabd0506dae93173a3db1c125 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 22:49:03 -0700 Subject: [PATCH 48/95] feat: update CI ruff config. --- .github/workflows/ci.yml | 63 +++++++++------------------------------- 1 file changed, 14 insertions(+), 49 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2d0f6df..f77b329 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -50,8 +50,8 @@ jobs: - name: Check formatting for ${{ matrix.file }} run: | - ruff check jigsawstack/${{ matrix.file }} --select I,F,E,W - ruff format --check jigsawstack/${{ matrix.file }} + ruff check jigsawstack/${{ matrix.file }} --config .github/ruff.toml + ruff format --check jigsawstack/${{ matrix.file }} --config .github/ruff.toml test: name: Test - ${{ matrix.test-file }} @@ -74,8 +74,6 @@ jobs: - test_validate.py - test_vision.py - test_web.py - outputs: - test-result: ${{ steps.test-run.outcome }} steps: - uses: actions/checkout@v4 @@ -92,29 +90,10 @@ jobs: pip install -e . - name: Run test ${{ matrix.test-file }} - id: test-run env: JIGSAWSTACK_API_KEY: ${{ secrets.JIGSAWSTACK_API_KEY }} run: | - pytest tests/${{ matrix.test-file }} -v --json-report --json-report-file=report.json - - - name: Count passed tests - id: count-tests - if: always() - run: | - if [ -f report.json ]; then - PASSED=$(python -c "import json; data=json.load(open('report.json')); print(data.get('summary', {}).get('passed', 0))") - echo "passed-count=$PASSED" >> $GITHUB_OUTPUT - else - echo "passed-count=0" >> $GITHUB_OUTPUT - fi - - - name: Upload test results - if: always() - uses: actions/upload-artifact@v4 - with: - name: test-results-${{ matrix.test-file }} - path: report.json + pytest tests/${{ matrix.test-file }} -v all-checks-passed: name: All Checks Passed @@ -122,33 +101,19 @@ jobs: runs-on: ubuntu-latest if: always() steps: - - name: Download all test results - uses: actions/download-artifact@v4 - with: - path: test-results - - - name: Count total passed tests + - name: Verify all checks passed run: | - TOTAL_PASSED=0 - for file in test-results/*/report.json; do - if [ -f "$file" ]; then - PASSED=$(python -c "import json; data=json.load(open('$file')); print(data.get('summary', {}).get('passed', 0))") - TOTAL_PASSED=$((TOTAL_PASSED + PASSED)) - fi - done - - echo "Total passed tests: $TOTAL_PASSED" + echo "Ruff Format Check: ${{ needs.ruff-format-check.result }}" + echo "Tests: ${{ needs.test.result }}" - if [ $TOTAL_PASSED -lt 327 ]; then - echo "❌ Insufficient tests passed: $TOTAL_PASSED/327" + if [[ "${{ needs.ruff-format-check.result }}" != "success" ]]; then + echo "❌ Ruff format check failed" exit 1 - else - echo "✅ Required tests passed: $TOTAL_PASSED/327" fi - - - name: Check if ruff passed - run: | - if [[ "${{ needs.ruff-format-check.result }}" != "success" ]]; then - echo "Ruff format check failed" + + if [[ "${{ needs.test.result }}" != "success" ]]; then + echo "❌ Tests failed" exit 1 - fi \ No newline at end of file + fi + + echo "✅ All checks passed successfully!" \ No newline at end of file From af5c92718ca7ef7c71c9ff58bd6eff560fafe0bb Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 22:50:10 -0700 Subject: [PATCH 49/95] fix: avoid re-formmating gracefully fail for the dev to fix the formatting errors. --- .github/workflows/ci.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f77b329..d576488 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -51,7 +51,6 @@ jobs: - name: Check formatting for ${{ matrix.file }} run: | ruff check jigsawstack/${{ matrix.file }} --config .github/ruff.toml - ruff format --check jigsawstack/${{ matrix.file }} --config .github/ruff.toml test: name: Test - ${{ matrix.test-file }} From 700d8edfb86d1bcc9a34befe1001a4929acfd950 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 22:52:05 -0700 Subject: [PATCH 50/95] fix: missing package in ci --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d576488..6195b72 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -85,7 +85,7 @@ jobs: run: | python -m pip install --upgrade pip pip install -r requirements.txt - pip install pytest pytest-asyncio pytest-cov + pip install pytest pytest-asyncio pytest-cov python-dotenv pip install -e . - name: Run test ${{ matrix.test-file }} From 519f8316a20301922b34d5ff1de9051b6b891a3d Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Thu, 11 Sep 2025 23:02:55 -0700 Subject: [PATCH 51/95] fix: drop non-existent-previously-deleted test cases. --- .github/workflows/ci.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6195b72..c3b541d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -71,7 +71,6 @@ jobs: - test_summary.py - test_translate.py - test_validate.py - - test_vision.py - test_web.py steps: - uses: actions/checkout@v4 From c3c25410dc4ee7b085dc663d6d16b52064873c30 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Fri, 12 Sep 2025 08:37:36 -0700 Subject: [PATCH 52/95] fix: formatting for ruff. --- jigsawstack/async_request.py | 4 +- jigsawstack/embedding.py | 8 +--- jigsawstack/embedding_v2.py | 4 +- jigsawstack/helpers.py | 8 +--- jigsawstack/image_generation.py | 8 +--- jigsawstack/prompt_engine.py | 16 ++----- jigsawstack/request.py | 5 +-- jigsawstack/search.py | 4 +- jigsawstack/store.py | 4 +- jigsawstack/vision.py | 8 +--- jigsawstack/web.py | 12 ++--- tests/test_audio.py | 79 +++++++++------------------------ tests/test_embedding.py | 8 ++-- tests/test_file_store.py | 18 +++++--- tests/test_image_generation.py | 62 +++++++++++--------------- tests/test_object_detection.py | 8 ++-- tests/test_prediction.py | 9 +--- tests/test_sentiment.py | 8 +--- tests/test_summary.py | 8 +--- tests/test_translate.py | 4 +- tests/test_validate.py | 12 ++--- 21 files changed, 98 insertions(+), 199 deletions(-) diff --git a/jigsawstack/async_request.py b/jigsawstack/async_request.py index ee8a802..26a7e53 100644 --- a/jigsawstack/async_request.py +++ b/jigsawstack/async_request.py @@ -250,9 +250,7 @@ async def make_request( form_data.add_field( "file", BytesIO(data), - content_type=headers.get( - "Content-Type", "application/octet-stream" - ), + content_type=headers.get("Content-Type", "application/octet-stream"), filename="file", ) diff --git a/jigsawstack/embedding.py b/jigsawstack/embedding.py index 4957cde..cd755f0 100644 --- a/jigsawstack/embedding.py +++ b/jigsawstack/embedding.py @@ -47,9 +47,7 @@ def __init__( @overload def execute(self, params: EmbeddingParams) -> EmbeddingResponse: ... @overload - def execute( - self, blob: bytes, options: EmbeddingParams = None - ) -> EmbeddingResponse: ... + def execute(self, blob: bytes, options: EmbeddingParams = None) -> EmbeddingResponse: ... def execute( self, @@ -101,9 +99,7 @@ def __init__( @overload async def execute(self, params: EmbeddingParams) -> EmbeddingResponse: ... @overload - async def execute( - self, blob: bytes, options: EmbeddingParams = None - ) -> EmbeddingResponse: ... + async def execute(self, blob: bytes, options: EmbeddingParams = None) -> EmbeddingResponse: ... async def execute( self, diff --git a/jigsawstack/embedding_v2.py b/jigsawstack/embedding_v2.py index e944ee4..fe62f69 100644 --- a/jigsawstack/embedding_v2.py +++ b/jigsawstack/embedding_v2.py @@ -45,9 +45,7 @@ def __init__( @overload def execute(self, params: EmbeddingV2Params) -> EmbeddingV2Response: ... @overload - def execute( - self, blob: bytes, options: EmbeddingV2Params = None - ) -> EmbeddingV2Response: ... + def execute(self, blob: bytes, options: EmbeddingV2Params = None) -> EmbeddingV2Response: ... def execute( self, diff --git a/jigsawstack/helpers.py b/jigsawstack/helpers.py index 1854410..5c1ad6a 100644 --- a/jigsawstack/helpers.py +++ b/jigsawstack/helpers.py @@ -2,9 +2,7 @@ from urllib.parse import urlencode -def build_path( - base_path: str, params: Optional[Dict[str, Union[str, int, bool]]] = None -) -> str: +def build_path(base_path: str, params: Optional[Dict[str, Union[str, int, bool]]] = None) -> str: """ Build an API endpoint path with query parameters. @@ -20,9 +18,7 @@ def build_path( # remove None values from the parameters filtered_params = { - k: str(v).lower() if isinstance(v, bool) else v - for k, v in params.items() - if v is not None + k: str(v).lower() if isinstance(v, bool) else v for k, v in params.items() if v is not None } # encode the parameters diff --git a/jigsawstack/image_generation.py b/jigsawstack/image_generation.py index 525b653..9584cf3 100644 --- a/jigsawstack/image_generation.py +++ b/jigsawstack/image_generation.py @@ -92,9 +92,7 @@ def __init__( api_url: str, disable_request_logging: Union[bool, None] = False, ): - super().__init__( - api_key, api_url, disable_request_logging=disable_request_logging - ) + super().__init__(api_key, api_url, disable_request_logging=disable_request_logging) self.config = RequestConfig( api_url=api_url, api_key=api_key, @@ -123,9 +121,7 @@ def __init__( api_url: str, disable_request_logging: Union[bool, None] = False, ): - super().__init__( - api_key, api_url, disable_request_logging=disable_request_logging - ) + super().__init__(api_key, api_url, disable_request_logging=disable_request_logging) self.config = RequestConfig( api_url=api_url, api_key=api_key, diff --git a/jigsawstack/prompt_engine.py b/jigsawstack/prompt_engine.py index 6411c66..3af7fa3 100644 --- a/jigsawstack/prompt_engine.py +++ b/jigsawstack/prompt_engine.py @@ -119,14 +119,10 @@ def create(self, params: PromptEngineCreateParams) -> PromptEngineCreateResponse def get(self, id: str) -> PromptEngineGetResponse: path = f"/prompt_engine/{id}" - resp = Request( - config=self.config, path=path, params={}, verb="get" - ).perform_with_content() + resp = Request(config=self.config, path=path, params={}, verb="get").perform_with_content() return resp - def list( - self, params: Union[PromptEngineListParams, None] = None - ) -> PromptEngineListResponse: + def list(self, params: Union[PromptEngineListParams, None] = None) -> PromptEngineListResponse: if params is None: params = {} @@ -141,9 +137,7 @@ def list( base_path="/prompt_engine", params=params, ) - resp = Request( - config=self.config, path=path, params={}, verb="get" - ).perform_with_content() + resp = Request(config=self.config, path=path, params={}, verb="get").perform_with_content() return resp def delete(self, id: str) -> PromptEngineDeleteResponse: @@ -219,9 +213,7 @@ def __init__( disable_request_logging=disable_request_logging, ) - async def create( - self, params: PromptEngineCreateParams - ) -> PromptEngineCreateResponse: + async def create(self, params: PromptEngineCreateParams) -> PromptEngineCreateResponse: path = "/prompt_engine" resp = await AsyncRequest( config=self.config, diff --git a/jigsawstack/request.py b/jigsawstack/request.py index ea373a6..c1967a4 100644 --- a/jigsawstack/request.py +++ b/jigsawstack/request.py @@ -91,10 +91,7 @@ def perform_file(self) -> Union[T, None]: # handle error in case there is a statusCode attr present # and status != 200 and response is a json. - if ( - "application/json" not in resp.headers["content-type"] - and resp.status_code != 200 - ): + if "application/json" not in resp.headers["content-type"] and resp.status_code != 200: raise_for_code_and_type( code=500, message="Failed to parse JigsawStack API response. Please try again.", diff --git a/jigsawstack/search.py b/jigsawstack/search.py index a607a1c..21b0187 100644 --- a/jigsawstack/search.py +++ b/jigsawstack/search.py @@ -319,9 +319,7 @@ async def search(self, params: SearchParams) -> SearchResponse: ).perform_with_content() return resp - async def suggestions( - self, params: SearchSuggestionsParams - ) -> SearchSuggestionsResponse: + async def suggestions(self, params: SearchSuggestionsParams) -> SearchSuggestionsResponse: query = params["query"] path = f"/web/search/suggest?query={query}" resp = await AsyncRequest( diff --git a/jigsawstack/store.py b/jigsawstack/store.py index 1fe5f33..0693f49 100644 --- a/jigsawstack/store.py +++ b/jigsawstack/store.py @@ -23,9 +23,7 @@ class FileUploadResponse(TypedDict): key: str url: str size: int - temp_public_url: NotRequired[ - str - ] # Optional, only if temp_public_url is set to True in params + temp_public_url: NotRequired[str] # Optional, only if temp_public_url is set to True in params class Store(ClientConfig): diff --git a/jigsawstack/vision.py b/jigsawstack/vision.py index 452291b..6df4e37 100644 --- a/jigsawstack/vision.py +++ b/jigsawstack/vision.py @@ -218,9 +218,7 @@ def vocr( return resp @overload - def object_detection( - self, params: ObjectDetectionParams - ) -> ObjectDetectionResponse: ... + def object_detection(self, params: ObjectDetectionParams) -> ObjectDetectionResponse: ... @overload def object_detection( self, blob: bytes, options: ObjectDetectionParams = None @@ -307,9 +305,7 @@ async def vocr( return resp @overload - async def object_detection( - self, params: ObjectDetectionParams - ) -> ObjectDetectionResponse: ... + async def object_detection(self, params: ObjectDetectionParams) -> ObjectDetectionResponse: ... @overload async def object_detection( self, blob: bytes, options: ObjectDetectionParams = None diff --git a/jigsawstack/web.py b/jigsawstack/web.py index 5ebf206..5d400c3 100644 --- a/jigsawstack/web.py +++ b/jigsawstack/web.py @@ -20,9 +20,7 @@ class GotoOptions(TypedDict): timeout: NotRequired[int] - wait_until: NotRequired[ - Literal["load", "domcontentloaded", "networkidle0", "networkidle2"] - ] + wait_until: NotRequired[Literal["load", "domcontentloaded", "networkidle0", "networkidle2"]] # @@ -257,9 +255,7 @@ def search(self, params: SearchParams) -> SearchResponse: ) return s.search(params) - def search_suggestions( - self, params: SearchSuggestionsParams - ) -> SearchSuggestionsResponse: + def search_suggestions(self, params: SearchSuggestionsParams) -> SearchSuggestionsResponse: s = Search( self.api_key, self.api_url, @@ -309,9 +305,7 @@ async def ai_scrape(self, params: AIScrapeParams) -> AIScrapeResponse: async def html_to_any(self, params: HTMLToAnyURLParams) -> HTMLToAnyURLResponse: ... @overload - async def html_to_any( - self, params: HTMLToAnyBinaryParams - ) -> HTMLToAnyBinaryResponse: ... + async def html_to_any(self, params: HTMLToAnyBinaryParams) -> HTMLToAnyBinaryResponse: ... async def html_to_any( self, params: HTMLToAnyParams diff --git a/tests/test_audio.py b/tests/test_audio.py index 3ba499e..037f285 100644 --- a/tests/test_audio.py +++ b/tests/test_audio.py @@ -22,36 +22,25 @@ TEST_CASES = [ { "name": "with_url_only", - "params": { - "url": AUDIO_URL - }, + "params": {"url": AUDIO_URL}, "blob": None, "options": None, }, { "name": "with_url_and_language", - "params": { - "url": AUDIO_URL, - "language": "en" - }, + "params": {"url": AUDIO_URL, "language": "en"}, "blob": None, "options": None, }, { "name": "with_url_auto_detect_language", - "params": { - "url": AUDIO_URL, - "language": "auto" - }, + "params": {"url": AUDIO_URL, "language": "auto"}, "blob": None, "options": None, }, { "name": "with_url_and_translate", - "params": { - "url": AUDIO_URL, - "translate": True - }, + "params": {"url": AUDIO_URL, "translate": True}, "blob": None, "options": None, }, @@ -65,43 +54,29 @@ "name": "with_blob_and_language", "params": None, "blob": AUDIO_URL, - "options": { - "language": "en" - }, + "options": {"language": "en"}, }, { "name": "with_blob_auto_detect", "params": None, "blob": AUDIO_URL, - "options": { - "language": "auto" - }, + "options": {"language": "auto"}, }, { "name": "with_blob_and_translate", "params": None, "blob": AUDIO_URL, - "options": { - "translate": True, - "language": "en" - }, + "options": {"translate": True, "language": "en"}, }, { "name": "with_by_speaker", - "params": { - "url": AUDIO_URL_LONG, - "by_speaker": True - }, + "params": {"url": AUDIO_URL_LONG, "by_speaker": True}, "blob": None, "options": None, }, { "name": "with_chunk_settings", - "params": { - "url": AUDIO_URL, - "batch_size": 5, - "chunk_duration": 15 - }, + "params": {"url": AUDIO_URL, "batch_size": 5, "chunk_duration": 15}, "blob": None, "options": None, }, @@ -114,7 +89,7 @@ "translate": False, "by_speaker": True, "batch_size": 10, - "chunk_duration": 15 + "chunk_duration": 15, }, }, ] @@ -123,10 +98,7 @@ WEBHOOK_TEST_CASES = [ { "name": "with_webhook_url", - "params": { - "url": AUDIO_URL, - "webhook_url": "https://webhook.site/test-webhook" - }, + "params": {"url": AUDIO_URL, "webhook_url": "https://webhook.site/test-webhook"}, "blob": None, "options": None, }, @@ -134,10 +106,7 @@ "name": "with_blob_and_webhook", "params": None, "blob": AUDIO_URL, - "options": { - "webhook_url": "https://webhook.site/test-webhook", - "language": "en" - }, + "options": {"webhook_url": "https://webhook.site/test-webhook", "language": "en"}, }, ] @@ -152,10 +121,7 @@ def test_speech_to_text(self, test_case): if test_case.get("blob"): # Download audio content blob_content = requests.get(test_case["blob"]).content - result = jigsaw.audio.speech_to_text( - blob_content, - test_case.get("options", {}) - ) + result = jigsaw.audio.speech_to_text(blob_content, test_case.get("options", {})) else: # Use params directly result = jigsaw.audio.speech_to_text(test_case["params"]) @@ -174,17 +140,16 @@ def test_speech_to_text(self, test_case): except JigsawStackError as e: pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") - @pytest.mark.parametrize("test_case", WEBHOOK_TEST_CASES, ids=[tc["name"] for tc in WEBHOOK_TEST_CASES]) + @pytest.mark.parametrize( + "test_case", WEBHOOK_TEST_CASES, ids=[tc["name"] for tc in WEBHOOK_TEST_CASES] + ) def test_speech_to_text_webhook(self, test_case): """Test synchronous speech-to-text with webhook""" try: if test_case.get("blob"): # Download audio content blob_content = requests.get(test_case["blob"]).content - result = jigsaw.audio.speech_to_text( - blob_content, - test_case.get("options", {}) - ) + result = jigsaw.audio.speech_to_text(blob_content, test_case.get("options", {})) else: # Use params directly result = jigsaw.audio.speech_to_text(test_case["params"]) @@ -208,8 +173,7 @@ async def test_speech_to_text_async(self, test_case): # Download audio content blob_content = requests.get(test_case["blob"]).content result = await async_jigsaw.audio.speech_to_text( - blob_content, - test_case.get("options", {}) + blob_content, test_case.get("options", {}) ) else: # Use params directly @@ -229,7 +193,9 @@ async def test_speech_to_text_async(self, test_case): except JigsawStackError as e: pytest.fail(f"Unexpected JigsawStackError in async {test_case['name']}: {e}") - @pytest.mark.parametrize("test_case", WEBHOOK_TEST_CASES, ids=[tc["name"] for tc in WEBHOOK_TEST_CASES]) + @pytest.mark.parametrize( + "test_case", WEBHOOK_TEST_CASES, ids=[tc["name"] for tc in WEBHOOK_TEST_CASES] + ) @pytest.mark.asyncio async def test_speech_to_text_webhook_async(self, test_case): """Test asynchronous speech-to-text with webhook""" @@ -238,8 +204,7 @@ async def test_speech_to_text_webhook_async(self, test_case): # Download audio content blob_content = requests.get(test_case["blob"]).content result = await async_jigsaw.audio.speech_to_text( - blob_content, - test_case.get("options", {}) + blob_content, test_case.get("options", {}) ) else: # Use params directly diff --git a/tests/test_embedding.py b/tests/test_embedding.py index 4464ae4..7b6b368 100644 --- a/tests/test_embedding.py +++ b/tests/test_embedding.py @@ -16,12 +16,12 @@ jigsaw = jigsawstack.JigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) async_jigsaw = jigsawstack.AsyncJigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) -SAMPLE_TEXT = "The quick brown fox jumps over the lazy dog. This is a sample text for embedding generation." +SAMPLE_TEXT = ( + "The quick brown fox jumps over the lazy dog. This is a sample text for embedding generation." +) SAMPLE_IMAGE_URL = "https://images.unsplash.com/photo-1542931287-023b922fa89b?q=80&w=2574&auto=format&fit=crop&ixlib=rb-4.1.0&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D" SAMPLE_AUDIO_URL = "https://jigsawstack.com/preview/stt-example.wav" -SAMPLE_PDF_URL = ( - "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf" -) +SAMPLE_PDF_URL = "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf" # Test cases for Embedding V1 EMBEDDING_V1_TEST_CASES = [ diff --git a/tests/test_file_store.py b/tests/test_file_store.py index 8cee658..97d07dd 100644 --- a/tests/test_file_store.py +++ b/tests/test_file_store.py @@ -18,7 +18,9 @@ TEXT_FILE_CONTENT = b"This is a test file content for JigsawStack storage" JSON_FILE_CONTENT = b'{"test": "data", "key": "value"}' -BINARY_FILE_CONTENT = requests.get("https://rogilvkqloanxtvjfrkm.supabase.co/storage/v1/object/public/demo/Collabo%201080x842.jpg").content +BINARY_FILE_CONTENT = requests.get( + "https://rogilvkqloanxtvjfrkm.supabase.co/storage/v1/object/public/demo/Collabo%201080x842.jpg" +).content TEST_CASES_UPLOAD = [ { @@ -60,7 +62,9 @@ class TestFileStoreSync: uploaded_keys = [] # Track uploaded files for cleanup - @pytest.mark.parametrize("test_case", TEST_CASES_UPLOAD, ids=[tc["name"] for tc in TEST_CASES_UPLOAD]) + @pytest.mark.parametrize( + "test_case", TEST_CASES_UPLOAD, ids=[tc["name"] for tc in TEST_CASES_UPLOAD] + ) def test_file_upload(self, test_case): """Test synchronous file upload with various options""" try: @@ -87,8 +91,7 @@ def test_file_get(self): test_key = f"test-get-{uuid.uuid4().hex[:8]}.txt" try: upload_result = jigsaw.store.upload( - TEXT_FILE_CONTENT, - {"key": test_key, "content_type": "text/plain"} + TEXT_FILE_CONTENT, {"key": test_key, "content_type": "text/plain"} ) # Now retrieve it @@ -108,7 +111,9 @@ class TestFileStoreAsync: uploaded_keys = [] # Track uploaded files for cleanup - @pytest.mark.parametrize("test_case", TEST_CASES_UPLOAD, ids=[tc["name"] for tc in TEST_CASES_UPLOAD]) + @pytest.mark.parametrize( + "test_case", TEST_CASES_UPLOAD, ids=[tc["name"] for tc in TEST_CASES_UPLOAD] + ) @pytest.mark.asyncio async def test_file_upload_async(self, test_case): """Test asynchronous file upload with various options""" @@ -137,8 +142,7 @@ async def test_file_get_async(self): test_key = f"test-async-get-{uuid.uuid4().hex[:8]}.txt" try: upload_result = await async_jigsaw.store.upload( - TEXT_FILE_CONTENT, - {"key": test_key, "content_type": "text/plain"} + TEXT_FILE_CONTENT, {"key": test_key, "content_type": "text/plain"} ) # Now retrieve it diff --git a/tests/test_image_generation.py b/tests/test_image_generation.py index 1a4342e..6b982ba 100644 --- a/tests/test_image_generation.py +++ b/tests/test_image_generation.py @@ -16,11 +16,10 @@ async_jigsaw = jigsawstack.AsyncJigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) IMAGE_URL = "https://images.unsplash.com/photo-1494588024300-e9df7ff98d78?q=80&w=1284&auto=format&fit=crop&ixlib=rb-4.1.0&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D" -FILE_STORE_KEY = jigsaw.store.upload(requests.get(IMAGE_URL).content, { - "filename": "test_image.jpg", - "content_type": "image/jpeg", - "overwrite": True - }) +FILE_STORE_KEY = jigsaw.store.upload( + requests.get(IMAGE_URL).content, + {"filename": "test_image.jpg", "content_type": "image/jpeg", "overwrite": True}, +) TEST_CASES = [ { @@ -33,23 +32,16 @@ "name": "with_aspect_ratio", "params": { "prompt": "A serene lake with mountains in the background", - "aspect_ratio": "16:9" + "aspect_ratio": "16:9", }, }, { "name": "with_custom_dimensions", - "params": { - "prompt": "A futuristic city skyline", - "width": 1024, - "height": 768 - }, + "params": {"prompt": "A futuristic city skyline", "width": 1024, "height": 768}, }, { "name": "with_output_format_png", - "params": { - "prompt": "A colorful abstract painting", - "output_format": "png" - }, + "params": {"prompt": "A colorful abstract painting", "output_format": "png"}, }, { "name": "with_advanced_config", @@ -58,8 +50,8 @@ "advance_config": { "negative_prompt": "blurry, low quality, distorted", "guidance": 7, - "seed": 42 - } + "seed": 42, + }, }, }, { @@ -68,22 +60,16 @@ "prompt": "A detailed botanical illustration", "steps": 30, "aspect_ratio": "3:4", - "return_type": "base64" + "return_type": "base64", }, }, { "name": "with_return_type_url", - "params": { - "prompt": "A vintage car on a desert road", - "return_type": "url" - }, + "params": {"prompt": "A vintage car on a desert road", "return_type": "url"}, }, { "name": "with_return_type_base64", - "params": { - "prompt": "A fantasy castle on a hill", - "return_type": "base64" - } + "params": {"prompt": "A fantasy castle on a hill", "return_type": "base64"}, }, { "name": "with_all_options", @@ -95,9 +81,9 @@ "advance_config": { "negative_prompt": "simple, plain, boring", "guidance": 8, - "seed": 12345 + "seed": 12345, }, - "return_type": "base64" + "return_type": "base64", }, }, ] @@ -109,7 +95,7 @@ "params": { "prompt": "Add snow effects to this image", "url": IMAGE_URL, - "return_type": "base64" + "return_type": "base64", }, }, { @@ -118,7 +104,7 @@ "prompt": "Apply a cyberpunk style to this image", "file_store_key": FILE_STORE_KEY, }, - } + }, ] @@ -139,7 +125,6 @@ def test_image_generation(self, test_case): assert result is not None if type(result) is dict: - # Check for image data based on return_type if test_case["params"].get("return_type") == "url": assert result.get("url") is not None @@ -156,11 +141,14 @@ def test_image_generation(self, test_case): except JigsawStackError as e: pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") - @pytest.mark.parametrize("test_case", IMAGE_TO_IMAGE_TEST_CASES[:1], ids=[tc["name"] for tc in IMAGE_TO_IMAGE_TEST_CASES[:1]]) + @pytest.mark.parametrize( + "test_case", + IMAGE_TO_IMAGE_TEST_CASES[:1], + ids=[tc["name"] for tc in IMAGE_TO_IMAGE_TEST_CASES[:1]], + ) def test_image_to_image_generation(self, test_case): """Test image-to-image generation with URL input""" try: - result = jigsaw.image_generation(test_case["params"]) print(f"Test {test_case['name']}: Generated image from input") @@ -192,7 +180,7 @@ async def test_image_generation_async(self, test_case): # Check response structure assert result is not None if type(result) is dict: - # Check for image data based on return_type + # Check for image data based on return_type if test_case["params"].get("return_type") == "url": assert result.get("url") is not None assert requests.get(result["url"]).status_code == 200 @@ -209,7 +197,11 @@ async def test_image_generation_async(self, test_case): except JigsawStackError as e: pytest.fail(f"Unexpected JigsawStackError in async {test_case['name']}: {e}") - @pytest.mark.parametrize("test_case", IMAGE_TO_IMAGE_TEST_CASES[:1], ids=[tc["name"] for tc in IMAGE_TO_IMAGE_TEST_CASES[:1]]) + @pytest.mark.parametrize( + "test_case", + IMAGE_TO_IMAGE_TEST_CASES[:1], + ids=[tc["name"] for tc in IMAGE_TO_IMAGE_TEST_CASES[:1]], + ) @pytest.mark.asyncio async def test_image_to_image_generation_async(self, test_case): """Test asynchronous image-to-image generation with URL input""" diff --git a/tests/test_object_detection.py b/tests/test_object_detection.py index b6d8d78..1fbd5ca 100644 --- a/tests/test_object_detection.py +++ b/tests/test_object_detection.py @@ -18,7 +18,9 @@ jigsaw = jigsawstack.JigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) async_jigsaw = jigsawstack.AsyncJigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) -IMAGE_URL = "https://rogilvkqloanxtvjfrkm.supabase.co/storage/v1/object/public/demo/Collabo%201080x842.jpg" +IMAGE_URL = ( + "https://rogilvkqloanxtvjfrkm.supabase.co/storage/v1/object/public/demo/Collabo%201080x842.jpg" +) TEST_CASES = [ { @@ -102,9 +104,7 @@ def test_object_detection(self, test_case): if test_case.get("blob"): # Download blob content blob_content = requests.get(test_case["blob"]).content - result = jigsaw.vision.object_detection( - blob_content, test_case.get("options", {}) - ) + result = jigsaw.vision.object_detection(blob_content, test_case.get("options", {})) else: # Use params directly result = jigsaw.vision.object_detection(test_case["params"]) diff --git a/tests/test_prediction.py b/tests/test_prediction.py index f38b016..a87ccab 100644 --- a/tests/test_prediction.py +++ b/tests/test_prediction.py @@ -17,7 +17,6 @@ async_jigsaw = jigsawstack.AsyncJigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) - def generate_dates(start_date, num_days): dates = [] for i in range(num_days): @@ -48,9 +47,7 @@ def generate_dates(start_date, num_days): { "name": "seasonal_pattern", "params": { - "dataset": [ - {"date": dates[i], "value": 100 + (50 * (i % 7))} for i in range(21) - ], + "dataset": [{"date": dates[i], "value": 100 + (50 * (i % 7))} for i in range(21)], "steps": 7, }, }, @@ -64,9 +61,7 @@ def generate_dates(start_date, num_days): { "name": "large_dataset_prediction", "params": { - "dataset": [ - {"date": dates[i], "value": 1000 + (i * 20)} for i in range(30) - ], + "dataset": [{"date": dates[i], "value": 1000 + (i * 20)} for i in range(30)], "steps": 10, }, }, diff --git a/tests/test_sentiment.py b/tests/test_sentiment.py index c184ec1..5bb5914 100644 --- a/tests/test_sentiment.py +++ b/tests/test_sentiment.py @@ -30,9 +30,7 @@ }, { "name": "neutral_sentiment_factual", - "params": { - "text": "The meeting is scheduled for 3 PM tomorrow in conference room B." - }, + "params": {"text": "The meeting is scheduled for 3 PM tomorrow in conference room B."}, }, { "name": "mixed_sentiment_paragraph", @@ -68,9 +66,7 @@ }, { "name": "question_sentiment", - "params": { - "text": "Why is this product so amazing? I can't believe how well it works!" - }, + "params": {"text": "Why is this product so amazing? I can't believe how well it works!"}, }, ] diff --git a/tests/test_summary.py b/tests/test_summary.py index e2fb763..ab79ea9 100644 --- a/tests/test_summary.py +++ b/tests/test_summary.py @@ -147,9 +147,7 @@ def test_summary(self, test_case): else: assert isinstance(result["summary"], str) if "max_characters" in test_case["params"]: - assert ( - len(result["summary"]) <= test_case["params"]["max_characters"] - ) + assert len(result["summary"]) <= test_case["params"]["max_characters"] except JigsawStackError as e: pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") @@ -179,9 +177,7 @@ async def test_summary_async(self, test_case): else: assert isinstance(result["summary"], str) if "max_characters" in test_case["params"]: - assert ( - len(result["summary"]) <= test_case["params"]["max_characters"] - ) + assert len(result["summary"]) <= test_case["params"]["max_characters"] except JigsawStackError as e: pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") diff --git a/tests/test_translate.py b/tests/test_translate.py index f556fca..5b560be 100644 --- a/tests/test_translate.py +++ b/tests/test_translate.py @@ -193,9 +193,7 @@ def test_translate_image(self, test_case): if test_case.get("blob"): # Download blob content blob_content = requests.get(test_case["blob"]).content - result = jigsaw.translate.image( - blob_content, test_case.get("options", {}) - ) + result = jigsaw.translate.image(blob_content, test_case.get("options", {})) else: # Use params directly result = jigsaw.translate.image(test_case["params"]) diff --git a/tests/test_validate.py b/tests/test_validate.py index c1c0311..d0d2c43 100644 --- a/tests/test_validate.py +++ b/tests/test_validate.py @@ -17,9 +17,7 @@ async_jigsaw = jigsawstack.AsyncJigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) # Sample URLs for NSFW testing -SAFE_IMAGE_URL = ( - "https://images.unsplash.com/photo-1506905925346-21bda4d32df4?q=80&w=2070" -) +SAFE_IMAGE_URL = "https://images.unsplash.com/photo-1506905925346-21bda4d32df4?q=80&w=2070" POTENTIALLY_NSFW_URL = "https://images.unsplash.com/photo-1512310604669-443f26c35f52?q=80&w=868&auto=format&fit=crop&ixlib=rb-4.1.0&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D" SPAM_CHECK_TEST_CASES = [ @@ -75,9 +73,7 @@ }, { "name": "mixed_correct_and_incorrect", - "params": { - "text": "The weather is beatiful today, but tommorow might be diferent." - }, + "params": {"text": "The weather is beatiful today, but tommorow might be diferent."}, }, { "name": "technical_text", @@ -429,9 +425,7 @@ async def test_nsfw_check_blob_async(self, test_case): try: # Download blob content blob_content = requests.get(test_case["blob_url"]).content - result = await async_jigsaw.validate.nsfw( - blob_content, test_case["options"] - ) + result = await async_jigsaw.validate.nsfw(blob_content, test_case["options"]) assert result["success"] assert "nsfw" in result From ba56ab1b30b500f45533870a76f02472773f80f1 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Fri, 12 Sep 2025 09:03:57 -0700 Subject: [PATCH 53/95] feat: seperating test cases for deepresearch and ai_scrape. --- .github/workflows/ci.yml | 36 ++----- tests/test_ai_scrape.py | 141 +++++++++++++++++++++++++ tests/test_deep_research.py | 95 +++++++++++++++++ tests/test_web.py | 205 ------------------------------------ 4 files changed, 242 insertions(+), 235 deletions(-) create mode 100644 tests/test_ai_scrape.py create mode 100644 tests/test_deep_research.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c3b541d..b1f5b26 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -8,35 +8,8 @@ on: jobs: ruff-format-check: - name: Ruff Format Check - ${{ matrix.file }} + name: Ruff Format Check runs-on: ubuntu-latest - strategy: - fail-fast: false - matrix: - file: - - __init__.py - - _config.py - - _types.py - - async_request.py - - audio.py - - classification.py - - embedding_v2.py - - embedding.py - - exceptions.py - - helpers.py - - image_generation.py - - prediction.py - - prompt_engine.py - - request.py - - search.py - - sentiment.py - - sql.py - - store.py - - summary.py - - translate.py - - validate.py - - vision.py - - web.py steps: - uses: actions/checkout@v4 @@ -48,9 +21,10 @@ jobs: - name: Install ruff run: pip install ruff - - name: Check formatting for ${{ matrix.file }} + - name: Check all files with ruff run: | - ruff check jigsawstack/${{ matrix.file }} --config .github/ruff.toml + ruff check jigsawstack/ --config .github/ruff.toml + ruff format --check jigsawstack/ --config .github/ruff.toml test: name: Test - ${{ matrix.test-file }} @@ -72,6 +46,8 @@ jobs: - test_translate.py - test_validate.py - test_web.py + - test_deep_research.py + - test_ai_scrape.py steps: - uses: actions/checkout@v4 diff --git a/tests/test_ai_scrape.py b/tests/test_ai_scrape.py new file mode 100644 index 0000000..4c30b33 --- /dev/null +++ b/tests/test_ai_scrape.py @@ -0,0 +1,141 @@ +import logging +import os + +import pytest +from dotenv import load_dotenv + +import jigsawstack +from jigsawstack.exceptions import JigsawStackError + +load_dotenv() + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +jigsaw = jigsawstack.JigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) +async_jigsaw = jigsawstack.AsyncJigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) + +URL = "https://jigsawstack.com" + +# AI Scrape Test Cases +AI_SCRAPE_TEST_CASES = [ + { + "name": "scrape_with_element_prompts", + "params": { + "url": URL, + "element_prompts": ["title", "main content", "navigation links"], + }, + }, + { + "name": "scrape_with_selectors", + "params": { + "url": URL, + "selectors": ["h1", "p", "a"], + }, + }, + { + "name": "scrape_with_features", + "params": { + "url": URL, + "element_prompts": ["title"], + "features": ["meta", "link"], + }, + }, + { + "name": "scrape_with_root_element", + "params": { + "url": URL, + "element_prompts": ["content"], + "root_element_selector": "main", + }, + }, + { + "name": "scrape_with_wait_for_timeout", + "params": { + "url": URL, + "element_prompts": ["content"], + "wait_for": {"mode": "timeout", "value": 3000}, + }, + }, + { + "name": "scrape_mobile_view", + "params": { + "url": URL, + "element_prompts": ["mobile menu"], + "is_mobile": True, + }, + }, + { + "name": "scrape_with_cookies", + "params": { + "url": URL, + "element_prompts": ["user data"], + "cookies": [{"name": "session", "value": "test123", "domain": "example.com"}], + }, + }, + { + "name": "scrape_with_advance_config", + "params": { + "url": URL, + "element_prompts": ["content"], + "advance_config": {"console": True, "network": True, "cookies": True}, + }, + }, +] + + +class TestAIScrapeSync: + """Test synchronous AI scrape methods""" + + @pytest.mark.parametrize( + "test_case", + AI_SCRAPE_TEST_CASES, + ids=[tc["name"] for tc in AI_SCRAPE_TEST_CASES], + ) + def test_ai_scrape(self, test_case): + """Test synchronous AI scrape with various inputs""" + try: + result = jigsaw.web.ai_scrape(test_case["params"]) + + assert result["success"] + assert "data" in result + assert isinstance(result["data"], list) + + # Check for optional features + if "meta" in test_case["params"].get("features", []): + assert "meta" in result + if "link" in test_case["params"].get("features", []): + assert "link" in result + assert isinstance(result["link"], list) + + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") + + +class TestAIScrapeAsync: + """Test asynchronous AI scrape methods""" + + @pytest.mark.parametrize( + "test_case", + AI_SCRAPE_TEST_CASES, + ids=[tc["name"] for tc in AI_SCRAPE_TEST_CASES], + ) + @pytest.mark.asyncio + async def test_ai_scrape_async(self, test_case): + """Test asynchronous AI scrape with various inputs""" + try: + result = await async_jigsaw.web.ai_scrape(test_case["params"]) + + assert result["success"] + assert "data" in result + assert isinstance(result["data"], list) + + # Check for optional features + if "meta" in test_case["params"].get("features", []): + assert "meta" in result + if "link" in test_case["params"].get("features", []): + assert "link" in result + assert isinstance(result["link"], list) + + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") diff --git a/tests/test_deep_research.py b/tests/test_deep_research.py new file mode 100644 index 0000000..3d584ab --- /dev/null +++ b/tests/test_deep_research.py @@ -0,0 +1,95 @@ +import logging +import os + +import pytest +from dotenv import load_dotenv + +import jigsawstack +from jigsawstack.exceptions import JigsawStackError + +load_dotenv() + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +jigsaw = jigsawstack.JigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) +async_jigsaw = jigsawstack.AsyncJigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) + +URL = "https://jigsawstack.com" + + +# Deep Research Test Cases +DEEP_RESEARCH_TEST_CASES = [ + { + "name": "basic_deep_research", + "params": { + "query": "climate change effects", + }, + }, + { + "name": "technical_deep_research", + "params": { + "query": "quantum computing applications in cryptography", + }, + }, + { + "name": "deep_research_with_depth", + "params": { + "query": "renewable energy sources", + "depth": 2, + }, + }, +] + + +class TestDeepResearchSync: + """Test synchronous deep research methods""" + + @pytest.mark.parametrize( + "test_case", + DEEP_RESEARCH_TEST_CASES, + ids=[tc["name"] for tc in DEEP_RESEARCH_TEST_CASES], + ) + def test_deep_research(self, test_case): + """Test synchronous deep research with various inputs""" + try: + result = jigsaw.web.deep_research(test_case["params"]) + + assert result["success"] + assert "results" in result + assert isinstance(result["results"], str) + assert len(result["results"]) > 0 + + # Check for sources + if "sources" in result: + assert isinstance(result["sources"], list) + + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") + + +class TestDeepResearchAsync: + """Test asynchronous deep research methods""" + + @pytest.mark.parametrize( + "test_case", + DEEP_RESEARCH_TEST_CASES, + ids=[tc["name"] for tc in DEEP_RESEARCH_TEST_CASES], + ) + @pytest.mark.asyncio + async def test_deep_research_async(self, test_case): + """Test asynchronous deep research with various inputs""" + try: + result = await async_jigsaw.web.deep_research(test_case["params"]) + + assert result["success"] + assert "results" in result + assert isinstance(result["results"], str) + assert len(result["results"]) > 0 + + # Check for sources + if "sources" in result: + assert isinstance(result["sources"], list) + + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") diff --git a/tests/test_web.py b/tests/test_web.py index dda97c5..c22ccd7 100644 --- a/tests/test_web.py +++ b/tests/test_web.py @@ -15,75 +15,6 @@ URL = "https://jigsawstack.com" - -# AI Scrape Test Cases -AI_SCRAPE_TEST_CASES = [ - { - "name": "scrape_with_element_prompts", - "params": { - "url": URL, - "element_prompts": ["title", "main content", "navigation links"], - }, - }, - { - "name": "scrape_with_selectors", - "params": { - "url": URL, - "selectors": ["h1", "p", "a"], - }, - }, - { - "name": "scrape_with_features", - "params": { - "url": URL, - "element_prompts": ["title"], - "features": ["meta", "link"], - }, - }, - { - "name": "scrape_with_root_element", - "params": { - "url": URL, - "element_prompts": ["content"], - "root_element_selector": "main", - }, - }, - { - "name": "scrape_with_wait_for_timeout", - "params": { - "url": URL, - "element_prompts": ["content"], - "wait_for": {"mode": "timeout", "value": 3000}, - }, - }, - { - "name": "scrape_mobile_view", - "params": { - "url": URL, - "element_prompts": ["mobile menu"], - "is_mobile": True, - }, - }, - { - "name": "scrape_with_cookies", - "params": { - "url": URL, - "element_prompts": ["user data"], - "cookies": [ - {"name": "session", "value": "test123", "domain": "example.com"} - ], - }, - }, - { - "name": "scrape_with_advance_config", - "params": { - "url": URL, - "element_prompts": ["content"], - "advance_config": {"console": True, "network": True, "cookies": True}, - }, - }, -] - # HTML to Any Test Cases HTML_TO_ANY_TEST_CASES = [ { @@ -212,58 +143,6 @@ }, ] -# Deep Research Test Cases -DEEP_RESEARCH_TEST_CASES = [ - { - "name": "basic_deep_research", - "params": { - "query": "climate change effects", - }, - }, - { - "name": "technical_deep_research", - "params": { - "query": "quantum computing applications in cryptography", - }, - }, - { - "name": "deep_research_with_depth", - "params": { - "query": "renewable energy sources", - "depth": 2, - }, - }, -] - - -class TestAIScrapeSync: - """Test synchronous AI scrape methods""" - - @pytest.mark.parametrize( - "test_case", - AI_SCRAPE_TEST_CASES, - ids=[tc["name"] for tc in AI_SCRAPE_TEST_CASES], - ) - def test_ai_scrape(self, test_case): - """Test synchronous AI scrape with various inputs""" - try: - result = jigsaw.web.ai_scrape(test_case["params"]) - - assert result["success"] - assert "data" in result - assert isinstance(result["data"], list) - - # Check for optional features - if "meta" in test_case["params"].get("features", []): - assert "meta" in result - if "link" in test_case["params"].get("features", []): - assert "link" in result - assert isinstance(result["link"], list) - - except JigsawStackError as e: - pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") - - class TestHTMLToAnySync: """Test synchronous HTML to Any methods""" @@ -348,65 +227,8 @@ def test_search_suggestions(self, test_case): except JigsawStackError as e: pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") - -class TestDeepResearchSync: - """Test synchronous deep research methods""" - - @pytest.mark.parametrize( - "test_case", - DEEP_RESEARCH_TEST_CASES, - ids=[tc["name"] for tc in DEEP_RESEARCH_TEST_CASES], - ) - def test_deep_research(self, test_case): - """Test synchronous deep research with various inputs""" - try: - result = jigsaw.web.deep_research(test_case["params"]) - - assert result["success"] - assert "results" in result - assert isinstance(result["results"], str) - assert len(result["results"]) > 0 - - # Check for sources - if "sources" in result: - assert isinstance(result["sources"], list) - - except JigsawStackError as e: - pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") - - # Async Test Classes - -class TestAIScrapeAsync: - """Test asynchronous AI scrape methods""" - - @pytest.mark.parametrize( - "test_case", - AI_SCRAPE_TEST_CASES, - ids=[tc["name"] for tc in AI_SCRAPE_TEST_CASES], - ) - @pytest.mark.asyncio - async def test_ai_scrape_async(self, test_case): - """Test asynchronous AI scrape with various inputs""" - try: - result = await async_jigsaw.web.ai_scrape(test_case["params"]) - - assert result["success"] - assert "data" in result - assert isinstance(result["data"], list) - - # Check for optional features - if "meta" in test_case["params"].get("features", []): - assert "meta" in result - if "link" in test_case["params"].get("features", []): - assert "link" in result - assert isinstance(result["link"], list) - - except JigsawStackError as e: - pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") - - class TestHTMLToAnyAsync: """Test asynchronous HTML to Any methods""" @@ -493,30 +315,3 @@ async def test_search_suggestions_async(self, test_case): except JigsawStackError as e: pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") - - -class TestDeepResearchAsync: - """Test asynchronous deep research methods""" - - @pytest.mark.parametrize( - "test_case", - DEEP_RESEARCH_TEST_CASES, - ids=[tc["name"] for tc in DEEP_RESEARCH_TEST_CASES], - ) - @pytest.mark.asyncio - async def test_deep_research_async(self, test_case): - """Test asynchronous deep research with various inputs""" - try: - result = await async_jigsaw.web.deep_research(test_case["params"]) - - assert result["success"] - assert "results" in result - assert isinstance(result["results"], str) - assert len(result["results"]) > 0 - - # Check for sources - if "sources" in result: - assert isinstance(result["sources"], list) - - except JigsawStackError as e: - pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") From 86db626b1ddc3a150785b8f0301a4518ecaf8e01 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Fri, 12 Sep 2025 16:47:21 -0700 Subject: [PATCH 54/95] fix: multipart form requests to utlize file and data. --- jigsawstack/request.py | 38 +++++++++++++++++++++++++++++++++++--- 1 file changed, 35 insertions(+), 3 deletions(-) diff --git a/jigsawstack/request.py b/jigsawstack/request.py index c1967a4..069d65a 100644 --- a/jigsawstack/request.py +++ b/jigsawstack/request.py @@ -1,3 +1,4 @@ +from importlib.resources import files import json from typing import Any, Dict, Generator, Generic, List, TypedDict, Union, cast @@ -28,6 +29,7 @@ def __init__( headers: Dict[str, str] = None, data: Union[bytes, None] = None, stream: Union[bool, None] = False, + files: Union[Dict[str, Any], None] = None, # Change from 'file' to 'files' ): self.path = path self.params = params @@ -38,6 +40,7 @@ def __init__( self.headers = headers or {"Content-Type": "application/json"} self.disable_request_logging = config.get("disable_request_logging") self.stream = stream + self.files = files # Change from 'file' to 'files' def perform(self) -> Union[T, None]: """Is the main function that makes the HTTP request @@ -152,15 +155,23 @@ def __get_headers(self) -> Dict[Any, Any]: """ h = { - "Content-Type": "application/json", "Accept": "application/json", "x-api-key": f"{self.api_key}", } + + # Only add Content-Type if not using multipart (files) + if not self.files and not self.data: + h["Content-Type"] = "application/json" if self.disable_request_logging: h["x-jigsaw-no-request-log"] = "true" _headers = h.copy() + + # Don't override Content-Type if using multipart + if self.files and "Content-Type" in self.headers: + self.headers.pop("Content-Type") + _headers.update(self.headers) return _headers @@ -243,20 +254,41 @@ def make_request(self, url: str) -> requests.Response: params = self.params verb = self.verb data = self.data + files = self.files # Change from 'file' to 'files' _requestParams = None + _json = None + _data = None + _files = None if verb.lower() in ["get", "delete"]: _requestParams = params + elif files: + # For multipart requests + _files = files + # Add params as 'body' field in multipart form (JSON stringified) + if params and isinstance(params, dict): + # Convert params to JSON string and add as 'body' field + _data = {"body": json.dumps(params)} + elif data: + # For binary data without multipart + _data = data + # Pass params as query parameters for binary uploads + if params and isinstance(params, dict): + _requestParams = params + else: + # For JSON requests + _json = params try: return requests.request( verb, url, params=_requestParams, - json=params, + json=_json, headers=headers, - data=data, + data=_data, + files=_files, stream=self.stream, ) except requests.HTTPError as e: From 08c6c42107a4e2e6c2fd2dce9ce55c6dc5cc57a0 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Fri, 12 Sep 2025 16:48:52 -0700 Subject: [PATCH 55/95] fix: maintain same structure and logic as sync request. --- jigsawstack/async_request.py | 115 ++++++++++++++++++++++++----------- 1 file changed, 79 insertions(+), 36 deletions(-) diff --git a/jigsawstack/async_request.py b/jigsawstack/async_request.py index 26a7e53..1fbf44d 100644 --- a/jigsawstack/async_request.py +++ b/jigsawstack/async_request.py @@ -1,7 +1,6 @@ import json -from io import BytesIO from typing import Any, AsyncGenerator, Dict, Generic, List, TypedDict, Union, cast - +from io import BytesIO import aiohttp from typing_extensions import Literal, TypeVar @@ -28,6 +27,7 @@ def __init__( headers: Dict[str, str] = None, data: Union[bytes, None] = None, stream: Union[bool, None] = False, + files: Union[Dict[str, Any], None] = None, # Add files parameter ): self.path = path self.params = params @@ -38,6 +38,7 @@ def __init__( self.headers = headers or {"Content-Type": "application/json"} self.disable_request_logging = config.get("disable_request_logging") self.stream = stream + self.files = files # Store files for multipart requests def __convert_params( self, params: Union[Dict[Any, Any], List[Dict[Any, Any]]] @@ -171,15 +172,23 @@ def __get_headers(self) -> Dict[str, str]: Dict[str, str]: Configured HTTP Headers """ h = { - "Content-Type": "application/json", "Accept": "application/json", "x-api-key": f"{self.api_key}", } + # only add Content-Type if not using multipart (files) + if not self.files and not self.data: + h["Content-Type"] = "application/json" + if self.disable_request_logging: h["x-jigsaw-no-request-log"] = "true" _headers = h.copy() + + #don't override Content-Type if using multipart + if self.files and "Content-Type" in self.headers: + self.headers.pop("Content-Type") + _headers.update(self.headers) return _headers @@ -231,50 +240,84 @@ async def make_request( self, session: aiohttp.ClientSession, url: str ) -> aiohttp.ClientResponse: headers = self.__get_headers() + params = self.params verb = self.verb data = self.data + files = self.files - # Convert params to string values for URL encoding - converted_params = self.__convert_params(self.params) + _params = None + _json = None + _data = None + _form_data = None if verb.lower() in ["get", "delete"]: + #convert params for URL encoding if needed + _params = self.__convert_params(params) + elif files: + # for multipart requests - matches request.py behavior + _form_data = aiohttp.FormData() + + # add file(s) to form data + for field_name, file_data in files.items(): + if isinstance(file_data, bytes): + # just pass the blob without filename + _form_data.add_field( + field_name, + BytesIO(file_data), + content_type="application/octet-stream" + ) + elif isinstance(file_data, tuple): + # if tuple format (filename, data, content_type) + filename, content, content_type = file_data + _form_data.add_field( + field_name, + content, + filename=filename, + content_type=content_type + ) + + # add params as 'body' field in multipart form (JSON stringified) + if params and isinstance(params, dict): + _form_data.add_field( + "body", + json.dumps(params), + content_type="application/json" + ) + elif data: + # for binary data without multipart + _data = data + # pass params as query parameters for binary uploads + if params and isinstance(params, dict): + _params = self.__convert_params(params) + else: + # for JSON requests + _json = params + + # m,ake the request based on the data type + if _form_data: + return await session.request( + verb, + url, + params=_params, + data=_form_data, + headers=headers, + ) + elif _json is not None: return await session.request( verb, url, - params=converted_params, + params=_params, + json=_json, headers=headers, ) else: - if data is not None: - form_data = aiohttp.FormData() - form_data.add_field( - "file", - BytesIO(data), - content_type=headers.get("Content-Type", "application/octet-stream"), - filename="file", - ) - - if self.params and isinstance(self.params, dict): - form_data.add_field( - "body", json.dumps(self.params), content_type="application/json" - ) - - multipart_headers = headers.copy() - multipart_headers.pop("Content-Type", None) - - return await session.request( - verb, - url, - data=form_data, - headers=multipart_headers, - ) - else: - return await session.request( - verb, - url, - json=self.params, # Keep JSON body as original - headers=headers, - ) + return await session.request( + verb, + url, + params=_params, + data=_data, + headers=headers, + ) def __get_session(self) -> aiohttp.ClientSession: """ From 73d5647a8a1626fb7952d15e9d6bd599f5000449 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Fri, 12 Sep 2025 18:39:02 -0700 Subject: [PATCH 56/95] fix: mutlipart form request for STT. --- jigsawstack/audio.py | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/jigsawstack/audio.py b/jigsawstack/audio.py index cadfd25..043be8c 100644 --- a/jigsawstack/audio.py +++ b/jigsawstack/audio.py @@ -80,11 +80,9 @@ def speech_to_text( ) -> Union[SpeechToTextResponse, SpeechToTextWebhookResponse]: options = options or {} path = "/ai/transcribe" - content_type = options.get("content_type", "application/octet-stream") - headers = {"Content-Type": content_type} - if isinstance( - blob, dict - ): # If params is provided as a dict, we assume it's the first argument + params= options or {} + if isinstance(blob, dict): + # URL or file_store_key based request resp = Request( config=self.config, path=path, @@ -93,13 +91,13 @@ def speech_to_text( ).perform_with_content() return resp + files = {"file": blob} resp = Request( config=self.config, path=path, - params=options, - data=blob, - headers=headers, + params=params, verb="post", + files=files, ).perform_with_content() return resp @@ -136,8 +134,7 @@ async def speech_to_text( ) -> Union[SpeechToTextResponse, SpeechToTextWebhookResponse]: options = options or {} path = "/ai/transcribe" - content_type = options.get("content_type", "application/octet-stream") - headers = {"Content-Type": content_type} + params = options or {} if isinstance(blob, dict): resp = await AsyncRequest( config=self.config, @@ -146,13 +143,13 @@ async def speech_to_text( verb="post", ).perform_with_content() return resp - + + files = {"file": blob} resp = await AsyncRequest( config=self.config, path=path, - params=options, - data=blob, - headers=headers, + params=params, verb="post", + files=files, ).perform_with_content() return resp From caed1b57fbc673b02a51c71ea132b263afe243e5 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Fri, 12 Sep 2025 18:40:38 -0700 Subject: [PATCH 57/95] fix: multiform request for obj-detection. --- jigsawstack/vision.py | 32 ++++++++++++-------------------- 1 file changed, 12 insertions(+), 20 deletions(-) diff --git a/jigsawstack/vision.py b/jigsawstack/vision.py index 6df4e37..ed1d2a9 100644 --- a/jigsawstack/vision.py +++ b/jigsawstack/vision.py @@ -1,4 +1,5 @@ from typing import Any, Dict, List, Optional, Union, cast, overload +from wsgiref import headers from typing_extensions import Literal, NotRequired, TypedDict @@ -159,10 +160,10 @@ class OCRResponse(BaseResponse): tags: List[str] has_text: bool sections: List[object] - total_pages: Optional[int] # Only available for PDFs - page_ranges: Optional[ + total_pages: Optional[int] + page_range: Optional[ List[int] - ] # Only available if page_ranges is set in the request parameters. + ] # Only available if page_range is set in the request parameters. class Vision(ClientConfig): @@ -204,15 +205,13 @@ def vocr( ).perform_with_content() return resp - content_type = options.get("content_type", "application/octet-stream") - headers = {"Content-Type": content_type} - + files ={"file": blob} resp = Request( config=self.config, path=path, params=options, data=blob, - headers=headers, + files=files, verb="post", ).perform_with_content() return resp @@ -239,16 +238,13 @@ def object_detection( verb="post", ).perform_with_content() return resp - - content_type = options.get("content_type", "application/octet-stream") - headers = {"Content-Type": content_type} - + files = {"file": blob} resp = Request( config=self.config, path=path, params=options, data=blob, - headers=headers, + files=files, verb="post", ).perform_with_content() return resp @@ -291,15 +287,13 @@ async def vocr( ).perform_with_content() return resp - content_type = options.get("content_type", "application/octet-stream") - headers = {"Content-Type": content_type} - + files = {"file": blob} resp = await AsyncRequest( config=self.config, path=path, params=options, data=blob, - headers=headers, + files=files, verb="post", ).perform_with_content() return resp @@ -329,15 +323,13 @@ async def object_detection( ).perform_with_content() return resp - content_type = options.get("content_type", "application/octet-stream") - headers = {"Content-Type": content_type} - + files = {"file": blob} resp = await AsyncRequest( config=self.config, path=path, params=options, data=blob, - headers=headers, + files=files, verb="post", ).perform_with_content() return resp From 4146c25464692d468c9b2df261ec23eabff5f978 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Fri, 12 Sep 2025 18:42:50 -0700 Subject: [PATCH 58/95] test: defining test cases for vocr --- tests/test_vocr.py | 260 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 260 insertions(+) create mode 100644 tests/test_vocr.py diff --git a/tests/test_vocr.py b/tests/test_vocr.py new file mode 100644 index 0000000..df809d2 --- /dev/null +++ b/tests/test_vocr.py @@ -0,0 +1,260 @@ +import logging +import os + +import pytest +import requests +from dotenv import load_dotenv + +import jigsawstack +from jigsawstack.exceptions import JigsawStackError + +load_dotenv() + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +jigsaw = jigsawstack.JigsawStack(api_url="http://localhost:3000/api/", api_key=os.getenv("JIGSAWSTACK_API_KEY")) +async_jigsaw = jigsawstack.AsyncJigsawStack(api_url="http://localhost:3000/api/", api_key=os.getenv("JIGSAWSTACK_API_KEY")) + +IMAGE_URL = "https://jigsawstack.com/preview/vocr-example.jpg" + +# PDF URL for testing page_range functionality +PDF_URL = "https://arxiv.org/pdf/1706.03762" + +TEST_CASES = [ + { + "name": "with_url_only", + "params": {"url": IMAGE_URL}, + "blob": None, + "options": None, + }, + { + "name": "with_blob_only", + "params": None, + "blob": IMAGE_URL, + "options": None, + }, + { + "name": "with_string_prompt", + "blob": IMAGE_URL, + "options": {"prompt": "Extract all text from the image"}, + }, + { + "name": "with_list_prompt", + "blob": IMAGE_URL, + "options": { + "prompt": [ + "What is the main heading?", + "Extract any dates mentioned", + "What are the key points?" + ] + }, + }, + { + "name": "with_dict_prompt", + "blob": IMAGE_URL, + "options": { + "prompt": { + "title": "Extract the main title", + "content": "What is the main content?", + "metadata": "Extract any metadata or additional information" + } + }, + }, + { + "name": "url_with_string_prompt", + "params": { + "url": IMAGE_URL, + "prompt": "Summarize the text content" + }, + "blob": None, + "options": None, + }, + { + "name": "url_with_list_prompt", + "params": { + "url": IMAGE_URL, + "prompt": ["Extract headers", "Extract body text"] + }, + "blob": None, + "options": None, + }, +] + +# PDF specific test cases +PDF_TEST_CASES = [ + { + "name": "pdf_with_page_range", + "params": { + "url": PDF_URL, + "page_range": [1, 3], + "prompt": "Extract text from these pages" + }, + "blob": None, + "options": None, + }, + { + "name": "pdf_single_page", + "params": { + "url": PDF_URL, + "page_range": [1, 1], + "prompt": "What is on the first page?" + }, + "blob": None, + "options": None, + }, + { + "name": "pdf_blob_with_page_range", + "blob": PDF_URL, + "options": { + "page_range": [1, 3], + "prompt": "what is this about?" + }, + }, +] + + +class TestVOCRSync: + """Test synchronous VOCR methods""" + + sync_test_cases = TEST_CASES + pdf_test_cases = PDF_TEST_CASES + + @pytest.mark.parametrize( + "test_case", sync_test_cases, ids=[tc["name"] for tc in sync_test_cases] + ) + def test_vocr(self, test_case): + """Test synchronous VOCR with various inputs""" + try: + if test_case.get("blob"): + # Download blob content + blob_content = requests.get(test_case["blob"]).content + result = jigsaw.vision.vocr(blob_content, test_case.get("options", {})) + else: + # Use params directly + result = jigsaw.vision.vocr(test_case["params"]) + + print(f"Test {test_case['name']}: Success={result.get('success')}") + + # Verify response structure + assert result["success"] is True + if "prompt" in (test_case.get("params") or {}): + assert "context" in result + assert "width" in result + assert "height" in result + assert "has_text" in result + assert "tags" in result + assert isinstance(result["tags"], list) + assert "sections" in result + assert isinstance(result["sections"], list) + + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") + + @pytest.mark.parametrize( + "test_case", pdf_test_cases, ids=[tc["name"] for tc in pdf_test_cases] + ) + def test_vocr_pdf(self, test_case): + """Test synchronous VOCR with PDF inputs""" + try: + if test_case.get("blob"): + # Download blob content + blob_content = requests.get(test_case["blob"]).content + result = jigsaw.vision.vocr(blob_content, test_case.get("options", {})) + else: + # Use params directly + result = jigsaw.vision.vocr(test_case["params"]) + + # Verify response structure + assert result["success"] is True + if "prompt" in (test_case.get("params") or {}): + assert "context" in result + assert "total_pages" in result + + if test_case.get("params", {}).get("page_range") or test_case.get("options", {}).get("page_range"): + assert "page_range" in result + assert isinstance(result["page_range"], list) + + logger.info(f"Test {test_case['name']}: total_pages={result.get('total_pages')}") + + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") + + +class TestVOCRAsync: + """Test asynchronous VOCR methods""" + + async_test_cases = TEST_CASES + pdf_test_cases = PDF_TEST_CASES + + @pytest.mark.parametrize( + "test_case", async_test_cases, ids=[tc["name"] for tc in async_test_cases] + ) + @pytest.mark.asyncio + async def test_vocr_async(self, test_case): + """Test asynchronous VOCR with various inputs""" + try: + if test_case.get("blob"): + # Download blob content + blob_content = requests.get(test_case["blob"]).content + result = await async_jigsaw.vision.vocr( + blob_content, test_case.get("options", {}) + ) + else: + # Use params directly + result = await async_jigsaw.vision.vocr(test_case["params"]) + + print(f"Test {test_case['name']}: Success={result.get('success')}") + + # Verify response structure + assert result["success"] is True + if "prompt" in (test_case.get("params") or {}): + assert "context" in result + assert "width" in result + assert "height" in result + assert "has_text" in result + assert "tags" in result + assert isinstance(result["tags"], list) + assert "sections" in result + assert isinstance(result["sections"], list) + + # Log some details + logger.info(f"Test {test_case['name']}: has_text={result['has_text']}, tags={result['tags'][:3] if result['tags'] else []}") + + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") + + @pytest.mark.parametrize( + "test_case", pdf_test_cases, ids=[tc["name"] for tc in pdf_test_cases] + ) + @pytest.mark.asyncio + async def test_vocr_pdf_async(self, test_case): + """Test asynchronous VOCR with PDF inputs""" + try: + if test_case.get("blob"): + # Download blob content + blob_content = requests.get(test_case["blob"]).content + result = await async_jigsaw.vision.vocr( + blob_content, test_case.get("options", {}) + ) + else: + # Use params directly + result = await async_jigsaw.vision.vocr(test_case["params"]) + + print(f"Test {test_case['name']}: Success={result.get('success')}") + + # Verify response structure + assert result["success"] is True + if "prompt" in (test_case.get("params") or {}): + assert "context" in result + assert "total_pages" in result # PDF specific + + # Check if page_range is in response when requested + if test_case.get("params", {}).get("page_range") or test_case.get("options", {}).get("page_range"): + assert "page_range" in result + assert isinstance(result["page_range"], list) + + logger.info(f"Test {test_case['name']}: total_pages={result.get('total_pages')}") + + except JigsawStackError as e: + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") \ No newline at end of file From 411bf893c69ee1e86c797e0e1ac88dc9a199c776 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Fri, 12 Sep 2025 18:54:25 -0700 Subject: [PATCH 59/95] fix: multipart-form request for image translation. --- jigsawstack/translate.py | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/jigsawstack/translate.py b/jigsawstack/translate.py index 63b7fa5..2967514 100644 --- a/jigsawstack/translate.py +++ b/jigsawstack/translate.py @@ -95,6 +95,8 @@ def image( blob: Union[TranslateImageParams, bytes], options: TranslateImageParams = None, ) -> Union[TranslateImageResponse, bytes]: + path = "/ai/translate/image" + options = options or {} if isinstance( blob, dict ): # If params is provided as a dict, we assume it's the first argument @@ -106,17 +108,14 @@ def image( ).perform_with_content() return resp - options = options or {} - path = build_path(base_path="/ai/translate/image", params=options) - content_type = options.get("content_type", "application/octet-stream") - headers = {"Content-Type": content_type} - + + files = {"file": blob} resp = Request( config=self.config, path=path, params=options, data=blob, - headers=headers, + files=files, verb="post", ).perform_with_content() return resp @@ -159,6 +158,8 @@ async def image( blob: Union[TranslateImageParams, bytes], options: TranslateImageParams = None, ) -> Union[TranslateImageResponse, bytes]: + path = "/ai/translate/image" + options = options or {} if isinstance(blob, dict): resp = await AsyncRequest( config=self.config, @@ -168,17 +169,13 @@ async def image( ).perform_with_content() return resp - options = options or {} - path = build_path(base_path="/ai/translate/image", params=options) - content_type = options.get("content_type", "application/octet-stream") - headers = {"Content-Type": content_type} - + files = {"file": blob} resp = await AsyncRequest( config=self.config, path=path, params=options, data=blob, - headers=headers, + files=files, verb="post", ).perform_with_content() return resp From d61fe05b5f56cf1e536348188cbea02130e0bc6f Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Fri, 12 Sep 2025 18:59:38 -0700 Subject: [PATCH 60/95] fix: multipart request for NSFW. --- jigsawstack/validate.py | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/jigsawstack/validate.py b/jigsawstack/validate.py index fc57c3c..5079385 100644 --- a/jigsawstack/validate.py +++ b/jigsawstack/validate.py @@ -99,6 +99,8 @@ def nsfw( blob: Union[NSFWParams, bytes], options: NSFWParams = None, ) -> NSFWResponse: + path = "/validate/nsfw" + options = options or {} if isinstance( blob, dict ): # If params is provided as a dict, we assume it's the first argument @@ -109,18 +111,14 @@ def nsfw( verb="post", ).perform_with_content() return resp - - options = options or {} - path = build_path(base_path="/validate/nsfw", params=options) - content_type = options.get("content_type", "application/octet-stream") - headers = {"Content-Type": content_type} - + + files = {"file": blob} resp = Request( config=self.config, path=path, params=options, data=blob, - headers=headers, + files=files, verb="post", ).perform_with_content() return resp @@ -188,28 +186,26 @@ async def nsfw( blob: Union[NSFWParams, bytes], options: NSFWParams = None, ) -> NSFWResponse: + path = "/validate/nsfw" + options = options or {} if isinstance( blob, dict ): # If params is provided as a dict, we assume it's the first argument resp = await AsyncRequest( config=self.config, - path="/validate/nsfw", + path=path, params=cast(Dict[Any, Any], blob), verb="post", ).perform_with_content() return resp - - options = options or {} - path = build_path(base_path="/validate/nsfw", params=options) - content_type = options.get("content_type", "application/octet-stream") - headers = {"Content-Type": content_type} - + + files = {"file": blob} resp = await AsyncRequest( config=self.config, path=path, params=options, data=blob, - headers=headers, + files=files, verb="post", ).perform_with_content() return resp From 306eeb56acc71c26a8592d9a30377bd0e73bc2b8 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Fri, 12 Sep 2025 19:04:20 -0700 Subject: [PATCH 61/95] fix: multipartform request. --- jigsawstack/embedding.py | 20 ++++++++------------ jigsawstack/embedding_v2.py | 18 ++++++------------ 2 files changed, 14 insertions(+), 24 deletions(-) diff --git a/jigsawstack/embedding.py b/jigsawstack/embedding.py index cd755f0..511f9d1 100644 --- a/jigsawstack/embedding.py +++ b/jigsawstack/embedding.py @@ -1,3 +1,4 @@ +from importlib.metadata import files from typing import Any, Dict, List, Literal, Union, cast, overload from typing_extensions import NotRequired, TypedDict @@ -55,6 +56,7 @@ def execute( options: EmbeddingParams = None, ) -> EmbeddingResponse: path = "/embedding" + options = options or {} if isinstance(blob, dict): resp = Request( config=self.config, @@ -64,17 +66,14 @@ def execute( ).perform_with_content() return resp - options = options or {} - path = build_path(base_path=path, params=options) - content_type = options.get("content_type", "application/octet-stream") - _headers = {"Content-Type": content_type} - + + files = {"file": blob} resp = Request( config=self.config, path=path, params=options, data=blob, - headers=_headers, + files=files, verb="post", ).perform_with_content() return resp @@ -107,6 +106,7 @@ async def execute( options: EmbeddingParams = None, ) -> EmbeddingResponse: path = "/embedding" + options = options or {} if isinstance(blob, dict): resp = await AsyncRequest( config=self.config, @@ -116,17 +116,13 @@ async def execute( ).perform_with_content() return resp - options = options or {} - path = build_path(base_path=path, params=options) - content_type = options.get("content_type", "application/octet-stream") - _headers = {"Content-Type": content_type} - + files = {"file": blob} resp = await AsyncRequest( config=self.config, path=path, params=options, data=blob, - headers=_headers, + files=files, verb="post", ).perform_with_content() return resp diff --git a/jigsawstack/embedding_v2.py b/jigsawstack/embedding_v2.py index fe62f69..6ce501b 100644 --- a/jigsawstack/embedding_v2.py +++ b/jigsawstack/embedding_v2.py @@ -53,6 +53,7 @@ def execute( options: EmbeddingV2Params = None, ) -> EmbeddingV2Response: path = "/embedding" + options = options or {} if isinstance(blob, dict): resp = Request( config=self.config, @@ -62,17 +63,13 @@ def execute( ).perform_with_content() return resp - options = options or {} - path = build_path(base_path=path, params=options) - content_type = options.get("content_type", "application/octet-stream") - _headers = {"Content-Type": content_type} - + files = {"file": blob} resp = Request( config=self.config, path=path, params=options, data=blob, - headers=_headers, + files=files, verb="post", ).perform_with_content() return resp @@ -107,6 +104,7 @@ async def execute( options: EmbeddingV2Params = None, ) -> EmbeddingV2Response: path = "/embedding" + options = options or {} if isinstance(blob, dict): resp = await AsyncRequest( config=self.config, @@ -116,17 +114,13 @@ async def execute( ).perform_with_content() return resp - options = options or {} - path = build_path(base_path=path, params=options) - content_type = options.get("content_type", "application/octet-stream") - _headers = {"Content-Type": content_type} - + files = {"file": blob} resp = await AsyncRequest( config=self.config, path=path, params=options, data=blob, - headers=_headers, + files=files, verb="post", ).perform_with_content() return resp From b8bbf70dd7b97da4c9a30aee3ec748d2b4d3875f Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Fri, 12 Sep 2025 19:05:04 -0700 Subject: [PATCH 62/95] chore: formatting. --- jigsawstack/async_request.py | 27 ++++------- jigsawstack/audio.py | 6 +-- jigsawstack/embedding.py | 1 - jigsawstack/request.py | 6 +-- jigsawstack/translate.py | 1 - jigsawstack/validate.py | 4 +- jigsawstack/vision.py | 4 +- tests/test_vocr.py | 91 +++++++++++++++--------------------- 8 files changed, 57 insertions(+), 83 deletions(-) diff --git a/jigsawstack/async_request.py b/jigsawstack/async_request.py index 1fbf44d..a1fc5bf 100644 --- a/jigsawstack/async_request.py +++ b/jigsawstack/async_request.py @@ -184,11 +184,11 @@ def __get_headers(self) -> Dict[str, str]: h["x-jigsaw-no-request-log"] = "true" _headers = h.copy() - - #don't override Content-Type if using multipart + + # don't override Content-Type if using multipart if self.files and "Content-Type" in self.headers: self.headers.pop("Content-Type") - + _headers.update(self.headers) return _headers @@ -251,38 +251,29 @@ async def make_request( _form_data = None if verb.lower() in ["get", "delete"]: - #convert params for URL encoding if needed + # convert params for URL encoding if needed _params = self.__convert_params(params) elif files: # for multipart requests - matches request.py behavior _form_data = aiohttp.FormData() - + # add file(s) to form data for field_name, file_data in files.items(): if isinstance(file_data, bytes): # just pass the blob without filename _form_data.add_field( - field_name, - BytesIO(file_data), - content_type="application/octet-stream" + field_name, BytesIO(file_data), content_type="application/octet-stream" ) elif isinstance(file_data, tuple): # if tuple format (filename, data, content_type) filename, content, content_type = file_data _form_data.add_field( - field_name, - content, - filename=filename, - content_type=content_type + field_name, content, filename=filename, content_type=content_type ) - + # add params as 'body' field in multipart form (JSON stringified) if params and isinstance(params, dict): - _form_data.add_field( - "body", - json.dumps(params), - content_type="application/json" - ) + _form_data.add_field("body", json.dumps(params), content_type="application/json") elif data: # for binary data without multipart _data = data diff --git a/jigsawstack/audio.py b/jigsawstack/audio.py index 043be8c..47c2cd8 100644 --- a/jigsawstack/audio.py +++ b/jigsawstack/audio.py @@ -80,8 +80,8 @@ def speech_to_text( ) -> Union[SpeechToTextResponse, SpeechToTextWebhookResponse]: options = options or {} path = "/ai/transcribe" - params= options or {} - if isinstance(blob, dict): + params = options or {} + if isinstance(blob, dict): # URL or file_store_key based request resp = Request( config=self.config, @@ -143,7 +143,7 @@ async def speech_to_text( verb="post", ).perform_with_content() return resp - + files = {"file": blob} resp = await AsyncRequest( config=self.config, diff --git a/jigsawstack/embedding.py b/jigsawstack/embedding.py index 511f9d1..edbef82 100644 --- a/jigsawstack/embedding.py +++ b/jigsawstack/embedding.py @@ -66,7 +66,6 @@ def execute( ).perform_with_content() return resp - files = {"file": blob} resp = Request( config=self.config, diff --git a/jigsawstack/request.py b/jigsawstack/request.py index 069d65a..0aa1a40 100644 --- a/jigsawstack/request.py +++ b/jigsawstack/request.py @@ -158,7 +158,7 @@ def __get_headers(self) -> Dict[Any, Any]: "Accept": "application/json", "x-api-key": f"{self.api_key}", } - + # Only add Content-Type if not using multipart (files) if not self.files and not self.data: h["Content-Type"] = "application/json" @@ -167,11 +167,11 @@ def __get_headers(self) -> Dict[Any, Any]: h["x-jigsaw-no-request-log"] = "true" _headers = h.copy() - + # Don't override Content-Type if using multipart if self.files and "Content-Type" in self.headers: self.headers.pop("Content-Type") - + _headers.update(self.headers) return _headers diff --git a/jigsawstack/translate.py b/jigsawstack/translate.py index 2967514..d8f9974 100644 --- a/jigsawstack/translate.py +++ b/jigsawstack/translate.py @@ -108,7 +108,6 @@ def image( ).perform_with_content() return resp - files = {"file": blob} resp = Request( config=self.config, diff --git a/jigsawstack/validate.py b/jigsawstack/validate.py index 5079385..6ee4040 100644 --- a/jigsawstack/validate.py +++ b/jigsawstack/validate.py @@ -111,7 +111,7 @@ def nsfw( verb="post", ).perform_with_content() return resp - + files = {"file": blob} resp = Request( config=self.config, @@ -198,7 +198,7 @@ async def nsfw( verb="post", ).perform_with_content() return resp - + files = {"file": blob} resp = await AsyncRequest( config=self.config, diff --git a/jigsawstack/vision.py b/jigsawstack/vision.py index ed1d2a9..a8bb3af 100644 --- a/jigsawstack/vision.py +++ b/jigsawstack/vision.py @@ -160,7 +160,7 @@ class OCRResponse(BaseResponse): tags: List[str] has_text: bool sections: List[object] - total_pages: Optional[int] + total_pages: Optional[int] page_range: Optional[ List[int] ] # Only available if page_range is set in the request parameters. @@ -205,7 +205,7 @@ def vocr( ).perform_with_content() return resp - files ={"file": blob} + files = {"file": blob} resp = Request( config=self.config, path=path, diff --git a/tests/test_vocr.py b/tests/test_vocr.py index df809d2..13c7a32 100644 --- a/tests/test_vocr.py +++ b/tests/test_vocr.py @@ -13,8 +13,12 @@ logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) -jigsaw = jigsawstack.JigsawStack(api_url="http://localhost:3000/api/", api_key=os.getenv("JIGSAWSTACK_API_KEY")) -async_jigsaw = jigsawstack.AsyncJigsawStack(api_url="http://localhost:3000/api/", api_key=os.getenv("JIGSAWSTACK_API_KEY")) +jigsaw = jigsawstack.JigsawStack( + api_url="http://localhost:3000/api/", api_key=os.getenv("JIGSAWSTACK_API_KEY") +) +async_jigsaw = jigsawstack.AsyncJigsawStack( + api_url="http://localhost:3000/api/", api_key=os.getenv("JIGSAWSTACK_API_KEY") +) IMAGE_URL = "https://jigsawstack.com/preview/vocr-example.jpg" @@ -46,7 +50,7 @@ "prompt": [ "What is the main heading?", "Extract any dates mentioned", - "What are the key points?" + "What are the key points?", ] }, }, @@ -57,25 +61,19 @@ "prompt": { "title": "Extract the main title", "content": "What is the main content?", - "metadata": "Extract any metadata or additional information" + "metadata": "Extract any metadata or additional information", } }, }, { "name": "url_with_string_prompt", - "params": { - "url": IMAGE_URL, - "prompt": "Summarize the text content" - }, + "params": {"url": IMAGE_URL, "prompt": "Summarize the text content"}, "blob": None, "options": None, }, { "name": "url_with_list_prompt", - "params": { - "url": IMAGE_URL, - "prompt": ["Extract headers", "Extract body text"] - }, + "params": {"url": IMAGE_URL, "prompt": ["Extract headers", "Extract body text"]}, "blob": None, "options": None, }, @@ -85,31 +83,20 @@ PDF_TEST_CASES = [ { "name": "pdf_with_page_range", - "params": { - "url": PDF_URL, - "page_range": [1, 3], - "prompt": "Extract text from these pages" - }, + "params": {"url": PDF_URL, "page_range": [1, 3], "prompt": "Extract text from these pages"}, "blob": None, "options": None, }, { "name": "pdf_single_page", - "params": { - "url": PDF_URL, - "page_range": [1, 1], - "prompt": "What is on the first page?" - }, + "params": {"url": PDF_URL, "page_range": [1, 1], "prompt": "What is on the first page?"}, "blob": None, "options": None, }, { "name": "pdf_blob_with_page_range", "blob": PDF_URL, - "options": { - "page_range": [1, 3], - "prompt": "what is this about?" - }, + "options": {"page_range": [1, 3], "prompt": "what is this about?"}, }, ] @@ -135,7 +122,7 @@ def test_vocr(self, test_case): result = jigsaw.vision.vocr(test_case["params"]) print(f"Test {test_case['name']}: Success={result.get('success')}") - + # Verify response structure assert result["success"] is True if "prompt" in (test_case.get("params") or {}): @@ -147,13 +134,11 @@ def test_vocr(self, test_case): assert isinstance(result["tags"], list) assert "sections" in result assert isinstance(result["sections"], list) - + except JigsawStackError as e: pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") - @pytest.mark.parametrize( - "test_case", pdf_test_cases, ids=[tc["name"] for tc in pdf_test_cases] - ) + @pytest.mark.parametrize("test_case", pdf_test_cases, ids=[tc["name"] for tc in pdf_test_cases]) def test_vocr_pdf(self, test_case): """Test synchronous VOCR with PDF inputs""" try: @@ -164,19 +149,21 @@ def test_vocr_pdf(self, test_case): else: # Use params directly result = jigsaw.vision.vocr(test_case["params"]) - + # Verify response structure assert result["success"] is True if "prompt" in (test_case.get("params") or {}): assert "context" in result assert "total_pages" in result - - if test_case.get("params", {}).get("page_range") or test_case.get("options", {}).get("page_range"): + + if test_case.get("params", {}).get("page_range") or test_case.get("options", {}).get( + "page_range" + ): assert "page_range" in result assert isinstance(result["page_range"], list) logger.info(f"Test {test_case['name']}: total_pages={result.get('total_pages')}") - + except JigsawStackError as e: pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") @@ -197,15 +184,13 @@ async def test_vocr_async(self, test_case): if test_case.get("blob"): # Download blob content blob_content = requests.get(test_case["blob"]).content - result = await async_jigsaw.vision.vocr( - blob_content, test_case.get("options", {}) - ) + result = await async_jigsaw.vision.vocr(blob_content, test_case.get("options", {})) else: # Use params directly result = await async_jigsaw.vision.vocr(test_case["params"]) print(f"Test {test_case['name']}: Success={result.get('success')}") - + # Verify response structure assert result["success"] is True if "prompt" in (test_case.get("params") or {}): @@ -217,16 +202,16 @@ async def test_vocr_async(self, test_case): assert isinstance(result["tags"], list) assert "sections" in result assert isinstance(result["sections"], list) - + # Log some details - logger.info(f"Test {test_case['name']}: has_text={result['has_text']}, tags={result['tags'][:3] if result['tags'] else []}") - + logger.info( + f"Test {test_case['name']}: has_text={result['has_text']}, tags={result['tags'][:3] if result['tags'] else []}" + ) + except JigsawStackError as e: pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") - @pytest.mark.parametrize( - "test_case", pdf_test_cases, ids=[tc["name"] for tc in pdf_test_cases] - ) + @pytest.mark.parametrize("test_case", pdf_test_cases, ids=[tc["name"] for tc in pdf_test_cases]) @pytest.mark.asyncio async def test_vocr_pdf_async(self, test_case): """Test asynchronous VOCR with PDF inputs""" @@ -234,27 +219,27 @@ async def test_vocr_pdf_async(self, test_case): if test_case.get("blob"): # Download blob content blob_content = requests.get(test_case["blob"]).content - result = await async_jigsaw.vision.vocr( - blob_content, test_case.get("options", {}) - ) + result = await async_jigsaw.vision.vocr(blob_content, test_case.get("options", {})) else: # Use params directly result = await async_jigsaw.vision.vocr(test_case["params"]) print(f"Test {test_case['name']}: Success={result.get('success')}") - + # Verify response structure assert result["success"] is True if "prompt" in (test_case.get("params") or {}): assert "context" in result assert "total_pages" in result # PDF specific - + # Check if page_range is in response when requested - if test_case.get("params", {}).get("page_range") or test_case.get("options", {}).get("page_range"): + if test_case.get("params", {}).get("page_range") or test_case.get("options", {}).get( + "page_range" + ): assert "page_range" in result assert isinstance(result["page_range"], list) logger.info(f"Test {test_case['name']}: total_pages={result.get('total_pages')}") - + except JigsawStackError as e: - pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") \ No newline at end of file + pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") From f5b73b25d4d75fe95cf1de2901f388ff48f105ce Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Fri, 12 Sep 2025 19:05:31 -0700 Subject: [PATCH 63/95] fix: linting --- jigsawstack/async_request.py | 3 ++- jigsawstack/embedding.py | 2 -- jigsawstack/embedding_v2.py | 1 - jigsawstack/request.py | 1 - jigsawstack/translate.py | 1 - jigsawstack/vision.py | 1 - 6 files changed, 2 insertions(+), 7 deletions(-) diff --git a/jigsawstack/async_request.py b/jigsawstack/async_request.py index a1fc5bf..55fdfa1 100644 --- a/jigsawstack/async_request.py +++ b/jigsawstack/async_request.py @@ -1,6 +1,7 @@ import json -from typing import Any, AsyncGenerator, Dict, Generic, List, TypedDict, Union, cast from io import BytesIO +from typing import Any, AsyncGenerator, Dict, Generic, List, TypedDict, Union, cast + import aiohttp from typing_extensions import Literal, TypeVar diff --git a/jigsawstack/embedding.py b/jigsawstack/embedding.py index edbef82..203dc23 100644 --- a/jigsawstack/embedding.py +++ b/jigsawstack/embedding.py @@ -1,4 +1,3 @@ -from importlib.metadata import files from typing import Any, Dict, List, Literal, Union, cast, overload from typing_extensions import NotRequired, TypedDict @@ -6,7 +5,6 @@ from ._config import ClientConfig from ._types import BaseResponse from .async_request import AsyncRequest -from .helpers import build_path from .request import Request, RequestConfig diff --git a/jigsawstack/embedding_v2.py b/jigsawstack/embedding_v2.py index 6ce501b..b9514f9 100644 --- a/jigsawstack/embedding_v2.py +++ b/jigsawstack/embedding_v2.py @@ -5,7 +5,6 @@ from ._config import ClientConfig from .async_request import AsyncRequest from .embedding import Chunk -from .helpers import build_path from .request import Request, RequestConfig diff --git a/jigsawstack/request.py b/jigsawstack/request.py index 0aa1a40..84ef8cf 100644 --- a/jigsawstack/request.py +++ b/jigsawstack/request.py @@ -1,4 +1,3 @@ -from importlib.resources import files import json from typing import Any, Dict, Generator, Generic, List, TypedDict, Union, cast diff --git a/jigsawstack/translate.py b/jigsawstack/translate.py index d8f9974..42b50dd 100644 --- a/jigsawstack/translate.py +++ b/jigsawstack/translate.py @@ -5,7 +5,6 @@ from ._config import ClientConfig from ._types import BaseResponse from .async_request import AsyncRequest -from .helpers import build_path from .request import Request, RequestConfig diff --git a/jigsawstack/vision.py b/jigsawstack/vision.py index a8bb3af..fe1d94a 100644 --- a/jigsawstack/vision.py +++ b/jigsawstack/vision.py @@ -1,5 +1,4 @@ from typing import Any, Dict, List, Optional, Union, cast, overload -from wsgiref import headers from typing_extensions import Literal, NotRequired, TypedDict From 085907bbd6bd7790251e433c8b660c7b91871204 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Fri, 12 Sep 2025 19:07:02 -0700 Subject: [PATCH 64/95] test: updating ci to include vocr tests. --- .github/workflows/ci.yml | 1 + tests/test_vocr.py | 8 ++------ 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b1f5b26..1eea9f3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -48,6 +48,7 @@ jobs: - test_web.py - test_deep_research.py - test_ai_scrape.py + - test_vocr.py steps: - uses: actions/checkout@v4 diff --git a/tests/test_vocr.py b/tests/test_vocr.py index 13c7a32..d233484 100644 --- a/tests/test_vocr.py +++ b/tests/test_vocr.py @@ -13,12 +13,8 @@ logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) -jigsaw = jigsawstack.JigsawStack( - api_url="http://localhost:3000/api/", api_key=os.getenv("JIGSAWSTACK_API_KEY") -) -async_jigsaw = jigsawstack.AsyncJigsawStack( - api_url="http://localhost:3000/api/", api_key=os.getenv("JIGSAWSTACK_API_KEY") -) +jigsaw = jigsawstack.JigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) +async_jigsaw = jigsawstack.AsyncJigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) IMAGE_URL = "https://jigsawstack.com/preview/vocr-example.jpg" From b5ec3b3fe71bdc8b7130141ecf88b5912f1440fe Mon Sep 17 00:00:00 2001 From: Win Cheng Date: Sun, 14 Sep 2025 22:21:00 -0700 Subject: [PATCH 65/95] rm unnecessary params --- jigsawstack/audio.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/jigsawstack/audio.py b/jigsawstack/audio.py index 47c2cd8..2f004c4 100644 --- a/jigsawstack/audio.py +++ b/jigsawstack/audio.py @@ -80,7 +80,6 @@ def speech_to_text( ) -> Union[SpeechToTextResponse, SpeechToTextWebhookResponse]: options = options or {} path = "/ai/transcribe" - params = options or {} if isinstance(blob, dict): # URL or file_store_key based request resp = Request( @@ -95,7 +94,7 @@ def speech_to_text( resp = Request( config=self.config, path=path, - params=params, + params=options, verb="post", files=files, ).perform_with_content() From 762ce6af5cb03e1bf0888a346396c167d5f257f2 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Mon, 15 Sep 2025 14:24:54 -0700 Subject: [PATCH 66/95] fix: drop unnecessary data param, as every request is not multipartform. --- jigsawstack/async_request.py | 68 ++++++++---------------------------- jigsawstack/audio.py | 3 +- jigsawstack/embedding.py | 10 +++--- jigsawstack/embedding_v2.py | 6 ++-- jigsawstack/request.py | 28 ++++++--------- jigsawstack/translate.py | 10 +++--- jigsawstack/validate.py | 2 -- jigsawstack/version.py | 2 +- jigsawstack/vision.py | 12 +++---- 9 files changed, 49 insertions(+), 92 deletions(-) diff --git a/jigsawstack/async_request.py b/jigsawstack/async_request.py index 55fdfa1..dc0c063 100644 --- a/jigsawstack/async_request.py +++ b/jigsawstack/async_request.py @@ -243,7 +243,6 @@ async def make_request( headers = self.__get_headers() params = self.params verb = self.verb - data = self.data files = self.files _params = None @@ -252,64 +251,27 @@ async def make_request( _form_data = None if verb.lower() in ["get", "delete"]: - # convert params for URL encoding if needed _params = self.__convert_params(params) elif files: - # for multipart requests - matches request.py behavior _form_data = aiohttp.FormData() - - # add file(s) to form data - for field_name, file_data in files.items(): - if isinstance(file_data, bytes): - # just pass the blob without filename - _form_data.add_field( - field_name, BytesIO(file_data), content_type="application/octet-stream" - ) - elif isinstance(file_data, tuple): - # if tuple format (filename, data, content_type) - filename, content, content_type = file_data - _form_data.add_field( - field_name, content, filename=filename, content_type=content_type - ) - - # add params as 'body' field in multipart form (JSON stringified) + _form_data.add_field("file", BytesIO(files["file"]), filename="upload") if params and isinstance(params, dict): - _form_data.add_field("body", json.dumps(params), content_type="application/json") - elif data: - # for binary data without multipart - _data = data - # pass params as query parameters for binary uploads - if params and isinstance(params, dict): - _params = self.__convert_params(params) - else: - # for JSON requests + _form_data.add_field( + "body", json.dumps(params), content_type="application/json" + ) + + headers.pop("Content-Type", None) + else: # pure JSON request _json = params - # m,ake the request based on the data type - if _form_data: - return await session.request( - verb, - url, - params=_params, - data=_form_data, - headers=headers, - ) - elif _json is not None: - return await session.request( - verb, - url, - params=_params, - json=_json, - headers=headers, - ) - else: - return await session.request( - verb, - url, - params=_params, - data=_data, - headers=headers, - ) + return await session.request( + verb, + url, + params=_params, + json=_json, + data=_form_data or _data, + headers=headers, + ) def __get_session(self) -> aiohttp.ClientSession: """ diff --git a/jigsawstack/audio.py b/jigsawstack/audio.py index 2f004c4..7dab251 100644 --- a/jigsawstack/audio.py +++ b/jigsawstack/audio.py @@ -133,7 +133,6 @@ async def speech_to_text( ) -> Union[SpeechToTextResponse, SpeechToTextWebhookResponse]: options = options or {} path = "/ai/transcribe" - params = options or {} if isinstance(blob, dict): resp = await AsyncRequest( config=self.config, @@ -147,7 +146,7 @@ async def speech_to_text( resp = await AsyncRequest( config=self.config, path=path, - params=params, + params=options, verb="post", files=files, ).perform_with_content() diff --git a/jigsawstack/embedding.py b/jigsawstack/embedding.py index 203dc23..c091896 100644 --- a/jigsawstack/embedding.py +++ b/jigsawstack/embedding.py @@ -46,7 +46,9 @@ def __init__( @overload def execute(self, params: EmbeddingParams) -> EmbeddingResponse: ... @overload - def execute(self, blob: bytes, options: EmbeddingParams = None) -> EmbeddingResponse: ... + def execute( + self, blob: bytes, options: EmbeddingParams = None + ) -> EmbeddingResponse: ... def execute( self, @@ -69,7 +71,6 @@ def execute( config=self.config, path=path, params=options, - data=blob, files=files, verb="post", ).perform_with_content() @@ -95,7 +96,9 @@ def __init__( @overload async def execute(self, params: EmbeddingParams) -> EmbeddingResponse: ... @overload - async def execute(self, blob: bytes, options: EmbeddingParams = None) -> EmbeddingResponse: ... + async def execute( + self, blob: bytes, options: EmbeddingParams = None + ) -> EmbeddingResponse: ... async def execute( self, @@ -118,7 +121,6 @@ async def execute( config=self.config, path=path, params=options, - data=blob, files=files, verb="post", ).perform_with_content() diff --git a/jigsawstack/embedding_v2.py b/jigsawstack/embedding_v2.py index b9514f9..4447e8c 100644 --- a/jigsawstack/embedding_v2.py +++ b/jigsawstack/embedding_v2.py @@ -44,7 +44,9 @@ def __init__( @overload def execute(self, params: EmbeddingV2Params) -> EmbeddingV2Response: ... @overload - def execute(self, blob: bytes, options: EmbeddingV2Params = None) -> EmbeddingV2Response: ... + def execute( + self, blob: bytes, options: EmbeddingV2Params = None + ) -> EmbeddingV2Response: ... def execute( self, @@ -67,7 +69,6 @@ def execute( config=self.config, path=path, params=options, - data=blob, files=files, verb="post", ).perform_with_content() @@ -118,7 +119,6 @@ async def execute( config=self.config, path=path, params=options, - data=blob, files=files, verb="post", ).perform_with_content() diff --git a/jigsawstack/request.py b/jigsawstack/request.py index 84ef8cf..e824457 100644 --- a/jigsawstack/request.py +++ b/jigsawstack/request.py @@ -28,7 +28,7 @@ def __init__( headers: Dict[str, str] = None, data: Union[bytes, None] = None, stream: Union[bool, None] = False, - files: Union[Dict[str, Any], None] = None, # Change from 'file' to 'files' + files: Union[Dict[str, Any], None] = None, ): self.path = path self.params = params @@ -39,7 +39,7 @@ def __init__( self.headers = headers or {"Content-Type": "application/json"} self.disable_request_logging = config.get("disable_request_logging") self.stream = stream - self.files = files # Change from 'file' to 'files' + self.files = files def perform(self) -> Union[T, None]: """Is the main function that makes the HTTP request @@ -93,7 +93,10 @@ def perform_file(self) -> Union[T, None]: # handle error in case there is a statusCode attr present # and status != 200 and response is a json. - if "application/json" not in resp.headers["content-type"] and resp.status_code != 200: + if ( + "application/json" not in resp.headers["content-type"] + and resp.status_code != 200 + ): raise_for_code_and_type( code=500, message="Failed to parse JigsawStack API response. Please try again.", @@ -253,7 +256,7 @@ def make_request(self, url: str) -> requests.Response: params = self.params verb = self.verb data = self.data - files = self.files # Change from 'file' to 'files' + files = self.files _requestParams = None _json = None @@ -262,23 +265,14 @@ def make_request(self, url: str) -> requests.Response: if verb.lower() in ["get", "delete"]: _requestParams = params - elif files: - # For multipart requests + elif files: # multipart request _files = files - # Add params as 'body' field in multipart form (JSON stringified) if params and isinstance(params, dict): - # Convert params to JSON string and add as 'body' field _data = {"body": json.dumps(params)} - elif data: - # For binary data without multipart - _data = data - # Pass params as query parameters for binary uploads - if params and isinstance(params, dict): - _requestParams = params - else: - # For JSON requests - _json = params + headers.pop("Content-Type", None) # let requests set it for multipart + else: # pure JSON request + _json = params try: return requests.request( verb, diff --git a/jigsawstack/translate.py b/jigsawstack/translate.py index 42b50dd..e96e37f 100644 --- a/jigsawstack/translate.py +++ b/jigsawstack/translate.py @@ -83,7 +83,9 @@ def text(self, params: TranslateParams) -> TranslateResponse: return resp @overload - def image(self, params: TranslateImageParams) -> Union[TranslateImageResponse, bytes]: ... + def image( + self, params: TranslateImageParams + ) -> Union[TranslateImageResponse, bytes]: ... @overload def image( self, blob: bytes, options: TranslateImageParams = None @@ -112,7 +114,6 @@ def image( config=self.config, path=path, params=options, - data=blob, files=files, verb="post", ).perform_with_content() @@ -145,7 +146,9 @@ async def text(self, params: TranslateParams) -> TranslateResponse: return resp @overload - async def image(self, params: TranslateImageParams) -> Union[TranslateImageResponse, bytes]: ... + async def image( + self, params: TranslateImageParams + ) -> Union[TranslateImageResponse, bytes]: ... @overload async def image( self, blob: bytes, options: TranslateImageParams = None @@ -172,7 +175,6 @@ async def image( config=self.config, path=path, params=options, - data=blob, files=files, verb="post", ).perform_with_content() diff --git a/jigsawstack/validate.py b/jigsawstack/validate.py index 6ee4040..774aef4 100644 --- a/jigsawstack/validate.py +++ b/jigsawstack/validate.py @@ -117,7 +117,6 @@ def nsfw( config=self.config, path=path, params=options, - data=blob, files=files, verb="post", ).perform_with_content() @@ -204,7 +203,6 @@ async def nsfw( config=self.config, path=path, params=options, - data=blob, files=files, verb="post", ).perform_with_content() diff --git a/jigsawstack/version.py b/jigsawstack/version.py index 44573a9..95b9715 100644 --- a/jigsawstack/version.py +++ b/jigsawstack/version.py @@ -1,4 +1,4 @@ -__version__ = "0.3.3" +__version__ = "0.3.4" def get_version() -> str: diff --git a/jigsawstack/vision.py b/jigsawstack/vision.py index fe1d94a..97e87cb 100644 --- a/jigsawstack/vision.py +++ b/jigsawstack/vision.py @@ -209,14 +209,15 @@ def vocr( config=self.config, path=path, params=options, - data=blob, files=files, verb="post", ).perform_with_content() return resp @overload - def object_detection(self, params: ObjectDetectionParams) -> ObjectDetectionResponse: ... + def object_detection( + self, params: ObjectDetectionParams + ) -> ObjectDetectionResponse: ... @overload def object_detection( self, blob: bytes, options: ObjectDetectionParams = None @@ -242,7 +243,6 @@ def object_detection( config=self.config, path=path, params=options, - data=blob, files=files, verb="post", ).perform_with_content() @@ -291,14 +291,15 @@ async def vocr( config=self.config, path=path, params=options, - data=blob, files=files, verb="post", ).perform_with_content() return resp @overload - async def object_detection(self, params: ObjectDetectionParams) -> ObjectDetectionResponse: ... + async def object_detection( + self, params: ObjectDetectionParams + ) -> ObjectDetectionResponse: ... @overload async def object_detection( self, blob: bytes, options: ObjectDetectionParams = None @@ -327,7 +328,6 @@ async def object_detection( config=self.config, path=path, params=options, - data=blob, files=files, verb="post", ).perform_with_content() From 2127ca15bdc1dcabbe02b1d25a41cc83dc8cd295 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Mon, 15 Sep 2025 14:26:32 -0700 Subject: [PATCH 67/95] chore: ruff formatting. --- jigsawstack/async_request.py | 4 +--- jigsawstack/embedding.py | 8 ++------ jigsawstack/embedding_v2.py | 4 +--- jigsawstack/request.py | 5 +---- jigsawstack/translate.py | 8 ++------ jigsawstack/vision.py | 8 ++------ 6 files changed, 9 insertions(+), 28 deletions(-) diff --git a/jigsawstack/async_request.py b/jigsawstack/async_request.py index dc0c063..a462ae6 100644 --- a/jigsawstack/async_request.py +++ b/jigsawstack/async_request.py @@ -256,9 +256,7 @@ async def make_request( _form_data = aiohttp.FormData() _form_data.add_field("file", BytesIO(files["file"]), filename="upload") if params and isinstance(params, dict): - _form_data.add_field( - "body", json.dumps(params), content_type="application/json" - ) + _form_data.add_field("body", json.dumps(params), content_type="application/json") headers.pop("Content-Type", None) else: # pure JSON request diff --git a/jigsawstack/embedding.py b/jigsawstack/embedding.py index c091896..9611537 100644 --- a/jigsawstack/embedding.py +++ b/jigsawstack/embedding.py @@ -46,9 +46,7 @@ def __init__( @overload def execute(self, params: EmbeddingParams) -> EmbeddingResponse: ... @overload - def execute( - self, blob: bytes, options: EmbeddingParams = None - ) -> EmbeddingResponse: ... + def execute(self, blob: bytes, options: EmbeddingParams = None) -> EmbeddingResponse: ... def execute( self, @@ -96,9 +94,7 @@ def __init__( @overload async def execute(self, params: EmbeddingParams) -> EmbeddingResponse: ... @overload - async def execute( - self, blob: bytes, options: EmbeddingParams = None - ) -> EmbeddingResponse: ... + async def execute(self, blob: bytes, options: EmbeddingParams = None) -> EmbeddingResponse: ... async def execute( self, diff --git a/jigsawstack/embedding_v2.py b/jigsawstack/embedding_v2.py index 4447e8c..0192f52 100644 --- a/jigsawstack/embedding_v2.py +++ b/jigsawstack/embedding_v2.py @@ -44,9 +44,7 @@ def __init__( @overload def execute(self, params: EmbeddingV2Params) -> EmbeddingV2Response: ... @overload - def execute( - self, blob: bytes, options: EmbeddingV2Params = None - ) -> EmbeddingV2Response: ... + def execute(self, blob: bytes, options: EmbeddingV2Params = None) -> EmbeddingV2Response: ... def execute( self, diff --git a/jigsawstack/request.py b/jigsawstack/request.py index e824457..038c540 100644 --- a/jigsawstack/request.py +++ b/jigsawstack/request.py @@ -93,10 +93,7 @@ def perform_file(self) -> Union[T, None]: # handle error in case there is a statusCode attr present # and status != 200 and response is a json. - if ( - "application/json" not in resp.headers["content-type"] - and resp.status_code != 200 - ): + if "application/json" not in resp.headers["content-type"] and resp.status_code != 200: raise_for_code_and_type( code=500, message="Failed to parse JigsawStack API response. Please try again.", diff --git a/jigsawstack/translate.py b/jigsawstack/translate.py index e96e37f..601c2a1 100644 --- a/jigsawstack/translate.py +++ b/jigsawstack/translate.py @@ -83,9 +83,7 @@ def text(self, params: TranslateParams) -> TranslateResponse: return resp @overload - def image( - self, params: TranslateImageParams - ) -> Union[TranslateImageResponse, bytes]: ... + def image(self, params: TranslateImageParams) -> Union[TranslateImageResponse, bytes]: ... @overload def image( self, blob: bytes, options: TranslateImageParams = None @@ -146,9 +144,7 @@ async def text(self, params: TranslateParams) -> TranslateResponse: return resp @overload - async def image( - self, params: TranslateImageParams - ) -> Union[TranslateImageResponse, bytes]: ... + async def image(self, params: TranslateImageParams) -> Union[TranslateImageResponse, bytes]: ... @overload async def image( self, blob: bytes, options: TranslateImageParams = None diff --git a/jigsawstack/vision.py b/jigsawstack/vision.py index 97e87cb..793841a 100644 --- a/jigsawstack/vision.py +++ b/jigsawstack/vision.py @@ -215,9 +215,7 @@ def vocr( return resp @overload - def object_detection( - self, params: ObjectDetectionParams - ) -> ObjectDetectionResponse: ... + def object_detection(self, params: ObjectDetectionParams) -> ObjectDetectionResponse: ... @overload def object_detection( self, blob: bytes, options: ObjectDetectionParams = None @@ -297,9 +295,7 @@ async def vocr( return resp @overload - async def object_detection( - self, params: ObjectDetectionParams - ) -> ObjectDetectionResponse: ... + async def object_detection(self, params: ObjectDetectionParams) -> ObjectDetectionResponse: ... @overload async def object_detection( self, blob: bytes, options: ObjectDetectionParams = None From 563f65e0bb50906b286ce456996394ae92c166b6 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Mon, 15 Sep 2025 14:26:47 -0700 Subject: [PATCH 68/95] feat: update version to 0.3.4 --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index bfb1aff..1aebb49 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ setup( name="jigsawstack", - version="0.3.3", + version="0.3.4", description="JigsawStack - The AI SDK for Python", long_description=open("README.md", encoding="utf8").read(), long_description_content_type="text/markdown", @@ -19,7 +19,7 @@ python_requires=">=3.7", keywords=["AI", "AI Tooling"], setup_requires=["pytest-runner"], - tests_require=["pytest"], + tests_require=["pytest", "pytest-asyncio"], test_suite="tests", classifiers=[ "Development Status :: 4 - Beta", From 777706d088d8d810bc419b529fae3128eae5611d Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Mon, 15 Sep 2025 14:27:35 -0700 Subject: [PATCH 69/95] feat: ruff formatting. --- tests/test_audio.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/tests/test_audio.py b/tests/test_audio.py index 037f285..309b191 100644 --- a/tests/test_audio.py +++ b/tests/test_audio.py @@ -15,7 +15,6 @@ jigsaw = jigsawstack.JigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) async_jigsaw = jigsawstack.AsyncJigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) -# Sample audio URLs for testing AUDIO_URL = AUDIO_URL_LONG = "https://jigsawstack.com/preview/stt-example.wav" @@ -98,7 +97,10 @@ WEBHOOK_TEST_CASES = [ { "name": "with_webhook_url", - "params": {"url": AUDIO_URL, "webhook_url": "https://webhook.site/test-webhook"}, + "params": { + "url": AUDIO_URL, + "webhook_url": "https://webhook.site/test-webhook", + }, "blob": None, "options": None, }, @@ -106,7 +108,10 @@ "name": "with_blob_and_webhook", "params": None, "blob": AUDIO_URL, - "options": {"webhook_url": "https://webhook.site/test-webhook", "language": "en"}, + "options": { + "webhook_url": "https://webhook.site/test-webhook", + "language": "en", + }, }, ] From f97b5598d92828be9472a07c24dcde5f36ef458b Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Mon, 15 Sep 2025 14:29:01 -0700 Subject: [PATCH 70/95] fix: drop unused param. --- jigsawstack/request.py | 1 - 1 file changed, 1 deletion(-) diff --git a/jigsawstack/request.py b/jigsawstack/request.py index 038c540..fddd5c3 100644 --- a/jigsawstack/request.py +++ b/jigsawstack/request.py @@ -252,7 +252,6 @@ def make_request(self, url: str) -> requests.Response: headers = self.__get_headers() params = self.params verb = self.verb - data = self.data files = self.files _requestParams = None From 50d8e5d2137d6d98df28119d3741c1317cafd90d Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Mon, 15 Sep 2025 15:12:25 -0700 Subject: [PATCH 71/95] fix: updating properties for JigsawStack & AsyncJigsawStack, and embedding_v2 naming convetion. --- jigsawstack/__init__.py | 60 ++++++++++++++++++++++++++++------------- 1 file changed, 41 insertions(+), 19 deletions(-) diff --git a/jigsawstack/__init__.py b/jigsawstack/__init__.py index 091f775..5af7ca4 100644 --- a/jigsawstack/__init__.py +++ b/jigsawstack/__init__.py @@ -21,24 +21,29 @@ class JigsawStack: - audio: Audio - vision: Vision - image_generation: ImageGeneration - file: Store - web: Web - search: Search - classification: Classification - prompt_engine: PromptEngine api_key: str api_url: str headers: Dict[str, str] - # disable_request_logging: bool + audio: Audio + classification: Classification + embedding: Embedding + embedding_v2: EmbeddingV2 + store: Store + image_generation: ImageGeneration + prediction: Prediction + prompt_engine: PromptEngine + sentiment: Sentiment + summary: Summary + text_to_sql: SQL + translate: Translate + validate: Validate + vision: Vision + web: Web def __init__( self, api_key: Union[str, None] = None, api_url: Union[str, None] = None, - # disable_request_logging: Union[bool, None] = None, headers: Union[Dict[str, str], None] = None, ) -> None: if api_key is None: @@ -66,16 +71,19 @@ def __init__( api_url=api_url + "/v1", disable_request_logging=disable_request_logging, ) + self.web = Web( api_key=api_key, api_url=api_url + "/v1", disable_request_logging=disable_request_logging, ) + self.sentiment = Sentiment( api_key=api_key, api_url=api_url + "/v1", disable_request_logging=disable_request_logging, ).analyze + self.validate = Validate( api_key=api_key, api_url=api_url + "/v1", @@ -86,21 +94,25 @@ def __init__( api_url=api_url + "/v1", disable_request_logging=disable_request_logging, ).summarize + self.vision = Vision( api_key=api_key, api_url=api_url + "/v1", disable_request_logging=disable_request_logging, ) + self.prediction = Prediction( api_key=api_key, api_url=api_url + "/v1", disable_request_logging=disable_request_logging, ).predict + self.text_to_sql = SQL( api_key=api_key, api_url=api_url + "/v1", disable_request_logging=disable_request_logging, ).text_to_sql + self.store = Store( api_key=api_key, api_url=api_url + "/v1", @@ -118,7 +130,7 @@ def __init__( disable_request_logging=disable_request_logging, ).execute - self.embeddingV2 = EmbeddingV2( + self.embedding_v2 = EmbeddingV2( api_key=api_key, api_url=api_url + "/v2", disable_request_logging=disable_request_logging, @@ -144,16 +156,24 @@ def __init__( class AsyncJigsawStack: - validate: AsyncValidate - web: AsyncWeb + api_key: str + api_url: str + headers: Dict[str, str] audio: AsyncAudio - vision: AsyncVision + classification: AsyncClassification + embedding: AsyncEmbedding + embedding_v2: AsyncEmbeddingV2 image_generation: AsyncImageGeneration - store: AsyncStore + prediction: AsyncPrediction prompt_engine: AsyncPromptEngine - api_key: str - api_url: str - disable_request_logging: bool + sentiment: AsyncSentiment + store: AsyncStore + summary: AsyncSummary + text_to_sql: AsyncSQL + translate: AsyncTranslate + validate: AsyncValidate + vision: AsyncVision + web: AsyncWeb def __init__( self, @@ -176,6 +196,7 @@ def __init__( self.api_key = api_key self.api_url = api_url + disable_request_logging = self.headers.get("x-jigsaw-no-request-log") self.web = AsyncWeb( api_key=api_key, @@ -217,6 +238,7 @@ def __init__( api_url=api_url + "/v1", disable_request_logging=disable_request_logging, ).predict + self.text_to_sql = AsyncSQL( api_key=api_key, api_url=api_url + "/v1", @@ -241,7 +263,7 @@ def __init__( disable_request_logging=disable_request_logging, ).execute - self.embeddingV2 = AsyncEmbeddingV2( + self.embedding_v2 = AsyncEmbeddingV2( api_key=api_key, api_url=api_url + "/v2", disable_request_logging=disable_request_logging, From 8fa4631332bfd9edc979ba32cf98ed380387b32b Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Mon, 15 Sep 2025 17:11:13 -0700 Subject: [PATCH 72/95] chore: drop redundant disable logging flag. --- jigsawstack/__init__.py | 228 +++++++++-------------------------- jigsawstack/async_request.py | 1 - tests/test_embedding.py | 8 +- 3 files changed, 59 insertions(+), 178 deletions(-) diff --git a/jigsawstack/__init__.py b/jigsawstack/__init__.py index 5af7ca4..537d6a1 100644 --- a/jigsawstack/__init__.py +++ b/jigsawstack/__init__.py @@ -62,97 +62,37 @@ def __init__( self.api_key = api_key self.api_url = api_url - self.headers = headers or {} - - disable_request_logging = self.headers.get("x-jigsaw-no-request-log") - - self.audio = Audio( - api_key=api_key, - api_url=api_url + "/v1", - disable_request_logging=disable_request_logging, - ) - - self.web = Web( - api_key=api_key, - api_url=api_url + "/v1", - disable_request_logging=disable_request_logging, - ) - - self.sentiment = Sentiment( - api_key=api_key, - api_url=api_url + "/v1", - disable_request_logging=disable_request_logging, - ).analyze - - self.validate = Validate( - api_key=api_key, - api_url=api_url + "/v1", - disable_request_logging=disable_request_logging, - ) - self.summary = Summary( - api_key=api_key, - api_url=api_url + "/v1", - disable_request_logging=disable_request_logging, - ).summarize - - self.vision = Vision( - api_key=api_key, - api_url=api_url + "/v1", - disable_request_logging=disable_request_logging, - ) - - self.prediction = Prediction( - api_key=api_key, - api_url=api_url + "/v1", - disable_request_logging=disable_request_logging, - ).predict - - self.text_to_sql = SQL( - api_key=api_key, - api_url=api_url + "/v1", - disable_request_logging=disable_request_logging, - ).text_to_sql - - self.store = Store( - api_key=api_key, - api_url=api_url + "/v1", - disable_request_logging=disable_request_logging, - ) - self.translate = Translate( - api_key=api_key, - api_url=api_url + "/v1", - disable_request_logging=disable_request_logging, - ) - - self.embedding = Embedding( - api_key=api_key, - api_url=api_url + "/v1", - disable_request_logging=disable_request_logging, - ).execute - - self.embedding_v2 = EmbeddingV2( - api_key=api_key, - api_url=api_url + "/v2", - disable_request_logging=disable_request_logging, - ).execute + self.headers = headers or {"Content-Type": "application/json"} + + self.audio = Audio(api_key=api_key, api_url=api_url + "/v1") + + self.web = Web(api_key=api_key, api_url=api_url + "/v1") + + self.sentiment = Sentiment(api_key=api_key, api_url=api_url + "/v1").analyze + + self.validate = Validate(api_key=api_key, api_url=api_url + "/v1") + self.summary = Summary(api_key=api_key, api_url=api_url + "/v1").summarize + + self.vision = Vision(api_key=api_key, api_url=api_url + "/v1") + + self.prediction = Prediction(api_key=api_key, api_url=api_url + "/v1").predict + + self.text_to_sql = SQL(api_key=api_key, api_url=api_url + "/v1").text_to_sql + + self.store = Store(api_key=api_key, api_url=api_url + "/v1") + self.translate = Translate(api_key=api_key, api_url=api_url + "/v1") + + self.embedding = Embedding(api_key=api_key, api_url=api_url + "/v1").execute + + self.embedding_v2 = EmbeddingV2(api_key=api_key, api_url=api_url + "/v2").execute self.image_generation = ImageGeneration( - api_key=api_key, - api_url=api_url + "/v1", - disable_request_logging=disable_request_logging, + api_key=api_key, api_url=api_url + "/v1" ).image_generation - self.classification = Classification( - api_key=api_key, - api_url=api_url + "/v1", - disable_request_logging=disable_request_logging, - ).classify + self.classification = Classification(api_key=api_key, api_url=api_url + "/v1").classify - self.prompt_engine = PromptEngine( - api_key=api_key, - api_url=api_url + "/v1", - disable_request_logging=disable_request_logging, - ) + self.prompt_engine = PromptEngine(api_key=api_key, api_url=api_url + "/v1") class AsyncJigsawStack: @@ -179,7 +119,7 @@ def __init__( self, api_key: Union[str, None] = None, api_url: Union[str, None] = None, - disable_request_logging: Union[bool, None] = None, + headers: Union[Dict[str, str], None] = None, ) -> None: if api_key is None: api_key = os.environ.get("JIGSAWSTACK_API_KEY") @@ -196,96 +136,38 @@ def __init__( self.api_key = api_key self.api_url = api_url - disable_request_logging = self.headers.get("x-jigsaw-no-request-log") - - self.web = AsyncWeb( - api_key=api_key, - api_url=api_url + "/v1", - disable_request_logging=disable_request_logging, - ) - - self.validate = AsyncValidate( - api_key=api_key, - api_url=api_url + "/v1", - disable_request_logging=disable_request_logging, - ) - self.audio = AsyncAudio( - api_key=api_key, - api_url=api_url + "/v1", - disable_request_logging=disable_request_logging, - ) - - self.vision = AsyncVision( - api_key=api_key, - api_url=api_url + "/v1", - disable_request_logging=disable_request_logging, - ) - - self.store = AsyncStore( - api_key=api_key, - api_url=api_url + "/v1", - disable_request_logging=disable_request_logging, - ) - - self.summary = AsyncSummary( - api_key=api_key, - api_url=api_url + "/v1", - disable_request_logging=disable_request_logging, - ).summarize - - self.prediction = AsyncPrediction( - api_key=api_key, - api_url=api_url + "/v1", - disable_request_logging=disable_request_logging, - ).predict - - self.text_to_sql = AsyncSQL( - api_key=api_key, - api_url=api_url + "/v1", - disable_request_logging=disable_request_logging, - ).text_to_sql - - self.sentiment = AsyncSentiment( - api_key=api_key, - api_url=api_url + "/v1", - disable_request_logging=disable_request_logging, - ).analyze - - self.translate = AsyncTranslate( - api_key=api_key, - api_url=api_url + "/v1", - disable_request_logging=disable_request_logging, - ) - - self.embedding = AsyncEmbedding( - api_key=api_key, - api_url=api_url + "/v1", - disable_request_logging=disable_request_logging, - ).execute - - self.embedding_v2 = AsyncEmbeddingV2( - api_key=api_key, - api_url=api_url + "/v2", - disable_request_logging=disable_request_logging, - ).execute + self.headers = headers or {"Content-Type": "application/json"} + + self.web = AsyncWeb(api_key=api_key, api_url=api_url + "/v1") + + self.validate = AsyncValidate(api_key=api_key, api_url=api_url + "/v1") + self.audio = AsyncAudio(api_key=api_key, api_url=api_url + "/v1") + + self.vision = AsyncVision(api_key=api_key, api_url=api_url + "/v1") + + self.store = AsyncStore(api_key=api_key, api_url=api_url + "/v1") + + self.summary = AsyncSummary(api_key=api_key, api_url=api_url + "/v1").summarize + + self.prediction = AsyncPrediction(api_key=api_key, api_url=api_url + "/v1").predict + + self.text_to_sql = AsyncSQL(api_key=api_key, api_url=api_url + "/v1").text_to_sql + + self.sentiment = AsyncSentiment(api_key=api_key, api_url=api_url + "/v1").analyze + + self.translate = AsyncTranslate(api_key=api_key, api_url=api_url + "/v1") + + self.embedding = AsyncEmbedding(api_key=api_key, api_url=api_url + "/v1").execute + + self.embedding_v2 = AsyncEmbeddingV2(api_key=api_key, api_url=api_url + "/v2").execute self.image_generation = AsyncImageGeneration( - api_key=api_key, - api_url=api_url + "/v1", - disable_request_logging=disable_request_logging, + api_key=api_key, api_url=api_url + "/v1" ).image_generation - self.classification = AsyncClassification( - api_key=api_key, - api_url=api_url + "/v1", - disable_request_logging=disable_request_logging, - ).classify - - self.prompt_engine = AsyncPromptEngine( - api_key=api_key, - api_url=api_url + "/v1", - disable_request_logging=disable_request_logging, - ) + self.classification = AsyncClassification(api_key=api_key, api_url=api_url + "/v1").classify + + self.prompt_engine = AsyncPromptEngine(api_key=api_key, api_url=api_url + "/v1") # Create a global instance of the Web class diff --git a/jigsawstack/async_request.py b/jigsawstack/async_request.py index a462ae6..0d44929 100644 --- a/jigsawstack/async_request.py +++ b/jigsawstack/async_request.py @@ -37,7 +37,6 @@ def __init__( self.api_key = config.get("api_key") self.data = data self.headers = headers or {"Content-Type": "application/json"} - self.disable_request_logging = config.get("disable_request_logging") self.stream = stream self.files = files # Store files for multipart requests diff --git a/tests/test_embedding.py b/tests/test_embedding.py index 7b6b368..c2bc59d 100644 --- a/tests/test_embedding.py +++ b/tests/test_embedding.py @@ -246,7 +246,7 @@ class TestEmbeddingV2Sync: def test_embedding_v2(self, test_case): """Test synchronous embedding v2 with various inputs""" try: - result = jigsaw.embeddingV2(test_case["params"]) + result = jigsaw.embedding_v2(test_case["params"]) assert result["success"] assert "embeddings" in result assert isinstance(result["embeddings"], list) @@ -271,7 +271,7 @@ def test_embedding_v2_blob(self, test_case): try: # Download blob content blob_content = requests.get(test_case["blob_url"]).content - result = jigsaw.embeddingV2(blob_content, test_case["options"]) + result = jigsaw.embedding_v2(blob_content, test_case["options"]) assert result["success"] assert "embeddings" in result assert isinstance(result["embeddings"], list) @@ -291,7 +291,7 @@ class TestEmbeddingV2Async: async def test_embedding_v2_async(self, test_case): """Test asynchronous embedding v2 with various inputs""" try: - result = await async_jigsaw.embeddingV2(test_case["params"]) + result = await async_jigsaw.embedding_v2(test_case["params"]) assert result["success"] assert "embeddings" in result assert isinstance(result["embeddings"], list) @@ -317,7 +317,7 @@ async def test_embedding_v2_blob_async(self, test_case): try: # Download blob content blob_content = requests.get(test_case["blob_url"]).content - result = await async_jigsaw.embeddingV2(blob_content, test_case["options"]) + result = await async_jigsaw.embedding_v2(blob_content, test_case["options"]) assert result["success"] assert "embeddings" in result assert isinstance(result["embeddings"], list) From 0c3aa6052792a32f33a12d563f3090d826854653 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Mon, 15 Sep 2025 17:33:37 -0700 Subject: [PATCH 73/95] fix: pass user defined headers as config. --- jigsawstack/async_request.py | 5 +--- jigsawstack/audio.py | 16 +++++------ jigsawstack/classification.py | 12 ++++----- jigsawstack/embedding.py | 12 ++++----- jigsawstack/embedding_v2.py | 12 ++++----- jigsawstack/image_generation.py | 12 ++++----- jigsawstack/prediction.py | 12 ++++----- jigsawstack/prompt_engine.py | 6 ++--- jigsawstack/request.py | 6 +---- jigsawstack/search.py | 12 ++++----- jigsawstack/sentiment.py | 12 ++++----- jigsawstack/sql.py | 12 ++++----- jigsawstack/store.py | 14 +++++----- jigsawstack/summary.py | 12 ++++----- jigsawstack/translate.py | 12 ++++----- jigsawstack/validate.py | 12 ++++----- jigsawstack/vision.py | 15 ++++++----- jigsawstack/web.py | 48 +++++++++------------------------ 18 files changed, 105 insertions(+), 137 deletions(-) diff --git a/jigsawstack/async_request.py b/jigsawstack/async_request.py index 0d44929..028e107 100644 --- a/jigsawstack/async_request.py +++ b/jigsawstack/async_request.py @@ -15,7 +15,7 @@ class AsyncRequestConfig(TypedDict): api_url: str api_key: str - disable_request_logging: Union[bool, None] = False + headers: Union[Dict[str, str], None] class AsyncRequest(Generic[T]): @@ -180,9 +180,6 @@ def __get_headers(self) -> Dict[str, str]: if not self.files and not self.data: h["Content-Type"] = "application/json" - if self.disable_request_logging: - h["x-jigsaw-no-request-log"] = "true" - _headers = h.copy() # don't override Content-Type if using multipart diff --git a/jigsawstack/audio.py b/jigsawstack/audio.py index 7dab251..589f4d2 100644 --- a/jigsawstack/audio.py +++ b/jigsawstack/audio.py @@ -55,14 +55,10 @@ def __init__( self, api_key: str, api_url: str, - disable_request_logging: Union[bool, None] = False, + headers: Union[Dict[str, str], None] = None, ): - super().__init__(api_key, api_url, disable_request_logging) - self.config = RequestConfig( - api_url=api_url, - api_key=api_key, - disable_request_logging=disable_request_logging, - ) + super().__init__(api_key, api_url, headers) + self.config = RequestConfig(api_url=api_url, api_key=api_key, headers=headers) @overload def speech_to_text( @@ -108,13 +104,13 @@ def __init__( self, api_key: str, api_url: str, - disable_request_logging: Union[bool, None] = False, + headers: Union[Dict[str, str], None] = None, ): - super().__init__(api_key, api_url, disable_request_logging) + super().__init__(api_key, api_url, headers) self.config = AsyncRequestConfig( api_url=api_url, api_key=api_key, - disable_request_logging=disable_request_logging, + headers=headers, ) @overload diff --git a/jigsawstack/classification.py b/jigsawstack/classification.py index 45407e9..1d9770d 100644 --- a/jigsawstack/classification.py +++ b/jigsawstack/classification.py @@ -68,13 +68,13 @@ def __init__( self, api_key: str, api_url: str, - disable_request_logging: Union[bool, None] = False, + headers: Union[Dict[str, str], None] = None, ): - super().__init__(api_key, api_url, disable_request_logging) + super().__init__(api_key, api_url, headers) self.config = RequestConfig( api_url=api_url, api_key=api_key, - disable_request_logging=disable_request_logging, + headers=headers, ) def classify(self, params: ClassificationParams) -> ClassificationResponse: @@ -95,13 +95,13 @@ def __init__( self, api_key: str, api_url: str, - disable_request_logging: Union[bool, None] = False, + headers: Union[Dict[str, str], None] = None, ): - super().__init__(api_key, api_url, disable_request_logging) + super().__init__(api_key, api_url, headers) self.config = AsyncRequestConfig( api_url=api_url, api_key=api_key, - disable_request_logging=disable_request_logging, + headers=headers, ) async def classify(self, params: ClassificationParams) -> ClassificationResponse: diff --git a/jigsawstack/embedding.py b/jigsawstack/embedding.py index 9611537..71453ec 100644 --- a/jigsawstack/embedding.py +++ b/jigsawstack/embedding.py @@ -34,13 +34,13 @@ def __init__( self, api_key: str, api_url: str, - disable_request_logging: Union[bool, None] = False, + headers: Union[Dict[str, str], None] = None, ): - super().__init__(api_key, api_url, disable_request_logging) + super().__init__(api_key, api_url, headers) self.config = RequestConfig( api_url=api_url, api_key=api_key, - disable_request_logging=disable_request_logging, + headers=headers, ) @overload @@ -82,13 +82,13 @@ def __init__( self, api_key: str, api_url: str, - disable_request_logging: Union[bool, None] = False, + headers: Union[Dict[str, str], None] = None, ): - super().__init__(api_key, api_url, disable_request_logging) + super().__init__(api_key, api_url, headers) self.config = RequestConfig( api_url=api_url, api_key=api_key, - disable_request_logging=disable_request_logging, + headers=headers, ) @overload diff --git a/jigsawstack/embedding_v2.py b/jigsawstack/embedding_v2.py index 0192f52..8148df1 100644 --- a/jigsawstack/embedding_v2.py +++ b/jigsawstack/embedding_v2.py @@ -32,13 +32,13 @@ def __init__( self, api_key: str, api_url: str, - disable_request_logging: Union[bool, None] = False, + headers: Union[Dict[str, str], None] = None, ): - super().__init__(api_key, api_url, disable_request_logging) + super().__init__(api_key, api_url, headers) self.config = RequestConfig( api_url=api_url, api_key=api_key, - disable_request_logging=disable_request_logging, + headers=headers, ) @overload @@ -80,13 +80,13 @@ def __init__( self, api_key: str, api_url: str, - disable_request_logging: Union[bool, None] = False, + headers: Union[Dict[str, str], None] = None, ): - super().__init__(api_key, api_url, disable_request_logging) + super().__init__(api_key, api_url, headers) self.config = RequestConfig( api_url=api_url, api_key=api_key, - disable_request_logging=disable_request_logging, + headers=headers, ) @overload diff --git a/jigsawstack/image_generation.py b/jigsawstack/image_generation.py index 9584cf3..40d0e81 100644 --- a/jigsawstack/image_generation.py +++ b/jigsawstack/image_generation.py @@ -90,13 +90,13 @@ def __init__( self, api_key: str, api_url: str, - disable_request_logging: Union[bool, None] = False, + headers: Union[Dict[str, str], None] = None, ): - super().__init__(api_key, api_url, disable_request_logging=disable_request_logging) + super().__init__(api_key, api_url, headers) self.config = RequestConfig( api_url=api_url, api_key=api_key, - disable_request_logging=disable_request_logging, + headers=headers, ) def image_generation( @@ -119,13 +119,13 @@ def __init__( self, api_key: str, api_url: str, - disable_request_logging: Union[bool, None] = False, + headers: Union[Dict[str, str], None] = None, ): - super().__init__(api_key, api_url, disable_request_logging=disable_request_logging) + super().__init__(api_key, api_url, headers) self.config = RequestConfig( api_url=api_url, api_key=api_key, - disable_request_logging=disable_request_logging, + headers=headers, ) async def image_generation( diff --git a/jigsawstack/prediction.py b/jigsawstack/prediction.py index ec571a4..eeee9a6 100644 --- a/jigsawstack/prediction.py +++ b/jigsawstack/prediction.py @@ -49,13 +49,13 @@ def __init__( self, api_key: str, api_url: str, - disable_request_logging: Union[bool, None] = False, + headers: Union[Dict[str, str], None] = None, ): - super().__init__(api_key, api_url, disable_request_logging) + super().__init__(api_key, api_url, headers) self.config = RequestConfig( api_url=api_url, api_key=api_key, - disable_request_logging=disable_request_logging, + headers=headers, ) def predict(self, params: PredictionParams) -> PredictionResponse: @@ -76,13 +76,13 @@ def __init__( self, api_key: str, api_url: str, - disable_request_logging: Union[bool, None] = False, + headers: Union[Dict[str, str], None] = None, ): - super().__init__(api_key, api_url, disable_request_logging) + super().__init__(api_key, api_url, headers) self.config = RequestConfig( api_url=api_url, api_key=api_key, - disable_request_logging=disable_request_logging, + headers=headers, ) async def predict(self, params: PredictionParams) -> PredictionResponse: diff --git a/jigsawstack/prompt_engine.py b/jigsawstack/prompt_engine.py index 3af7fa3..1c2420b 100644 --- a/jigsawstack/prompt_engine.py +++ b/jigsawstack/prompt_engine.py @@ -98,13 +98,13 @@ def __init__( self, api_key: str, api_url: str, - disable_request_logging: Union[bool, None] = False, + headers: Union[Dict[str, str], None] = None, ): - super().__init__(api_key, api_url, disable_request_logging) + super().__init__(api_key, api_url, headers) self.config = RequestConfig( api_url=api_url, api_key=api_key, - disable_request_logging=disable_request_logging, + headers=headers, ) def create(self, params: PromptEngineCreateParams) -> PromptEngineCreateResponse: diff --git a/jigsawstack/request.py b/jigsawstack/request.py index fddd5c3..26fc710 100644 --- a/jigsawstack/request.py +++ b/jigsawstack/request.py @@ -14,7 +14,7 @@ class RequestConfig(TypedDict): api_url: str api_key: str - disable_request_logging: Union[bool, None] = False + headers: Union[Dict[str, str], None] # This class wraps the HTTP request creation logic @@ -37,7 +37,6 @@ def __init__( self.api_key = config.get("api_key") self.data = data self.headers = headers or {"Content-Type": "application/json"} - self.disable_request_logging = config.get("disable_request_logging") self.stream = stream self.files = files @@ -162,9 +161,6 @@ def __get_headers(self) -> Dict[Any, Any]: if not self.files and not self.data: h["Content-Type"] = "application/json" - if self.disable_request_logging: - h["x-jigsaw-no-request-log"] = "true" - _headers = h.copy() # Don't override Content-Type if using multipart diff --git a/jigsawstack/search.py b/jigsawstack/search.py index 21b0187..c1fe804 100644 --- a/jigsawstack/search.py +++ b/jigsawstack/search.py @@ -226,13 +226,13 @@ def __init__( self, api_key: str, api_url: str, - disable_request_logging: Union[bool, None] = False, + headers: Union[Dict[str, str], None] = None, ): - super().__init__(api_key, api_url, disable_request_logging) + super().__init__(api_key, api_url, headers) self.config = RequestConfig( api_url=api_url, api_key=api_key, - disable_request_logging=disable_request_logging, + headers=headers, ) def search(self, params: SearchParams) -> SearchResponse: @@ -288,13 +288,13 @@ def __init__( self, api_key: str, api_url: str, - disable_request_logging: Union[bool, None] = False, + headers: Union[Dict[str, str], None] = None, ): - super().__init__(api_key, api_url, disable_request_logging) + super().__init__(api_key, api_url, headers) self.config = AsyncRequestConfig( api_url=api_url, api_key=api_key, - disable_request_logging=disable_request_logging, + headers=headers, ) async def search(self, params: SearchParams) -> SearchResponse: diff --git a/jigsawstack/sentiment.py b/jigsawstack/sentiment.py index ef5e9df..db1031a 100644 --- a/jigsawstack/sentiment.py +++ b/jigsawstack/sentiment.py @@ -49,13 +49,13 @@ def __init__( self, api_key: str, api_url: str, - disable_request_logging: Union[bool, None] = False, + headers: Union[Dict[str, str], None] = None, ): - super().__init__(api_key, api_url, disable_request_logging) + super().__init__(api_key, api_url, headers) self.config = RequestConfig( api_url=api_url, api_key=api_key, - disable_request_logging=disable_request_logging, + headers=headers, ) def analyze(self, params: SentimentParams) -> SentimentResponse: @@ -76,13 +76,13 @@ def __init__( self, api_key: str, api_url: str, - disable_request_logging: Union[bool, None] = False, + headers: Union[Dict[str, str], None] = None, ): - super().__init__(api_key, api_url, disable_request_logging) + super().__init__(api_key, api_url, headers) self.config = RequestConfig( api_url=api_url, api_key=api_key, - disable_request_logging=disable_request_logging, + headers=headers, ) async def analyze(self, params: SentimentParams) -> SentimentResponse: diff --git a/jigsawstack/sql.py b/jigsawstack/sql.py index b895485..c74f2a7 100644 --- a/jigsawstack/sql.py +++ b/jigsawstack/sql.py @@ -44,13 +44,13 @@ def __init__( self, api_key: str, api_url: str, - disable_request_logging: Union[bool, None] = False, + headers: Union[Dict[str, str], None] = None, ): - super().__init__(api_key, api_url, disable_request_logging) + super().__init__(api_key, api_url, headers) self.config = RequestConfig( api_url=api_url, api_key=api_key, - disable_request_logging=disable_request_logging, + headers=headers, ) def text_to_sql(self, params: SQLParams) -> SQLResponse: @@ -71,13 +71,13 @@ def __init__( self, api_key: str, api_url: str, - disable_request_logging: Union[bool, None] = False, + headers: Union[Dict[str, str], None] = None, ): - super().__init__(api_key, api_url, disable_request_logging) + super().__init__(api_key, api_url, headers) self.config = RequestConfig( api_url=api_url, api_key=api_key, - disable_request_logging=disable_request_logging, + headers=headers, ) async def text_to_sql(self, params: SQLParams) -> SQLResponse: diff --git a/jigsawstack/store.py b/jigsawstack/store.py index 0693f49..4dd6918 100644 --- a/jigsawstack/store.py +++ b/jigsawstack/store.py @@ -1,4 +1,4 @@ -from typing import Any, Union +from typing import Any, Dict, Union from typing_extensions import NotRequired, TypedDict @@ -33,13 +33,13 @@ def __init__( self, api_key: str, api_url: str, - disable_request_logging: Union[bool, None] = False, + headers: Union[Dict[str, str], None] = None, ): - super().__init__(api_key, api_url, disable_request_logging) + super().__init__(api_key, api_url, headers) self.config = RequestConfig( api_url=api_url, api_key=api_key, - disable_request_logging=disable_request_logging, + headers=headers, ) def upload( @@ -91,13 +91,13 @@ def __init__( self, api_key: str, api_url: str, - disable_request_logging: Union[bool, None] = False, + headers: Union[Dict[str, str], None] = None, ): - super().__init__(api_key, api_url, disable_request_logging) + super().__init__(api_key, api_url, headers) self.config = AsyncRequestConfig( api_url=api_url, api_key=api_key, - disable_request_logging=disable_request_logging, + headers=headers, ) async def upload( diff --git a/jigsawstack/summary.py b/jigsawstack/summary.py index 0d19b39..8bbc0c3 100644 --- a/jigsawstack/summary.py +++ b/jigsawstack/summary.py @@ -54,13 +54,13 @@ def __init__( self, api_key: str, api_url: str, - disable_request_logging: Union[bool, None] = False, + headers: Union[Dict[str, str], None] = None, ): - super().__init__(api_key, api_url, disable_request_logging) + super().__init__(api_key, api_url, headers) self.config = RequestConfig( api_url=api_url, api_key=api_key, - disable_request_logging=disable_request_logging, + headers=headers, ) def summarize(self, params: SummaryParams) -> SummaryResponse: @@ -81,13 +81,13 @@ def __init__( self, api_key: str, api_url: str, - disable_request_logging: Union[bool, None] = False, + headers: Union[Dict[str, str], None] = None, ): - super().__init__(api_key, api_url, disable_request_logging) + super().__init__(api_key, api_url, headers) self.config = RequestConfig( api_url=api_url, api_key=api_key, - disable_request_logging=disable_request_logging, + headers=headers, ) async def summarize(self, params: SummaryParams) -> SummaryResponse: diff --git a/jigsawstack/translate.py b/jigsawstack/translate.py index 601c2a1..ebffcfb 100644 --- a/jigsawstack/translate.py +++ b/jigsawstack/translate.py @@ -64,13 +64,13 @@ def __init__( self, api_key: str, api_url: str, - disable_request_logging: Union[bool, None] = False, + headers: Union[Dict[str, str], None] = None, ): - super().__init__(api_key, api_url, disable_request_logging) + super().__init__(api_key, api_url, headers) self.config = RequestConfig( api_url=api_url, api_key=api_key, - disable_request_logging=disable_request_logging, + headers=headers, ) def text(self, params: TranslateParams) -> TranslateResponse: @@ -125,13 +125,13 @@ def __init__( self, api_key: str, api_url: str, - disable_request_logging: Union[bool, None] = False, + headers: Union[Dict[str, str], None] = None, ): - super().__init__(api_key, api_url, disable_request_logging) + super().__init__(api_key, api_url, headers) self.config = RequestConfig( api_url=api_url, api_key=api_key, - disable_request_logging=disable_request_logging, + headers=headers, ) async def text(self, params: TranslateParams) -> TranslateResponse: diff --git a/jigsawstack/validate.py b/jigsawstack/validate.py index 774aef4..37614b2 100644 --- a/jigsawstack/validate.py +++ b/jigsawstack/validate.py @@ -80,13 +80,13 @@ def __init__( self, api_key: str, api_url: str, - disable_request_logging: Union[bool, None] = False, + headers: Union[Dict[str, str], None] = None, ): - super().__init__(api_key, api_url, disable_request_logging) + super().__init__(api_key, api_url, headers) self.config = RequestConfig( api_url=api_url, api_key=api_key, - disable_request_logging=disable_request_logging, + headers=headers, ) @overload @@ -166,13 +166,13 @@ def __init__( self, api_key: str, api_url: str, - disable_request_logging: Union[bool, None] = False, + headers: Union[Dict[str, str], None] = None, ): - super().__init__(api_key, api_url, disable_request_logging) + super().__init__(api_key, api_url, headers) self.config = AsyncRequestConfig( api_url=api_url, api_key=api_key, - disable_request_logging=disable_request_logging, + headers=headers, ) @overload diff --git a/jigsawstack/vision.py b/jigsawstack/vision.py index 793841a..e44bb57 100644 --- a/jigsawstack/vision.py +++ b/jigsawstack/vision.py @@ -172,13 +172,13 @@ def __init__( self, api_key: str, api_url: str, - disable_request_logging: Union[bool, None] = False, + headers: Union[Dict[str, str], None] = None, ): - super().__init__(api_key, api_url, disable_request_logging) + super().__init__(api_key, api_url, headers) self.config = RequestConfig( api_url=api_url, api_key=api_key, - disable_request_logging=disable_request_logging, + headers=headers, ) @overload @@ -254,13 +254,13 @@ def __init__( self, api_key: str, api_url: str, - disable_request_logging: Union[bool, None] = False, + headers: Union[Dict[str, str], None] = None, ): - super().__init__(api_key, api_url, disable_request_logging) + super().__init__(api_key, api_url, headers) self.config = AsyncRequestConfig( api_url=api_url, api_key=api_key, - disable_request_logging=disable_request_logging, + headers=headers, ) @overload @@ -277,6 +277,7 @@ async def vocr( options = options or {} if isinstance(blob, dict): resp = await AsyncRequest( + headers=self.headers, config=self.config, path=path, params=cast(Dict[Any, Any], blob), @@ -286,6 +287,7 @@ async def vocr( files = {"file": blob} resp = await AsyncRequest( + headers=self.headers, config=self.config, path=path, params=options, @@ -312,6 +314,7 @@ async def object_detection( blob, dict ): # If params is provided as a dict, we assume it's the first argument resp = await AsyncRequest( + headers=self.headers, config=self.config, path=path, params=cast(Dict[Any, Any], blob), diff --git a/jigsawstack/web.py b/jigsawstack/web.py index 5d400c3..732e9c6 100644 --- a/jigsawstack/web.py +++ b/jigsawstack/web.py @@ -199,13 +199,13 @@ def __init__( self, api_key: str, api_url: str, - disable_request_logging: Union[bool, None] = False, + headers: Union[Dict[str, str], None] = None, ): - super().__init__(api_key, api_url, disable_request_logging) + super().__init__(api_key, api_url, headers) self.config = RequestConfig( api_url=api_url, api_key=api_key, - disable_request_logging=disable_request_logging, + headers=headers, ) def ai_scrape(self, params: AIScrapeParams) -> AIScrapeResponse: @@ -248,27 +248,15 @@ def html_to_any( return cast(HTMLToAnyURLResponse, resp) def search(self, params: SearchParams) -> SearchResponse: - s = Search( - self.api_key, - self.api_url, - disable_request_logging=self.config.get("disable_request_logging"), - ) + s = Search(self.api_key, self.api_url, self.headers) return s.search(params) def search_suggestions(self, params: SearchSuggestionsParams) -> SearchSuggestionsResponse: - s = Search( - self.api_key, - self.api_url, - disable_request_logging=self.config.get("disable_request_logging"), - ) + s = Search(self.api_key, self.api_url, self.headers) return s.suggestions(params) def deep_research(self, params: DeepResearchParams) -> DeepResearchResponse: - s = Search( - self.api_key, - self.api_url, - disable_request_logging=self.config.get("disable_request_logging"), - ) + s = Search(self.api_key, self.api_url, self.headers) return s.deep_research(params) @@ -282,13 +270,13 @@ def __init__( self, api_key: str, api_url: str, - disable_request_logging: Union[bool, None] = False, + headers: Union[Dict[str, str], None] = None, ): - super().__init__(api_key, api_url, disable_request_logging) + super().__init__(api_key, api_url, headers) self.config = AsyncRequestConfig( api_url=api_url, api_key=api_key, - disable_request_logging=disable_request_logging, + headers=headers, ) async def ai_scrape(self, params: AIScrapeParams) -> AIScrapeResponse: @@ -331,27 +319,15 @@ async def html_to_any( return cast(HTMLToAnyURLResponse, resp) async def search(self, params: SearchParams) -> SearchResponse: - s = AsyncSearch( - self.api_key, - self.api_url, - disable_request_logging=self.config.get("disable_request_logging"), - ) + s = AsyncSearch(self.api_key, self.api_url, self.headers) return await s.search(params) async def search_suggestions( self, params: SearchSuggestionsParams ) -> SearchSuggestionsResponse: - s = AsyncSearch( - self.api_key, - self.api_url, - disable_request_logging=self.config.get("disable_request_logging"), - ) + s = AsyncSearch(self.api_key, self.api_url, self.headers) return await s.suggestions(params) async def deep_research(self, params: DeepResearchParams) -> DeepResearchResponse: - s = AsyncSearch( - self.api_key, - self.api_url, - disable_request_logging=self.config.get("disable_request_logging"), - ) + s = AsyncSearch(self.api_key, self.api_url, self.headers) return await s.deep_research(params) From 2d7e43a986b0141e041cec632832cef51b686c25 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Mon, 15 Sep 2025 19:02:00 -0700 Subject: [PATCH 74/95] fix: pass logging param withing header, rename api_url as base_url and other fixes. --- jigsawstack/__init__.py | 90 +++++++++++++++++---------------- jigsawstack/_config.py | 12 ++--- jigsawstack/async_request.py | 13 +++-- jigsawstack/audio.py | 12 ++--- jigsawstack/classification.py | 12 ++--- jigsawstack/embedding.py | 12 ++--- jigsawstack/embedding_v2.py | 12 ++--- jigsawstack/image_generation.py | 12 ++--- jigsawstack/prediction.py | 12 ++--- jigsawstack/prompt_engine.py | 12 ++--- jigsawstack/request.py | 13 +++-- jigsawstack/search.py | 12 ++--- jigsawstack/sentiment.py | 12 ++--- jigsawstack/sql.py | 12 ++--- jigsawstack/store.py | 34 ++++++++----- jigsawstack/summary.py | 12 ++--- jigsawstack/translate.py | 12 ++--- jigsawstack/validate.py | 12 ++--- jigsawstack/vision.py | 15 +++--- jigsawstack/web.py | 24 ++++----- 20 files changed, 180 insertions(+), 177 deletions(-) diff --git a/jigsawstack/__init__.py b/jigsawstack/__init__.py index 537d6a1..6858a64 100644 --- a/jigsawstack/__init__.py +++ b/jigsawstack/__init__.py @@ -22,7 +22,7 @@ class JigsawStack: api_key: str - api_url: str + base_url: str headers: Dict[str, str] audio: Audio classification: Classification @@ -43,7 +43,7 @@ class JigsawStack: def __init__( self, api_key: Union[str, None] = None, - api_url: Union[str, None] = None, + base_url: Union[str, None] = None, headers: Union[Dict[str, str], None] = None, ) -> None: if api_key is None: @@ -54,50 +54,50 @@ def __init__( "The api_key client option must be set either by passing api_key to the client or by setting the JIGSAWSTACK_API_KEY environment variable" ) - if api_url is None: - api_url = os.environ.get("JIGSAWSTACK_API_URL") - if api_url is None: - api_url = "https://api.jigsawstack.com/" + if base_url is None: + base_url = os.environ.get("JIGSAWSTACK_base_url") + if base_url is None: + base_url = "https://api.jigsawstack.com/" self.api_key = api_key - self.api_url = api_url + self.base_url = base_url self.headers = headers or {"Content-Type": "application/json"} - self.audio = Audio(api_key=api_key, api_url=api_url + "/v1") + self.audio = Audio(api_key=api_key, base_url=base_url + "/v1") - self.web = Web(api_key=api_key, api_url=api_url + "/v1") + self.web = Web(api_key=api_key, base_url=base_url + "/v1") - self.sentiment = Sentiment(api_key=api_key, api_url=api_url + "/v1").analyze + self.sentiment = Sentiment(api_key=api_key, base_url=base_url + "/v1").analyze - self.validate = Validate(api_key=api_key, api_url=api_url + "/v1") - self.summary = Summary(api_key=api_key, api_url=api_url + "/v1").summarize + self.validate = Validate(api_key=api_key, base_url=base_url + "/v1") + self.summary = Summary(api_key=api_key, base_url=base_url + "/v1").summarize - self.vision = Vision(api_key=api_key, api_url=api_url + "/v1") + self.vision = Vision(api_key=api_key, base_url=base_url + "/v1") - self.prediction = Prediction(api_key=api_key, api_url=api_url + "/v1").predict + self.prediction = Prediction(api_key=api_key, base_url=base_url + "/v1").predict - self.text_to_sql = SQL(api_key=api_key, api_url=api_url + "/v1").text_to_sql + self.text_to_sql = SQL(api_key=api_key, base_url=base_url + "/v1").text_to_sql - self.store = Store(api_key=api_key, api_url=api_url + "/v1") - self.translate = Translate(api_key=api_key, api_url=api_url + "/v1") + self.store = Store(api_key=api_key, base_url=base_url + "/v1") + self.translate = Translate(api_key=api_key, base_url=base_url + "/v1") - self.embedding = Embedding(api_key=api_key, api_url=api_url + "/v1").execute + self.embedding = Embedding(api_key=api_key, base_url=base_url + "/v1").execute - self.embedding_v2 = EmbeddingV2(api_key=api_key, api_url=api_url + "/v2").execute + self.embedding_v2 = EmbeddingV2(api_key=api_key, base_url=base_url + "/v2").execute self.image_generation = ImageGeneration( - api_key=api_key, api_url=api_url + "/v1" + api_key=api_key, base_url=base_url + "/v1" ).image_generation - self.classification = Classification(api_key=api_key, api_url=api_url + "/v1").classify + self.classification = Classification(api_key=api_key, base_url=base_url + "/v1").classify - self.prompt_engine = PromptEngine(api_key=api_key, api_url=api_url + "/v1") + self.prompt_engine = PromptEngine(api_key=api_key, base_url=base_url + "/v1") class AsyncJigsawStack: api_key: str - api_url: str + base_url: str headers: Dict[str, str] audio: AsyncAudio classification: AsyncClassification @@ -118,7 +118,7 @@ class AsyncJigsawStack: def __init__( self, api_key: Union[str, None] = None, - api_url: Union[str, None] = None, + base_url: Union[str, None] = None, headers: Union[Dict[str, str], None] = None, ) -> None: if api_key is None: @@ -129,45 +129,47 @@ def __init__( "The api_key client option must be set either by passing api_key to the client or by setting the JIGSAWSTACK_API_KEY environment variable" ) - if api_url is None: - api_url = os.environ.get("JIGSAWSTACK_API_URL") - if api_url is None: - api_url = "https://api.jigsawstack.com/" + if base_url is None: + base_url = os.environ.get("JIGSAWSTACK_base_url") + if base_url is None: + base_url = "https://api.jigsawstack.com/" self.api_key = api_key - self.api_url = api_url + self.base_url = base_url self.headers = headers or {"Content-Type": "application/json"} - self.web = AsyncWeb(api_key=api_key, api_url=api_url + "/v1") + self.web = AsyncWeb(api_key=api_key, base_url=base_url + "/v1") - self.validate = AsyncValidate(api_key=api_key, api_url=api_url + "/v1") - self.audio = AsyncAudio(api_key=api_key, api_url=api_url + "/v1") + self.validate = AsyncValidate(api_key=api_key, base_url=base_url + "/v1") + self.audio = AsyncAudio(api_key=api_key, base_url=base_url + "/v1") - self.vision = AsyncVision(api_key=api_key, api_url=api_url + "/v1") + self.vision = AsyncVision(api_key=api_key, base_url=base_url + "/v1") - self.store = AsyncStore(api_key=api_key, api_url=api_url + "/v1") + self.store = AsyncStore(api_key=api_key, base_url=base_url + "/v1") - self.summary = AsyncSummary(api_key=api_key, api_url=api_url + "/v1").summarize + self.summary = AsyncSummary(api_key=api_key, base_url=base_url + "/v1").summarize - self.prediction = AsyncPrediction(api_key=api_key, api_url=api_url + "/v1").predict + self.prediction = AsyncPrediction(api_key=api_key, base_url=base_url + "/v1").predict - self.text_to_sql = AsyncSQL(api_key=api_key, api_url=api_url + "/v1").text_to_sql + self.text_to_sql = AsyncSQL(api_key=api_key, base_url=base_url + "/v1").text_to_sql - self.sentiment = AsyncSentiment(api_key=api_key, api_url=api_url + "/v1").analyze + self.sentiment = AsyncSentiment(api_key=api_key, base_url=base_url + "/v1").analyze - self.translate = AsyncTranslate(api_key=api_key, api_url=api_url + "/v1") + self.translate = AsyncTranslate(api_key=api_key, base_url=base_url + "/v1") - self.embedding = AsyncEmbedding(api_key=api_key, api_url=api_url + "/v1").execute + self.embedding = AsyncEmbedding(api_key=api_key, base_url=base_url + "/v1").execute - self.embedding_v2 = AsyncEmbeddingV2(api_key=api_key, api_url=api_url + "/v2").execute + self.embedding_v2 = AsyncEmbeddingV2(api_key=api_key, base_url=base_url + "/v2").execute self.image_generation = AsyncImageGeneration( - api_key=api_key, api_url=api_url + "/v1" + api_key=api_key, base_url=base_url + "/v1" ).image_generation - self.classification = AsyncClassification(api_key=api_key, api_url=api_url + "/v1").classify + self.classification = AsyncClassification( + api_key=api_key, base_url=base_url + "/v1" + ).classify - self.prompt_engine = AsyncPromptEngine(api_key=api_key, api_url=api_url + "/v1") + self.prompt_engine = AsyncPromptEngine(api_key=api_key, base_url=base_url + "/v1") # Create a global instance of the Web class diff --git a/jigsawstack/_config.py b/jigsawstack/_config.py index 6e15b54..3a007d8 100644 --- a/jigsawstack/_config.py +++ b/jigsawstack/_config.py @@ -1,17 +1,17 @@ -from typing import Union +from typing import Dict, Union class ClientConfig: base_url: str api_key: str - disable_request_logging: Union[bool, None] = None + headers: Union[Dict[str, str], None] def __init__( self, api_key: str, - api_url: str, - disable_request_logging: Union[bool, None] = None, + base_url: str, + headers: Union[Dict[str, str], None] = None, ): self.api_key = api_key - self.api_url = api_url - self.disable_request_logging = disable_request_logging + self.base_url = base_url + self.headers = headers diff --git a/jigsawstack/async_request.py b/jigsawstack/async_request.py index 028e107..d8f530d 100644 --- a/jigsawstack/async_request.py +++ b/jigsawstack/async_request.py @@ -13,7 +13,7 @@ class AsyncRequestConfig(TypedDict): - api_url: str + base_url: str api_key: str headers: Union[Dict[str, str], None] @@ -25,7 +25,6 @@ def __init__( path: str, params: Union[Dict[Any, Any], List[Dict[Any, Any]]], verb: RequestVerb, - headers: Dict[str, str] = None, data: Union[bytes, None] = None, stream: Union[bool, None] = False, files: Union[Dict[str, Any], None] = None, # Add files parameter @@ -33,10 +32,10 @@ def __init__( self.path = path self.params = params self.verb = verb - self.api_url = config.get("api_url") + self.base_url = config.get("base_url") self.api_key = config.get("api_key") self.data = data - self.headers = headers or {"Content-Type": "application/json"} + self.headers = config.get("headers", None) or {"Content-Type": "application/json"} self.stream = stream self.files = files # Store files for multipart requests @@ -68,7 +67,7 @@ async def perform(self) -> Union[T, None]: Async method to make an HTTP request to the JigsawStack API. """ async with self.__get_session() as session: - resp = await self.make_request(session, url=f"{self.api_url}{self.path}") + resp = await self.make_request(session, url=f"{self.base_url}{self.path}") # For binary responses if resp.status == 200: @@ -109,7 +108,7 @@ async def perform(self) -> Union[T, None]: async def perform_file(self) -> Union[T, None]: async with self.__get_session() as session: - resp = await self.make_request(session, url=f"{self.api_url}{self.path}") + resp = await self.make_request(session, url=f"{self.base_url}{self.path}") if resp.status != 200: try: @@ -198,7 +197,7 @@ async def perform_streaming(self) -> AsyncGenerator[Union[T, str], None]: AsyncGenerator[Union[T, str], None]: A generator of response chunks """ async with self.__get_session() as session: - resp = await self.make_request(session, url=f"{self.api_url}{self.path}") + resp = await self.make_request(session, url=f"{self.base_url}{self.path}") # delete calls do not return a body if await resp.text() == "": diff --git a/jigsawstack/audio.py b/jigsawstack/audio.py index 589f4d2..575b839 100644 --- a/jigsawstack/audio.py +++ b/jigsawstack/audio.py @@ -54,11 +54,11 @@ class Audio(ClientConfig): def __init__( self, api_key: str, - api_url: str, + base_url: str, headers: Union[Dict[str, str], None] = None, ): - super().__init__(api_key, api_url, headers) - self.config = RequestConfig(api_url=api_url, api_key=api_key, headers=headers) + super().__init__(api_key, base_url, headers) + self.config = RequestConfig(base_url=base_url, api_key=api_key, headers=headers) @overload def speech_to_text( @@ -103,12 +103,12 @@ class AsyncAudio(ClientConfig): def __init__( self, api_key: str, - api_url: str, + base_url: str, headers: Union[Dict[str, str], None] = None, ): - super().__init__(api_key, api_url, headers) + super().__init__(api_key, base_url, headers) self.config = AsyncRequestConfig( - api_url=api_url, + base_url=base_url, api_key=api_key, headers=headers, ) diff --git a/jigsawstack/classification.py b/jigsawstack/classification.py index 1d9770d..134307c 100644 --- a/jigsawstack/classification.py +++ b/jigsawstack/classification.py @@ -67,12 +67,12 @@ class Classification(ClientConfig): def __init__( self, api_key: str, - api_url: str, + base_url: str, headers: Union[Dict[str, str], None] = None, ): - super().__init__(api_key, api_url, headers) + super().__init__(api_key, base_url, headers) self.config = RequestConfig( - api_url=api_url, + base_url=base_url, api_key=api_key, headers=headers, ) @@ -94,12 +94,12 @@ class AsyncClassification(ClientConfig): def __init__( self, api_key: str, - api_url: str, + base_url: str, headers: Union[Dict[str, str], None] = None, ): - super().__init__(api_key, api_url, headers) + super().__init__(api_key, base_url, headers) self.config = AsyncRequestConfig( - api_url=api_url, + base_url=base_url, api_key=api_key, headers=headers, ) diff --git a/jigsawstack/embedding.py b/jigsawstack/embedding.py index 71453ec..d914c4c 100644 --- a/jigsawstack/embedding.py +++ b/jigsawstack/embedding.py @@ -33,12 +33,12 @@ class Embedding(ClientConfig): def __init__( self, api_key: str, - api_url: str, + base_url: str, headers: Union[Dict[str, str], None] = None, ): - super().__init__(api_key, api_url, headers) + super().__init__(api_key, base_url, headers) self.config = RequestConfig( - api_url=api_url, + base_url=base_url, api_key=api_key, headers=headers, ) @@ -81,12 +81,12 @@ class AsyncEmbedding(ClientConfig): def __init__( self, api_key: str, - api_url: str, + base_url: str, headers: Union[Dict[str, str], None] = None, ): - super().__init__(api_key, api_url, headers) + super().__init__(api_key, base_url, headers) self.config = RequestConfig( - api_url=api_url, + base_url=base_url, api_key=api_key, headers=headers, ) diff --git a/jigsawstack/embedding_v2.py b/jigsawstack/embedding_v2.py index 8148df1..685cd52 100644 --- a/jigsawstack/embedding_v2.py +++ b/jigsawstack/embedding_v2.py @@ -31,12 +31,12 @@ class EmbeddingV2(ClientConfig): def __init__( self, api_key: str, - api_url: str, + base_url: str, headers: Union[Dict[str, str], None] = None, ): - super().__init__(api_key, api_url, headers) + super().__init__(api_key, base_url, headers) self.config = RequestConfig( - api_url=api_url, + base_url=base_url, api_key=api_key, headers=headers, ) @@ -79,12 +79,12 @@ class AsyncEmbeddingV2(ClientConfig): def __init__( self, api_key: str, - api_url: str, + base_url: str, headers: Union[Dict[str, str], None] = None, ): - super().__init__(api_key, api_url, headers) + super().__init__(api_key, base_url, headers) self.config = RequestConfig( - api_url=api_url, + base_url=base_url, api_key=api_key, headers=headers, ) diff --git a/jigsawstack/image_generation.py b/jigsawstack/image_generation.py index 40d0e81..08cf81c 100644 --- a/jigsawstack/image_generation.py +++ b/jigsawstack/image_generation.py @@ -89,12 +89,12 @@ class ImageGeneration(ClientConfig): def __init__( self, api_key: str, - api_url: str, + base_url: str, headers: Union[Dict[str, str], None] = None, ): - super().__init__(api_key, api_url, headers) + super().__init__(api_key, base_url, headers) self.config = RequestConfig( - api_url=api_url, + base_url=base_url, api_key=api_key, headers=headers, ) @@ -118,12 +118,12 @@ class AsyncImageGeneration(ClientConfig): def __init__( self, api_key: str, - api_url: str, + base_url: str, headers: Union[Dict[str, str], None] = None, ): - super().__init__(api_key, api_url, headers) + super().__init__(api_key, base_url, headers) self.config = RequestConfig( - api_url=api_url, + base_url=base_url, api_key=api_key, headers=headers, ) diff --git a/jigsawstack/prediction.py b/jigsawstack/prediction.py index eeee9a6..00bd3cf 100644 --- a/jigsawstack/prediction.py +++ b/jigsawstack/prediction.py @@ -48,12 +48,12 @@ class Prediction(ClientConfig): def __init__( self, api_key: str, - api_url: str, + base_url: str, headers: Union[Dict[str, str], None] = None, ): - super().__init__(api_key, api_url, headers) + super().__init__(api_key, base_url, headers) self.config = RequestConfig( - api_url=api_url, + base_url=base_url, api_key=api_key, headers=headers, ) @@ -75,12 +75,12 @@ class AsyncPrediction(ClientConfig): def __init__( self, api_key: str, - api_url: str, + base_url: str, headers: Union[Dict[str, str], None] = None, ): - super().__init__(api_key, api_url, headers) + super().__init__(api_key, base_url, headers) self.config = RequestConfig( - api_url=api_url, + base_url=base_url, api_key=api_key, headers=headers, ) diff --git a/jigsawstack/prompt_engine.py b/jigsawstack/prompt_engine.py index 1c2420b..59932b6 100644 --- a/jigsawstack/prompt_engine.py +++ b/jigsawstack/prompt_engine.py @@ -97,12 +97,12 @@ class PromptEngine(ClientConfig): def __init__( self, api_key: str, - api_url: str, + base_url: str, headers: Union[Dict[str, str], None] = None, ): - super().__init__(api_key, api_url, headers) + super().__init__(api_key, base_url, headers) self.config = RequestConfig( - api_url=api_url, + base_url=base_url, api_key=api_key, headers=headers, ) @@ -203,12 +203,12 @@ class AsyncPromptEngine(ClientConfig): def __init__( self, api_key: str, - api_url: str, + base_url: str, disable_request_logging: Union[bool, None] = False, ): - super().__init__(api_key, api_url, disable_request_logging) + super().__init__(api_key, base_url, disable_request_logging) self.config = RequestConfig( - api_url=api_url, + base_url=base_url, api_key=api_key, disable_request_logging=disable_request_logging, ) diff --git a/jigsawstack/request.py b/jigsawstack/request.py index 26fc710..38cbf01 100644 --- a/jigsawstack/request.py +++ b/jigsawstack/request.py @@ -12,7 +12,7 @@ class RequestConfig(TypedDict): - api_url: str + base_url: str api_key: str headers: Union[Dict[str, str], None] @@ -25,7 +25,6 @@ def __init__( path: str, params: Union[Dict[Any, Any], List[Dict[Any, Any]]], verb: RequestVerb, - headers: Dict[str, str] = None, data: Union[bytes, None] = None, stream: Union[bool, None] = False, files: Union[Dict[str, Any], None] = None, @@ -33,10 +32,10 @@ def __init__( self.path = path self.params = params self.verb = verb - self.api_url = config.get("api_url") + self.base_url = config.get("base_url") self.api_key = config.get("api_key") self.data = data - self.headers = headers or {"Content-Type": "application/json"} + self.headers = config.get("headers", None) or {"Content-Type": "application/json"} self.stream = stream self.files = files @@ -51,7 +50,7 @@ def perform(self) -> Union[T, None]: Raises: requests.HTTPError: If the request fails """ - resp = self.make_request(url=f"{self.api_url}{self.path}") + resp = self.make_request(url=f"{self.base_url}{self.path}") # for binary responses if resp.status_code == 200: @@ -84,7 +83,7 @@ def perform(self) -> Union[T, None]: return cast(T, resp) def perform_file(self) -> Union[T, None]: - resp = self.make_request(url=f"{self.api_url}{self.path}") + resp = self.make_request(url=f"{self.base_url}{self.path}") # delete calls do not return a body if resp.text == "" and resp.status_code == 200: @@ -182,7 +181,7 @@ def perform_streaming(self) -> Generator[Union[T, str], None, None]: Raises: requests.HTTPError: If the request fails """ - resp = self.make_request(url=f"{self.api_url}{self.path}") + resp = self.make_request(url=f"{self.base_url}{self.path}") # delete calls do not return a body if resp.text == "": diff --git a/jigsawstack/search.py b/jigsawstack/search.py index c1fe804..7898f8b 100644 --- a/jigsawstack/search.py +++ b/jigsawstack/search.py @@ -225,12 +225,12 @@ class Search(ClientConfig): def __init__( self, api_key: str, - api_url: str, + base_url: str, headers: Union[Dict[str, str], None] = None, ): - super().__init__(api_key, api_url, headers) + super().__init__(api_key, base_url, headers) self.config = RequestConfig( - api_url=api_url, + base_url=base_url, api_key=api_key, headers=headers, ) @@ -287,12 +287,12 @@ class AsyncSearch(ClientConfig): def __init__( self, api_key: str, - api_url: str, + base_url: str, headers: Union[Dict[str, str], None] = None, ): - super().__init__(api_key, api_url, headers) + super().__init__(api_key, base_url, headers) self.config = AsyncRequestConfig( - api_url=api_url, + base_url=base_url, api_key=api_key, headers=headers, ) diff --git a/jigsawstack/sentiment.py b/jigsawstack/sentiment.py index db1031a..7bc1acb 100644 --- a/jigsawstack/sentiment.py +++ b/jigsawstack/sentiment.py @@ -48,12 +48,12 @@ class Sentiment(ClientConfig): def __init__( self, api_key: str, - api_url: str, + base_url: str, headers: Union[Dict[str, str], None] = None, ): - super().__init__(api_key, api_url, headers) + super().__init__(api_key, base_url, headers) self.config = RequestConfig( - api_url=api_url, + base_url=base_url, api_key=api_key, headers=headers, ) @@ -75,12 +75,12 @@ class AsyncSentiment(ClientConfig): def __init__( self, api_key: str, - api_url: str, + base_url: str, headers: Union[Dict[str, str], None] = None, ): - super().__init__(api_key, api_url, headers) + super().__init__(api_key, base_url, headers) self.config = RequestConfig( - api_url=api_url, + base_url=base_url, api_key=api_key, headers=headers, ) diff --git a/jigsawstack/sql.py b/jigsawstack/sql.py index c74f2a7..4fa0ac8 100644 --- a/jigsawstack/sql.py +++ b/jigsawstack/sql.py @@ -43,12 +43,12 @@ class SQL(ClientConfig): def __init__( self, api_key: str, - api_url: str, + base_url: str, headers: Union[Dict[str, str], None] = None, ): - super().__init__(api_key, api_url, headers) + super().__init__(api_key, base_url, headers) self.config = RequestConfig( - api_url=api_url, + base_url=base_url, api_key=api_key, headers=headers, ) @@ -70,12 +70,12 @@ class AsyncSQL(ClientConfig): def __init__( self, api_key: str, - api_url: str, + base_url: str, headers: Union[Dict[str, str], None] = None, ): - super().__init__(api_key, api_url, headers) + super().__init__(api_key, base_url, headers) self.config = RequestConfig( - api_url=api_url, + base_url=base_url, api_key=api_key, headers=headers, ) diff --git a/jigsawstack/store.py b/jigsawstack/store.py index 4dd6918..89facea 100644 --- a/jigsawstack/store.py +++ b/jigsawstack/store.py @@ -32,12 +32,12 @@ class Store(ClientConfig): def __init__( self, api_key: str, - api_url: str, + base_url: str, headers: Union[Dict[str, str], None] = None, ): - super().__init__(api_key, api_url, headers) + super().__init__(api_key, base_url, headers) self.config = RequestConfig( - api_url=api_url, + base_url=base_url, api_key=api_key, headers=headers, ) @@ -51,14 +51,16 @@ def upload( path = build_path(base_path="/store/file", params=options) content_type = options.get("content_type", "application/octet-stream") - _headers = {"Content-Type": content_type} + config_with_headers = self.config.copy() + if config_with_headers.get("headers") is None: + config_with_headers["headers"] = {} + config_with_headers["headers"]["Content-Type"] = content_type resp = Request( - config=self.config, - params=options, # Empty params since we're using them in the URL + config=config_with_headers, + params={}, path=path, data=file, - headers=_headers, verb="post", ).perform_with_content() return resp @@ -90,12 +92,12 @@ class AsyncStore(ClientConfig): def __init__( self, api_key: str, - api_url: str, + base_url: str, headers: Union[Dict[str, str], None] = None, ): - super().__init__(api_key, api_url, headers) + super().__init__(api_key, base_url, headers) self.config = AsyncRequestConfig( - api_url=api_url, + base_url=base_url, api_key=api_key, headers=headers, ) @@ -108,13 +110,17 @@ async def upload( path = build_path(base_path="/store/file", params=options) content_type = options.get("content_type", "application/octet-stream") - _headers = {"Content-Type": content_type} + + config_with_headers = self.config.copy() + if config_with_headers.get("headers") is None: + config_with_headers["headers"] = {} + config_with_headers["headers"]["Content-Type"] = content_type + resp = await AsyncRequest( - config=self.config, - params=options, # Empty params since we're using them in the URL + config=config_with_headers, + params={}, path=path, data=file, - headers=_headers, verb="post", ).perform_with_content() return resp diff --git a/jigsawstack/summary.py b/jigsawstack/summary.py index 8bbc0c3..48fe578 100644 --- a/jigsawstack/summary.py +++ b/jigsawstack/summary.py @@ -53,12 +53,12 @@ class Summary(ClientConfig): def __init__( self, api_key: str, - api_url: str, + base_url: str, headers: Union[Dict[str, str], None] = None, ): - super().__init__(api_key, api_url, headers) + super().__init__(api_key, base_url, headers) self.config = RequestConfig( - api_url=api_url, + base_url=base_url, api_key=api_key, headers=headers, ) @@ -80,12 +80,12 @@ class AsyncSummary(ClientConfig): def __init__( self, api_key: str, - api_url: str, + base_url: str, headers: Union[Dict[str, str], None] = None, ): - super().__init__(api_key, api_url, headers) + super().__init__(api_key, base_url, headers) self.config = RequestConfig( - api_url=api_url, + base_url=base_url, api_key=api_key, headers=headers, ) diff --git a/jigsawstack/translate.py b/jigsawstack/translate.py index ebffcfb..b609540 100644 --- a/jigsawstack/translate.py +++ b/jigsawstack/translate.py @@ -63,12 +63,12 @@ class Translate(ClientConfig): def __init__( self, api_key: str, - api_url: str, + base_url: str, headers: Union[Dict[str, str], None] = None, ): - super().__init__(api_key, api_url, headers) + super().__init__(api_key, base_url, headers) self.config = RequestConfig( - api_url=api_url, + base_url=base_url, api_key=api_key, headers=headers, ) @@ -124,12 +124,12 @@ class AsyncTranslate(ClientConfig): def __init__( self, api_key: str, - api_url: str, + base_url: str, headers: Union[Dict[str, str], None] = None, ): - super().__init__(api_key, api_url, headers) + super().__init__(api_key, base_url, headers) self.config = RequestConfig( - api_url=api_url, + base_url=base_url, api_key=api_key, headers=headers, ) diff --git a/jigsawstack/validate.py b/jigsawstack/validate.py index 37614b2..d40cf55 100644 --- a/jigsawstack/validate.py +++ b/jigsawstack/validate.py @@ -79,12 +79,12 @@ class Validate(ClientConfig): def __init__( self, api_key: str, - api_url: str, + base_url: str, headers: Union[Dict[str, str], None] = None, ): - super().__init__(api_key, api_url, headers) + super().__init__(api_key, base_url, headers) self.config = RequestConfig( - api_url=api_url, + base_url=base_url, api_key=api_key, headers=headers, ) @@ -165,12 +165,12 @@ class AsyncValidate(ClientConfig): def __init__( self, api_key: str, - api_url: str, + base_url: str, headers: Union[Dict[str, str], None] = None, ): - super().__init__(api_key, api_url, headers) + super().__init__(api_key, base_url, headers) self.config = AsyncRequestConfig( - api_url=api_url, + base_url=base_url, api_key=api_key, headers=headers, ) diff --git a/jigsawstack/vision.py b/jigsawstack/vision.py index e44bb57..280b71d 100644 --- a/jigsawstack/vision.py +++ b/jigsawstack/vision.py @@ -171,12 +171,12 @@ class Vision(ClientConfig): def __init__( self, api_key: str, - api_url: str, + base_url: str, headers: Union[Dict[str, str], None] = None, ): - super().__init__(api_key, api_url, headers) + super().__init__(api_key, base_url, headers) self.config = RequestConfig( - api_url=api_url, + base_url=base_url, api_key=api_key, headers=headers, ) @@ -253,12 +253,12 @@ class AsyncVision(ClientConfig): def __init__( self, api_key: str, - api_url: str, + base_url: str, headers: Union[Dict[str, str], None] = None, ): - super().__init__(api_key, api_url, headers) + super().__init__(api_key, base_url, headers) self.config = AsyncRequestConfig( - api_url=api_url, + base_url=base_url, api_key=api_key, headers=headers, ) @@ -277,7 +277,6 @@ async def vocr( options = options or {} if isinstance(blob, dict): resp = await AsyncRequest( - headers=self.headers, config=self.config, path=path, params=cast(Dict[Any, Any], blob), @@ -287,7 +286,6 @@ async def vocr( files = {"file": blob} resp = await AsyncRequest( - headers=self.headers, config=self.config, path=path, params=options, @@ -314,7 +312,6 @@ async def object_detection( blob, dict ): # If params is provided as a dict, we assume it's the first argument resp = await AsyncRequest( - headers=self.headers, config=self.config, path=path, params=cast(Dict[Any, Any], blob), diff --git a/jigsawstack/web.py b/jigsawstack/web.py index 732e9c6..d432c25 100644 --- a/jigsawstack/web.py +++ b/jigsawstack/web.py @@ -198,12 +198,12 @@ class Web(ClientConfig): def __init__( self, api_key: str, - api_url: str, + base_url: str, headers: Union[Dict[str, str], None] = None, ): - super().__init__(api_key, api_url, headers) + super().__init__(api_key, base_url, headers) self.config = RequestConfig( - api_url=api_url, + base_url=base_url, api_key=api_key, headers=headers, ) @@ -248,15 +248,15 @@ def html_to_any( return cast(HTMLToAnyURLResponse, resp) def search(self, params: SearchParams) -> SearchResponse: - s = Search(self.api_key, self.api_url, self.headers) + s = Search(self.api_key, self.base_url, self.headers) return s.search(params) def search_suggestions(self, params: SearchSuggestionsParams) -> SearchSuggestionsResponse: - s = Search(self.api_key, self.api_url, self.headers) + s = Search(self.api_key, self.base_url, self.headers) return s.suggestions(params) def deep_research(self, params: DeepResearchParams) -> DeepResearchResponse: - s = Search(self.api_key, self.api_url, self.headers) + s = Search(self.api_key, self.base_url, self.headers) return s.deep_research(params) @@ -269,12 +269,12 @@ class AsyncWeb(ClientConfig): def __init__( self, api_key: str, - api_url: str, + base_url: str, headers: Union[Dict[str, str], None] = None, ): - super().__init__(api_key, api_url, headers) + super().__init__(api_key, base_url, headers) self.config = AsyncRequestConfig( - api_url=api_url, + base_url=base_url, api_key=api_key, headers=headers, ) @@ -319,15 +319,15 @@ async def html_to_any( return cast(HTMLToAnyURLResponse, resp) async def search(self, params: SearchParams) -> SearchResponse: - s = AsyncSearch(self.api_key, self.api_url, self.headers) + s = AsyncSearch(self.api_key, self.base_url, self.headers) return await s.search(params) async def search_suggestions( self, params: SearchSuggestionsParams ) -> SearchSuggestionsResponse: - s = AsyncSearch(self.api_key, self.api_url, self.headers) + s = AsyncSearch(self.api_key, self.base_url, self.headers) return await s.suggestions(params) async def deep_research(self, params: DeepResearchParams) -> DeepResearchResponse: - s = AsyncSearch(self.api_key, self.api_url, self.headers) + s = AsyncSearch(self.api_key, self.base_url, self.headers) return await s.deep_research(params) From 160c8598c5c2c671a59c5dcc85f14683a0a5dc83 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Mon, 15 Sep 2025 20:35:31 -0700 Subject: [PATCH 75/95] fix: env variable to still be API_URL --- jigsawstack/__init__.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/jigsawstack/__init__.py b/jigsawstack/__init__.py index 6858a64..590e08e 100644 --- a/jigsawstack/__init__.py +++ b/jigsawstack/__init__.py @@ -55,7 +55,7 @@ def __init__( ) if base_url is None: - base_url = os.environ.get("JIGSAWSTACK_base_url") + base_url = os.environ.get("JIGSAWSTACK_API_URL") if base_url is None: base_url = "https://api.jigsawstack.com/" @@ -80,6 +80,7 @@ def __init__( self.text_to_sql = SQL(api_key=api_key, base_url=base_url + "/v1").text_to_sql self.store = Store(api_key=api_key, base_url=base_url + "/v1") + self.translate = Translate(api_key=api_key, base_url=base_url + "/v1") self.embedding = Embedding(api_key=api_key, base_url=base_url + "/v1").execute @@ -130,7 +131,7 @@ def __init__( ) if base_url is None: - base_url = os.environ.get("JIGSAWSTACK_base_url") + base_url = os.environ.get("JIGSAWSTACK_API_URL") if base_url is None: base_url = "https://api.jigsawstack.com/" @@ -141,6 +142,7 @@ def __init__( self.web = AsyncWeb(api_key=api_key, base_url=base_url + "/v1") self.validate = AsyncValidate(api_key=api_key, base_url=base_url + "/v1") + self.audio = AsyncAudio(api_key=api_key, base_url=base_url + "/v1") self.vision = AsyncVision(api_key=api_key, base_url=base_url + "/v1") From d81794de9e63b453643ab954cfac799e77d03f04 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Mon, 15 Sep 2025 20:36:17 -0700 Subject: [PATCH 76/95] feat: formatting with ruff --- jigsawstack/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jigsawstack/__init__.py b/jigsawstack/__init__.py index 590e08e..98ecc26 100644 --- a/jigsawstack/__init__.py +++ b/jigsawstack/__init__.py @@ -142,7 +142,7 @@ def __init__( self.web = AsyncWeb(api_key=api_key, base_url=base_url + "/v1") self.validate = AsyncValidate(api_key=api_key, base_url=base_url + "/v1") - + self.audio = AsyncAudio(api_key=api_key, base_url=base_url + "/v1") self.vision = AsyncVision(api_key=api_key, base_url=base_url + "/v1") From ad9ab563e91719203800422db42721640d731105 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Mon, 15 Sep 2025 20:46:47 -0700 Subject: [PATCH 77/95] feat: pass headers to endpoint. --- jigsawstack/__init__.py | 58 ++++++++++++++++++++--------------------- 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/jigsawstack/__init__.py b/jigsawstack/__init__.py index 98ecc26..1613098 100644 --- a/jigsawstack/__init__.py +++ b/jigsawstack/__init__.py @@ -64,36 +64,36 @@ def __init__( self.headers = headers or {"Content-Type": "application/json"} - self.audio = Audio(api_key=api_key, base_url=base_url + "/v1") + self.audio = Audio(api_key=api_key, base_url=base_url + "/v1", headers=headers) - self.web = Web(api_key=api_key, base_url=base_url + "/v1") + self.web = Web(api_key=api_key, base_url=base_url + "/v1", headers=headers) - self.sentiment = Sentiment(api_key=api_key, base_url=base_url + "/v1").analyze + self.sentiment = Sentiment(api_key=api_key, base_url=base_url + "/v1", headers=headers).analyze - self.validate = Validate(api_key=api_key, base_url=base_url + "/v1") - self.summary = Summary(api_key=api_key, base_url=base_url + "/v1").summarize + self.validate = Validate(api_key=api_key, base_url=base_url + "/v1", headers=headers) + self.summary = Summary(api_key=api_key, base_url=base_url + "/v1", headers=headers).summarize - self.vision = Vision(api_key=api_key, base_url=base_url + "/v1") + self.vision = Vision(api_key=api_key, base_url=base_url + "/v1", headers=headers) - self.prediction = Prediction(api_key=api_key, base_url=base_url + "/v1").predict + self.prediction = Prediction(api_key=api_key, base_url=base_url + "/v1", headers=headers).predict - self.text_to_sql = SQL(api_key=api_key, base_url=base_url + "/v1").text_to_sql + self.text_to_sql = SQL(api_key=api_key, base_url=base_url + "/v1", headers=headers).text_to_sql - self.store = Store(api_key=api_key, base_url=base_url + "/v1") + self.store = Store(api_key=api_key, base_url=base_url + "/v1", headers=headers) - self.translate = Translate(api_key=api_key, base_url=base_url + "/v1") + self.translate = Translate(api_key=api_key, base_url=base_url + "/v1", headers=headers) - self.embedding = Embedding(api_key=api_key, base_url=base_url + "/v1").execute + self.embedding = Embedding(api_key=api_key, base_url=base_url + "/v1", headers=headers).execute - self.embedding_v2 = EmbeddingV2(api_key=api_key, base_url=base_url + "/v2").execute + self.embedding_v2 = EmbeddingV2(api_key=api_key, base_url=base_url + "/v2", headers=headers).execute self.image_generation = ImageGeneration( - api_key=api_key, base_url=base_url + "/v1" + api_key=api_key, base_url=base_url + "/v1", headers=headers ).image_generation - self.classification = Classification(api_key=api_key, base_url=base_url + "/v1").classify + self.classification = Classification(api_key=api_key, base_url=base_url + "/v1", headers=headers).classify - self.prompt_engine = PromptEngine(api_key=api_key, base_url=base_url + "/v1") + self.prompt_engine = PromptEngine(api_key=api_key, base_url=base_url + "/v1", headers=headers) class AsyncJigsawStack: @@ -139,39 +139,39 @@ def __init__( self.base_url = base_url self.headers = headers or {"Content-Type": "application/json"} - self.web = AsyncWeb(api_key=api_key, base_url=base_url + "/v1") + self.web = AsyncWeb(api_key=api_key, base_url=base_url + "/v1", headers=headers) - self.validate = AsyncValidate(api_key=api_key, base_url=base_url + "/v1") + self.validate = AsyncValidate(api_key=api_key, base_url=base_url + "/v1", headers=headers) - self.audio = AsyncAudio(api_key=api_key, base_url=base_url + "/v1") + self.audio = AsyncAudio(api_key=api_key, base_url=base_url + "/v1", headers=headers) - self.vision = AsyncVision(api_key=api_key, base_url=base_url + "/v1") + self.vision = AsyncVision(api_key=api_key, base_url=base_url + "/v1", headers=headers) - self.store = AsyncStore(api_key=api_key, base_url=base_url + "/v1") + self.store = AsyncStore(api_key=api_key, base_url=base_url + "/v1", headers=headers) - self.summary = AsyncSummary(api_key=api_key, base_url=base_url + "/v1").summarize + self.summary = AsyncSummary(api_key=api_key, base_url=base_url + "/v1", headers=headers).summarize self.prediction = AsyncPrediction(api_key=api_key, base_url=base_url + "/v1").predict - self.text_to_sql = AsyncSQL(api_key=api_key, base_url=base_url + "/v1").text_to_sql + self.text_to_sql = AsyncSQL(api_key=api_key, base_url=base_url + "/v1", headers=headers).text_to_sql - self.sentiment = AsyncSentiment(api_key=api_key, base_url=base_url + "/v1").analyze + self.sentiment = AsyncSentiment(api_key=api_key, base_url=base_url + "/v1", headers=headers).analyze - self.translate = AsyncTranslate(api_key=api_key, base_url=base_url + "/v1") + self.translate = AsyncTranslate(api_key=api_key, base_url=base_url + "/v1", headers=headers) - self.embedding = AsyncEmbedding(api_key=api_key, base_url=base_url + "/v1").execute + self.embedding = AsyncEmbedding(api_key=api_key, base_url=base_url + "/v1", headers=headers).execute - self.embedding_v2 = AsyncEmbeddingV2(api_key=api_key, base_url=base_url + "/v2").execute + self.embedding_v2 = AsyncEmbeddingV2(api_key=api_key, base_url=base_url + "/v2", headers=headers).execute self.image_generation = AsyncImageGeneration( - api_key=api_key, base_url=base_url + "/v1" + api_key=api_key, base_url=base_url + "/v1", headers=headers ).image_generation self.classification = AsyncClassification( - api_key=api_key, base_url=base_url + "/v1" + api_key=api_key, base_url=base_url + "/v1", headers=headers ).classify - self.prompt_engine = AsyncPromptEngine(api_key=api_key, base_url=base_url + "/v1") + self.prompt_engine = AsyncPromptEngine(api_key=api_key, base_url=base_url + "/v1", headers=headers) # Create a global instance of the Web class From 6bef4e1c6b05b471bc3ddacad45064905ab5c8d6 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Mon, 15 Sep 2025 20:47:01 -0700 Subject: [PATCH 78/95] feat: pass headers to endpoint. --- jigsawstack/__init__.py | 56 ++++++++++++++++++++++++++++++----------- 1 file changed, 42 insertions(+), 14 deletions(-) diff --git a/jigsawstack/__init__.py b/jigsawstack/__init__.py index 1613098..9218810 100644 --- a/jigsawstack/__init__.py +++ b/jigsawstack/__init__.py @@ -68,32 +68,48 @@ def __init__( self.web = Web(api_key=api_key, base_url=base_url + "/v1", headers=headers) - self.sentiment = Sentiment(api_key=api_key, base_url=base_url + "/v1", headers=headers).analyze + self.sentiment = Sentiment( + api_key=api_key, base_url=base_url + "/v1", headers=headers + ).analyze self.validate = Validate(api_key=api_key, base_url=base_url + "/v1", headers=headers) - self.summary = Summary(api_key=api_key, base_url=base_url + "/v1", headers=headers).summarize + self.summary = Summary( + api_key=api_key, base_url=base_url + "/v1", headers=headers + ).summarize self.vision = Vision(api_key=api_key, base_url=base_url + "/v1", headers=headers) - self.prediction = Prediction(api_key=api_key, base_url=base_url + "/v1", headers=headers).predict + self.prediction = Prediction( + api_key=api_key, base_url=base_url + "/v1", headers=headers + ).predict - self.text_to_sql = SQL(api_key=api_key, base_url=base_url + "/v1", headers=headers).text_to_sql + self.text_to_sql = SQL( + api_key=api_key, base_url=base_url + "/v1", headers=headers + ).text_to_sql self.store = Store(api_key=api_key, base_url=base_url + "/v1", headers=headers) self.translate = Translate(api_key=api_key, base_url=base_url + "/v1", headers=headers) - self.embedding = Embedding(api_key=api_key, base_url=base_url + "/v1", headers=headers).execute + self.embedding = Embedding( + api_key=api_key, base_url=base_url + "/v1", headers=headers + ).execute - self.embedding_v2 = EmbeddingV2(api_key=api_key, base_url=base_url + "/v2", headers=headers).execute + self.embedding_v2 = EmbeddingV2( + api_key=api_key, base_url=base_url + "/v2", headers=headers + ).execute self.image_generation = ImageGeneration( api_key=api_key, base_url=base_url + "/v1", headers=headers ).image_generation - self.classification = Classification(api_key=api_key, base_url=base_url + "/v1", headers=headers).classify + self.classification = Classification( + api_key=api_key, base_url=base_url + "/v1", headers=headers + ).classify - self.prompt_engine = PromptEngine(api_key=api_key, base_url=base_url + "/v1", headers=headers) + self.prompt_engine = PromptEngine( + api_key=api_key, base_url=base_url + "/v1", headers=headers + ) class AsyncJigsawStack: @@ -149,19 +165,29 @@ def __init__( self.store = AsyncStore(api_key=api_key, base_url=base_url + "/v1", headers=headers) - self.summary = AsyncSummary(api_key=api_key, base_url=base_url + "/v1", headers=headers).summarize + self.summary = AsyncSummary( + api_key=api_key, base_url=base_url + "/v1", headers=headers + ).summarize self.prediction = AsyncPrediction(api_key=api_key, base_url=base_url + "/v1").predict - self.text_to_sql = AsyncSQL(api_key=api_key, base_url=base_url + "/v1", headers=headers).text_to_sql + self.text_to_sql = AsyncSQL( + api_key=api_key, base_url=base_url + "/v1", headers=headers + ).text_to_sql - self.sentiment = AsyncSentiment(api_key=api_key, base_url=base_url + "/v1", headers=headers).analyze + self.sentiment = AsyncSentiment( + api_key=api_key, base_url=base_url + "/v1", headers=headers + ).analyze self.translate = AsyncTranslate(api_key=api_key, base_url=base_url + "/v1", headers=headers) - self.embedding = AsyncEmbedding(api_key=api_key, base_url=base_url + "/v1", headers=headers).execute + self.embedding = AsyncEmbedding( + api_key=api_key, base_url=base_url + "/v1", headers=headers + ).execute - self.embedding_v2 = AsyncEmbeddingV2(api_key=api_key, base_url=base_url + "/v2", headers=headers).execute + self.embedding_v2 = AsyncEmbeddingV2( + api_key=api_key, base_url=base_url + "/v2", headers=headers + ).execute self.image_generation = AsyncImageGeneration( api_key=api_key, base_url=base_url + "/v1", headers=headers @@ -171,7 +197,9 @@ def __init__( api_key=api_key, base_url=base_url + "/v1", headers=headers ).classify - self.prompt_engine = AsyncPromptEngine(api_key=api_key, base_url=base_url + "/v1", headers=headers) + self.prompt_engine = AsyncPromptEngine( + api_key=api_key, base_url=base_url + "/v1", headers=headers + ) # Create a global instance of the Web class From 5b29bef622fd18fd933eccb90eed6f36e40be679 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Mon, 15 Sep 2025 20:57:50 -0700 Subject: [PATCH 79/95] fix: AsyncPromptEngine must accept headers. --- jigsawstack/prompt_engine.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/jigsawstack/prompt_engine.py b/jigsawstack/prompt_engine.py index 59932b6..c264db9 100644 --- a/jigsawstack/prompt_engine.py +++ b/jigsawstack/prompt_engine.py @@ -204,13 +204,13 @@ def __init__( self, api_key: str, base_url: str, - disable_request_logging: Union[bool, None] = False, + headers: Union[Dict[str, str], None] = None, ): - super().__init__(api_key, base_url, disable_request_logging) + super().__init__(api_key, base_url, headers) self.config = RequestConfig( base_url=base_url, api_key=api_key, - disable_request_logging=disable_request_logging, + headers=headers, ) async def create(self, params: PromptEngineCreateParams) -> PromptEngineCreateResponse: From 208689beadde06d1a3b1f887fad174e420cc56ed Mon Sep 17 00:00:00 2001 From: Win Cheng Date: Mon, 15 Sep 2025 22:15:14 -0700 Subject: [PATCH 80/95] update to match js sdk version --- jigsawstack/version.py | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/jigsawstack/version.py b/jigsawstack/version.py index 95b9715..d277684 100644 --- a/jigsawstack/version.py +++ b/jigsawstack/version.py @@ -1,4 +1,4 @@ -__version__ = "0.3.4" +__version__ = "0.3.5" def get_version() -> str: diff --git a/setup.py b/setup.py index 1aebb49..2a74d1c 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ setup( name="jigsawstack", - version="0.3.4", + version="0.3.5", description="JigsawStack - The AI SDK for Python", long_description=open("README.md", encoding="utf8").read(), long_description_content_type="text/markdown", From f4bc429f4d9f835528d7c267be07fead7f3aac7d Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Tue, 16 Sep 2025 19:59:31 -0700 Subject: [PATCH 81/95] fix: missing single binary upload. --- jigsawstack/async_request.py | 3 +++ jigsawstack/request.py | 4 +++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/jigsawstack/async_request.py b/jigsawstack/async_request.py index d8f530d..d86e6b2 100644 --- a/jigsawstack/async_request.py +++ b/jigsawstack/async_request.py @@ -238,6 +238,7 @@ async def make_request( headers = self.__get_headers() params = self.params verb = self.verb + data = self.data files = self.files _params = None @@ -254,6 +255,8 @@ async def make_request( _form_data.add_field("body", json.dumps(params), content_type="application/json") headers.pop("Content-Type", None) + elif data: # raw data request + _data = data else: # pure JSON request _json = params diff --git a/jigsawstack/request.py b/jigsawstack/request.py index 38cbf01..84b25d9 100644 --- a/jigsawstack/request.py +++ b/jigsawstack/request.py @@ -247,6 +247,7 @@ def make_request(self, url: str) -> requests.Response: headers = self.__get_headers() params = self.params verb = self.verb + data = self.data files = self.files _requestParams = None @@ -261,7 +262,8 @@ def make_request(self, url: str) -> requests.Response: if params and isinstance(params, dict): _data = {"body": json.dumps(params)} headers.pop("Content-Type", None) # let requests set it for multipart - + elif data: # raw data request + _data = data else: # pure JSON request _json = params try: From 36cc7e68305769f7a5e9d7337b084baf54f4d5a7 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Tue, 16 Sep 2025 20:03:14 -0700 Subject: [PATCH 82/95] chore: update versions to 0.3.6 --- jigsawstack/version.py | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/jigsawstack/version.py b/jigsawstack/version.py index d277684..c59a198 100644 --- a/jigsawstack/version.py +++ b/jigsawstack/version.py @@ -1,4 +1,4 @@ -__version__ = "0.3.5" +__version__ = "0.3.6" def get_version() -> str: diff --git a/setup.py b/setup.py index 2a74d1c..4bb1d30 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ setup( name="jigsawstack", - version="0.3.5", + version="0.3.6", description="JigsawStack - The AI SDK for Python", long_description=open("README.md", encoding="utf8").read(), long_description_content_type="text/markdown", From c4fb4614f634f07ac504340a1aa7cd04f6a975a2 Mon Sep 17 00:00:00 2001 From: Win Cheng Date: Thu, 18 Sep 2025 11:01:47 -0700 Subject: [PATCH 83/95] typed now as object instead of object detection --- jigsawstack/vision.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/jigsawstack/vision.py b/jigsawstack/vision.py index 280b71d..072f441 100644 --- a/jigsawstack/vision.py +++ b/jigsawstack/vision.py @@ -102,9 +102,9 @@ class ObjectDetectionParams(TypedDict): List of prompts for object detection """ - features: NotRequired[List[Literal["object_detection", "gui"]]] + features: NotRequired[List[Literal["object", "gui"]]] """ - List of features to enable: object_detection, gui + List of features to enable: object, gui """ annotated_image: NotRequired[bool] @@ -215,7 +215,9 @@ def vocr( return resp @overload - def object_detection(self, params: ObjectDetectionParams) -> ObjectDetectionResponse: ... + def object_detection( + self, params: ObjectDetectionParams + ) -> ObjectDetectionResponse: ... @overload def object_detection( self, blob: bytes, options: ObjectDetectionParams = None @@ -295,7 +297,9 @@ async def vocr( return resp @overload - async def object_detection(self, params: ObjectDetectionParams) -> ObjectDetectionResponse: ... + async def object_detection( + self, params: ObjectDetectionParams + ) -> ObjectDetectionResponse: ... @overload async def object_detection( self, blob: bytes, options: ObjectDetectionParams = None From a9002486acfa527bdfd61a5e3169ce722eeb9106 Mon Sep 17 00:00:00 2001 From: Win Cheng Date: Fri, 19 Sep 2025 16:52:56 -0700 Subject: [PATCH 84/95] updated test --- tests/test_object_detection.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/test_object_detection.py b/tests/test_object_detection.py index 1fbd5ca..38e92c6 100644 --- a/tests/test_object_detection.py +++ b/tests/test_object_detection.py @@ -18,9 +18,7 @@ jigsaw = jigsawstack.JigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) async_jigsaw = jigsawstack.AsyncJigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) -IMAGE_URL = ( - "https://rogilvkqloanxtvjfrkm.supabase.co/storage/v1/object/public/demo/Collabo%201080x842.jpg" -) +IMAGE_URL = "https://rogilvkqloanxtvjfrkm.supabase.co/storage/v1/object/public/demo/Collabo%201080x842.jpg" TEST_CASES = [ { @@ -49,7 +47,7 @@ "name": "with_blob_both_features", "blob": IMAGE_URL, "options": { - "features": ["object_detection", "gui"], + "features": ["object", "gui"], "annotated_image": True, "return_type": "url", }, @@ -63,7 +61,7 @@ "name": "with_blob_object_detection_features", "blob": IMAGE_URL, "options": { - "features": ["object_detection"], + "features": ["object"], "annotated_image": True, "return_type": "base64", }, @@ -80,7 +78,7 @@ "name": "with_all_options", "blob": IMAGE_URL, "options": { - "features": ["object_detection", "gui"], + "features": ["object", "gui"], "prompts": ["car", "road", "tree"], "annotated_image": True, "return_type": "base64", @@ -104,7 +102,9 @@ def test_object_detection(self, test_case): if test_case.get("blob"): # Download blob content blob_content = requests.get(test_case["blob"]).content - result = jigsaw.vision.object_detection(blob_content, test_case.get("options", {})) + result = jigsaw.vision.object_detection( + blob_content, test_case.get("options", {}) + ) else: # Use params directly result = jigsaw.vision.object_detection(test_case["params"]) From 65902c47ff57bfecbd3f9a505199db674411cfa4 Mon Sep 17 00:00:00 2001 From: Win Cheng Date: Fri, 19 Sep 2025 18:23:24 -0700 Subject: [PATCH 85/95] update linting --- jigsawstack/vision.py | 8 ++------ tests/test_object_detection.py | 8 ++++---- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/jigsawstack/vision.py b/jigsawstack/vision.py index 072f441..8eccb51 100644 --- a/jigsawstack/vision.py +++ b/jigsawstack/vision.py @@ -215,9 +215,7 @@ def vocr( return resp @overload - def object_detection( - self, params: ObjectDetectionParams - ) -> ObjectDetectionResponse: ... + def object_detection(self, params: ObjectDetectionParams) -> ObjectDetectionResponse: ... @overload def object_detection( self, blob: bytes, options: ObjectDetectionParams = None @@ -297,9 +295,7 @@ async def vocr( return resp @overload - async def object_detection( - self, params: ObjectDetectionParams - ) -> ObjectDetectionResponse: ... + async def object_detection(self, params: ObjectDetectionParams) -> ObjectDetectionResponse: ... @overload async def object_detection( self, blob: bytes, options: ObjectDetectionParams = None diff --git a/tests/test_object_detection.py b/tests/test_object_detection.py index 38e92c6..b7286aa 100644 --- a/tests/test_object_detection.py +++ b/tests/test_object_detection.py @@ -18,7 +18,9 @@ jigsaw = jigsawstack.JigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) async_jigsaw = jigsawstack.AsyncJigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) -IMAGE_URL = "https://rogilvkqloanxtvjfrkm.supabase.co/storage/v1/object/public/demo/Collabo%201080x842.jpg" +IMAGE_URL = ( + "https://rogilvkqloanxtvjfrkm.supabase.co/storage/v1/object/public/demo/Collabo%201080x842.jpg" +) TEST_CASES = [ { @@ -102,9 +104,7 @@ def test_object_detection(self, test_case): if test_case.get("blob"): # Download blob content blob_content = requests.get(test_case["blob"]).content - result = jigsaw.vision.object_detection( - blob_content, test_case.get("options", {}) - ) + result = jigsaw.vision.object_detection(blob_content, test_case.get("options", {})) else: # Use params directly result = jigsaw.vision.object_detection(test_case["params"]) From 90e576926863da12a0bc45e0f981f089127ea4bc Mon Sep 17 00:00:00 2001 From: Win Cheng Date: Sun, 21 Sep 2025 17:59:59 -0700 Subject: [PATCH 86/95] updated response type for stt --- jigsawstack/audio.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/jigsawstack/audio.py b/jigsawstack/audio.py index 575b839..c7a7acf 100644 --- a/jigsawstack/audio.py +++ b/jigsawstack/audio.py @@ -34,6 +34,8 @@ class SpeechToTextResponse(BaseResponse): text: str chunks: List[ChunkParams] speakers: Optional[List[BySpeakerParams]] + language_detected: Optional[str] + confidence: Optional[float] class SpeechToTextWebhookResponse(BaseResponse): From 1487dfce40d7afa5a7b598f9fbb225ef4a8d6dc1 Mon Sep 17 00:00:00 2001 From: Win Cheng Date: Sun, 21 Sep 2025 18:05:54 -0700 Subject: [PATCH 87/95] update stt descriptions --- jigsawstack/audio.py | 50 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/jigsawstack/audio.py b/jigsawstack/audio.py index c7a7acf..0cfc23e 100644 --- a/jigsawstack/audio.py +++ b/jigsawstack/audio.py @@ -10,13 +10,44 @@ class SpeechToTextParams(TypedDict): url: NotRequired[str] + """ + the url of the audio file to transcribe, optional if file_store_key is provided + """ + file_store_key: NotRequired[str] + """ + the file store key of the audio file to transcribe, optional if url is provided + """ + language: NotRequired[Union[str, Literal["auto"]]] + """ + The language to transcribe or translate the file into. Use “auto” for automatic language detection, or specify a language code. If not specified, defaults to automatic detection. All supported language codes can be found + """ + translate: NotRequired[bool] + """ + When set to true, translates the content into English (or the specified language if language parameter is provided) + """ + by_speaker: NotRequired[bool] + """ + Identifies and separates different speakers in the audio file. When enabled, the response will include a speakers array with speaker-segmented transcripts. + """ + webhook_url: NotRequired[str] + """ + Webhook URL to send result to. When provided, the API will process asynchronously and send results to this URL when completed. + """ + batch_size: NotRequired[int] + """ + The batch size to return. Maximum value is 40. This controls how the audio is chunked for processing. + """ + chunk_duration: NotRequired[int] + """ + the duration of each chunk in seconds, defaults to 30 + """ class ChunkParams(TypedDict): @@ -32,10 +63,29 @@ class BySpeakerParams(ChunkParams): class SpeechToTextResponse(BaseResponse): text: str + """ + the text of the transcription + """ + chunks: List[ChunkParams] + """ + the chunks of the transcription + """ + speakers: Optional[List[BySpeakerParams]] + """ + the speakers of the transcription, available if by_speaker is set to true + """ + language_detected: Optional[str] + """ + the language detected in the transcription, available if language is set to auto + """ + confidence: Optional[float] + """ + the confidence of the transcription language detection, available if language is set to auto + """ class SpeechToTextWebhookResponse(BaseResponse): From 0d71a8da0e88a41540808312f42270f50a024fe3 Mon Sep 17 00:00:00 2001 From: Win Cheng Date: Sun, 21 Sep 2025 18:08:31 -0700 Subject: [PATCH 88/95] updated description --- jigsawstack/audio.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jigsawstack/audio.py b/jigsawstack/audio.py index 0cfc23e..22b1f7a 100644 --- a/jigsawstack/audio.py +++ b/jigsawstack/audio.py @@ -46,7 +46,7 @@ class SpeechToTextParams(TypedDict): chunk_duration: NotRequired[int] """ - the duration of each chunk in seconds, defaults to 30 + the duration of each chunk in seconds, maximum value is 15, defaults to 3 """ From b6efb2e92e1ca91ca93d1408db6d0d5950b4ecb5 Mon Sep 17 00:00:00 2001 From: Win Cheng Date: Sun, 21 Sep 2025 18:26:01 -0700 Subject: [PATCH 89/95] updated vers --- jigsawstack/version.py | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/jigsawstack/version.py b/jigsawstack/version.py index c59a198..b5bca8d 100644 --- a/jigsawstack/version.py +++ b/jigsawstack/version.py @@ -1,4 +1,4 @@ -__version__ = "0.3.6" +__version__ = "0.3.7" def get_version() -> str: diff --git a/setup.py b/setup.py index 4bb1d30..4a268d3 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ setup( name="jigsawstack", - version="0.3.6", + version="0.3.7", description="JigsawStack - The AI SDK for Python", long_description=open("README.md", encoding="utf8").read(), long_description_content_type="text/markdown", From c6744785f5f3986b3d9683e41d101a8386963764 Mon Sep 17 00:00:00 2001 From: Win Cheng Date: Thu, 9 Oct 2025 12:50:54 -0700 Subject: [PATCH 90/95] update python to accept preview urls --- tests/test_audio.py | 48 ++++++++++++++++++----- tests/test_classification.py | 16 +++++++- tests/test_deep_research.py | 16 +++++++- tests/test_embedding.py | 24 +++++++++--- tests/test_file_store.py | 24 ++++++++++-- tests/test_image_generation.py | 40 ++++++++++++++++---- tests/test_object_detection.py | 24 +++++++++--- tests/test_prediction.py | 24 ++++++++++-- tests/test_sentiment.py | 24 ++++++++++-- tests/test_sql.py | 16 +++++++- tests/test_summary.py | 24 ++++++++++-- tests/test_translate.py | 28 +++++++++++--- tests/test_validate.py | 28 +++++++++++--- tests/test_vocr.py | 69 +++++++++++++++++++++++++--------- tests/test_web.py | 25 +++++++++++- 15 files changed, 349 insertions(+), 81 deletions(-) diff --git a/tests/test_audio.py b/tests/test_audio.py index 309b191..6a8861d 100644 --- a/tests/test_audio.py +++ b/tests/test_audio.py @@ -12,8 +12,20 @@ logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) -jigsaw = jigsawstack.JigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) -async_jigsaw = jigsawstack.AsyncJigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) +jigsaw = jigsawstack.JigsawStack( + api_key=os.getenv("JIGSAWSTACK_API_KEY"), + base_url=os.getenv("JIGSAWSTACK_BASE_URL") + "/api" + if os.getenv("JIGSAWSTACK_BASE_URL") + else "https://api.jigsawstack.com", + headers={"x-jigsaw-skip-cache": "true"}, +) +async_jigsaw = jigsawstack.AsyncJigsawStack( + api_key=os.getenv("JIGSAWSTACK_API_KEY"), + base_url=os.getenv("JIGSAWSTACK_BASE_URL") + "/api" + if os.getenv("JIGSAWSTACK_BASE_URL") + else "https://api.jigsawstack.com", + headers={"x-jigsaw-skip-cache": "true"}, +) AUDIO_URL = AUDIO_URL_LONG = "https://jigsawstack.com/preview/stt-example.wav" @@ -119,20 +131,26 @@ class TestAudioSync: """Test synchronous audio speech-to-text methods""" - @pytest.mark.parametrize("test_case", TEST_CASES, ids=[tc["name"] for tc in TEST_CASES]) + @pytest.mark.parametrize( + "test_case", TEST_CASES, ids=[tc["name"] for tc in TEST_CASES] + ) def test_speech_to_text(self, test_case): """Test synchronous speech-to-text with various inputs""" try: if test_case.get("blob"): # Download audio content blob_content = requests.get(test_case["blob"]).content - result = jigsaw.audio.speech_to_text(blob_content, test_case.get("options", {})) + result = jigsaw.audio.speech_to_text( + blob_content, test_case.get("options", {}) + ) else: # Use params directly result = jigsaw.audio.speech_to_text(test_case["params"]) # Verify response structure assert result["success"] - assert result.get("text", None) is not None and isinstance(result["text"], str) + assert result.get("text", None) is not None and isinstance( + result["text"], str + ) # Check for chunks if result.get("chunks", None): @@ -154,7 +172,9 @@ def test_speech_to_text_webhook(self, test_case): if test_case.get("blob"): # Download audio content blob_content = requests.get(test_case["blob"]).content - result = jigsaw.audio.speech_to_text(blob_content, test_case.get("options", {})) + result = jigsaw.audio.speech_to_text( + blob_content, test_case.get("options", {}) + ) else: # Use params directly result = jigsaw.audio.speech_to_text(test_case["params"]) @@ -169,7 +189,9 @@ def test_speech_to_text_webhook(self, test_case): class TestAudioAsync: """Test asynchronous audio speech-to-text methods""" - @pytest.mark.parametrize("test_case", TEST_CASES, ids=[tc["name"] for tc in TEST_CASES]) + @pytest.mark.parametrize( + "test_case", TEST_CASES, ids=[tc["name"] for tc in TEST_CASES] + ) @pytest.mark.asyncio async def test_speech_to_text_async(self, test_case): """Test asynchronous speech-to-text with various inputs""" @@ -186,7 +208,9 @@ async def test_speech_to_text_async(self, test_case): # Verify response structure assert result["success"] - assert result.get("text", None) is not None and isinstance(result["text"], str) + assert result.get("text", None) is not None and isinstance( + result["text"], str + ) # Check for chunks if result.get("chunks", None): @@ -196,7 +220,9 @@ async def test_speech_to_text_async(self, test_case): if result.get("speakers", None): assert isinstance(result["speakers"], list) except JigsawStackError as e: - pytest.fail(f"Unexpected JigsawStackError in async {test_case['name']}: {e}") + pytest.fail( + f"Unexpected JigsawStackError in async {test_case['name']}: {e}" + ) @pytest.mark.parametrize( "test_case", WEBHOOK_TEST_CASES, ids=[tc["name"] for tc in WEBHOOK_TEST_CASES] @@ -222,4 +248,6 @@ async def test_speech_to_text_webhook_async(self, test_case): except JigsawStackError as e: # Webhook URLs might fail if invalid - print(f"Expected possible error for async webhook test {test_case['name']}: {e}") + print( + f"Expected possible error for async webhook test {test_case['name']}: {e}" + ) diff --git a/tests/test_classification.py b/tests/test_classification.py index dba924a..a74c9a2 100644 --- a/tests/test_classification.py +++ b/tests/test_classification.py @@ -12,8 +12,20 @@ logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) -jigsaw = jigsawstack.JigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) -async_jigsaw = jigsawstack.AsyncJigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) +jigsaw = jigsawstack.JigsawStack( + api_key=os.getenv("JIGSAWSTACK_API_KEY"), + base_url=os.getenv("JIGSAWSTACK_BASE_URL") + "/api" + if os.getenv("JIGSAWSTACK_BASE_URL") + else "https://api.jigsawstack.com", + headers={"x-jigsaw-skip-cache": "true"}, +) +async_jigsaw = jigsawstack.AsyncJigsawStack( + api_key=os.getenv("JIGSAWSTACK_API_KEY"), + base_url=os.getenv("JIGSAWSTACK_BASE_URL") + "/api" + if os.getenv("JIGSAWSTACK_BASE_URL") + else "https://api.jigsawstack.com", + headers={"x-jigsaw-skip-cache": "true"}, +) TEST_CASES = [ { diff --git a/tests/test_deep_research.py b/tests/test_deep_research.py index 3d584ab..ec67775 100644 --- a/tests/test_deep_research.py +++ b/tests/test_deep_research.py @@ -12,8 +12,20 @@ logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) -jigsaw = jigsawstack.JigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) -async_jigsaw = jigsawstack.AsyncJigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) +jigsaw = jigsawstack.JigsawStack( + api_key=os.getenv("JIGSAWSTACK_API_KEY"), + base_url=os.getenv("JIGSAWSTACK_BASE_URL") + "/api" + if os.getenv("JIGSAWSTACK_BASE_URL") + else "https://api.jigsawstack.com", + headers={"x-jigsaw-skip-cache": "true"}, +) +async_jigsaw = jigsawstack.AsyncJigsawStack( + api_key=os.getenv("JIGSAWSTACK_API_KEY"), + base_url=os.getenv("JIGSAWSTACK_BASE_URL") + "/api" + if os.getenv("JIGSAWSTACK_BASE_URL") + else "https://api.jigsawstack.com", + headers={"x-jigsaw-skip-cache": "true"}, +) URL = "https://jigsawstack.com" diff --git a/tests/test_embedding.py b/tests/test_embedding.py index c2bc59d..60acc60 100644 --- a/tests/test_embedding.py +++ b/tests/test_embedding.py @@ -13,15 +13,27 @@ logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) -jigsaw = jigsawstack.JigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) -async_jigsaw = jigsawstack.AsyncJigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) - -SAMPLE_TEXT = ( - "The quick brown fox jumps over the lazy dog. This is a sample text for embedding generation." +jigsaw = jigsawstack.JigsawStack( + api_key=os.getenv("JIGSAWSTACK_API_KEY"), + base_url=os.getenv("JIGSAWSTACK_BASE_URL") + "/api" + if os.getenv("JIGSAWSTACK_BASE_URL") + else "https://api.jigsawstack.com", + headers={"x-jigsaw-skip-cache": "true"}, +) +async_jigsaw = jigsawstack.AsyncJigsawStack( + api_key=os.getenv("JIGSAWSTACK_API_KEY"), + base_url=os.getenv("JIGSAWSTACK_BASE_URL") + "/api" + if os.getenv("JIGSAWSTACK_BASE_URL") + else "https://api.jigsawstack.com", + headers={"x-jigsaw-skip-cache": "true"}, ) + +SAMPLE_TEXT = "The quick brown fox jumps over the lazy dog. This is a sample text for embedding generation." SAMPLE_IMAGE_URL = "https://images.unsplash.com/photo-1542931287-023b922fa89b?q=80&w=2574&auto=format&fit=crop&ixlib=rb-4.1.0&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D" SAMPLE_AUDIO_URL = "https://jigsawstack.com/preview/stt-example.wav" -SAMPLE_PDF_URL = "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf" +SAMPLE_PDF_URL = ( + "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf" +) # Test cases for Embedding V1 EMBEDDING_V1_TEST_CASES = [ diff --git a/tests/test_file_store.py b/tests/test_file_store.py index 97d07dd..b743623 100644 --- a/tests/test_file_store.py +++ b/tests/test_file_store.py @@ -13,8 +13,20 @@ logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) -jigsaw = jigsawstack.JigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) -async_jigsaw = jigsawstack.AsyncJigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) +jigsaw = jigsawstack.JigsawStack( + api_key=os.getenv("JIGSAWSTACK_API_KEY"), + base_url=os.getenv("JIGSAWSTACK_BASE_URL") + "/api" + if os.getenv("JIGSAWSTACK_BASE_URL") + else "https://api.jigsawstack.com", + headers={"x-jigsaw-skip-cache": "true"}, +) +async_jigsaw = jigsawstack.AsyncJigsawStack( + api_key=os.getenv("JIGSAWSTACK_API_KEY"), + base_url=os.getenv("JIGSAWSTACK_BASE_URL") + "/api" + if os.getenv("JIGSAWSTACK_BASE_URL") + else "https://api.jigsawstack.com", + headers={"x-jigsaw-skip-cache": "true"}, +) TEXT_FILE_CONTENT = b"This is a test file content for JigsawStack storage" JSON_FILE_CONTENT = b'{"test": "data", "key": "value"}' @@ -118,7 +130,9 @@ class TestFileStoreAsync: async def test_file_upload_async(self, test_case): """Test asynchronous file upload with various options""" try: - result = await async_jigsaw.store.upload(test_case["file"], test_case["options"]) + result = await async_jigsaw.store.upload( + test_case["file"], test_case["options"] + ) print(f"Async upload test {test_case['name']}: {result}") assert result.get("key") is not None @@ -133,7 +147,9 @@ async def test_file_upload_async(self, test_case): self.uploaded_keys.append(result["key"]) except JigsawStackError as e: - pytest.fail(f"Unexpected JigsawStackError in async {test_case['name']}: {e}") + pytest.fail( + f"Unexpected JigsawStackError in async {test_case['name']}: {e}" + ) @pytest.mark.asyncio async def test_file_get_async(self): diff --git a/tests/test_image_generation.py b/tests/test_image_generation.py index 6b982ba..e11bf76 100644 --- a/tests/test_image_generation.py +++ b/tests/test_image_generation.py @@ -12,8 +12,20 @@ logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) -jigsaw = jigsawstack.JigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) -async_jigsaw = jigsawstack.AsyncJigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) +jigsaw = jigsawstack.JigsawStack( + api_key=os.getenv("JIGSAWSTACK_API_KEY"), + base_url=os.getenv("JIGSAWSTACK_BASE_URL") + "/api" + if os.getenv("JIGSAWSTACK_BASE_URL") + else "https://api.jigsawstack.com", + headers={"x-jigsaw-skip-cache": "true"}, +) +async_jigsaw = jigsawstack.AsyncJigsawStack( + api_key=os.getenv("JIGSAWSTACK_API_KEY"), + base_url=os.getenv("JIGSAWSTACK_BASE_URL") + "/api" + if os.getenv("JIGSAWSTACK_BASE_URL") + else "https://api.jigsawstack.com", + headers={"x-jigsaw-skip-cache": "true"}, +) IMAGE_URL = "https://images.unsplash.com/photo-1494588024300-e9df7ff98d78?q=80&w=1284&auto=format&fit=crop&ixlib=rb-4.1.0&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D" FILE_STORE_KEY = jigsaw.store.upload( @@ -111,7 +123,9 @@ class TestImageGenerationSync: """Test synchronous image generation methods""" - @pytest.mark.parametrize("test_case", TEST_CASES, ids=[tc["name"] for tc in TEST_CASES]) + @pytest.mark.parametrize( + "test_case", TEST_CASES, ids=[tc["name"] for tc in TEST_CASES] + ) def test_image_generation(self, test_case): """Test synchronous image generation with various parameters""" try: @@ -160,7 +174,9 @@ def test_image_to_image_generation(self, test_case): elif type(result) is bytes: assert isinstance(result, bytes) else: - pytest.fail(f"Unexpected result type in {test_case['name']}: {type(result)}") + pytest.fail( + f"Unexpected result type in {test_case['name']}: {type(result)}" + ) except JigsawStackError as e: pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") @@ -168,7 +184,9 @@ def test_image_to_image_generation(self, test_case): class TestImageGenerationAsync: """Test asynchronous image generation methods""" - @pytest.mark.parametrize("test_case", TEST_CASES, ids=[tc["name"] for tc in TEST_CASES]) + @pytest.mark.parametrize( + "test_case", TEST_CASES, ids=[tc["name"] for tc in TEST_CASES] + ) @pytest.mark.asyncio async def test_image_generation_async(self, test_case): """Test asynchronous image generation with various parameters""" @@ -195,7 +213,9 @@ async def test_image_generation_async(self, test_case): assert isinstance(result, bytes) except JigsawStackError as e: - pytest.fail(f"Unexpected JigsawStackError in async {test_case['name']}: {e}") + pytest.fail( + f"Unexpected JigsawStackError in async {test_case['name']}: {e}" + ) @pytest.mark.parametrize( "test_case", @@ -215,7 +235,11 @@ async def test_image_to_image_generation_async(self, test_case): elif type(result) is bytes: assert isinstance(result, bytes) else: - pytest.fail(f"Unexpected result type in {test_case['name']}: {type(result)}") + pytest.fail( + f"Unexpected result type in {test_case['name']}: {type(result)}" + ) except JigsawStackError as e: - pytest.fail(f"Unexpected JigsawStackError in async {test_case['name']}: {e}") + pytest.fail( + f"Unexpected JigsawStackError in async {test_case['name']}: {e}" + ) diff --git a/tests/test_object_detection.py b/tests/test_object_detection.py index b7286aa..4c846a6 100644 --- a/tests/test_object_detection.py +++ b/tests/test_object_detection.py @@ -15,13 +15,23 @@ logger = logging.getLogger(__name__) -jigsaw = jigsawstack.JigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) -async_jigsaw = jigsawstack.AsyncJigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) - -IMAGE_URL = ( - "https://rogilvkqloanxtvjfrkm.supabase.co/storage/v1/object/public/demo/Collabo%201080x842.jpg" +jigsaw = jigsawstack.JigsawStack( + api_key=os.getenv("JIGSAWSTACK_API_KEY"), + base_url=os.getenv("JIGSAWSTACK_BASE_URL") + "/api" + if os.getenv("JIGSAWSTACK_BASE_URL") + else "https://api.jigsawstack.com", + headers={"x-jigsaw-skip-cache": "true"}, +) +async_jigsaw = jigsawstack.AsyncJigsawStack( + api_key=os.getenv("JIGSAWSTACK_API_KEY"), + base_url=os.getenv("JIGSAWSTACK_BASE_URL") + "/api" + if os.getenv("JIGSAWSTACK_BASE_URL") + else "https://api.jigsawstack.com", + headers={"x-jigsaw-skip-cache": "true"}, ) +IMAGE_URL = "https://rogilvkqloanxtvjfrkm.supabase.co/storage/v1/object/public/demo/Collabo%201080x842.jpg" + TEST_CASES = [ { "name": "with_url_only", @@ -104,7 +114,9 @@ def test_object_detection(self, test_case): if test_case.get("blob"): # Download blob content blob_content = requests.get(test_case["blob"]).content - result = jigsaw.vision.object_detection(blob_content, test_case.get("options", {})) + result = jigsaw.vision.object_detection( + blob_content, test_case.get("options", {}) + ) else: # Use params directly result = jigsaw.vision.object_detection(test_case["params"]) diff --git a/tests/test_prediction.py b/tests/test_prediction.py index a87ccab..e8489da 100644 --- a/tests/test_prediction.py +++ b/tests/test_prediction.py @@ -13,8 +13,20 @@ logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) -jigsaw = jigsawstack.JigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) -async_jigsaw = jigsawstack.AsyncJigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) +jigsaw = jigsawstack.JigsawStack( + api_key=os.getenv("JIGSAWSTACK_API_KEY"), + base_url=os.getenv("JIGSAWSTACK_BASE_URL") + "/api" + if os.getenv("JIGSAWSTACK_BASE_URL") + else "https://api.jigsawstack.com", + headers={"x-jigsaw-skip-cache": "true"}, +) +async_jigsaw = jigsawstack.AsyncJigsawStack( + api_key=os.getenv("JIGSAWSTACK_API_KEY"), + base_url=os.getenv("JIGSAWSTACK_BASE_URL") + "/api" + if os.getenv("JIGSAWSTACK_BASE_URL") + else "https://api.jigsawstack.com", + headers={"x-jigsaw-skip-cache": "true"}, +) def generate_dates(start_date, num_days): @@ -47,7 +59,9 @@ def generate_dates(start_date, num_days): { "name": "seasonal_pattern", "params": { - "dataset": [{"date": dates[i], "value": 100 + (50 * (i % 7))} for i in range(21)], + "dataset": [ + {"date": dates[i], "value": 100 + (50 * (i % 7))} for i in range(21) + ], "steps": 7, }, }, @@ -61,7 +75,9 @@ def generate_dates(start_date, num_days): { "name": "large_dataset_prediction", "params": { - "dataset": [{"date": dates[i], "value": 1000 + (i * 20)} for i in range(30)], + "dataset": [ + {"date": dates[i], "value": 1000 + (i * 20)} for i in range(30) + ], "steps": 10, }, }, diff --git a/tests/test_sentiment.py b/tests/test_sentiment.py index 5bb5914..71dbea7 100644 --- a/tests/test_sentiment.py +++ b/tests/test_sentiment.py @@ -12,8 +12,20 @@ logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) -jigsaw = jigsawstack.JigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) -async_jigsaw = jigsawstack.AsyncJigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) +jigsaw = jigsawstack.JigsawStack( + api_key=os.getenv("JIGSAWSTACK_API_KEY"), + base_url=os.getenv("JIGSAWSTACK_BASE_URL") + "/api" + if os.getenv("JIGSAWSTACK_BASE_URL") + else "https://api.jigsawstack.com", + headers={"x-jigsaw-skip-cache": "true"}, +) +async_jigsaw = jigsawstack.AsyncJigsawStack( + api_key=os.getenv("JIGSAWSTACK_API_KEY"), + base_url=os.getenv("JIGSAWSTACK_BASE_URL") + "/api" + if os.getenv("JIGSAWSTACK_BASE_URL") + else "https://api.jigsawstack.com", + headers={"x-jigsaw-skip-cache": "true"}, +) TEST_CASES = [ { @@ -30,7 +42,9 @@ }, { "name": "neutral_sentiment_factual", - "params": {"text": "The meeting is scheduled for 3 PM tomorrow in conference room B."}, + "params": { + "text": "The meeting is scheduled for 3 PM tomorrow in conference room B." + }, }, { "name": "mixed_sentiment_paragraph", @@ -66,7 +80,9 @@ }, { "name": "question_sentiment", - "params": {"text": "Why is this product so amazing? I can't believe how well it works!"}, + "params": { + "text": "Why is this product so amazing? I can't believe how well it works!" + }, }, ] diff --git a/tests/test_sql.py b/tests/test_sql.py index 822ae18..618a52e 100644 --- a/tests/test_sql.py +++ b/tests/test_sql.py @@ -12,8 +12,20 @@ logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) -jigsaw = jigsawstack.JigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) -async_jigsaw = jigsawstack.AsyncJigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) +jigsaw = jigsawstack.JigsawStack( + api_key=os.getenv("JIGSAWSTACK_API_KEY"), + base_url=os.getenv("JIGSAWSTACK_BASE_URL") + "/api" + if os.getenv("JIGSAWSTACK_BASE_URL") + else "https://api.jigsawstack.com", + headers={"x-jigsaw-skip-cache": "true"}, +) +async_jigsaw = jigsawstack.AsyncJigsawStack( + api_key=os.getenv("JIGSAWSTACK_API_KEY"), + base_url=os.getenv("JIGSAWSTACK_BASE_URL") + "/api" + if os.getenv("JIGSAWSTACK_BASE_URL") + else "https://api.jigsawstack.com", + headers={"x-jigsaw-skip-cache": "true"}, +) # Sample schemas for different databases MYSQL_SCHEMA = """ diff --git a/tests/test_summary.py b/tests/test_summary.py index ab79ea9..a71692b 100644 --- a/tests/test_summary.py +++ b/tests/test_summary.py @@ -12,8 +12,20 @@ logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) -jigsaw = jigsawstack.JigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) -async_jigsaw = jigsawstack.AsyncJigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) +jigsaw = jigsawstack.JigsawStack( + api_key=os.getenv("JIGSAWSTACK_API_KEY"), + base_url=os.getenv("JIGSAWSTACK_BASE_URL") + "/api" + if os.getenv("JIGSAWSTACK_BASE_URL") + else "https://api.jigsawstack.com", + headers={"x-jigsaw-skip-cache": "true"}, +) +async_jigsaw = jigsawstack.AsyncJigsawStack( + api_key=os.getenv("JIGSAWSTACK_API_KEY"), + base_url=os.getenv("JIGSAWSTACK_BASE_URL") + "/api" + if os.getenv("JIGSAWSTACK_BASE_URL") + else "https://api.jigsawstack.com", + headers={"x-jigsaw-skip-cache": "true"}, +) LONG_TEXT = """ Artificial Intelligence (AI) has become one of the most transformative technologies of the 21st century. @@ -147,7 +159,9 @@ def test_summary(self, test_case): else: assert isinstance(result["summary"], str) if "max_characters" in test_case["params"]: - assert len(result["summary"]) <= test_case["params"]["max_characters"] + assert ( + len(result["summary"]) <= test_case["params"]["max_characters"] + ) except JigsawStackError as e: pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") @@ -177,7 +191,9 @@ async def test_summary_async(self, test_case): else: assert isinstance(result["summary"], str) if "max_characters" in test_case["params"]: - assert len(result["summary"]) <= test_case["params"]["max_characters"] + assert ( + len(result["summary"]) <= test_case["params"]["max_characters"] + ) except JigsawStackError as e: pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") diff --git a/tests/test_translate.py b/tests/test_translate.py index 5b560be..4f63615 100644 --- a/tests/test_translate.py +++ b/tests/test_translate.py @@ -13,8 +13,20 @@ logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) -jigsaw = jigsawstack.JigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) -async_jigsaw = jigsawstack.AsyncJigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) +jigsaw = jigsawstack.JigsawStack( + api_key=os.getenv("JIGSAWSTACK_API_KEY"), + base_url=os.getenv("JIGSAWSTACK_BASE_URL") + "/api" + if os.getenv("JIGSAWSTACK_BASE_URL") + else "https://api.jigsawstack.com", + headers={"x-jigsaw-skip-cache": "true"}, +) +async_jigsaw = jigsawstack.AsyncJigsawStack( + api_key=os.getenv("JIGSAWSTACK_API_KEY"), + base_url=os.getenv("JIGSAWSTACK_BASE_URL") + "/api" + if os.getenv("JIGSAWSTACK_BASE_URL") + else "https://api.jigsawstack.com", + headers={"x-jigsaw-skip-cache": "true"}, +) # Sample image URL for translation tests IMAGE_URL = "https://images.unsplash.com/photo-1580679137870-86ef9f9a03d6?q=80&w=2574&auto=format&fit=crop&ixlib=rb-4.1.0&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D" @@ -144,7 +156,9 @@ def test_translate_text(self, test_case): # Check if the response structure matches the input if isinstance(test_case["params"]["text"], list): assert isinstance(result["translated_text"], list) - assert len(result["translated_text"]) == len(test_case["params"]["text"]) + assert len(result["translated_text"]) == len( + test_case["params"]["text"] + ) else: assert isinstance(result["translated_text"], str) @@ -171,7 +185,9 @@ async def test_translate_text_async(self, test_case): # Check if the response structure matches the input if isinstance(test_case["params"]["text"], list): assert isinstance(result["translated_text"], list) - assert len(result["translated_text"]) == len(test_case["params"]["text"]) + assert len(result["translated_text"]) == len( + test_case["params"]["text"] + ) else: assert isinstance(result["translated_text"], str) @@ -193,7 +209,9 @@ def test_translate_image(self, test_case): if test_case.get("blob"): # Download blob content blob_content = requests.get(test_case["blob"]).content - result = jigsaw.translate.image(blob_content, test_case.get("options", {})) + result = jigsaw.translate.image( + blob_content, test_case.get("options", {}) + ) else: # Use params directly result = jigsaw.translate.image(test_case["params"]) diff --git a/tests/test_validate.py b/tests/test_validate.py index d0d2c43..83b50b6 100644 --- a/tests/test_validate.py +++ b/tests/test_validate.py @@ -13,11 +13,25 @@ logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) -jigsaw = jigsawstack.JigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) -async_jigsaw = jigsawstack.AsyncJigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) +jigsaw = jigsawstack.JigsawStack( + api_key=os.getenv("JIGSAWSTACK_API_KEY"), + base_url=os.getenv("JIGSAWSTACK_BASE_URL") + "/api" + if os.getenv("JIGSAWSTACK_BASE_URL") + else "https://api.jigsawstack.com", + headers={"x-jigsaw-skip-cache": "true"}, +) +async_jigsaw = jigsawstack.AsyncJigsawStack( + api_key=os.getenv("JIGSAWSTACK_API_KEY"), + base_url=os.getenv("JIGSAWSTACK_BASE_URL") + "/api" + if os.getenv("JIGSAWSTACK_BASE_URL") + else "https://api.jigsawstack.com", + headers={"x-jigsaw-skip-cache": "true"}, +) # Sample URLs for NSFW testing -SAFE_IMAGE_URL = "https://images.unsplash.com/photo-1506905925346-21bda4d32df4?q=80&w=2070" +SAFE_IMAGE_URL = ( + "https://images.unsplash.com/photo-1506905925346-21bda4d32df4?q=80&w=2070" +) POTENTIALLY_NSFW_URL = "https://images.unsplash.com/photo-1512310604669-443f26c35f52?q=80&w=868&auto=format&fit=crop&ixlib=rb-4.1.0&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D" SPAM_CHECK_TEST_CASES = [ @@ -73,7 +87,9 @@ }, { "name": "mixed_correct_and_incorrect", - "params": {"text": "The weather is beatiful today, but tommorow might be diferent."}, + "params": { + "text": "The weather is beatiful today, but tommorow might be diferent." + }, }, { "name": "technical_text", @@ -425,7 +441,9 @@ async def test_nsfw_check_blob_async(self, test_case): try: # Download blob content blob_content = requests.get(test_case["blob_url"]).content - result = await async_jigsaw.validate.nsfw(blob_content, test_case["options"]) + result = await async_jigsaw.validate.nsfw( + blob_content, test_case["options"] + ) assert result["success"] assert "nsfw" in result diff --git a/tests/test_vocr.py b/tests/test_vocr.py index d233484..d7193f2 100644 --- a/tests/test_vocr.py +++ b/tests/test_vocr.py @@ -13,8 +13,20 @@ logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) -jigsaw = jigsawstack.JigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) -async_jigsaw = jigsawstack.AsyncJigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) +jigsaw = jigsawstack.JigsawStack( + api_key=os.getenv("JIGSAWSTACK_API_KEY"), + base_url=os.getenv("JIGSAWSTACK_BASE_URL") + "/api" + if os.getenv("JIGSAWSTACK_BASE_URL") + else "https://api.jigsawstack.com", + headers={"x-jigsaw-skip-cache": "true"}, +) +async_jigsaw = jigsawstack.AsyncJigsawStack( + api_key=os.getenv("JIGSAWSTACK_API_KEY"), + base_url=os.getenv("JIGSAWSTACK_BASE_URL") + "/api" + if os.getenv("JIGSAWSTACK_BASE_URL") + else "https://api.jigsawstack.com", + headers={"x-jigsaw-skip-cache": "true"}, +) IMAGE_URL = "https://jigsawstack.com/preview/vocr-example.jpg" @@ -69,7 +81,10 @@ }, { "name": "url_with_list_prompt", - "params": {"url": IMAGE_URL, "prompt": ["Extract headers", "Extract body text"]}, + "params": { + "url": IMAGE_URL, + "prompt": ["Extract headers", "Extract body text"], + }, "blob": None, "options": None, }, @@ -79,13 +94,21 @@ PDF_TEST_CASES = [ { "name": "pdf_with_page_range", - "params": {"url": PDF_URL, "page_range": [1, 3], "prompt": "Extract text from these pages"}, + "params": { + "url": PDF_URL, + "page_range": [1, 3], + "prompt": "Extract text from these pages", + }, "blob": None, "options": None, }, { "name": "pdf_single_page", - "params": {"url": PDF_URL, "page_range": [1, 1], "prompt": "What is on the first page?"}, + "params": { + "url": PDF_URL, + "page_range": [1, 1], + "prompt": "What is on the first page?", + }, "blob": None, "options": None, }, @@ -134,7 +157,9 @@ def test_vocr(self, test_case): except JigsawStackError as e: pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") - @pytest.mark.parametrize("test_case", pdf_test_cases, ids=[tc["name"] for tc in pdf_test_cases]) + @pytest.mark.parametrize( + "test_case", pdf_test_cases, ids=[tc["name"] for tc in pdf_test_cases] + ) def test_vocr_pdf(self, test_case): """Test synchronous VOCR with PDF inputs""" try: @@ -152,13 +177,15 @@ def test_vocr_pdf(self, test_case): assert "context" in result assert "total_pages" in result - if test_case.get("params", {}).get("page_range") or test_case.get("options", {}).get( - "page_range" - ): + if test_case.get("params", {}).get("page_range") or test_case.get( + "options", {} + ).get("page_range"): assert "page_range" in result assert isinstance(result["page_range"], list) - logger.info(f"Test {test_case['name']}: total_pages={result.get('total_pages')}") + logger.info( + f"Test {test_case['name']}: total_pages={result.get('total_pages')}" + ) except JigsawStackError as e: pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") @@ -180,7 +207,9 @@ async def test_vocr_async(self, test_case): if test_case.get("blob"): # Download blob content blob_content = requests.get(test_case["blob"]).content - result = await async_jigsaw.vision.vocr(blob_content, test_case.get("options", {})) + result = await async_jigsaw.vision.vocr( + blob_content, test_case.get("options", {}) + ) else: # Use params directly result = await async_jigsaw.vision.vocr(test_case["params"]) @@ -207,7 +236,9 @@ async def test_vocr_async(self, test_case): except JigsawStackError as e: pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") - @pytest.mark.parametrize("test_case", pdf_test_cases, ids=[tc["name"] for tc in pdf_test_cases]) + @pytest.mark.parametrize( + "test_case", pdf_test_cases, ids=[tc["name"] for tc in pdf_test_cases] + ) @pytest.mark.asyncio async def test_vocr_pdf_async(self, test_case): """Test asynchronous VOCR with PDF inputs""" @@ -215,7 +246,9 @@ async def test_vocr_pdf_async(self, test_case): if test_case.get("blob"): # Download blob content blob_content = requests.get(test_case["blob"]).content - result = await async_jigsaw.vision.vocr(blob_content, test_case.get("options", {})) + result = await async_jigsaw.vision.vocr( + blob_content, test_case.get("options", {}) + ) else: # Use params directly result = await async_jigsaw.vision.vocr(test_case["params"]) @@ -229,13 +262,15 @@ async def test_vocr_pdf_async(self, test_case): assert "total_pages" in result # PDF specific # Check if page_range is in response when requested - if test_case.get("params", {}).get("page_range") or test_case.get("options", {}).get( - "page_range" - ): + if test_case.get("params", {}).get("page_range") or test_case.get( + "options", {} + ).get("page_range"): assert "page_range" in result assert isinstance(result["page_range"], list) - logger.info(f"Test {test_case['name']}: total_pages={result.get('total_pages')}") + logger.info( + f"Test {test_case['name']}: total_pages={result.get('total_pages')}" + ) except JigsawStackError as e: pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") diff --git a/tests/test_web.py b/tests/test_web.py index c22ccd7..fad3b83 100644 --- a/tests/test_web.py +++ b/tests/test_web.py @@ -10,8 +10,26 @@ logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) -jigsaw = jigsawstack.JigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) -async_jigsaw = jigsawstack.AsyncJigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) +# const client = JigsawStack({ +# apiKey, +# baseURL: process.env.JIGSAWSTACK_BASE_URL ? `${process.env.JIGSAWSTACK_BASE_URL}/api` : "https://api.jigsawstack.com", +# headers: { "x-jigsaw-skip-cache": "true" }, +# }); + +jigsaw = jigsawstack.JigsawStack( + api_key=os.getenv("JIGSAWSTACK_API_KEY"), + base_url=os.getenv("JIGSAWSTACK_BASE_URL") + "/api" + if os.getenv("JIGSAWSTACK_BASE_URL") + else "https://api.jigsawstack.com", + headers={"x-jigsaw-skip-cache": "true"}, +) +async_jigsaw = jigsawstack.AsyncJigsawStack( + api_key=os.getenv("JIGSAWSTACK_API_KEY"), + base_url=os.getenv("JIGSAWSTACK_BASE_URL") + "/api" + if os.getenv("JIGSAWSTACK_BASE_URL") + else "https://api.jigsawstack.com", + headers={"x-jigsaw-skip-cache": "true"}, +) URL = "https://jigsawstack.com" @@ -143,6 +161,7 @@ }, ] + class TestHTMLToAnySync: """Test synchronous HTML to Any methods""" @@ -227,8 +246,10 @@ def test_search_suggestions(self, test_case): except JigsawStackError as e: pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") + # Async Test Classes + class TestHTMLToAnyAsync: """Test asynchronous HTML to Any methods""" From 33bed1597b721afdf372afe94463a7794ecc66a8 Mon Sep 17 00:00:00 2001 From: Win Cheng Date: Thu, 9 Oct 2025 13:34:06 -0700 Subject: [PATCH 91/95] update vocr types and added confidence --- jigsawstack/vision.py | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/jigsawstack/vision.py b/jigsawstack/vision.py index 8eccb51..ffb69cf 100644 --- a/jigsawstack/vision.py +++ b/jigsawstack/vision.py @@ -152,13 +152,31 @@ class VOCRParams(TypedDict): page_range: NotRequired[List[int]] +class Word(TypedDict): + text: str + bounds: BoundingBox + confidence: float + + +class Line(TypedDict): + text: str + bounds: BoundingBox + average_confidence: float + words: List[Word] + + +class Section(TypedDict): + text: str + lines: List[Line] + + class OCRResponse(BaseResponse): context: str width: int height: int tags: List[str] has_text: bool - sections: List[object] + sections: List[Section] total_pages: Optional[int] page_range: Optional[ List[int] @@ -215,7 +233,9 @@ def vocr( return resp @overload - def object_detection(self, params: ObjectDetectionParams) -> ObjectDetectionResponse: ... + def object_detection( + self, params: ObjectDetectionParams + ) -> ObjectDetectionResponse: ... @overload def object_detection( self, blob: bytes, options: ObjectDetectionParams = None @@ -295,7 +315,9 @@ async def vocr( return resp @overload - async def object_detection(self, params: ObjectDetectionParams) -> ObjectDetectionResponse: ... + async def object_detection( + self, params: ObjectDetectionParams + ) -> ObjectDetectionResponse: ... @overload async def object_detection( self, blob: bytes, options: ObjectDetectionParams = None From d0e19285e5acc827fbd399d1a5ab8855bb11581b Mon Sep 17 00:00:00 2001 From: Win Cheng Date: Thu, 9 Oct 2025 13:41:51 -0700 Subject: [PATCH 92/95] formatted --- jigsawstack/vision.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/jigsawstack/vision.py b/jigsawstack/vision.py index ffb69cf..0b5a483 100644 --- a/jigsawstack/vision.py +++ b/jigsawstack/vision.py @@ -233,9 +233,7 @@ def vocr( return resp @overload - def object_detection( - self, params: ObjectDetectionParams - ) -> ObjectDetectionResponse: ... + def object_detection(self, params: ObjectDetectionParams) -> ObjectDetectionResponse: ... @overload def object_detection( self, blob: bytes, options: ObjectDetectionParams = None @@ -315,9 +313,7 @@ async def vocr( return resp @overload - async def object_detection( - self, params: ObjectDetectionParams - ) -> ObjectDetectionResponse: ... + async def object_detection(self, params: ObjectDetectionParams) -> ObjectDetectionResponse: ... @overload async def object_detection( self, blob: bytes, options: ObjectDetectionParams = None From 42bf122f7a985530b34064946d73121a0c1ff13a Mon Sep 17 00:00:00 2001 From: Win Cheng Date: Thu, 9 Oct 2025 18:12:28 -0700 Subject: [PATCH 93/95] update --- jigsawstack/image_generation.py | 7 ++++--- tests/test_image_generation.py | 22 +++++++++++----------- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/jigsawstack/image_generation.py b/jigsawstack/image_generation.py index 08cf81c..d3bc860 100644 --- a/jigsawstack/image_generation.py +++ b/jigsawstack/image_generation.py @@ -3,7 +3,7 @@ from typing_extensions import Literal, NotRequired, Required, TypedDict from ._config import ClientConfig -from .async_request import AsyncRequest +from .async_request import AsyncRequest, AsyncRequestConfig from .request import Request, RequestConfig @@ -103,6 +103,7 @@ def image_generation( self, params: ImageGenerationParams ) -> Union[ImageGenerationResponse, bytes]: path = "/ai/image_generation" + resp = Request( config=self.config, path=path, @@ -113,7 +114,7 @@ def image_generation( class AsyncImageGeneration(ClientConfig): - config: RequestConfig + config: AsyncRequestConfig def __init__( self, @@ -122,7 +123,7 @@ def __init__( headers: Union[Dict[str, str], None] = None, ): super().__init__(api_key, base_url, headers) - self.config = RequestConfig( + self.config = AsyncRequestConfig( base_url=base_url, api_key=api_key, headers=headers, diff --git a/tests/test_image_generation.py b/tests/test_image_generation.py index e11bf76..72c35f2 100644 --- a/tests/test_image_generation.py +++ b/tests/test_image_generation.py @@ -28,10 +28,10 @@ ) IMAGE_URL = "https://images.unsplash.com/photo-1494588024300-e9df7ff98d78?q=80&w=1284&auto=format&fit=crop&ixlib=rb-4.1.0&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D" -FILE_STORE_KEY = jigsaw.store.upload( - requests.get(IMAGE_URL).content, - {"filename": "test_image.jpg", "content_type": "image/jpeg", "overwrite": True}, -) +# FILE_STORE_KEY = jigsaw.store.upload( +# requests.get(IMAGE_URL).content, +# {"filename": "test_image.jpg", "content_type": "image/jpeg", "overwrite": True}, +# ) TEST_CASES = [ { @@ -110,13 +110,13 @@ "return_type": "base64", }, }, - { - "name": "with_file_store_key", - "params": { - "prompt": "Apply a cyberpunk style to this image", - "file_store_key": FILE_STORE_KEY, - }, - }, + # { + # "name": "with_file_store_key", + # "params": { + # "prompt": "Apply a cyberpunk style to this image", + # "file_store_key": FILE_STORE_KEY, + # }, + # }, ] From e4462832091c6f9c4b266bef027b6347c575d74a Mon Sep 17 00:00:00 2001 From: Win Cheng Date: Thu, 9 Oct 2025 18:22:37 -0700 Subject: [PATCH 94/95] update vers --- jigsawstack/version.py | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/jigsawstack/version.py b/jigsawstack/version.py index b5bca8d..2819f1c 100644 --- a/jigsawstack/version.py +++ b/jigsawstack/version.py @@ -1,4 +1,4 @@ -__version__ = "0.3.7" +__version__ = "0.3.8" def get_version() -> str: diff --git a/setup.py b/setup.py index 4a268d3..4524e2c 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ setup( name="jigsawstack", - version="0.3.7", + version="0.3.8", description="JigsawStack - The AI SDK for Python", long_description=open("README.md", encoding="utf8").read(), long_description_content_type="text/markdown", From c29bc3f1e7234f1a9598bd8a0312e83d06e55652 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Tue, 28 Oct 2025 18:04:37 -0700 Subject: [PATCH 95/95] fix: missed base_url for ai_scrape url. --- jigsawstack/version.py | 2 +- setup.py | 7 +++---- tests/test_ai_scrape.py | 20 +++++++++++++++++--- 3 files changed, 21 insertions(+), 8 deletions(-) diff --git a/jigsawstack/version.py b/jigsawstack/version.py index 2819f1c..b8d2d3c 100644 --- a/jigsawstack/version.py +++ b/jigsawstack/version.py @@ -1,4 +1,4 @@ -__version__ = "0.3.8" +__version__ = "0.3.9" def get_version() -> str: diff --git a/setup.py b/setup.py index 4524e2c..8a9a443 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ setup( name="jigsawstack", - version="0.3.8", + version="0.3.9", description="JigsawStack - The AI SDK for Python", long_description=open("README.md", encoding="utf8").read(), long_description_content_type="text/markdown", @@ -16,7 +16,7 @@ packages=find_packages(include=["jigsawstack"]), install_requires=install_requires, zip_safe=False, - python_requires=">=3.7", + python_requires=">=3.9", keywords=["AI", "AI Tooling"], setup_requires=["pytest-runner"], tests_require=["pytest", "pytest-asyncio"], @@ -27,10 +27,9 @@ "License :: OSI Approved :: MIT License", "Natural Language :: English", "Operating System :: OS Independent", - "Programming Language :: Python :: 3.7", - "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", ], ) diff --git a/tests/test_ai_scrape.py b/tests/test_ai_scrape.py index 4c30b33..1f53453 100644 --- a/tests/test_ai_scrape.py +++ b/tests/test_ai_scrape.py @@ -12,8 +12,20 @@ logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) -jigsaw = jigsawstack.JigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) -async_jigsaw = jigsawstack.AsyncJigsawStack(api_key=os.getenv("JIGSAWSTACK_API_KEY")) +jigsaw = jigsawstack.JigsawStack( + base_url=os.getenv("JIGSAWSTACK_BASE_URL") + "/api" + if os.getenv("JIGSAWSTACK_BASE_URL") + else "https://api.jigsawstack.com", + api_key=os.getenv("JIGSAWSTACK_API_KEY"), + headers={"x-jigsaw-skip-cache": "true"}, +) +async_jigsaw = jigsawstack.AsyncJigsawStack( + base_url=os.getenv("JIGSAWSTACK_BASE_URL") + "/api" + if os.getenv("JIGSAWSTACK_BASE_URL") + else "https://api.jigsawstack.com", + api_key=os.getenv("JIGSAWSTACK_API_KEY"), + headers={"x-jigsaw-skip-cache": "true"}, +) URL = "https://jigsawstack.com" @@ -70,7 +82,9 @@ "params": { "url": URL, "element_prompts": ["user data"], - "cookies": [{"name": "session", "value": "test123", "domain": "example.com"}], + "cookies": [ + {"name": "session", "value": "test123", "domain": "example.com"} + ], }, }, {