From d39861fc33695b093280444179a576889a87d308 Mon Sep 17 00:00:00 2001 From: CyMule Date: Fri, 27 Jun 2025 13:41:26 -0400 Subject: [PATCH 1/3] feat: improve PDF validation error handling with FileValidationError base class --- CHANGELOG.md | 10 ++++++++++ .../unit/test_split_pdf_hook.py | 12 +++++------- pyproject.toml | 2 +- src/unstructured_client/_hooks/custom/pdf_utils.py | 8 ++++---- .../_hooks/custom/split_pdf_hook.py | 10 ++-------- .../_hooks/custom/validation_errors.py | 14 ++++++++++++++ 6 files changed, 36 insertions(+), 20 deletions(-) create mode 100644 src/unstructured_client/_hooks/custom/validation_errors.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 582c5404..db6c12fc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,13 @@ +## 0.37.3 + +### Enhancements +* Improve PDF validation error handling by introducing FileValidationError base class for better error abstraction + +### Features + +### Fixes +* Replace RequestError with PDFValidationError for invalid PDF files to provide more accurate error context + ## 0.37.0 ### Enhancements diff --git a/_test_unstructured_client/unit/test_split_pdf_hook.py b/_test_unstructured_client/unit/test_split_pdf_hook.py index 75e2c17b..9a58851c 100644 --- a/_test_unstructured_client/unit/test_split_pdf_hook.py +++ b/_test_unstructured_client/unit/test_split_pdf_hook.py @@ -8,7 +8,6 @@ from unittest.mock import MagicMock, patch import httpx -from httpx import RequestError import pytest import requests from requests_toolbelt import MultipartDecoder @@ -467,8 +466,8 @@ def test_unit_get_split_pdf_cache_tmp_data_dir_uses_dir_from_form_data(mock_path assert result == str(Path(mock_dir).resolve()) -def test_before_request_raises_request_error_when_pdf_check_fails(): - """Test that before_request raises RequestError when pdf_utils.check_pdf throws PDFValidationError.""" +def test_before_request_raises_pdf_validation_error_when_pdf_check_fails(): + """Test that before_request raises PDFValidationError when pdf_utils.check_pdf throws PDFValidationError.""" hook = SplitPdfHook() # Initialize the hook with a mock client @@ -514,13 +513,12 @@ def test_before_request_raises_request_error_when_pdf_check_fails(): mock_check_pdf.side_effect = pdf_utils.PDFValidationError(error_message) mock_get_base_url.return_value = "http://localhost:8888" - # Call the method under test and verify it raises RequestError - with pytest.raises(RequestError) as exc_info: + # Call the method under test and verify it raises PDFValidationError + with pytest.raises(pdf_utils.PDFValidationError) as exc_info: hook.before_request(mock_hook_ctx, mock_request) - # Verify the exception has the correct message and request object + # Verify the exception has the correct message assert str(exc_info.value) == error_message - assert exc_info.value.request == mock_request # Verify that the mocked functions were called as expected mock_get_fields.assert_called_once_with(mock_request) diff --git a/pyproject.toml b/pyproject.toml index 4a45b136..499cf574 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "unstructured-client" -version = "0.37.2" +version = "0.37.3" description = "Python Client SDK for Unstructured API" authors = [{ name = "Unstructured" },] readme = "README-PYPI.md" diff --git a/src/unstructured_client/_hooks/custom/pdf_utils.py b/src/unstructured_client/_hooks/custom/pdf_utils.py index eb0d0304..8fb5916a 100644 --- a/src/unstructured_client/_hooks/custom/pdf_utils.py +++ b/src/unstructured_client/_hooks/custom/pdf_utils.py @@ -8,6 +8,7 @@ from pypdf.errors import FileNotDecryptedError, PdfReadError from unstructured_client._hooks.custom.common import UNSTRUCTURED_CLIENT_LOGGER_NAME +from unstructured_client._hooks.custom.validation_errors import FileValidationError logger = logging.getLogger(UNSTRUCTURED_CLIENT_LOGGER_NAME) @@ -17,12 +18,11 @@ pdf_logger.setLevel(logging.ERROR) -class PDFValidationError(Exception): - """Base exception for PDF validation errors.""" +class PDFValidationError(FileValidationError): + """Exception for PDF validation errors.""" def __init__(self, message: str): - self.message = message - super().__init__(self.message) + super().__init__(message, file_type="PDF") def read_pdf(pdf_file: Union[BinaryIO, bytes]) -> Optional[PdfReader]: diff --git a/src/unstructured_client/_hooks/custom/split_pdf_hook.py b/src/unstructured_client/_hooks/custom/split_pdf_hook.py index 7e18f09a..724a544b 100644 --- a/src/unstructured_client/_hooks/custom/split_pdf_hook.py +++ b/src/unstructured_client/_hooks/custom/split_pdf_hook.py @@ -16,7 +16,7 @@ import aiofiles import httpx import nest_asyncio # type: ignore -from httpx import AsyncClient, RequestError +from httpx import AsyncClient from pypdf import PdfReader, PdfWriter from unstructured_client._hooks.custom import form_utils, pdf_utils, request_utils @@ -303,13 +303,7 @@ def before_request( if pdf is None: return request - try: - pdf = pdf_utils.check_pdf(pdf) - except pdf_utils.PDFValidationError as e: - raise RequestError( - message=e.message, - request=request, - ) from e + pdf = pdf_utils.check_pdf(pdf) starting_page_number = form_utils.get_starting_page_number( form_data, diff --git a/src/unstructured_client/_hooks/custom/validation_errors.py b/src/unstructured_client/_hooks/custom/validation_errors.py new file mode 100644 index 00000000..4c6535c0 --- /dev/null +++ b/src/unstructured_client/_hooks/custom/validation_errors.py @@ -0,0 +1,14 @@ +"""File validation error classes for the Unstructured client.""" + + +class FileValidationError(Exception): + """Base exception for file validation errors. + + This exception should be raised when a file fails validation + checks before being processed by the API. + """ + + def __init__(self, message: str, file_type: str = None): + self.message = message + self.file_type = file_type + super().__init__(self.message) From f99710f0399257eb185031f3a878cac139364e04 Mon Sep 17 00:00:00 2001 From: CyMule Date: Fri, 27 Jun 2025 13:59:00 -0400 Subject: [PATCH 2/3] lint --- .../_hooks/custom/validation_errors.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/unstructured_client/_hooks/custom/validation_errors.py b/src/unstructured_client/_hooks/custom/validation_errors.py index 4c6535c0..b7c539dd 100644 --- a/src/unstructured_client/_hooks/custom/validation_errors.py +++ b/src/unstructured_client/_hooks/custom/validation_errors.py @@ -1,5 +1,7 @@ """File validation error classes for the Unstructured client.""" +from typing import Optional + class FileValidationError(Exception): """Base exception for file validation errors. @@ -8,7 +10,7 @@ class FileValidationError(Exception): checks before being processed by the API. """ - def __init__(self, message: str, file_type: str = None): - self.message = message - self.file_type = file_type + def __init__(self, message: str, file_type: Optional[str] = None): + self.message: str = message + self.file_type: Optional[str] = file_type super().__init__(self.message) From d7a2b3b4085c4f506cd493cb4bd79547fb9b4f9c Mon Sep 17 00:00:00 2001 From: CyMule Date: Mon, 30 Jun 2025 15:36:48 -0400 Subject: [PATCH 3/3] update gen --- gen.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gen.yaml b/gen.yaml index a0125f9a..d56f57b0 100644 --- a/gen.yaml +++ b/gen.yaml @@ -14,7 +14,7 @@ generation: oAuth2ClientCredentialsEnabled: false oAuth2PasswordEnabled: false python: - version: 0.37.2 + version: 0.37.3 additionalDependencies: dev: deepdiff: '>=6.0'