Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 6d65ed4

Browse files
Merge branch 'main' into misc-rearrange-pipeline-action-dropdown-items
2 parents a8de1fa + 4071ff4 commit 6d65ed4

File tree

7 files changed

+48
-15
lines changed

7 files changed

+48
-15
lines changed

backend/api_v2/api_deployment_views.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,7 @@ def get(
126126
)
127127
if not enable_highlight:
128128
response.remove_result_metadata_keys(["highlight_data"])
129+
response.remove_result_metadata_keys(["extracted_text"])
129130
if not include_metadata:
130131
response.remove_result_metadata_keys()
131132
if not include_metrics:

backend/api_v2/deployment_helper.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,7 @@ def execute_workflow(
218218
)
219219
if not enable_highlight:
220220
result.remove_result_metadata_keys(["highlight_data"])
221+
result.remove_result_metadata_keys(["extracted_text"])
221222
if not include_metadata:
222223
result.remove_result_metadata_keys()
223224
if not include_metrics:

backend/sample.env

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,9 +78,9 @@ PROMPT_STUDIO_FILE_PATH=/app/prompt-studio-data
7878

7979
# Structure Tool Image (Runs prompt studio exported tools)
8080
# https://hub.docker.com/r/unstract/tool-structure
81-
STRUCTURE_TOOL_IMAGE_URL="docker:unstract/tool-structure:0.0.84"
81+
STRUCTURE_TOOL_IMAGE_URL="docker:unstract/tool-structure:0.0.85"
8282
STRUCTURE_TOOL_IMAGE_NAME="unstract/tool-structure"
83-
STRUCTURE_TOOL_IMAGE_TAG="0.0.84"
83+
STRUCTURE_TOOL_IMAGE_TAG="0.0.85"
8484

8585
# Feature Flags
8686
EVALUATION_SERVER_IP=unstract-flipt

backend/workflow_manager/endpoint_v2/destination.py

Lines changed: 23 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -532,14 +532,22 @@ def get_tool_execution_result_from_metadata(
532532
return result
533533

534534
def has_valid_metadata(self, metadata: Any) -> bool:
535-
# Check if metadata is not None and metadata is a non-empty string
536-
if not metadata:
535+
# Check if metadata is not None and is either a non-empty dict or valid string
536+
if metadata is None:
537537
return False
538-
if not isinstance(metadata, str):
539-
return False
540-
if metadata.strip().lower() == "none":
541-
return False
542-
return True
538+
539+
# Handle dict metadata (which is valid and contains extracted_text)
540+
if isinstance(metadata, dict):
541+
return bool(metadata) # Return True if dict is not empty
542+
543+
# Handle string metadata
544+
if isinstance(metadata, str):
545+
if metadata.strip().lower() == "none" or not metadata.strip():
546+
return False
547+
return True
548+
549+
# For other types, consider them valid if they're truthy
550+
return bool(metadata)
543551

544552
def get_metadata(
545553
self, file_history: FileHistory | None = None
@@ -555,7 +563,6 @@ def get_metadata(
555563
else:
556564
return None
557565
metadata: dict[str, Any] = self.get_workflow_metadata()
558-
559566
return metadata
560567

561568
def delete_file_execution_directory(self) -> None:
@@ -777,6 +784,9 @@ def _push_to_queue(
777784
q_name = self._get_review_queue_name()
778785
whisper_hash = meta_data.get("whisper-hash") if meta_data else None
779786

787+
# Get extracted text from metadata (added by structure tool)
788+
extracted_text = meta_data.get("extracted_text") if meta_data else None
789+
780790
queue_result = QueueResult(
781791
file=file_name,
782792
status=QueueResultStatus.SUCCESS,
@@ -785,6 +795,7 @@ def _push_to_queue(
785795
file_content=file_content_base64,
786796
whisper_hash=whisper_hash,
787797
file_execution_id=file_execution_id,
798+
extracted_text=extracted_text,
788799
).to_dict()
789800

790801
queue_result_json = json.dumps(queue_result)
@@ -811,6 +822,9 @@ def _push_to_queue(
811822
else:
812823
whisper_hash = None
813824

825+
# Get extracted text from metadata (added by structure tool)
826+
extracted_text = meta_data.get("extracted_text") if meta_data else None
827+
814828
# Create QueueResult with TTL metadata
815829
queue_result_obj = QueueResult(
816830
file=file_name,
@@ -820,6 +834,7 @@ def _push_to_queue(
820834
file_content=file_content_base64,
821835
whisper_hash=whisper_hash,
822836
file_execution_id=file_execution_id,
837+
extracted_text=extracted_text,
823838
)
824839

825840
# Add TTL metadata based on HITLSettings

backend/workflow_manager/endpoint_v2/queue_utils.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,7 @@ class QueueResult:
9797
file_execution_id: str | None = None
9898
enqueued_at: float | None = None
9999
ttl_seconds: int | None = None
100+
extracted_text: str | None = None
100101

101102
def __post_init__(self):
102103
"""Initialize enqueued_at timestamp if not provided and validate required fields"""
@@ -122,5 +123,6 @@ def to_dict(self) -> Any:
122123
"file_execution_id": self.file_execution_id,
123124
"enqueued_at": self.enqueued_at,
124125
"ttl_seconds": self.ttl_seconds,
126+
"extracted_text": self.extracted_text,
125127
}
126128
return result_dict

tools/structure/src/config/properties.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
"schemaVersion": "0.0.1",
33
"displayName": "Structure Tool",
44
"functionName": "structure_tool",
5-
"toolVersion": "0.0.84",
5+
"toolVersion": "0.0.85",
66
"description": "This is a template tool which can answer set of input prompts designed in the Prompt Studio",
77
"input": {
88
"description": "File that needs to be indexed and parsed for answers"

tools/structure/src/main.py

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -325,11 +325,25 @@ def run(
325325
)
326326

327327
# HACK: Replacing actual file's name instead of INFILE
328-
if SettingsKeys.METADATA in structured_output:
329-
structured_output[SettingsKeys.METADATA][SettingsKeys.FILE_NAME] = (
330-
self.source_file_name
331-
)
328+
# Ensure metadata section exists
329+
if SettingsKeys.METADATA not in structured_output:
330+
structured_output[SettingsKeys.METADATA] = {}
331+
self.stream_log("Created metadata section in structured_output")
332+
333+
structured_output[SettingsKeys.METADATA][SettingsKeys.FILE_NAME] = (
334+
self.source_file_name
335+
)
332336

337+
# Add extracted text for HITL raw view
338+
if extracted_text:
339+
structured_output[SettingsKeys.METADATA]["extracted_text"] = extracted_text
340+
self.stream_log(
341+
f"Added text extracted from the document to metadata (length: {len(extracted_text)} characters)"
342+
)
343+
else:
344+
self.stream_log(
345+
"No text is extracted from the document to add to the metadata"
346+
)
333347
if merged_metrics := self._merge_metrics(
334348
structured_output.get(SettingsKeys.METRICS, {}), index_metrics
335349
):

0 commit comments

Comments
 (0)