apify
diff --git a/docs/claude-code-tools.json b/docs/claude-code-tools.json
Lines changed: 508 additions & 0 deletions
diff --git a/package-lock.json b/package-lock.json
Lines changed: 4 additions & 4 deletions
diff --git a/package.json b/package.json
Lines changed: 1 addition & 1 deletion
diff --git a/src/const.ts b/src/const.ts
Lines changed: 4 additions & 3 deletions
diff --git a/src/tools/actor.ts b/src/tools/actor.ts
Lines changed: 65 additions & 59 deletions
diff --git a/src/tools/dataset.ts b/src/tools/dataset.ts
Lines changed: 33 additions & 18 deletions
diff --git a/src/tools/dataset_collection.ts b/src/tools/dataset_collection.ts
Lines changed: 12 additions & 6 deletions
diff --git a/src/tools/fetch-actor-details.ts b/src/tools/fetch-actor-details.ts
Lines changed: 11 additions & 9 deletions
@@ -64,7 +64,7 @@
     "dotenv": "^16.4.7",
     "eslint": "^9.19.0",
     "eventsource": "^3.0.2",
-    "tsx": "^4.6.2",
+    "tsx": "^4.20.5",
     "typescript": "^5.3.3",
     "typescript-eslint": "^8.23.0",
     "vitest": "^3.0.8"
 
@@ -47,15 +47,16 @@ export enum HelperTools {
     GET_HTML_SKELETON = 'get-html-skeleton',
 }
 
-export const ACTOR_RAG_WEB_BROWSER = 'apify/rag-web-browser';
+export const RAG_WEB_BROWSER = 'apify/rag-web-browser';
+export const RAG_WEB_BROWSER_WHITELISTED_FIELDS = ['query', 'maxResults', 'outputFormats'];
+export const RAG_WEB_BROWSER_ADDITIONAL_DESC = `This tool provides general web browsing functionality. For specific sites, such as e-commerce or social media, it is always better to search for a specific Actor.`;
 
 export const defaults = {
     actors: [
-        ACTOR_RAG_WEB_BROWSER,
+        RAG_WEB_BROWSER,
     ],
 };
 
-export const ACTOR_ADDITIONAL_INSTRUCTIONS = 'Never call/execute tool/Actor unless confirmed by the user.';
 export const SKYFIRE_MIN_CHARGE_USD = 5.0;
 export const SKYFIRE_SELLER_ID = process.env.SKYFIRE_SELLER_SERVICE_ID;
 
 
@@ -7,9 +7,10 @@ import log from '@apify/log';
 
 import { ApifyClient } from '../apify-client.js';
 import {
-    ACTOR_ADDITIONAL_INSTRUCTIONS,
     ACTOR_MAX_MEMORY_MBYTES,
     HelperTools,
+    RAG_WEB_BROWSER,
+    RAG_WEB_BROWSER_ADDITIONAL_DESC,
     SKYFIRE_TOOL_INSTRUCTIONS,
     TOOL_MAX_OUTPUT_CHARS,
 } from '../const.js';
@@ -27,7 +28,7 @@ import type { ProgressTracker } from '../utils/progress.js';
 import type { JsonSchemaProperty } from '../utils/schema-generation.js';
 import { generateSchemaFromItems } from '../utils/schema-generation.js';
 import { getActorDefinition } from './build.js';
-import { actorNameToToolName, fixedAjvCompile, getToolSchemaID, transformActorInputSchemaProperties } from './utils.js';
+import { actorNameToToolName, buildActorInputSchema, fixedAjvCompile } from './utils.js';
 
 // Define a named return type for callActorGetDataset
 export type CallActorGetDatasetResult = {
@@ -157,45 +158,48 @@ export async function getNormalActorsAsTools(
 ): Promise<ToolEntry[]> {
     const tools: ToolEntry[] = [];
 
-    // Zip the results with their corresponding actorIDs
     for (const actorInfo of actorsInfo) {
         const { actorDefinitionPruned } = actorInfo;
 
-        if (actorDefinitionPruned) {
-            const schemaID = getToolSchemaID(actorDefinitionPruned.actorFullName);
-            if (actorDefinitionPruned.input && 'properties' in actorDefinitionPruned.input && actorDefinitionPruned.input) {
-                actorDefinitionPruned.input.properties = transformActorInputSchemaProperties(actorDefinitionPruned.input);
-                // Add schema $id, each valid JSON schema should have a unique $id
-                // see https://json-schema.org/understanding-json-schema/basics#declaring-a-unique-identifier
-                actorDefinitionPruned.input.$id = schemaID;
-            }
-            try {
-                const memoryMbytes = actorDefinitionPruned.defaultRunOptions?.memoryMbytes || ACTOR_MAX_MEMORY_MBYTES;
-                const tool: ToolEntry = {
-                    type: 'actor',
-                    tool: {
-                        name: actorNameToToolName(actorDefinitionPruned.actorFullName),
-                        actorFullName: actorDefinitionPruned.actorFullName,
-                        description: `This tool calls the Actor "${actorDefinitionPruned.actorFullName}" and retrieves its output results. Use this tool instead of the "${HelperTools.ACTOR_CALL}" if user requests to use this specific Actor.
-Actor description: ${actorDefinitionPruned.description}
-Instructions: ${ACTOR_ADDITIONAL_INSTRUCTIONS}`,
-                        inputSchema: actorDefinitionPruned.input
-                            // So Actor without input schema works - MCP client expects JSON schema valid output
-                            || {
-                                type: 'object',
-                                properties: {},
-                                required: [],
-                            },
-                        // Additional props true to allow skyfire-pay-id
-                        ajvValidate: fixedAjvCompile(ajv, { ...actorDefinitionPruned.input, additionalProperties: true }),
-                        memoryMbytes: memoryMbytes > ACTOR_MAX_MEMORY_MBYTES ? ACTOR_MAX_MEMORY_MBYTES : memoryMbytes,
-                    },
-                };
-                tools.push(tool);
-            } catch (validationError) {
-                log.error('Failed to compile AJV schema for Actor', { actorName: actorDefinitionPruned.actorFullName, error: validationError });
-            }
+        if (!actorDefinitionPruned) continue;
+
+        const isRag = actorDefinitionPruned.actorFullName === RAG_WEB_BROWSER;
+        const { inputSchema } = buildActorInputSchema(actorDefinitionPruned.actorFullName, actorDefinitionPruned.input, isRag);
+
+        let description = `This tool calls the Actor "${actorDefinitionPruned.actorFullName}" and retrieves its output results.
+Use this tool instead of the "${HelperTools.ACTOR_CALL}" if user requests this specific Actor.
+Actor description: ${actorDefinitionPruned.description}`;
+        if (isRag) {
+            description += RAG_WEB_BROWSER_ADDITIONAL_DESC;
         }
+
+        const memoryMbytes = Math.min(
+            actorDefinitionPruned.defaultRunOptions?.memoryMbytes || ACTOR_MAX_MEMORY_MBYTES,
+            ACTOR_MAX_MEMORY_MBYTES,
+        );
+
+        let ajvValidate;
+        try {
+            ajvValidate = fixedAjvCompile(ajv, { ...inputSchema, additionalProperties: true });
+        } catch (e) {
+            log.error('Failed to compile schema', {
+                actorName: actorDefinitionPruned.actorFullName,
+                error: e,
+            });
+            continue;
+        }
+
+        tools.push({
+            type: 'actor',
+            tool: {
+                name: actorNameToToolName(actorDefinitionPruned.actorFullName),
+                actorFullName: actorDefinitionPruned.actorFullName,
+                description,
+                inputSchema,
+                ajvValidate,
+                memoryMbytes,
+            },
+        });
     }
     return tools;
 }
@@ -240,8 +244,7 @@ async function getMCPServersAsTools(
                 // Skip this Actor, connectMCPClient will log the error
                 return [];
             }
-            const serverTools = await getMCPServerTools(actorId, client, mcpServerUrl);
-            return serverTools;
+            return await getMCPServerTools(actorId, client, mcpServerUrl);
         } finally {
             if (client) await client.close();
         }
@@ -304,7 +307,7 @@ const callActorArgs = z.object({
         .describe('The name of the Actor to call. For example, "apify/rag-web-browser".'),
     step: z.enum(['info', 'call'])
         .default('info')
-        .describe(`Step to perform: "info" to get Actor details and input schema (required first step), "call" to execute the Actor (only after getting info).`),
+        .describe(`Step to perform: "info" to get Actor details and input schema (required first step), "call" to run the Actor (only after getting info).`),
     input: z.object({}).passthrough()
         .optional()
         .describe(`The input JSON to pass to the Actor. For example, {"query": "apify", "maxResults": 5, "outputFormats": ["markdown"]}. Required only when step is "call".`),
@@ -327,33 +330,35 @@ export const callActor: ToolEntry = {
     tool: {
         name: HelperTools.ACTOR_CALL,
         actorFullName: HelperTools.ACTOR_CALL,
-        description: `Call Any Actor from Apify Store - Two-Step Process
+        description: `Call any Actor from the Apify Store using a mandatory two-step workflow.
+This ensures you first get the Actor’s input schema and details before executing it safely.
 
-This tool uses a mandatory two-step process to safely call any Actor from the Apify store.
+There are two ways to run Actors:
+1. Dedicated Actor tools (e.g., ${actorNameToToolName('apify/rag-web-browser')}): These are pre-configured tools, offering a simpler and more direct experience.
+2. Generic call-actor tool (${HelperTools.ACTOR_CALL}): Use this when a dedicated tool is not available or when you want to run any Actor dynamically. This tool is especially useful if you do not want to add specific tools or your client does not support dynamic tool registration.
 
-USAGE:
-• ONLY for Actors that are NOT available as dedicated tools
-• If a dedicated tool exists (e.g., ${actorNameToToolName('apify/rag-web-browser')}), use that instead
+**Important:**
+
+Typically, a successful run returns a \`datasetId\` (the Actor's output stored as an Apify dataset) and a short preview of items.
+To fetch the full output, use the ${HelperTools.ACTOR_OUTPUT_GET} tool with the \`datasetId\`.
 
-MANDATORY TWO-STEP WORKFLOW:
+USAGE:
+- Always use dedicated tools when available (e.g., ${actorNameToToolName('apify/rag-web-browser')})
+- Use the generic call-actor tool only if a dedicated tool does not exist for your Actor.
 
+MANDATORY TWO-STEP WORKFLOW:
 Step 1: Get Actor Info (step="info", default)
-• First call this tool with step="info" to get Actor details and input schema
-• For regular Actors: returns the Actor input schema
-• For MCP server Actors: returns list of available tools with their schemas
-• You MUST do this step first - it's required to understand how to call the Actor
+- First call this tool with step="info" to get Actor details and input schema
+- This returns the Actor description, documentation, and required input schema
+- You MUST do this step first - it's required to understand how to call the Actor
 
 Step 2: Call Actor (step="call")
-• Only after step 1, call again with step="call" and proper input based on the schema
-• For regular Actors: executes the Actor and returns results
-• For MCP server Actors: use format "actor-name:tool-name" to call specific tools
-
-MCP SERVER ACTORS:
-• For MCP server actors, step="info" lists available tools instead of input schema
-• To call an MCP tool, use actor name format: "actor-name:tool-name" with step="call"
-• Example: actor="apify/my-mcp-actor:search-tool", step="call", input={...}
+- Only after step 1, call this tool again with step="call" and proper input based on the schema
+- This runs the Actor. It will create an output as an Apify dataset (with datasetId).
+- This step returns a dataset preview, typically JSON-formatted tabular data.
 
-The step parameter enforces this workflow - you cannot call an Actor without first getting its info.`,
+EXAMPLES:
+- user_input: Get instagram posts using apify/instagram-scraper`,
         inputSchema: zodToJsonSchema(callActorArgs),
         ajvValidate: ajv.compile({
             ...zodToJsonSchema(callActorArgs),
@@ -414,6 +419,7 @@ The step parameter enforces this workflow - you cannot call an Actor without fir
                             return buildMCPResponse([`Actor information for '${baseActorName}' was not found. Please check the Actor ID or name and ensure the Actor exists.`]);
                         }
                         const content = [
+                            // TODO: Update the result to state that this is the output of the "info" step and that the tool must now be called again with step="call" and proper input.
                             { type: 'text', text: `**Input Schema:**\n${JSON.stringify(details.inputSchema, null, 0)}` },
                         ];
                         /**
 
@@ -46,11 +46,17 @@ export const getDataset: ToolEntry = {
     tool: {
         name: HelperTools.DATASET_GET,
         actorFullName: HelperTools.DATASET_GET,
-        description: 'Dataset is a collection of structured data created by an Actor run. '
-            + 'Returns information about dataset object with metadata (itemCount, schema, fields, stats). '
-            + `Fields describe the structure of the dataset and can be used to filter the data with the ${HelperTools.DATASET_GET_ITEMS} tool. `
-            + 'Note: itemCount updates may have 5s delay.'
-            + 'The dataset can be accessed with the dataset URL: GET: https://api.apify.com/v2/datasets/:datasetId',
+        description: `Get metadata for a dataset (collection of structured data created by an Actor run).
+The results will include dataset details such as itemCount, schema, fields, and stats.
+Use fields to understand structure for filtering with ${HelperTools.DATASET_GET_ITEMS}.
+Note: itemCount updates may be delayed by up to ~5 seconds.
+
+USAGE:
+- Use when you need dataset metadata to understand its structure before fetching items.
+
+USAGE EXAMPLES:
+- user_input: Show info for dataset 8TtYhCwKzQeQk7dJx
+- user_input: What fields does username~my-dataset have?`,
         inputSchema: zodToJsonSchema(getDatasetArgs),
         ajvValidate: ajv.compile(zodToJsonSchema(getDatasetArgs)),
         call: async (toolArgs) => {
@@ -74,16 +80,18 @@ export const getDatasetItems: ToolEntry = {
     tool: {
         name: HelperTools.DATASET_GET_ITEMS,
         actorFullName: HelperTools.DATASET_GET_ITEMS,
-        description: 'Returns dataset items with pagination support. '
-            + 'Items can be sorted (newest to oldest) and filtered (clean mode skips empty items and hidden fields). '
-            + 'Supports field selection - include specific fields or exclude unwanted ones using comma-separated lists. '
-            + 'For nested objects, you must first flatten them using the flatten parameter before accessing their fields. '
-            + 'Example: To get URLs from items like [{"metadata":{"url":"example.com"}}], '
-            + 'use flatten="metadata" and then fields="metadata.url". '
-            + 'The flattening transforms nested objects into dot-notation format '
-            + '(e.g. {"metadata":{"url":"x"}} becomes {"metadata.url":"x"}). '
-            + 'Retrieve only the fields you need, reducing the response size and improving performance. '
-            + 'The response includes total count, offset, limit, and items array.',
+        description: `Retrieve dataset items with pagination, sorting, and field selection.
+Use clean=true to skip empty items and hidden fields. Include or omit fields using comma-separated lists.
+For nested objects, first flatten them (e.g., flatten="metadata"), then reference nested fields via dot notation (e.g., fields="metadata.url").
+
+The results will include items along with pagination info (limit, offset) and total count.
+
+USAGE:
+- Use when you need to read data from a dataset (all items or only selected fields).
+
+USAGE EXAMPLES:
+- user_input: Get first 100 items from dataset 8TtYhCwKzQeQk7dJx
+- user_input: Get only metadata.url and title from dataset username~my-dataset (flatten metadata)`,
         inputSchema: zodToJsonSchema(getDatasetItemsArgs),
         ajvValidate: ajv.compile(zodToJsonSchema(getDatasetItemsArgs)),
         call: async (toolArgs) => {
@@ -136,9 +144,16 @@ export const getDatasetSchema: ToolEntry = {
     tool: {
         name: HelperTools.DATASET_SCHEMA_GET,
         actorFullName: HelperTools.DATASET_SCHEMA_GET,
-        description: 'Generates a JSON schema from dataset items. '
-            + 'The schema describes the structure of the data in the dataset, which can be used for validation, documentation, or data processing.'
-            + 'Since the dataset can be large it is convenient to understand the structure of the dataset before getting dataset items.',
+        description: `Generate a JSON schema from a sample of dataset items.
+The schema describes the structure of the data and can be used for validation, documentation, or processing.
+Use this to understand the dataset before fetching many items.
+
+USAGE:
+- Use when you need to infer the structure of dataset items for downstream processing or validation.
+
+USAGE EXAMPLES:
+- user_input: Generate schema for dataset 8TtYhCwKzQeQk7dJx using 10 items
+- user_input: Show schema of username~my-dataset (clean items only)`,
         inputSchema: zodToJsonSchema(getDatasetSchemaArgs),
         ajvValidate: ajv.compile(zodToJsonSchema(getDatasetSchemaArgs)),
         call: async (toolArgs) => {
 
@@ -30,12 +30,18 @@ export const getUserDatasetsList: ToolEntry = {
     tool: {
         name: HelperTools.DATASET_LIST_GET,
         actorFullName: HelperTools.DATASET_LIST_GET,
-        description: 'Lists datasets (collections of Actor run data). '
-            + 'Actor runs automatically produce unnamed datasets (use unnamed=true to include these). '
-            + 'Users can also create named datasets manually. '
-            + 'Each dataset includes itemCount, access settings, and usage stats (readCount, writeCount). '
-            + 'Results are sorted by createdAt in ascending order (use desc=true for descending). '
-            + 'Supports pagination with limit (max 20) and offset parameters.',
+        description: `List datasets (collections of Actor run data) for the authenticated user.
+Actor runs automatically produce unnamed datasets (set unnamed=true to include them). Users can also create named datasets.
+
+The results will include datasets with itemCount, access settings, and usage stats, sorted by createdAt (ascending by default).
+Use limit (max 20), offset, and desc to paginate and sort.
+
+USAGE:
+- Use when you need to browse available datasets (named or unnamed) to locate data.
+
+USAGE EXAMPLES:
+- user_input: List my last 10 datasets (newest first)
+- user_input: List unnamed datasets`,
         inputSchema: zodToJsonSchema(getUserDatasetsListArgs),
         ajvValidate: ajv.compile(zodToJsonSchema(getUserDatasetsListArgs)),
         call: async (toolArgs) => {
 
@@ -17,15 +17,17 @@ export const fetchActorDetailsTool: ToolEntry = {
     type: 'internal',
     tool: {
         name: HelperTools.ACTOR_GET_DETAILS,
-        description: `Get detailed information about an Actor by its ID or full name.\n`
-            + `This tool returns title, description, URL, README (Actor's documentation), input schema, and usage statistics. \n`
-            + `The Actor name is always composed of "username/name", for example, "apify/rag-web-browser".\n`
-            + `Present Actor information in user-friendly format as an Actor card.\n`
-            + `USAGE:\n`
-            + `- Use when user asks about an Actor its details, description, input schema, etc.\n`
-            + `EXAMPLES:\n`
-            + `- user_input: How to use apify/rag-web-browser\n`
-            + `- user_input: What is the input schema for apify/rag-web-browser`,
+        description: `Get detailed information about an Actor by its ID or full name (format: "username/name", e.g., "apify/rag-web-browser").
+This returns the Actor’s title, description, URL, README (documentation), input schema, pricing/usage information, and basic stats.
+Present the information in a user-friendly Actor card.
+
+USAGE:
+- Use when a user asks about an Actor’s details, input schema, README, or how to use it.
+
+USAGE EXAMPLES:
+- user_input: How to use apify/rag-web-browser
+- user_input: What is the input schema for apify/rag-web-browser?
+- user_input: What is the pricing for apify/instagram-scraper?`,
         inputSchema: zodToJsonSchema(fetchActorDetailsToolArgsSchema),
         ajvValidate: ajv.compile(zodToJsonSchema(fetchActorDetailsToolArgsSchema)),
         call: async (toolArgs) => {