196 lines
5.4 KiB
TypeScript
196 lines
5.4 KiB
TypeScript
// src/tools/file/fetch-url.ts
|
|
// Copyright (C) 2025 DTP Technologies, LLC
|
|
// All Rights Reserved
|
|
|
|
import type { ToolDefinition } from "../../lib/ai-client.js";
|
|
import {
|
|
DtpTool,
|
|
type ToolArguments,
|
|
type ToolContext,
|
|
} from "../../lib/tool.js";
|
|
import { ChatSessionMode } from "@/models/chat-session.js";
|
|
import webFetcherService from "../../services/web-fetcher.js";
|
|
|
|
/**
 * Tool that fetches a web page through `webFetcherService.fetchUrlWithRange`
 * and returns a plain-text report: a small header block followed by the
 * Markdown produced by the service.
 *
 * Arguments are validated before any network work is attempted, and failures
 * are reported through `this.error(...)` rather than thrown.
 */
export class FetchUrlTool extends DtpTool {
  /** Class-style identifier of this tool. */
  get name(): string {
    return "FetchUrlTool";
  }

  /** Kebab-case identifier of this tool. */
  get slug(): string {
    return "fetch-url";
  }

  /**
   * Descriptive metadata: the function name from {@link definition} (falling
   * back to "fetch_url"), a category, search tags, and the chat session modes
   * listed for this tool.
   */
  get metadata() {
    return {
      name: this.definition.function.name || "fetch_url",
      category: "file",
      tags: ["fetch", "url", "web", "http", "scrape", "io"],
      modes: [
        ChatSessionMode.Plan,
        ChatSessionMode.Build,
        ChatSessionMode.Test,
        ChatSessionMode.Ship,
        ChatSessionMode.Develop,
      ],
    };
  }

  /** Function-calling schema for the tool; only `url` is required. */
  public definition: ToolDefinition = {
    type: "function",
    function: {
      name: "fetch_url",
      description:
        "Fetches a URL and returns the content as line-numbered Markdown. Uses Playwright for browser automation, Readability for content extraction, and Turndown for HTML-to-Markdown conversion. Supports line range parameters (startLine, endLine) like file_read. Caches results to .gadget-cache directory.",
      parameters: {
        type: "object",
        properties: {
          url: {
            type: "string",
            description:
              "The URL to fetch (must start with http:// or https://).",
          },
          startLine: {
            type: "number",
            description: "Starting line number (1-indexed). Defaults to 1.",
          },
          endLine: {
            type: "number",
            description:
              "Ending line number (inclusive). Defaults to end of content.",
          },
          useCache: {
            type: "boolean",
            description:
              "Whether to use cached content if available. Defaults to true.",
          },
        },
        required: ["url"],
      },
    },
  };

  /**
   * Validates the arguments, fetches the page, and formats the response.
   *
   * @param _context - Tool context (unused by this tool).
   * @param args - Raw tool arguments: `url` (required), `startLine`,
   *   `endLine`, and `useCache` (all optional).
   * @returns The formatted page report on success, or the value produced by
   *   `this.error(...)` when validation or the fetch fails.
   */
  public async execute(
    _context: ToolContext,
    args: ToolArguments,
  ): Promise<string> {
    // Arguments come from a model-generated tool call, so they are narrowed at
    // runtime instead of being trusted via type assertions.
    const rawUrl: unknown = args.url;
    const rawStartLine: unknown = args.startLine ?? 1;
    const rawEndLine: unknown = args.endLine ?? undefined;
    const rawUseCache: unknown = args.useCache;

    // A non-string url is treated like a missing one (previously `.trim()`
    // threw outside the try block). Surrounding whitespace is dropped so it
    // neither fails the scheme check nor reaches the fetcher.
    const url = typeof rawUrl === "string" ? rawUrl.trim() : "";
    const useCache = typeof rawUseCache === "boolean" ? rawUseCache : true;

    // Validate URL parameter
    if (url.length === 0) {
      return this.error("MISSING_PARAMETER", "URL must not be empty.", {
        parameter: "url",
        recoveryHint: "Provide a valid URL starting with http:// or https://",
      });
    }

    // Validate URL format
    const urlPattern = /^https?:\/\/.+/i;
    if (!urlPattern.test(url)) {
      return this.error(
        "INVALID_PARAMETER",
        "URL must start with http:// or https://",
        {
          parameter: "url",
          expected: "A valid URL starting with http:// or https://",
          example: "https://example.com",
        },
      );
    }

    // Validate startLine. Number.isInteger also rejects NaN and fractional
    // values, which the bare `< 1` comparison let through.
    if (
      typeof rawStartLine !== "number" ||
      !Number.isInteger(rawStartLine) ||
      rawStartLine < 1
    ) {
      return this.error("INVALID_PARAMETER", "startLine must be >= 1.", {
        parameter: "startLine",
        expected: "A positive integer >= 1",
      });
    }
    const startLine = rawStartLine;

    // Validate endLine. A null endLine is treated as "not provided" rather
    // than being coerced to 0 by the comparison.
    let endLine: number | undefined;
    if (rawEndLine !== undefined) {
      if (
        typeof rawEndLine !== "number" ||
        !Number.isInteger(rawEndLine) ||
        rawEndLine < startLine
      ) {
        return this.error("INVALID_PARAMETER", "endLine must be >= startLine.", {
          parameter: "endLine",
          expected: "An integer >= startLine",
        });
      }
      endLine = rawEndLine;
    }

    try {
      this.log.info("Fetching URL", { url, startLine, endLine, useCache });

      // Fetch the URL with optional line range
      const result = await webFetcherService.fetchUrlWithRange(
        url,
        startLine,
        endLine,
        useCache,
      );

      // Format the response similar to file_read.
      // NOTE(review): "LINES SHOWN" repeats result.lineCount, the same value
      // as "TOTAL LINES" — confirm whether the service exposes a separate
      // count for the returned range.
      const plainTextResponse = [
        `URL: ${result.url}`,
        `TITLE: ${result.title}`,
        `TOTAL LINES: ${result.lineCount}`,
        `LINES SHOWN: ${result.lineCount}`,
        "FETCH OPERATION: fetch_url",
        "---",
        `${result.markdown}`,
      ].join("\n");

      this.log.info("Successfully fetched URL", {
        url,
        title: result.title,
        lineCount: result.lineCount,
      });

      return plainTextResponse;
    } catch (error) {
      const errorMessage =
        error instanceof Error ? error.message : String(error);

      this.log.error("Failed to fetch URL", {
        url,
        error: errorMessage,
      });

      // Match timeouts case-insensitively: messages such as
      // "Timeout 30000ms exceeded", "ETIMEDOUT" and "net::ERR_TIMED_OUT" were
      // all missed by the previous lowercase "timeout" substring test.
      const isTimeout =
        (error instanceof Error && error.name === "TimeoutError") ||
        /time(?:d[\s_-]?)?out/i.test(errorMessage);
      if (isTimeout) {
        return this.error(
          "TIMEOUT",
          `Request timed out while fetching: ${url}`,
          {
            recoveryHint:
              "The page may be slow to load or the URL may be unreachable.",
          },
        );
      }

      if (
        errorMessage.includes("ENOTFOUND") ||
        errorMessage.includes("net::ERR_NAME_NOT_RESOLVED")
      ) {
        return this.error(
          "OPERATION_FAILED",
          `Failed to resolve hostname: ${url}`,
          {
            recoveryHint: "Check the URL and ensure the domain is accessible.",
          },
        );
      }

      // Look for a 404 status outside the echoed URL, so a URL that merely
      // contains "404" (e.g. /issues/4041) does not turn an unrelated failure
      // into NOT_FOUND.
      const messageWithoutUrl = errorMessage.split(url).join("");
      if (
        errorMessage.includes("net::ERR_ABORTED") ||
        /\b404\b/.test(messageWithoutUrl)
      ) {
        return this.error("NOT_FOUND", `Page not found: ${url}`, {
          recoveryHint:
            "The URL may be incorrect or the page may have been removed.",
        });
      }

      return this.error(
        "OPERATION_FAILED",
        `Failed to fetch URL: ${errorMessage}`,
      );
    }
  }
}
|
|
|
|
// Default export: a single FetchUrlTool instance created when this module is loaded.
export default new FetchUrlTool();
|