gadget/docs/archive/tools/file/fetch-url.ts

196 lines
5.4 KiB
TypeScript

// src/tools/file/fetch-url.ts
// Copyright (C) 2025 DTP Technologies, LLC
// All Rights Reserved
import type { ToolDefinition } from "../../lib/ai-client.js";
import {
DtpTool,
type ToolArguments,
type ToolContext,
} from "../../lib/tool.js";
import { ChatSessionMode } from "@/models/chat-session.js";
import webFetcherService from "../../services/web-fetcher.js";
/**
 * Tool that fetches a web page through the web fetcher service and returns
 * its content as a plain-text report (header fields followed by Markdown).
 *
 * Arguments are validated before any network work is attempted; every
 * failure path returns a structured error via `this.error(...)` instead of
 * throwing.
 */
export class FetchUrlTool extends DtpTool {
  /** Class-style identifier of the tool. */
  get name(): string {
    return "FetchUrlTool";
  }

  /** Kebab-case identifier of the tool. */
  get slug(): string {
    return "fetch-url";
  }

  /**
   * Registry metadata: function name (taken from the definition, with a
   * literal fallback), category, search tags, and the chat-session modes
   * listed for this tool.
   */
  get metadata() {
    return {
      name: this.definition.function.name || "fetch_url",
      category: "file",
      tags: ["fetch", "url", "web", "http", "scrape", "io"],
      modes: [
        ChatSessionMode.Plan,
        ChatSessionMode.Build,
        ChatSessionMode.Test,
        ChatSessionMode.Ship,
        ChatSessionMode.Develop,
      ],
    };
  }

  /** Function-calling schema advertised to the model. Only `url` is required. */
  public definition: ToolDefinition = {
    type: "function",
    function: {
      name: "fetch_url",
      description:
        "Fetches a URL and returns the content as line-numbered Markdown. Uses Playwright for browser automation, Readability for content extraction, and Turndown for HTML-to-Markdown conversion. Supports line range parameters (startLine, endLine) like file_read. Caches results to .gadget-cache directory.",
      parameters: {
        type: "object",
        properties: {
          url: {
            type: "string",
            description:
              "The URL to fetch (must start with http:// or https://).",
          },
          startLine: {
            type: "number",
            description: "Starting line number (1-indexed). Defaults to 1.",
          },
          endLine: {
            type: "number",
            description:
              "Ending line number (inclusive). Defaults to end of content.",
          },
          useCache: {
            type: "boolean",
            description:
              "Whether to use cached content if available. Defaults to true.",
          },
        },
        required: ["url"],
      },
    },
  };

  /**
   * Validates the arguments, fetches the requested line range of the page and
   * formats the result as a plain-text report.
   *
   * @param _context - Tool execution context (unused by this tool).
   * @param args - Raw tool arguments: `url` (required), `startLine`,
   *   `endLine`, `useCache` (all optional).
   * @returns The formatted report on success, or the value produced by
   *   `this.error(...)` describing the validation or fetch failure.
   */
  public async execute(
    _context: ToolContext,
    args: ToolArguments,
  ): Promise<string> {
    // Arguments come from a model-generated tool call, so nothing about their
    // runtime types can be trusted; narrow before using string methods.
    const rawUrl: unknown = args.url;
    const startLine = (args.startLine as number | undefined) ?? 1;
    // `?? undefined` folds an explicit null into "not provided".
    const endLine = (args.endLine as number | undefined) ?? undefined;
    const useCache = (args.useCache as boolean | undefined) ?? true;

    // Validate URL parameter. A non-string value is reported the same way as
    // a missing one rather than crashing on `.trim()`.
    if (typeof rawUrl !== "string" || rawUrl.trim().length === 0) {
      return this.error("MISSING_PARAMETER", "URL must not be empty.", {
        parameter: "url",
        recoveryHint: "Provide a valid URL starting with http:// or https://",
      });
    }
    // Surrounding whitespace is never part of a URL; strip it so it neither
    // fails the scheme check nor leaks into the fetch.
    const url = rawUrl.trim();

    // Validate URL format
    const urlPattern = /^https?:\/\/.+/i;
    if (!urlPattern.test(url)) {
      return this.error(
        "INVALID_PARAMETER",
        "URL must start with http:// or https://",
        {
          parameter: "url",
          expected: "A valid URL starting with http:// or https://",
          example: "https://example.com",
        },
      );
    }

    // Validate startLine. Number.isInteger also rejects NaN, fractions and
    // non-numeric values, which a bare `< 1` comparison lets through.
    if (!Number.isInteger(startLine) || startLine < 1) {
      return this.error("INVALID_PARAMETER", "startLine must be >= 1.", {
        parameter: "startLine",
        expected: "A positive integer >= 1",
      });
    }

    // Validate endLine (same integer requirement as startLine).
    if (
      endLine !== undefined &&
      (!Number.isInteger(endLine) || endLine < startLine)
    ) {
      return this.error("INVALID_PARAMETER", "endLine must be >= startLine.", {
        parameter: "endLine",
        expected: "An integer >= startLine",
      });
    }

    try {
      this.log.info("Fetching URL", { url, startLine, endLine, useCache });

      // Fetch the URL with optional line range
      const result = await webFetcherService.fetchUrlWithRange(
        url,
        startLine,
        endLine,
        useCache,
      );

      // Format the response similar to file_read.
      // NOTE(review): "LINES SHOWN" repeats result.lineCount, same as
      // "TOTAL LINES" — confirm whether the service exposes a separate count
      // for the returned range.
      const plainTextResponse = [
        `URL: ${result.url}`,
        `TITLE: ${result.title}`,
        `TOTAL LINES: ${result.lineCount}`,
        `LINES SHOWN: ${result.lineCount}`,
        "FETCH OPERATION: fetch_url",
        "---",
        `${result.markdown}`,
      ].join("\n");

      this.log.info("Successfully fetched URL", {
        url,
        title: result.title,
        lineCount: result.lineCount,
      });

      return plainTextResponse;
    } catch (error) {
      const errorMessage =
        error instanceof Error ? error.message : String(error);

      this.log.error("Failed to fetch URL", {
        url,
        error: errorMessage,
      });

      // Browser timeouts surface as a TimeoutError whose message starts with a
      // capital "Timeout ...", and network-level ones as "ERR_TIMED_OUT", so
      // match on the error name and case-insensitively on the message.
      const isTimeout =
        (error instanceof Error && error.name === "TimeoutError") ||
        /timeout|timed[ _]out/i.test(errorMessage);
      if (isTimeout) {
        return this.error(
          "TIMEOUT",
          `Request timed out while fetching: ${url}`,
          {
            recoveryHint:
              "The page may be slow to load or the URL may be unreachable.",
          },
        );
      }

      if (
        errorMessage.includes("ENOTFOUND") ||
        errorMessage.includes("net::ERR_NAME_NOT_RESOLVED")
      ) {
        return this.error(
          "OPERATION_FAILED",
          `Failed to resolve hostname: ${url}`,
          {
            recoveryHint: "Check the URL and ensure the domain is accessible.",
          },
        );
      }

      if (
        errorMessage.includes("net::ERR_ABORTED") ||
        errorMessage.includes("404")
      ) {
        return this.error("NOT_FOUND", `Page not found: ${url}`, {
          recoveryHint:
            "The URL may be incorrect or the page may have been removed.",
        });
      }

      // Anything unrecognised is reported verbatim.
      return this.error(
        "OPERATION_FAILED",
        `Failed to fetch URL: ${errorMessage}`,
      );
    }
  }
}
/** Single tool instance created at module load and exposed as the default export. */
const fetchUrlTool = new FetchUrlTool();

export default fetchUrlTool;