Documentation Index
Fetch the complete documentation index at: https://webscraping.titannet.io/docs/llms.txt
Use this file to discover all available pages before exploring further.
The crawl action type is navigation-first: start from known URLs (or URLs produced by an earlier step) and follow links within configured depth and breadth limits. Use it when discovery needs systematic expansion of a site or subgraph, not only a single hop into a fixed list of pages.
Examples use TITAN_API_URL and TITAN_TOKEN; the Run examples also use TASK_ID. Tab titles match other integration pages. Rust: ureq (2.x API) + serde_json.
When to use crawl
- You know entry URLs but not every target page in advance.
- Layout changes often, but internal linking is stable enough to traverse.
- You will hand off URLs to scrape for schema-shaped extraction, or combine crawl with search upstream.
Typical input_source values for crawl:
- static_urls — crawl starts from URLs on the task.
- previous_step — crawl consumes URLs emitted by search or another step.
- task_url_inventory — crawl is driven from the URL inventory when your Titan environment supports that flow.
Always set limits (depth, max pages, domain rules—whatever your template and script require) so runs stay bounded and predictable.
Single-action example (POST /api/v1/tasks)
Field names under limits are template-specific; align them with the script you bind.
cURL
Go
TypeScript
Python
Rust
# Create a single-action crawl task seeded from a static URL list.
# Long-form options are used for readability; the JSON payload is sent verbatim.
curl --silent --show-error --request POST "$TITAN_API_URL/api/v1/tasks" \
  --header "Authorization: Bearer $TITAN_TOKEN" \
  --header "Content-Type: application/json" \
  --data '{
"name": "Vendor docs crawl",
"objective": "Enumerate documentation pages under https://docs.vendor.example up to depth 2",
"execution_type": "single",
"urls": ["https://docs.vendor.example/"],
"action_type": "crawl",
"input_source": "static_urls",
"template_slug": "docs-site-crawl",
"limits": {
"max_depth": 2,
"max_pages": 200
}
}'
// Create a single-action crawl task seeded from a static URL list and print
// the raw response body. Every fallible call is checked so that a failure
// surfaces as the real error rather than a nil-pointer panic further down.
body := map[string]any{
	"name":           "Vendor docs crawl",
	"objective":      "Enumerate documentation pages under https://docs.vendor.example up to depth 2",
	"execution_type": "single",
	"urls":           []string{"https://docs.vendor.example/"},
	"action_type":    "crawl",
	"input_source":   "static_urls",
	"template_slug":  "docs-site-crawl",
	"limits":         map[string]any{"max_depth": 2, "max_pages": 200},
}
b, err := json.Marshal(body)
if err != nil {
	panic(err)
}
req, err := http.NewRequest("POST", os.Getenv("TITAN_API_URL")+"/api/v1/tasks", bytes.NewReader(b))
if err != nil {
	// req is nil here (e.g. TITAN_API_URL is malformed); do not touch req.Header.
	panic(err)
}
req.Header.Set("Authorization", "Bearer "+os.Getenv("TITAN_TOKEN"))
req.Header.Set("Content-Type", "application/json")
resp, err := http.DefaultClient.Do(req)
if err != nil {
	// resp may be nil when Do returns an error, so bail out before using resp.Body.
	panic(err)
}
defer resp.Body.Close()
out, err := io.ReadAll(resp.Body)
if err != nil {
	panic(err)
}
fmt.Println(string(out))
// Create a single-action crawl task seeded from a static URL list and print
// the raw response body.
const apiUrl = process.env.TITAN_API_URL!;
const bearer = process.env.TITAN_TOKEN!;

// Task definition; field names under `limits` depend on the bound template.
const taskSpec = {
  name: "Vendor docs crawl",
  objective:
    "Enumerate documentation pages under https://docs.vendor.example up to depth 2",
  execution_type: "single",
  urls: ["https://docs.vendor.example/"],
  action_type: "crawl",
  input_source: "static_urls",
  template_slug: "docs-site-crawl",
  limits: { max_depth: 2, max_pages: 200 },
};

const response = await fetch(`${apiUrl}/api/v1/tasks`, {
  method: "POST",
  headers: {
    Authorization: `Bearer ${bearer}`,
    "Content-Type": "application/json",
  },
  body: JSON.stringify(taskSpec),
});
const responseText = await response.text();
console.log(responseText);
import json
import os
import urllib.request

# Task definition: one crawl action seeded from a static URL list.
# Field names under "limits" depend on the bound template.
task_spec = {
    "name": "Vendor docs crawl",
    "objective": "Enumerate documentation pages under https://docs.vendor.example up to depth 2",
    "execution_type": "single",
    "urls": ["https://docs.vendor.example/"],
    "action_type": "crawl",
    "input_source": "static_urls",
    "template_slug": "docs-site-crawl",
    "limits": {"max_depth": 2, "max_pages": 200},
}

# Resolve the endpoint, body and headers up front (same lookup order as a
# single inline Request(...) call), then issue the POST.
endpoint = f"{os.environ['TITAN_API_URL']}/api/v1/tasks"
encoded_body = json.dumps(task_spec).encode()
request_headers = {
    "Authorization": f"Bearer {os.environ['TITAN_TOKEN']}",
    "Content-Type": "application/json",
}
create_request = urllib.request.Request(
    endpoint,
    data=encoded_body,
    headers=request_headers,
    method="POST",
)

# Print the raw response body.
with urllib.request.urlopen(create_request) as response:
    raw_body = response.read()
    print(raw_body.decode())
use serde_json::json;

// Create a single-action crawl task seeded from a static URL list and print
// the raw response body. Written against the ureq 2.x API.
let base = std::env::var("TITAN_API_URL").expect("TITAN_API_URL");
let token = std::env::var("TITAN_TOKEN").expect("TITAN_TOKEN");

// Task definition; field names under "limits" depend on the bound template.
let body = json!({
    "name": "Vendor docs crawl",
    "objective": "Enumerate documentation pages under https://docs.vendor.example up to depth 2",
    "execution_type": "single",
    "urls": ["https://docs.vendor.example/"],
    "action_type": "crawl",
    "input_source": "static_urls",
    "template_slug": "docs-site-crawl",
    "limits": { "max_depth": 2, "max_pages": 200 }
});

// `ureq::post` takes `&str` in ureq 2.x, so bind the formatted URL and borrow
// it instead of passing the `String` by value.
let url = format!("{base}/api/v1/tasks");
// NOTE: ureq 2.x reports 4xx/5xx responses as `Err`, so `expect` panics on an
// HTTP error status as well as on transport failures.
let resp = ureq::post(&url)
    .set("Authorization", &format!("Bearer {token}"))
    .set("Content-Type", "application/json")
    .send_json(body)
    .expect("create");
println!("{}", resp.into_string().expect("body"));
Chained example (crawl → scrape)
In an execution_plan, keep each step self-contained. Crawl discovers URLs; scrape reads them into records:
{
"name": "Crawl vendor docs then extract API tables",
"objective": "Walk the docs tree, then scrape each page for REST tables",
"execution_type": "single",
"execution_plan": {
"steps": [
{
"step_id": "walk",
"action_type": "crawl",
"input_source": "static_urls",
"template_slug": "docs-crawl",
"limits": { "max_depth": 2, "max_pages": 150 }
},
{
"step_id": "extract",
"action_type": "scrape",
"input_source": "previous_step",
"template_slug": "docs-api-table-scrape",
"output_schema": {
"type": "object",
"properties": {
"endpoint": { "type": "string" },
"method": { "type": "string" }
}
}
}
]
}
}
Run
cURL
Go
TypeScript
Python
Rust
# Trigger a run of the task identified by $TASK_ID; the request body is an empty JSON object.
curl --silent --show-error --request POST "$TITAN_API_URL/api/v1/tasks/$TASK_ID/run" \
  --header "Authorization: Bearer $TITAN_TOKEN" \
  --header "Content-Type: application/json" \
  --data '{}'
// Trigger a run of the task identified by TASK_ID and print the raw response
// body. Every fallible call is checked so that a failure surfaces as the real
// error rather than a nil-pointer panic further down.
req, err := http.NewRequest(
	"POST",
	fmt.Sprintf("%s/api/v1/tasks/%s/run", os.Getenv("TITAN_API_URL"), os.Getenv("TASK_ID")),
	bytes.NewReader([]byte("{}")),
)
if err != nil {
	// req is nil here (e.g. TITAN_API_URL is malformed); do not touch req.Header.
	panic(err)
}
req.Header.Set("Authorization", "Bearer "+os.Getenv("TITAN_TOKEN"))
req.Header.Set("Content-Type", "application/json")
resp, err := http.DefaultClient.Do(req)
if err != nil {
	// resp may be nil when Do returns an error, so bail out before using resp.Body.
	panic(err)
}
defer resp.Body.Close()
out, err := io.ReadAll(resp.Body)
if err != nil {
	panic(err)
}
fmt.Println(string(out))
// Trigger a run of the task identified by TASK_ID and print the raw response body.
const apiUrl = process.env.TITAN_API_URL!;
const bearer = process.env.TITAN_TOKEN!;
const id = process.env.TASK_ID!;

const runEndpoint = `${apiUrl}/api/v1/tasks/${id}/run`;
const requestHeaders = {
  Authorization: `Bearer ${bearer}`,
  "Content-Type": "application/json",
};

// The run endpoint is called with an empty JSON object as the body.
const response = await fetch(runEndpoint, {
  method: "POST",
  headers: requestHeaders,
  body: "{}",
});
const responseText = await response.text();
console.log(responseText);
import os
import urllib.request

# Trigger a run of the task identified by TASK_ID and print the raw response body.
task_id = os.environ["TASK_ID"]
run_endpoint = f"{os.environ['TITAN_API_URL']}/api/v1/tasks/{task_id}/run"
request_headers = {
    "Authorization": f"Bearer {os.environ['TITAN_TOKEN']}",
    "Content-Type": "application/json",
}

# The run endpoint is called with an empty JSON object as the body.
run_request = urllib.request.Request(
    run_endpoint,
    data=b"{}",
    headers=request_headers,
    method="POST",
)
with urllib.request.urlopen(run_request) as response:
    raw_body = response.read()
    print(raw_body.decode())
// Trigger a run of the task identified by TASK_ID and print the raw response
// body. Written against the ureq 2.x API.
let base = std::env::var("TITAN_API_URL").expect("TITAN_API_URL");
let token = std::env::var("TITAN_TOKEN").expect("TITAN_TOKEN");
let task_id = std::env::var("TASK_ID").expect("TASK_ID");

// `ureq::post` takes `&str` in ureq 2.x, so bind the formatted URL and borrow
// it instead of passing the `String` by value.
let url = format!("{base}/api/v1/tasks/{task_id}/run");
// NOTE: ureq 2.x reports 4xx/5xx responses as `Err`, so `expect` panics on an
// HTTP error status as well as on transport failures.
let resp = ureq::post(&url)
    .set("Authorization", &format!("Bearer {token}"))
    .set("Content-Type", "application/json")
    .send_json(serde_json::json!({}))
    .expect("run");
println!("{}", resp.into_string().expect("body"));