
The crawl action type is navigation-first: start from known URLs (or URLs produced by an earlier step) and follow links within configured depth and breadth limits. Use it when discovery requires systematically expanding a site or subgraph, rather than a single hop into a fixed list of pages.
All examples use the TITAN_API_URL and TITAN_TOKEN environment variables; Rust examples use the ureq and serde_json crates.

When to use crawl

  • You know entry URLs but not every target page in advance.
  • Layout changes often, but internal linking is stable enough to traverse.
  • You will hand off URLs to scrape for schema-shaped extraction, or combine crawl with search upstream.

Inputs and wiring

  • static_urls — crawl starts from URLs on the task.
  • previous_step — crawl consumes URLs emitted by search or another step.
  • task_url_inventory — crawl is driven from the URL inventory when your Titan environment supports that flow.
Always set limits (depth, max pages, domain rules, or whatever else your template and script require) so runs stay bounded and predictable.

Single-action example (POST /api/v1/tasks)

Field names under limits are template-specific; align them with the script you bind.
curl -sS -X POST "$TITAN_API_URL/api/v1/tasks" \
  -H "Authorization: Bearer $TITAN_TOKEN" \
  -H "Content-Type: application/json" \
  -d '{
    "name": "Vendor docs crawl",
    "objective": "Enumerate documentation pages under https://docs.vendor.example up to depth 2",
    "execution_type": "single",
    "urls": ["https://docs.vendor.example/"],
    "action_type": "crawl",
    "input_source": "static_urls",
    "template_slug": "docs-site-crawl",
    "limits": {
      "max_depth": 2,
      "max_pages": 200
    }
  }'

Chained example (crawl → scrape)

In an execution_plan, keep each step self-contained. Crawl discovers URLs; scrape reads them into records:
{
  "name": "Crawl vendor docs then extract API tables",
  "objective": "Walk the docs tree, then scrape each page for REST tables",
  "execution_type": "single",
  "urls": ["https://docs.vendor.example/"],
  "execution_plan": {
    "steps": [
      {
        "step_id": "walk",
        "action_type": "crawl",
        "input_source": "static_urls",
        "template_slug": "docs-crawl",
        "limits": { "max_depth": 2, "max_pages": 150 }
      },
      {
        "step_id": "extract",
        "action_type": "scrape",
        "input_source": "previous_step",
        "template_slug": "docs-api-table-scrape",
        "output_schema": {
          "type": "object",
          "properties": {
            "endpoint": { "type": "string" },
            "method": { "type": "string" }
          }
        }
      }
    ]
  }
}

Run

curl -sS -X POST "$TITAN_API_URL/api/v1/tasks/$TASK_ID/run" \
  -H "Authorization: Bearer $TITAN_TOKEN" \
  -H "Content-Type: application/json" \
  -d '{}'