Documentation Index

Fetch the complete documentation index at: https://webscraping.titannet.io/docs/llms.txt

Use this file to discover all available pages before exploring further.

The scrape action type is the closest analogue to classic scraping: you supply concrete URLs (or URLs from previous_step / task_url_inventory) and an output_schema, and workers return structured records (and media when the workflow and template support it). This is the right primitive when you already know which pages must become rows in your dataset.
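For instance, with an output_schema describing price fields, each scraped page yields one record of that shape. The values below are illustrative, and the exact result envelope (wrapper fields, metadata) may differ by template:

```json
{
  "price": 19.99,
  "currency": "USD",
  "in_stock": true
}
```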
Examples use the TITAN_API_URL and TITAN_TOKEN environment variables. Rust examples use ureq and serde_json.

When to use scrape

  • You have a stable URL list (product pages, filings, dashboards).
  • You need schema-shaped JSON for downstream analytics, RAG chunks, or alerts.
  • An upstream search or crawl step produced the URLs you want to read deeply.

Single-action example (POST /api/v1/tasks)

schedule must satisfy validation for execution_type: scheduled in your environment. If you only need a one-off run, use execution_type: single and omit schedule.
curl -sS -X POST "$TITAN_API_URL/api/v1/tasks" \
  -H "Authorization: Bearer $TITAN_TOKEN" \
  -H "Content-Type: application/json" \
  -d '{
    "name": "PDP price monitor",
    "objective": "Extract price, currency, and availability for each PDP URL",
    "execution_type": "scheduled",
    "urls": [
      "https://shop.example.com/p/100",
      "https://shop.example.com/p/200"
    ],
    "action_type": "scrape",
    "input_source": "static_urls",
    "template_slug": "commerce-pdp",
    "output_schema": {
      "type": "object",
      "properties": {
        "price": { "type": "number" },
        "currency": { "type": "string" },
        "in_stock": { "type": "boolean" }
      },
      "required": ["price", "currency", "in_stock"]
    },
    "schedule": {
      "cron": "15 */6 * * *",
      "timezone": "UTC"
    }
  }'
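The same request can be issued from Python. This is a minimal sketch using only the standard library, mirroring the curl payload above; the helper names here are illustrative, not part of any SDK:

```python
import json
import os
import urllib.request

def build_task_body():
    # Mirrors the curl payload above: a scheduled scrape over static URLs.
    return {
        "name": "PDP price monitor",
        "objective": "Extract price, currency, and availability for each PDP URL",
        "execution_type": "scheduled",
        "urls": [
            "https://shop.example.com/p/100",
            "https://shop.example.com/p/200",
        ],
        "action_type": "scrape",
        "input_source": "static_urls",
        "template_slug": "commerce-pdp",
        "output_schema": {
            "type": "object",
            "properties": {
                "price": {"type": "number"},
                "currency": {"type": "string"},
                "in_stock": {"type": "boolean"},
            },
            "required": ["price", "currency", "in_stock"],
        },
        "schedule": {"cron": "15 */6 * * *", "timezone": "UTC"},
    }

def create_task():
    # POST the body to /api/v1/tasks with a bearer token, as in the curl call.
    req = urllib.request.Request(
        f"{os.environ['TITAN_API_URL']}/api/v1/tasks",
        data=json.dumps(build_task_body()).encode(),
        headers={
            "Authorization": f"Bearer {os.environ['TITAN_TOKEN']}",
            "Content-Type": "application/json",
        },
        method="POST",
    )
    with urllib.request.urlopen(req) as resp:
        return json.load(resp)
```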

Consume output from a prior step

When input_source is previous_step, the scrape step belongs inside an execution_plan; the previous step must emit URLs your scrape template understands.
{
  "step_id": "extract_pdps",
  "action_type": "scrape",
  "input_source": "previous_step",
  "template_slug": "commerce-pdp",
  "output_schema": {
    "type": "object",
    "properties": {
      "price": { "type": "number" }
    }
  }
}
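For context, one plausible shape for the surrounding execution_plan. Only the scrape step itself is taken from above; the upstream step (an assumed "crawl" action) and the plan-level fields are illustrative assumptions, not confirmed API values:

```json
{
  "name": "Crawl then extract PDPs",
  "execution_type": "single",
  "execution_plan": [
    {
      "step_id": "discover_pdps",
      "action_type": "crawl",
      "objective": "Collect product detail page URLs"
    },
    {
      "step_id": "extract_pdps",
      "action_type": "scrape",
      "input_source": "previous_step",
      "template_slug": "commerce-pdp",
      "output_schema": {
        "type": "object",
        "properties": {
          "price": { "type": "number" }
        }
      }
    }
  ]
}
```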

Run with execution-time secrets

Do not send secret_payload on create. Pass sensitive material when you trigger the run:
curl -sS -X POST "$TITAN_API_URL/api/v1/tasks/$TASK_ID/run" \
  -H "Authorization: Bearer $TITAN_TOKEN" \
  -H "Content-Type: application/json" \
  -d '{
    "secret_payload": {
      "vendor_session": "read-from-vault-at-runtime"
    }
  }'
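The same trigger call can be sketched in Python, reading the secret from the process environment at run time so it never lands in the task definition. The VENDOR_SESSION variable name is an assumption standing in for your vault integration:

```python
import json
import os
import urllib.request

def build_run_body(secrets: dict) -> dict:
    # secret_payload travels only in the run request, never in the stored task.
    if not secrets or not all(secrets.values()):
        raise ValueError("refusing to trigger a run with empty secrets")
    return {"secret_payload": secrets}

def trigger_run(task_id: str) -> dict:
    # VENDOR_SESSION is a placeholder: read it from your vault at runtime.
    body = build_run_body({"vendor_session": os.environ["VENDOR_SESSION"]})
    req = urllib.request.Request(
        f"{os.environ['TITAN_API_URL']}/api/v1/tasks/{task_id}/run",
        data=json.dumps(body).encode(),
        headers={
            "Authorization": f"Bearer {os.environ['TITAN_TOKEN']}",
            "Content-Type": "application/json",
        },
        method="POST",
    )
    with urllib.request.urlopen(req) as resp:
        return json.load(resp)
```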

Fetch results (after execution completes)

curl -sS "$TITAN_API_URL/api/v1/executions/$EXECUTION_ID/results" \
  -H "Authorization: Bearer $TITAN_TOKEN"
Use the OpenAPI Task Service pages for export and media download options.
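A Python sketch of the same results fetch; the URL follows the curl call above, and the JSON shape of the response is template-dependent:

```python
import json
import os
import urllib.request

def results_url(base_url: str, execution_id: str) -> str:
    # Same path as the curl example above.
    return f"{base_url}/api/v1/executions/{execution_id}/results"

def fetch_results(execution_id: str):
    # Authenticated GET; returns the parsed JSON body.
    req = urllib.request.Request(
        results_url(os.environ["TITAN_API_URL"], execution_id),
        headers={"Authorization": f"Bearer {os.environ['TITAN_TOKEN']}"},
    )
    with urllib.request.urlopen(req) as resp:
        return json.load(resp)
```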