Documentation Index
Fetch the complete documentation index at: https://webscraping.titannet.io/docs/llms.txt
Use this file to discover all available pages before exploring further.
The scrape action type is the closest analogue to classic scraping: you supply concrete URLs (or URLs from previous_step / task_url_inventory) and an output_schema, and workers return structured records (and media when the workflow and template support it). This is the right primitive when you already know which pages must become rows in your dataset.
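Examples read the API base URL and bearer token from the TITAN_API_URL and TITAN_TOKEN environment variables. Tab titles match other integration pages. The Rust examples use the ureq (2.x API) and serde_json crates.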
When to use scrape
- You have a stable URL list (product pages, filings, dashboards).
- You need schema-shaped JSON for downstream analytics, RAG chunks, or alerts.
- An upstream search or crawl step produced the URLs you want to read deeply.
Single-action example (POST /api/v1/tasks)
schedule must satisfy validation for execution_type: scheduled in your environment. If you only need a one-off run, use execution_type: single and omit schedule.
cURL
Go
TypeScript
Python
Rust
# Create a scheduled scrape task by POSTing its JSON definition to /api/v1/tasks.
# -sS hides the progress meter but still prints errors; the bearer token is read
# from $TITAN_TOKEN and the API base URL from $TITAN_API_URL.
curl -sS -X POST "$TITAN_API_URL/api/v1/tasks" \
-H "Authorization: Bearer $TITAN_TOKEN" \
-H "Content-Type: application/json" \
-d '{
"name": "PDP price monitor",
"objective": "Extract price, currency, and availability for each PDP URL",
"execution_type": "scheduled",
"urls": [
"https://shop.example.com/p/100",
"https://shop.example.com/p/200"
],
"action_type": "scrape",
"input_source": "static_urls",
"template_slug": "commerce-pdp",
"output_schema": {
"type": "object",
"properties": {
"price": { "type": "number" },
"currency": { "type": "string" },
"in_stock": { "type": "boolean" }
},
"required": ["price", "currency", "in_stock"]
},
"schedule": {
"cron": "15 */6 * * *",
"timezone": "UTC"
}
}'
// Task definition: a scheduled scrape over two static product URLs, with the
// JSON Schema that each returned record must satisfy.
body := map[string]any{
	"name":           "PDP price monitor",
	"objective":      "Extract price, currency, and availability for each PDP URL",
	"execution_type": "scheduled",
	"urls": []string{
		"https://shop.example.com/p/100",
		"https://shop.example.com/p/200",
	},
	"action_type":   "scrape",
	"input_source":  "static_urls",
	"template_slug": "commerce-pdp",
	"output_schema": map[string]any{
		"type": "object",
		"properties": map[string]any{
			"price":    map[string]any{"type": "number"},
			"currency": map[string]any{"type": "string"},
			"in_stock": map[string]any{"type": "boolean"},
		},
		"required": []string{"price", "currency", "in_stock"},
	},
	"schedule": map[string]any{
		"cron": "15 */6 * * *", "timezone": "UTC",
	},
}
b, err := json.Marshal(body)
if err != nil {
	panic(err)
}
req, err := http.NewRequest("POST", os.Getenv("TITAN_API_URL")+"/api/v1/tasks", bytes.NewReader(b))
if err != nil {
	panic(err)
}
req.Header.Set("Authorization", "Bearer "+os.Getenv("TITAN_TOKEN"))
req.Header.Set("Content-Type", "application/json")
resp, err := http.DefaultClient.Do(req)
if err != nil {
	// resp is nil on a transport error; stop before touching resp.Body.
	panic(err)
}
defer resp.Body.Close()
out, err := io.ReadAll(resp.Body)
if err != nil {
	panic(err)
}
// Print the raw response body regardless of HTTP status.
fmt.Println(string(out))
const base = process.env.TITAN_API_URL!;
const token = process.env.TITAN_TOKEN!;

// JSON Schema for the structured record requested from each URL.
const outputSchema = {
  type: "object",
  properties: {
    price: { type: "number" },
    currency: { type: "string" },
    in_stock: { type: "boolean" },
  },
  required: ["price", "currency", "in_stock"],
};

// Scheduled scrape task over a static URL list.
const task = {
  name: "PDP price monitor",
  objective: "Extract price, currency, and availability for each PDP URL",
  execution_type: "scheduled",
  urls: ["https://shop.example.com/p/100", "https://shop.example.com/p/200"],
  action_type: "scrape",
  input_source: "static_urls",
  template_slug: "commerce-pdp",
  output_schema: outputSchema,
  schedule: { cron: "15 */6 * * *", timezone: "UTC" },
};

const headers = {
  Authorization: `Bearer ${token}`,
  "Content-Type": "application/json",
};

const res = await fetch(`${base}/api/v1/tasks`, {
  method: "POST",
  headers,
  body: JSON.stringify(task),
});
// Print the raw response body.
console.log(await res.text());
import json
import os
import urllib.request

# Task definition: a scheduled scrape over two static product URLs, with the
# JSON Schema that each returned record must satisfy.
payload = {
    "name": "PDP price monitor",
    "objective": "Extract price, currency, and availability for each PDP URL",
    "execution_type": "scheduled",
    "urls": [
        "https://shop.example.com/p/100",
        "https://shop.example.com/p/200",
    ],
    "action_type": "scrape",
    "input_source": "static_urls",
    "template_slug": "commerce-pdp",
    "output_schema": {
        "type": "object",
        "properties": {
            "price": {"type": "number"},
            "currency": {"type": "string"},
            "in_stock": {"type": "boolean"},
        },
        "required": ["price", "currency", "in_stock"],
    },
    "schedule": {"cron": "15 */6 * * *", "timezone": "UTC"},
}
req = urllib.request.Request(
    f"{os.environ['TITAN_API_URL']}/api/v1/tasks",
    data=json.dumps(payload).encode(),
    headers={
        "Authorization": f"Bearer {os.environ['TITAN_TOKEN']}",
        "Content-Type": "application/json",
    },
    method="POST",
)
# urlopen raises urllib.error.HTTPError for 4xx/5xx responses.
with urllib.request.urlopen(req) as resp:
    print(resp.read().decode())
use serde_json::json;

// API base URL and bearer token come from the environment; a missing variable
// panics with the variable's name.
let base = std::env::var("TITAN_API_URL").expect("TITAN_API_URL");
let token = std::env::var("TITAN_TOKEN").expect("TITAN_TOKEN");

// Task definition: a scheduled scrape over two static product URLs, with the
// JSON Schema that each returned record must satisfy.
let body = json!({
    "name": "PDP price monitor",
    "objective": "Extract price, currency, and availability for each PDP URL",
    "execution_type": "scheduled",
    "urls": [
        "https://shop.example.com/p/100",
        "https://shop.example.com/p/200"
    ],
    "action_type": "scrape",
    "input_source": "static_urls",
    "template_slug": "commerce-pdp",
    "output_schema": {
        "type": "object",
        "properties": {
            "price": { "type": "number" },
            "currency": { "type": "string" },
            "in_stock": { "type": "boolean" }
        },
        "required": ["price", "currency", "in_stock"]
    },
    "schedule": { "cron": "15 */6 * * *", "timezone": "UTC" }
});

// ureq 2.x `post` takes `&str`, so borrow the formatted URL rather than
// passing the owned `String`.
let resp = ureq::post(&format!("{base}/api/v1/tasks"))
    .set("Authorization", &format!("Bearer {token}"))
    .set("Content-Type", "application/json")
    .send_json(body)
    .expect("create");
println!("{}", resp.into_string().expect("body"));
Consume output from a prior step
When input_source is previous_step, the scrape step belongs inside an execution_plan; the previous step must emit URLs your scrape template understands.
{
"step_id": "extract_pdps",
"action_type": "scrape",
"input_source": "previous_step",
"template_slug": "commerce-pdp",
"output_schema": {
"type": "object",
"properties": {
"price": { "type": "number" }
}
}
}
Run with execution-time secrets
Do not send secret_payload on create. Pass sensitive material when you trigger the run:
cURL
Go
TypeScript
Python
Rust
# Trigger a run of task $TASK_ID, supplying secret_payload in the run request
# body instead of at task creation.
curl -sS -X POST "$TITAN_API_URL/api/v1/tasks/$TASK_ID/run" \
-H "Authorization: Bearer $TITAN_TOKEN" \
-H "Content-Type: application/json" \
-d '{
"secret_payload": {
"vendor_session": "read-from-vault-at-runtime"
}
}'
// Secrets are sent with the run request, not stored on the task.
body := map[string]any{
	"secret_payload": map[string]string{"vendor_session": "read-from-vault-at-runtime"},
}
b, err := json.Marshal(body)
if err != nil {
	panic(err)
}
req, err := http.NewRequest(
	"POST",
	fmt.Sprintf("%s/api/v1/tasks/%s/run", os.Getenv("TITAN_API_URL"), os.Getenv("TASK_ID")),
	bytes.NewReader(b),
)
if err != nil {
	panic(err)
}
req.Header.Set("Authorization", "Bearer "+os.Getenv("TITAN_TOKEN"))
req.Header.Set("Content-Type", "application/json")
resp, err := http.DefaultClient.Do(req)
if err != nil {
	// resp is nil on a transport error; stop before touching resp.Body.
	panic(err)
}
defer resp.Body.Close()
out, err := io.ReadAll(resp.Body)
if err != nil {
	panic(err)
}
fmt.Println(string(out))
const base = process.env.TITAN_API_URL!;
const token = process.env.TITAN_TOKEN!;
const taskId = process.env.TASK_ID!;

// Secrets travel with the run request rather than the task definition.
const runRequest = {
  secret_payload: { vendor_session: "read-from-vault-at-runtime" },
};

const headers = {
  Authorization: `Bearer ${token}`,
  "Content-Type": "application/json",
};

const res = await fetch(`${base}/api/v1/tasks/${taskId}/run`, {
  method: "POST",
  headers,
  body: JSON.stringify(runRequest),
});
console.log(await res.text());
import json
import os
import urllib.request

# Secrets travel with the run request rather than the task definition.
payload = {"secret_payload": {"vendor_session": "read-from-vault-at-runtime"}}
tid = os.environ["TASK_ID"]
req = urllib.request.Request(
    f"{os.environ['TITAN_API_URL']}/api/v1/tasks/{tid}/run",
    data=json.dumps(payload).encode(),
    headers={
        "Authorization": f"Bearer {os.environ['TITAN_TOKEN']}",
        "Content-Type": "application/json",
    },
    method="POST",
)
# urlopen raises urllib.error.HTTPError for 4xx/5xx responses.
with urllib.request.urlopen(req) as resp:
    print(resp.read().decode())
use serde_json::json;

// Base URL, token and task id come from the environment; a missing variable
// panics with the variable's name.
let base = std::env::var("TITAN_API_URL").expect("TITAN_API_URL");
let token = std::env::var("TITAN_TOKEN").expect("TITAN_TOKEN");
let task_id = std::env::var("TASK_ID").expect("TASK_ID");

// Secrets travel with the run request rather than the task definition.
let body = json!({
    "secret_payload": { "vendor_session": "read-from-vault-at-runtime" }
});

// ureq 2.x `post` takes `&str`, so borrow the formatted URL rather than
// passing the owned `String`.
let resp = ureq::post(&format!("{base}/api/v1/tasks/{task_id}/run"))
    .set("Authorization", &format!("Bearer {token}"))
    .set("Content-Type", "application/json")
    .send_json(body)
    .expect("run");
println!("{}", resp.into_string().expect("body"));
Fetch results (after execution completes)
cURL
Go
TypeScript
Python
Rust
# Fetch the results of execution $EXECUTION_ID. With no -X or -d, curl issues a GET.
curl -sS "$TITAN_API_URL/api/v1/executions/$EXECUTION_ID/results" \
-H "Authorization: Bearer $TITAN_TOKEN"
// GET the results of the execution named by EXECUTION_ID.
req, err := http.NewRequest(
	"GET",
	fmt.Sprintf("%s/api/v1/executions/%s/results", os.Getenv("TITAN_API_URL"), os.Getenv("EXECUTION_ID")),
	nil,
)
if err != nil {
	panic(err)
}
req.Header.Set("Authorization", "Bearer "+os.Getenv("TITAN_TOKEN"))
resp, err := http.DefaultClient.Do(req)
if err != nil {
	// resp is nil on a transport error; stop before touching resp.Body.
	panic(err)
}
defer resp.Body.Close()
out, err := io.ReadAll(resp.Body)
if err != nil {
	panic(err)
}
fmt.Println(string(out))
const base = process.env.TITAN_API_URL!;
const token = process.env.TITAN_TOKEN!;
const executionId = process.env.EXECUTION_ID!;

// fetch defaults to GET when no method is given.
const resultsUrl = `${base}/api/v1/executions/${executionId}/results`;
const headers = { Authorization: `Bearer ${token}` };
const res = await fetch(resultsUrl, { headers });
console.log(await res.text());
import os
import urllib.request

# GET the results of the execution named by EXECUTION_ID.
eid = os.environ["EXECUTION_ID"]
req = urllib.request.Request(
    f"{os.environ['TITAN_API_URL']}/api/v1/executions/{eid}/results",
    headers={"Authorization": f"Bearer {os.environ['TITAN_TOKEN']}"},
    method="GET",
)
# urlopen raises urllib.error.HTTPError for 4xx/5xx responses.
with urllib.request.urlopen(req) as resp:
    print(resp.read().decode())
// Base URL, token and execution id come from the environment; a missing
// variable panics with the variable's name.
let base = std::env::var("TITAN_API_URL").expect("TITAN_API_URL");
let token = std::env::var("TITAN_TOKEN").expect("TITAN_TOKEN");
let execution_id = std::env::var("EXECUTION_ID").expect("EXECUTION_ID");

// ureq 2.x `get` takes `&str`, so borrow the formatted URL rather than
// passing the owned `String`.
let resp = ureq::get(&format!("{base}/api/v1/executions/{execution_id}/results"))
    .set("Authorization", &format!("Bearer {token}"))
    .call()
    .expect("results");
println!("{}", resp.into_string().expect("body"));
Use the OpenAPI Task Service pages for export and media download options.