curl --request POST \
--url https://gateway.webit.live/v1/crawl \
--header 'Content-Type: application/json' \
--data '
{
"url": "https://example.com",
"name": "The best crawl ever",
"sitemap": "include",
"crawl_entire_domain": false,
"limit": 100,
"max_discovery_depth": 3,
"exclude_paths": [
"/exclude-this-path",
"/and-this-path"
],
"include_paths": [
"/include-this-path",
"/and-this-path"
],
"ignore_query_parameters": false,
"allow_external_links": false,
"allow_subdomains": false,
"callback": {
"url": "https://example.com/webhook",
"headers": {
"X-Custom-Header": "value"
},
"metadata": {
"crawlId": "12345"
},
"events": [
"page"
]
},
"extract_options": {
"debug_options": {
"collect_har": true,
"record_screen": true,
"verbose": true,
"trace": true,
"no_retry_mode": true,
"upload_engine_logs": true,
"solve_captcha": true,
"show_cursor": true,
"with_proxy_usage": true,
"redact": true
},
"url": "https://example.com/page",
"cookies": [],
"parse_options": {
"merge_dynamic": true
},
"parse": true,
"dynamic_parser": {
"myParser": {
"option1": "value1"
}
},
"parser": {
"myParser": {
"option1": "value1"
}
},
"type": "generic",
"method": "GET",
"referrer_type": "no-referrer",
"expected_status_codes": [
200,
201
],
"headers": {
"User-Agent": "CustomBot/1.0",
"Accept-Language": "en-US"
},
"raw_headers": true,
"request_timeout": 30000,
"client_timeout": 25000,
"return_response_headers_as_header": true,
"format": "json",
"skill": "dynamic-content",
"http2": true,
"ip6": false,
"is_xhr": true,
"no_html": false,
"export_userbrowser": false,
"save_userbrowser": false,
"native_mode": "requester",
"driver": "vx8",
"disable_ip_check": false,
"template": {
"name": "<string>",
"params": {}
},
"markdown": false,
"consent_header": true,
"skip_ubct": false,
"userbrowser_creation_template_rendered": {
"id": "<string>",
"allowed_parameter_names": [
"<string>"
],
"render_flow_rendered": [
{}
]
},
"query_template": {
"id": "3c90c3cc-0d44-4b50-8888-8dd25736052a",
"api_type": "WEB",
"params": {},
"pagination": {
"next_page_params": {}
}
},
"render": true,
"render_options": {
"wait_until": "networkidle2",
"render_type": "load",
"headless": true,
"timeout": 30000,
"userbrowser": true,
"connector_type": "webit-cdp",
"hackium_configuration": {
"collect_logs": false,
"enable_verbose_logs": false,
"enable_sniffer": false,
"do_not_fix_math_salt": false,
"enable_document_element_spoof": false,
"enable_key_ordering": false,
"enable_document_has_focus": false,
"enable_fake_navigation_history": false
},
"include_iframes": true,
"browser_engine": "chrome",
"fingerprint_id": "fp-abc123",
"disabled_resources": [
"image",
"stylesheet"
],
"adblock": true,
"cache": false,
"blocked_domains": [
"ads.example.com",
"tracker.com"
],
"with_performance_metrics": true,
"no_accept_encoding": true,
"override_permissions": true,
"store_local_storage": true,
"load_local_storage": true,
"local_storage_keys_to_load": [
"authToken",
"userId"
],
"enable_2captcha": true,
"mouse_strategy": "linear",
"typing_strategy": "simple",
"typing_interval": 100,
"random_header_order": true,
"extensions": [
"extension-id-1",
"extension-id-2"
]
},
"network_capture": [
{
"status_code": 349.5,
"method": "GET",
"url": {
"value": "<string>",
"type": "exact"
},
"resource_type": [
"document",
"script",
"xhr",
"fetch"
],
"validation": false,
"wait_for_requests_count": 0,
"wait_for_requests_count_timeout": 150000
}
],
"render_flow": [
{
"wait": {
"delay": 2000
}
},
{
"click": {
"selector": "#load-more",
"timeout": 5000
}
}
],
"session": {
"id": "<string>",
"timeout": 1,
"retry": false,
"prefetch_userbrowser": false
},
"tag": "campaign-2024-q1",
"metadata": {
"source": "web-app",
"pipeline_execution_id": 12345,
"execution_id": "exec-abc123",
"endpoint": "/api/v2/scrape",
"definition_id": 456,
"definition_name": "product-scraper",
"template_id": 789,
"template_name": "e-commerce-template",
"account_name": "acme-corp",
"flowit_task_id": "task-xyz789",
"input_id": "input-123",
"query_template_id": "template-qry-001"
},
"locale": "en-US",
"country": "US",
"device": "desktop",
"proxy_provider": "brightdata",
"proxy_providers": {
"brightdata": 70,
"oxylabs": 30
},
"browser": "chrome",
"os": "windows",
"no_userbrowser": false,
"state": "CA",
"city": "Los Angeles"
}
}
'{
"id": "3c90c3cc-0d44-4b50-8888-8dd25736052a",
"url": "<string>"
}curl --request POST \
--url https://gateway.webit.live/v1/crawl \
--header 'Content-Type: application/json' \
--data '
{
"url": "https://example.com",
"name": "The best crawl ever",
"sitemap": "include",
"crawl_entire_domain": false,
"limit": 100,
"max_discovery_depth": 3,
"exclude_paths": [
"/exclude-this-path",
"/and-this-path"
],
"include_paths": [
"/include-this-path",
"/and-this-path"
],
"ignore_query_parameters": false,
"allow_external_links": false,
"allow_subdomains": false,
"callback": {
"url": "https://example.com/webhook",
"headers": {
"X-Custom-Header": "value"
},
"metadata": {
"crawlId": "12345"
},
"events": [
"page"
]
},
"extract_options": {
"debug_options": {
"collect_har": true,
"record_screen": true,
"verbose": true,
"trace": true,
"no_retry_mode": true,
"upload_engine_logs": true,
"solve_captcha": true,
"show_cursor": true,
"with_proxy_usage": true,
"redact": true
},
"url": "https://example.com/page",
"cookies": [],
"parse_options": {
"merge_dynamic": true
},
"parse": true,
"dynamic_parser": {
"myParser": {
"option1": "value1"
}
},
"parser": {
"myParser": {
"option1": "value1"
}
},
"type": "generic",
"method": "GET",
"referrer_type": "no-referrer",
"expected_status_codes": [
200,
201
],
"headers": {
"User-Agent": "CustomBot/1.0",
"Accept-Language": "en-US"
},
"raw_headers": true,
"request_timeout": 30000,
"client_timeout": 25000,
"return_response_headers_as_header": true,
"format": "json",
"skill": "dynamic-content",
"http2": true,
"ip6": false,
"is_xhr": true,
"no_html": false,
"export_userbrowser": false,
"save_userbrowser": false,
"native_mode": "requester",
"driver": "vx8",
"disable_ip_check": false,
"template": {
"name": "<string>",
"params": {}
},
"markdown": false,
"consent_header": true,
"skip_ubct": false,
"userbrowser_creation_template_rendered": {
"id": "<string>",
"allowed_parameter_names": [
"<string>"
],
"render_flow_rendered": [
{}
]
},
"query_template": {
"id": "3c90c3cc-0d44-4b50-8888-8dd25736052a",
"api_type": "WEB",
"params": {},
"pagination": {
"next_page_params": {}
}
},
"render": true,
"render_options": {
"wait_until": "networkidle2",
"render_type": "load",
"headless": true,
"timeout": 30000,
"userbrowser": true,
"connector_type": "webit-cdp",
"hackium_configuration": {
"collect_logs": false,
"enable_verbose_logs": false,
"enable_sniffer": false,
"do_not_fix_math_salt": false,
"enable_document_element_spoof": false,
"enable_key_ordering": false,
"enable_document_has_focus": false,
"enable_fake_navigation_history": false
},
"include_iframes": true,
"browser_engine": "chrome",
"fingerprint_id": "fp-abc123",
"disabled_resources": [
"image",
"stylesheet"
],
"adblock": true,
"cache": false,
"blocked_domains": [
"ads.example.com",
"tracker.com"
],
"with_performance_metrics": true,
"no_accept_encoding": true,
"override_permissions": true,
"store_local_storage": true,
"load_local_storage": true,
"local_storage_keys_to_load": [
"authToken",
"userId"
],
"enable_2captcha": true,
"mouse_strategy": "linear",
"typing_strategy": "simple",
"typing_interval": 100,
"random_header_order": true,
"extensions": [
"extension-id-1",
"extension-id-2"
]
},
"network_capture": [
{
"status_code": 349.5,
"method": "GET",
"url": {
"value": "<string>",
"type": "exact"
},
"resource_type": [
"document",
"script",
"xhr",
"fetch"
],
"validation": false,
"wait_for_requests_count": 0,
"wait_for_requests_count_timeout": 150000
}
],
"render_flow": [
{
"wait": {
"delay": 2000
}
},
{
"click": {
"selector": "#load-more",
"timeout": 5000
}
}
],
"session": {
"id": "<string>",
"timeout": 1,
"retry": false,
"prefetch_userbrowser": false
},
"tag": "campaign-2024-q1",
"metadata": {
"source": "web-app",
"pipeline_execution_id": 12345,
"execution_id": "exec-abc123",
"endpoint": "/api/v2/scrape",
"definition_id": 456,
"definition_name": "product-scraper",
"template_id": 789,
"template_name": "e-commerce-template",
"account_name": "acme-corp",
"flowit_task_id": "task-xyz789",
"input_id": "input-123",
"query_template_id": "template-qry-001"
},
"locale": "en-US",
"country": "US",
"device": "desktop",
"proxy_provider": "brightdata",
"proxy_providers": {
"brightdata": 70,
"oxylabs": 30
},
"browser": "chrome",
"os": "windows",
"no_userbrowser": false,
"state": "CA",
"city": "Los Angeles"
}
}
'{
"id": "3c90c3cc-0d44-4b50-8888-8dd25736052a",
"url": "<string>"
}

URL to crawl.
"https://example.com"
Name of the crawl.
"The best crawl ever"
Sitemap and other methods will be used together to find URLs.
Available options: skip, include, only
Example: "include"
Allows the crawler to follow internal links to sibling or parent URLs, not just child paths.
false
Maximum number of pages to crawl.
Required range: 1 <= x <= 10000
Example: 100
Maximum depth to crawl based on discovery order.
Required range: 1 <= x <= 20
Example: 3
URL pathname regex patterns that exclude matching URLs from the crawl.
["/exclude-this-path", "/and-this-path"]
URL pathname regex patterns that include matching URLs in the crawl.
["/include-this-path", "/and-this-path"]
Do not re-scrape the same path with different (or no) query parameters.
false
Allows the crawler to follow links to external websites.
false
Allows the crawler to follow links to subdomains of the main domain.
false
Webhook configuration for receiving crawl results.
Show child attributes
Show child attributes
Was this page helpful?