Skip to content
Scrappa Get API key

Web Scraper

Fetch any URL and extract structured data including title, meta description, keywords, favicon, social links, all links on the page, emails, phone numbers, images, body text, and detected languages. Returns the site's HTTP status code separately so you can distinguish site errors (404, 500) from infrastructure errors. Use response_type=markdown to get only the page content as clean markdown.

Run this endpoint

Web Scraper 1 credit/request

Endpoint

GET https://scrappa.co/api/web-scraper?url=https%3A%2F%2Fexample.com&include_html=0
Request preview GET
https://scrappa.co/api/web-scraper?url=https%3A%2F%2Fexample.com&include_html=0
Auth header x-api-key
Cost 1 credit/request
url = https://example.com
Response preview 200 OK
{
    "success": true,
    "site_status_code": 200,
    "url": "https://example.com",
    "final_url": "https://example.com",
    "data": {
        "title": "Example Domain",
        "description": "This domain is for use in illustrative examples.",
        "keywords": [],
        "favicon": "https://example.com/favicon.ico",
        "social_links": {
            "linkedin": null,
            "twitter": null,
            "facebook": null,
...

Parameters

Start with the required fields, then add optional filters only when your use case needs them.

Runnable path

1 required parameter needed before sending a request.

2 optional filters available.

url string Required
Open example

The URL to scrape (must include protocol, e.g., https://example.com)

Example value https://example.com
include_html boolean Optional

Set to true to include the raw HTML in the response. Default: false. Ignored when response_type=markdown

Example value false
response_type string Optional

Set to "markdown" to return only the page content as clean markdown text. When set to markdown, only the markdown content is returned (not JSON). Default: json

Example value markdown

Request Examples

<?php

// Calls the Scrappa web-scraper endpoint with ext-curl and prints either the
// raw response body or the cURL error message.
$requestOptions = [
    CURLOPT_URL => "https://scrappa.co/api/web-scraper?url=https%3A%2F%2Fexample.com&include_html=0",
    // Return the body from curl_exec() instead of writing it straight to output.
    CURLOPT_RETURNTRANSFER => true,
    CURLOPT_ENCODING => "",
    CURLOPT_MAXREDIRS => 10,
    // Timeout in seconds.
    CURLOPT_TIMEOUT => 30,
    CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
    CURLOPT_CUSTOMREQUEST => "GET",
    // The API key is sent in the x-api-key request header.
    CURLOPT_HTTPHEADER => [
        "x-api-key: YOUR_API_KEY_HERE"
    ],
];

$handle = curl_init();
curl_setopt_array($handle, $requestOptions);

// Capture both the result and the error text before the handle is closed.
$body = curl_exec($handle);
$failure = curl_error($handle);
curl_close($handle);

// A non-empty error string means the transfer itself failed.
echo $failure ? "cURL Error #:" . $failure : $body;
<?php

use Illuminate\Support\Facades\Http;

// Calls the Scrappa web-scraper endpoint through Laravel's HTTP client and
// prints the response body, or the HTTP status when the call was not successful.
$endpoint = 'https://scrappa.co/api/web-scraper?url=https%3A%2F%2Fexample.com&include_html=0';
$apiHeaders = ['x-api-key' => 'YOUR_API_KEY_HERE'];

$response = Http::timeout(30)
    ->withHeaders($apiHeaders)
    ->get($endpoint);

if (! $response->successful()) {
    echo "Error: " . $response->status();
} else {
    echo $response->body();
}
// Request settings: a GET carrying the API key in the x-api-key header.
const options = {
    method: 'GET',
    headers: {
        'x-api-key': 'YOUR_API_KEY_HERE'
    }
};

// Fetch the scraper result and log the raw response text; any failure
// (network error or non-2xx status) is logged through console.error.
(async () => {
    try {
        const response = await fetch('https://scrappa.co/api/web-scraper?url=https%3A%2F%2Fexample.com&include_html=0', options);
        if (!response.ok) {
            throw new Error(`HTTP error! status: ${response.status}`);
        }
        const data = await response.text();
        console.log(data);
    } catch (error) {
        console.error('Error:', error);
    }
})();
const axios = require('axios');

// Request configuration for the Scrappa web-scraper endpoint.
const options = {
    method: 'GET',
    url: 'https://scrappa.co/api/web-scraper?url=https%3A%2F%2Fexample.com&include_html=0',
    headers: {
        // A property name containing a hyphen must be quoted; an unquoted
        // x-api-key is a syntax error.
        'x-api-key': 'YOUR_API_KEY_HERE',
    }
};

// `await` is not allowed at the top level of a CommonJS script (this file
// loads axios with require), so the request runs inside an async function.
async function run() {
    try {
        const response = await axios(options);
        console.log(response.data);
    } catch (error) {
        console.error('Error:', error.message);
    }
}

run();
require 'net/http'
require 'uri'

# Calls the Scrappa web-scraper endpoint with Net::HTTP and prints the
# response body, or the exception message if the request raises.
endpoint = URI.parse("https://scrappa.co/api/web-scraper?url=https%3A%2F%2Fexample.com&include_html=0")

client = Net::HTTP.new(endpoint.host, endpoint.port)
# Enable TLS only when the endpoint URL uses the https scheme.
client.use_ssl = (endpoint.scheme == 'https')

get_request = Net::HTTP::Get.new(endpoint.request_uri)
get_request['x-api-key'] = 'YOUR_API_KEY_HERE'

begin
    puts client.request(get_request).body
rescue StandardError => error
    puts "Error: #{error.message}"
end
import http.client
import json

# Calls the Scrappa web-scraper endpoint with the standard-library HTTP client
# and prints the response body decoded as UTF-8.
request_path = "/api/web-scraper?url=https%3A%2F%2Fexample.com&include_html=0"
request_headers = {
    'x-api-key': 'YOUR_API_KEY_HERE',
}

connection = http.client.HTTPSConnection("scrappa.co")

try:
    connection.request("GET", request_path, headers=request_headers)
    payload = connection.getresponse().read()
    print(payload.decode("utf-8"))
except Exception as exc:
    print(f"Error: {exc}")
finally:
    # Always release the connection, whether or not the request succeeded.
    connection.close()
import requests

# Calls the Scrappa web-scraper endpoint with the requests library and prints
# the raw response text. Non-2xx responses are raised by raise_for_status()
# and reported through the except branch.
headers = {
    'x-api-key': 'YOUR_API_KEY_HERE',
}

try:
    # requests applies no timeout unless one is passed, so a stalled
    # connection would block forever; 30 seconds matches the PHP examples.
    response = requests.get(
        'https://scrappa.co/api/web-scraper?url=https%3A%2F%2Fexample.com&include_html=0',
        headers=headers,
        timeout=30,
    )
    response.raise_for_status()
    print(response.text)
except requests.exceptions.RequestException as e:
    print(f"Error: {e}")
import okhttp3.OkHttpClient;
import okhttp3.Request;
import okhttp3.Response;
import java.io.IOException;

/**
 * Calls the Scrappa web-scraper endpoint with OkHttp and prints the response
 * body, the HTTP status code for unsuccessful responses, or the I/O error message.
 */
public class ApiExample {
    public static void main(String[] args) {
        // The API key is sent in the x-api-key request header.
        Request request = new Request.Builder()
            .url("https://scrappa.co/api/web-scraper?url=https%3A%2F%2Fexample.com&include_html=0")
            .addHeader("x-api-key", "YOUR_API_KEY_HERE")
            .build();

        OkHttpClient client = new OkHttpClient();

        // try-with-resources closes the response on every exit path.
        try (Response response = client.newCall(request).execute()) {
            if (!response.isSuccessful()) {
                System.out.println("Error: " + response.code());
                return;
            }
            System.out.println(response.body().string());
        } catch (IOException e) {
            System.out.println("Error: " + e.getMessage());
        }
    }
}
package main

import (
    "fmt"
    "io"
    "net/http"
)

// main calls the Scrappa web-scraper endpoint and prints the raw response
// body. Each failure stage (building the request, sending it, reading the
// body) prints its own message and returns.
func main() {
    client := &http.Client{}
    req, err := http.NewRequest("GET", "https://scrappa.co/api/web-scraper?url=https%3A%2F%2Fexample.com&include_html=0", nil)
    if err != nil {
        fmt.Println("Error creating request:", err)
        return
    }
    // The API key is sent in the x-api-key request header.
    req.Header.Set("x-api-key", "YOUR_API_KEY_HERE")

    resp, err := client.Do(req)
    if err != nil {
        fmt.Println("Error making request:", err)
        return
    }
    defer resp.Body.Close()

    // io.ReadAll replaces ioutil.ReadAll; the io/ioutil package is deprecated.
    body, err := io.ReadAll(resp.Body)
    if err != nil {
        fmt.Println("Error reading response:", err)
        return
    }

    fmt.Println(string(body))
}
#!/bin/bash

# Calls the Scrappa web-scraper endpoint with curl; the API key is sent in the
# x-api-key request header and the response is written to stdout.
API_KEY="YOUR_API_KEY_HERE"
ENDPOINT="https://scrappa.co/api/web-scraper?url=https%3A%2F%2Fexample.com&include_html=0"

curl --request GET \
    --header "x-api-key: ${API_KEY}" \
    "${ENDPOINT}"
using System;
using System.Net.Http;
using System.Threading.Tasks;

// Calls the Scrappa web-scraper endpoint with HttpClient and writes the
// response body, or the exception message, to the console.
class Program
{
    static async Task Main()
    {
        const string endpoint = "https://scrappa.co/api/web-scraper?url=https%3A%2F%2Fexample.com&include_html=0";

        using var client = new HttpClient();
        // The API key is sent on every request made through this client.
        client.DefaultRequestHeaders.Add("x-api-key", "YOUR_API_KEY_HERE");

        try
        {
            var request = new HttpRequestMessage(HttpMethod.Get, endpoint);
            var response = await client.SendAsync(request);
            Console.WriteLine(await response.Content.ReadAsStringAsync());
        }
        catch (Exception ex)
        {
            Console.WriteLine($"Error: {ex.Message}");
        }
    }
}
import axios from 'axios';

/**
 * Calls the Scrappa web-scraper endpoint with axios and logs the response
 * data; any thrown error is logged through console.error.
 */
async function run(): Promise<void> {
    const endpoint = 'https://scrappa.co/api/web-scraper?url=https%3A%2F%2Fexample.com&include_html=0';
    const apiKey = 'YOUR_API_KEY_HERE';

    try {
        const { data } = await axios({
            method: 'GET',
            url: endpoint,
            headers: {
                'x-api-key': apiKey,
            },
        });

        console.log(data);
    } catch (error) {
        console.error('Error:', error);
    }
}

// `void` marks the returned promise as intentionally not awaited.
void run();
use reqwest::Client;

/// Calls the Scrappa web-scraper endpoint with reqwest and prints the response
/// body. Errors from sending the request or reading the body are propagated
/// to the caller with `?`.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let endpoint = "https://scrappa.co/api/web-scraper?url=https%3A%2F%2Fexample.com&include_html=0";

    // The API key is sent in the x-api-key request header.
    let request = Client::new()
        .get(endpoint)
        .header("x-api-key", "YOUR_API_KEY_HERE");

    let body = request.send().await?.text().await?;
    println!("{}", body);

    Ok(())
}

Response Schema

Example response fields are illustrative; inspect the JSON before integrating.

Example response fields

Scan these fields before integrating.

success site_status_code url final_url data
JSON Response
200 OK
{
    "success": true,
    "site_status_code": 200,
    "url": "https://example.com",
    "final_url": "https://example.com",
    "data": {
        "title": "Example Domain",
        "description": "This domain is for use in illustrative examples.",
        "keywords": [],
        "favicon": "https://example.com/favicon.ico",
        "social_links": {
            "linkedin": null,
            "twitter": null,
            "facebook": null,
            "instagram": null,
            "youtube": null,
            "tiktok": null
        },
        "extracted_keywords": [
            "domain",
            "example",
            "illustrative"
        ],
        "links": [
            "https://www.iana.org/domains/example"
        ],
        "emails": [],
        "phone_numbers": [],
        "images": [],
        "body_text": "Example Domain This domain is for use in illustrative examples in documents.",
        "languages_detected": [
            "en"
        ],
        "html": null
    }
}

Errors

Handle these documented responses before retrying or showing customer-facing failures.

400

Invalid URL

The url parameter is not a valid absolute URL or domain.

{
    "success": false,
    "error": "Invalid URL format.",
    "error_code": "INVALID_URL"
}
422

Validation Error

Required parameters failed request validation.

{
    "message": "The request validation failed",
    "errors": {
        "url": [
            "The url parameter is required."
        ]
    }
}
502

Proxy Or SSL Failure

The proxy pool was exhausted or the target SSL connection failed. This is separate from the target site HTTP status code.

{
    "success": false,
    "error": "Proxy pool exhausted while scraping the URL.",
    "error_code": "PROXY_EXHAUSTED",
    "diagnostics": {
        "proxy_attempts": 3,
        "max_proxy_attempts": 3
    }
}
504

Connection Timeout

The target could not be fetched through the proxy layer before the scraper timeout.

{
    "success": false,
    "error": "Connection timed out while scraping the URL.",
    "error_code": "CONNECTION_TIMEOUT"
}

Generate Code with AI

Copy a ready-made prompt with all the endpoint details, parameters, and example responses. Paste it into ChatGPT, Claude, or any AI assistant to instantly generate working code.

Try It Live

Test this endpoint in our interactive playground with real data.