Web scraping with Javascript rendering

Getting started

Simply send a GET request to https://chrome-v2.browsercloud.io/content with two query string parameters, `token` and `url`, and the API will return the HTML response:

info

Make sure you URL-encode the address passed in the &url= parameter to avoid conflicts with the other parameters of the GET request. You can use a service like https://www.urlencoder.org

NOTE: the value of &url= parameter is encoded (see info block above)

# The url value below is the URL-encoded form of https://site.com
curl 'https://chrome-v2.browsercloud.io/content?token=API_TOKEN&url=https%3A%2F%2Fsite.com'

import requests

# Query-string parameters for the /content endpoint; passing them via
# `params` lets requests build (and URL-encode) the query string.
query = {
    'token': 'API_TOKEN',
    'url': 'https://browsercloud.io/doc-examples/content.html',
}

response = requests.get('https://chrome-v2.browsercloud.io/content', params=query)

# A requests Response evaluates as false when the status code signals an error.
if not response:
    print('Response Failed')
else:
    print('Response OK')
    print(response.text)

//npm i axios
const axios = require('axios');

// Both query parameters are required by the /content endpoint.
const requestParams = {
    token: 'API_TOKEN',
    url: 'https://browsercloud.io/doc-examples/content.html',
};

doRequest(requestParams);

/**
 * Sends a GET request to the /content endpoint and prints the response body.
 * A failed request is caught and printed instead of being rethrown.
 *
 * @param {Object} params - query-string parameters passed to axios as `params`
 * @returns {Promise<void>}
 */
async function doRequest(params) {
    const endpoint = 'https://chrome-v2.browsercloud.io/content';

    try {
        const { data } = await axios.get(endpoint, { params });
        console.log(data);
    } catch (requestError) {
        console.log(requestError);
    }
}

<?php

// Both query parameters are required by the /content endpoint.
$requestParams = [
    'token' => 'API_TOKEN',
    'url' => 'https://browsercloud.io/doc-examples/content.html',
];

echo doRequest($requestParams);

/**
 * Sends a GET request to the /content endpoint and returns the response body.
 *
 * The parameters are serialized with http_build_query(), which also
 * URL-encodes the values. On a cURL failure the error message and code are
 * echoed and false is returned.
 *
 * @param array $params Query-string parameters (e.g. 'token', 'url').
 * @return string|false Response body, or false when the transfer failed.
 */
function doRequest($params) {
    $url = 'https://chrome-v2.browsercloud.io/content?'. http_build_query($params);

    $ch = curl_init();
    // Return the body from curl_exec() instead of printing it directly.
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_URL, $url);
    $response = curl_exec($ch);

    // Compare strictly: curl_exec() returns false only on failure, while an
    // empty ('') or "0" body is a valid response and must not be reported as an error.
    if ($response === false) {
        echo "Error: ". curl_error($ch). "; Code: ". curl_errno($ch);
    }

    return $response;
}

package main

import (
	"fmt"
	"io/ioutil"
	"net/http"
	"net/url"
)

// main requests the example page through the /content endpoint and prints
// either the response body or the error returned by doRequest.
func main() {
	res, err := doRequest(url.Values{
		"token": {"API_TOKEN"},                                         // required
		"url":   {"https://browsercloud.io/doc-examples/content.html"}, // required
	})

	if err != nil {
		fmt.Printf("Error: %v", err)
		// doRequest returns a nil body alongside an error, so stop here
		// instead of also printing an empty "response:".
		return
	}

	fmt.Printf("response: %s", res)
}

// doRequest sends a GET request to the /content endpoint with params encoded
// as the query string and returns the raw response body.
//
// It returns a nil body and a non-nil error when the request cannot be built,
// the round trip fails, or the body cannot be read. The HTTP status code is
// not inspected.
func doRequest(params url.Values) ([]byte, error) {
	req, err := http.NewRequest("GET", "https://chrome-v2.browsercloud.io/content", nil)
	if err != nil {
		// req is nil on failure; touching req.URL below would panic.
		return nil, err
	}
	req.URL.RawQuery = params.Encode()

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		return nil, err
	}
	// The caller of Do must close the body, otherwise the connection leaks.
	defer res.Body.Close()

	body, err := ioutil.ReadAll(res.Body)
	if err != nil {
		return nil, err
	}

	return body, nil
}

import java.net.URI;
import java.net.http.*;

/**
 * Minimal example: requests a page through the /content endpoint and prints
 * the response body to standard output.
 */
public class Main {
    public static void main(String[] args) {
        // The target address must be URL-encoded so that any "?", "&" or "#"
        // it contains is not parsed as part of the API request's own query string.
        String target = java.net.URLEncoder.encode(
                "https://browsercloud.io/doc-examples/content.html",
                java.nio.charset.StandardCharsets.UTF_8);

        sendRequest(
                "https://chrome-v2.browsercloud.io/content?"+
                "token=API_TOKEN"+
                "&url=" + target
        );
    }

    /**
     * Sends a GET request to the given URL and prints the response body.
     * Any exception is caught and printed instead of being propagated.
     *
     * @param url complete request URL, including the query string
     */
    private static void sendRequest(String url) {
        try {
            var client = HttpClient.newHttpClient();
            // HttpRequest builders default to the GET method.
            var request = HttpRequest.newBuilder(
                    URI.create(url))
                    .build();
            var response = client.send(request, HttpResponse.BodyHandlers.ofString());

            System.out.println(response.body());
        }
        catch (Exception e) { System.out.println(e); }
    }
}

Parameter	Available values	Description
`url`	string, required. Example : https://browsercloud.io	site URL you would like to scrape
`token`	string, required	your API token

Proxies & GEO targeting

Our residential proxy pool includes over 70 million IPs from 195 countries and dozens of ISPs, and should be sufficient for the vast majority of scraping jobs.

Proxy behavior is set by the `proxy` and `proxyCountry` parameters:

# The url value is URL-encoded so it cannot clash with the other query parameters
curl --location 'https://chrome-v2.browsercloud.io/content?token=API_TOKEN&proxy=datacenter&proxyCountry=GB&url=https%3A%2F%2Fapi.myip.com'

Parameter	Available values	Description
`proxy`	1) `datacenter` - datacenter proxy pool with 70mln of fast proxies 2) `residential` - premium proxy pool for a few particularly difficult to scrape sites 3) omit parameter to use direct connection from our servers	Proxy pool type
`proxyCountry`	parameter works with `proxy` parameter 1) two-letter country ISO code. Example: `US`, `CA`, `GB`, `DE` and more 2) omit parameter or set `ALL` for global rotating	Proxy geo targeting

Wait for Element when rendering

If a rendered request is a bit slow and the page stabilizes before the request is satisfied, it can fool the API into thinking the page has finished rendering.

To cope with this, you can tell the API to wait for a DOM element (selector) to appear on the page when rendering. You just need to send the `wait-for` parameter, passing a URL-encoded jQuery selector.

The API will then wait for this to appear on the page before returning results.

# Both the url value and the wait-for selector are URL-encoded (%23 is #)
curl 'https://chrome-v2.browsercloud.io/content?token=API_TOKEN&url=https%3A%2F%2Fsite.com&wait-for=%23ajax-content'

Parameter	Available values	Description
`wait-for`	string, Example: %23ajax-content (%23 is # symbol)	URL-encoded selector. Requires JS rendering

Javascript execution

info

We recommend using the Javascript Scenario endpoint for the advanced approach of interaction with a web page like form submitting, typing, clicks, and more

You can pass your custom Javascript code to run in the browser context using the `addScriptTag` option, and it will be executed after the page has finished loading.

Custom javascript can be used for interaction with a page, like scrolling, pressing a button, etc.

# POST the options as a JSON body. The URL is quoted so the shell does not
# treat "?" as a glob character (zsh aborts on an unmatched pattern).
curl -X POST \
  'https://chrome-v2.browsercloud.io/content?token=API_TOKEN' \
  -H 'Cache-Control: no-cache' \
  -H 'Content-Type: application/json' \
  -d '
{
    "url": "https://browsercloud.io/doc-examples/content.html",
    "addScriptTag" : [
        {
            "content" : "let node = document.querySelector(\"#header-2\"); node.textContent = \"My custom JS did it!\""
        }
    ]
}'

Additional parameters

Example with all available JSON options

# POST every supported option as a JSON body. The URL is quoted so the shell
# does not treat "?" as a glob character (zsh aborts on an unmatched pattern).
curl -X POST \
  'https://chrome-v2.browsercloud.io/content?token=API_TOKEN' \
  -H 'Cache-Control: no-cache' \
  -H 'Content-Type: application/json' \
  -d '
{
    "url": "https://browsercloud.io/doc-examples/content.html",
    "addScriptTag" : [
        {
            "content" : "let node = document.querySelector(\"#header-2\"); node.textContent = \"My custom JS did it!\""
        }
    ],
    "setJavaScriptEnabled" : true,
    "waitFor" : "#delayed",
    "userAgent" : "Mozilla/5.0 (iPhone; CPU iPhone OS 13_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/11.2 Mobile/15E148 Safari/604.1",
    "rejectResourceTypes" : ["image"],
    "authenticate" : {
        "username" : "test",
        "password" : "test"
    },
    "cookies" : [
        {
            "name" : "session",
            "value" : "session-value",
            "domain" : "browsercloud.io"
        }
    ]
}'

Parameters	Available values	Description
url	string	URL for web scraping
setJavaScriptEnabled	true(default), false - javascript rendering	Javascript rendering
waitFor	string	Waits for a certain DOM element to be rendered
addScriptTag.content	string - js code	Adds custom <script> tag to the page
userAgent	string	sets custom UserAgent for a web scraper
rejectResourceTypes	string: 'document','stylesheet','image','media', 'font','script','texttrack','xhr', 'fetch','eventsource','websocket','manifest','other'	Blocks unnecessary resource type to boost page load
authenticate	username, password: string	Basic auth
cookies	array of objects (name, value, domain: string)	Custom cookies (for example: auth session)

Web scraping with Javascript rendering

Getting started

Proxies & GEO targeting

Wait for Element when rendering

Javascript execution

Additional parameters

Getting started

Proxies & GEO targeting

Wait for Element when rendering

Javascript execution

Additional parameters