Web scraping with Javascript rendering
Getting started
Simply send a GET request to https://chrome-v2.browsercloud.io/content with two query string parameters, token and url, and the API will return the HTML response:
- Curl
- Python
- NodeJS
- PHP
- Go
- Java
# Fetch the HTML of https://site.com; token and url are the two required query string parameters.
curl 'https://chrome-v2.browsercloud.io/content?token=API_TOKEN&url=https://site.com'
import requests

# Both query string parameters are required by the /content endpoint.
params = {
    'token': 'API_TOKEN',
    'url': 'https://browsercloud.io/doc-examples/content.html',
}

# requests percent-encodes the values of `params` into the query string.
res = requests.get('https://chrome-v2.browsercloud.io/content', params=params)

# Response.ok is True for any status code below 400.
if res.ok:
    print('Response OK')
    print(res.text)
else:
    print('Response Failed')
//npm i axios
const axios = require('axios');

/**
 * Sends a GET request to the /content endpoint and logs the response body.
 * Any request error is logged rather than rethrown.
 *
 * @param {{token: string, url: string}} params - Query string parameters; both are required.
 * @returns {Promise<void>}
 */
async function doRequest(params) {
  try {
    const { data } = await axios.get('https://chrome-v2.browsercloud.io/content', { params });
    console.log(data);
  } catch (error) {
    console.log(error);
  }
}

doRequest({
  token: 'API_TOKEN', //required
  url: 'https://browsercloud.io/doc-examples/content.html', //required
});
<?php
// Fetch the page and print the returned HTML.
echo doRequest([
    'token' => 'API_TOKEN',
    'url' => 'https://browsercloud.io/doc-examples/content.html'
]);

/**
 * Sends a GET request to the /content endpoint.
 *
 * @param array $params Query string parameters (token and url are required);
 *                      http_build_query() percent-encodes them.
 * @return string|false The response body, or false if the transfer failed
 *                      (the cURL error is echoed in that case).
 */
function doRequest($params) {
    $url = 'https://chrome-v2.browsercloud.io/content?'. http_build_query($params);
    $ch = curl_init();
    // Return the body from curl_exec() instead of printing it directly.
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_URL, $url);
    $response = curl_exec($ch);
    // curl_exec() signals failure with false only; an empty or "0" body is a
    // valid response, so compare strictly instead of testing truthiness.
    if ($response === false) {
        echo "Error: ". curl_error($ch). "; Code: ". curl_errno($ch);
    }
    return $response;
}
package main
import (
"fmt"
"io/ioutil"
"net/http"
"net/url"
)
// main requests the example page and prints either the error or the response body.
func main() {
	res, err := doRequest(url.Values{
		"token": {"API_TOKEN"}, // required
		"url":   {"https://browsercloud.io/doc-examples/content.html"}, // required
	})
	if err != nil {
		fmt.Printf("Error: %v", err)
		// Stop here: res is nil on failure, so there is no response to print.
		return
	}
	fmt.Printf("response: %s", res)
}
// doRequest sends a GET request to the /content endpoint with params encoded
// as the query string and returns the raw response body.
// It returns a nil body and the error if the request cannot be built, sent or read.
func doRequest(params url.Values) ([]byte, error) {
	req, err := http.NewRequest("GET", "https://chrome-v2.browsercloud.io/content", nil)
	if err != nil {
		// Without this check a failed NewRequest would leave req nil and panic below.
		return nil, err
	}
	req.URL.RawQuery = params.Encode()
	res, err := http.DefaultClient.Do(req)
	if err != nil {
		return nil, err
	}
	// The caller of Do must close the body, otherwise the connection leaks.
	defer res.Body.Close()
	body, err := ioutil.ReadAll(res.Body)
	if err != nil {
		return nil, err
	}
	return body, nil
}
import java.net.URI;
import java.net.http.*;
/** Minimal example: fetches a page through the /content endpoint and prints the HTML. */
public class Main {
    public static void main(String[] args) {
        // The target URL is itself a query string value, so percent-encode it;
        // otherwise any '&', '?' or '#' inside it would be parsed as part of the API request.
        String target = java.net.URLEncoder.encode(
            "https://browsercloud.io/doc-examples/content.html",
            java.nio.charset.StandardCharsets.UTF_8);
        sendRequest(
            "https://chrome-v2.browsercloud.io/content?" +
            "token=API_TOKEN" +
            "&url=" + target
        );
    }

    /**
     * Sends a GET request to the given URL and prints the response body.
     * Any exception is printed to standard output instead of being propagated.
     *
     * @param url the full request URL, including the query string
     */
    private static void sendRequest(String url) {
        try {
            var client = HttpClient.newHttpClient();
            var request = HttpRequest.newBuilder(
                URI.create(url))
                .build();
            var response = client.send(request, HttpResponse.BodyHandlers.ofString());
            System.out.println(response.body());
        }
        catch (Exception e) { System.out.println(e); }
    }
}
Parameter | Available values | Description |
---|---|---|
url | string, required. Example : https://browsercloud.io | site URL you would like to scrape |
token | string, required | your API token |
Proxies & GEO targeting
Our residential proxy pool includes over 70 million IPs from 195 countries and dozens of ISPs, and should be sufficient for the vast majority of scraping jobs.
Proxy behavior is set by the proxy and proxyCountry parameters:
# Request https://api.myip.com through the datacenter proxy pool, geo-targeted to GB; --location follows redirects.
curl --location 'https://chrome-v2.browsercloud.io/content?token=API_TOKEN&proxy=datacenter&proxyCountry=GB&url=https://api.myip.com'
Parameter | Available values | Description |
---|---|---|
proxy | 1) datacenter - datacenter proxy pool with 70 million fast proxies 2) residential - premium proxy pool for the few sites that are particularly difficult to scrape 3) omit the parameter to use a direct connection from our servers | Proxy pool type |
proxyCountry | works only together with the proxy parameter: 1) two-letter ISO country code. Example: US, CA, GB, DE and more 2) omit the parameter or set ALL for global rotation | Proxy geo targeting |
Wait for Element when rendering
If a rendered request is a bit slow and the page stabilizes before the request is satisfied, it can fool the API into thinking the page has finished rendering.
To cope with this, you can tell the API to wait for a DOM element (selector) to appear on the page when rendering. You just need to send the wait-for parameter, passing a URL-encoded jQuery selector.
The API will then wait for this to appear on the page before returning results.
# Wait for the element matching #ajax-content (URL-encoded as %23ajax-content) to appear before the result is returned.
curl 'https://chrome-v2.browsercloud.io/content?token=API_TOKEN&url=https://site.com&wait-for=%23ajax-content'
Parameter | Available values | Description |
---|---|---|
wait-for | string, Example: %23ajax-content (%23 is # symbol) | URL-encoded selector. Requires JS rendering |
Javascript execution
You can pass custom JavaScript code to run in the browser context using the js_snippet param (or, in a JSON POST body, the addScriptTag option shown below); it is executed after the page has finished loading.
Custom javascript can be used for interaction with a page, like scrolling, pressing a button, etc.
# Inject a custom <script> into the page before its HTML is returned.
# The URL is quoted so the shell does not treat '?' as a glob character.
curl -X POST \
  'https://chrome-v2.browsercloud.io/content?token=API_TOKEN' \
  -H 'Cache-Control: no-cache' \
  -H 'Content-Type: application/json' \
  -d '
{
  "url": "https://browsercloud.io/doc-examples/content.html",
  "addScriptTag" : [
    {
      "content" : "let node = document.querySelector(\"#header-2\"); node.textContent = \"My custom JS did it!\""
    }
  ]
}'
Additional parameters
Example with all available JSON options
# POST a JSON body using every available option.
# The URL is quoted so the shell does not treat '?' as a glob character.
curl -X POST \
  'https://chrome-v2.browsercloud.io/content?token=API_TOKEN' \
  -H 'Cache-Control: no-cache' \
  -H 'Content-Type: application/json' \
  -d '
{
  "url": "https://browsercloud.io/doc-examples/content.html",
  "addScriptTag" : [
    {
      "content" : "let node = document.querySelector(\"#header-2\"); node.textContent = \"My custom JS did it!\""
    }
  ],
  "setJavaScriptEnabled" : true,
  "waitFor" : "#delayed",
  "userAgent" : "Mozilla/5.0 (iPhone; CPU iPhone OS 13_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/11.2 Mobile/15E148 Safari/604.1",
  "rejectResourceTypes" : ["image"],
  "authenticate" : {
    "username" : "test",
    "password" : "test"
  },
  "cookies" : [
    {
      "name" : "session",
      "value" : "session-value",
      "domain" : "browsercloud.io"
    }
  ]
}'
Parameters | Available values | Description |
---|---|---|
url | string | URL for web scraping |
setJavaScriptEnabled | true(default), false - javascript rendering | Javascript rendering |
waitFor | string | Waits for a specific DOM element to be rendered |
addScriptTag.content | string - js code | Adds custom <script> tag to the page |
userAgent | string | sets custom UserAgent for a web scraper |
rejectResourceTypes | array of strings: 'document','stylesheet','image','media', 'font','script','texttrack','xhr', 'fetch','eventsource','websocket','manifest','other' | Blocks unnecessary resource types to speed up page load |
authenticate | username, password: string | Basic auth |
cookies | array of objects with name, value, domain: string | Custom cookies (for example: auth session) |