Amazon
Setup requirements
- Python
- JavaScript
pip install httpx lxml playwright
PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD=1 npm i axios playwright node-html-parser
Collect product prices from Amazon
This example shows how to collect product prices from Amazon search results. You can use either a persistent or a one-time profile for this, but a one-time profile is the better fit here because there is no need to preserve the profile's state between runs.
- Python
- JavaScript
import os
import httpx
from lxml import html
from playwright.async_api import async_playwright
# Runtime configuration read from the environment; a missing variable raises KeyError.
# API_TOKEN is sent as the X-Cloud-Api-Token header on Surfsky API requests.
API_TOKEN = os.environ['API_TOKEN']
# PROXY is passed as the 'proxy' field when the one-time profile is created.
PROXY = os.environ['PROXY']
# PRODUCT is the search term placed in the Amazon search URL.
PRODUCT = os.environ['PRODUCT']
async def start_one_time_browser():
    """Create a one-time Surfsky browser profile and return the API response.

    Sends ``POST /profiles/one_time`` to the Surfsky public API, authenticating
    with ``API_TOKEN`` and passing ``PROXY`` as the profile's proxy.

    Returns:
        The decoded JSON body of the response. The caller reads its
        ``ws_url`` entry to connect to the browser.

    Raises:
        httpx.HTTPStatusError: If the API answers with an error status.
    """
    async with httpx.AsyncClient(
        base_url='https://api-public.surfsky.io',
        headers={'X-Cloud-Api-Token': API_TOKEN},
        timeout=60.0,
    ) as client:
        browser_data_resp = await client.post('/profiles/one_time', json={'proxy': PROXY})
        # Fail loudly on an error status instead of parsing an error payload as profile data.
        browser_data_resp.raise_for_status()
        return browser_data_resp.json()
async def main():
    """Search Amazon for ``PRODUCT`` in a one-time browser and print the prices.

    Starts a one-time profile, connects to it over CDP, opens the Amazon
    search results for ``PRODUCT``, prints every price found on the page and
    then clicks through to the next results page.

    This is a coroutine; run it with ``asyncio.run(main())``.
    """
    # Function-scope import keeps the snippet self-contained.
    from urllib.parse import quote_plus

    browser_data = await start_one_time_browser()
    cdp_url = browser_data['ws_url']

    async with async_playwright() as p:
        browser = await p.chromium.connect_over_cdp(cdp_url)
        # Leaving this block closes the browser, even if scraping fails.
        async with browser:
            page = await browser.new_page()
            # Escape the search term so spaces, '&' or '#' cannot break the query string.
            await page.goto(f'https://amazon.com/s?k={quote_plus(PRODUCT)}')

            content = await page.content()
            document = html.fromstring(content)
            prices = document.xpath('//span[@class="a-price"]/span/text()')
            print(f'Prices: {prices}')

            # Move on to the next page of search results.
            next_page_link = page.locator('//a[contains(@class, "s-pagination-next")]')
            await next_page_link.click()
// Every dependency is loaded with CommonJS `require`: a static `import`
// statement is a syntax error in a CommonJS script.
const axios = require('axios')
const { chromium } = require('playwright')
const { parse } = require('node-html-parser')
// Runtime configuration read from the environment (undefined when a variable is not set).
// API_TOKEN is sent as the X-Cloud-Api-Token header on Surfsky API requests.
const API_TOKEN = process.env.API_TOKEN
// PROXY is passed as the `proxy` field when the one-time profile is created.
const PROXY = process.env.PROXY
// PRODUCT is the search term placed in the Amazon search URL.
const PRODUCT = process.env.PRODUCT
// Shared axios client for the Surfsky public API; requests time out after 100000 ms.
const SURFSKY_API = axios.create({ timeout: 100000, baseURL: 'https://api-public.surfsky.io' })
/**
 * Creates a one-time Surfsky profile that uses PROXY.
 *
 * @returns {Promise<object>} Body of the `POST /profiles/one_time` response.
 */
async function startOneTimeProfile() {
  const payload = { proxy: PROXY }
  const requestConfig = { headers: { 'X-Cloud-Api-Token': API_TOKEN } }
  const { data } = await SURFSKY_API.post('/profiles/one_time', payload, requestConfig)
  return data
}
/**
 * Searches Amazon for PRODUCT in a one-time Surfsky browser, prints the
 * prices found on the results page, then opens the next results page.
 *
 * The browser is closed when the function finishes, whether it succeeds or
 * throws. Call `main()` to run the example.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const browserData = await startOneTimeProfile()
  const cdpUrl = browserData.ws_url
  const browser = await chromium.connectOverCDP(cdpUrl)
  try {
    const page = await browser.newPage()

    // Build the query with URLSearchParams so spaces, '&' or '#' in the
    // search term are escaped instead of corrupting the URL.
    const searchUrl = new URL('https://amazon.com/s')
    searchUrl.searchParams.set('k', PRODUCT)
    await page.goto(searchUrl.href)

    const root = parse(await page.content())
    const prices = root.querySelectorAll('span.a-price > span.a-offscreen').map(el => el.innerText)
    console.log('Prices:', prices)

    // Move on to the next page of search results.
    const nextPageLink = page.locator('//a[contains(@class, "s-pagination-next")]')
    await nextPageLink.click()
  } finally {
    // Always release the remote browser, even when scraping fails.
    await browser.close()
  }
}