Skip to main content

Amazon

Setup requirements

pip install httpx lxml playwright

Collect products from Amazon

This example shows you how to collect product prices through Amazon search. You can use either a persistent or a one-time profile for this purpose, but a one-time profile is better suited here because you don't need to save the profile's state between runs.

import os
from urllib.parse import quote_plus

import httpx
from lxml import html
from playwright.async_api import async_playwright

# Required settings, read from the environment when the module is loaded.
# A missing variable raises KeyError immediately.
#   API_TOKEN - sent as the X-Cloud-Api-Token header on API requests
#   PROXY     - passed as the 'proxy' field when creating the one-time profile
#   PRODUCT   - search term used in the Amazon search URL
API_TOKEN, PROXY, PRODUCT = (
    os.environ[name] for name in ('API_TOKEN', 'PROXY', 'PRODUCT')
)


async def start_one_time_browser():
    """Create a one-time browser profile and return the API's JSON reply.

    Sends a POST to ``/profiles/one_time`` on the Surfsky public API,
    authenticated with ``API_TOKEN`` and configured with ``PROXY``.
    ``raise_for_status()`` is called on the response, so an HTTP error
    status propagates as an exception instead of being returned.

    Returns:
        The decoded JSON body of the response.
    """
    client_options = {
        'base_url': 'https://api-public.surfsky.io',
        'headers': {'X-Cloud-Api-Token': API_TOKEN},
        'timeout': 60.0,
    }
    payload = {'proxy': PROXY}

    async with httpx.AsyncClient(**client_options) as api:
        response = await api.post('/profiles/one_time', json=payload)
        response.raise_for_status()
        return response.json()


def build_search_url(product):
    """Return the Amazon search URL for *product* with the term URL-encoded."""
    # quote_plus escapes spaces, '&', '#', '+' and similar characters that
    # would otherwise truncate or alter the ``k`` query parameter.
    return f'https://amazon.com/s?k={quote_plus(product)}'


async def main():
    """Open an Amazon search for ``PRODUCT`` and print the prices found.

    Starts a one-time browser profile, attaches Playwright to it over CDP
    using the ``ws_url`` field of the profile response, loads the search
    results page, extracts the price texts with XPath and prints them, then
    clicks the "next page" pagination link.

    Note: ``PRODUCT`` is expected to be the raw search term; it is
    URL-encoded here, so it should not be pre-encoded by the caller.
    """
    browser_data = await start_one_time_browser()

    cdp_url = browser_data['ws_url']
    async with async_playwright() as p:
        browser = await p.chromium.connect_over_cdp(cdp_url)
        # The browser is used as an async context manager so it is closed
        # when the block exits, including on error.
        async with browser:
            page = await browser.new_page()

            await page.goto(build_search_url(PRODUCT))
            content = await page.content()

            # Parse a snapshot of the rendered HTML with lxml.
            document = html.fromstring(content)
            prices = document.xpath('//span[@class="a-price"]/span/text()')
            print(f'Prices: {prices}')

            next_page_link = page.locator('//a[contains(@class, "s-pagination-next")]')
            await next_page_link.click()