Setup requirements
- Python
- Javascript
pip install httpx playwright
PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD=1 npm i axios playwright
Login to Instagram
There are two ways to work with the Surfsky cloud browser: a one-time profile and a persistent profile.
A one-time profile doesn't save its state on close, which makes it perfect for scraping, but we will show how it works here too.
A persistent profile, by contrast, saves its state on close, so you can stop it and continue working on the next start with all previously opened pages and stored cookies.
After starting either a one-time or a persistent profile, you will get a CDP URL that lets you connect a browser automation tool to the browser. You can see the start-profile response on our API page.
- Python
- Javascript
- One time profile
- Persistent profile
import os
import httpx
API_TOKEN = os.environ['API_TOKEN']
PROXY = os.environ['PROXY']
async def start_one_time_profile():
    """Start a one-time Surfsky profile and return the decoded JSON response.

    Sends ``POST /profiles/one_time`` with the configured proxy.
    ``raise_for_status()`` turns an error HTTP status into an exception.
    """
    client_options = {
        'base_url': 'https://api-public.surfsky.io',
        'headers': {'X-Cloud-Api-Token': API_TOKEN},
        'timeout': 60.0,
    }
    async with httpx.AsyncClient(**client_options) as session:
        reply = await session.post('/profiles/one_time', json={'proxy': PROXY})
        reply.raise_for_status()
        profile = reply.json()
    return profile
import os
import httpx
API_TOKEN = os.environ['API_TOKEN']
PROFILE_UUID = os.environ['PROFILE_UUID']
async def start_persistent_profile():
    """Start the persistent profile ``PROFILE_UUID`` and return the JSON response.

    Sends ``POST /profiles/{PROFILE_UUID}/start`` without a request body.
    ``raise_for_status()`` turns an error HTTP status into an exception.
    """
    auth_headers = {'X-Cloud-Api-Token': API_TOKEN}
    start_path = f'/profiles/{PROFILE_UUID}/start'
    async with httpx.AsyncClient(
        base_url='https://api-public.surfsky.io',
        headers=auth_headers,
        timeout=60.0,
    ) as session:
        reply = await session.post(start_path)
        reply.raise_for_status()
        return reply.json()
- One time profile
- Persistent profile
const axios = require('axios')
const API_TOKEN = process.env.API_TOKEN
const PROXY = process.env.PROXY
const SURFSKY_API = axios.create({
baseURL: 'https://api-public.surfsky.io',
timeout: 100000,
})
/**
 * Starts a one-time Surfsky profile through the configured proxy.
 * @returns {Promise<object>} Body of the POST /profiles/one_time response.
 */
async function startOneTimeProfile() {
  const payload = { proxy: PROXY }
  const requestConfig = { headers: { 'X-Cloud-Api-Token': API_TOKEN } }
  const { data } = await SURFSKY_API.post(`/profiles/one_time`, payload, requestConfig)
  return data
}
const axios = require('axios')
const API_TOKEN = process.env.API_TOKEN
const PROFILE_UUID = process.env.PROFILE_UUID
const SURFSKY_API = axios.create({
baseURL: 'https://api-public.surfsky.io',
timeout: 100000,
})
/**
 * Starts the persistent Surfsky profile identified by PROFILE_UUID.
 * @returns {Promise<object>} Body of the start-profile response.
 */
async function startPersistentProfile() {
  const endpoint = `/profiles/${PROFILE_UUID}/start`
  const authHeaders = { 'X-Cloud-Api-Token': API_TOKEN }
  const { data } = await SURFSKY_API.post(endpoint, {}, { headers: authHeaders })
  return data
}
After getting the CDP URL, you can use any automation tool as if the browser were running on your local machine.
Here is a function that logs in to Instagram and accepts the CDP URL as an argument.
- Python
- Javascript
import asyncio
import os
from playwright.async_api import async_playwright
USERNAME = os.environ['USERNAME']
PASSWORD = os.environ['PASSWORD']
async def login_to_instagram(cdp_url):
    """Connect to the browser at ``cdp_url`` and submit the Instagram login form.

    Fills the username/password inputs from ``USERNAME`` / ``PASSWORD``,
    clicks the submit button, waits ten seconds and saves a screenshot to
    ``instagram.jpg``. The browser is closed when the ``async with`` exits.
    """
    async with async_playwright() as playwright:
        remote_browser = await playwright.chromium.connect_over_cdp(cdp_url)
        async with remote_browser:
            tab = await remote_browser.new_page()
            await tab.goto('https://instagram.com')

            username_input = tab.locator('//input[@name="username"]')
            password_input = tab.locator('//input[@name="password"]')
            await username_input.wait_for()
            await username_input.fill(USERNAME)
            await password_input.fill(PASSWORD)

            submit_button = tab.locator('//form[@id="loginForm"]//button[@type="submit"]')
            await submit_button.wait_for()
            await submit_button.click()

            # Fixed pause before the screenshot so the page can react to the submit.
            await asyncio.sleep(10)
            await tab.screenshot(path='instagram.jpg')
const { chromium } = require('playwright')
const USERNAME = process.env.USERNAME
const PASSWORD = process.env.PASSWORD
/**
 * Connects to a running browser over CDP and logs in to Instagram.
 *
 * Mirrors the Python example: after submitting the form it waits ten
 * seconds, saves a screenshot to `instagram.jpg`, and always closes the
 * browser connection so the Node.js process can exit.
 *
 * @param {string} cdp_url - CDP endpoint URL returned by the start-profile call.
 * @returns {Promise<void>}
 */
async function loginToInstagram(cdp_url) {
  const browser = await chromium.connectOverCDP(cdp_url)
  try {
    const page = await browser.newPage()
    await page.goto('https://instagram.com')
    const username = page.locator('//input[@name="username"]')
    const password = page.locator('//input[@name="password"]')
    await username.waitFor()
    await username.fill(USERNAME)
    await password.fill(PASSWORD)
    const login = page.locator('//form[@id="loginForm"]//button[@type="submit"]')
    await login.waitFor()
    await login.click()
    // Give the page time to react to the submit before capturing the result.
    await page.waitForTimeout(10000)
    await page.screenshot({ path: 'instagram.jpg' })
  } finally {
    // Without this the CDP connection stays open and the script never exits.
    await browser.close()
  }
}
Full example
- Python
- Javascript
- One time profile
- Persistent profile
import os
import httpx
from playwright.async_api import async_playwright
USERNAME = os.environ['USERNAME']
PASSWORD = os.environ['PASSWORD']
API_TOKEN = os.environ['API_TOKEN']
# The persistent-profile example addresses the profile by its UUID; a proxy
# is only passed when starting a one-time profile.
PROFILE_UUID = os.environ['PROFILE_UUID']
async def start_persistent_profile():
    """Start the persistent Surfsky profile and return the parsed JSON body.

    Issues ``POST /profiles/{PROFILE_UUID}/start`` authenticated with
    ``API_TOKEN``. ``raise_for_status()`` raises on an error HTTP status.
    """
    surfsky = httpx.AsyncClient(
        base_url='https://api-public.surfsky.io',
        headers={'X-Cloud-Api-Token': API_TOKEN},
        timeout=60.0,
    )
    async with surfsky as session:
        start_reply = await session.post(f'/profiles/{PROFILE_UUID}/start')
        start_reply.raise_for_status()
        return start_reply.json()
async def login_to_instagram(cdp_url=None):
    """Log in to Instagram in a Surfsky persistent profile.

    Args:
        cdp_url: CDP URL of an already started browser. When omitted, the
            persistent profile is started here and its ``ws_url`` is used.
            (Previously the argument was required but silently overwritten.)

    Fills the login form from ``USERNAME`` / ``PASSWORD``, submits it, waits
    ten seconds and saves a screenshot to ``instagram.jpg``. The browser is
    closed when the ``async with browser`` block exits.
    """
    if cdp_url is None:
        browser_data = await start_persistent_profile()
        cdp_url = browser_data['ws_url']
    async with async_playwright() as p:
        browser = await p.chromium.connect_over_cdp(cdp_url)
        async with browser:
            page = await browser.new_page()
            await page.goto('https://instagram.com')
            username = page.locator('//input[@name="username"]')
            password = page.locator('//input[@name="password"]')
            await username.wait_for()
            await username.fill(USERNAME)
            await password.fill(PASSWORD)
            login = page.locator('//form[@id="loginForm"]//button[@type="submit"]')
            await login.wait_for()
            await login.click()
            # Playwright's own timer is used for the pause because this
            # snippet does not import asyncio.
            await page.wait_for_timeout(10_000)
            await page.screenshot(path='instagram.jpg')
import os
import httpx
from playwright.async_api import async_playwright
USERNAME = os.environ['USERNAME']
PASSWORD = os.environ['PASSWORD']
API_TOKEN = os.environ['API_TOKEN']
PROXY = os.environ['PROXY']
async def start_one_time_profile():
    """Start a one-time Surfsky profile and return the parsed JSON body.

    Issues ``POST /profiles/one_time`` with ``{'proxy': PROXY}`` as the JSON
    payload. ``raise_for_status()`` raises on an error HTTP status.
    """
    surfsky = httpx.AsyncClient(
        base_url='https://api-public.surfsky.io',
        headers={'X-Cloud-Api-Token': API_TOKEN},
        timeout=60.0,
    )
    async with surfsky as session:
        start_reply = await session.post('/profiles/one_time', json={'proxy': PROXY})
        start_reply.raise_for_status()
        return start_reply.json()
async def login_to_instagram(cdp_url=None):
    """Log in to Instagram in a Surfsky one-time profile.

    Args:
        cdp_url: CDP URL of an already started browser. When omitted, a
            one-time profile is started here and its ``ws_url`` is used.
            (Previously the argument was required but silently overwritten.)

    Fills the login form from ``USERNAME`` / ``PASSWORD``, submits it, waits
    ten seconds and saves a screenshot to ``instagram.jpg``. The browser is
    closed when the ``async with browser`` block exits.
    """
    if cdp_url is None:
        browser_data = await start_one_time_profile()
        cdp_url = browser_data['ws_url']
    async with async_playwright() as p:
        browser = await p.chromium.connect_over_cdp(cdp_url)
        async with browser:
            page = await browser.new_page()
            await page.goto('https://instagram.com')
            username = page.locator('//input[@name="username"]')
            password = page.locator('//input[@name="password"]')
            await username.wait_for()
            await username.fill(USERNAME)
            await password.fill(PASSWORD)
            login = page.locator('//form[@id="loginForm"]//button[@type="submit"]')
            await login.wait_for()
            await login.click()
            # Playwright's own timer is used for the pause because this
            # snippet does not import asyncio.
            await page.wait_for_timeout(10_000)
            await page.screenshot(path='instagram.jpg')
- One time profile
- Persistent profile
const axios = require('axios')
const { chromium } = require('playwright')
const API_TOKEN = process.env.API_TOKEN
const PROXY = process.env.PROXY
const USERNAME = process.env.USERNAME
const PASSWORD = process.env.PASSWORD
const SURFSKY_API = axios.create({
baseURL: 'https://api-public.surfsky.io',
timeout: 100000,
})
/**
 * Requests a new one-time profile from the Surfsky API.
 * @returns {Promise<object>} Response body of POST /profiles/one_time.
 */
async function startOneTimeProfile() {
  const headers = { 'X-Cloud-Api-Token': API_TOKEN }
  const reply = await SURFSKY_API.post(`/profiles/one_time`, { proxy: PROXY }, { headers })
  const { data: profile } = reply
  return profile
}
/**
 * Starts a one-time profile and logs in to Instagram inside it.
 *
 * Mirrors the Python example: after submitting the form it waits ten
 * seconds, saves a screenshot to `instagram.jpg`, and always closes the
 * browser connection so the Node.js process can exit.
 *
 * @returns {Promise<void>}
 */
async function loginToInstagram() {
  const browserData = await startOneTimeProfile()
  const browser = await chromium.connectOverCDP(browserData.ws_url)
  try {
    const page = await browser.newPage()
    await page.goto('https://instagram.com')
    const username = page.locator('//input[@name="username"]')
    const password = page.locator('//input[@name="password"]')
    await username.waitFor()
    await username.fill(USERNAME)
    await password.fill(PASSWORD)
    const login = page.locator('//form[@id="loginForm"]//button[@type="submit"]')
    await login.waitFor()
    await login.click()
    // Give the page time to react to the submit before capturing the result.
    await page.waitForTimeout(10000)
    await page.screenshot({ path: 'instagram.jpg' })
  } finally {
    // Without this the CDP connection stays open and the script never exits.
    await browser.close()
  }
}
const axios = require('axios')
const { chromium } = require('playwright')
const API_TOKEN = process.env.API_TOKEN
// The persistent profile is addressed by its UUID; reading PROXY here was a
// copy-paste slip that produced a wrong /profiles/<id>/start URL.
const PROFILE_UUID = process.env.PROFILE_UUID
const USERNAME = process.env.USERNAME
const PASSWORD = process.env.PASSWORD
// Shared axios client for the Surfsky public API (timeout is in milliseconds).
const SURFSKY_API = axios.create({
  baseURL: 'https://api-public.surfsky.io',
  timeout: 100000,
})
/**
 * Asks the Surfsky API to start the persistent profile PROFILE_UUID.
 * @returns {Promise<object>} Response body of the start call.
 */
async function startPersistentProfile() {
  const headers = { 'X-Cloud-Api-Token': API_TOKEN }
  const emptyBody = {}
  const reply = await SURFSKY_API.post(`/profiles/${PROFILE_UUID}/start`, emptyBody, { headers })
  const { data: profile } = reply
  return profile
}
/**
 * Starts the persistent profile and logs in to Instagram inside it.
 *
 * Mirrors the Python example: after submitting the form it waits ten
 * seconds, saves a screenshot to `instagram.jpg`, and always closes the
 * browser connection so the Node.js process can exit.
 *
 * @returns {Promise<void>}
 */
async function loginToInstagram() {
  const browserData = await startPersistentProfile()
  const browser = await chromium.connectOverCDP(browserData.ws_url)
  try {
    const page = await browser.newPage()
    await page.goto('https://instagram.com')
    const username = page.locator('//input[@name="username"]')
    const password = page.locator('//input[@name="password"]')
    await username.waitFor()
    await username.fill(USERNAME)
    await password.fill(PASSWORD)
    const login = page.locator('//form[@id="loginForm"]//button[@type="submit"]')
    await login.waitFor()
    await login.click()
    // Give the page time to react to the submit before capturing the result.
    await page.waitForTimeout(10000)
    await page.screenshot({ path: 'instagram.jpg' })
  } finally {
    // Without this the CDP connection stays open and the script never exits.
    await browser.close()
  }
}
Collect user's followers
As in the previous example, you can start either a one-time or a persistent profile; the rest of the code works the same for both. In this example we will use a persistent profile.
- Python
- Javascript
import asyncio
import os
from contextlib import contextmanager
import httpx
from playwright.async_api import async_playwright
API_TOKEN = os.environ['API_TOKEN']
PROFILE_UUID = os.environ['PROFILE_UUID']
INSTAGRAM_API = 'https://www.instagram.com/api/v1'
INSTAGRAM_USERNAME = os.environ['INSTAGRAM_USERNAME']
async def start_persistent_profile():
    """Start the persistent profile ``PROFILE_UUID`` and return the JSON response.

    The request body sets ``browser_settings.inactive_kill_timeout`` to 240
    (presumably seconds of inactivity before the browser is stopped;
    confirm against the Surfsky API reference).
    """
    start_payload = {
        'browser_settings': {
            'inactive_kill_timeout': 240
        }
    }
    async with httpx.AsyncClient(
        base_url='https://api-public.surfsky.io',
        headers={'X-Cloud-Api-Token': API_TOKEN},
        timeout=60.0,
    ) as session:
        reply = await session.post(f'/profiles/{PROFILE_UUID}/start', json=start_payload)
        reply.raise_for_status()
        return reply.json()
@contextmanager
def user_info_handler(page):
    """Capture the ``web_profile_info`` response for ``INSTAGRAM_USERNAME``.

    Registers a ``response`` listener on ``page`` and yields a future that
    resolves to the ``data.user`` object of the first matching response.
    Must be entered while an event loop is running. The listener is removed
    on exit, including when the ``with`` body raises.
    """
    result = asyncio.get_running_loop().create_future()
    target_url = f'{INSTAGRAM_API}/users/web_profile_info/?username={INSTAGRAM_USERNAME}'

    async def handler(response):
        if result.done() or response.url != target_url:
            return
        payload = await response.json()
        # Re-check after the await: a concurrent matching response may have
        # resolved the future already, and set_result() twice would raise.
        if not result.done():
            result.set_result(payload['data']['user'])

    page.on('response', handler)
    try:
        yield result
    finally:
        page.remove_listener('response', handler)
@contextmanager
def followers_handler(user_id, page):
    """Capture the next followers-list response for ``user_id``.

    Registers a ``response`` listener on ``page`` and yields a future that
    resolves to the decoded JSON of the first response whose URL starts with
    ``{INSTAGRAM_API}/friendships/{user_id}/followers``. Must be entered
    while an event loop is running. The listener is removed on exit,
    including when the ``with`` body raises.
    """
    result = asyncio.get_running_loop().create_future()
    target_url = f'{INSTAGRAM_API}/friendships/{user_id}/followers'

    async def handler(response):
        if result.done() or not response.url.startswith(target_url):
            return
        payload = await response.json()
        # Re-check after the await: a concurrent matching response may have
        # resolved the future already, and set_result() twice would raise.
        if not result.done():
            result.set_result(payload)

    page.on('response', handler)
    try:
        yield result
    finally:
        page.remove_listener('response', handler)
async def main():
    """Scrape the followers of ``INSTAGRAM_USERNAME`` and print the total.

    Starts the persistent profile, opens the user's page to learn the user
    id from the intercepted profile-info response, then opens the followers
    dialog and scrolls it while the last response reports ``big_list``.

    Every wait for an intercepted response is bounded by the same timeout,
    so a missing response raises ``asyncio.TimeoutError`` instead of hanging.
    """
    response_timeout = 30
    browser_data = await start_persistent_profile()
    cdp_url = browser_data['ws_url']
    async with async_playwright() as p:
        browser = await p.chromium.connect_over_cdp(cdp_url)
        async with browser:
            default_context = browser.contexts[0]
            page = await default_context.new_page()

            with user_info_handler(page) as user_info_fut:
                await page.goto(f'https://instagram.com/{INSTAGRAM_USERNAME}')
                user_info = await asyncio.wait_for(user_info_fut, response_timeout)
            user_id = user_info['id']

            all_followers = []
            with followers_handler(user_id, page) as followers_fut:
                await page.goto(f'https://instagram.com/{INSTAGRAM_USERNAME}/followers/')
                followers = await asyncio.wait_for(followers_fut, response_timeout)
            all_followers.extend(followers['users'])

            # The locator is lazy, so it can be created once outside the loop.
            followers_modal = page.locator('//div[@role="dialog"]//div[@role="dialog"]')
            while followers['big_list']:
                await followers_modal.wait_for()
                # The wheel event goes to the element under the pointer.
                await followers_modal.hover()
                with followers_handler(user_id, page) as followers_fut:
                    await page.mouse.wheel(0, 15000)
                    followers = await asyncio.wait_for(followers_fut, response_timeout)
                all_followers.extend(followers['users'])

            print(f'Total followers scraped: {len(all_followers)}')
const axios = require('axios')
const { chromium } = require('playwright')
const API_TOKEN = process.env.API_TOKEN
// The persistent profile is addressed by its UUID; reading PROXY here was a
// copy-paste slip that produced a wrong /profiles/<id>/start URL.
const PROFILE_UUID = process.env.PROFILE_UUID
const INSTAGRAM_API = 'https://www.instagram.com/api/v1'
const INSTAGRAM_USERNAME = process.env.INSTAGRAM_USERNAME
// Shared axios client for the Surfsky public API (timeout is in milliseconds).
const SURFSKY_API = axios.create({
  baseURL: 'https://api-public.surfsky.io',
  timeout: 100000,
})
/**
 * Starts the persistent profile PROFILE_UUID via the Surfsky API.
 * @returns {Promise<object>} Response body of the start call.
 */
async function startPersistentProfile() {
  const startPath = `/profiles/${PROFILE_UUID}/start`
  const options = { headers: { 'X-Cloud-Api-Token': API_TOKEN } }
  const reply = await SURFSKY_API.post(startPath, {}, options)
  return reply.data
}
/**
 * Returns a promise that fulfils (with no value) after `ms` milliseconds.
 * @param {number} ms - Time to wait.
 * @returns {Promise<void>}
 */
const delay = (ms) =>
  new Promise((done) => {
    setTimeout(done, ms)
  })
/**
 * Scrapes the followers of INSTAGRAM_USERNAME and logs the total.
 *
 * Starts the persistent profile, listens to network responses to pick up
 * the profile info (for the user id) and each followers page, then scrolls
 * the followers dialog while the last page reports `big_list`. The browser
 * connection is closed when the function finishes or throws.
 *
 * @returns {Promise<void>}
 * @throws {Error} When the profile-info response was not observed.
 */
async function main() {
  const browserData = await startPersistentProfile()
  const browser = await chromium.connectOverCDP(browserData.ws_url)
  try {
    const defaultContext = browser.contexts()[0]
    const page = await defaultContext.newPage()
    const profileInfoUrl = `${INSTAGRAM_API}/users/web_profile_info/?username=${INSTAGRAM_USERNAME}`
    let userInfo = null
    let followers = null

    page.on('response', async response => {
      // In Playwright for Node.js `url` is a method, not a property.
      const url = response.url()
      try {
        if (url === profileInfoUrl) {
          userInfo = (await response.json()).data.user
        } else if (
          userInfo &&
          // Prefix match: the real request carries query parameters.
          url.startsWith(`${INSTAGRAM_API}/friendships/${userInfo.id}/followers`)
        ) {
          followers = await response.json()
        }
      } catch (err) {
        console.error(`Failed to read response from ${url}:`, err)
      }
    })

    await page.goto(`https://instagram.com/${INSTAGRAM_USERNAME}`)
    await delay(10000)
    if (!userInfo) {
      throw new Error(`Profile info for ${INSTAGRAM_USERNAME} was not received`)
    }

    const allFollowers = []
    await page.goto(`https://instagram.com/${INSTAGRAM_USERNAME}/followers/`)
    await delay(10000)
    if (followers) {
      allFollowers.push(...followers.users)
      const followersModal = page.locator('//div[@role="dialog"]//div[@role="dialog"]')
      while (followers && followers.big_list) {
        await followersModal.waitFor()
        // The wheel event goes to the element under the pointer.
        await followersModal.hover()
        followers = null
        await page.mouse.wheel(0, 15000)
        await delay(10000)
        // No new page arrived in time: stop instead of dereferencing null.
        if (followers) {
          allFollowers.push(...followers.users)
        }
      }
    }
    console.log(`Total followers scraped: ${allFollowers.length}`)
  } finally {
    await browser.close()
  }
}