Our API is designed to let you run multiple scraping operations concurrently. Depending on your plan, that means you can scrape hundreds, thousands, or even millions of pages per day.
The higher your concurrent request limit, the more calls you can have in flight at once, and the faster you can scrape. The Python example below uses the ScrapingBee client with a thread pool from concurrent.futures to scrape several URLs in parallel, taking a screenshot of each page and retrying failed requests.
import concurrent.futures
import time

from scrapingbee import ScrapingBeeClient  # Import ScrapingBee's Python client

client = ScrapingBeeClient(api_key='YOUR-API-KEY')  # Initialize the client with your API key

MAX_RETRIES = 5  # Maximum number of attempts for a failed request
MAX_THREADS = 4  # Number of requests to run in parallel

urls = ["http://scrapingbee.com/blog", "http://reddit.com/"]

def scrape(url):
    for _ in range(MAX_RETRIES):
        response = client.get(url, params={'screenshot': True})  # Scrape the page and take a screenshot
        if response.ok:  # The request succeeded
            with open("./" + str(time.time()) + "screenshot.png", "wb") as f:
                f.write(response.content)  # Save the screenshot to a timestamped .png file
            break  # Get out of the retry loop
        else:  # The request failed: print the error and try again
            print(response.content)

with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_THREADS) as executor:
    executor.map(scrape, urls)
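executor.map runs scrape across the URL list but discards its return values, so the only sign of a failure is the printed error body. If you want an explicit per-URL outcome, one option is to submit each task and iterate over the results with as_completed. The snippet below is a minimal sketch along those lines: it reuses the client, MAX_RETRIES, MAX_THREADS, and urls defined above, and scrape_with_status is a hypothetical helper for this example, not part of the ScrapingBee library.

def scrape_with_status(url):
    # Hypothetical variant of scrape() that reports whether a screenshot was saved.
    for _ in range(MAX_RETRIES):
        response = client.get(url, params={'screenshot': True})
        if response.ok:
            with open("./" + str(time.time()) + "screenshot.png", "wb") as f:
                f.write(response.content)
            return True  # Screenshot saved
    return False  # All retries failed

with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_THREADS) as executor:
    futures = {executor.submit(scrape_with_status, url): url for url in urls}
    for future in concurrent.futures.as_completed(futures):
        url = futures[future]
        print(url, "saved" if future.result() else "failed after retries")

Keeping max_workers at or below your plan's concurrency limit avoids hitting the API's concurrent request cap while still using all the parallelism you pay for.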