How to web-scrape multiple page with Selenium (Python)
10,435
Try the code below. It will loop through all pages, not only 5. It checks for the "next" button: if it is available, it navigates to the next page; otherwise it breaks out of the while loop.
# Scrape "immeuble de rapport" listings from immoweb.be with Selenium +
# BeautifulSoup, following the "next" pagination link for up to 5 result
# pages, then dump the collected fields to output.csv.
import time

import pandas as pd
from bs4 import BeautifulSoup
from fake_useragent import UserAgent
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

options = Options()
options.add_argument("window-size=1400,600")

# Randomize the User-Agent so the requests look less like automation.
ua = UserAgent()
user_agent = ua.random
print(user_agent)
options.add_argument(f'user-agent={user_agent}')

driver = webdriver.Chrome('/Users/raduulea/Documents/chromedriver', options=options)
driver.get('https://www.immoweb.be/fr/recherche/immeuble-de-rapport/a-vendre')


def _text(node):
    """Return the stripped text of a BeautifulSoup node, or '' if missing.

    Guards against AttributeError when a listing lacks one of the fields.
    """
    return node.get_text().strip() if node is not None else ""


Title = []
address = []
price = []
surface = []
desc = []

MAX_PAGES = 5  # remove/raise this cap to traverse every available page
page = 2       # the landing URL above is page 1; the next page we load is 2

while True:
    time.sleep(10)  # crude wait for the JS-rendered results to appear
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    for result in soup.find_all("div", {"class": "result-xl"}):
        Title.append(_text(result.find("div", {"class": "title-bar-left"})))
        # attrs must be a dict; the original passed the set {"result-adress"}.
        # "result-adress" is the site's own (misspelled) CSS class name.
        address.append(_text(result.find("span", {"class": "result-adress"})))
        price.append(_text(result.find("div", {"class": "xl-price rangePrice"})))
        surface.append(_text(result.find("div", {"class": "xl-surface-ch"})))
        desc.append(_text(result.find("div", {"class": "xl-desc"})))

    # Stop when there is no "next" link, or once MAX_PAGES pages are scraped.
    # (The original broke AFTER loading page 5, so page 5 was fetched but
    # never scraped -- an off-by-one; here the cap is checked before loading.)
    # NOTE(review): find_elements_by_css_selector was removed in Selenium 4;
    # there, use driver.find_elements(By.CSS_SELECTOR, "a.next") instead.
    if page > MAX_PAGES or not driver.find_elements_by_css_selector("a.next"):
        break
    driver.get("https://www.immoweb.be/fr/recherche/immeuble-de-rapport/a-vendre/?page={}".format(page))
    page += 1

df = pd.DataFrame({"Title": Title, "Address": address, "Price:": price, "Surface": surface, "Description": desc})
df.to_csv("output.csv")
Author by
mr-kim
Updated on June 04, 2022 — Comments
-
mr-kim almost 2 years
I've seen several solutions to scrape multiple pages from a website, but couldn't make it work on my code.
At the moment, I have this code, which works to scrape the first page. I would like to create a loop to scrape all the pages of the website (from page 1 to 5).
import pandas as pd from selenium import webdriver from selenium.webdriver.chrome.options import Options from bs4 import BeautifulSoup options = Options() options.add_argument("window-size=1400,600") from fake_useragent import UserAgent ua = UserAgent() a = ua.random user_agent = ua.random print(user_agent) options.add_argument(f'user-agent={user_agent}') driver = webdriver.Chrome('/Users/raduulea/Documents/chromedriver', options=options) driver.get('https://www.immoweb.be/fr/recherche/immeuble-de-rapport/a-vendre/liege/4000?page=1') import time time.sleep(10) html = driver.page_source soup = BeautifulSoup(html, 'html.parser') results = soup.find_all("div", {"class":"result-xl"}) title=[] address=[] price=[] surface=[] desc=[] for result in results: title.append(result.find("div", {"class":"title-bar-left"}).get_text().strip()) address.append(result.find("span", {"result-adress"}).get_text().strip()) price.append(result.find("div", {"class":"xl-price rangePrice"}).get_text().strip()) surface.append(result.find("div", {"class":"xl-surface-ch"}).get_text().strip()) desc.append(result.find("div", {"class":"xl-desc"}).get_text().strip()) df = pd.DataFrame({"Title":title,"Address":address,"Price:":price,"Surface" : surface,"Description":desc}) df.to_csv("output.csv")