Follow

Keep Up to Date with the Most Important News

By pressing the Subscribe button, you confirm that you have read and are agreeing to our Privacy Policy and Terms of Use
Contact

Scrape the information using selenium

import time

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager

# Build the Chrome options in one pass and start a managed ChromeDriver.
options = webdriver.ChromeOptions()
for flag in (
    "--no-sandbox",
    "--disable-gpu",
    "--window-size=1920x1080",
    "--disable-extensions",
):
    options.add_argument(flag)

# webdriver_manager downloads a matching chromedriver binary on demand.
chrome_driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=options,
)


def supplyvan_scraper():
    """Scrape lawyer name, email and website from zoekeenadvocaat.advocatenorde.nl.

    Loads the search-result page, collects the link to every lawyer's detail
    page, then visits each detail page and prints (title, email, website).
    Fields that are absent on a page are printed as None.

    Side effects: drives the module-level ``chrome_driver`` browser and
    prints to stdout; the driver is quit when the ``with`` block exits.
    """
    with chrome_driver as driver:
        driver.implicitly_wait(15)
        URL = 'https://zoekeenadvocaat.advocatenorde.nl/zoeken?q=&type=advocaten&limiet=10&sortering=afstand&filters%5Brechtsgebieden%5D=%5B%5D&filters%5Bspecialisatie%5D=0&filters%5Btoevoegingen%5D=0&locatie%5Badres%5D=Holland&locatie%5Bgeo%5D%5Blat%5D=52.132633&locatie%5Bgeo%5D%5Blng%5D=5.291266&locatie%5Bstraal%5D=56&locatie%5Bhash%5D=67eb2b8d0aab60ec69666532ff9527c9&weergave=lijst&pagina=1'
        driver.get(URL)
        time.sleep(3)

        page_links = [element.get_attribute('href') for element in
                      driver.find_elements(By.XPATH, "//span[@class='h4 no-margin-bottom']//a")]

        # visit all the links
        for link in page_links:
            driver.get(link)
            time.sleep(2)
            # Reset per page so a failed lookup never prints a stale value
            # from the previous page (and never raises NameError).
            title = email = website = None
            try:
                title = driver.find_element(By.CSS_SELECTOR, '.title h3').text
            except NoSuchElementException:
                pass

            details = driver.find_elements(By.XPATH, "//section[@class='lawyer-info']")
            for detail in details:
                # ".//" keeps the search inside this <section>; a bare "//"
                # would search the whole document.  The href lives on the
                # <a> inside the column div, so the path must end in "/a" —
                # reading href off the div itself returns None.
                try:
                    email = detail.find_element(
                        By.XPATH, ".//div[@class='row'][3]//div[@class='column small-9']/a"
                    ).get_attribute('href')
                except NoSuchElementException:
                    pass
                try:
                    website = detail.find_element(
                        By.XPATH, ".//div[@class='row'][4]//div[@class='column small-9']/a"
                    ).get_attribute('href')
                except NoSuchElementException:
                    pass
                print(title, email, website)
            time.sleep(2)

        time.sleep(2)
        driver.quit()


# Guard the entry point so importing this module does not launch a browser.
if __name__ == "__main__":
    supplyvan_scraper()

I am trying to scrape the email and website, but both come back as None.
What XPath expressions should I use for the email and website? Is there a feasible approach? Here is the page link: https://zoekeenadvocaat.advocatenorde.nl/advocaten/oosterwolde-fr/de-heer-mr-wr-kamminga/11253211600

enter image description here

MEDevel.com: Open-source for Healthcare and Education

Collecting and validating open-source software for healthcare, education, enterprise, development, medical imaging, medical records, and digital pathology.

Visit Medevel

>Solution :

Change xpath for email:

//div[@class='row'][3]//div[@class='column small-9']/a

and for website:

//div[@class='row'][4]//div[@class='column small-9']/a
Add a comment

Leave a Reply

Keep Up to Date with the Most Important News

By pressing the Subscribe button, you confirm that you have read and are agreeing to our Privacy Policy and Terms of Use

Discover more from Dev solutions

Subscribe now to keep reading and get access to the full archive.

Continue reading