from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions
import time
# Shared Chrome WebDriver session. It is created as soon as this module is
# loaded and is accessed by the crawl functions in this file through the
# module-level name `driver`.
driver = webdriver.Chrome()
# Maximize the browser window before any page is loaded.
driver.maximize_window()
def crawlHouseDetailForInvoke(timeout=5):
    """Expand the price/tax history section of the open page and print its table.

    Operates on the module-level ``driver``; the caller is expected to have
    already navigated to a house detail page.

    Args:
        timeout: Maximum number of seconds to wait for the history table to
            become visible after the section header is clicked.

    Raises:
        selenium.common.exceptions.NoSuchElementException: If the
            collapsible header is not present on the page.
        selenium.common.exceptions.TimeoutException: If the table does not
            become visible within ``timeout`` seconds.
    """
    table_xpath = (
        '//div[@id = "wrapper"]/div/div/div/div/div[@id = "detail-container-column"]'
        '/div/div/main/div/div/div/div/div/section[3]/div/div/div/table'
    )

    # price/tax history
    # find_element(By...) replaces the find_element_by_* helpers, which were
    # removed in Selenium 4.3.
    driver.find_element(By.CLASS_NAME, "collapsible-header").click()

    # Wait explicitly for the table instead of sleeping a fixed 5 seconds:
    # this returns as soon as the table is shown and fails loudly otherwise.
    # Visibility (not mere presence) matters because WebElement.text only
    # reports text that is actually rendered.
    table = WebDriverWait(driver, timeout).until(
        expected_conditions.visibility_of_element_located((By.XPATH, table_xpath))
    )
    print(table.text)
def crawlRegion(url):
    """Open a search-results page and crawl every listing linked from it.

    For each result link the function prints its index and href, clicks it,
    waits for the detail view, calls ``crawlHouseDetailForInvoke()`` and then
    navigates back to the results. A failure on one listing is reported and
    does not stop the remaining listings from being processed.

    Args:
        url: Address of the search-results page to load in the shared
            module-level ``driver``.

    Raises:
        selenium.common.exceptions.NoSuchElementException: If the page has
            no element with id ``list-results``.
    """
    links_xpath = '//div[@id = "search-results"]/ul/li/article/div/a'

    driver.get(url)
    # Fail fast when the results container is missing, as the original did.
    # The XPath above starts with "//", so it has always been evaluated
    # against the whole document rather than relative to this container.
    driver.find_element(By.ID, 'list-results')

    total = len(driver.find_elements(By.XPATH, links_xpath))
    print("information in this page:%d" % total)

    for i in range(total):
        navigated = False
        try:
            # Re-locate the anchors on every pass: elements captured before a
            # click()/back() round trip can be stale once the page reloads.
            links = WebDriverWait(driver, 10).until(
                expected_conditions.presence_of_all_elements_located(
                    (By.XPATH, links_xpath)
                )
            )
            if i >= len(links):
                print("listing %d is no longer present, stopping" % i)
                break
            print(i)
            print(links[i].get_attribute("href"))
            links[i].click()
            navigated = True
            time.sleep(8)
            crawlHouseDetailForInvoke()
        except Exception as e:
            # Best-effort crawl: report the failure instead of hiding it,
            # then move on to the next listing.
            print("failed to crawl listing %d: %r" % (i, e))
        finally:
            # Only go back if the click actually happened; otherwise back()
            # would leave the results page and break every later iteration.
            if navigated:
                driver.back()
# Script entry point: crawl the recently-sold listings for one region and
# always shut the browser down afterwards.
# The guard must test ``__name__``; the bare ``name`` used previously is
# undefined and raised NameError as soon as the module was executed.
if __name__ == "__main__":
    regionUrl = "https://www.zillow.com/homes/recently_sold/Culver-City-CA/house,condo,apartment_duplex,townhouse_type/51617_rid/12m_days/globalrelevanceex_sort/34.05529,-118.33211,33.956531,-118.485919_rect/12_zm/"
    print("crawler is started...")
    try:
        crawlRegion(regionUrl)
    finally:
        # quit() closes every window and ends the WebDriver session, so a
        # separate close() call is unnecessary. Running it in ``finally``
        # avoids leaving an orphaned browser behind if the crawl raises.
        driver.quit()