-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathidealista.py
86 lines (68 loc) · 2.61 KB
/
idealista.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import logging
import time
import selenium.common.exceptions
from selenium.webdriver.common.by import By
import settings
from cli import Client
class IdealistaClient(Client):
    """Client that collects rental listings from idealista.com.

    The search URL is built from the PROVINCE, CITY, MIN/MAX_PRICE and
    MIN/MAX_SURFACE values of the ``settings`` module.  Browser access
    (``self.client``), bookkeeping collections (``self.visited``,
    ``self.new_aparts``, ``self.trgt_aparts``), ``self.logger``,
    ``self.exit`` and ``self.refresh_listings`` are not defined here;
    presumably they are provided by the ``Client`` base class -- confirm
    against ``cli.py``.
    """

    # Search URL template.  Built from adjacent literals so that the final
    # URL contains no newlines or indentation.
    # Unfiltered example:
    #   https://www.idealista.com/en/alquiler-viviendas/barcelona-barcelona
    baseurl = (
        "https://www.idealista.com/en/alquiler-viviendas/{province}-{city}/"
        "con-precio-hasta_{max_price},precio-desde_{min_price},"
        "metros-cuadrados-mas-de_{min_surface},"
        "metros-cuadrados-menos-de_{max_surface}"
    )
    # NOTE(review): file/outfile are not used in this class; presumably the
    # base class reads them for persistence and result output -- confirm.
    file = "idea.pickle"
    outfile = "ideares.txt"
    # Absolute XPath of the "next page" link in the pagination bar.
    next_page_xpath = "/html/body/div[2]/div/div/main/section/div/ul/li[8]/a"

    def __init__(self, **kwargs):
        """Initialise the base client and fill in the search URL.

        Args:
            **kwargs: Forwarded unchanged to ``Client.__init__``.
        """
        super().__init__(**kwargs)
        # str.format returns a new string, so the result must be stored;
        # the instance attribute shadows the class-level template.
        # NOTE(review): if Client.__init__ already reads self.baseurl, this
        # assignment has to move before the super() call -- confirm.
        self.baseurl = self.baseurl.format(
            province=settings.PROVINCE,
            city=settings.CITY,
            max_price=settings.MAX_PRICE,
            min_price=settings.MIN_PRICE,
            min_surface=settings.MIN_SURFACE,
            max_surface=settings.MAX_SURFACE,
        )

    def parse_listings(self) -> list[str]:
        """Collect listing URLs from the current results page and its successors.

        Starting from whatever page ``self.client`` currently shows, gathers
        every link containing ``/inmueble/`` that is not in ``self.visited``,
        then follows the "next page" link until there is none.

        Returns:
            Listing URLs in the order first encountered, without duplicates.
        """
        listings: list[str] = []
        collected: set[str] = set()
        followed_pages: set[str] = set()
        while True:
            time.sleep(2)  # let the page finish loading
            anchors = self.client.find_elements(by=By.XPATH, value="//a[@href]")
            for anchor in anchors:
                link = anchor.get_attribute("href")
                if not link or "/inmueble/" not in link:
                    continue
                if link in self.visited or link in collected:
                    # A page may link to the same listing several times.
                    continue
                collected.add(link)
                listings.append(link)
            time.sleep(8)  # throttle between result pages
            try:
                next_page_elem = self.client.find_element(
                    By.XPATH, self.next_page_xpath
                )
            except selenium.common.exceptions.NoSuchElementException:
                break
            next_href = next_page_elem.get_attribute("href")
            # Stop on a missing href, or on one already followed, so that
            # pagination can neither crash nor cycle forever.
            if not next_href or next_href in followed_pages:
                break
            followed_pages.add(next_href)
            self.client.get(next_href)
        return listings

    def find_new(self):
        """Visit each new listing and record the ones matching the criteria.

        A listing matches when its lower-cased page source mentions
        "1 month deposit", "no agency" or "private owner" and does not
        mention "3 month deposit".  Matches are appended to
        ``self.trgt_aparts``; every inspected URL is added to
        ``self.visited``.  Calls ``self.exit(1)`` when the site reports too
        many requests.  Finally prints the matching URLs.
        """
        # refresh_listings will call parse_listings
        self.refresh_listings()
        wanted_markers = ("1 month deposit", "no agency", "private owner")
        for url in self.new_aparts:
            time.sleep(1)
            self.client.get(url)
            html = self.client.page_source.lower()
            if "muchas peticiones" in html or "many requests" in html:
                self.logger.error("robot activity detected... exiting...")
                self.exit(1)
            has_wanted = any(marker in html for marker in wanted_markers)
            if has_wanted and "3 month deposit" not in html:
                self.trgt_aparts.append(url)
            self.visited.add(url)
        print(f"found {len(self.trgt_aparts)} new apartements to visit:")
        for apartment in self.trgt_aparts:
            print(apartment)