-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathrent.py
220 lines (155 loc) · 8 KB
/
rent.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
import sys, os, time
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import user
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
import time
import xlrd
from openpyxl import load_workbook
from PIL import Image, ImageOps
# Login credentials are read from the local `user` module.
username = user.username
passkey = user.password
# Open the address workbook and take its first sheet.
raw = xlrd.open_workbook('addresses1.xlsx')
sheet = raw.sheet_by_index(0)
# Number of address rows: every entry of column 0 except the first one
# (the first row is skipped below, presumably because it is a header).
count = len(sheet.col_values(0))-1
# Build one formatted address string per data row from columns 0-5.
# Columns 0 and 5 are converted to int before being written out (which
# strips a float's trailing ".0"); columns 1-4 are used as text.
addresses = []
for rowNum in range(1, count + 1):
    addresses.append(
        f"{int(sheet.cell(rowNum, 0).value)} "
        f"{sheet.cell(rowNum, 1).value!s} "
        f"{sheet.cell(rowNum, 2).value!s}, "
        f"{sheet.cell(rowNum, 3).value!s}, "
        f"{sheet.cell(rowNum, 4).value!s} "
        f"{int(sheet.cell(rowNum, 5).value)}"
    )
# Selenium setup: location of the chromedriver executable.
# Raw string so the backslashes of the Windows path are taken literally;
# "\P" and "\c" are invalid escape sequences in a normal string literal.
PATH = r"C:\Program Files (x86)\chromedriver.exe"
# Options to open the browser window fully maximized and the page zoomed in.
options = webdriver.ChromeOptions()
options.add_argument("--start-maximized")
options.add_argument("--force-device-scale-factor=1.1")
# Driver used for capturing the rent estimate chart.
driver = webdriver.Chrome(PATH, options=options)
# Open the website used to retrieve the rent estimate chart.
driver.get("https://www.gosection8.com/logreg.aspx?user=landlord")
# Fill in the login form and submit it with RETURN.
email = driver.find_element_by_id("txtUsername")
email.send_keys(username)
password = driver.find_element_by_id("txtPassword")
password.send_keys(passkey)
password.send_keys(Keys.RETURN)
# Go to the rent estimate section of the website and press the
# disclosure button so the comparison form can be used.
driver.get("https://www.gosection8.com/ll/ll_CompareRent.aspx")
agree = driver.find_element_by_id("MainContentPlaceHolder_MainContent2_disclosureButton")
agree.send_keys(Keys.RETURN)
# Size the chart screenshot is fitted to, and the four crop boxes
# (left, upper, right, lower) it is then cut into.
size = (450, 360)
size1 = (85,55,185,300)
size2 = (185,55,255,300)
size3 = (255,55,330,300)
size4 = (330,55,430,300)
# File names of the cropped images, in the order they are produced.
imageList = []
# The screenshots and crops are written to ./charts; create it up front so
# the first save does not fail when the folder is missing.
os.makedirs("./charts", exist_ok=True)
# Loop through the address list, entering each address into the rent estimate
# form for bedroom counts 1-4, capturing the chart and cropping it into
# smaller images where each image contains only one bar from the chart.
for strAddress in addresses:
    # File-name stem: the address with spaces and commas removed.
    stem = strAddress.replace(" ", "").replace(",", "")
    for bed in range(4):
        address = driver.find_element_by_id("MainContentPlaceHolder_MainContent2_AutocompleteAddress")
        beds = driver.find_element_by_id("MainContentPlaceHolder_MainContent2_BedroomCount")
        address.clear()
        beds.clear()
        address.send_keys(strAddress)
        beds.send_keys(bed+1)
        beds.send_keys(Keys.RETURN)
        image = driver.find_element_by_id("MainContentPlaceHolder_MainContent2_compareRentChart")
        chartName = stem + "-" + str(bed+1)
        image.screenshot("./charts/" + chartName + ".png")
        # Close the screenshot file as soon as the resized copy exists.
        with Image.open("./charts/" + chartName + ".png") as openImg:
            resizeImg = ImageOps.fit(openImg, size, Image.ANTIALIAS)
        # Save the four crops as <chartName>-1.png ... <chartName>-4.png.
        for part, box in enumerate((size1, size2, size3, size4), start=1):
            resizeImg.crop(box).save("./charts/" + chartName + "-" + str(part) + ".png")
            # Stored without the leading "." - the name is later appended
            # to os.getcwd() to form the upload path.
            imageList.append("/charts/" + chartName + "-" + str(part) + ".png")
# Close the rent estimate Chrome window and end the driver session.
driver.close()
driver.quit()
# Write the cropped-image file names into the first worksheet and save it.
# Every run of 16 consecutive names goes on one row: rows start at 2 and
# the names occupy 16 columns starting at column 23.
wb = load_workbook(filename="addresses1.xlsx")
ws = wb.worksheets[0]
for index, fileName in enumerate(imageList):
    rowOffset, colOffset = divmod(index, 16)
    ws.cell(row=rowOffset + 2, column=colOffset + 23).value = fileName
wb.save(filename="addresses1.xlsx")
# Read the cropped-image file names back from the spreadsheet into a flat
# list; the names are used later to feed the images into the Optical
# Character Recognition website.
wb = load_workbook(filename="addresses1.xlsx")
ws = wb.worksheets[0]
# Number of street addresses: length of column A minus one
# (the first row is not read below).
numStreet = len(ws['A'])-1
# Row by row, collect the 16 cells starting at column 23.
fileList = [
    ws.cell(row=row + 2, column=col + 23).value
    for row in range(numStreet)
    for col in range(16)
]
# Start 16 Chrome drivers so that 16 images can be processed at once,
# which saves time compared with a single window.
driverOCR = [webdriver.Chrome(PATH, options=options) for _ in range(16)]
# Load the optical character recognition (OCR) website in every window.
for ocrWindow in driverOCR:
    ocrWindow.get("https://brandfolder.com/workbench/extract-text-from-image")
# Loop through the cropped images in groups of 16 (one image per open Chrome
# window), feed them into the OCR website, retrieve the recognised text and
# write it into the spreadsheet. Group j is written to worksheet row j+2.
OCR_URL = "https://brandfolder.com/workbench/extract-text-from-image"
for j in range(len(fileList)//16):
    time.sleep(5)
    rentEst = []
    for k in range(16):
        print("k=", k)
        try:
            # capture the input element of the OCR website to feed the image
            dropInput = driverOCR[k].find_element_by_class_name("fsp-drop-pane__input")
        except Exception:
            # `Exception` rather than a bare except, so Ctrl-C / SystemExit
            # still stop the script instead of triggering a page reload.
            # If capturing the input element fails, reload the website and
            # try capturing the input element once more.
            print("exception error when appending upload and k=", k)
            driverOCR[k].get(OCR_URL)
            time.sleep(2)
            dropInput = driverOCR[k].find_element_by_class_name("fsp-drop-pane__input")
        # clear the upload input element and send the cropped image
        dropInput.clear()
        dropInput.send_keys(os.getcwd() + fileList[16*j + k])
    # wait a few moments until the OCR website processes the images and
    # outputs the text data
    time.sleep(5)
    # loop through the open chrome windows to capture the output text data
    for win in range(16):
        # capture the text element of the OCR website
        text = driverOCR[win].find_element_by_id("extracted_text").text
        # In case the image was not uploaded and there is no output text,
        # reload the page, upload the image again and re-read the text.
        # NOTE: this repeats until some text is returned; there is no
        # retry limit.
        while not text:
            print("when text is null l=", win)
            driverOCR[win].get(OCR_URL)
            time.sleep(2)
            uploadS = driverOCR[win].find_element_by_class_name("fsp-drop-pane__input")
            uploadS.clear()
            uploadS.send_keys(os.getcwd() + fileList[16*j + win])
            time.sleep(5)
            text = driverOCR[win].find_element_by_id("extracted_text").text
        # add text data to the rent estimate list
        rentEst.append(text)
        # refresh the page to prepare for the next group of cropped images
        driverOCR[win].refresh()
    # Open the spreadsheet and save this group's rent estimates, starting at
    # column 7. Saving after every group keeps the results gathered so far
    # on disk.
    wb = load_workbook(filename="addresses1.xlsx")
    ws = wb.worksheets[0]
    for index, estimate in enumerate(rentEst):
        ws.cell(row=j+2, column=index+7).value = estimate
    wb.save(filename="addresses1.xlsx")
# All rent estimate data has been saved to the spreadsheet by now:
# close every OCR Chrome window and end its driver session.
for ocrDriver in driverOCR:
    ocrDriver.close()
    ocrDriver.quit()