# main.py
from fastapi import FastAPI, Query
from typing import Optional
from time import sleep

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.service import Service
from selenium.common.exceptions import ElementNotInteractableException
from webdriver_manager.chrome import ChromeDriverManager
app = FastAPI()

# ChromeDriver setup: headless Chrome with a Korean locale.
options = webdriver.ChromeOptions()
options.add_argument('--headless')
options.add_argument('--lang=ko_KR')
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

# Module-level accumulators; note they are shared across requests and never reset.
name_list = []
all_review = []
score_list = []
@app.get("/user/hospital")
async def hospital(address: Optional[str] = Query(None, max_length=30)):
    global driver
    driver.implicitly_wait(4)  # wait up to 4 seconds for elements to render
    driver.get('https://map.kakao.com/')  # open Kakao Map
    # Queries to search; when wired to the server, build this list from the received region name.
    place_infos = ['상도 동물병원']
    for i, place in enumerate(place_infos):
        # throttle: pause after every 4 searches
        if i % 4 == 0 and i != 0:
            sleep(5)
        print("start")
        search(place)
    result = {"name": name_list, "score": score_list, "review": all_review}
    return result
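# Usage sketch (an addition, not in the original file): run the app with uvicorn
# and query the endpoint. The `address` query parameter is accepted but the
# current implementation still searches the hard-coded `place_infos` list.
#
#   uvicorn main:app --reload
#   curl 'http://127.0.0.1:8000/user/hospital?address=Sangdo'
#
# The response has the shape {"name": [...], "score": [...], "review": [[...], ...]}.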
def search(place):
    """Search Kakao Map for `place` and crawl the first page of results."""
    global driver
    search_area = driver.find_element(By.XPATH, '//*[@id="search.keyword.query"]')  # search box
    search_area.send_keys(place)  # type the query
    driver.find_element(By.XPATH, '//*[@id="search.keyword.submit"]').send_keys(Keys.ENTER)  # submit with Enter
    sleep(1)
    # Read the place list on the first results page.
    html = driver.page_source
    soup = BeautifulSoup(html, 'html.parser')
    place_lists = soup.select('.placelist > .PlaceItem')  # matched places
    try:
        # Crawl the places found on the first page (at most 7, sorted by distance).
        crawling(place, place_lists)
        search_area.clear()
    except ElementNotInteractableException:
        print('not found')
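# Note (a sketch, not in the original): the fixed sleep(1) above can be flaky.
# An explicit wait on the result list would be more robust, e.g.:
#
#   from selenium.webdriver.support.ui import WebDriverWait
#   from selenium.webdriver.support import expected_conditions as EC
#   WebDriverWait(driver, 10).until(
#       EC.presence_of_element_located((By.CSS_SELECTOR, '.placelist > .PlaceItem')))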
def crawling(place, place_lists):
    """
    Crawl every place on a results page.
    :param place: name of the place whose reviews are being collected
    :param place_lists: BeautifulSoup elements for the places on the page
    """
    for i, item in enumerate(place_lists):  # `item`, not `place`, to avoid shadowing the parameter
        # The index may need adjusting when ads appear in the list.
        place_name = item.select('.head_item > .tit_name > .link_name')[0].text  # place name
        place_address = item.select('.info_item > .addr > p')[0].text  # place address (currently unused)
        detail_page_xpath = '//*[@id="info.search.place.list"]/li[' + str(i + 1) + ']/div[5]/div[4]/a[1]'
        driver.find_element(By.XPATH, detail_page_xpath).send_keys(Keys.ENTER)
        driver.switch_to.window(driver.window_handles[-1])  # switch to the detail tab
        sleep(1)
        name_list.append(place_name)
        # For now, only the first page of reviews is saved.
        extract_review(place_name)
        driver.close()  # close the detail tab so window handles do not pile up
        driver.switch_to.window(driver.window_handles[0])  # switch back to the search tab
def extract_review(place_name):
    """Collect the star rating and the first page of reviews from the open detail tab."""
    global driver
    ret = True
    html = driver.page_source
    soup = BeautifulSoup(html, 'html.parser')
    # Review list on the first page.
    review_lists = soup.select('.list_evaluation > li')
    # Fetch the star rating.
    try:
        num_rate = soup.select('.grade_star > em')
        rating = num_rate[0].text
        score_list.append(rating.replace('점', ''))  # strip the Korean "points" suffix
    except IndexError:
        score_list.append(0.0)  # no rating found; record 0.0 as a placeholder
    # Reviews exist.
    if len(review_lists) != 0:
        reviews = []
        for review in review_lists:
            comment = review.select('.txt_comment > span')  # review text
            val = ''
            if len(comment) != 0:
                val = comment[0].text + '/0'
            reviews.append(val)
        all_review.append(reviews)
    else:
        print('no review in extract')
        all_review.append([])  # keep all_review aligned with name_list
        ret = False
    return ret
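# Entry-point sketch (an addition, assuming uvicorn is installed alongside FastAPI):
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="127.0.0.1", port=8000)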