# gs_crawler.py
import aiohttp
import asyncio
import json
import logging

# Support both execution modes: absolute imports when the module is run
# directly (or loaded under a bare name), relative imports when it is
# loaded as part of the package.
if __name__ == "__main__" or __name__ == "Crawler":
    from base.crawler import Crawler
    from event_items import EventItem
    from event_items import PromotionType
    from event_items import ConvenienceStoreType
else:
    from .base.crawler import Crawler
    from .event_items import EventItem
    from .event_items import PromotionType
    from .event_items import ConvenienceStoreType
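
# For reference, a hedged sketch of what event_items.py presumably provides,
# inferred from how these names are used below. The field and member names
# appear in this file; the Enum/dataclass structure is an assumption.
#
#   from dataclasses import dataclass
#   from enum import Enum, auto
#
#   class PromotionType(Enum):
#       buy_one_get_one_free = auto()  # "1+1" events
#       buy_two_get_one_free = auto()  # "2+1" events
#
#   class ConvenienceStoreType(Enum):
#       gs25 = auto()
#
#   @dataclass
#   class EventItem:
#       promotion_type: PromotionType
#       store: ConvenienceStoreType
#       event_name: str
#       price: float
#       name: str
#       image_url: str | None = None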


class GSCrawler(Crawler):
    """Crawler for GS25 1+1 / 2+1 event goods."""

    __logger = logging.getLogger(__name__)
    _base_url = "http://gs25.gsretail.com/gscvs/ko/products/event-goods-search"
    # Query values for the two promotion types exposed by the API.
    __parameter_lists = ["ONE_TO_ONE", "TWO_TO_ONE"]

    async def __parse_data(
        self, session: aiohttp.ClientSession, json_data
    ) -> list[EventItem]:
        """Convert one page of API results into EventItem objects."""
        event_items = []
        for item in json_data["results"]:
            event_item = EventItem(
                promotion_type=(
                    PromotionType.buy_one_get_one_free
                    if item["eventTypeNm"] == "1+1"
                    else PromotionType.buy_two_get_one_free
                ),
                store=ConvenienceStoreType.gs25,
                event_name=item["goodsNm"],
                price=item["price"],
                name=item["goodsNm"],
                # Keep the image URL only if it actually resolves to an image.
                image_url=(
                    item["attFileNm"]
                    if await self._is_valid_image(session, item["attFileNm"])
                    else None
                ),
            )
            event_items.append(event_item)
        return event_items

    async def execute(self):
        """Fetch every page for each promotion type and return all items."""
        data_array = []
        async with aiohttp.ClientSession() as session:
            for parameter_list in self.__parameter_lists:
                page_num = 1
                while True:
                    params = {
                        "pageNum": page_num,
                        "pageSize": 20,
                        "parameterList": parameter_list,
                    }
                    json_data = json.loads(
                        await self._fetch_data(session, self._base_url, params=params)
                    )
                    event_items = await self.__parse_data(session, json_data)
                    if not event_items:
                        # An empty page means we have run past the last page.
                        self.__logger.debug("Finished parsing the data to the end")
                        break
                    data_array.extend(event_items)
                    # Stop once the reported page count is reached.
                    total_pages = json_data["pagination"]["numberOfPages"]
                    if page_num >= total_pages:
                        break
                    self.__logger.debug(f"PageNumber Increasing... {page_num}")
                    page_num += 1
                self.__logger.debug(f"GS: {parameter_list} Done.")
        return data_array


async def main():
    crawler = GSCrawler()
    items = await crawler.execute()
    print(len(items))


if __name__ == "__main__":
    asyncio.run(main())
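

# ---------------------------------------------------------------------------
# Illustrative only: a minimal sketch of the base-class contract GSCrawler
# relies on. The real implementation lives in base/crawler.py; the signatures
# below are inferred from the call sites above and are assumptions, not the
# actual code.
#
#   class Crawler:
#       async def _fetch_data(
#           self, session: aiohttp.ClientSession, url: str, params=None
#       ) -> str:
#           # GET the URL and return the raw response body as text.
#           async with session.get(url, params=params) as response:
#               response.raise_for_status()
#               return await response.text()
#
#       async def _is_valid_image(
#           self, session: aiohttp.ClientSession, url: str
#       ) -> bool:
#           # HEAD the URL; treat a 200 response as a usable image.
#           try:
#               async with session.head(url) as response:
#                   return response.status == 200
#           except aiohttp.ClientError:
#               return False
# ---------------------------------------------------------------------------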