-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcodechef.py
126 lines (102 loc) · 4.65 KB
/
codechef.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
from fpdf import FPDF
from selenium.common.exceptions import TimeoutException
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium import webdriver
import os
# Headless Chrome session shared by every scraping function in this file.
options = webdriver.ChromeOptions()
options.add_argument("--headless")
# Work on a copy: DesiredCapabilities.CHROME is a class-level dict, so
# editing it in place would alter the template for the whole process.
capa = DesiredCapabilities.CHROME.copy()
# With the "none" page load strategy the driver does not block until the
# page has finished loading; the functions below wait explicitly for the
# elements they need instead.
capa["pageLoadStrategy"] = "none"
driver = webdriver.Chrome(desired_capabilities=capa, options=options)
baseurl = "https://www.codechef.com/problems"
# Explicit-wait helper bound to the shared driver (15 second timeout).
wait = WebDriverWait(driver, 15)
# map to get url from its problem difficulty
problem_difficulty = {
    "Beginner": "school",
    "Easy": "easy",
    "Medium": "medium",
    "Hard": "hard",
    "Challenge": "challenge",
}
# get_problems returns the name and links of the problems
def get_problems(category, no_of_problems):
    """Collect problem names and URLs from one category listing page.

    Args:
        category: URL path segment appended to ``baseurl`` (one of the
            values of ``problem_difficulty``).
        no_of_problems: Maximum number of table rows to read, starting at
            the first row.

    Returns:
        dict mapping problem name to problem URL. It holds fewer than
        ``no_of_problems`` entries when the listing runs out of rows, or
        when two rows share a name (names are the keys).

    Raises:
        SystemExit: If the first row does not become clickable before
            ``wait`` times out.
    """
    # XPath of the link in row ``{}`` of the listing table; the visible
    # problem name is the <b> element nested inside that link.
    row_link_xpath = ("//*[@id='primary-content']/div/div[2]/div/div[2]"
                      "/table/tbody/tr[{}]/td[1]/div/a")
    # A map to store problem name and problem url
    problem_info = {}
    try:
        driver.get(baseurl + '/' + category)
        # wait till the first element is loaded
        wait.until(EC.element_to_be_clickable(
            (By.XPATH, row_link_xpath.format(1) + "/b")))
    except TimeoutException:
        print("Couldn't fetch problem. Network issue or page slow to render. Try again")
        # End the browser session and leave through SystemExit rather than
        # os._exit, so buffered output is flushed and no browser process is
        # left behind.
        driver.quit()
        raise SystemExit(-1)
    for problem_index in range(1, no_of_problems + 1):
        link_xpath = row_link_xpath.format(problem_index)
        try:
            # find_element(By.XPATH, ...) is used instead of the
            # find_element_by_xpath shortcut, which newer Selenium
            # releases no longer provide.
            problem_name = driver.find_element(
                By.XPATH, link_xpath + "/b").text
            problem_url = driver.find_element(
                By.XPATH, link_xpath).get_attribute('href')
        except NoSuchElementException:
            # The listing has fewer rows than requested: keep what we have.
            print("Only {} problem(s) could be read from the listing".format(
                problem_index - 1))
            break
        print(problem_name, " ", problem_url)
        problem_info[problem_name] = problem_url
    return problem_info
# get_problem_description returns content of the problem
def get_problem_description(problem_url, problem_name):
    """Scrape the statement and sample test cases of a single problem.

    Args:
        problem_url: Address of the problem page to open.
        problem_name: Title stored under the ``'title'`` key of the result.

    Returns:
        dict with the keys ``'title'``, ``'statement'``, ``'test_case'``
        and ``'url'``, or ``None`` when the page was not ready before
        ``wait`` timed out or an expected element could not be located.
    """
    statement_xpath = "//*[@id='problem-statement']/p[1]"
    try:
        driver.get(problem_url)
        wait.until(EC.element_to_be_clickable((By.XPATH, statement_xpath)))
        # find_element(By.XPATH, ...) replaces the find_element_by_xpath
        # shortcut, which newer Selenium releases no longer provide.
        problem_statement = driver.find_element(
            By.XPATH, statement_xpath).text
        problem_test_cases = driver.find_element(
            By.XPATH, "//*[@id='problem-statement']/pre[1]").text
        if "Output" not in problem_test_cases:
            # The first <pre> holds only the sample input here, so the
            # sample output is read from the second <pre> and both parts
            # are labelled.
            sample_output = driver.find_element(
                By.XPATH, "//*[@id='problem-statement']/pre[2]").text
            problem_test_cases = ("Input\n" + problem_test_cases
                                  + "\nOutput\n" + sample_output)
        # Everything needed has been read; stop any remaining page loading.
        driver.execute_script("window.stop();")
    # Handling exceptions
    except (NoSuchElementException, TimeoutException):
        print("Couldn't scrap the element, Unable to locate it")
        return None
    return {'title': problem_name, 'statement': problem_statement,
            'test_case': problem_test_cases, 'url': problem_url}
# storing the information in the pdf
def convert_to_pdf(problem):
    """Write one scraped problem to a PDF file in the working directory.

    The file is named after the problem title, with characters that are
    unsafe in file names replaced by ``_``.

    Args:
        problem: dict with the string values ``"title"``, ``"statement"``,
            ``"test_case"`` and ``"url"``.
    """
    def latin1_safe(text):
        # Replace character that aren't in latin-1 character set
        return text.encode('latin-1', 'replace').decode('latin-1')

    title = latin1_safe(problem["title"])
    statement = latin1_safe(problem["statement"])
    test_case = latin1_safe(problem["test_case"])
    url = problem["url"]

    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", size=15)
    # add sections to pdf
    # A width of 0 lets each cell extend to the right margin, so the text
    # stays inside the printable area instead of running to the page edge.
    pdf.cell(0, 10, txt=title, ln=1, align='C')
    pdf.multi_cell(0, 10, txt=statement, align='L')
    pdf.multi_cell(0, 10, txt=test_case, align='L')
    pdf.write(5, 'Problem_Link: ')
    # The visible text gets the same latin-1 guard; the link target is kept
    # exactly as scraped.
    pdf.write(5, latin1_safe(url), url)

    # Path separators and other characters that are reserved in file names
    # (including the '?' produced by the latin-1 replacement above) would
    # break or misplace the output file, so they are swapped for '_'.
    unsafe_chars = '\\/:*?"<>|'
    file_stem = title.translate({ord(char): "_" for char in unsafe_chars})
    file_stem = file_stem.strip(" .") or "problem"
    pdf.output(file_stem + ".pdf")
# main function
def main():
    """Ask for a difficulty and a count, then save each problem as a PDF.

    Reads both answers from standard input. An unknown difficulty or a
    non-numeric count prints a message and returns without scraping.
    Problems whose page could not be scraped are skipped.
    """
    category = input(
        "Enter the difficulty level from the following \n Beginner \n Easy \n Medium \n Hard \n Challenge \n\n")
    # Tolerate surrounding whitespace and any letter case ("easy", " HARD ").
    category = category.strip().capitalize()
    if category not in problem_difficulty:
        print("Unknown difficulty level. Choose one of: "
              + ", ".join(problem_difficulty))
        return
    try:
        no_of_problems = int(
            input("\n Enter the number of problems to be scrapped: \n"))
    except ValueError:
        print("The number of problems must be a whole number")
        return
    info = get_problems(problem_difficulty[category], no_of_problems)
    for name, url in info.items():
        problem = get_problem_description(url, name)
        if problem is not None:
            convert_to_pdf(problem)
if __name__ == '__main__':
    try:
        main()
    finally:
        # quit() ends the whole WebDriver session (browser and driver
        # process), whereas close() only closes the current window; the
        # finally clause runs it even when main() raises.
        driver.quit()