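# check_links.py
# Scans the Markdown files listed below for http(s) links, requests each one,
# and reports any that fail. Bad links are also written to result.txt.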
import re
import sys

import requests


class color:
    CYAN = '\033[96m'
    GREEN = '\033[92m'
    YELLOW = '\033[93m'
    RED = '\033[91m'
    RESET = '\033[0m'
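

# Markdown files to scan. Paths are relative to the directory the script is
# run from.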
files = [
    "docs/README.md",
    "docs/security/README.md",
    "docs/laws/README.md"
]

websites = []
error = 0
critical = 0
success = 0
bad_links = []
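
# Pretend to be a regular browser; some servers refuse requests from clients
# they do not recognise (assumed reason for the spoofed User-Agent).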
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.122 Safari/537.36'
}


def conclude():
    # Print a summary, write the bad links to result.txt, and exit.
    print(str(success + error + critical) + " Links checked\n" + str(success) + " Good links\n" +
          str(error + critical) + " Bad links\n" + str(critical) + " Critical errors")
    with open("result.txt", "w") as result:
        for bad_link in bad_links:
            result.write(bad_link + "\n")
    sys.exit(0)
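
# Each result.txt entry is the bracketed status plus the URL, e.g.
# "[404] https://example.com/missing" (illustrative link, not a real result).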


def extract_urls(fname):
    # Pull every http(s) URL out of the file. The match stops at whitespace,
    # ")" or "`" so Markdown link syntax and inline code do not bleed in.
    with open(fname) as f:
        return re.findall(r"https?://[^\s)`]+", f.read())
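
# Example (hypothetical input): a file containing
# "see [the docs](https://example.com/page)" yields ["https://example.com/page"].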


for fi in files:
    websites += extract_urls(fi)

print(color.YELLOW + str(len(websites)) + color.RESET + " links found")
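
# Check every collected link: a 200 counts as good, any other status is a bad
# link, and a failed request (timeout, DNS error, ...) is a critical error.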
for website in websites:
    try:
        # A timeout keeps one unresponsive server from stalling the whole run.
        response = requests.get(website, headers=headers, timeout=10)
        if response.status_code == 200:
            # print("[" + color.GREEN + str(response.status_code) + color.RESET + "] " + color.CYAN + website + color.RESET)
            success += 1
        else:
            print("[" + color.RED + str(response.status_code) +
                  color.RESET + "]" + color.CYAN + " " + website + color.RESET)
            bad_links.append("[" + str(response.status_code) + "] " + website)
            error += 1
    except KeyboardInterrupt:
        print("exiting...")
        conclude()
    except requests.RequestException:
        print("[" + color.RED + "ERR" + color.RESET + "] " +
              color.CYAN + website + color.RESET)
        bad_links.append("[ERR] " + website)
        critical += 1

conclude()
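
# Run from the repository root so the relative paths in `files` resolve:
#   python check_links.py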