-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathoclcSearchForNewNum.py
43 lines (38 loc) · 1.31 KB
/
oclcSearchForNewNum.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import requests
from bs4 import BeautifulSoup
import csv
import secrets
import time
import datetime
# Script: for every row of noHathiTrustMatch.csv, ask the WorldCat catalog
# web service for the record stored under the row's OCLC number and compare
# the 001 control number that comes back with the number that was searched.
# Results go to newOclcNumResults.csv, one row per input row:
#   bibNum, search (the number looked up), newOclcNum
# newOclcNum is filled in only when the returned number differs from the
# searched one; it is left empty when they match or the lookup failed.
startTime = time.time()
baseURL = 'http://www.worldcat.org/webservices/catalog/content/'
# NOTE(review): `secrets` is presumably a project-local secrets.py that
# defines `wskey`; the standard-library module of the same name has no such
# attribute -- confirm it is importable from the working directory.
wskey = secrets.wskey
filename = 'noHathiTrustMatch.csv'


def _current_oclc_num(search):
    """Return the 001 control number WorldCat serves for *search*.

    Args:
        search: OCLC number to look up (surrounding whitespace is ignored).

    Returns:
        The text of the record's 001 controlfield with leading zeros
        removed, or '' when the request fails or the response contains no
        <record> element / no 001 controlfield.
    """
    try:
        response = requests.get(baseURL + search.strip(),
                                params={'wskey': wskey}).content
    except requests.exceptions.RequestException as err:
        # Keep the batch running; the row is still written with a blank
        # newOclcNum so the failure is visible in the output.
        print('Request failed for {}: {}'.format(search, err))
        return ''
    record = BeautifulSoup(response, "lxml").find('record')
    if record is None:
        # find() returns None when the response holds no <record>.
        return ''
    controlfield = record.find('controlfield', {'tag': '001'})
    if controlfield is None:
        return ''
    return controlfield.text.lstrip('0')


# newline='' is what the csv module expects for files it writes; the context
# manager guarantees the results are flushed and closed even on an error.
with open('newOclcNumResults.csv', 'w', newline='') as outfile, \
        open(filename) as csvfile:
    f = csv.writer(outfile)
    f.writerow(['bibNum', 'search', 'newOclcNum'])
    for counter, row in enumerate(csv.DictReader(csvfile), start=1):
        print(counter)  # progress indicator
        search = row['oclcNum']
        bibNum = row['bibNum']
        oclcNum = _current_oclc_num(search)
        # Compare on the same normalized form that was sent in the request,
        # so stray whitespace in the input is not reported as a new number.
        if search.strip().lstrip('0') != oclcNum:
            print(search, oclcNum)
        else:
            # Unchanged number: leave the newOclcNum column empty.
            oclcNum = ''
        f.writerow([bibNum, search, oclcNum])

elapsedTime = time.time() - startTime
td = datetime.timedelta(seconds=elapsedTime)
print("Elapsed time: {}".format(td))