Add argparse option for the input file name and print progress while searching

MITLibraries · Oct 24, 2018 · 1c9ca54
1 parent 4fd7fbd
commit 1c9ca54
Showing 1 changed file with 25 additions and 5 deletions.
diff --git a/oclcTitlePhraseBorrowDirect.py b/oclcTitlePhraseBorrowDirect.py
@@ -5,15 +5,28 @@
 import urllib
 import re
 import time
+import argparse
+
+parser = argparse.ArgumentParser()
+parser.add_argument('-f', '--fileName', help='the file of Borrow Direct data. optional - if not provided, the script will ask for input')
+args = parser.parse_args()
+
+if args.fileName:
+    fileName = args.fileName
+else:
+    fileName = raw_input('Enter the file of Borrow Direct data: ')
 
 startTime = time.time()
 
-fileName = raw_input('Enter file name: ')
 fileNameWithoutExtension = fileName[:fileName.index('.')]
 
 baseURL = 'http://www.worldcat.org/webservices/catalog/search/opensearch?q='
 baseURL2 = 'http://www.worldcat.org/webservices/catalog/content/'
 
+with open(fileName) as csvfile:
+    reader = csv.DictReader(csvfile)
+    rowCount = len(list(reader))
+
 wskey = secrets.wskey
 f=csv.writer(open(fileNameWithoutExtension+'oclcSearchMatches.csv', 'wb'))
 f.writerow(['searchOclcNum']+['borrower']+['lender']+['status']+['patronType']+['isbn']+['searchTitle']+['searchAuthor']+['searchDate']+['oclcNum']+['oclcTitle']+['oclcAuthor']+['oclcPublisher']+['callNumLetters']+['callNumFull']+['physDesc']+['oclcDate'])
@@ -22,6 +35,8 @@
 with open(fileName) as csvfile:
     reader = csv.DictReader(csvfile)
     for row in reader:
+        rowCount -= 1
+        print 'Items remaining: ', rowCount
         borrower = row['BORROWER']
         lender = row['LENDER']
         status = row['STATUS']
@@ -34,22 +49,27 @@
         searchPublisher = row['PUBLISHER']
         searchDate = row['PUBLICATION YEAR']
         try:
-            response = requests.get('http://www.worldcat.org/webservices/catalog/content/'+searchOclcNum+'?format=rss&wskey='+wskey).content
+            response = requests.get('http://www.worldcat.org/webservices/catalog/content/'+searchOclcNum+'?format=rss&wskey='+wskey)
+            response = response.content
             record = BeautifulSoup(response, "lxml").find('record')
             oclcNum = record.find('controlfield', {'tag' : '001'}).text
+            print 'search oclc #'
         except:
             originalTitle = searchTitle
             search = urllib.quote(searchTitle)
-            print search
-            response = requests.get(baseURL+search.strip()+'&count=1&format=rss&wskey='+wskey).content
+            response = requests.get(baseURL+search.strip()+'&count=1&format=rss&wskey='+wskey)
+            print 'search title'
+            response = response.content
             record = BeautifulSoup(response, "lxml").findAll('item')
             if record != []:
                 record = record[0]
                 url = record.find('guid').text.encode('utf-8')
                 oclcNum = url.replace('http://worldcat.org/oclc/','')
                 oclcAuthor = record.find('author').find('name').text.encode('utf-8')
 
-        response2 = requests.get(baseURL2+oclcNum+'?servicelevel=full&classificationScheme=LibraryOfCongress&wskey='+wskey).content
+        response2 = requests.get(baseURL2+oclcNum+'?servicelevel=full&classificationScheme=LibraryOfCongress&wskey='+wskey)
+        print 'search full record'
+        response2 = response2.content
         try:
             record2 = BeautifulSoup(response2, "lxml").find('record')
             try: