-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmarcpy1.py
617 lines (560 loc) · 25.2 KB
/
marcpy1.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
#!/usr/bin/env python
# coding: utf-8
# # Functions for Marc handling - 1
#
# In[1]:
from pprint import pprint as pp
import re #regular expressions
import requests
#import urllib, urllib.parse # used for percent-encoding strings
#import xml
#from xml import etree
#from xml.etree import ElementTree
from io import StringIO
import pymarc
from pymarc import Record, marcxml, Field, XMLWriter
from collections import Counter
import csv
import difflib
from difflib import SequenceMatcher
import itertools
import numpy
import unicodedata as ucd
# debugging
import pdb
import traceback
#local modules on git tkpy
import os
import sys
repopath=os.path.abspath('../Gitrepos/tkpy')
if repopath not in sys.path:
sys.path.append(repopath)
import utils
from utils import trim
#import iomarc
#import iogeneral
# In[2]:
from pymarc import Record, marcxml, Field
from collections import Counter
def fieldCounter(records):
    """Count how often each MARC field tag occurs across ``records``.

    Args:
        records: iterable of pymarc.Record objects.

    Returns:
        A collections.Counter mapping field tag -> number of fields carrying
        that tag. Keys are inserted in sorted tag order.
    """
    # get_fields() without arguments yields every field of the record.
    all_tags = [field.tag for record in records for field in record.get_fields()]
    # Sorting before counting keeps the Counter's key order alphabetical.
    return Counter(sorted(all_tags))
def subfieldCounter(records, fieldtags, subfieldtags, delimiter='$'):
    """Count the fields in which each requested field/subfield pair is present.

    Example: with fieldtags=['100', '700'] and subfieldtags=['e', '4'] the
    keys 100$e, 100$4, 700$e and 700$4 are counted.

    A field contributes at most 1 per subfield code, even when the code is
    repeated inside that field.

    Args:
        records: iterable of pymarc.Record objects.
        fieldtags: field tags to inspect.
        subfieldtags: subfield codes to look for in each selected field.
        delimiter: text placed between tag and code in the Counter keys.

    Returns:
        A collections.Counter keyed by ``tag + delimiter + code``.
    """
    labels = (
        field.tag + delimiter + code
        for record in records
        for field in record.get_fields(*fieldtags)
        for code in subfieldtags
        if len(field.get_subfields(code)) > 0
    )
    return Counter(labels)
def subfieldCounter2(records, delimiter='$'):
    """Count every subfield occurrence in every non-control field of ``records``.

    Fields with unexpected (erroneous) tags are counted as well; only tags
    starting with '00' (control fields) are skipped. A subfield code that is
    repeated inside one field is counted once per occurrence.

    Args:
        records: iterable of pymarc.Record objects (consumed in a single pass).
        delimiter: text placed between tag and code in the Counter keys.

    Returns:
        A collections.Counter keyed by ``tag + delimiter + code``.
    """
    counts = Counter()
    for record in records:
        # Filter on the tag directly instead of first collecting a tag list and
        # passing it to get_fields(): an empty tag list would make get_fields()
        # return *all* fields, control fields included.
        for field in record.get_fields():
            if field.tag.startswith('00'):
                continue
            # Assumes the flat layout ['a', 'value', 'b', 'value', ...]
            # NOTE(review): confirm against the pymarc version in use.
            flat = field.subfields
            # Codes sit at the even positions; a dangling trailing element
            # without a value is ignored.
            for code in flat[0:2 * (len(flat) // 2):2]:
                counts[field.tag + delimiter + code] += 1
    return counts
def valueCounter(records, fieldtags, subfieldtags=None, fldPart=None, slice=None,
                 separateCounting=False, delimiter='$', leadertag='000', countDupl=True):
    """Count the distinct values found in fields, subfields, control fields or the leader.

    Examples:
        fieldtags=['100', '700'], subfieldtags=['e', '4'], separateCounting=True
            counts the values of 100$e, 100$4, 700$e and 700$4 individually.
        Same arguments with separateCounting=False
            counts, per field, the requested subfield values joined by
            ``delimiter``.
        fieldtags=['651'], subfieldtags=['a'], countDupl=False
            a value occurring several times within one record counts once.

    Args:
        records: iterable of pymarc.Record objects.
        fieldtags: field tags to inspect; include ``leadertag`` to count the
            record leader as well.
        subfieldtags: subfield codes to read; None or [] means the whole
            field value (``field.value()``) is used.
        fldPart: None/() for the whole value, ``(start,)`` or ``(start, end)``
            to count only that part. Not applied to joined multi-subfield
            values (more than one subfield with separateCounting False).
        slice: like ``fldPart`` but applied to the leader and to control
            fields (tags 001, 003, 005, 006, 007, 008). The name shadows the
            builtin but is kept for existing keyword callers.
        separateCounting: count each subfield value on its own instead of
            joining the values of one field.
        delimiter: separator used when subfield values are joined.
        leadertag: pseudo tag that stands for the leader in ``fieldtags``.
        countDupl: when False, duplicate values inside one record count once.

    Returns:
        A collections.Counter mapping value -> occurrences, with keys
        inserted in sorted order.
    """
    control_tags = ['001', '003', '005', '006', '007', '008']

    def cut(text, bounds):
        # None or () -> whole text; one bound -> open-ended; otherwise start:end.
        if bounds in [None, ()]:
            return text
        if len(bounds) > 1:
            return text[bounds[0]:bounds[1]]
        return text[bounds[0]:]

    collected = []
    for record in records:
        per_record = []
        if leadertag in fieldtags:
            per_record.append(cut(record.leader, slice))
        # Entries of fieldtags that match no field tag are simply ignored.
        for field in record.get_fields(*fieldtags):
            if field.tag in control_tags:
                per_record.append(cut(field.value(), slice))
            elif subfieldtags in [None, []]:
                per_record.append(cut(field.value(), fldPart))
            # Kept as '== False' on purpose: a value such as None does not
            # compare equal to False and therefore takes the branch below.
            elif len(subfieldtags) > 1 and separateCounting == False:  # noqa: E712
                per_record.append(delimiter.join(field.get_subfields(*subfieldtags)))
            else:
                for code in subfieldtags:
                    for text in field.get_subfields(code):
                        per_record.append(cut(text, fldPart))
        # Optionally collapse duplicates inside a single record.
        collected.extend(per_record if countDupl else set(per_record))
    return Counter(sorted(collected))
def fieldExtractor(records, fieldtags):
    """Return, per record, the matching fields rendered as 'tag: value'.

    Example: fieldExtractor(solstad, ['913']) returns (one inner list per record)
        [['913: Solstad NB'],
         ['913: Solstad NB', '913: Littforsk NB'],
         ...]

    Args:
        records: iterable of pymarc.Record objects.
        fieldtags: field tags to extract.

    Returns:
        A list with one list of strings per record; records without a
        matching field contribute an empty list.
    """
    return [
        [field.tag + ': ' + field.value() for field in record.get_fields(*fieldtags)]
        for record in records
    ]
def fieldExtractorCondensed(records, fieldtags):
    """Like a per-record 'tag: value' listing, but without empty entries.

    Fields whose value is the empty string are dropped, and records that end
    up with no entry at all are left out of the result.

    Args:
        records: iterable of pymarc.Record objects.
        fieldtags: field tags to extract.

    Returns:
        A list of non-empty lists of 'tag: value' strings.
    """
    rows = []
    for record in records:
        row = [
            field.tag + ': ' + field.value()
            for field in record.get_fields(*fieldtags)
            if field.value() != ''
        ]
        if row:
            rows.append(row)
    return rows
def fieldExtractorAsDict(records, fieldtags, IDfieldtag):
    """Map each record's identifier to its non-empty 'tag: value' strings.

    The key is the value of the first ``IDfieldtag`` field of the record.
    Identifiers are assumed to be unique; a repeated identifier overwrites
    the earlier entry. Records without any non-empty matching field are not
    represented in the result.

    Args:
        records: iterable of pymarc.Record objects.
        fieldtags: field tags whose values are collected.
        IDfieldtag: tag of the field holding the record identifier.

    Returns:
        dict of identifier -> list of 'tag: value' strings.

    Raises:
        IndexError: if a record has no ``IDfieldtag`` field.
    """
    by_id = {}
    for record in records:
        # Looked up for every record, even those that contribute no entry.
        record_id = record.get_fields(IDfieldtag)[0].value()
        lines = [
            field.tag + ': ' + field.value()
            for field in record.get_fields(*fieldtags)
            if field.value() != ''
        ]
        if lines:
            by_id[record_id] = lines
    return by_id
def fieldObjectExtractor(records, fieldtags):
    """Collect the field objects matching ``fieldtags`` from all records.

    Args:
        records: iterable of pymarc.Record objects.
        fieldtags: field tags to select.

    Returns:
        One flat list of field objects (not grouped per record), in record
        order.
    """
    return [
        field
        for record in records
        for field in record.get_fields(*fieldtags)
    ]
def subfieldExtractor(records, fieldtag, subfieldtags):
    """Return the requested subfield values, grouped per record and per field.

    Example: subfieldExtractor(solstad, '913', ['a', 'b']) returns
        [[['Solstad', 'NB']],
         [['Solstad', 'NB'], ['Littforsk', 'NB']],
         ...]

    Args:
        records: iterable of pymarc.Record objects.
        fieldtag: a single field tag; the field may occur several times per
            record.
        subfieldtags: subfield codes whose values are returned.

    Returns:
        list (records) of lists (field occurrences) of lists (subfield values).
    """
    return [
        [field.get_subfields(*subfieldtags) for field in record.get_fields(fieldtag)]
        for record in records
    ]
def subfieldExtractor2(fields, subfieldtags):
    """Return the values of ``subfieldtags`` from all ``fields`` as one flat list.

    Args:
        fields: iterable of pymarc field objects.
        subfieldtags: subfield codes whose values are collected.

    Returns:
        A flat list of subfield values in field order.
    """
    return [
        value
        for field in fields
        for value in field.get_subfields(*subfieldtags)
    ]
def fieldValues(records, fieldtags, slice=None):
    """Return the set of distinct values of the given fields.

    Example: fieldValues(solstad, ['913']) returns {'solstad', 'littforsk'}.

    Args:
        records: iterable of pymarc.Record objects.
        fieldtags: field tags to read.
        slice: part of each value to keep, intended for position-based
            (control) fields. None or () keeps the whole value, ``(start,)``
            keeps everything from ``start``, ``(start, end)`` keeps
            ``value[start:end]``. The name shadows the builtin but is kept
            for existing keyword callers.

    Returns:
        A set of strings. If ``slice`` is neither None nor a tuple, no values
        are collected and the set is empty.
    """
    distinct = set()
    for record in records:
        for field in record.get_fields(*fieldtags):
            if slice is None or slice == ():
                distinct.add(field.value())
            elif isinstance(slice, tuple):
                # A one-element tuple means "from start to the end" instead of
                # failing with IndexError on the missing upper bound.
                start = slice[0]
                end = slice[1] if len(slice) > 1 else None
                distinct.add(field.value()[start:end])
    return distinct
def leaderValues(records, slice=None):
    """Return the set of distinct leader values (or leader parts) in ``records``.

    Example: leaderValues(records, (5, 8)) returns the distinct contents of
    leader positions 5 to 7.

    Args:
        records: iterable of pymarc.Record objects.
        slice: part of the leader to keep. None or () keeps the whole leader,
            ``(start,)`` keeps everything from ``start``, ``(start, end)``
            keeps ``leader[start:end]``. The name shadows the builtin but is
            kept for existing keyword callers.

    Returns:
        A set of strings. If ``slice`` is neither None nor a tuple, no values
        are collected and the set is empty.
    """
    distinct = set()
    for record in records:
        leader = record.leader
        if slice is None or slice == ():
            distinct.add(leader)
        elif isinstance(slice, tuple):
            # A one-element tuple means "from start to the end" instead of
            # failing with IndexError on the missing upper bound.
            start = slice[0]
            end = slice[1] if len(slice) > 1 else None
            distinct.add(leader[start:end])
    return distinct
def subfieldValues(records, fieldtags, subfieldtags):
    """Return the set of distinct values of the given subfields.

    Example: subfieldValues(solstad, ['913'], ['a', 'b']) returns
    {'solstad', 'littforsk', 'NB'}.

    Args:
        records: iterable of pymarc.Record objects.
        fieldtags: field tags to read.
        subfieldtags: subfield codes whose values are collected.

    Returns:
        A set of strings; values of different subfields are mixed together.
    """
    return {
        value
        for record in records
        for field in record.get_fields(*fieldtags)
        for value in field.get_subfields(*subfieldtags)
    }
def subfieldValueTuples(records, fieldtags, subfieldtags, includeId=True):
    """Return the distinct per-field combinations of subfield values.

    Every matching field yields one outer tuple. For a data field the outer
    tuple holds one inner tuple per entry of ``subfieldtags`` (same order),
    containing all values of that subfield; a subfield that is absent gives
    an empty inner tuple. For a control field the outer tuple holds a single
    inner tuple with the field value (empty tuple if the value is '').

    Example: with subfieldtags=['a', 'b'] and includeId=False a field
    913 $a solstad $b NB yields (('solstad',), ('NB',)).

    Args:
        records: iterable of pymarc.Record objects.
        fieldtags: field tags to read.
        subfieldtags: subfield codes, in the order wanted in the result.
        includeId: when equal to True, the value of the record's first 001
            field is prepended as a one-element inner tuple.

    Returns:
        A set of tuples of tuples of strings.

    Raises:
        IndexError: if ``includeId`` is set and a record has no 001 field.
    """
    combos = set()
    for record in records:
        for field in record.get_fields(*fieldtags):
            if field.is_control_field():
                content = field.value()
                parts = [()] if content == '' else [(content,)]
            else:
                # Iterate the requested codes (not the field) to fix the order.
                parts = [tuple(field.get_subfields(code)) for code in subfieldtags]
            # Comparison kept as '== True': only True-equal values enable the id.
            if includeId == True:  # noqa: E712
                parts.insert(0, (record.get_fields('001')[0].value(),))
            combos.add(tuple(parts))
    return combos
def writeFieldsToCSV(filename, records, fieldtags, condense=False):
    """Write the 'tag: value' strings of the given fields to a '|'-delimited file.

    One row is written per record (per record with content when ``condense``
    is set).

    Args:
        filename: path of the output file; it is overwritten, UTF-8 encoded.
        records: iterable of pymarc.Record objects.
        fieldtags: field tags to export.
        condense: use fieldExtractorCondensed instead of fieldExtractor.
    """
    extractor = fieldExtractorCondensed if condense else fieldExtractor
    # The with-statement closes the file; no explicit close() is needed.
    with open(filename, 'w', newline='', encoding='utf-8') as handle:
        csv.writer(handle, delimiter='|').writerows(extractor(records, fieldtags))
def writeSubfieldsToCSV(filename, records, fieldtag, subfieldtags):
    """Write the given subfield values of ``fieldtag`` to a '|'-delimited file.

    One row is written per record; the values of all occurrences of the field
    are flattened into that row.

    Args:
        filename: path of the output file; it is overwritten, UTF-8 encoded.
        records: iterable of pymarc.Record objects.
        fieldtag: the field tag to export.
        subfieldtags: subfield codes to export.
    """
    # The with-statement closes the file; no explicit close() is needed.
    with open(filename, 'w', newline='', encoding='utf-8') as handle:
        out = csv.writer(handle, delimiter='|')
        # per_record looks like [['Solstad', 'NB'], ['Littforsk', 'NB'], ...]
        for per_record in subfieldExtractor(records, fieldtag, subfieldtags):
            out.writerow(itertools.chain.from_iterable(per_record))
def writeFieldDictToCSV(filename, records, fieldtags, IDfieldTag):
    """Write record identifiers and their field values to a '|'-delimited file.

    For every record that has a value in at least one of ``fieldtags`` a row
    with the identifier is written, followed by one row per 'tag: value'
    string with an empty first column. All records are assumed to carry
    ``IDfieldTag``.

    Example of the intermediate dict:
        {'9500002': ['505: Biblioteket har: [Årg. 1] (1995)-', '505: ISSN 0806-2218'],
         '9500045': ['500: Forfatter av b. 7: Ådne Fardal Klev', ...]}

    Args:
        filename: path of the output file; it is overwritten, UTF-8 encoded.
        records: iterable of pymarc.Record objects.
        fieldtags: field tags to export.
        IDfieldTag: tag of the field holding the record identifier.
    """
    # The with-statement closes the file; no explicit close() is needed.
    with open(filename, 'w', newline='', encoding='utf-8') as handle:
        out = csv.writer(handle, delimiter='|')
        entries = fieldExtractorAsDict(records, fieldtags, IDfieldTag)
        for record_id, lines in entries.items():
            out.writerow([record_id])
            for line in lines:
                # trim comes from the local utils module.
                out.writerow(['', trim(line)])
#Defines Work as subclass of pymarc.Record
class Work(pymarc.Record):
    """A pymarc.Record that additionally keeps a list of attached records."""

    def __init__(self, *args, **kwargs):
        """Create the record and start with an empty manifestation list.

        All arguments are passed through unchanged to pymarc.Record.
        """
        super().__init__(*args, **kwargs)
        # Without this list addManifestation/getManifestations would fail
        # with AttributeError on a freshly created Work.
        self.manifestations = []

    def addManifestation(self, record):
        """Attach ``record`` to this work."""
        self.manifestations.append(record)

    def getManifestations(self):
        """Return the list of attached records (the live list, not a copy)."""
        return self.manifestations

    def setPreferredTitle(self, title):
        """Add a 245 field (indicators '1', ' ') with ``title`` in subfield a."""
        # NOTE(review): the second $a with the literal 'hei' looks like
        # leftover test data; kept unchanged until confirmed.
        fld = Field('245', indicators=['1', ' '], subfields=['a', title, 'a', 'hei'])
        self.add_field(fld)
# TODO: consider difflib.get_close_matches for the fuzzy comparison.
def findSimilarRecords(records, record, compareDict, strict=False):
    """Return the records in ``records`` that are similar to ``record``.

    For every tag in ``compareDict`` the values of all occurrences of that
    field are compared between ``record`` and each candidate. A candidate is
    returned only if it matches on every tag. ``record`` itself is returned
    too if it is part of ``records``.

    Args:
        records: iterable of pymarc.Record objects to search.
        record: the pymarc.Record to compare against.
        compareDict: mapping of the form
            {fieldtag1: [subfieldtag1, subfieldtag2, ...], fieldtag2: [...]}.
            An empty subfield list means the whole field value is compared.
        strict: if True, the value lists must be exactly equal. Otherwise
            they are compared with similar(..., method=1) and its default
            cutoff.

    Returns:
        A list of the matching records, in input order.
    """
    def values_of(rec, tag, codes):
        # One string per occurrence of the field.
        return [
            fld.value() if not codes else fieldValue(fld, codes)
            for fld in rec.get_fields(tag)
        ]

    reference = {tag: values_of(record, tag, codes) for tag, codes in compareDict.items()}
    result = []
    for rec in records:
        for tag, codes in compareDict.items():
            wanted = reference[tag]
            found = values_of(rec, tag, codes)
            if strict or not wanted or not found:
                # Exact comparison. Also used when one side has no values,
                # where a fuzzy score would be meaningless: two records that
                # both lack the field match, otherwise they differ.
                if wanted != found:
                    break
            elif not similar(wanted, found, method=1):
                break
        else:
            # No tag caused a mismatch.
            result.append(rec)
    return result
def fieldValue(field, subfieldTags):
    """Return the selected subfield values of ``field`` as one string.

    Essentially field.get_subfields(*subfieldTags), but joined with single
    spaces into a string instead of returned as a list.

    Args:
        field: a pymarc field object.
        subfieldTags: subfield codes to include.

    Returns:
        The values separated by one space; '' if nothing matches.
    """
    return ' '.join(field.get_subfields(*subfieldTags))
def workKey(record, fieldSpec):
    """Build a key tuple of strings from the fields described by ``fieldSpec``.

    Args:
        record: a pymarc.Record.
        fieldSpec: list of 3-tuples (fieldtag, occnum, subfieldtags):
            occnum < 0  - every occurrence of the field adds one item;
            occnum >= 0 - only that occurrence (0-based) adds one item;
            subfieldtags == [] - the whole field value is used, otherwise
            only the listed subfields.

    Returns:
        A tuple of strings, items in ``fieldSpec`` order.

    Raises:
        IndexError: if a non-negative occnum refers to an occurrence the
            record does not have.
    """
    parts = []
    for spec in fieldSpec:
        tag, occurrence, codes = spec[0], spec[1], spec[2]
        if occurrence < 0:
            chosen = record.get_fields(tag)
        else:
            chosen = [record.get_fields(tag)[occurrence]]
        for field in chosen:
            parts.append(field.value() if codes == [] else fieldValue(field, codes))
    return tuple(parts)
def assignWorkKey(record, keytuple):
    """Store ``keytuple`` on ``record`` as a new field with tag '9xx'.

    The items are written to subfields '1', '2', ... in order; the field gets
    blank indicators. Each call adds another '9xx' field.

    Args:
        record: the pymarc.Record to modify in place.
        keytuple: sequence of strings making up the key.
    """
    pairs = []
    for position, part in enumerate(keytuple, start=1):
        pairs += [str(position), part]
    record.add_field(Field(tag='9xx', indicators=[' ', ' '], subfields=pairs))
def createWorkKey(record):
    """Create the most plausible work key(s) for ``record``.

    The first rule that applies wins:
      1. the first 246 field has a subfield i similar to 'Originaltittel'
         -> workKeyFromOriginalTitle
      2. a 130 field exists -> workKeyFromUniformTitle
      3. a 245 field exists -> workKeyFromTitle

    Args:
        record: a pymarc.Record; the chosen helper also stores the key on it.

    Returns:
        A list of key tuples, or None if no rule applies.
    """
    variants = record.get_fields('246')
    if variants:
        labels = variants[0].get_subfields('i')
        # Fuzzy match so that small spelling variations are accepted.
        if labels and similar(labels[0], 'Originaltittel', method=1):
            return workKeyFromOriginalTitle(record)
    if record.get_fields('130'):
        return workKeyFromUniformTitle(record)
    if record.get_fields('245'):
        return workKeyFromTitle(record)
    return None
def workKeyFromOriginalTitle(record):
    """Create one work key per 246 field that carries a subfield i.

    For each such occurrence the key is based on 246 $a plus
      - the first main entry field (100/110/111/130): subfields a-e as one
        item and subfield 0 as another, or, if there is no main entry,
      - 245 $a $b when a 245 field exists.
    Every key is also stored on the record through assignWorkKey.

    Assumption: a 246 field with subfield i represents an original title.

    Args:
        record: a pymarc.Record; it is modified in place.

    Returns:
        A list of key tuples (empty if no 246 field has a subfield i).
    """
    keys = []
    for index, variant in enumerate(record.get_fields('246')):
        if not variant.get_subfields('i'):
            continue  # this occurrence is not marked as an original title
        spec = [('246', index, ['a'])]
        # presumably at most one main entry field exists; the first is used
        main_entries = record.get_fields('100', '110', '111', '130')
        if main_entries:
            entry_tag = main_entries[0].tag
            spec += [(entry_tag, 0, ['a', 'b', 'c', 'd', 'e']), (entry_tag, 0, ['0'])]
        elif record.get_fields('245'):
            spec.append(('245', 0, ['a', 'b']))
        key = workKey(record, spec)
        assignWorkKey(record, key)
        keys.append(key)
    return keys
def workKeyFromUniformTitle(record):
    """Create the work key from the whole value of the first 130 field.

    The key is also stored on the record through assignWorkKey.

    Assumption: field 130 exists and represents the uniform title.

    Args:
        record: a pymarc.Record; it is modified in place.

    Returns:
        A one-element list containing the key tuple.
    """
    key = workKey(record, [('130', 0, [])])
    assignWorkKey(record, key)
    return [key]
def workKeyFromTitle(record):
    """Create the work key from 245 $a $b, plus 240 and main entry if present.

    Key items, in order: 245 $a $b; the whole first 240 field if one exists;
    for the first main entry field (100/110/111/130) its subfields a-e as one
    item and its subfield 0 as another. The key is also stored on the record
    through assignWorkKey.

    Assumption: field 245 exists.

    Args:
        record: a pymarc.Record; it is modified in place.

    Returns:
        A one-element list containing the key tuple.
    """
    spec = [('245', 0, ['a', 'b'])]
    if record.get_fields('240'):
        spec.append(('240', 0, []))
    # presumably at most one main entry field exists; the first is used
    main_entries = record.get_fields('100', '110', '111', '130')
    if main_entries:
        entry_tag = main_entries[0].tag
        spec += [(entry_tag, 0, ['a', 'b', 'c', 'd', 'e']), (entry_tag, 0, ['0'])]
    key = workKey(record, spec)
    assignWorkKey(record, key)
    return [key]
def similarity(str1, str2, method=0):
    """Return the similarity of two strings.

    Args:
        str1, str2: the strings to compare.
        method: 0 - exact comparison (1 if equal, else 0);
            1 - difflib.SequenceMatcher ratio (float between 0 and 1);
            any other value (e.g. the planned Jaccard or Levenshtein
            methods) is not implemented and yields 0.

    Returns:
        The similarity as a number between 0 and 1.
    """
    if method == 0:
        return 1 if str1 == str2 else 0
    if method == 1:
        return SequenceMatcher(a=str1, b=str2).ratio()
    return 0
def similar(str1, str2, method=0, cutoff=0.9, compReq='all'):
    """Tell whether two strings, or two lists of strings, are similar enough.

    Args:
        str1, str2: two strings, or two lists of strings.
        method: similarity method, passed on to the scoring function.
        cutoff: minimum score that counts as similar.
        compReq: list comparison mode, only used when both arguments are
            lists (passed to similarityLists as ``req``).

    Returns:
        True if the score is at least ``cutoff``.
    """
    both_lists = isinstance(str1, list) and isinstance(str2, list)
    if both_lists:
        score = similarityLists(str1, str2, method, compReq)
    else:
        score = similarity(str1, str2, method)
    return score >= cutoff
def similarityLists(lst1, lst2, method=0, req='all'):
    """Return the similarity between two lists of strings.

    Args:
        lst1, lst2: lists of strings.
        method: similarity method, passed on to similarity().
        req: how completely the lists must correspond:
            'all'       - items are compared pairwise by position; every
                          surplus item of the longer list scores 0; the
                          result is the average.
            'allExist'  - each item of lst1 is scored by its best match in
                          lst2; the result is the average of those scores.
            'oneExists' - as 'allExist', but the result is the best score.

    Returns:
        A number between 0 and 1. If there is nothing to compare (e.g. both
        lists empty) the result is 0.0 rather than nan or an exception.

    Raises:
        ValueError: if ``req`` is not one of the three modes above.
    """
    if req == 'all':
        scores = [similarity(a, b, method) for a, b in zip(lst1, lst2)]
        # Penalty for unequal length of lst1 and lst2.
        scores.extend([0] * abs(len(lst1) - len(lst2)))
    elif req in ('allExist', 'oneExists'):
        # The highest similarity to any element of lst2 counts; the leading 0
        # covers an empty lst2.
        scores = [
            max([0] + [similarity(e1, e2, method) for e2 in lst2])
            for e1 in lst1
        ]
    else:
        raise ValueError(
            "req must be 'all', 'allExist' or 'oneExists', got %r" % (req,))
    if not scores:
        # No evidence of similarity at all.
        return 0.0
    if req == 'oneExists':
        return max(scores)
    return numpy.average(scores)
def pairWiseSimilarity(lst1, lst2, method=0):
    """Return the position-by-position similarities of two lists of strings.

    Items at the same index are compared with similarity(); every surplus
    item of the longer list contributes a 0, so the result has
    max(len(lst1), len(lst2)) entries.

    Args:
        lst1, lst2: lists of strings.
        method: similarity method, passed on to similarity().

    Returns:
        A list of similarity values.
    """
    paired = [similarity(left, right, method) for left, right in zip(lst1, lst2)]
    # Penalty entries for unequal length of lst1 and lst2.
    padding = [0] * abs(len(lst1) - len(lst2))
    return paired + padding
def strippedValue(record, fieldtag, subfieldtag, prefix='(EXLNZ-47BIBSYS_NETWORK)'):
    """Return a prefixed subfield value of ``record`` with the prefix removed.

    Only the first ``subfieldtag`` value of each ``fieldtag`` occurrence is
    examined. If several occurrences qualify, the last one wins.

    Example: with 035 $a (EXLNZ-47BIBSYS_NETWORK)990000029844702201 in r,
    strippedValue(r, '035', 'a') returns '990000029844702201'.

    Args:
        record: a pymarc.Record.
        fieldtag: tag of the field to read.
        subfieldtag: code of the subfield to read.
        prefix: the prefix a value must start with; it is cut off.

    Returns:
        The value without its prefix, or '' if no value qualifies.
    """
    stripped = ''
    for field in record.get_fields(fieldtag):
        candidates = field.get_subfields(subfieldtag)
        if candidates != [] and candidates[0].startswith(prefix):
            stripped = candidates[0][len(prefix):]
    return stripped
# In[ ]: