forked from gutenbergtools/autocat3
-
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathBaseSearcher.py
953 lines (731 loc) · 30.7 KB
/
BaseSearcher.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
# -*- mode: python; indent-tabs-mode: nil; -*- coding: iso-8859-1 -*-
"""
BaseSearcher.py
Copyright 2009-2014 by Marcello Perathoner
Distributable under the GNU General Public License Version 3 or newer.
Project Gutenberg Catalog Search
Base class
"""
from __future__ import unicode_literals
from __future__ import division
import datetime
import logging
from six.moves import urllib
import cherrypy
import routes
import babel
import regex # module re does not support \p{L}
import six
from libgutenberg.MediaTypes import mediatypes as mt
from libgutenberg.GutenbergDatabase import xl, DatabaseError
from libgutenberg import DublinCore
from libgutenberg import GutenbergDatabaseDublinCore
from libgutenberg import GutenbergGlobals as gg
from i18n_tool import ugettext as _
from i18n_tool import ungettext as __
import DublinCoreI18n
from SupportedLocales import FB_LANGS, TWITTER_LANGS, GOOGLE_LANGS, PAYPAL_LANGS, FLATTR_LANGS
VALID_PROTOCOLS = ('http', 'https')
MEDIATYPE_TO_FORMAT = {
'text/html': 'html',
mt.mobile: 'html',
mt.opds: 'opds',
mt.json: 'json',
}
USER_FORMATS = 'html mobile print opds stanza json'.split()
# max no. of results returned by search
MAX_RESULTS = 1000
# sort orders available to the user
USER_SORT_ORDERS = 'downloads release_date title alpha quantity nentry random'.split()
# internally used sort orders
SORT_ORDERS = USER_SORT_ORDERS + 'nentry'.split()
# fk_categories of sound files
AUDIOBOOK_CATEGORIES = set([1, 2, 3, 6])
# updated by cron thread
books_in_archive = 0
class ClassAttr(object):
""" Holds an XML class attribute. """
__slots__ = 'value'
def __init__(self, v=None):
self.value = set()
self.__iadd__(v)
def __len__(self):
return len(self.value)
def __unicode__(self):
return ' '.join(self.value) if self.value else ''
def __str__(self):
return ' '.join(self.value) if self.value else ''
def __iadd__(self, v):
""" Implements operator += """
if not v:
return self
if isinstance(v, six.string_types):
for i in six.text_type(v).split():
self.value.add(i)
return self
if isinstance(v, ClassAttr):
self.value |= v.value
return self
def __contains__(self, b):
return b in self.value
class DC(GutenbergDatabaseDublinCore.GutenbergDatabaseDublinCore,
DublinCoreI18n.DublinCoreI18nMixin):
""" A localized DublinCore. """
def __init__(self, pool):
GutenbergDatabaseDublinCore.GutenbergDatabaseDublinCore.__init__(self, pool)
DublinCoreI18n.DublinCoreI18nMixin.__init__(self)
class Cat(object):
""" Hold data of one list item in output. """
def __init__(self):
self.type = None # use default
self.header = ''
self.class_ = ClassAttr()
self.downloads = 1
self.rel = None
self.order = 0
self.charset = None
self.title = None
self.subtitle = None
self.extra = None
self.icon = None
self.icon2 = None
self.url = None
self.thumb_url = None
class SearchUrlFormatter(object):
""" Callable to format a search url. """
def __init__(self, action):
self.action = action
def __call__(self, row):
os = cherrypy.request.os
return os.url(
self.action,
format = os.format,
id = row.pk)
class SQLStatement(object):
""" Class implementing an SQL statement. """
prefix_to_prefix = {
'a.': 'ax',
't.': 'tx',
's.': 'sx',
'bs.': 'bsx',
'l.': 'l0',
'#': 'no.',
'n.': 'no.',
'type.': 'y0',
'lcn.': 'lcnx',
'lcc.': 'lcc0',
'cat.': 'cat0',
}
"""Dict of user-visible prefixes to translate.
User-visible prefixes must be easy to type. The dot is on the
lowercase keyboard of most phones, so you need no shifting to type
these.
Internal prefixes exploit the quirks of the tsvec stemmer. Words
containing numbers do not get stemmed, so any '*0' prefix searches
for the unstemmed word. All other words get stemmed, so any '*x'
prefix searches for the stem of the word. 'x' was selected because
it is a rare character that will cause few false positives.
"""
regex_cache = {}
""" Cache of compiled regexes. """
def __init__(self):
self.query = ''
self.params = {}
self.from_ = []
self.where = []
self.groupby = []
self.sort_order = None
self.start_index = 1
self.items_per_page = -1
@classmethod
def sub(cls, regex_, replace, query):
""" Like re.sub but also compile and cache the regex. """
if not isinstance(query, str):
query = query[0] if isinstance(query, list) and len(query) > 0 else ''
cregex = cls.regex_cache.setdefault(
regex_, regex.compile(regex_, regex.UNICODE | regex.VERSION1))
return cregex.sub(replace, query)
@classmethod
def preprocess_query(cls, query):
""" Preprocess query.
The preprocessed query might get echoed to the user.
"""
sub = cls.sub
# strip most not (letter or digit)
query = sub(r'[\p{Z}\p{P}\p{S}\p{M}\p{C}--.!|()#]', ' ', query)
# strip operators adjacent to non-whitespace
# if you want grouping you have to add space on both sides of the parens
query = sub(r'\b[!)]', ' ', query)
query = sub(r'[(]\b', ' ', query)
# insert spaces around operators
query = sub(r'\s*[|!()]\s*', r' \g<0> ', query)
return ' '.join(query.split())
@classmethod
def translate_query(cls, query):
""" Translate query from user syntax to postgres tsvec syntax. """
sub = cls.sub
def prefix_sub(match_object):
""" Translate from user-visible prefix to internal prefix. """
s = match_object.group(0)
return cls.prefix_to_prefix.get(s, s)
def balance(query):
""" Balance parens. """
def scan(query, up, down):
scan = ''
depth = 0
for char in query:
if char == up:
depth += 1
scan += char
elif char == down:
depth += -1
if depth < 0:
depth = 0
else:
scan += char
else:
scan += char
return scan, depth
balanced, depth = scan(query, '(', ')')
if depth:
balanced, depth = scan(balanced[::-1], ')', '(')
balanced = balanced[::-1]
return balanced
# Replace the user-visible prefixes with the internally used prefixes.
query = sub(r'(\b\w+\.|#)(?=\w)', prefix_sub, query)
# add wildcards to all words
query = sub(r'\b(\p{L}+)(\s|$)', r'\1:*\2', query)
query = query.replace('. ', ' ')
# if parens aren't balanced, remove them
query = balance(query)
# if ! or | are at the wrong ends, remove them
query = sub(r'(^[ \|]+|[ \|\!]+$)', '', query)
# replace ' ' with ' & '
query = ' '.join(query.split())
query = sub(r'(?<![|!(\s])\s+(?![|)])', ' & ', query)
return query
def build(self):
""" Returns the SQL query string and parameter array. """
query = self.query
if self.from_:
query += " FROM " + ", ".join(self.from_)
if self.where:
query += " WHERE " + " AND ".join(self.where)
if self.groupby:
query += " GROUP BY " + ", ".join(self.groupby)
params = self.params
if self.sort_order in SORT_ORDERS:
if self.sort_order == 'random':
query += " ORDER BY random ()"
elif self.sort_order == 'title':
query += " ORDER BY filing"
elif self.sort_order == 'alpha':
query += " ORDER BY title"
elif self.sort_order == 'release_date':
query += " ORDER BY release_date DESC, pk DESC"
else:
query += " ORDER BY %s DESC" % (self.sort_order)
if self.start_index > 1:
# opensearch is 1-based, SQL is 0-based
params['offset'] = self.start_index - 1
query += " OFFSET %(offset)s"
if self.items_per_page > -1:
# need one more to know when to display 'next' link
params['limit'] = self.items_per_page + 1
query += " LIMIT %(limit)s"
return query, params
def split(self, field, query):
""" Split multiple-term query for sql consumption. """
terms = []
n = len(self.params)
for i, q in enumerate(query.split()):
q = q.strip('.,:;')
terms.append("%s ~* %%(p%d)s" % (field, n + i))
# self.params['p%d' % (n + i)] = r'\m' + q
self.params['p%d' % (n + i)] = '(^| )' + q
return terms
def split_and_append(self, field, query):
""" Split multiple-term query for sql consumption and append terms to query. """
for term in self.split(field, query):
self.where.append(term)
def fulltext(self, field, query, stemmer='english'):
""" Perform fulltext search on query words. """
query = query.strip()
if len(query) == 0:
return
query = self.translate_query(query)
self.where.append("%s @@ to_tsquery('%s', %%(p%d)s)" %
(field, stemmer, len(self.params)))
self.params['p%d' % len(self.params)] = query
class OpenSearch(object):
""" Hold search results and lots of other stuff.
We use this to pass everything we know around and into the
templating engine.
"""
lang_to_default_locale = {
'en': 'en_US',
'de': 'de_DE',
'fr': 'fr_FR',
'es': 'es_ES',
'it': 'it_IT',
'pt': 'pt_BR',
'ru': 'ru_RU',
}
def __init__(self):
self.format = None
self.page = None
self.template = None
self.query = None
self.id = None
self.sort_order = None
self.search_terms = None
self.start_index = 1
self.items_per_page = 1
self.total_results = -1
self.page_mode = 'screen'
self.user_dialog = ('', '')
self.opensearch_support = 0 # 0 = none, 1 = full, 2 = fake(Stanza, Aldiko, ...)
self.books_in_archive = babel.numbers.format_number(
books_in_archive, locale = str(cherrypy.response.i18n.locale))
self.breadcrumbs = [
(_('Project Gutenberg'), _('Go to the Main page.'), '/'),
(__('1 free ebook', '{count} free ebooks', books_in_archive).format(
count = self.books_in_archive), _('Start a new search.'), '/ebooks/'),
]
# default output formatting functions
self.f_format_title = self.format_title
self.f_format_subtitle = self.format_author
self.f_format_extra = self.format_none # depends on sort order, set in fix_sortorder ()
self.f_format_url = self.format_bibrec_url
self.f_format_thumb_url = self.format_thumb_url
self.f_format_icon = self.format_icon # icon class
self.user_agent = cherrypy.request.headers.get('User-Agent', '')
cherrypy.request.os = self
s = cherrypy.session
k = cherrypy.request.params
host = cherrypy.request.headers.get('X-Forwarded-Host', cherrypy.config['host'])
self.host = host.split(',')[-1].strip() # keep only the last hub
# turns out X-Forwarded-Protocol (X-Forwarded-Proto is the defacto standaard)
# is not a thing and has to be set in HAProxy
self.protocol = cherrypy.request.headers.get('X-Forwarded-Protocol', 'https')
# sanity check
if self.host not in (cherrypy.config['all_hosts']):
self.host = cherrypy.config['host']
if self.protocol not in VALID_PROTOCOLS:
self.protocol = 'https'
self.urlgen = routes.URLGenerator(cherrypy.routes_mapper, {'HTTP_HOST': self.host})
self.set_format(k.get('format'))
# query: this param is set when an actual query is requested
self.query = ''
if 'query' in k:
self.query = SQLStatement.preprocess_query(k['query'])
# search_terms: this is used to carry the last query
# to display in the search input box
self.search_terms = self.query or s.get('search_terms', '')
self.sort_order = k.get('sort_order') or s.get('sort_order') or USER_SORT_ORDERS[0]
if self.sort_order not in USER_SORT_ORDERS:
raise cherrypy.HTTPError(400, 'Bad Request. Unknown sort order.')
s['sort_order'] = self.sort_order
try:
self.id = int(k.get('id') or '0')
self.start_index = int(k.get('start_index') or '1')
self.items_per_page = min(100, int(k.get('items_per_page') or '25'))
except ValueError as what:
raise cherrypy.HTTPError(400, 'Bad Request. ' + str(what))
self.file_host = cherrypy.config['file_host']
self.now = datetime.datetime.utcnow().replace(microsecond = 0).isoformat() + 'Z'
self.do_animations = 'Kindle/' not in self.user_agent # no animations on e-ink
self.ip = cherrypy.request.remote.ip
self.type_opds = 'application/atom+xml;profile=opds-catalog'
self.base_url = None
self.canonical_url = None
self.entries = []
# NOTE: For page titles etc.
self.pg = self.title = _('Project Gutenberg')
# NOTE: The tagline at the top of every page.
self.tagline = _('Project Gutenberg offers {count} free ebooks to download.').format(
count = self.books_in_archive)
# NOTE: The site's description in the html meta tags.
self.description = _('Project Gutenberg offers {count} free ebooks for '
'Kindle, iPad, Nook, Android, and iPhone.').format(
count = self.books_in_archive)
# NOTE: The placeholder inside an empty search box.
self.placeholder = _('Search Project Gutenberg.')
# these need to be here because they have to be localized
# NOTE: Msg to user indicating the order of the search results.
self.sorted_msgs = {
'downloads': _("sorted by popularity"),
'release_date': _("sorted by release date"),
'quantity': _("sorted by quantity of books"),
'title': _("sorted alphabetically"),
'alpha': _("sorted alphabetically"),
'nentry': _("sorted by relevance"),
'random': _("in random order"),
}
self.snippet_image_url = self.url('/pics/logo-144x144.png', host=self.file_host)
self.og_type = 'website'
self.class_ = ClassAttr()
self.title_icon = None
self.icon = None
self.sort_orders = []
self.alternate_sort_orders = []
lang = self.lang = s.get('_lang_', 'en_US')
if len(lang) == 2:
lang = self.lang_to_default_locale.get(lang, 'en_US')
lang2 = self.lang[:2]
self.paypal_lang = lang if lang in PAYPAL_LANGS else 'en_US'
self.flattr_lang = lang if lang in FLATTR_LANGS else 'en_US'
lang = lang.replace('_', '-')
self.google_lang = lang if lang in GOOGLE_LANGS else (
lang2 if lang2 in GOOGLE_LANGS else 'en-US')
lang = lang.lower()
self.twitter_lang = lang if lang in TWITTER_LANGS else (
lang2 if lang2 in TWITTER_LANGS else 'en')
self.viewport = "width=device-width" # , initial-scale=1.0"
self.touch_icon = '/gutenberg/apple-icon.png'
self.touch_icon_precomposed = None # not yet used
if 'user_dialog' in s:
self.user_dialog = s['user_dialog']
del s['user_dialog']
msg = k.get('msg')
if msg is not None:
if msg == 'welcome_stranger':
self.user_dialog = (
_("Welcome to Project Gutenberg. "
"You'll find here {count} ebooks completely free of charge.")
.format(count = self.books_in_archive),
_('Welcome'))
def finalize(self):
""" Calculate fields that depend on start_index, items_per_page and total_results.
start_index, etc. must be set before calling this.
"""
# FIXME: android browser crashes on XHR with
# meta name=viewport or link rel=apple-touch-icon
# see:
# http://code.google.com/p/android/issues/detail?id=6593
# http://code.google.com/p/android/issues/detail?id=9261
#
# remove this from all browsers because we are caching responses
if self.start_index > 1:
self.touch_icon = None
self.viewport = None
self.desktop_host = cherrypy.config['host']
last_page = max((self.total_results - 1) // self.items_per_page, 0) # 0-based
self.end_index = min(self.start_index + self.items_per_page - 1, self.total_results)
self.prev_page_index = max(self.start_index - self.items_per_page, 1)
self.next_page_index = min(self.start_index + self.items_per_page, self.total_results)
self.last_page_index = last_page * self.items_per_page + 1
self.show_prev_page_link = self.start_index > 1
self.show_next_page_link = (self.end_index < self.total_results)
self.desktop_search = self.url('search', format = None)
self.base_url = self.url(host = self.file_host, protocol='https')
# for google, fb etc.
self.canonical_url = self.url_carry(host = self.file_host, format = None)
self.desktop_url = self.url_carry(host = self.desktop_host, format = None)
self.osd_url = self.qualify('/catalog/osd-books.xml')
s = cherrypy.session
# write this late so pages can change it
s['search_terms'] = self.search_terms
def url(self, *args, **params):
""" Generate url carrying the 'format' parameter from self.
See: http://tools.cherrypy.org/wiki/RoutesUrlGeneration """
# We need to explicitly carry the parameters in the query
# string (eg. those not matched by routes) because routes has
# no memory for those.
#
# Also route memory is not used when generating named routes
# eg. url('search').
params.setdefault('format', str(self.format))
route_name = args[0] if args else str(cherrypy.request.params['route_name'])
rn = cherrypy.routes_mapper._routenames # pylint: disable=protected-access
if route_name in rn:
route_obj = rn[route_name]
if 'id' in route_obj.minkeys:
params.setdefault('id', str(self.id))
# Eliminate null and superflous params.
for k, v in list(params.items()):
if v is None or (k == 'start_index' and
int(v) < 2) or (k == 'format' and v == 'html'):
del params[k]
return self.urlgen(route_name, **params)
@staticmethod
def params(**kwargs):
""" Get dict of current params with override option. """
d = cherrypy.request.params.copy()
# del d['action']
# del d['controller']
# del d['route_name']
try:
del d['fb_locale']
except KeyError:
pass
d.update(kwargs)
return d
def url_carry(self, *args, **params):
""" Generate url carrying most params from self. """
return self.url(*args, **self.params(**params))
@staticmethod
def add_amp(url):
""" Add ? or & to url. """
if '?' in url:
return url + '&'
return url + '?'
def qualify(self, url):
""" Append host part. """
return urllib.parse.urljoin(self.base_url, url)
def set_format(self, format_):
""" Sanity check and set the parameter we got from the user.
Calc format and mediatype to send to the client. """
if format_ and format_ not in USER_FORMATS:
raise cherrypy.HTTPError(400, 'Bad Request. Unknown format.')
# fold print into html
if format_ == 'print':
format_ = 'html'
self.page_mode = 'print'
# user explicitly requested format
if format_:
self.format = 'html' if format_ == 'mobile' else format_
self.mediatype = mt[format_]
self.opensearch_support = 1 if format_ == 'opds' else 2
return
# no specific format requested
ua = self.user_agent
format_ = 'html'
mediatype = 'text/html'
opensearch_support = 0
# user accessed the mobile site
# known OPDS consumers
# 'stanza' is the older opds-ish format supported by stanza et al.
if ua:
if ua.startswith('Stanza/'):
# Stanza/2.1.1 iPhone OS/3.1.3/iPod touch catalog/2.1.1
# Stanza/3.0 iPhone OS/3.1.3/iPod touch catalog/3.0
format_ = 'stanza'
mediatype = mt.opds
opensearch_support = 2
elif ua.startswith('FBReader/'):
# FBReader/0.6.6(java)
format_ = 'opds'
mediatype = mt.opds
opensearch_support = 1
elif 'Aldiko/' in ua:
format_ = 'opds'
mediatype = mt.opds
opensearch_support = 2
elif ua.startswith('Ibis-Reader/'):
# Ibis-Reader/0.1
format_ = 'opds'
mediatype = mt.opds
opensearch_support = 1
elif ua.startswith('ouiivo'):
# ouiivo
format_ = 'opds'
mediatype = mt.opds
opensearch_support = 1
elif (ua.startswith('QuickR') or
ua.startswith('Young Reader') or
ua.startswith('MegaRead') or
ua.startswith('eBook Search')):
# MegaReadLite 1.0 (iPhone Simulator; iPhone OS 4.2; en_US)
# QuickReader 2.1.0 (iPhone; iPhone OS 3.1.3; en_US)
# QuickRdrLite 3.0.1 (iPhone Simulator; iPhone OS 4.2; en_US)
# eBook Search1.0(iPhone Simulator; iPhone OS 4.2; en_US)
format_ = 'opds'
mediatype = mt.opds
opensearch_support = 1
elif ua.startswith('CoolReader/'):
# CoolReader/3(Android)
format_ = 'opds'
mediatype = mt.opds
opensearch_support = 1
elif 'Freda' in ua:
format_ = 'opds'
mediatype = mt.opds
opensearch_support = 1
elif 'Duokan' in ua:
format_ = 'opds'
mediatype = mt.opds
opensearch_support = 1
self.format = format_
self.mediatype = mediatype
self.opensearch_support = opensearch_support
def log_request(self, page):
""" Log the request params. Now a dummy. """
pass
def fix_sortorder(self):
""" Check selected sort order against available sort orders. """
if self.sort_orders:
if not self.sort_order or self.sort_order not in self.sort_orders:
self.sort_order = self.sort_orders [0]
self.alternate_sort_orders = [x for x in self.sort_orders
if x != self.sort_order]
self.sorted_by = self.sorted_msgs [self.sort_order]
self.title += " (%s)" % self.sorted_by
# content of extra field depends on sorting
self.f_format_extra = {
'alpha': self.format_none,
'title': self.format_none,
'downloads': self.format_downloads,
'quantity': self.format_quantity,
'release_date': self.format_release_date,
'random': self.format_none,
}[self.sort_order]
if self.sort_order == 'title':
self.f_format_title = self.format_title_filing
@staticmethod
def format_title(row):
""" Format a book title for display in results. """
title = gg.cut_at_newline(row.get('title') or 'No Title')
for lang_id in row.get('fk_langs') or []:
if lang_id != 'en':
title += " (%s)" % cherrypy.response.i18n.locale.languages.get(lang_id, lang_id)
return title
@staticmethod
def format_title_filing(row):
""" Format a book title for display in results. """
title = gg.cut_at_newline(row.get('filing') or 'No Title')
for lang_id in row.get('fk_langs') or []:
if lang_id != 'en':
title += " (%s)" % cherrypy.response.i18n.locale.languages.get(lang_id, lang_id)
return title
@staticmethod
def format_author(row):
""" Format an author name for display in results. """
authors = row.get('author')
if authors is None:
return None
authors = [ DublinCore.DublinCore.make_pretty_name(a) for a in authors ]
return DublinCore.DublinCore.strunk(authors)
@staticmethod
def format_language(row):
""" Format a language name for display in results. """
if row.pk in cherrypy.response.i18n.locale.languages:
return cherrypy.response.i18n.locale.languages[row.pk]
return row.title
@staticmethod
def format_none(dummy_row):
""" Output nothing on results. """
return None
@staticmethod
def format_subtitle(row):
""" Format a book subtitle for display in results. """
return row.get('subtitle')
@staticmethod
def format_downloads(row):
""" Format the no. of download for display in results. """
downloads = int(row.get('downloads', 0))
# NOTE: No. of times a book was downloaded
return __('1 download', '{0} downloads', downloads).format(downloads)
@staticmethod
def format_quantity(row):
""" Format the quantity of books for display in results. """
count = int(row.get('quantity', 0))
# NOTE: No. of books by some author, on a subject, etc.
return __('1 book', '{0} books', count).format(count)
@staticmethod
def format_release_date(row):
""" Format the release date for display in results. """
return babel.dates.format_date(row.get('release_date'),
locale = str(cherrypy.response.i18n.locale))
def format_suggestion(self, row):
""" Format a suggestion for display in results. """
query = ' '.join(self.query.split()[0:-1])
if query:
query += ' '
return query + gg.cut_at_newline(row.get('title') or '')
@staticmethod
def format_no_url(dummy_row):
""" Show no url in results. """
return None
def format_bibrec_url(self, row):
""" Generate a bibrec url """
return self.url('bibrec', id = row.pk)
def format_canonical_bibrec_url(self, row):
""" Generate the rel=canonical bibrec url for a book. """
return self.url('bibrec', host=self.file_host, protocol='https', id=row.pk, format=None)
def format_thumb_url(self, row):
""" Generate the thumb url in results. """
if row.coverpages:
return '/' + row.coverpages[0]
return None
def format_icon(self, dummy_row):
""" Show a book icon in results. """
return self.icon
def format_icon_titles(self, row):
""" Show a book icon or audio icon in results. """
# for 'title' listings, replace book icon with audio icon
if row.fk_categories and AUDIOBOOK_CATEGORIES.intersection(row.fk_categories):
return 'audiobook'
return self.icon
def sql_get(query, **params):
""" Quick and dirty SQL query returning one value. """
conn = cherrypy.engine.pool.connect()
try:
c = conn.cursor()
c.execute(query, params)
row = c.fetchone()
if row:
return row[0]
return None
except DatabaseError as what:
cherrypy.log("SQL Error: %s\n" % what,
context = 'REQUEST', severity = logging.ERROR)
cherrypy.log("Query was: %s\n" % c.mogrify(query, params),
context = 'REQUEST', severity = logging.ERROR)
conn.detach()
raise
class SQLSearcher(object):
""" An SQL searcher. """
def search(self, os, sql):
"""
Perform the SQL query and format rows into `Cat´s .
Use plugin functions to format rows.
"""
sql.sort_order = os.sort_order
sql.start_index = os.start_index
sql.items_per_page = os.items_per_page
query, params = sql.build()
query += ' -- ' + os.ip
rows = self.execute(query, params)
# this is not necessarily the size of the result set.
# if the result set is bigger than this page can show
# total_results will be last item on page + 1
os.total_results = min(os.start_index - 1 + len(rows), MAX_RESULTS)
for i in range(0, min(len(rows), os.items_per_page)):
row = rows[i]
cat = Cat()
cat.title = os.f_format_title(row)
cat.subtitle = os.f_format_subtitle(row)
cat.extra = os.f_format_extra(row)
cat.url = os.f_format_url(row)
cat.thumb_url = os.f_format_thumb_url(row)
cat.icon = os.f_format_icon(row)
cat.header = row.get('header', '')
cat.class_ += os.class_
cat.order = 10
os.entries.append(cat)
return os
@staticmethod
def mogrify(dummy_os, sql):
""" Format a query and return it as string without executing it. """
conn = cherrypy.engine.pool.connect()
c = conn.cursor()
query, params = sql.build()
return c.mogrify(query, params).decode('utf-8')
@staticmethod
def execute(query, params):
""" Execute a query and return an array of rows. """
conn = cherrypy.engine.pool.connect()
try:
c = conn.cursor()
#cherrypy.log("SQL Query: %s\n" % c.mogrify (query, params),
# context = 'REQUEST', severity = logging.ERROR)
c.execute(query, params)
return [xl(c, row) for row in c.fetchall()]
except DatabaseError as what:
cherrypy.log("SQL Error: %s\n" % what,
context = 'REQUEST', severity = logging.ERROR)
cherrypy.log("Query was: %s\n" % c.mogrify(query, params),
context = 'REQUEST', severity = logging.ERROR)
conn.detach()
raise