-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathcaptiontoimage.py
123 lines (99 loc) · 4.22 KB
/
captiontoimage.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
#!/usr/bin/python
import pywikibot
from pywikibot import pagegenerators
from query_store import QueryStore
from wikidata import WikidataEntityBot
class CaptionToImageBot(WikidataEntityBot):

    '''
    Bot re-adding file captions as qualifiers to the files on Wikidata

    Every statement using the caption property on an item is copied as
    a qualifier to the item's single media statement and the original
    statement is removed afterwards.

    Supported parameters:
    * -removeall - if a caption cannot be reused, remove it as well
    '''

    # property holding the captions that are to be moved
    caption_property = 'P2096'
    # media property preferred as the target of the captions
    image_property = 'P18'
    # NOTE(review): consumed by the parent bot class, which is not
    # visible in this file - confirm its meaning there
    use_from_page = False

    def __init__(self, generator, **kwargs):
        '''
        Initialize the bot.

        :param generator: iterable of pages to work on; when it is
            falsy, the result of custom_generator() is used instead
        :param kwargs: keyword arguments passed on to the parent class;
            the caption property is appended to the 'bad_cache' list
        '''
        self.available_options.update({
            'removeall': False
        })
        kwargs.setdefault('bad_cache', []).append(self.caption_property)
        super().__init__(**kwargs)
        self.store = QueryStore()
        self._generator = generator or self.custom_generator()

    def custom_generator(self):
        '''Return a generator fed by the stored 'captions' SPARQL query.'''
        query = self.store.build_query('captions', prop=self.caption_property)
        return pagegenerators.WikidataSPARQLPageGenerator(query, site=self.repo)

    @property
    def generator(self):
        '''Return the underlying generator wrapped in a preloading one.'''
        return pagegenerators.PreloadingEntityGenerator(self._generator)

    def filterProperty(self, prop_page):
        '''Return whether the property page has the commonsMedia type.'''
        return prop_page.type == 'commonsMedia'

    def skip_page(self, item):
        '''Skip what the parent class skips and items without a caption.'''
        return super().skip_page(item) or (
            self.caption_property not in item.claims)

    def _save_entity(self, func, *args, **kwargs):
        '''
        Call func with the given arguments, minus 'asynchronous'.

        :param func: the callable doing the actual edit
        :return: whatever func returns
        '''
        # fixme upstream
        kwargs.pop('asynchronous', None)
        return func(*args, **kwargs)

    @staticmethod
    def _caption_language(claim):
        '''
        Return the language of the claim's value.

        getTarget() gives None for claims set to "unknown value" or
        "no value"; None is returned for them instead of raising
        AttributeError.
        '''
        return getattr(claim.getTarget(), 'language', None)

    def treat_page_and_item(self, page, item):
        '''
        Move the captions of the item to its media statement.

        The image property is preferred; when the item does not use it,
        the only other property accepted by checkProperty() is taken.
        Nothing is edited when several such properties are used or when
        the chosen property has more than one value. A caption whose
        language is already present among the qualifiers is not copied.
        With -removeall, captions that could not be copied for that
        reason, or because there is no media property at all, are
        removed as well.

        :param page: not used by this method
        :param item: the item whose captions are processed
        '''
        our_prop = self.image_property
        if our_prop not in item.claims:
            # no image on the item: accept another media property, but
            # only when it is unambiguous
            our_prop = None
            for prop in item.claims:
                if self.checkProperty(prop):
                    if our_prop is None:
                        our_prop = prop
                    else:
                        pywikibot.info('More than one media property used')
                        return

        remove_claims = []
        remove_all = self.opt['removeall'] is True
        if our_prop is None:
            pywikibot.info('No media property found')
            if remove_all:
                remove_claims.extend(item.claims[self.caption_property])
                self._save_page(item, self._save_entity, item.removeClaims,
                                remove_claims,
                                summary='removing redundant property')
            return

        media_claims = item.claims[our_prop]
        if len(media_claims) > 1:
            pywikibot.info(f'Property {our_prop} has more than one value')
            return
        media_claim = media_claims[0]

        for caption in item.claims[self.caption_property]:
            language = self._caption_language(caption)
            # looked up on every pass because qualifiers added by the
            # previous iterations have to be taken into account
            existing = media_claim.qualifiers.get(self.caption_property, [])
            # a caption without a value has no language to clash with
            has_same_lang = language is not None and any(
                self._caption_language(other) == language
                for other in existing)
            if has_same_lang:
                pywikibot.info(f'Property {our_prop} already has '
                               f'a caption in language {language}')
                if remove_all:
                    remove_claims.append(caption)
                continue

            qualifier = caption.copy()
            qualifier.isQualifier = True
            # the stand-alone caption is dropped only after the
            # qualifier has been saved successfully
            if self._save_page(item, self._save_entity,
                               media_claim.addQualifier, qualifier):
                remove_claims.append(caption)

        if remove_claims:
            self._save_page(item, self._save_entity, item.removeClaims,
                            remove_claims,
                            summary='removing redundant property')

def main(*args):
    '''
    Process the command line arguments and run the bot.

    Arguments left over by pywikibot.handle_args() are offered to the
    generator factory. Of those it hands back, the ones starting with
    '-' become bot options: '-name' is stored as True, '-name:value' as
    the value, converted to int when it consists of digits only.

    :param args: command line arguments
    '''
    local_args = pywikibot.handle_args(args)
    site = pywikibot.Site()
    gen_factory = pagegenerators.GeneratorFactory(site=site)

    options = {}
    for arg in gen_factory.handle_args(local_args):
        if not arg.startswith('-'):
            continue
        name, _, value = arg.partition(':')
        key = name[1:]
        if value == '':
            options[key] = True
        elif value.isdigit():
            options[key] = int(value)
        else:
            options[key] = value

    generator = gen_factory.getCombinedGenerator()
    bot = CaptionToImageBot(generator=generator, site=site, **options)
    bot.run()


# run the bot only when the file is executed as a script
if __name__ == '__main__':
    main()