-
Notifications
You must be signed in to change notification settings - Fork 195
/
Copy pathnewitem.py
executable file
·225 lines (181 loc) · 7.5 KB
/
newitem.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
#!/usr/bin/env python3
"""This script creates new items on Wikidata based on certain criteria.

* When was the (Wikipedia) page created?
* When was the last edit on the page?
* Does the page contain interwikis?

This script understands various command-line arguments:

-lastedit         The minimum number of days that must have passed since
                  the page was last edited.

-pageage          The minimum number of days that must have passed since
                  the page was created.

-touch            Do a null edit on every page which has a Wikibase item.
                  Be careful, this option can trigger edit rate limits or
                  captchas if your account is not autoconfirmed.
"""
#
# (C) Pywikibot team, 2014-2024
#
# Distributed under the terms of the MIT license.
#
from __future__ import annotations
from datetime import timedelta
from textwrap import fill
import pywikibot
from pywikibot import pagegenerators
from pywikibot.bot import WikidataBot
from pywikibot.exceptions import (
LockedPageError,
NoCreateError,
NoPageError,
PageSaveRelatedError,
)
# Repository item IDs. get_skipping_templates() resolves each one to a
# template page of the treated site via site.page_from_repository(); pages
# using one of those templates (or a redirect to it) are skipped.
DELETION_TEMPLATES = ('Q4847311', 'Q6687153', 'Q21528265')
class NewItemRobot(WikidataBot):
    """Bot that creates repository items for pages which lack one.

    A page is skipped when its last edit or its creation is too recent
    (``lastedit`` and ``pageage`` options), when it is a category
    redirect, when it has language links, or when it uses one of the
    templates resolved from ``DELETION_TEMPLATES``.
    """

    use_redirect = False
    treat_missing_item = True
    update_options = {
        'always': True,
        'lastedit': 7,
        'pageage': 21,
        # 'touch' can be False, 'newly' (only pages linked to newly
        # created items) or True (touch all pages).
        'touch': 'newly',
    }

    def __init__(self, **kwargs) -> None:
        """Initialize the bot.

        Only accepts options defined in available_options.

        :param kwargs: options forwarded unchanged to the parent class
        """
        super().__init__(**kwargs)
        # Per-site cache filled lazily by get_skipping_templates().
        self._skipping_templates = {}

    def setup(self) -> None:
        """Compute the two age thresholds and report them."""
        super().setup()
        self.pageAgeBefore = (
            self.repo.server_time() - timedelta(days=self.opt.pageage))
        self.lastEditBefore = (
            self.repo.server_time() - timedelta(days=self.opt.lastedit))

        page_cutoff = self.pageAgeBefore.isoformat()
        edit_cutoff = self.lastEditBefore.isoformat()
        pywikibot.info(
            f'Page age is set to {self.opt.pageage} days so only pages '
            f'created\nbefore {page_cutoff} will be considered.\n\n'
            f'Last edit is set to {self.opt.lastedit} days so only pages '
            f'last edited\nbefore {edit_cutoff} will be considered.\n'
        )

    @staticmethod
    def _touch_page(page) -> None:
        """Do a null edit on the page, logging failures instead of raising.

        :param page: page to touch
        """
        pywikibot.info('Doing a null edit on the page.')
        # The handler order is kept from the original: the specific
        # errors are listed before the general PageSaveRelatedError.
        try:
            page.touch()
        except (NoCreateError, NoPageError):
            reason = f'Page {page.title(as_link=True)} does not exist.'
        except LockedPageError:
            reason = f'Page {page.title(as_link=True)} is locked.'
        except PageSaveRelatedError as err:
            reason = f'Page {page} not saved:\n{err.args}'
        else:
            return
        pywikibot.error(reason)

    def _callback(self, page, exc) -> None:
        """Touch the page after item creation if the touch option is set.

        :param page: page for which the item creation was requested
        :param exc: error handed to the creation callback; the page is
            only touched when this is None
        """
        if exc is not None or not self.opt.touch:
            return
        self._touch_page(page)

    def get_skipping_templates(self, site) -> set[pywikibot.Page]:
        """Return the templates which cause a page on *site* to be skipped.

        The result is kept per site in an attribute, so the lookup is
        done only once when the script runs over multiple sites.

        :param site: site the templates are looked up for
        :return: the skipping templates and the redirects pointing to them
        """
        cached = self._skipping_templates.get(site)
        if cached is not None:
            return cached

        found = set()
        pywikibot.info(f'Retrieving skipping templates for site {site}...')
        for item_id in DELETION_TEMPLATES:
            template = site.page_from_repository(item_id)
            if template is not None:
                found.add(template)
                # also add redirect templates
                found.update(
                    template.getReferences(
                        follow_redirects=False,
                        with_template_inclusion=False,
                        filter_redirects=True,
                        namespaces=site.namespaces.TEMPLATE))

        self._skipping_templates[site] = found
        return found

    def skip_templates(self, page) -> str:
        """Find the first skipping template used on the page.

        :param page: treated page
        :type page: pywikibot.Page
        :return: title (without namespace) of the template which leads to
            skip, or an empty string if there is none
        """
        blockers = self.get_skipping_templates(page.site)
        return next(
            (tpl.title(with_ns=False)
             for tpl, _params in page.templatesWithParams()
             if tpl in blockers),
            '')

    def skip_page(self, page) -> bool:
        """Tell whether the page must not be treated.

        :param page: page to check
        :return: True if the page is to be skipped
        """
        if super().skip_page(page):
            return True

        last_edit = page.latest_revision.timestamp
        if last_edit > self.lastEditBefore:
            pywikibot.info(
                f'Last edit on {page} was on {last_edit}.\n'
                f'Too recent. Skipping.')
            return True

        # Looked up only after the last-edit check has passed.
        created = page.oldest_revision.timestamp
        if created > self.pageAgeBefore:
            pywikibot.info(
                f'Page creation of {page} on {created} is too recent. '
                f'Skipping.')
            return True

        if page.isCategoryRedirect():
            pywikibot.info(f'{page} is a category redirect. Skipping.')
            return True

        if page.langlinks():
            # FIXME: Implement this
            pywikibot.info(
                f'Found language links (interwiki links) for {page}.\n'
                f"Haven't implemented that yet so skipping.")
            return True

        blocker = self.skip_templates(page)
        if not blocker:
            return False

        pywikibot.info(f'{page} contains {{{{{blocker}}}}}. Skipping.')
        return True

    def treat_page_and_item(self, page, item) -> None:
        """Create an item for the page unless it already has one.

        :param page: treated page
        :param item: item connected to the page, if any
        """
        if not item or not item.exists():
            self.create_item_for_page(
                page, callback=lambda _, exc: self._callback(page, exc))
            return

        pywikibot.info(f'{page} already has an item: {item}.')
        # Existing items are touched only for an explicit True; the
        # 'newly' setting does not apply here.
        if self.opt.touch is True:
            self._touch_page(page)
def main(*args: str) -> None:
    """Parse the command line arguments and run the bot.

    If args is an empty list, sys.argv is used.

    :param args: command line arguments
    """
    # Process global args and prepare generator args parser
    remaining = pywikibot.handle_args(args)
    gen_factory = pagegenerators.GeneratorFactory()

    options = {}
    for arg in remaining:
        name, colon, value = arg.partition(':')
        if colon and name in ('-pageage', '-lastedit'):
            options[name[1:]] = int(value)
            continue
        if not gen_factory.handle_arg(arg):
            # Not a generator argument: store it as a boolean option.
            options[arg[1:].lower()] = True

    generator = gen_factory.getCombinedGenerator(preload=True)
    if not generator:
        pywikibot.bot.suggest_help(missing_generator=True)
        return

    bot = NewItemRobot(generator=generator, **options)
    if not bot.site.logged_in():
        bot.site.login()
    user = pywikibot.User(bot.site, bot.site.username())

    # The default 'newly' mode is switched off for accounts that are
    # neither autoconfirmed nor confirmed.
    if bot.opt.touch == 'newly':
        groups = user.groups()
        if 'autoconfirmed' not in groups and 'confirmed' not in groups:
            message = (
                f'You are logged in as {user.username}, an account that is '
                f'not in the autoconfirmed group on {bot.site.sitename}. '
                'Script will not touch pages linked to newly created items '
                'to avoid triggering edit rates or captchas. '
                'Use -touch param to force this.'
            )
            pywikibot.warning(fill(message))
            bot.opt.touch = False

    bot.run()
# Call main() only when this file is executed as a script.
if __name__ == '__main__':
    main()