forked from wikimedia/pywikibot
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathadd_text.py
executable file
·242 lines (179 loc) · 7.24 KB
/
add_text.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
#!/usr/bin/env python3
r"""
Append text to the top or bottom of a page.
By default this adds the text to the bottom above the categories and interwiki.
Use the following command line parameters to specify what to add:
-text Text to append. "\n" are interpreted as newlines.
-textfile Path to a file with text to append
-summary Change summary to use
-up Append text to the top of the page rather than the bottom
-create Create the page if necessary. Note that talk pages are
created already without of this option.
-createonly Only create the page but do not edit existing ones
-always If used, the bot won't ask if it should add the specified
text
-major If used, the edit will be saved without the "minor edit" flag
-talkpage Put the text onto the talk page instead
-talk
-excepturl Skip pages with a url that matches this regular expression
-noreorder Place the text beneath the categories and interwiki
Furthermore, the following can be used to specify which pages to process...
¶ms;
Examples
--------
1. Append 'hello world' to the bottom of the sandbox:
python pwb.py add_text -page:Wikipedia:Sandbox \
-summary:"Bot: pywikibot practice" -text:"hello world"
2. Add a template to the top of the pages with 'category:catname':
python pwb.py add_text -cat:catname -summary:"Bot: Adding a template" \
-text:"{{Something}}" -except:"\{\{([Tt]emplate:|)[Ss]omething" -up
3. Command used on it.wikipedia to put the template in the page without any
category:
python pwb.py add_text -except:"\{\{([Tt]emplate:|)[Cc]ategorizzare" \
-text:"{{Categorizzare}}" -excepturl:"class='catlinks'>" -uncat \
-summary:"Bot: Aggiungo template Categorizzare"
"""
#
# (C) Pywikibot team, 2007-2023
#
# Distributed under the terms of the MIT license.
#
from __future__ import annotations
import codecs
import regex as re
import pywikibot
from pywikibot import config, pagegenerators, textlib
from pywikibot.backports import Sequence
from pywikibot.bot import AutomaticTWSummaryBot, ExistingPageBot
DEFAULT_ARGS: dict[str, bool | str] = {
'text': '',
'textfile': '',
'summary': '',
'up': False,
'create': False,
'createonly': False,
'always': False,
'minor': True,
'talk_page': False,
'reorder': True,
'regex_skip_url': '',
}
ARG_PROMPT = {
'-text': 'What text do you want to add?',
'-textfile': 'Which text file do you want to append to the page?',
'-summary': 'What summary do you want to use?',
'-excepturl': 'What url pattern should we skip?',
}
docuReplacements = {'¶ms;': pagegenerators.parameterHelp} # noqa: N816
class AddTextBot(AutomaticTWSummaryBot, ExistingPageBot):
"""A bot which adds a text to a page."""
use_redirects = False
summary_key = 'add_text-adding'
update_options = DEFAULT_ARGS
@property
def summary_parameters(self):
"""Return a dictionary of all parameters for i18n.
Line breaks are replaced by dash.
"""
text = re.sub(r'\r?\n', ' - ', self.opt.text[:200])
return {'adding': text}
def setup(self) -> None:
"""Read text to be added from file."""
if self.opt.textfile:
with codecs.open(self.opt.textfile, 'r',
config.textfile_encoding) as f:
self.opt.text = f.read()
else:
# Translating the \\n into binary \n if given from command line
self.opt.text = self.opt.text.replace('\\n', '\n')
if self.opt.talk_page:
self.generator = pagegenerators.PageWithTalkPageGenerator(
self.generator, return_talk_only=True)
def skip_page(self, page):
"""Skip if -exceptUrl matches or page does not exists."""
if page.exists():
if self.opt.createonly:
pywikibot.warning(f'Skipping because {page} already exists')
return True
if self.opt.regex_skip_url:
url = page.full_url()
result = re.findall(self.opt.regex_skip_url,
page.site.getUrl(url))
if result:
pywikibot.warning(
f'Skipping {page} because -excepturl matches {result}.'
)
return True
elif page.isTalkPage():
pywikibot.info(f"{page} doesn't exist, creating it!")
return False
elif self.opt.create or self.opt.createonly:
return False
return super().skip_page(page)
def treat_page(self) -> None:
"""Add text to the page."""
text = self.current_page.text
if self.opt.up:
text = self.opt.text + '\n' + text
elif not self.opt.reorder:
text += '\n' + self.opt.text
else:
text = textlib.add_text(text, self.opt.text,
site=self.current_page.site)
self.put_current(text, summary=self.opt.summary, minor=self.opt.minor)
def main(*argv: str) -> None:
"""
Process command line arguments and invoke bot.
If args is an empty list, sys.argv is used.
:param argv: Command line arguments
"""
generator_factory = pagegenerators.GeneratorFactory()
try:
options = parse(argv, generator_factory)
except ValueError as exc:
pywikibot.bot.suggest_help(additional_text=str(exc))
return
generator = generator_factory.getCombinedGenerator()
if pywikibot.bot.suggest_help(missing_generator=not generator):
return
bot = AddTextBot(generator=generator, **options)
bot.run()
def parse(argv: Sequence[str],
generator_factory: pagegenerators.GeneratorFactory
) -> dict[str, bool | str]:
"""
Parses our arguments and provide a dictionary with their values.
:param argv: input arguments to be parsed
:param generator_factory: factory that will determine what pages to
process
:return: dictionary with our parsed arguments
:raise ValueError: if we receive invalid arguments
"""
args = dict(DEFAULT_ARGS)
argv = pywikibot.handle_args(argv)
argv = generator_factory.handle_args(argv)
for arg in argv:
option, _, value = arg.partition(':')
if not value and option in ARG_PROMPT:
value = pywikibot.input(ARG_PROMPT[option])
if option in ('-text', '-textfile', '-summary'):
args[option[1:]] = value
elif option in ('-up', '-always', '-create', '-createonly'):
args[option[1:]] = True
elif option in ('-talk', '-talkpage'):
args['talk_page'] = True
elif option == '-noreorder':
args['reorder'] = False
elif option == '-excepturl':
args['regex_skip_url'] = value
elif option == '-major':
args['minor'] = False
else:
raise ValueError(f"Argument '{option}' is unrecognized")
if not args['text'] and not args['textfile']:
raise ValueError("Either the '-text' or '-textfile' is required")
if args['text'] and args['textfile']:
raise ValueError("'-text' and '-textfile' cannot both be used")
return args
if __name__ == '__main__':
main()