-
Notifications
You must be signed in to change notification settings - Fork 20
/
Copy pathmain.py
executable file
·264 lines (241 loc) · 10.5 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
#!/usr/bin/env python2
"""Save everything from your VK wall"""
__author__ = "Rast"
import logging
import argparse
from collections import defaultdict
from PostParser import PostParser
from Api import call_api, auth
import os
def arg_parse(argv=None):
    """Build the command-line parser and parse the arguments.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse reads ``sys.argv[1:]``.

    Returns:
        The ``argparse.Namespace`` with attributes ``directory``, ``id``,
        ``token``, ``app_id``, ``mode`` (a list of section names),
        ``<section>_start`` / ``<section>_end`` for every section,
        ``verbose`` and ``no_download``.

    Raises:
        SystemExit: raised by argparse on invalid arguments, including a
            ``--mode`` value that is not one of the known sections.
    """
    # (option prefix, singular noun, plural noun) for every dumpable section
    sections = (
        ("wall", "Post", "posts"),
        ("audio", "Audio", "audios"),
        ("video", "Video", "videos"),
        ("notes", "Note", "notes"),
        ("docs", "Document", "docs"),
    )
    modes = [prefix for prefix, _, _ in sections]

    argparser = argparse.ArgumentParser()
    argparser.add_argument("-d", "--dir",
                           type=str,
                           help="Directory to store dumped data",
                           dest="directory",
                           required=False,
                           default=".")
    argparser.add_argument("-i", "--id",
                           type=int,
                           help="User ID to dump. To dump a group, specify its ID with '-' prefix",
                           metavar="USER_ID|-GROUP_ID",
                           dest="id",
                           required=True)
    argparser.add_argument("-t", "--token",
                           type=str,
                           help="Access token, generated by VK for session",
                           dest="token",
                           required=False)
    argparser.add_argument("-a", "--app_id",
                           type=int,
                           help="Your application ID to access VK API",
                           dest="app_id",
                           required=True)
    # `choices` makes argparse reject unknown section names; the explicit
    # metavar keeps the usage line short instead of listing every choice.
    argparser.add_argument("-m", "--mode",
                           choices=modes,
                           nargs="+",
                           help="What to dump. Possible values: " + ', '.join(modes),
                           metavar="MODE",
                           dest="mode",
                           required=True)
    # Every section gets the same pair of range options.
    for prefix, singular, plural in sections:
        argparser.add_argument("--{}_start".format(prefix),
                               type=int,
                               help="{} number to start from (first is 0)".format(singular),
                               dest="{}_start".format(prefix),
                               required=False,
                               metavar="INT",
                               default=0)
        argparser.add_argument("--{}_end".format(prefix),
                               type=int,
                               help="{} number to end at (0 = all {})".format(singular, plural),
                               dest="{}_end".format(prefix),
                               required=False,
                               metavar="INT",
                               default=0)
    argparser.add_argument("-v", "--verbose", action="store_true",
                           help="Print more info to STDOUT while processing")
    argparser.add_argument("--no-download",
                           action="store_true",
                           help="Do not download attachments, only store links",
                           dest="no_download",
                           required=False)
    return argparser.parse_args(argv)
def process_post(number, post_data, post_parser, json_stuff):
    """Hand one wall post over to the parser.

    The post payload is read from index 1 of ``post_data`` and wrapped in a
    ``defaultdict`` so that looking up an absent field yields an empty
    string instead of raising ``KeyError``.
    """
    payload = post_data[1]
    post_fields = defaultdict(lambda: "")
    post_fields.update(payload)
    post_parser(number, post_fields, json_stuff)
def process_audio(number, audio_data, post_parser, json_stuff):
    """Wrap one audio track as a post-like record and pass it to the parser.

    Args:
        number: Label passed through to ``post_parser`` unchanged.
        audio_data: Sequence whose first element is the track; an empty
            sequence is treated as a deleted track.
        post_parser: Callable invoked as
            ``post_parser(number, data, json_stuff)``.
        json_stuff: Passed through to ``post_parser`` unchanged.

    A deleted track is logged as a warning and skipped. Only the lookup of
    the track is guarded, so an ``IndexError`` raised inside ``post_parser``
    propagates instead of being misreported as a deleted track.
    """
    try:
        track = audio_data[0]
    except IndexError:  # deleted :(
        logging.warning("Deleted track: {}".format(str(audio_data)))
        return
    data = {'attachments': [{'type': 'audio',
                             'audio': track,
                             }],
            'id': 'audio'
            }
    post_parser(number, data, json_stuff)
def process_doc(number, doc_data, post_parser, json_stuff):
    """Wrap one document as a post-like record and pass it to the parser.

    The record carries a single attachment of type ``doc`` holding
    ``doc_data`` and uses the fixed id ``doc``.
    """
    attachment = {'type': 'doc', 'doc': doc_data}
    record = {'attachments': [attachment], 'id': 'doc'}
    post_parser(number, record, json_stuff)
def ranges(start, end, count):
    """Validate a start/end selection against the number of available items.

    An ``end`` of 0 means "up to ``count``". Returns the tuple
    ``(start, end, total)`` where ``total`` is ``end - start``.

    Raises:
        RuntimeError: if ``start`` is outside ``0..count`` or ``end`` is
            outside ``start..count``.
    """
    stop = count if end == 0 else end
    if start < 0 or start >= count + 1:
        raise RuntimeError("Start argument not in valid range")
    if stop < start or stop > count:
        raise RuntimeError("End argument not in valid range")
    logging.info("Working range: from {} to {}".format(start, stop))
    return start, stop, stop - start
def _print_progress(done, total):
    """Print how many of `total` items are done; an empty range counts as complete."""
    # Guard the division: a valid selection may contain zero items.
    fraction = float(done) / total if total else 1.0
    print("\nDone: {:.2%} ({})".format(fraction, int(done)))


def _dump_wall(args):
    """Fetch the wall posts in the requested range one by one and parse each."""
    # A one-post request is issued first; element 0 of its response is used as the post count.
    (response, json_stuff) = call_api("wall.get", [("owner_id", args.id), ("count", 1), ("offset", 0)], args)
    count = response[0]
    logging.info("Total posts: {}".format(count))
    print("Wall download start")
    args.wall_start, args.wall_end, total = ranges(args.wall_start, args.wall_end, count)
    post_parser = PostParser(args.directory, str(args.id), args)
    for done, offset in enumerate(xrange(args.wall_start, args.wall_end)):
        if args.verbose and done % 10 == 0:
            _print_progress(done, total)
        (post, json_stuff) = call_api("wall.get", [("owner_id", args.id), ("count", 1), ("offset", offset)], args)
        process_post(("wall post", offset), post, post_parser, json_stuff)
    if args.verbose:
        _print_progress(total, total)


def _dump_audio(args):
    """Fetch the audio tracks in the requested range one by one and parse each."""
    (response, json_stuff) = call_api("audio.getCount", [("oid", args.id)], args)
    count = response
    logging.info("Total audio tracks: {}".format(count))
    print("Audio download start")
    args.audio_start, args.audio_end, total = ranges(args.audio_start, args.audio_end, count)
    post_parser = PostParser(args.directory, str(args.id), args)
    # audio.get is called with the absolute id under "uid" or "gid". The
    # value is kept in a local so args.id keeps its sign for later modes.
    id_param = "uid" if args.id > 0 else "gid"
    owner = abs(args.id)
    for done, offset in enumerate(xrange(args.audio_start, args.audio_end)):
        if args.verbose and done % 10 == 0:
            _print_progress(done, total)
        (audio, json_stuff) = call_api("audio.get", [(id_param, owner), ("count", 1), ("offset", offset)], args)
        process_audio(("audiotrack", offset), audio, post_parser, json_stuff)
    if args.verbose:
        _print_progress(total, total)


def _dump_docs(args):
    """Fetch the whole document list in one call and parse the requested slice."""
    # get ALL docs: element 0 of the response is used as the count, the rest as the documents
    (response, json_stuff) = call_api("docs.get", [("oid", args.id)], args)
    count = response[0]
    documents = response[1:]
    logging.info("Total documents: {}".format(count))
    print("Docs download start")
    args.docs_start, args.docs_end, total = ranges(args.docs_start, args.docs_end, count)
    post_parser = PostParser(args.directory, str(args.id), args)
    selected = documents[args.docs_start:args.docs_end]
    for done, doc in enumerate(selected):
        if args.verbose and done % 10 == 0:
            _print_progress(done, total)
        process_doc(("document", args.docs_start + done), doc, post_parser, json_stuff)
    if args.verbose:
        _print_progress(total, total)


def main():
    """Parse the command line, obtain an access token and dump each requested section.

    Sections are handled in the fixed order wall, audio, video, notes, docs.

    Raises:
        RuntimeError: if no access token is available, or a start/end
            option is outside the valid range for its section.
        NotImplementedError: if the ``video`` or ``notes`` mode is requested.
    """
    args = arg_parse()
    args.access_rights = ["wall", "audio", "friends", "notes", "video", "docs"]
    # Only call auth() when no token was given on the command line.
    args.token = auth(args) if args.token is None else args.token
    if args.token is None:
        raise RuntimeError("Access token not found")

    if 'wall' in args.mode:
        _dump_wall(args)
    if 'audio' in args.mode:
        _dump_audio(args)
    if 'video' in args.mode:
        raise NotImplementedError("Video mode is not written yet, sorry :(")
    if 'notes' in args.mode:
        raise NotImplementedError("Notes mode is not written yet, sorry :(")
    if 'docs' in args.mode:
        _dump_docs(args)
if __name__ == '__main__':
    # All log records from DEBUG up are written to report.log.
    logging.basicConfig(format=u"""%(filename).6s : %(lineno)4d #%(levelname)8s [%(asctime)s] %(message)s""",
                        level=logging.DEBUG,
                        filename=u'report.log')
    ok = False
    try:
        logging.info("Start")
        main()
        logging.info("End")
        ok = True
        print("")
    except KeyboardInterrupt:
        logging.critical("Interrupted by keystroke")
        # Call form of print, matching every other print in this file.
        print("\nWhy, cruel world?..")
    except Exception:
        # Record the traceback in the log file, then let the error propagate as before.
        logging.exception("Unhandled error")
        raise
    finally:
        if not ok:
            logging.critical("Fail")