-
Notifications
You must be signed in to change notification settings - Fork 0
/
parse_pages.py
77 lines (56 loc) · 2.14 KB
/
parse_pages.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import csv
import json
from datetime import date, timedelta
from urllib.parse import quote, urlencode

import requests

from constants import PARTIES
# Overrides for the search term sent to the spending report: a party key found
# here is looked up under the mapped value instead of under its own key.
SEARCH_MAP = {
    "50+": "50P",
    "CU": "ChristenUnie",
    "GL": "Groenlinks",
    "PvdD": "Partij voor de Dieren",
}
def get_spending_report(party, cursor=None):
    """Retrieve Facebook page ids linked to a party from the Facebook Ad library spending report.

    Requests the advertiser table of the report (``country=NL``,
    ``time_preset=lifelong``, sorted by spend) for the search term ``party``
    and keeps following the forward cursor returned by each response until
    the response no longer carries one.

    Args:
        party: Search term sent as the ``q`` query parameter.
        cursor: Optional encrypted forward cursor to start from. ``None``
            starts at the first page of results.

    Returns:
        A dict mapping each advertiser page id (as ``str``) to the advertiser
        page name. Advertisers with a page id of ``0`` are skipped.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        ValueError: If the response body is not valid JSON.
        KeyError: If the decoded response lacks an expected key.
    """
    endpoint = "https://www.facebook.com/ads/library/report/async/advertiser_data/"
    # The response body is prefixed with this guard before the JSON document.
    json_guard = "for (;;);"
    # Computed once so every page of a single run queries the same report date.
    report_date = (date.today() - timedelta(days=2)).strftime("%Y-%m-%d")

    page_ids = {}
    while True:
        params = {
            "report_ds": report_date,
            "country": "NL",
            "time_preset": "lifelong",
            "sort_column": "spend",
            "component_id": "advertiser_table",
            "q": party,
        }
        if cursor is not None:
            params["encrypted_forward_cursor"] = cursor
        # Percent-encode every value so search terms containing spaces and
        # cursors containing reserved characters reach the server unchanged.
        url = f"{endpoint}?{urlencode(params, quote_via=quote)}"
        print(url)

        # Without a timeout a stalled connection would block the script forever.
        r = requests.post(url, data={"__a": 1}, timeout=30)
        r.raise_for_status()

        body = r.text
        if body.startswith(json_guard):
            body = body[len(json_guard):]
        payload = json.loads(body)["payload"]

        for advertiser in payload["advertisers"]:
            if advertiser["advertiserPageID"] == 0:
                continue
            # For the search term "SP", only keep advertisers that mention
            # "SP" in their page name or disclaimer.
            # NOTE(review): presumably this filters out unrelated matches for
            # such a short search term - confirm.
            if party == "SP" and "SP" not in advertiser["advertiserPage"] and "SP" not in advertiser["disclaimer"]:
                continue
            page_ids[str(advertiser["advertiserPageID"])] = advertiser["advertiserPage"]

        # Iterate instead of recursing so the number of result pages is not
        # bounded by the interpreter's recursion limit.
        cursor = payload["advertiserCursors"]["encryptedForwardCursor"]
        if cursor is None:
            break

    return page_ids
# Collect the page ids for every party, searching under the alternative term
# from SEARCH_MAP when one is defined and under the party key otherwise.
party_pages = {}
for p in PARTIES:
    print(p)
    party_pages[p] = get_spending_report(SEARCH_MAP.get(p, p))

# Write one row per (party, page), ordered by party and then by page name.
# The encoding is set explicitly so page names with non-ASCII characters are
# written the same way regardless of the platform's default encoding.
with open("../data/facebook_page_ids.csv", "w", newline="", encoding="utf-8") as h_file:
    writer = csv.DictWriter(h_file, ["Party", "Page Name", "Page ID"], quoting=csv.QUOTE_ALL)
    writer.writeheader()
    for party in sorted(party_pages):
        for page_id, page_name in sorted(party_pages[party].items(), key=lambda e: e[1]):
            writer.writerow(
                {
                    "Party": party,
                    "Page Name": page_name,
                    "Page ID": page_id,
                }
            )