-
Notifications
You must be signed in to change notification settings - Fork 3
/
card_name_to_vector.py
57 lines (36 loc) · 1.48 KB
/
card_name_to_vector.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import json
import re
# Load the card records once at import time; get_vocab() and get_card() read
# from the resulting module-level ``cards`` collection.
# JSON text is UTF-8 by specification, so decode it explicitly instead of
# relying on the platform's locale-dependent default encoding.
with open('CARDS.json', encoding='utf-8') as cards_file:
    cards = json.load(cards_file)
def clean_text(text):
    """Return *text* with '.', ',', '(' and ')' removed, then casefolded."""
    # A translation table with only a "delete" set drops the punctuation
    # characters in a single pass.
    without_punctuation = text.translate(str.maketrans('', '', '.,()'))
    return without_punctuation.casefold()
def get_vocab():
    """Return the sorted list of unique cleaned words used in card text.

    Every whitespace-separated token of each card's 'Description' and
    'Description (Upgraded)' fields is passed through clean_text(); the
    distinct results are returned in ascending order.
    """
    vocabulary = set()
    for entry in cards:
        for field in ('Description', 'Description (Upgraded)'):
            for token in entry[field].split():
                vocabulary.add(clean_text(token))
    return sorted(vocabulary)
def get_card(card_name, upgraded=False):
    """Look up a card by name and return its ``(cost, text)`` pair.

    The name is compared case-insensitively against each record's 'Name'.
    Values written as ``"N (M)"`` are read as "base (upgraded)": the first
    number is used for the base card and the second when *upgraded* is true.

    Args:
        card_name: Name of the card to find.
        upgraded: When true, select the upgraded cost, the upgraded
            description (if the record has a non-empty one) and the
            parenthesised numbers inside the description.

    Returns:
        A tuple ``(cost, text)``. ``cost`` is 0 for 'X' and 'Unplayable',
        the stored value when it is already an int, and otherwise the int
        parsed from the cost string. ``text`` is the chosen description with
        every ``"N (M)"`` pair collapsed to the selected number.

    Raises:
        StopIteration: If no record in ``cards`` has the given name.
    """
    wanted = card_name.casefold()
    card = next(entry for entry in cards if entry['Name'].casefold() == wanted)

    cost = card['Cost']
    if cost in ('X', 'Unplayable'):
        cost = 0
    elif not isinstance(cost, int):
        values = re.sub(r'[()]', '', cost).split()
        # A cost string holding a single number has no separate upgraded
        # value; fall back to it instead of failing with IndexError.
        cost = int(values[1] if upgraded and len(values) > 1 else values[0])

    if upgraded and card['Description (Upgraded)']:
        text = card['Description (Upgraded)']
    else:
        text = card['Description']

    # Collapse "N (M)" to N for the base card or M for the upgraded card.
    chosen_number = r'\2' if upgraded else r'\1'
    text = re.sub(r'([0-9]+) \(([0-9]+)\)', chosen_number, text)
    return cost, text
def card_name_to_vector(card_name):
    """Encode a card as ``[cost, count_0, count_1, ...]``.

    A trailing '+1' on *card_name* marks the upgraded version of the card;
    the suffix is stripped before the lookup.

    Args:
        card_name: Card name, optionally ending in '+1'.

    Returns:
        A list whose first element is the card's cost as returned by
        get_card(), followed by one entry per word of get_vocab() (in that
        order) giving how many times the word occurs in the card's text.
    """
    upgraded = card_name.endswith('+1')
    if upgraded:
        card_name = card_name[:-2]

    cost, text = get_card(card_name, upgraded)
    # Vocabulary words are produced by clean_text(), so the card's tokens must
    # be normalised the same way; otherwise capitalised or punctuated tokens
    # ("Deal", "damage.") never match and their counts are always zero.
    tokens = [clean_text(token) for token in text.split()]
    return [cost] + [tokens.count(word) for word in get_vocab()]