Skip to content

Commit

Permalink
fix
Browse files Browse the repository at this point in the history
  • Loading branch information
ottokart committed Feb 9, 2017
1 parent c8d133b commit ad2fff0
Showing 1 changed file with 7 additions and 10 deletions.
17 changes: 7 additions & 10 deletions data.py
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,12 @@

DATA_PATH = "../data"

+ # path to text file in the format:
+ # word1 0.123 0.123 ... 0.123
+ # word2 0.123 0.123 ... 0.123 etc...
+ # e.g. glove.6B.50d.txt
+ PRETRAINED_EMBEDDINGS_PATH = None

END = "</S>"
UNK = "<UNK>"

@@ -238,21 +244,12 @@ def create_dev_test_train_split_and_vocabulary(root_path, build_vocabulary, trai
else:
sys.exit("The path to stage1 source data directory with txt files is missing")

- if len(sys.argv) > 2:
- # path to text file in the format:
- # word1 0.123 0.123 ... 0.123
- # word2 0.123 0.123 ... 0.123 etc...
- # e.g. glove.6B.50d.txt
- pretrained_embeddings_path = sys.argv[2]
- else:
- pretrained_embeddings_path = None

if not os.path.exists(DATA_PATH):
os.makedirs(DATA_PATH)
else:
sys.exit("Data already exists")

- create_dev_test_train_split_and_vocabulary(path, True, TRAIN_FILE, DEV_FILE, TEST_FILE, pretrained_embeddings_path)
+ create_dev_test_train_split_and_vocabulary(path, True, TRAIN_FILE, DEV_FILE, TEST_FILE, PRETRAINED_EMBEDDINGS_PATH)

# Stage 2
if len(sys.argv) > 2:

0 comments on commit ad2fff0

Please sign in to comment.