This repository has been archived by the owner on Oct 31, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 306
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
decoupled encoders cfg for mixed speech model
- Loading branch information
1 parent
c385c55
commit 00b3fba
Showing
6 changed files
with
169 additions
and
117 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
# @package _group_ | ||
|
||
# model type. One of [hf_bert, pytext_bert, fairseq_roberta] | ||
encoder_model_type: fairseq_roberta | ||
|
||
# this is only used by HF | ||
pretrained_model_cfg: roberta-base | ||
|
||
# Some encoders need to be initialized from a file | ||
pretrained_file: /private/home/vladk/data/fairseq_checkpoints/roberta.base/ | ||
|
||
# Extra linear layer on top of standard bert/roberta encoder | ||
projection_dim: 0 | ||
|
||
# Max length of the encoder input sequence | ||
sequence_length: 256 | ||
|
||
dropout: 0.1 | ||
|
||
# whether to fix (don't update) context encoder during training or not | ||
fix_ctx_encoder: False | ||
|
||
# if False, the model won't load pre-trained BERT weights | ||
pretrained: True |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,54 +1,45 @@ | ||
|
||
# @package _group_ | ||
|
||
# model type. One of [mixed_hf_bert_wav2vec, mixed_hf_bert_hubert] | ||
# CHANGE THIS to use a different encoder! | ||
encoder_model_type: mixed_hf_bert_wav2vec | ||
# encoder_model_type: mixed_hf_bert_hubert | ||
|
||
# HuggingFace's config name for model initialization | ||
pretrained_model_cfg: bert-base-uncased | ||
encoder_model_type: mixed_audio | ||
|
||
# Some encoders need to be initialized from a file | ||
pretrained_file: | ||
|
||
# Which layer representation to use | ||
output_layer: | ||
# ------------ QUERY ENCODER ------------ | ||
q_encoder_type: # hf-wav2vec, fairseq-wav2vec, or fairseq-hubert | ||
|
||
# Extra linear layer on top of standard bert/roberta encoder | ||
projection_dim: 0 | ||
|
||
# Max length of the encoder input sequence | ||
sequence_length: 256 | ||
# HF only params | ||
q_wav2vec_model_cfg: #facebook/wav2vec2-base-960h | ||
|
||
dropout: 0.1 | ||
# fairseq only params | ||
q_wav2vec_cp_file: #/checkpoint/vladk/speechqa/wav2vec_small_960h.pt | ||
q_wav2vec_apply_mask: True | ||
q_output_layer: # Which layer representation to use | ||
|
||
# whether to fix (don't update) context encoder during training or not | ||
fix_ctx_encoder: False | ||
q_projection_dim: 768 # Extra linear layer on top of pre-trained encoder | ||
q_dropout: 0.1 | ||
q_use_activation: False | ||
q_max_audio_t: 300 | ||
q_audio_encoder_lr_factor: 0 | ||
|
||
# if False, the model won't load pre-trained BERT weights | ||
pretrained: True | ||
# ------------ CTX ENCODER ------------ | ||
ctx_encoder_type: # hf-bert or fairseq-roberta | ||
|
||
# fairseq only params | ||
ctx_pretrained_file: # /private/home/vladk/data/fairseq_checkpoints/roberta.base/ | ||
|
||
# HF params | ||
pretrained_wav2vec_model_cfg: | ||
#facebook/wav2vec2-base-960h | ||
ctx_model_cfg: bert-base-uncased # roberta-base | ||
ctx_projection_dim: 0 # Extra linear layer on top of pre-trained encoder | ||
ctx_sequence_length: 256 # Max length of the encoder input sequence | ||
ctx_dropout: 0.1 | ||
ctx_pretrained: True # if False, the model won't load pre-trained BERT weights | ||
|
||
wav2_vec_extra_proj_dim: 768 # TODO: make a common param | ||
wav2vec_dropout: 0.1 | ||
|
||
# fairseq params | ||
# CHANGE THIS to use a different encoder! | ||
wav2vec_cp_file: /checkpoint/vladk/speechqa/wav2vec_small.pt | ||
# non finetuned | ||
# wav2vec_cp_file: /checkpoint/vladk/speechqa/wav2vec_small.pt | ||
# -------------- COMMON ------------------- | ||
|
||
wav2vec_apply_mask: True | ||
# whether to fix (don't update) context encoder during training or not | ||
fix_ctx_encoder: False | ||
|
||
#TODO: move to train config group? | ||
optimizer: hf-adam # fairseq-adam | ||
|
||
# wav2vec common params | ||
wav2vec_max_audio_t: 300 | ||
wav2vec_use_activation: False | ||
|
||
#TODO: move to train cfg group | ||
audio_encoder_lr_factor: 0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.