reference_extractor.py
"""Extract per-song reference embeddings by combining CREPE pitch activations with chroma features."""
import os

import numpy as np
import librosa
import crepe

SONGS_PATH = "./songs"
EMBED_PATH = "./embeddings"
TARGET_SR = 16000
def extract_crepe_embedding(audio, sr, model_capacity='tiny'):
    """Return the mean CREPE activation vector over confidently pitched frames."""
    # Confidence gating ensures we only average frames with enough pitch certainty.
    time_vals, frequency, confidence, activation = crepe.predict(
        audio, sr, viterbi=True, model_capacity=model_capacity, step_size=10
    )
    valid_indices = (confidence >= 0.3)
    if np.any(valid_indices):
        emb = np.mean(activation[valid_indices], axis=0)
    else:
        # Fall back to averaging every frame if none clears the confidence threshold.
        emb = np.mean(activation, axis=0)
    return emb
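# Shape sketch (illustrative assumption, not executed here): CREPE emits one
# 360-bin activation row per analysis step, so with step_size=10 ms a ~10 s clip
# yields an activation of roughly (1000, 360) and the gated mean `emb` is a
# 360-dimensional vector.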
def extract_hpcp(audio, sr):
    """Approximate an HPCP-style descriptor: a 12-bin chroma averaged over time."""
    chroma = librosa.feature.chroma_stft(y=audio, sr=sr, n_chroma=12)
    return np.mean(chroma, axis=1)
def process_songs():
    if not os.path.exists(EMBED_PATH):
        os.makedirs(EMBED_PATH)
    for file in os.listdir(SONGS_PATH):
        if file.endswith(".wav"):
            file_path = os.path.join(SONGS_PATH, file)
            print(f"[reference_extractor] Creating HPCP+CREPE embedding for {file}...")
            # Resample to the common target rate so every embedding is comparable.
            audio, sr = librosa.load(file_path, sr=TARGET_SR, mono=True)
            c_emb = extract_crepe_embedding(audio, sr, 'tiny')
            h_emb = extract_hpcp(audio, sr)
            # Concatenate the CREPE and chroma vectors into one reference embedding.
            combined_emb = np.concatenate([c_emb, h_emb], axis=0)
            base_name = os.path.splitext(file)[0]
            out_path = os.path.join(EMBED_PATH, f"{base_name}_embed.npy")
            np.save(out_path, combined_emb)
            print(f"  Combined shape: {combined_emb.shape}")
            print(f"  Saved to {out_path}")
if __name__ == "__main__":
    process_songs()
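# --- Hypothetical downstream usage (a sketch, not part of this script) ---
# Assuming a matcher later compares a query embedding against the saved
# references, one minimal approach is cosine similarity over the .npy files.
# The helper name and the query file path below are illustrative assumptions.
#
# def cosine_similarity(a, b):
#     return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b) + 1e-9))
#
# query = np.load("./embeddings/query_embed.npy")  # hypothetical query embedding
# for name in os.listdir(EMBED_PATH):
#     if name.endswith("_embed.npy"):
#         ref = np.load(os.path.join(EMBED_PATH, name))
#         print(name, round(cosine_similarity(query, ref), 3))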