Fix misc bugs

jonnor · Mar 2, 2019 · a2e91af · a2e91af
1 parent 5de1de2
commit a2e91af
Show file tree

Hide file tree

Showing 6 changed files with 55 additions and 14 deletions.
diff --git a/TODO.md b/TODO.md
@@ -21,8 +21,17 @@ Figure out why SB-CNN baseline score is not reproduced
 - Check windowing functions, esp last frame and padding
 - Check data augmentations working
 ! `sbcnn16k32aug` did (little bit) worse than `sbcnn16k30`
+
 Are we picking the models in a good way?
 
+! With windowed prediction, out-of-fold versus in-fold is quite different. Suggests overfitting?
+Maybe picking models based on windowed performance can overcome this. As done in SB-CNN
+acc 0.6712485681557846
+acc 0.8524774774774775
+
+! when running same model in test, results vary by several % points..
+
+
 - train.py: Write history.csv file incrementally, for each epoch 
 
 More experiments

diff --git a/braindump.md b/braindump.md
@@ -178,6 +178,13 @@ Across most folds.
 SB-CNN 128mel 3 sec 16kHz 50% vote overlap on the other hand was very similar, as expected.
 
 
+### Effects of different overlap in voting
+
+Quick test on SB-CNN16k 30mels, fold0, validation
+0.1, acc 0.6666666666666666
+0.5, acc 0.6746849942726232
+0.9, acc 0.6758304696449027
+
 ### STM32Ai
 
 arm_rfft_fast_init_f32 called for every column
@@ -192,6 +199,8 @@ https://github.com/ARM-software/CMSIS_5/issues/217
 
 
 
+
+
 ### Simpler classes
 
 Attempt at narrower taxonomy.

diff --git a/microesc/features.py b/microesc/features.py
@@ -130,7 +130,7 @@ def load_sample(sample, settings, feature_dir, window_frames,
     path = feature_path(sample, out_folder=folder, augmentation=aug)
     mels = numpy.load(path)['arr_0']
     assert mels.shape[0] == n_mels, mels.shape
-    
+
     if start_time is None:
         # Sample a window in time randomly
         min_start = max(0, mels.shape[1]-window_frames)
@@ -145,12 +145,15 @@ def load_sample(sample, settings, feature_dir, window_frames,
     mels = mels[:, start:end]
 
     # Normalize the window
-    if normalize == 'max':
-        mels = librosa.core.power_to_db(mels, top_db=80, ref=numpy.max)
-    elif normalize == 'meanstd':
-        mels = librosa.core.power_to_db(mels, top_db=80)
-        mels -= numpy.mean(mels)
-        mels /= numpy.std(mels)
+    if mels.shape[1] > 0:
+        if normalize == 'max':
+            mels = librosa.core.power_to_db(mels, top_db=80, ref=numpy.max)
+        elif normalize == 'meanstd':
+            mels = librosa.core.power_to_db(mels, top_db=80)
+            mels -= numpy.mean(mels)
+            mels /= ( numpy.std(mels) + 1e-9)
+    else:
+        print('Warning: Sample {} with start {} has 0 length'.format(sample, start_time))
 
     # Pad to standard size
     if window_frames is None:

diff --git a/microesc/jobs.py b/microesc/jobs.py
@@ -105,7 +105,7 @@ def parse(args):
     a('--bucket', type=str, default='jonnor-micro-esc',
         help='GCS bucket to write to. Default: %(default)s')
 
-    a('--image', type=str, default='gcr.io/masterthesis-231919/base:20',
+    a('--image', type=str, default='gcr.io/masterthesis-231919/base:21',
         help='Docker image to use')
 
     parsed = parser.parse_args(args)

diff --git a/microesc/report.py b/microesc/report.py
@@ -55,6 +55,13 @@ def parse(args):
 
     return parsed
 
+def print_accuracies(accs, title):
+
+    m = numpy.mean(accs)
+    s = numpy.std(accs)
+    print('{} | mean: {:.3f}, std: {:.3f}'.format(title, m, s))
+    [ print("{:.3f}".format(v), end=',') for v in accs ]
+    print('\n')
 
 def main():
 
@@ -67,16 +74,18 @@ def main():
     classnames = urbansound8k.classnames
     val_fig = plot_confusion(100*numpy.mean(val, axis=0), classnames, normalize=True)
     test_fig = plot_confusion(100*numpy.mean(test, axis=0), classnames, normalize=True) 
+    val_fig.savefig('val.cm.png')
+    test_fig.savefig('test.cm.png')
 
     c_acc = cm_class_accuracy(numpy.mean(val, axis=0))
-    print('test_acc', numpy.mean(c_acc), c_acc) 
+    print_accuracies(c_acc, 'class_acc')
 
     folds_acc = [ cm_accuracy(val[f]) for f in range(0, len(val)) ]
+    print_accuracies(folds_acc, 'val_acc')
 
-    print('val_acc', numpy.mean(folds_acc), folds_acc)
+    tests_acc = [ cm_accuracy(test[f]) for f in range(0, len(test)) ]
+    print_accuracies(tests_acc, 'test_acc') 
 
-    val_fig.savefig('val.cm.png')
-    test_fig.savefig('test.cm.png')
 
     print('wrote')
 

diff --git a/microesc/test.py b/microesc/test.py
@@ -47,7 +47,7 @@ def predict_voted(settings, model, samples, loader, window_frames, method='mean'
             out.append(p)
         elif method == 'majority':
             votes = numpy.argmax(predictions, axis=1)
-            p = numpy.bincount(votes) / len(votes)
+            p = numpy.bincount(votes, minlength=10) / len(votes)
             out.append(p)
 
     ret = numpy.stack(out)
@@ -113,6 +113,8 @@ def score(model, data):
         p = predictor(model, data)
         y_pred = numpy.argmax(p, axis=1)
         # other metrics can be derived from confusion matrix
+        acc = sklearn.metrics.accuracy_score(y_true, y_pred)
+        print('acc', acc)
         confusion = sklearn.metrics.confusion_matrix(y_true, y_pred)
         return confusion
 
@@ -194,8 +196,17 @@ def main():
     overlap = settings['voting_overlap']
     settings = features.settings(settings)
 
+
+    all_folds = pandas.concat([f[0] for f in folds])
+    train_files = set(all_folds.slice_file_name.unique())
+    test_files = set(test.slice_file_name.unique())
+    assert len(train_files) > 7000
+    assert len(test_files) > 700
+    common_files = train_files.intersection(test_files)
+    assert len(common_files) == 0
+
     def load_sample(sample):
-        return features.load_sample(sample, settings,
+        return features.load_sample(sample, settings, start_time=sample.start,
                     window_frames=frames, feature_dir=args.features_dir)
 
     def predict(model, data):