From 7d21d0856e330bb52c89adffb1ba188d90f24971 Mon Sep 17 00:00:00 2001
From: Wentao Bao <wtbao2018@gmail.com>
Date: Wed, 3 Mar 2021 10:34:09 -0500
Subject: [PATCH] Add reliability (calibration) evaluation scripts and update evaluate_calibration.py

---
 configs/recognition/csn/inference_csn_dnn.py  |  4 +-
 configs/recognition/csn/inference_csn_enn.py  |  4 +-
 experiments/csn/run_reliability_evaluation.sh | 35 ++++++++++++++
 experiments/evaluate_calibration.py           | 41 +++++++++-------
 experiments/i3d/run_reliability_evaluation.sh | 47 +++++++++++++++++++
 .../slowfast/run_reliability_evaluation.sh    | 35 ++++++++++++++
 .../run_reliability_evaluation.sh             | 47 +++++++++++++++++++
 experiments/tsm/run_reliability_evaluation.sh | 35 ++++++++++++++
 8 files changed, 226 insertions(+), 22 deletions(-)
 create mode 100644 experiments/csn/run_reliability_evaluation.sh
 create mode 100644 experiments/i3d/run_reliability_evaluation.sh
 create mode 100644 experiments/slowfast/run_reliability_evaluation.sh
 create mode 100644 experiments/tpn_slowonly/run_reliability_evaluation.sh
 create mode 100644 experiments/tsm/run_reliability_evaluation.sh

diff --git a/configs/recognition/csn/inference_csn_dnn.py b/configs/recognition/csn/inference_csn_dnn.py
index 3f04f9f8..e442efb0 100644
--- a/configs/recognition/csn/inference_csn_dnn.py
+++ b/configs/recognition/csn/inference_csn_dnn.py
@@ -43,8 +43,8 @@
     dict(type='ToTensor', keys=['imgs'])
 ]
 data = dict(
-    videos_per_gpu=8,
-    workers_per_gpu=4,
+    videos_per_gpu=1,
+    workers_per_gpu=2,
     test=dict(
         type=dataset_type,
         ann_file=None,
diff --git a/configs/recognition/csn/inference_csn_enn.py b/configs/recognition/csn/inference_csn_enn.py
index c42f56b6..439e7558 100644
--- a/configs/recognition/csn/inference_csn_enn.py
+++ b/configs/recognition/csn/inference_csn_enn.py
@@ -49,8 +49,8 @@
     dict(type='ToTensor', keys=['imgs'])
 ]
 data = dict(
-    videos_per_gpu=8,
-    workers_per_gpu=4,
+    videos_per_gpu=1,
+    workers_per_gpu=2,
     test=dict(
         type=dataset_type,
         ann_file=None,
diff --git a/experiments/csn/run_reliability_evaluation.sh b/experiments/csn/run_reliability_evaluation.sh
new file mode 100644
index 00000000..23745fe4
--- /dev/null
+++ b/experiments/csn/run_reliability_evaluation.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+# Usage: cd experiments/csn && [DEVICE=<gpu_id>] bash run_reliability_evaluation.sh <OOD_DATASET> <MODEL>
+export CUDA_HOME='/usr/local/cuda'
+
+pwd_dir=$PWD  # caller's directory, restored at the end ($pwd is not set by bash)
+cd ../../ || exit 1  # the paths below are relative to the repository root
+
+source activate mmaction
+
+OOD_DATASET=$1
+MODEL=$2
+RESULT_DIR='experiments/csn/results'
+
+case ${MODEL} in
+    dnn)
+    # DNN with Dropout model
+    CUDA_VISIBLE_DEVICES=${DEVICE} python experiments/evaluate_calibration.py \
+        --ood_result "${RESULT_DIR}/CSN_DNN_BALD_${OOD_DATASET}_result.npz" \
+        --save_prefix "${RESULT_DIR}/../results_reliability/CSN_DNN_BALD_${OOD_DATASET}_reliability"
+    ;;
+    edlnokl_avuc_debias)
+    # Evidential Deep Learning (without KL divergence loss term) with AvU Calibration and Debiasing
+    CUDA_VISIBLE_DEVICES=${DEVICE} python experiments/evaluate_calibration.py \
+        --ood_result "${RESULT_DIR}/CSN_EDLNoKLAvUCDebias_EDL_${OOD_DATASET}_result.npz" \
+        --save_prefix "${RESULT_DIR}/../results_reliability/CSN_EDLNoKLAvUCDebias_EDL_${OOD_DATASET}_reliability"
+    ;;
+    *)
+    echo "Invalid model: ${MODEL}"
+    exit 1
+    ;;
+esac
+
+
+cd "$pwd_dir"
+echo "Experiments finished!"
\ No newline at end of file
diff --git a/experiments/evaluate_calibration.py b/experiments/evaluate_calibration.py
index 4159d546..fb43a265 100644
--- a/experiments/evaluate_calibration.py
+++ b/experiments/evaluate_calibration.py
@@ -7,10 +7,6 @@ def eval_calibration(predictions, confidences, labels, M=15):
     """
     M: number of bins for confidence scores
     """
-    # confidences = (confidences - np.min(confidences)) / (np.max(confidences) - np.min(confidences) + 1e-6)
-    # confidences = confidences / np.max(confidences)
-    # confidences = np.exp(confidences)
-    # confidences /= np.sum(confidences)
     num_Bm = np.zeros((M,), dtype=np.int32)
     accs = np.zeros((M,), dtype=np.float32)
     confs = np.zeros((M,), dtype=np.float32)
@@ -45,43 +41,52 @@ def callback(axes):
     parser = argparse.ArgumentParser(description='MMAction2 test')
     # model config
     parser.add_argument('--ood_result', help='the result file of ood detection')
-    parser.add_argument('--M', type=int, default=10, help='The number of bins')
-    parser.add_argument('--save_file', help='the image file path of generated calibration figure')
+    parser.add_argument('--M', type=int, default=15, help='The number of bins')
+    parser.add_argument('--save_prefix', help='the image file path of generated calibration figure')
     args = parser.parse_args()
 
     results = np.load(args.ood_result, allow_pickle=True)
-    ind_confidences = results['ind_conf']
-    ood_confidences = results['ood_conf']
     ind_uncertainties = results['ind_unctt']  # (N1,)
     ood_uncertainties = results['ood_unctt']  # (N2,)
     ind_results = results['ind_pred']  # (N1,)
     ood_results = results['ood_pred']  # (N2,)
     ind_labels = results['ind_label']
     ood_labels = results['ood_label']
+    if 'ind_conf' not in results:
+        ind_confidences = 1 - ind_uncertainties
+        ood_confidences = 1 - ood_uncertainties
+    else:
+        ind_confidences = results['ind_conf']
+        ood_confidences = results['ood_conf']
 
     # result path
-    result_path = os.path.dirname(args.save_file)
+    result_path = os.path.dirname(args.save_prefix)
     if not os.path.exists(result_path):
         os.makedirs(result_path)
 
-    # evaluation on in-distribution data
-    accs, confs, num_Bm, conf_intervals = eval_calibration(ind_results, ind_confidences, ind_labels, M=args.M)
+    accs, confs, num_Bm, conf_intervals = eval_calibration(ind_results, 1-ind_uncertainties, ind_labels, M=args.M)
 
     # compute Expected Calibration Error (ECE)
     ece = np.sum(np.abs(accs - confs) * num_Bm / np.sum(num_Bm))
-    print('The ECE result: %.3lf'%(ece))
+    print('The IND ECE result: %.3lf'%(ece))
 
     # plot the ECE figure
-    fig, ax = plt.subplots(figsize=(5,5))
+    fig, ax = plt.subplots(figsize=(4,4))
+    plt.rcParams["font.family"] = "Arial"  # Times New Roman
+    fontsize = 15
     plt.bar(conf_intervals, accs, width=1/args.M, linewidth=1, edgecolor='k', align='edge', label='Outputs')
     plt.bar(conf_intervals, np.maximum(0, conf_intervals - accs), bottom=accs, color='y', width=1/args.M, linewidth=1, edgecolor='k', align='edge', label='Gap')
-    plt.text(0.75, 0.1, 'ECE=%.4f'%(ece), backgroundcolor='y')
+    plt.text(0.1, 0.6, 'ECE=%.4f'%(ece), fontsize=fontsize)
     add_identity(ax, color='r', ls='--')
     plt.xlim(0, 1)
     plt.ylim(0, 1)
-    plt.xlabel('confidence')
-    plt.ylabel('accuracy')
-    plt.legend()
+    plt.xticks(fontsize=fontsize)
+    plt.yticks(fontsize=fontsize)
+    plt.xlabel('confidence', fontsize=fontsize)
+    plt.ylabel('accuracy', fontsize=fontsize)
+    plt.legend(fontsize=fontsize)
+    ax.set_aspect('equal', 'box')
     plt.tight_layout()
-    plt.savefig(args.save_file)
+    plt.savefig(args.save_prefix + '_ind.png')
+    plt.savefig(args.save_prefix + '_ind.pdf')
 
diff --git a/experiments/i3d/run_reliability_evaluation.sh b/experiments/i3d/run_reliability_evaluation.sh
new file mode 100644
index 00000000..85ef69da
--- /dev/null
+++ b/experiments/i3d/run_reliability_evaluation.sh
@@ -0,0 +1,47 @@
+#!/bin/bash
+# Usage: cd experiments/i3d && [DEVICE=<gpu_id>] bash run_reliability_evaluation.sh <OOD_DATASET> <MODEL>
+export CUDA_HOME='/usr/local/cuda'
+
+pwd_dir=$PWD  # caller's directory, restored at the end ($pwd is not set by bash)
+cd ../../ || exit 1  # the paths below are relative to the repository root
+
+source activate mmaction
+
+OOD_DATASET=$1
+MODEL=$2
+RESULT_DIR='experiments/i3d/results'
+
+case ${MODEL} in
+    dnn)
+    # DNN with Dropout model
+    CUDA_VISIBLE_DEVICES=${DEVICE} python experiments/evaluate_calibration.py \
+        --ood_result "${RESULT_DIR}/I3D_DNN_BALD_${OOD_DATASET}_result.npz" \
+        --save_prefix "${RESULT_DIR}/../results_reliability/I3D_DNN_BALD_${OOD_DATASET}_reliability"
+    ;;
+    bnn)
+    # BNN model
+    CUDA_VISIBLE_DEVICES=${DEVICE} python experiments/evaluate_calibration.py \
+        --ood_result "${RESULT_DIR}/I3D_BNN_BALD_${OOD_DATASET}_result.npz" \
+        --save_prefix "${RESULT_DIR}/../results_reliability/I3D_BNN_BALD_${OOD_DATASET}_reliability"
+    ;;
+    edlnokl)
+    # Evidential Deep Learning (without KL divergence loss term)
+    CUDA_VISIBLE_DEVICES=${DEVICE} python experiments/evaluate_calibration.py \
+        --ood_result "${RESULT_DIR}/I3D_EDLNoKL_EDL_${OOD_DATASET}_result.npz" \
+        --save_prefix "${RESULT_DIR}/../results_reliability/I3D_EDLNoKL_EDL_${OOD_DATASET}_reliability"
+    ;;
+    edlnokl_avuc_debias)
+    # Evidential Deep Learning (without KL divergence loss term) with AvU Calibration and Debiasing
+    CUDA_VISIBLE_DEVICES=${DEVICE} python experiments/evaluate_calibration.py \
+        --ood_result "${RESULT_DIR}/I3D_EDLNoKLAvUCCED_EDL_${OOD_DATASET}_result.npz" \
+        --save_prefix "${RESULT_DIR}/../results_reliability/I3D_EDLNoKLAvUCCED_EDL_${OOD_DATASET}_reliability"
+    ;;
+    *)
+    echo "Invalid model: ${MODEL}"
+    exit 1
+    ;;
+esac
+
+
+cd "$pwd_dir"
+echo "Experiments finished!"
\ No newline at end of file
diff --git a/experiments/slowfast/run_reliability_evaluation.sh b/experiments/slowfast/run_reliability_evaluation.sh
new file mode 100644
index 00000000..3c431041
--- /dev/null
+++ b/experiments/slowfast/run_reliability_evaluation.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+# Usage: cd experiments/slowfast && [DEVICE=<gpu_id>] bash run_reliability_evaluation.sh <OOD_DATASET> <MODEL>
+export CUDA_HOME='/usr/local/cuda'
+
+pwd_dir=$PWD  # caller's directory, restored at the end ($pwd is not set by bash)
+cd ../../ || exit 1  # the paths below are relative to the repository root
+
+source activate mmaction
+
+OOD_DATASET=$1
+MODEL=$2
+RESULT_DIR='experiments/slowfast/results'
+
+case ${MODEL} in
+    dnn)
+    # DNN with Dropout model
+    CUDA_VISIBLE_DEVICES=${DEVICE} python experiments/evaluate_calibration.py \
+        --ood_result "${RESULT_DIR}/SlowFast_DNN_BALD_${OOD_DATASET}_result.npz" \
+        --save_prefix "${RESULT_DIR}/../results_reliability/SlowFast_DNN_BALD_${OOD_DATASET}_reliability"
+    ;;
+    edlnokl_avuc_debias)
+    # Evidential Deep Learning (without KL divergence loss term) with AvU Calibration and Debiasing
+    CUDA_VISIBLE_DEVICES=${DEVICE} python experiments/evaluate_calibration.py \
+        --ood_result "${RESULT_DIR}/SlowFast_EDLNoKLAvUCDebias_EDL_${OOD_DATASET}_result.npz" \
+        --save_prefix "${RESULT_DIR}/../results_reliability/SlowFast_EDLNoKLAvUCDebias_EDL_${OOD_DATASET}_reliability"
+    ;;
+    *)
+    echo "Invalid model: ${MODEL}"
+    exit 1
+    ;;
+esac
+
+
+cd "$pwd_dir"
+echo "Experiments finished!"
\ No newline at end of file
diff --git a/experiments/tpn_slowonly/run_reliability_evaluation.sh b/experiments/tpn_slowonly/run_reliability_evaluation.sh
new file mode 100644
index 00000000..7cbb03b3
--- /dev/null
+++ b/experiments/tpn_slowonly/run_reliability_evaluation.sh
@@ -0,0 +1,47 @@
+#!/bin/bash
+# Usage: cd experiments/tpn_slowonly && [DEVICE=<gpu_id>] bash run_reliability_evaluation.sh <OOD_DATASET> <MODEL>
+export CUDA_HOME='/usr/local/cuda'
+
+pwd_dir=$PWD  # caller's directory, restored at the end ($pwd is not set by bash)
+cd ../../ || exit 1  # the paths below are relative to the repository root
+
+source activate mmaction
+
+OOD_DATASET=$1
+MODEL=$2
+RESULT_DIR='experiments/tpn_slowonly/results'
+
+case ${MODEL} in
+    dnn)
+    # DNN with Dropout model
+    CUDA_VISIBLE_DEVICES=${DEVICE} python experiments/evaluate_calibration.py \
+        --ood_result "${RESULT_DIR}/TPN_SlowOnly_Dropout_BALD_${OOD_DATASET}_result.npz" \
+        --save_prefix "${RESULT_DIR}/../results_reliability/TPN_SlowOnly_Dropout_BALD_${OOD_DATASET}_reliability"
+    ;;
+    bnn)
+    # BNN model
+    CUDA_VISIBLE_DEVICES=${DEVICE} python experiments/evaluate_calibration.py \
+        --ood_result "${RESULT_DIR}/TPN_SlowOnly_BNN_BALD_${OOD_DATASET}_result.npz" \
+        --save_prefix "${RESULT_DIR}/../results_reliability/TPN_SlowOnly_BNN_BALD_${OOD_DATASET}_reliability"
+    ;;
+    edlnokl)
+    # Evidential Deep Learning (without KL divergence loss term)
+    CUDA_VISIBLE_DEVICES=${DEVICE} python experiments/evaluate_calibration.py \
+        --ood_result "${RESULT_DIR}/TPN_SlowOnly_EDLlogNoKL_EDL_${OOD_DATASET}_result.npz" \
+        --save_prefix "${RESULT_DIR}/../results_reliability/TPN_SlowOnly_EDLlogNoKL_EDL_${OOD_DATASET}_reliability"
+    ;;
+    edlnokl_avuc)
+    # Evidential Deep Learning (without KL divergence loss term) with AvU Calibration
+    CUDA_VISIBLE_DEVICES=${DEVICE} python experiments/evaluate_calibration.py \
+        --ood_result "${RESULT_DIR}/TPN_SlowOnly_EDLlogNoKLAvUC_EDL_${OOD_DATASET}_result.npz" \
+        --save_prefix "${RESULT_DIR}/../results_reliability/TPN_SlowOnly_EDLlogNoKLAvUC_EDL_${OOD_DATASET}_reliability"
+    ;;
+    *)
+    echo "Invalid model: ${MODEL}"
+    exit 1
+    ;;
+esac
+
+
+cd "$pwd_dir"
+echo "Experiments finished!"
\ No newline at end of file
diff --git a/experiments/tsm/run_reliability_evaluation.sh b/experiments/tsm/run_reliability_evaluation.sh
new file mode 100644
index 00000000..42723d49
--- /dev/null
+++ b/experiments/tsm/run_reliability_evaluation.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+# Usage: cd experiments/tsm && [DEVICE=<gpu_id>] bash run_reliability_evaluation.sh <OOD_DATASET> <MODEL>
+export CUDA_HOME='/usr/local/cuda'
+
+pwd_dir=$PWD  # caller's directory, restored at the end ($pwd is not set by bash)
+cd ../../ || exit 1  # the paths below are relative to the repository root
+
+source activate mmaction
+
+OOD_DATASET=$1
+MODEL=$2
+RESULT_DIR='experiments/tsm/results'
+
+case ${MODEL} in
+    dnn)
+    # DNN with Dropout model
+    CUDA_VISIBLE_DEVICES=${DEVICE} python experiments/evaluate_calibration.py \
+        --ood_result "${RESULT_DIR}/TSM_DNN_BALD_${OOD_DATASET}_result.npz" \
+        --save_prefix "${RESULT_DIR}/../results_reliability/TSM_DNN_BALD_${OOD_DATASET}_reliability"
+    ;;
+    edlnokl_avuc_debias)
+    # Evidential Deep Learning (without KL divergence loss term) with AvU Calibration and Debiasing
+    CUDA_VISIBLE_DEVICES=${DEVICE} python experiments/evaluate_calibration.py \
+        --ood_result "${RESULT_DIR}/TSM_EDLNoKLAvUCDebias_EDL_${OOD_DATASET}_result.npz" \
+        --save_prefix "${RESULT_DIR}/../results_reliability/TSM_EDLNoKLAvUCDebias_EDL_${OOD_DATASET}_reliability"
+    ;;
+    *)
+    echo "Invalid model: ${MODEL}"
+    exit 1
+    ;;
+esac
+
+
+cd "$pwd_dir"
+echo "Experiments finished!"
\ No newline at end of file