From 7d21d0856e330bb52c89adffb1ba188d90f24971 Mon Sep 17 00:00:00 2001 From: Wentao Bao Date: Wed, 3 Mar 2021 10:34:09 -0500 Subject: [PATCH] Add reliability evaluation scripts and update calibration evaluation --- configs/recognition/csn/inference_csn_dnn.py | 4 +- configs/recognition/csn/inference_csn_enn.py | 4 +- experiments/csn/run_reliability_evaluation.sh | 35 ++++++++++++++ experiments/evaluate_calibration.py | 41 +++++++++------- experiments/i3d/run_reliability_evaluation.sh | 47 +++++++++++++++++++ .../slowfast/run_reliability_evaluation.sh | 35 ++++++++++++++ .../run_reliability_evaluation.sh | 47 +++++++++++++++++++ experiments/tsm/run_reliability_evaluation.sh | 35 ++++++++++++++ 8 files changed, 226 insertions(+), 22 deletions(-) create mode 100644 experiments/csn/run_reliability_evaluation.sh create mode 100644 experiments/i3d/run_reliability_evaluation.sh create mode 100644 experiments/slowfast/run_reliability_evaluation.sh create mode 100644 experiments/tpn_slowonly/run_reliability_evaluation.sh create mode 100644 experiments/tsm/run_reliability_evaluation.sh diff --git a/configs/recognition/csn/inference_csn_dnn.py b/configs/recognition/csn/inference_csn_dnn.py index 3f04f9f8..e442efb0 100644 --- a/configs/recognition/csn/inference_csn_dnn.py +++ b/configs/recognition/csn/inference_csn_dnn.py @@ -43,8 +43,8 @@ dict(type='ToTensor', keys=['imgs']) ] data = dict( - videos_per_gpu=8, - workers_per_gpu=4, + videos_per_gpu=1, + workers_per_gpu=2, test=dict( type=dataset_type, ann_file=None, diff --git a/configs/recognition/csn/inference_csn_enn.py b/configs/recognition/csn/inference_csn_enn.py index c42f56b6..439e7558 100644 --- a/configs/recognition/csn/inference_csn_enn.py +++ b/configs/recognition/csn/inference_csn_enn.py @@ -49,8 +49,8 @@ dict(type='ToTensor', keys=['imgs']) ] data = dict( - videos_per_gpu=8, - workers_per_gpu=4, + videos_per_gpu=1, + workers_per_gpu=2, test=dict( type=dataset_type, ann_file=None, diff --git a/experiments/csn/run_reliability_evaluation.sh 
b/experiments/csn/run_reliability_evaluation.sh new file mode 100644 index 00000000..23745fe4 --- /dev/null +++ b/experiments/csn/run_reliability_evaluation.sh @@ -0,0 +1,35 @@ +#!/bin/bash + +export CUDA_HOME='/usr/local/cuda' + +pwd_dir=$PWD # PWD is set by bash; lowercase $pwd is an unset variable +cd ../../ + +source activate mmaction + +OOD_DATASET=$1 +MODEL=$2 +RESULT_DIR='experiments/csn/results' + +case ${MODEL} in + dnn) + # DNN with Dropout model + CUDA_VISIBLE_DEVICES=${DEVICE} python experiments/evaluate_calibration.py \ + --ood_result ${RESULT_DIR}/CSN_DNN_BALD_${OOD_DATASET}_result.npz \ + --save_prefix ${RESULT_DIR}/../results_reliability/CSN_DNN_BALD_${OOD_DATASET}_reliability + ;; + edlnokl_avuc_debias) + # Evidential Deep Learning (without KL divergence loss term) with AvU Calibration and Debiasing + CUDA_VISIBLE_DEVICES=${DEVICE} python experiments/evaluate_calibration.py \ + --ood_result ${RESULT_DIR}/CSN_EDLNoKLAvUCDebias_EDL_${OOD_DATASET}_result.npz \ + --save_prefix ${RESULT_DIR}/../results_reliability/CSN_EDLNoKLAvUCDebias_EDL_${OOD_DATASET}_reliability + ;; + *) + echo "Invalid model: "${MODEL} + exit 1 # non-zero status so callers can detect the invalid model + ;; +esac + + +cd "$pwd_dir" +echo "Experiments finished!" 
\ No newline at end of file diff --git a/experiments/evaluate_calibration.py b/experiments/evaluate_calibration.py index 4159d546..fb43a265 100644 --- a/experiments/evaluate_calibration.py +++ b/experiments/evaluate_calibration.py @@ -7,10 +7,6 @@ def eval_calibration(predictions, confidences, labels, M=15): """ M: number of bins for confidence scores """ - # confidences = (confidences - np.min(confidences)) / (np.max(confidences) - np.min(confidences) + 1e-6) - # confidences = confidences / np.max(confidences) - # confidences = np.exp(confidences) - # confidences /= np.sum(confidences) num_Bm = np.zeros((M,), dtype=np.int32) accs = np.zeros((M,), dtype=np.float32) confs = np.zeros((M,), dtype=np.float32) @@ -45,43 +41,52 @@ def callback(axes): parser = argparse.ArgumentParser(description='MMAction2 test') # model config parser.add_argument('--ood_result', help='the result file of ood detection') - parser.add_argument('--M', type=int, default=10, help='The number of bins') - parser.add_argument('--save_file', help='the image file path of generated calibration figure') + parser.add_argument('--M', type=int, default=15, help='The number of bins') + parser.add_argument('--save_prefix', help='path prefix of the generated calibration figures (saved as <prefix>_ind.png and <prefix>_ind.pdf)') args = parser.parse_args() results = np.load(args.ood_result, allow_pickle=True) - ind_confidences = results['ind_conf'] - ood_confidences = results['ood_conf'] ind_uncertainties = results['ind_unctt'] # (N1,) ood_uncertainties = results['ood_unctt'] # (N2,) ind_results = results['ind_pred'] # (N1,) ood_results = results['ood_pred'] # (N2,) ind_labels = results['ind_label'] ood_labels = results['ood_label'] + if 'ind_conf' not in results: + ind_confidences = 1 - ind_uncertainties + ood_confidences = 1 - ood_uncertainties + else: + ind_confidences = results['ind_conf'] + ood_confidences = results['ood_conf'] # result path - result_path = os.path.dirname(args.save_file) + result_path = os.path.dirname(args.save_prefix) or '.' # dirname is '' for a bare prefix; os.makedirs('') would fail if not 
os.path.exists(result_path): os.makedirs(result_path) - # evaluation on in-distribution data - accs, confs, num_Bm, conf_intervals = eval_calibration(ind_results, ind_confidences, ind_labels, M=args.M) + accs, confs, num_Bm, conf_intervals = eval_calibration(ind_results, 1-ind_uncertainties, ind_labels, M=args.M) # NOTE(review): ind_confidences loaded above is not used here; confirm 1-uncertainty is the intended confidence # compute Expected Calibration Error (ECE) ece = np.sum(np.abs(accs - confs) * num_Bm / np.sum(num_Bm)) - print('The ECE result: %.3lf'%(ece)) + print('The IND ECE result: %.3lf'%(ece)) # plot the ECE figure - fig, ax = plt.subplots(figsize=(5,5)) + plt.rcParams["font.family"] = "Arial" # set before the figure is created so it applies to all of its text (alternative: Times New Roman) + fig, ax = plt.subplots(figsize=(4,4)) + fontsize = 15 plt.bar(conf_intervals, accs, width=1/args.M, linewidth=1, edgecolor='k', align='edge', label='Outputs') plt.bar(conf_intervals, np.maximum(0, conf_intervals - accs), bottom=accs, color='y', width=1/args.M, linewidth=1, edgecolor='k', align='edge', label='Gap') - plt.text(0.75, 0.1, 'ECE=%.4f'%(ece), backgroundcolor='y') + plt.text(0.1, 0.6, 'ECE=%.4f'%(ece), fontsize=fontsize) add_identity(ax, color='r', ls='--') plt.xlim(0, 1) plt.ylim(0, 1) - plt.xlabel('confidence') - plt.ylabel('accuracy') - plt.legend() + plt.xticks(fontsize=fontsize) + plt.yticks(fontsize=fontsize) + plt.xlabel('confidence', fontsize=fontsize) + plt.ylabel('accuracy', fontsize=fontsize) + plt.legend(fontsize=fontsize) + ax.set_aspect('equal', 'box') plt.tight_layout() - plt.savefig(args.save_file) + plt.savefig(args.save_prefix + '_ind.png') + plt.savefig(args.save_prefix + '_ind.pdf') diff --git a/experiments/i3d/run_reliability_evaluation.sh b/experiments/i3d/run_reliability_evaluation.sh new file mode 100644 index 00000000..85ef69da --- /dev/null +++ b/experiments/i3d/run_reliability_evaluation.sh @@ -0,0 +1,47 @@ +#!/bin/bash + +export CUDA_HOME='/usr/local/cuda' + +pwd_dir=$PWD # PWD is set by bash; lowercase $pwd is an unset variable +cd ../../ + +source activate mmaction + +OOD_DATASET=$1 +MODEL=$2 +RESULT_DIR='experiments/i3d/results' + +case ${MODEL} in + dnn) + # DNN with 
Dropout model + CUDA_VISIBLE_DEVICES=${DEVICE} python experiments/evaluate_calibration.py \ + --ood_result ${RESULT_DIR}/I3D_DNN_BALD_${OOD_DATASET}_result.npz \ + --save_prefix ${RESULT_DIR}/../results_reliability/I3D_DNN_BALD_${OOD_DATASET}_reliability + ;; + bnn) + # BNN model + CUDA_VISIBLE_DEVICES=${DEVICE} python experiments/evaluate_calibration.py \ + --ood_result ${RESULT_DIR}/I3D_BNN_BALD_${OOD_DATASET}_result.npz \ + --save_prefix ${RESULT_DIR}/../results_reliability/I3D_BNN_BALD_${OOD_DATASET}_reliability + ;; + edlnokl) + # Evidential Deep Learning (without KL divergence loss term) + CUDA_VISIBLE_DEVICES=${DEVICE} python experiments/evaluate_calibration.py \ + --ood_result ${RESULT_DIR}/I3D_EDLNoKL_EDL_${OOD_DATASET}_result.npz \ + --save_prefix ${RESULT_DIR}/../results_reliability/I3D_EDLNoKL_EDL_${OOD_DATASET}_reliability + ;; + edlnokl_avuc_debias) + # Evidential Deep Learning (without KL divergence loss term) with AvU Calibration and Debiasing + CUDA_VISIBLE_DEVICES=${DEVICE} python experiments/evaluate_calibration.py \ + --ood_result ${RESULT_DIR}/I3D_EDLNoKLAvUCCED_EDL_${OOD_DATASET}_result.npz \ + --save_prefix ${RESULT_DIR}/../results_reliability/I3D_EDLNoKLAvUCCED_EDL_${OOD_DATASET}_reliability + ;; + *) + echo "Invalid model: "${MODEL} + exit 1 # non-zero status so callers can detect the invalid model + ;; +esac + + +cd "$pwd_dir" +echo "Experiments finished!" 
\ No newline at end of file diff --git a/experiments/slowfast/run_reliability_evaluation.sh b/experiments/slowfast/run_reliability_evaluation.sh new file mode 100644 index 00000000..3c431041 --- /dev/null +++ b/experiments/slowfast/run_reliability_evaluation.sh @@ -0,0 +1,35 @@ +#!/bin/bash + +export CUDA_HOME='/usr/local/cuda' + +pwd_dir=$PWD # PWD is set by bash; lowercase $pwd is an unset variable +cd ../../ + +source activate mmaction + +OOD_DATASET=$1 +MODEL=$2 +RESULT_DIR='experiments/slowfast/results' + +case ${MODEL} in + dnn) + # DNN with Dropout model + CUDA_VISIBLE_DEVICES=${DEVICE} python experiments/evaluate_calibration.py \ + --ood_result ${RESULT_DIR}/SlowFast_DNN_BALD_${OOD_DATASET}_result.npz \ + --save_prefix ${RESULT_DIR}/../results_reliability/SlowFast_DNN_BALD_${OOD_DATASET}_reliability + ;; + edlnokl_avuc_debias) + # Evidential Deep Learning (without KL divergence loss term) with AvU Calibration and Debiasing + CUDA_VISIBLE_DEVICES=${DEVICE} python experiments/evaluate_calibration.py \ + --ood_result ${RESULT_DIR}/SlowFast_EDLNoKLAvUCDebias_EDL_${OOD_DATASET}_result.npz \ + --save_prefix ${RESULT_DIR}/../results_reliability/SlowFast_EDLNoKLAvUCDebias_EDL_${OOD_DATASET}_reliability + ;; + *) + echo "Invalid model: "${MODEL} + exit 1 # non-zero status so callers can detect the invalid model + ;; +esac + + +cd "$pwd_dir" +echo "Experiments finished!" 
\ No newline at end of file diff --git a/experiments/tpn_slowonly/run_reliability_evaluation.sh b/experiments/tpn_slowonly/run_reliability_evaluation.sh new file mode 100644 index 00000000..7cbb03b3 --- /dev/null +++ b/experiments/tpn_slowonly/run_reliability_evaluation.sh @@ -0,0 +1,47 @@ +#!/bin/bash + +export CUDA_HOME='/usr/local/cuda' + +pwd_dir=$PWD # PWD is set by bash; lowercase $pwd is an unset variable +cd ../../ + +source activate mmaction + +OOD_DATASET=$1 +MODEL=$2 +RESULT_DIR='experiments/tpn_slowonly/results' + +case ${MODEL} in + dnn) + # DNN with Dropout model + CUDA_VISIBLE_DEVICES=${DEVICE} python experiments/evaluate_calibration.py \ + --ood_result ${RESULT_DIR}/TPN_SlowOnly_Dropout_BALD_${OOD_DATASET}_result.npz \ + --save_prefix ${RESULT_DIR}/../results_reliability/TPN_SlowOnly_Dropout_BALD_${OOD_DATASET}_reliability + ;; + bnn) + # BNN model + CUDA_VISIBLE_DEVICES=${DEVICE} python experiments/evaluate_calibration.py \ + --ood_result ${RESULT_DIR}/TPN_SlowOnly_BNN_BALD_${OOD_DATASET}_result.npz \ + --save_prefix ${RESULT_DIR}/../results_reliability/TPN_SlowOnly_BNN_BALD_${OOD_DATASET}_reliability + ;; + edlnokl) + # Evidential Deep Learning (without KL divergence loss term) + CUDA_VISIBLE_DEVICES=${DEVICE} python experiments/evaluate_calibration.py \ + --ood_result ${RESULT_DIR}/TPN_SlowOnly_EDLlogNoKL_EDL_${OOD_DATASET}_result.npz \ + --save_prefix ${RESULT_DIR}/../results_reliability/TPN_SlowOnly_EDLlogNoKL_EDL_${OOD_DATASET}_reliability + ;; + edlnokl_avuc) + # Evidential Deep Learning (without KL divergence loss term) with AvU Calibration + CUDA_VISIBLE_DEVICES=${DEVICE} python experiments/evaluate_calibration.py \ + --ood_result ${RESULT_DIR}/TPN_SlowOnly_EDLlogNoKLAvUC_EDL_${OOD_DATASET}_result.npz \ + --save_prefix ${RESULT_DIR}/../results_reliability/TPN_SlowOnly_EDLlogNoKLAvUC_EDL_${OOD_DATASET}_reliability + ;; + *) + echo "Invalid model: "${MODEL} + exit 1 # non-zero status so callers can detect the invalid model + ;; +esac + + +cd "$pwd_dir" +echo "Experiments finished!" 
\ No newline at end of file diff --git a/experiments/tsm/run_reliability_evaluation.sh b/experiments/tsm/run_reliability_evaluation.sh new file mode 100644 index 00000000..42723d49 --- /dev/null +++ b/experiments/tsm/run_reliability_evaluation.sh @@ -0,0 +1,35 @@ +#!/bin/bash + +export CUDA_HOME='/usr/local/cuda' + +pwd_dir=$PWD # PWD is set by bash; lowercase $pwd is an unset variable +cd ../../ + +source activate mmaction + +OOD_DATASET=$1 +MODEL=$2 +RESULT_DIR='experiments/tsm/results' + +case ${MODEL} in + dnn) + # DNN with Dropout model + CUDA_VISIBLE_DEVICES=${DEVICE} python experiments/evaluate_calibration.py \ + --ood_result ${RESULT_DIR}/TSM_DNN_BALD_${OOD_DATASET}_result.npz \ + --save_prefix ${RESULT_DIR}/../results_reliability/TSM_DNN_BALD_${OOD_DATASET}_reliability + ;; + edlnokl_avuc_debias) + # Evidential Deep Learning (without KL divergence loss term) with AvU Calibration and Debiasing + CUDA_VISIBLE_DEVICES=${DEVICE} python experiments/evaluate_calibration.py \ + --ood_result ${RESULT_DIR}/TSM_EDLNoKLAvUCDebias_EDL_${OOD_DATASET}_result.npz \ + --save_prefix ${RESULT_DIR}/../results_reliability/TSM_EDLNoKLAvUCDebias_EDL_${OOD_DATASET}_reliability + ;; + *) + echo "Invalid model: "${MODEL} + exit 1 # non-zero status so callers can detect the invalid model + ;; +esac + + +cd "$pwd_dir" +echo "Experiments finished!" \ No newline at end of file