diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..f969cce
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+Source Data/
\ No newline at end of file
diff --git a/__pycache__/winfault.cpython-35.pyc b/__pycache__/winfault.cpython-35.pyc
index 5d5f766..dc93d9d 100644
Binary files a/__pycache__/winfault.cpython-35.pyc and b/__pycache__/winfault.cpython-35.pyc differ
diff --git a/__pycache__/winfault.cpython-36.pyc b/__pycache__/winfault.cpython-36.pyc
new file mode 100644
index 0000000..495bd6f
Binary files /dev/null and b/__pycache__/winfault.cpython-36.pyc differ
diff --git a/adaboost.py b/scripts/adaboost.py
similarity index 61%
rename from adaboost.py
rename to scripts/adaboost.py
index 68d38e6..974a309 100644
--- a/adaboost.py
+++ b/scripts/adaboost.py
@@ -44,15 +44,22 @@
             'CS101__Control_cabinet_temp', 'CS101__Transformer_temp']
 
+# This gets all the data EXCEPT the faults listed. Labels as nf for "no-fault"
 # This gets all the data EXCEPT the faults listed. Labels as nf for "no-fault"
 nf = Turbine.filter(scada, Turbine.status_data_wec, "Main_Status",
-                    'fault_case_1', True, 600, 600, [62, 9, 80])
+                    'fault_case_1', True, 600, 600, [62, 9, 228, 80])
 
 # feeding fault
 ff = Turbine.filter(scada, Turbine.status_data_wec, "Main_Status",
                     'fault_case_1', False, 600, 600, 62)
+# mains failure fault
+# mf = Turbine.filter(scada, Turbine.status_data_wec, "Main_Status",
+#                     'fault_case_1', False, 600, 600, 60)
 # generator heating fault
 gf = Turbine.filter(scada, Turbine.status_data_wec, "Main_Status",
                     'fault_case_1', False, 600, 600, 9)
+# aircooling fault
+af = Turbine.filter(scada, Turbine.status_data_wec, "Main_Status",
+                    'fault_case_1', False, 600, 600, 228)
 # excitation fault
 ef = Turbine.filter(scada, Turbine.status_data_wec, "Main_Status",
                     'fault_case_1', False, 600, 600, 80)
@@ -63,81 +70,80 @@
 print("=============================================================", "\n")
 
 # select the faults to include.
-faults = [ff, ef, gf]
+faults = [ff, gf, af, ef]
 
 # label and split into train, test and balanced training data
-xtrain, xtest, ytrain, ytest, xbaltrain, ybaltrain = \
+X_train, X_test, y_train, y_test, X_train_bal, y_train_bal = \
     Turbine.get_test_train_data(features, faults, nf)
 
 # labels for confusion matrix
-labels = ['no-fault', 'feeding fault', 'excitation fault', 'generator fault']
+labels = ['no-fault', 'feeding fault', 'generator fault',
+          'aircooling fault', 'excitation fault']
 
-print("========================================================")
-print("------Building models using balanced training data------")
-print("========================================================")
+print("Building models using balanced training data")
 
 # train and test the SVM
 parameter_space_bal = {
     'kernel': ['linear', 'rbf', 'poly'], 'gamma': ['auto', 1e-3, 1e-4],
-    'C': [0.01, .1, 1, 10, 100, 1000], 'class_weight': [None]}
+    'C': [0.01, .1, 1, 10, 100, 1000]}
 
print("Building balanced SVM")
 SVM_bal = RandomizedSearchCV(SVC(C=1), parameter_space_bal, cv=10,
-                         scoring='recall_weighted', iid=True)
+                             scoring='recall_weighted', iid=True)
 print("fitting balanced SVM")
-SVM_bal.fit(xbaltrain, ybaltrain)
+SVM_bal.fit(X_train_bal, y_train_bal)
 
 print("Hyperparameters for balanced SVM found:")
 print(SVM_bal.best_params_)
 
 print("getting predictions for balanced SVM")
-y_pred_svm_bal = SVM_bal.predict(xtest)
+y_pred_svm_bal = SVM_bal.predict(X_test)
 
 print("\n\n results for SVM")
-winfault.clf_scoring(ytest, y_pred_svm_bal, labels)
+winfault.clf_scoring(y_test, y_pred_svm_bal, labels)
 
-print("========================================================")
-print("------Building models using Imbalanced training data------")
-print("========================================================")
-parameter_space = {
-    'kernel': ['linear', 'rbf', 'poly'], 'gamma': ['auto', 1e-3, 1e-4],
-    'C': [0.01, .1, 1, 10, 100, 1000],
-    'class_weight': [
-        {0: 0.01}, {1: 1}, {1: 2}, {1: 10}, {1: 50}, 'balanced']}
+# print("========================================================")
+# print("------Building models using Imbalanced training data------")
+# print("========================================================")
+# parameter_space = {
+#     'kernel': ['linear', 'rbf', 'poly'], 'gamma': ['auto', 1e-3, 1e-4],
+#     'C': [0.01, .1, 1, 10, 100, 1000],
+#     'class_weight': [
+#         {0: 0.01}, {1: 1}, {1: 2}, {1: 10}, {1: 50}, 'balanced']}
 
-print("Building Imbalanced SVM")
-SVM = RandomizedSearchCV(SVC(C=1), parameter_space, cv=10,
-                         scoring='recall_weighted', iid=True)
-print("fitting Imbalanced SVM")
-SVM.fit(xtrain, ytrain)
+# print("Building Imbalanced SVM")
+# SVM = RandomizedSearchCV(SVC(C=1), parameter_space, cv=10,
+#                          scoring='recall_weighted', iid=True)
+# print("fitting Imbalanced SVM")
+# SVM.fit(X_train, y_train)
 
-print("Hyperparameters for Imbalanced SVM found:")
-print(SVM.best_params_)
+# print("Hyperparameters for Imbalanced SVM found:")
+# print(SVM.best_params_)
 
-print("getting predictions for Imbalanced SVM")
-y_pred_svm = SVM.predict(xtest)
+# print("getting predictions for Imbalanced SVM")
+# y_pred_svm = SVM.predict(X_test)
 
-print("\n\n results for SVM")
-winfault.clf_scoring(ytest, y_pred_svm, labels)
+# print("\n\n results for SVM")
+# winfault.clf_scoring(y_test, y_pred_svm, labels)
 
 # train and test adaboost svm
 print("Building AdaBoost Classifier")
 adaboost = sklearn.ensemble.AdaBoostClassifier(
-    base_estimator=SVC(**SVM.best_params_), algorithm='SAMME')
+    base_estimator=SVC(**SVM_bal.best_params_), algorithm='SAMME')
print("fitting AdaBoost Classifier") -adaboost.fit(xbaltrain, ybaltrain) +adaboost.fit(X_train_bal, y_train_bal) print("getting predictions") -y_pred_ada = adaboost.predict(xtest) +y_pred_ada = adaboost.predict(X_test) print("\n\nResults for AdaBoosted SVM:") -winfault.clf_scoring(ytest, y_pred_ada, labels) +winfault.clf_scoring(y_test, y_pred_ada, labels) # train and test svm # clf_bal, bgg_bal = winfault.svm_class_and_score( -# xbaltrain, ybaltrain, xtest, ytest, labels, +# X_train_bal, y_train_bal, X_test, y_test, labels, # parameter_space=parameter_space_bal, bagged=True, score='recall_weighted', # search_type=GridSearchCV) diff --git a/fault_vs_all_example.py b/scripts/fault_vs_all_example.py similarity index 95% rename from fault_vs_all_example.py rename to scripts/fault_vs_all_example.py index d5fa70c..52796b4 100644 --- a/fault_vs_all_example.py +++ b/scripts/fault_vs_all_example.py @@ -69,9 +69,9 @@ # 'fault_case_1', True, 600,600,[62]) # label and split into train, test and balanced training data -xtrain, xtest, ytrain, ytest, xbaltrain, ybaltrain = \ +X_train, X_test, y_train, y_test, X_train_bal, y_train_bal = \ Turbine.get_test_train_data(features, faults, nf) # labels for confusion matrix labels = ['no-fault', 'feeding fault', 'excitation fault', 'generator fault'] # train and test svm -winfault.svm_class_and_score(xbaltrain, ybaltrain, xtest, ytest, labels) +winfault.svm_class_and_score(X_train_bal, y_train_bal, X_test, y_test, labels) diff --git a/results_fault_diagnosis_1_v_all.py b/scripts/results_fault_diagnosis_1_v_all.py similarity index 94% rename from results_fault_diagnosis_1_v_all.py rename to scripts/results_fault_diagnosis_1_v_all.py index 4e77ece..1a5a4fa 100644 --- a/results_fault_diagnosis_1_v_all.py +++ b/scripts/results_fault_diagnosis_1_v_all.py @@ -64,7 +64,7 @@ faults = [ff, ef, gf] # label and split into train, test and balanced training data -xtrain, xtest, ytrain, ytest, xbaltrain, ybaltrain = \ +X_train, X_test, y_train, y_test, X_train_bal, y_train_bal = \ Turbine.get_test_train_data(features, faults, nf) # labels for confusion matrix @@ -81,7 +81,7 @@ # train and test svm clf_bal, bgg_bal = winfault.svm_class_and_score( - xbaltrain, ybaltrain, xtest, ytest, labels, + X_train_bal, y_train_bal, X_test, y_test, labels, parameter_space=parameter_space_bal, bagged=True, score='recall_weighted', search_type=GridSearchCV) @@ -97,7 +97,7 @@ # train and test svm clf, bgg = winfault.svm_class_and_score( - xtrain, ytrain, xtest, ytest, labels, + X_train, y_train, X_test, y_test, labels, parameter_space=parameter_space, bagged=True, score='recall_weighted', search_type=RandomizedSearchCV) @@ -111,7 +111,7 @@ # af = np.append(ff, ef) # af = np.append(af, gf) -# xtrain, xtest, ytrain, ytest, xbaltrain, ybaltrain = \ +# X_train, X_test, y_train, y_test, X_train_bal, y_train_bal = \ # Turbine.get_test_train_data(features, [af], nf) # # labels for confusion matrix @@ -128,7 +128,7 @@ # train and test svm # clf_bal, bgg_bal = winfault.svm_class_and_score( -# xbaltrain, ybaltrain, xtest, ytest, labels, +# X_train_bal, y_train_bal, X_test, y_test, labels, # parameter_space=parameter_space_bal, bagged=True, score='recall_weighted', # search_type=GridSearchCV) @@ -144,6 +144,6 @@ # # train and test svm # clf, bgg = winfault.svm_class_and_score( -# xtrain, ytrain, xtest, ytest, labels, +# X_train, y_train, X_test, y_test, labels, # parameter_space=parameter_space, bagged=True, score='recall_weighted', # search_type=RandomizedSearchCV) diff --git a/script.py 
similarity index 100%
rename from script.py
rename to scripts/script.py
diff --git a/winfault.py b/winfault.py
index 28a61be..e17b801 100644
--- a/winfault.py
+++ b/winfault.py
@@ -157,7 +157,7 @@ def __import_data(self):
         time = data_file['Time']
         for i in range(0, len(time)):
             t = dt.datetime.strptime(time[i], "%d/%m/%Y %H:%M:%S")
-            t = (t - dt.datetime.fromtimestamp(3600)).total_seconds()
+            t = (t - dt.datetime.utcfromtimestamp(3600)).total_seconds()
             time[i] = t
 
         # convert Unix timestamp string to float (for some reason this
@@ -266,10 +266,11 @@ def filter(
            - If 'fault_case_3' The function gets timestamps for the
              times between `time_delta_1` and `time_delta_2` before
              a certain fault starts. It returns indices of `scada_data`
-             which fall between these time stamps, but ONLY IF no other
+             which fall between these time stamps in the same way as
+             'fault_case_2', BUT ONLY IF no other
              instance of the same fault occured during this period.
              Therefore, it contains only data which led up to the
-             fault. Used for fault prediction purposes.
+             fault.
         return_inverse: boolean, optional (default=False)
             If True, the function will return the indices of filtered
             SCADA data which DON'T correspond to what this function
@@ -297,7 +298,7 @@ def filter(
            - If `filter_type` = 'fault_case_1', AFTER faulty operation
              ends from which to include `scada_data` indices
            - If `filter_type` = 'fault_case_2' or 'fault_case_3', this
-             refers to the time AFTER faulty operation begins from
+             refers to the time BEFORE faulty operation begins from
              which to stop including `scada_data` indices. Must be less
              than `time_delta_1`
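
Aside on the `__import_data` change above: `datetime.fromtimestamp()` interprets an epoch offset in the machine's local timezone, while `utcfromtimestamp()` does not, so the resulting second offsets become reproducible across machines. A minimal, self-contained sketch of the conversion, assuming only the timestamp format used in `winfault.py` (the sample date itself is made up):

import datetime as dt

# Parse a SCADA-style timestamp string (same format as in __import_data).
t = dt.datetime.strptime("01/01/2015 00:10:00", "%d/%m/%Y %H:%M:%S")

# utcfromtimestamp(3600) is always 1970-01-01 01:00:00, regardless of the
# local timezone; fromtimestamp(3600) would shift with the machine's TZ.
epoch_plus_one_hour = dt.datetime.utcfromtimestamp(3600)

# Seconds elapsed since that fixed reference point.
seconds = (t - epoch_plus_one_hour).total_seconds()
print(seconds)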
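
The docstring edits above also pin down `'fault_case_3'`: it selects SCADA rows from `time_delta_1` seconds before a fault starts up to `time_delta_2` seconds before it starts (so `time_delta_2` must be less than `time_delta_1`), and only when no other instance of the same fault occurred in that window. A hedged usage sketch, mirroring the argument order of the `'fault_case_1'` calls in scripts/adaboost.py; the variable name, the 3600/600 deltas and the reuse of status code 62 are illustrative assumptions, not taken from the repo:

# Assumes Turbine and scada are already set up as in the scripts above
# (e.g. scripts/adaboost.py).
# Illustrative only: data from 1 hour down to 10 minutes before each
# feeding fault (Main_Status 62) begins, e.g. for fault prediction.
pre_ff = Turbine.filter(scada, Turbine.status_data_wec, "Main_Status",
                        'fault_case_3', False, 3600, 600, 62)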