Commit
changes to .gitignore
lkev committed Apr 1, 2020
1 parent 25d4ec0 commit d3b748d
Showing 8 changed files with 56 additions and 48 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -0,0 +1 @@
+Source Data/
Binary file modified __pycache__/winfault.cpython-35.pyc
Binary file not shown.
Binary file added __pycache__/winfault.cpython-36.pyc
Binary file not shown.
78 changes: 42 additions & 36 deletions adaboost.py → scripts/adaboost.py
@@ -44,15 +44,22 @@
'CS101__Control_cabinet_temp',
'CS101__Transformer_temp']

-# This gets all the data EXCEPT the faults listed. Labels as nf for "no-fault"
+# This gets all the data EXCEPT the faults listed. Labels as nf for "no-fault"
nf = Turbine.filter(scada, Turbine.status_data_wec, "Main_Status",
-'fault_case_1', True, 600, 600, [62, 9, 80])
+'fault_case_1', True, 600, 600, [62, 9, 228, 80])
# feeding fault
ff = Turbine.filter(scada, Turbine.status_data_wec, "Main_Status",
'fault_case_1', False, 600, 600, 62)
# mains failure fault
# mf = Turbine.filter(scada, Turbine.status_data_wec, "Main_Status",
# 'fault_case_1', False, 600, 600, 60)
# generator heating fault
gf = Turbine.filter(scada, Turbine.status_data_wec, "Main_Status",
'fault_case_1', False, 600, 600, 9)
+# aircooling fault
+af = Turbine.filter(scada, Turbine.status_data_wec, "Main_Status",
+'fault_case_1', False, 600, 600, 228)
# excitation fault
ef = Turbine.filter(scada, Turbine.status_data_wec, "Main_Status",
'fault_case_1', False, 600, 600, 80)
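
For context, each of these calls selects SCADA rows by their "Main_Status" code (62 feeding, 9 generator heating, 228 aircooling, 80 excitation, per the comments above), with the boolean flag inverting the selection to yield the "no-fault" set. A minimal self-contained sketch of that idea follows; the array layout and helper are illustrative, not winfault's actual implementation:

```python
import numpy as np

# Status codes as named in the comments above (illustrative mapping).
FAULT_CODES = {62: "feeding fault", 9: "generator fault",
               228: "aircooling fault", 80: "excitation fault"}

# Toy stand-in for SCADA rows: (timestamp, main status code).
scada_toy = np.array([(0, 0), (600, 62), (1200, 9), (1800, 0), (2400, 228)],
                     dtype=[("Time", "f8"), ("Main_Status", "i4")])

def select_by_status(data, codes, invert=False):
    """Return rows whose Main_Status is (or, inverted, is not) in codes."""
    mask = np.isin(data["Main_Status"], list(codes))
    return data[~mask] if invert else data[mask]

faulty = select_by_status(scada_toy, FAULT_CODES)               # fault rows
nf_toy = select_by_status(scada_toy, FAULT_CODES, invert=True)  # "no-fault" rows
```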
@@ -63,81 +70,80 @@
print("=============================================================", "\n")

# select the faults to include.
-faults = [ff, ef, gf]
+faults = [ff, gf, af, ef]

# label and split into train, test and balanced training data
-xtrain, xtest, ytrain, ytest, xbaltrain, ybaltrain = \
+X_train, X_test, y_train, y_test, X_train_bal, y_train_bal = \
Turbine.get_test_train_data(features, faults, nf)

# labels for confusion matrix
-labels = ['no-fault', 'feeding fault', 'excitation fault', 'generator fault']
+labels = ['no-fault', 'feeding fault', 'generator fault',
+'aircooling fault', 'excitation fault']

print("========================================================")
print("------Building models using balanced training data------")
print("========================================================")
print("Building models using balanced training data")

# train and test the SVM

parameter_space_bal = {
'kernel': ['linear', 'rbf', 'poly'], 'gamma': ['auto', 1e-3, 1e-4],
-'C': [0.01, .1, 1, 10, 100, 1000], 'class_weight': [None]}
+'C': [0.01, .1, 1, 10, 100, 1000]}

print("Building balanced SVM")
SVM_bal = RandomizedSearchCV(SVC(C=1), parameter_space_bal, cv=10,
-scoring='recall_weighted', iid=True)
+scoring='recall_weighted', iid=True)
print("fitting balanced SVM")
-SVM_bal.fit(xbaltrain, ybaltrain)
+SVM_bal.fit(X_train_bal, y_train_bal)

print("Hyperparameters for balanced SVM found:")
print(SVM_bal.best_params_)

print("getting predictions for balanced SVM")
-y_pred_svm_bal = SVM_bal.predict(xtest)
+y_pred_svm_bal = SVM_bal.predict(X_test)

print("\n\n results for SVM")
-winfault.clf_scoring(ytest, y_pred_svm_bal, labels)
+winfault.clf_scoring(y_test, y_pred_svm_bal, labels)

print("========================================================")
print("------Building models using Imbalanced training data------")
print("========================================================")
parameter_space = {
'kernel': ['linear', 'rbf', 'poly'], 'gamma': ['auto', 1e-3, 1e-4],
'C': [0.01, .1, 1, 10, 100, 1000],
'class_weight': [
{0: 0.01}, {1: 1}, {1: 2}, {1: 10}, {1: 50}, 'balanced']}
# print("========================================================")
# print("------Building models using Imbalanced training data------")
# print("========================================================")
# parameter_space = {
# 'kernel': ['linear', 'rbf', 'poly'], 'gamma': ['auto', 1e-3, 1e-4],
# 'C': [0.01, .1, 1, 10, 100, 1000],
# 'class_weight': [
# {0: 0.01}, {1: 1}, {1: 2}, {1: 10}, {1: 50}, 'balanced']}

print("Building Imbalanced SVM")
SVM = RandomizedSearchCV(SVC(C=1), parameter_space, cv=10,
scoring='recall_weighted', iid=True)
print("fitting Imbalanced SVM")
SVM.fit(xtrain, ytrain)
# print("Building Imbalanced SVM")
# SVM = RandomizedSearchCV(SVC(C=1), parameter_space, cv=10,
# scoring='recall_weighted', iid=True)
# print("fitting Imbalanced SVM")
# SVM.fit(X_train, y_train)

print("Hyperparameters for Imbalanced SVM found:")
print(SVM.best_params_)
# print("Hyperparameters for Imbalanced SVM found:")
# print(SVM.best_params_)

print("getting predictions for Imbalanced SVM")
y_pred_svm = SVM.predict(xtest)
# print("getting predictions for Imbalanced SVM")
# y_pred_svm = SVM.predict(X_test)

print("\n\n results for SVM")
winfault.clf_scoring(ytest, y_pred_svm, labels)
# print("\n\n results for SVM")
# winfault.clf_scoring(y_test, y_pred_svm, labels)

# train and test adaboost svm

print("Building AdaBoost Classifier")
adaboost = sklearn.ensemble.AdaBoostClassifier(
-base_estimator=SVC(**SVM.best_params_), algorithm='SAMME')
+base_estimator=SVC(**SVM_bal.best_params_), algorithm='SAMME')

print("fitting AdaBoost Classifier")
-adaboost.fit(xbaltrain, ybaltrain)
+adaboost.fit(X_train_bal, y_train_bal)

print("getting predictions")
-y_pred_ada = adaboost.predict(xtest)
+y_pred_ada = adaboost.predict(X_test)

print("\n\nResults for AdaBoosted SVM:")
-winfault.clf_scoring(ytest, y_pred_ada, labels)
+winfault.clf_scoring(y_test, y_pred_ada, labels)

# train and test svm
# clf_bal, bgg_bal = winfault.svm_class_and_score(
-# xbaltrain, ybaltrain, xtest, ytest, labels,
+# X_train_bal, y_train_bal, X_test, y_test, labels,
# parameter_space=parameter_space_bal, bagged=True, score='recall_weighted',
# search_type=GridSearchCV)
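
The surviving pipeline — tune an SVC on the balanced training set, then boost the best estimator — condenses to the self-contained sketch below on synthetic data. The grid is illustrative; `iid=True` is omitted because scikit-learn dropped that argument in 0.24, and `base_estimator` was later renamed `estimator`:

```python
import sklearn.ensemble
from sklearn.datasets import make_classification
from sklearn.model_selection import RandomizedSearchCV, train_test_split
from sklearn.svm import SVC

# Synthetic 3-class stand-in for the labelled SCADA feature matrix.
X, y = make_classification(n_samples=400, n_classes=3, n_informative=6,
                           random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Randomised hyperparameter search, scored on weighted recall as above.
search = RandomizedSearchCV(SVC(), {'kernel': ['linear', 'rbf'],
                                    'C': [0.1, 1, 10]},
                            cv=5, n_iter=5, scoring='recall_weighted',
                            random_state=0)
search.fit(X_train, y_train)

# SVC exposes no predict_proba unless probability=True, so the discrete
# 'SAMME' algorithm is required here rather than the default 'SAMME.R'.
ada = sklearn.ensemble.AdaBoostClassifier(
    base_estimator=SVC(**search.best_params_), algorithm='SAMME')
ada.fit(X_train, y_train)
print(ada.score(X_test, y_test))
```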
4 changes: 2 additions & 2 deletions fault_vs_all_example.py → scripts/fault_vs_all_example.py
@@ -69,9 +69,9 @@
# 'fault_case_1', True, 600,600,[62])

# label and split into train, test and balanced training data
-xtrain, xtest, ytrain, ytest, xbaltrain, ybaltrain = \
+X_train, X_test, y_train, y_test, X_train_bal, y_train_bal = \
Turbine.get_test_train_data(features, faults, nf)
# labels for confusion matrix
labels = ['no-fault', 'feeding fault', 'excitation fault', 'generator fault']
# train and test svm
-winfault.svm_class_and_score(xbaltrain, ybaltrain, xtest, ytest, labels)
+winfault.svm_class_and_score(X_train_bal, y_train_bal, X_test, y_test, labels)
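
`winfault.svm_class_and_score` itself is not part of this diff; judging from the steps adaboost.py spells out manually, it presumably wraps search, fit, predict and scoring. A sketch under that assumption (signature simplified; the real helper also takes a `bagged` flag):

```python
from sklearn.metrics import classification_report
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

def svm_class_and_score(X_train, y_train, X_test, y_test, labels,
                        parameter_space=None, score='recall_weighted',
                        search_type=GridSearchCV):
    """Tune an SVC, fit it, and report per-class test results."""
    parameter_space = parameter_space or {'C': [0.1, 1, 10]}
    clf = search_type(SVC(), parameter_space, scoring=score, cv=5)
    clf.fit(X_train, y_train)
    print(classification_report(y_test, clf.predict(X_test),
                                target_names=labels))
    return clf
```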
@@ -64,7 +64,7 @@
faults = [ff, ef, gf]

# label and split into train, test and balanced training data
-xtrain, xtest, ytrain, ytest, xbaltrain, ybaltrain = \
+X_train, X_test, y_train, y_test, X_train_bal, y_train_bal = \
Turbine.get_test_train_data(features, faults, nf)

# labels for confusion matrix
@@ -81,7 +81,7 @@

# train and test svm
clf_bal, bgg_bal = winfault.svm_class_and_score(
-xbaltrain, ybaltrain, xtest, ytest, labels,
+X_train_bal, y_train_bal, X_test, y_test, labels,
parameter_space=parameter_space_bal, bagged=True, score='recall_weighted',
search_type=GridSearchCV)

@@ -97,7 +97,7 @@

# train and test svm
clf, bgg = winfault.svm_class_and_score(
-xtrain, ytrain, xtest, ytest, labels,
+X_train, y_train, X_test, y_test, labels,
parameter_space=parameter_space, bagged=True, score='recall_weighted',
search_type=RandomizedSearchCV)

@@ -111,7 +111,7 @@
# af = np.append(ff, ef)
# af = np.append(af, gf)

-# xtrain, xtest, ytrain, ytest, xbaltrain, ybaltrain = \
+# X_train, X_test, y_train, y_test, X_train_bal, y_train_bal = \
# Turbine.get_test_train_data(features, [af], nf)

# # labels for confusion matrix
@@ -128,7 +128,7 @@

# train and test svm
# clf_bal, bgg_bal = winfault.svm_class_and_score(
-# xbaltrain, ybaltrain, xtest, ytest, labels,
+# X_train_bal, y_train_bal, X_test, y_test, labels,
# parameter_space=parameter_space_bal, bagged=True, score='recall_weighted',
# search_type=GridSearchCV)

@@ -144,6 +144,6 @@

# # train and test svm
# clf, bgg = winfault.svm_class_and_score(
-# xtrain, ytrain, xtest, ytest, labels,
+# X_train, y_train, X_test, y_test, labels,
# parameter_space=parameter_space, bagged=True, score='recall_weighted',
# search_type=RandomizedSearchCV)
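
This file keeps the imbalanced run, which differs from the balanced one chiefly through `class_weight`: the SVM's per-class penalty is rescaled instead of the training data being resampled. A minimal illustration on synthetic data (the weights are illustrative):

```python
from sklearn.datasets import make_classification
from sklearn.svm import SVC

# 9:1 class imbalance; unweighted SVMs tend to ignore the minority class.
X, y = make_classification(n_samples=500, weights=[0.9, 0.1], random_state=0)

plain = SVC().fit(X, y)
weighted = SVC(class_weight='balanced').fit(X, y)  # or e.g. {1: 10}

# The weighted model predicts the minority class far more often.
print((plain.predict(X) == 1).sum(), (weighted.predict(X) == 1).sum())
```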
File renamed without changes.
9 changes: 5 additions & 4 deletions winfault.py
@@ -157,7 +157,7 @@ def __import_data(self):
time = data_file['Time']
for i in range(0, len(time)):
t = dt.datetime.strptime(time[i], "%d/%m/%Y %H:%M:%S")
-t = (t - dt.datetime.fromtimestamp(3600)).total_seconds()
+t = (t - dt.datetime.utcfromtimestamp(3600)).total_seconds()
time[i] = t

# convert Unix timestamp string to float (for some reason this
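
The switch to `utcfromtimestamp` is a genuine fix: `datetime.fromtimestamp` interprets the epoch offset in the machine's local timezone, so the 3600-second reference point would shift with locale, whereas `utcfromtimestamp` always yields 1970-01-01 01:00:00. A quick demonstration:

```python
import datetime as dt

print(dt.datetime.fromtimestamp(3600))     # machine-dependent local time
print(dt.datetime.utcfromtimestamp(3600))  # always 1970-01-01 01:00:00
```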
@@ -266,10 +266,11 @@ def filter(
- If 'fault_case_3' The function gets timestamps for the
times between `time_delta_1` and `time_delta_2` before a
certain fault starts. It returns indices of `scada_data`
-which fall between these time stamps, but ONLY IF no other
+which fall between these time stamps in the same way as
+'fault_case_2', BUT ONLY IF no other
instance of the same fault occured during this period.
Therefore, it contains only data which led up to the
-fault. Used for fault prediction purposes.
+fault.
return_inverse: boolean, optional (default=False)
If True, the function will return the indices of filtered
SCADA data which DON'T correspond to what this function
@@ -297,7 +298,7 @@
- If `filter_type` = 'fault_case_1', AFTER faulty operation
ends from which to include `scada_data` indices
- If `filter_type` = 'fault_case_2' or 'fault_case_3', this
-refers to the time AFTER faulty operation begins from
+refers to the time BEFORE faulty operation begins from
which to stop including `scada_data` indices. Must be less
than `time_delta_1`
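
Read together with the calls in scripts/adaboost.py, the corrected docstring implies a prediction-style call like the sketch below, which would gather lead-up data from 600 s down to 60 s before each feeding fault (status 62) begins. The values are illustrative, and the positional argument order follows the scripts above:

```python
# Illustrative only: rows from 600 s to 60 s before each feeding fault,
# kept only when no other feeding fault occurred in that window.
pre_ff = Turbine.filter(scada, Turbine.status_data_wec, "Main_Status",
                        'fault_case_3', False, 600, 60, 62)
```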
