# BioBot_FDS_03: SVM_Model
## Deliverable_03: Implementing a Support Vector Machine (SVM)-like Classifier model
Author/code developer: Yan Bello. 14/11/2018. As part of the Master in Artificial Intelligence (UNIR). 
This file/code is part of the development and exploration/experimentation on a Fall Detection System (FDS). 

---


In the following sections, we used this dataset: 
SisFall: A Fall and Movement Dataset. 
Created by: A. Sucerquia, J.D. López, J.F. Vargas-Bonilla
SISTEMIC, Faculty of Engineering, Universidad de Antiquia UDEA.
Detailed information about this dataset can be found in this website: http://sistemic.udea.edu.co/en/investigacion/proyectos/english-falls/.
Reference paper: Sucerquia A, López JD, Vargas-Bonilla JF. SisFall: A Fall and Movement Dataset. Sensors (Basel). 2017;17(1):198. Published 2017 Jan 20. doi:10.3390/s17010198

---



In [0]:
# Preliminary step 0. We need to establish/select our working folders. First, ensure the previous dataset files are available.
# The code below is prepared to work with two options: local drive or mounting a Google Drive for Colab
# Select the appropriate configuration for your environment by commenting/un-commenting the following lines:

# To work with Google Colab and Google Drive: 
from google.colab import drive 
drive.mount('/content/gdrive')
FILE_DIRECTORY = "gdrive/My Drive/Colab Notebooks/"
SisFall_ALL_DIRECTORY = FILE_DIRECTORY + "SisFall_dataset_ALL/"

# To work with a local drive, uncomment these line:
# FILE_DIRECTORY = os.getcwd() + "\\"
# SisFall_ALL_DIRECTORY = FILE_DIRECTORY + "SisFall_dataset_ALL\\"

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


## 3.1 Load a dataframe with prepared info from ADL/Falls dataset

In [0]:
# We work with the prepared file Unified_ADL_Falls, which is based on the previous dataset
my_data_file_name = FILE_DIRECTORY + "Unified_ADL_Falls.txt"

import pandas as pd

# Creamos un data frame y cargamos los datos del fichero
df_ADL_Falls = pd.DataFrame(pd.read_csv(my_data_file_name, sep = ','))

df_ADL_Falls.drop('0', axis=1, inplace=True)

df_only_ADLs = df_ADL_Falls[df_ADL_Falls.Fall_ADL == "D"]
df_only_Falls = df_ADL_Falls[df_ADL_Falls.Fall_ADL == "F"]

# mostramos el data frame
print(df_only_ADLs.tail())
print(df_only_Falls.tail())

 Act_Type Age_Cat Fall_ADL File kurtosis_S1_X max_S1_X \
2697 D19 SE D D19_SE06_R01.txt 8.727956 190 
2698 D19 SE D D19_SE06_R02.txt 10.096698 86 
2699 D19 SE D D19_SE06_R03.txt 9.540330 259 
2700 D19 SE D D19_SE06_R04.txt 20.191198 393 
2701 D19 SE D D19_SE06_R05.txt 9.022231 230 

 mean_S1_X min_S1_X range_S1_X skewness_S1_X ... \
2697 20.204659 -195 385 -1.745292 ... 
2698 -33.031614 -324 410 -1.976282 ... 
2699 8.276206 -154 413 0.398760 ... 
2700 9.514143 -255 648 0.993127 ... 
2701 9.554077 -164 394 -0.149056 ... 

 range_S1_N_VER skewness_S1_N_VER std_S1_N_VER var_S1_N_VER corr_HV \
2697 1.531165 2.009740 0.189131 0.035770 0.711721 
2698 1.721676 2.839037 0.226830 0.051452 0.865803 
2699 1.651294 2.986164 0.175513 0.030805 0.747053 
2700 2.525731 4.938333 0.230644 0.053197 0.787008 
2701 2.110528 4.118197 0.203275 0.041321 0.796733 

 corr_NH corr_NV corr_XY corr_XZ corr_YZ 
2697 0.996825 0.739432 -0.309286 -0.145757 0.702846 
2698 0.995884 0.886033 0.752237 0.536177 0.744701 
2

### Shuffle and set up training and test samples for ADL/Falls

In [0]:
import random
import math
from numpy.random import permutation

# Randomly shuffle the index of each set (ADLs and Falls)
# -------------------------------------------------------
# First we prepare the sets of ADLs
random_indices = permutation(df_only_ADLs.index)
# Use a test-split (of 30% of the items)
test_split = math.floor(len(df_only_ADLs)*0.3)
# Test set with 30% of items
df_only_ADLs_test = df_only_ADLs.loc[random_indices[0:test_split]]
# Train set with 70% of the items.
df_only_ADLs_train = df_only_ADLs.loc[random_indices[test_split:]]


# -------------------------------------------------------
# Now we prepare the sets of Falls
random_indices = permutation(df_only_Falls.index)
# Use a test-split (of 30% of the items)
test_split = math.floor(len(df_only_Falls)*0.3)
# Test set with 30% of items
df_only_Falls_test = df_only_Falls.loc[random_indices[0:test_split]]
# Train set with 70% of the items.
df_only_Falls_train = df_only_Falls.loc[random_indices[test_split:]]



print("Total ADL: " + str(len(df_only_ADLs)))
print("Total Falls: " + str(len(df_only_Falls)))
print("GRAND Total: " + str(len(df_only_Falls)+len(df_only_ADLs)))
print("---------------------------------------")
print("Train Falls: "+ str(len(df_only_Falls_train)))
print("Train ADL: "+ str(len(df_only_ADLs_train)))
print("Train TOTAL: "+ str(len(df_only_ADLs_train)+len(df_only_Falls_train)))
print("---------------------------------------")
print("Test Falls: "+ str(len(df_only_Falls_test)))
print("Test ADL: "+ str(len(df_only_ADLs_test)))
print("Test TOTAL: "+ str(len(df_only_ADLs_test)+len(df_only_Falls_test)))

Total ADL: 2702
Total Falls: 1798
GRAND Total: 4500
---------------------------------------
Train Falls: 1259
Train ADL: 1892
Train TOTAL: 3151
---------------------------------------
Test Falls: 539
Test ADL: 810
Test TOTAL: 1349


In [0]:
# Prepare dataset with Test examplars

frames = [df_only_Falls_test, df_only_ADLs_test]
df_ADL_Falls_test = pd.concat(frames)
print("Test ADLs: "+ str(len(df_only_ADLs_test)))
print("Test Falls: "+ str(len(df_only_Falls_test)))
print("Test ALL: "+ str(len(df_ADL_Falls_test)))

print(df_ADL_Falls_test.head())
print(df_ADL_Falls_test.tail())


Test ADLs: 810
Test Falls: 539
Test ALL: 1349
 Act_Type Age_Cat Fall_ADL File kurtosis_S1_X max_S1_X \
3477 F07 SA F F07_SA12_R02.txt 28.158868 1730 
4496 F15 SE F F15_SE06_R02.txt 14.164169 128 
3031 F03 SA F F03_SA19_R01.txt 88.093106 191 
4384 F15 SA F F15_SA01_R05.txt 64.593266 2619 
3719 F09 SA F F09_SA12_R04.txt 2.035594 19 

 mean_S1_X min_S1_X range_S1_X skewness_S1_X ... \
3477 63.324459 -510 2240 3.887497 ... 
4496 -155.198003 -911 1039 -3.036554 ... 
3031 -146.509151 -4096 4287 -8.141281 ... 
4384 -159.004992 -4053 6672 -5.225071 ... 
3719 -147.504160 -835 854 -1.118020 ... 

 range_S1_N_VER skewness_S1_N_VER std_S1_N_VER var_S1_N_VER corr_HV \
3477 6.699628 4.023581 0.718885 0.516795 0.846591 
4496 3.290032 4.064083 0.373755 0.139693 -0.055799 
3031 16.388624 9.088421 1.221342 1.491676 0.690857 
4384 21.553022 8.303136 1.562807 2.442366 0.820121 
3719 3.256540 1.456533 0.498986 0.248987 0.088200 

 corr_NH corr_NV corr_XY corr_XZ corr_YZ 
3477 0.950524 0.919384 -0.668190 -0

In [0]:
# Prepare dataset with Train examplars

frames = [df_only_Falls_train, df_only_ADLs_train]
df_ADL_Falls_train = pd.concat(frames)
print("train ADLs: "+ str(len(df_only_ADLs_train)))
print("train Falls: "+ str(len(df_only_Falls_train)))
print("train ALL: "+ str(len(df_ADL_Falls_train)))

print(df_ADL_Falls_train.head())
print(df_ADL_Falls_train.tail())


train ADLs: 1892
train Falls: 1259
train ALL: 3151
 Act_Type Age_Cat Fall_ADL File kurtosis_S1_X max_S1_X \
4149 F13 SA F F13_SA02_R05.txt 4.165379 53 
3575 F08 SA F F08_SA07_R05.txt 4.422467 205 
4172 F13 SA F F13_SA07_R03.txt 2.065487 221 
3303 F06 SA F F06_SA01_R03.txt 12.473522 893 
3400 F06 SA F F06_SA20_R05.txt 10.054013 1396 

 mean_S1_X min_S1_X range_S1_X skewness_S1_X ... \
4149 -147.003328 -918 971 -1.356286 ... 
3575 -143.532446 -989 1194 -1.547840 ... 
4172 -81.241265 -463 684 -0.420039 ... 
3303 129.995008 -88 981 2.952118 ... 
3400 123.337770 -565 1961 2.090652 ... 

 range_S1_N_VER skewness_S1_N_VER std_S1_N_VER var_S1_N_VER corr_HV \
4149 3.616831 1.759811 0.505234 0.255262 -0.053625 
3575 4.529287 3.027514 0.653364 0.426884 0.325869 
4172 3.010339 2.310889 0.410026 0.168122 0.889051 
3303 3.467718 2.645097 0.484077 0.234330 0.475571 
3400 5.562395 2.550464 0.684755 0.468889 0.503541 

 corr_NH corr_NV corr_XY corr_XZ corr_YZ 
4149 0.340929 0.825944 -0.536538 -0.287334

## 3.2 Define and train a Support Vector Machine (SVM)-like Classifier
Below we use LinearSVC from sklearn.svm, experimenting with various parameter settings. For clarity and simplicity here only one model configuration is included.

### Define and train the SVC model

In [0]:
# Here we use LinearSVC
from sklearn.svm import LinearSVC

# define the classifier
clf = LinearSVC(random_state=0, tol=1e-8, dual=False)

# prepare/get the columns
X= df_ADL_Falls_train[x_columns]
y= train_y.ravel()

# Fit/train classifier
clf.fit(X, y)

# Preview coeficients
print(clf.coef_)
print(clf.intercept_)

df_ADL_Falls_test[x_columns]

# Run and print predictions
predictions_SVC = clf.predict(df_ADL_Falls_test[x_columns])
print(predictions_SVC)


[[ 5.50460153e-03 -1.26392023e-04 -1.67728666e-03 4.10116184e-04
 -5.36508199e-04 2.91396916e-02 2.03349594e-02 -2.70047974e-05
 -6.06957812e-02 9.72320642e-04 3.92629309e-03 8.51681232e-04
 1.20639383e-04 -1.03292929e-01 2.37057189e-02 -4.55185941e-05
 -2.28879402e-02 -1.93256595e-04 -2.46620935e-03 1.09700239e-03
 -1.29025898e-03 -2.21147668e-03 2.55894454e-03 6.40749465e-05
 -1.12220492e-01 4.75388036e-01 -3.04926438e+00 -2.10432657e-02
 4.96431302e-01 7.16062790e-01 8.47875950e-01 -1.94739601e-01
 7.16921140e-02 8.91059980e-02 -1.45040029e+00 3.57430599e-01
 -2.68324601e-01 -3.33100845e-02 -1.90200989e+00 -1.57840043e+00
 4.54536040e-02 7.86236548e-02 5.67000966e-02 2.62011769e-01
 -1.83388114e-01 -2.64780902e-01 1.46377141e+00 8.61942609e-01
 -3.31388649e-01 7.99789866e-01 7.21179231e-01 3.68531588e-01
 2.28679221e-01 3.05568580e-01]]
[-1.81243313]
['F' 'F' 'F' ... 'D' 'D' 'D']


#### Confusion matrix for SVC/SVM Classifier

In [0]:
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(df_ADL_Falls_test[y_column], predictions_SVC, labels=["D", "F"])
print("Confusion Matrix:")
print("-----------------")
print(cm)
print("-----------------")
cm_norm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
print("Confusion Matrix (Normalized):")
print("-----------------------------")
print(cm_norm)
print("-----------------------------")

Confusion Matrix:
-----------------
[[806 4]
 [ 3 536]]
-----------------
Confusion Matrix (Normalized):
-----------------------------
[[0.99506173 0.00493827]
 [0.00556586 0.99443414]]
-----------------------------


#### Sensitivity, Specificity, Precision and Accuracy

In [0]:
# calculations of measurements of performance

n_TP = cm[1,1]
n_FP = cm[1,0]
n_TN = cm[0,0]
n_FN = cm[0,1]

# SENSITIVITY = TP / (TP + FN)
svc_Sensitivity = n_TP / (n_TP + n_FN)
print("svc_Sensitivity = "+ str(svc_Sensitivity))

# SPECIFICITY = TN / (FP + TN)
svc_Specificity = n_TN / (n_FP + n_TN)
print("svc_Specificity = "+ str(svc_Specificity))

# Precision = TP / (TP + FP)
svc_Precision = n_TP / (n_TP + n_FP)
print("svc_Precision = "+ str(svc_Precision))

# Accuracy = (TP + TN) / (TP + FP + TN + FN)
svc_Accuracy = (n_TP + n_TN) / (n_TP + n_FP + n_TN + n_FN)
print("svc_Accuracy = "+ str(svc_Accuracy))

svc_Sensitivity = 0.9925925925925926
svc_Specificity = 0.9962917181705809
svc_Precision = 0.9944341372912802
svc_Accuracy = 0.9948109710896961
