-
Notifications
You must be signed in to change notification settings - Fork 4
/
classify.py
executable file
·143 lines (130 loc) · 5.33 KB
/
classify.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
#!/usr/bin/python
'''
Created on May 28, 2011
@author: Carsten Witzke
'''
import sys
import getopt
from de.staticline.classification.svmlinear import DualCoordinateDescent
from de.staticline.classification.dummys import Always1Predictor
from de.staticline.classification.svm import SMO_Keerthi
import os
from de.staticline.tools.libsvmtools import LibsvmFileImporter
from de.staticline.kernels.kernels import Polynomial, Linear, RBF
def _parse_number(text):
    """Convert a command-line string to ``int`` if possible, else ``float``.

    Raises ValueError when *text* is not a valid number.
    """
    try:
        return int(text)
    except ValueError:
        return float(text)


def _numeric_option(opts, name, default):
    """Return the numeric value given for long option *name*, or *default*.

    *opts* is the list of (option, value) pairs produced by ``getopt.getopt``.
    If the option occurs more than once, the last occurrence wins.  A value
    that cannot be parsed as a number is reported and ends the program with
    exit status 2.
    """
    value = default
    for opt, arg in opts:
        if opt != name:
            continue
        try:
            value = _parse_number(arg)
        except ValueError:
            print('Invalid numeric value for {option}: {value}'.format(option=name, value=arg))
            sys.exit(2)
    return value


def main(argv):
    """Parse the command line in *argv* and train the selected classifier.

    *argv* is the argument list without the program name (the script passes
    ``sys.argv[1:]``).  Options are handled in the order given: a classifier
    and a kernel are built, and the training/test file paths are checked for
    existence.  Both files are then loaded with ``LibsvmFileImporter`` and the
    SVM classifiers are trained on the training set.  The test set is loaded
    but not evaluated yet (see the TODO below).

    Exits with status 0 after ``-h/--help`` and with status 2 on any
    command-line problem (unknown option, unknown classifier, non-numeric
    parameter value, missing or nonexistent file).

    Note: only single-argument ``print(...)`` calls are used, which behave
    the same on Python 2 and Python 3.
    """
    ## get arguments
    try:
        opts, _unused_args = getopt.getopt(argv, 'hvc:k:',
            ['help','verbose','classifier=','complexity=','accuracy=','kernel=','degree=','gamma=','training-file=','test-file='])
    except getopt.GetoptError as error:
        print(str(error))
        usage()
        sys.exit(2)

    ## handle arguments
    classifier = None
    kernel = None
    trainingFile = None
    testFile = None
    # Determine verbosity up front so that it reaches the classifier
    # constructor regardless of whether -v comes before or after -c.
    verbose = any(opt in ('-v', '--verbose') for opt, _ in opts)

    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage()
            sys.exit()
        # -------------- CLASSIFIER -----------------
        if opt in ('-c', '--classifier'):
            if arg == 'dummy': # -------- dummy --
                classifier = Always1Predictor()
            elif arg in ('svm-cd', 'svm-smo-keerthi'):
                # getopt yields strings; the defaults are numeric, so the
                # user-supplied values are converted to numbers as well.
                c = _numeric_option(opts, '--complexity', 1)
                a = _numeric_option(opts, '--accuracy', 1e-10)
                if arg == 'svm-cd': # -------- svm coordinate descent --
                    classifier = DualCoordinateDescent(accuracy=a, complexity=c, verbose=verbose)
                else: # -------- SMO (Keerthi) --
                    classifier = SMO_Keerthi(accuracy=a, complexity=c, verbose=verbose)
            else:
                print('Sorry, this classifier is currently not implemented :(')
                sys.exit(2)
        # -------------- KERNEL -----------------
        if opt in ('-k', '--kernel'):
            if arg == 'linear':
                kernel = Linear()
            elif arg == 'poly':
                # Use the kernel's own default unless --degree was given.
                degree = _numeric_option(opts, '--degree', None)
                kernel = Polynomial() if degree is None else Polynomial(degree)
            elif arg == 'rbf':
                # Use the kernel's own default unless --gamma was given.
                gamma = _numeric_option(opts, '--gamma', None)
                kernel = RBF() if gamma is None else RBF(gamma)
            else:
                # An unknown kernel is reported but is not fatal.
                print('unknown kernel {kernel}'.format(kernel=arg))
                kernel = None
        # -------------- TRAINING FILE -----------------
        if opt == '--training-file':
            if os.path.exists(arg):
                trainingFile = arg
            else:
                print('It seems that the training file you specified does not exist. Please check path.')
                sys.exit(2)
        # -------------- TEST FILE -----------------
        if opt == '--test-file':
            if os.path.exists(arg):
                testFile = arg
            else:
                print('It seems that the test file you specified does not exist. Please check path.')
                sys.exit(2)

    ## process input
    if classifier is None:
        print('No classifier specified.')
        usage()
        sys.exit(2)
    if trainingFile is None:
        print('No training file specified.')
        usage()
        sys.exit(2)
    if testFile is None:
        print('No test file specified.')
        usage()
        sys.exit(2)

    training = LibsvmFileImporter(trainingFile, binary=True).get_dataSet()
    # Loaded so that an unreadable test file still fails here; not used yet.
    testing = LibsvmFileImporter(testFile, binary=True).get_dataSet()

    #start classification - TODO: implement report and validation
    # Only the SVM classifiers are trained; the dummy predictor is left as is.
    if isinstance(classifier, (DualCoordinateDescent, SMO_Keerthi)):
        classifier.set_kernel(kernel)
        if verbose:
            print(classifier)
        classifier.train(training.get_features(), training.get_targets())
        # NOTE(review): the support-vector count is reported for SMO only,
        # which is where the statement sat in the original - confirm whether
        # DualCoordinateDescent should report it too.
        if isinstance(classifier, SMO_Keerthi):
            print('# support vectors: {0}'.format(classifier.get_num_support_vectors()))
def usage():  # not 100% UNIX style... don't care atm
    """Print the command-line help text for this script to standard output.

    The text lists every option that ``main`` passes to ``getopt``; the
    program name shown is taken from ``__file__``.
    """
    # A single-argument print(...) behaves the same on Python 2 and 3.
    print('''usage: {file} [options]
-h, --help display this usage information
-v, --verbose enable verbose output
-c, --classifier=svm-cd|svm-smo-keerthi|dummy select a classifier
--complexity=VALUE set the complexity VALUE for the classifier
--accuracy=VALUE set the accuracy VALUE for the classifier
-k, --kernel=linear|poly|rbf select a kernel
--degree=VALUE set the degree VALUE of a poly-Kernel
--gamma=VALUE set the gamma VALUE for a rbf-Kernel
--training-file=FILE read training data from FILE
--test-file=FILE evaluate on FILE
'''.format(file=__file__))
# Run the command-line interface only when this file is executed as a script.
if __name__ == '__main__':
    cli_arguments = sys.argv[1:]  # everything after the program name
    main(cli_arguments)