-
Notifications
You must be signed in to change notification settings - Fork 1
/
IgEPred
executable file
·189 lines (168 loc) · 6.59 KB
/
IgEPred
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
#!/usr/bin/perl
###################################################################################
# Author: Saravanan Vijayakumar (brsaran@gmail.com) #
# Affiliation: CAS in Crystallography and Biophysics, University of Madras, India #
# Date: 18-05-2016 #
###################################################################################
use strict;
use Cwd qw(abs_path);
use lib abs_path().'/dependency/module/'; #setting module dependnecy path ! Do not change unless you are sure !
use brsaran::DDE; #requires DDE module developed in this study
##########################
my $path2WEKA = abs_path().'/dependency/'; #change only if weka.jar is in some other location;
#########################
####### DO not change anyhting beyond this line, unless if you are sure ###############
###Argument Check~~~~~~~~~~~~
my ($i, $mode, $inFile, $outFile,$Thresh,$MODEL);
for($i=0;$i<@ARGV;$i++){
if($ARGV[$i] eq "-m"){
$mode = $ARGV[$i+1];
$mode=~s/\s//g;
if(($mode ne 'pep') && ($mode ne 'pro')){ print "-m $mode invalid option! use 'pro' or 'pep' Exiting !\n"; exit;}
}
if($ARGV[$i] eq "-i"){
$inFile = $ARGV[$i+1];
$inFile=~s/\s//g;
unless(open(INFILE,"$inFile")){die "Invalid Input file or file doesn't Exist ! Exiting";}
}
if($ARGV[$i] eq "-o"){
$outFile = $ARGV[$i+1];
$outFile=~s/\s//g;
unless(open(OUTFILE,">$outFile")){die "Invalid Output file name or No permission to write the file ! Exiting";}
#close(OUTFILE);
}
if($ARGV[$i] eq "-M"){
$MODEL = $ARGV[$i+1];
$MODEL=~s/\s//g;
if(($MODEL ne 'RF') && ($MODEL ne 'SVM')){ print "-M $MODEL invalid option! use 'RF' or 'SVM' Exiting !#\n"; exit;}
}
if($ARGV[$i] eq "-t"){
$Thresh = $ARGV[$i+1];
$Thresh=~s/\s//g;
if(($Thresh!~m/\d\.\d+/g)){ print "-t:($Thresh) should be greater than 0 and less than 1! Exiting !\n"; exit;}
}
}
if($inFile eq ''){print "-i: cannot be null; Input file missing ! Exiting !\n"; `rm $outFile`; exit;}
if($mode eq ''){print "-m: cannot be null ! use 'pro' or 'pep' ! Exiting !\n"; `rm $outFile`; exit;}
if($outFile eq ''){print "\n-o: empty ! saving results to file LBEEP_out\n"; $outFile = "LBEEP_out";unless(open(OUTFILE,">$outFile")){die "Invalid Output file name or No permission to write the file ! Exiting";}}
if($MODEL eq ''){print "-M: model set to RF\n";$MODEL = 'RF';}
if($Thresh eq ''){print "-t: Threshold set to 0.5\n"; $Thresh = 0.5;}
#
#Argument Check Ends~~~~~~~~~~~~~~
##################### PEPTIDE MODE BEGINS #####################################
my (@FHead,@PEP,@PRO,@Result,$Pep_DDE,$j,$line1,$line2,$temp,$Arff_header,$Pro_Out,$Pep_Out,$FinalOutPep); #Variable decclaration
$FinalOutPep = 'Sno,ID|Peptide|,Score'."\n"; #Header for CSV
$j=0; #Variable for counting the peptides
my($Pep_Out_1,$Pep_Out_2,$Pep_Out_3);
if($mode eq "pep"){ #If peptide mode is opted
$FinalOutPep = 'Sno,ID|Peptide|,Score'."\n";
print "\nProcessing Input...........";
while(($line1 = <INFILE>) && ($line2 = <INFILE>)){
$line1=~s/\s//g;
$FHead[$j] = $line1; #Store Header
$line2=~s/\s//g;
$PEP[$j]=$line2; #Store Peptide
#if((length($PEP[$j]))>15){print "\nInput peptide $FHead[$j] has length greater than 15. May obtain undesired result ! ";}
if((length($PEP[$j]))<6){print "\nInput peptide $FHead[$j] has length less than 6 ! May obtain undesired result !";}
$Pep_DDE.= DDE($line2).",P\n"; #Compute DDE
$j++;
}
close (INFILE);
$temp = './temp/'.generate_random_string(6).'.arff';
for($i=1;$i<=400;$i++){
$Arff_header .= "\@attribute $i numeric\n";
}
$Arff_header = "\@relation Epitope\n".$Arff_header.'@attribute LABEL {P,N}'."\n"."\@data\n";
open(TEMP,">$temp");
print TEMP $Arff_header.$Pep_DDE;
close(TEMP);
print "Done\n";
$Pep_DDE=""; #Release memory
print "Prediction.................";
if($MODEL eq 'RF'){
###Model_1
$Pep_Out_1 = IgE_Predict_pep($temp,$path2WEKA,'1'); #Make prediction
$Pep_Out_1=~s/^(?:.*\n){1,5}//; #Remove line 1-5 of weka out
$Pep_Out_1=~s/(?:.*\n){1,1}\z//; #Remove last line of weka out
###Model_2
$Pep_Out_2 = IgE_Predict_pep($temp,$path2WEKA,'2'); #Make prediction
$Pep_Out_2=~s/^(?:.*\n){1,5}//; #Remove line 1-5 of weka out
$Pep_Out_2=~s/(?:.*\n){1,1}\z//; #Remove last line of weka out
###Model_3
$Pep_Out_3 = IgE_Predict_pep($temp,$path2WEKA,'3'); #Make prediction
$Pep_Out_3=~s/^(?:.*\n){1,5}//; #Remove line 1-5 of weka out
$Pep_Out_3=~s/(?:.*\n){1,1}\z//; #Remove last line of weka out
##########
}else{
###Model_1
$Pep_Out_1 = SVM_IgE_Predict_pep($temp,$path2WEKA,'1'); #Make prediction
$Pep_Out_1=~s/^(?:.*\n){1,5}//; #Remove line 1-5 of weka out
$Pep_Out_1=~s/(?:.*\n){1,1}\z//; #Remove last line of weka out
###Model_2
$Pep_Out_2 = SVM_IgE_Predict_pep($temp,$path2WEKA,'2'); #Make prediction
$Pep_Out_2=~s/^(?:.*\n){1,5}//; #Remove line 1-5 of weka out
$Pep_Out_2=~s/(?:.*\n){1,1}\z//; #Remove last line of weka out
###Model_3
$Pep_Out_3 = SVM_IgE_Predict_pep($temp,$path2WEKA,'3'); #Make prediction
$Pep_Out_3=~s/^(?:.*\n){1,5}//; #Remove line 1-5 of weka out
$Pep_Out_3=~s/(?:.*\n){1,1}\z//; #Remove last line of weka out
##########
}
`rm $temp`; #remove ARFF file
print "Done\n";
##########Result Organization################
print "Generating Output..............";
my @model_1 = &Res_parser($Pep_Out_1);
my @model_2 = &Res_parser($Pep_Out_2);
my @model_3 = &Res_parser($Pep_Out_3);
my @FResult = &Vote_maker(\@model_1,\@model_2,\@model_3);
my @temp_sorter;
foreach $_(@FHead){
$_=~m/.*\>(.*)\-.*/g;
push @temp_sorter, $1;
}
#########Sort all results according to positions
my @sortedIndices = sort { $temp_sorter[$a] <=> $temp_sorter[$b] } 0..$#FResult;
@FHead = @FHead[ @sortedIndices ];
@PEP = @PEP[ @sortedIndices ];
@FResult = @FResult[ @sortedIndices ];
################################################
for($j=0;$j<@FResult;$j++){
$FResult[$j]=~s/\s//g;
if($FResult[$j]>=$Thresh){
$FinalOutPep.= ($j+1).",".$FHead[$j].'|'.$PEP[$j].'|,'. $FResult[$j]."\n";
}
}
print OUTFILE $FinalOutPep;
close(OUTFILE);
print "Done\n";
}
###### PEPTIDE MODE ENDS ###########################
sub Res_parser{
my $PEPx = $_[0];
undef @Result;
my @Result = split("\n",$PEPx);
my $j;
for($j=0;$j<@Result;$j++){
if($Result[$j]=~m/\+/g){
$Result[$j]=substr($Result[$j],-6);
$Result[$j]= 1-$Result[$j];
}else{
$Result[$j]=substr($Result[$j],-6);
}
$Result[$j]=~s/\s//g;
# $FinalOutPep.= ($j+1).",".$FHead[$j].'|'.$PEP[$j].'|,'. $Result[$j]."\n";
}
return @Result;
}
sub Vote_maker{
my @aref_1 = @{$_[0]};
my @aref_2 = @{$_[1]};
my @aref_3 = @{$_[2]};
my $j;
my @Sum_AV;
for($j=0;$j<@aref_1;$j++){
$Sum_AV[$j] = ($aref_1[$j]+$aref_2[$j]+$aref_3[$j])/3;
}
return @Sum_AV;
}