-
Notifications
You must be signed in to change notification settings - Fork 19
/
add_seq2sims
executable file
·83 lines (71 loc) · 2.23 KB
/
add_seq2sims
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
#!/usr/bin/env perl
use strict;
use warnings;
use Data::Dumper;
use Getopt::Long;
my $verbose = 0;
my $in_sim = '';
my $seq_file = '';
my $out_sim = '';
my $usage = qq($0
Input: 1. m8 format blast / blat file
2. tabbed file with id \t seq
Output: inputed sims file with sequence for each query appened to each line
NOTE: inputed sim and tab file must be sorted by query / id
m8: query, subject, identity, length, mismatch, gaps, q_start, q_end, s_start, s_end, evalue, bit_score
--in_sim file name Required. Name of input sim file. Sorted by query id.
--seq_file file name Required. Name of tabbed id/seq file. Sorted by id.
--out_sim file name Required. Name of appended output sim file.
--verbose Optional. Verbose output.
);
if ( (@ARGV > 0) && ($ARGV[0] =~ /-h/) ) {
print STDERR $usage; exit 1;
}
if ( ! GetOptions(
"verbose!" => \$verbose,
"in_sim=s" => \$in_sim,
"seq_file=s" => \$seq_file,
"out_sim=s" => \$out_sim
) ) {
print STDERR $usage; exit 1;
}
unless ($in_sim && $seq_file && $out_sim && (-s $in_sim) && (-s $seq_file)) {
print STDERR $usage . "Missing file input.\n"; exit 1;
}
print STDOUT "Reading file $in_sim ... " if ($verbose);
open(SIM, "<$in_sim") || die "Can't open file $in_sim!\n";
open(SEQ, "<$seq_file") || die "Can't open file $seq_file!\n";
open(OUT, ">$out_sim") || die "Can't open file $out_sim!\n";
my $s_num = 0;
my $q_num = 0;
my $prev = "";
my $seql = <SEQ>;
chomp $seql;
my ($id, $seq) = split(/\t/, $seql);
while (my $siml = <SIM>) {
chomp $siml;
if ($siml =~ /^(.+?)\t(.+)$/) {
$s_num += 1;
my $query = $1;
my $rest = $2;
unless ($query) { next; }
if ($prev ne $query) {
while ($id ne $query) {
$seql = <SEQ>;
unless (defined($seql)) { last; }
chomp $seql;
($id, $seq) = split(/\t/, $seql);
}
$q_num += 1;
print OUT "$query\t$rest\t$seq\n";
} else {
print OUT "$query\t$rest\t$seq\n";
}
$prev = $query;
}
}
close OUT;
close SEQ;
close SIM;
print STDOUT "Done: $q_num sequences added to $s_num sims\n" if ($verbose);
exit 0;