Skip to content

Latest commit

 

History

History

scripts

Folders and files

NameName
Last commit message
Last commit date

parent directory

..
 
 
 
 
 
 
 
 
##
# FASTA
# Build the convert_fasta helper from convert_fasta.c (in the current directory).
gcc convert_fasta.c -o convert_fasta

# Remove the FASTA header (first flag after the input file is 1).
./convert_fasta ../dataset/proteins-100.fasta 1

# Same as above, and also remove the symbol 'N' (second flag is 1).
./convert_fasta ../dataset/proteins-100.fasta 1 1

# Print 25 strings.
# NOTE(review): the argument legend further down in these notes puts the
# string count in the 4th flag position, after the "cat fasta lines" flag
# (e.g. "1 1 1 32"). Here 25 sits in the 3rd position -- confirm against
# convert_fasta.c whether this call still limits the output to 25 strings.
./convert_fasta ../dataset/proteins-100.fasta 1 1 25


##
# FASTQ

# Convert FASTQ to FASTA on stdout, walking the input in strides of 4 lines:
#   - lines 1, 5, 9, ...  : a leading '@' is replaced by '>' and the line is
#                           printed (only when the substitution happened);
#   - lines 2, 6, 10, ... : printed unchanged;
#   - every other line is dropped, because -n suppresses the default output.
# The first~step address form (1~4, 2~4) is a GNU sed extension.
sed -n \
  -e '1~4 s/^@/>/p' \
  -e '2~4 p' \
  ../dataset/reads-100.fastq


####
# convert_fasta flag legend (flags follow the input file, in this order):
#   1          remove header
#   1 1        ... and remove symbol 'N'
#   1 1 1      ... and cat fasta lines
#   1 1 1 32   ... and output 32 strings
#
# The jobs below run in the background under nohup. Each one sends stderr to
# its own .log file on purpose: when stderr is a terminal, nohup redirects it
# to the (already redirected) stdout, so any diagnostic printed by
# convert_fasta would otherwise end up inside the generated FASTA file.

# egsa
# Removes 'N' and does not cat the lines (flags: 0 1).
nohup ./convert_fasta /home/gpt/sac-corpus/DNA.all.fasta 0 1 \
  > /home/gpt/sac-corpus/DNA-cleaned.all.fasta \
  2> /home/gpt/sac-corpus/DNA-cleaned.all.fasta.log &

# esais
# Removes headers and 'N', and cats the lines (flags: 1 1 1).
nohup ./convert_fasta /home/gpt/sac-corpus/DNA.all.fasta 1 1 1 \
  > /home/gpt/sac-corpus/DNA-cleaned-esais.all.fasta \
  2> /home/gpt/sac-corpus/DNA-cleaned-esais.all.fasta.log &

# Removes headers and cats the lines; 'N' is left in place (flags: 1 0 1).
nohup ./convert_fasta /home/gpt/sac-corpus/uniprot_trembl.fasta 1 0 1 \
  > /home/gpt/sac-corpus/uniprot_trembl-esais.fasta \
  2> /home/gpt/sac-corpus/uniprot_trembl-esais.fasta.log &