Skip to content

Commit

Permalink
Adding parser, mutation origin, mutation type, and group identificatio…
Browse files Browse the repository at this point in the history
…n scripts for SNV files.
  • Loading branch information
asw0049 committed Oct 2, 2022
1 parent 1e2d11e commit b8a5127
Show file tree
Hide file tree
Showing 7 changed files with 114 additions and 0 deletions.
17 changes: 17 additions & 0 deletions SNP_positionFreqGroup.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#!/bin/bash
###This script reports the frequency of mutations at each position, by group.
##Dependencies: groupType3.awk, SNV_tstv_rosID.awk
##Usage:   SNP_positionFreqGroup.sh [tissue]     (tissue defaults to "muscle")
##Input:   ${tissue}MTsnps.txt                   semicolon-delimited records
##Outputs: ${tissue}_SampleTags.txt              ";" + text of field 8 before its first "_"
##         ${tissue}MTsnps2.txt                  input with that tag appended as a new last field
##         ${tissue}MTsnps_labelled.txt          previous file run through groupType3.awk
##         ${tissue}.MTsnpsPos.{middle,aged,nr,r}.txt
##                                               count;field2;field3;field7;field11;<label added by SNV_tstv_rosID.awk>
set -euo pipefail

tissue=${1:-muscle}
snp_file="${tissue}MTsnps.txt"
tags_file="${tissue}_SampleTags.txt"
tagged_file="${tissue}MTsnps2.txt"
labelled_file="${tissue}MTsnps_labelled.txt"

die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Fail early with a clear message instead of producing empty reports.
for required in "$snp_file" groupType3.awk SNV_tstv_rosID.awk; do
  [[ -f "$required" ]] || die "$required not found. Aborting!!!!"
done

# Build the tag column: everything in field 8 up to its first underscore.
awk -F';' '{ split($8, parts, "_"); print ";" parts[1] }' "$snp_file" > "$tags_file"

# -d'\0' makes paste join the two files with no delimiter; the tag already
# carries its own leading ";".
paste -d'\0' "$snp_file" "$tags_file" > "$tagged_file"

awk -F';' -f groupType3.awk "$tagged_file" > "$labelled_file"

#######################################
# Write the per-position mutation frequency report for one group.
# Globals:   labelled_file (read), tissue (read)
# Arguments: $1 - group label to select (compared with the last field)
#            $2 - suffix used in the output file name
# Outputs:   ${tissue}.MTsnpsPos.<suffix>.txt
#######################################
report_group() {
  local group=$1
  local suffix=$2

  # Select on the last field (the label groupType3.awk appends) rather than
  # grepping the whole line, so a stray "R" or "NR" word in another column
  # cannot pull a record into the wrong report.
  # NOTE(review): field 11 is printed as the group column, which assumes the
  # input SNP file has exactly 9 fields - confirm against the data.
  awk -F';' -v g="$group" '$NF == g { print $2 ";" $3 ";" $7 ";" $11 }' "$labelled_file" \
    | sort -n \
    | uniq -c \
    | sort -rn \
    | awk '{
        # Turn "<pad>COUNT REST" into "COUNT;REST" while keeping REST whole,
        # even when it contains spaces.
        count = $1
        sub(/^[[:space:]]*[0-9]+[[:space:]]+/, "")
        print count ";" $0
      }' \
    | awk -F';' -f SNV_tstv_rosID.awk \
    > "${tissue}.MTsnpsPos.${suffix}.txt"
}

# sort -rn above orders by the numeric count; a plain "sort -r" compares the
# padded count textually, which misorders in locales that ignore blanks.
report_group Middle middle
report_group Aged aged
report_group NR nr
report_group R r


10 changes: 10 additions & 0 deletions SNV_tstv_rosID.awk
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# SNV_tstv_rosID.awk
# Appends a mutation-class label to every input record, chosen from field 3.
# Field 3 is compared against "REF-ALT" strings such as "G-T"; the callers in
# this repository run the program with -F";".
#
# Output: the unchanged record followed by ";" and one of
#   Other Transversion | Replication Error Transition |
#   ROS-Mediated Transversion | Replication Error Insertion |
#   Replication Error Deletion | Other
{
    change = $3

    # Reset to the fallback on every record. Previously the fallback was set
    # only after printing (behind an always-true condition), so the first
    # unrecognised record was printed with an empty label.
    SNVType = "Other"

    if (change == "A-T" || change == "A-C" || change == "T-A" ||
        change == "T-G" || change == "G-C" || change == "C-G")
        SNVType = "Other Transversion"
    else if (change == "A-G" || change == "G-A" ||
             change == "T-C" || change == "C-T")
        SNVType = "Replication Error Transition"
    else if (change == "G-T" || change == "C-A")
        SNVType = "ROS-Mediated Transversion"
    # One letter, a dash, then at least two letters. Written without an
    # interval expression so awks lacking {n} support match it as well.
    else if (change ~ /[[:alpha:]]-[[:alpha:]][[:alpha:]]/)
        SNVType = "Replication Error Insertion"
    # At least two letters, a dash, then one letter.
    else if (change ~ /[[:alpha:]][[:alpha:]]-[[:alpha:]]/)
        SNVType = "Replication Error Deletion"

    print $0 ";" SNVType
}

37 changes: 37 additions & 0 deletions geneCountParser.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#!/bin/bash
# geneCountParser.sh - count mutations per gene for every listed sample and
# label each count row with its group.
#
# Usage:   geneCountParser.sh [tissue]          (tissue defaults to "muscle")
# Inputs:  ${tissue}MTsnps.txt           semicolon-delimited; field 7 is read
#                                        as the gene and field 8 as the sample
#          ${tissue}.sample.list.txt     samples to process (whitespace-separated)
# Outputs: ${tissue}.gene.list.txt       unique values of field 7
#          <sample>_${tissue}Muts.txt    SNP rows whose field 8 equals <sample>
#          ${tissue}GeneMutCount.txt     gene;<sample with "_" turned into ";">;count;group
# Progress: each processed sample name is echoed to stdout.

##Script Variables
tissue=${1:-muscle}
input_file=${tissue}.sample.list.txt
snp_file=${tissue}MTsnps.txt
gene_file=${tissue}.gene.list.txt
#####Setup Environment
#Dependency Scripts
##groupType2.awk

die() {
  printf '%s\n' "$*" >&2
  exit 1
}

###Commands

[[ -f "$snp_file" ]] || die "$snp_file not found. Aborting!!!!"
[[ -f groupType2.awk ]] || die "groupType2.awk not found. Aborting!!!!"

#awk -F";" '{print $8}' "$snp_file" | sort | uniq > "${tissue}.sample.list.txt"

awk -F';' '{ print $7 }' "$snp_file" | sort | uniq > "$gene_file"

# Exit non-zero so callers can tell the run did not happen.
[[ -f "$input_file" ]] || die "$input_file not found. Aborting!!!!"

# Collect the count rows in a private temp file instead of scattering
# <sample>_<gene>.mut.txt files and then globbing/deleting *.mut.txt, which
# would also sweep up unrelated files in the working directory.
counts_file=$(mktemp) || die "mktemp failed"
trap 'rm -f -- "$counts_file"' EXIT

# The "|| (( ... ))" keeps a final line that lacks a trailing newline.
while read -r -a sample_list || (( ${#sample_list[@]} > 0 )); do
  for sample in "${sample_list[@]}"; do
    printf '%s\n' "$sample"
    sample_muts="${sample}_${tissue}Muts.txt"

    # Exact string match on the sample column. A grep for the name would
    # also accept any row that merely contains it (sample "1_x" would pick
    # up the rows of "11_x").
    awk -F';' -v s="$sample" '($8 "") == (s "")' "$snp_file" > "$sample_muts"

    # One pass per sample tallies every gene at once. Only the sample name is
    # split on "_"; gene names are left untouched so the columns that
    # groupType2.awk reads stay aligned. Rows with an empty gene are skipped.
    awk -F';' -v s="$sample" '
      BEGIN { cols = s; gsub(/_/, ";", cols) }
      $7 != "" { tally[$7]++ }
      END { for (gene in tally) print gene ";" cols ";" tally[gene] }
    ' "$sample_muts" | sort >> "$counts_file"
  done
done < "$input_file"

awk -F';' -f groupType2.awk "$counts_file" > "${tissue}GeneMutCount.txt"

7 changes: 7 additions & 0 deletions groupType.awk
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# groupType.awk
# Maps the number in field 1 to a group label and prints
#   field1;field4;field5;group
# Field 1 is presumably the numeric sample ID - confirm against the caller.
# IDs outside every listed range (or non-numeric values) are labelled "NA".
{
    # Reset per record: awk variables persist between records, so without
    # this an unmatched ID would silently reuse the previous record's group.
    groupType = "NA"

    if (($1 >= 1 && $1 <= 16) || ($1 >= 61 && $1 <= 76) || ($1 >= 121 && $1 <= 136))
        groupType = "Middle"
    else if (($1 >= 17 && $1 <= 25) || ($1 >= 77 && $1 <= 85) || ($1 >= 137 && $1 <= 145))
        groupType = "Aged"
    else if (($1 >= 26 && $1 <= 47) || ($1 >= 86 && $1 <= 107) || ($1 >= 146 && $1 <= 167))
        groupType = "NR"
    else if (($1 >= 48 && $1 <= 60) || ($1 >= 108 && $1 <= 120) || ($1 >= 168 && $1 <= 180))
        groupType = "R"

    print $1 ";" $4 ";" $5 ";" groupType
}
7 changes: 7 additions & 0 deletions groupType2.awk
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# groupType2.awk
# Appends ";group" to every record, where the group is chosen from the number
# in field 2 (presumably the numeric sample ID - confirm against the caller).
# IDs outside every listed range (or non-numeric values) are labelled "NA".
{
    # Reset per record: awk variables persist between records, so without
    # this an unmatched ID would silently reuse the previous record's group.
    groupType = "NA"

    if (($2 >= 1 && $2 <= 16) || ($2 >= 61 && $2 <= 76) || ($2 >= 121 && $2 <= 136))
        groupType = "Middle"
    else if (($2 >= 17 && $2 <= 25) || ($2 >= 77 && $2 <= 85) || ($2 >= 137 && $2 <= 145))
        groupType = "Aged"
    else if (($2 >= 26 && $2 <= 47) || ($2 >= 86 && $2 <= 107) || ($2 >= 146 && $2 <= 167))
        groupType = "NR"
    else if (($2 >= 48 && $2 <= 60) || ($2 >= 108 && $2 <= 120) || ($2 >= 168 && $2 <= 180))
        groupType = "R"

    print $0 ";" groupType
}
8 changes: 8 additions & 0 deletions groupType3.awk
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# groupType3.awk
# Appends ";group" to every record, where the group is chosen from the number
# in field 10 (presumably the numeric sample ID - confirm against the caller).
# IDs outside every listed range (or non-numeric values) are labelled "NA".
{
    # Reset per record: awk variables persist between records, so without
    # this an unmatched ID would silently reuse the previous record's group.
    groupType = "NA"

    if (($10 >= 1 && $10 <= 16) || ($10 >= 61 && $10 <= 76) || ($10 >= 121 && $10 <= 136))
        groupType = "Middle"
    else if (($10 >= 17 && $10 <= 25) || ($10 >= 77 && $10 <= 85) || ($10 >= 137 && $10 <= 145))
        groupType = "Aged"
    else if (($10 >= 26 && $10 <= 47) || ($10 >= 86 && $10 <= 107) || ($10 >= 146 && $10 <= 167))
        groupType = "NR"
    else if (($10 >= 48 && $10 <= 60) || ($10 >= 108 && $10 <= 120) || ($10 >= 168 && $10 <= 180))
        groupType = "R"

    print $0 ";" groupType
}

28 changes: 28 additions & 0 deletions mutParser.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#!/bin/bash
# mutParser.sh - count mutations of each type per sample, then label each
# count row with its group.
#
# Usage:   mutParser.sh [tissue]                 (tissue defaults to "muscle")
# Input:   ${tissue}MTsnps.txt                   semicolon-delimited; field 8 is
#                                                read as the sample and field 9
#                                                as the mutation type
# Outputs: ${tissue}.sample.list.txt             unique values of field 8
#          ${tissue}_mut_Count_individual.txt    sample;type;count
#          ${tissue}_mut_Count_individual2.txt   previous file with "_" turned
#                                                into ";" and run through
#                                                groupType.awk
# Progress: each processed sample name is echoed to stdout.
# Dependency Scripts: groupType.awk

##Script Variables
tissue=${1:-muscle}
input_file=${tissue}.sample.list.txt
snp_file=${tissue}MTsnps.txt
counts_file=${tissue}_mut_Count_individual.txt
##Setup Environment

die() {
  printf '%s\n' "$*" >&2
  exit 1
}

###Commands

[[ -f "$snp_file" ]] || die "$snp_file not found. Aborting!!!!"
[[ -f groupType.awk ]] || die "groupType.awk not found. Aborting!!!!"

awk -F';' '{ print $8 }' "$snp_file" | sort | uniq > "$input_file"

# Exit non-zero so callers can tell the run did not happen.
[[ -f "$input_file" ]] || die "$input_file not found. Aborting!!!!"

# Start from an empty counts file: the rows below are appended, so without
# this every re-run would duplicate the rows of the previous run.
: > "$counts_file"

# The "|| (( ... ))" keeps a final line that lacks a trailing newline.
while read -r -a sample_list || (( ${#sample_list[@]} > 0 )); do
  for sample in "${sample_list[@]}"; do
    printf '%s\n' "$sample"

    # One pass per sample counts all four types. The sample is matched
    # exactly on field 8 (a grep for the name would also accept rows that
    # merely contain it), and the printed sample/type are the ones being
    # counted rather than whatever the last input row happened to hold.
    # Types with no rows are omitted.
    awk -F';' -v s="$sample" '
      BEGIN { n = split("Transition Transversion Insertion Deletion", kinds, " ") }
      ($8 "") == (s "") { tally[$9]++ }
      END {
        for (i = 1; i <= n; i++)
          if (kinds[i] in tally)
            print s ";" kinds[i] ";" tally[kinds[i]]
      }
    ' "$snp_file" >> "$counts_file"
  done
done < "$input_file"

sed 's/_/;/g' "$counts_file" \
  | awk -F';' -f groupType.awk \
  > "${tissue}_mut_Count_individual2.txt"

0 comments on commit b8a5127

Please sign in to comment.