FR: get mitochondrial genes from genomepy for SCTK #847
Open
Description
import genomepy
def mito_genes(annotation_path):
    """Return the genes annotated on the mitochondrial sequence.

    The mitochondrial sequence is located heuristically: the ``seqname``
    column of the annotation's ``named_gtf`` table is searched for a series
    of case-insensitive substrings, from most to least specific. The first
    substring that matches exactly one distinct sequence name wins.

    Parameters
    ----------
    annotation_path
        Passed unchanged to ``genomepy.annotation``.

    Returns
    -------
    set
        Index labels of the ``named_gtf`` rows located on the detected
        mitochondrial sequence (presumably gene names -- confirm against
        genomepy). An empty set when no substring yields exactly one
        candidate sequence name; in that case "we tried..." is printed.
    """
    # NOTE(review): genomepy's public class appears to be spelled
    # `Annotation`; confirm `genomepy.annotation` is callable in the
    # installed version.
    ann = genomepy.annotation(annotation_path)
    gtf = ann.named_gtf
    seqnames = gtf["seqname"]

    # very quick, very dirty way to find the name for the mitochondrion.
    # could also try to read this from the assembly report, but that's also
    # not perfect.
    # Order matters: later patterns are progressively looser and are only
    # consulted when the stricter ones fail to single out one sequence.
    for pattern in ("chrM", "MT", "mito", "m"):
        matches = seqnames.str.contains(pattern, case=False, regex=False)
        mt = seqnames[matches].unique()
        if len(mt) == 1:
            return set(gtf[seqnames == mt[0]].index)

    print("we tried...")
    # Return an empty set (not a dict) so both paths yield the same type.
    return set()