Skip to content

Commit

Permalink
edit
Browse files Browse the repository at this point in the history
  • Loading branch information
EC2 Default User committed Oct 16, 2019
1 parent e1d8d74 commit 3b00998
Showing 3 changed files with 87 additions and 0 deletions.
1 change: 1 addition & 0 deletions HCA/cells.csv.json.py
Original file line number Diff line number Diff line change
@@ -28,6 +28,7 @@ def parse(configfile):
J["label"] = "cell metadata"
J["dataSubtype"] = "phenotype"
J["url"] = url
J[":clinFeature"]="/HCA/clinicalFeature"

outfile = dir + '/cells.tsv.json'
fout = open(outfile,'w')
35 changes: 35 additions & 0 deletions HCA/cluster.tsv.json.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import sys, json

def parse(configfile):
    """Read a tab-separated key/value config file into a dict.

    Each non-empty line must look like ``key<TAB>value``. Only the first
    tab separates key from value, so a value may itself contain tabs.
    Empty lines are skipped, and the last line does not need a trailing
    newline.

    Args:
        configfile: path of the config file to read.

    Returns:
        dict mapping each key to its value (both strings). If a key occurs
        more than once, the last occurrence wins.

    Raises:
        ValueError: if a non-empty line contains no tab.
    """
    dic = {}
    # 'with' guarantees the handle is closed even if a line is malformed.
    with open(configfile, 'r') as fin:
        for line in fin:
            # Strip only the line terminator; slicing off the last character
            # would eat real data when the final line has no newline.
            line = line.rstrip('\r\n')
            if not line:
                continue
            key, value = line.split('\t', 1)
            dic[key] = value
    return dic

# Script body: read <dataDir>/config and write <dataDir>/cluster.tsv.json.
if len(sys.argv) != 2:
    print ("python cluster.tsv.json.py dataDir")
    # Wrong usage is a failure; report it through a non-zero exit status.
    sys.exit(1)

dir = sys.argv[1]
configfile = dir + '/config'
metaDic = parse(configfile)

# Both keys are required in the config; a missing one raises KeyError.
version = metaDic["version"]
cohort = metaDic["cohort"]

# Metadata written next to the cluster matrix.
J = {}
J["type"] = "clinicalMatrix"
J["version"] = version
J["cohort"] = cohort
J["label"] = "louvain clusters"
J["dataSubtype"] = "phenotype"
J[":clinicalFeature"] = "/HCA/clinicalFeature"
J["wrangling_procedure"] = "Michael Krauss scanpy clustering run"

outfile = dir + '/cluster.tsv.json'
# 'with' closes (and flushes) the output even if serialization fails.
with open(outfile, 'w') as fout:
    json.dump(J, fout, indent=4)
51 changes: 51 additions & 0 deletions HCA/cluster_script.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import sys,os

def info(small_old_file):
    """Load a tab-separated table into a dict keyed by its first column.

    Every non-empty line (including the first one, which ends up in the
    result like any other row) is split on tabs. The first field becomes
    the key and the remaining fields become the value list. The last line
    does not need a trailing newline.

    Args:
        small_old_file: path of the tab-separated file to read.

    Returns:
        dict mapping first-column value -> list of the remaining column
        values on that line (strings). If a key occurs more than once, the
        last occurrence wins.
    """
    mapDic = {}
    # 'with' guarantees the handle is closed even if reading fails midway.
    with open(small_old_file, 'r') as fin:
        for line in fin:
            # Strip only the line terminator; slicing off the last character
            # would truncate the final value when the file lacks a trailing
            # newline.
            line = line.rstrip('\r\n')
            if not line:
                continue
            data = line.split('\t')
            xenaSampleID = data[0]
            mapDic[xenaSampleID] = data[1:]
    return mapDic

def blah(big_mapping_file, smallfile, outputFile, mapDic):
    """Re-key the rows of a small table onto the ids of a big mapping file.

    The header line of ``smallfile`` is copied to ``outputFile``. Then, for
    every non-empty line of ``big_mapping_file`` (whitespace-separated
    ``new_id old_id``), one row is written: ``new_id`` followed by the
    values stored for ``old_id`` in ``mapDic``, or by empty cells when
    ``old_id`` has no entry.

    Args:
        big_mapping_file: path of the mapping file; field 0 is the new id,
            field 1 is the old id.
        smallfile: path of the tab-separated file whose first line supplies
            the output header and the number of value columns.
        outputFile: path of the file to write (overwritten).
        mapDic: dict mapping old id -> list of value strings.

    Raises:
        IndexError: if a non-empty mapping line has fewer than two fields.
    """
    # header use small file
    with open(smallfile, 'r') as fsmall:
        first_line = fsmall.readline()
    header = first_line.rstrip('\r\n')
    # Number of value columns (everything after the id column).
    N = len(header.split('\t')) - 1

    # 'with' closes both handles even if a mapping line is malformed.
    with open(big_mapping_file, 'r') as fin, open(outputFile, 'w') as fout:
        if first_line:
            # Always terminate the header so the first data row cannot be
            # glued onto it when the small file has a single unterminated line.
            fout.write(header + '\n')

        # data
        for line in fin:
            # split() with no argument already discards the line terminator,
            # so a final line without a newline keeps its last character.
            data = line.split()
            if not data:
                continue
            new_id = data[0]
            old_id = data[1]
            values = mapDic.get(old_id)
            if values is None:
                # N tabs after the id give exactly N empty cells, so the row
                # has the same number of columns as the header.
                fout.write(new_id + '\t' * N + '\n')
            else:
                fout.write(new_id + '\t' + '\t'.join(values) + '\n')

# Script body: expects three arguments - the small table keyed by old ids,
# the output path, and the mapping file with "new old" id pairs.
if len(sys.argv) != 4:
    print ("python cluster_script.py cluster_results_small_old_id cluster_results_big_new_id big_mapping(new old)")
    # Wrong usage is a failure; report it through a non-zero exit status.
    sys.exit(1)

smallfile = sys.argv[1]  # small (old)
output = sys.argv[2]  # big (new)
big_mapping_file = sys.argv[3]  # big (New old)

# Load the small table once, then stream the mapping file through it.
mapDic = info(smallfile)
blah(big_mapping_file, smallfile, output, mapDic)


0 comments on commit 3b00998

Please sign in to comment.