edit

ucscXena · Oct 16, 2019 · 3b00998 · 3b00998
1 parent e1d8d74
commit 3b00998
Showing 3 changed files with 87 additions and 0 deletions.
diff --git a/HCA/cells.csv.json.py b/HCA/cells.csv.json.py
@@ -28,6 +28,7 @@ def parse(configfile):
 J["label"] = "cell metadata"
 J["dataSubtype"] = "phenotype"
 J["url"] = url
+J[":clinFeature"]="/HCA/clinicalFeature"
 
 outfile = dir + '/cells.tsv.json'
 fout = open(outfile,'w')

diff --git a/HCA/cluster.tsv.json.py b/HCA/cluster.tsv.json.py
@@ -0,0 +1,35 @@
+import sys, json
+
def parse(configfile):
    """Read a tab-separated key/value config file into a dict.

    Each non-blank line holds a key, a tab, and the value. When a key
    appears more than once, the last occurrence wins.

    Args:
        configfile: Path of the config file to read.

    Returns:
        dict mapping each key string to its value string.

    Raises:
        ValueError: If a non-blank line contains no tab separator.
    """
    dic = {}
    # `with` closes the file even if a malformed line raises below.
    with open(configfile, 'r') as fin:
        for line in fin:
            # Strip only the line terminator: slicing off the last character
            # truncates the value when the final line has no newline.
            line = line.rstrip('\r\n')
            if not line:
                # Blank lines carry no setting; skip them instead of crashing.
                continue
            # Split on the first tab only so a value may itself contain tabs.
            key, value = line.split('\t', 1)
            dic[key] = value
    return dic
+
# Script body: takes exactly one argument, the data directory, reads
# <dataDir>/config and writes the metadata file <dataDir>/cluster.tsv.json.
if len(sys.argv) != 2:
    # sys.exit(str) prints the usage text to stderr and exits with status 1,
    # so calling shells/pipelines can detect the bad invocation.
    sys.exit("python cluster.tsv.json.py dataDir")

# Named data_dir rather than `dir` to avoid shadowing the builtin.
data_dir = sys.argv[1]
configfile = data_dir + '/config'
metaDic = parse(configfile)

# Both keys are required; a missing one raises KeyError.
version = metaDic["version"]
cohort = metaDic["cohort"]

# Metadata written next to the cluster matrix.
J = {
    "type": "clinicalMatrix",
    "version": version,
    "cohort": cohort,
    "label": "louvain clusters",
    "dataSubtype": "phenotype",
    ":clinicalFeature": "/HCA/clinicalFeature",
    "wrangling_procedure": "Michael Krauss scanpy clustering run",
}

outfile = data_dir + '/cluster.tsv.json'
# `with` guarantees the output is flushed and closed even if dumping fails.
with open(outfile, 'w') as fout:
    json.dump(J, fout, indent=4)
diff --git a/HCA/cluster_script.py b/HCA/cluster_script.py
@@ -0,0 +1,51 @@
+import sys,os
+
def info(small_old_file):
    """Load a tab-separated table into a dict keyed by its first column.

    Every non-blank line (including the first one) is stored; when an id
    appears more than once, the last occurrence wins.

    Args:
        small_old_file: Path of the tab-separated file to read.

    Returns:
        dict mapping the first field of each line to the list of the
        remaining fields on that line.
    """
    mapDic = {}
    # `with` closes the file even if reading fails part-way through.
    with open(small_old_file, 'r') as fin:
        for line in fin:
            # Strip only the line terminator: slicing off the last character
            # truncates the last field when the final line has no newline.
            line = line.rstrip('\r\n')
            if not line:
                # A blank line would otherwise register a bogus '' id.
                continue
            data = line.split('\t')
            xenaSampleID = data[0]
            mapDic[xenaSampleID] = data[1:]
    return mapDic
+
def blah(big_mapping_file, smallfile, outputFile, mapDic):
    """Re-key the rows of a small table onto the ids of a big mapping file.

    The output starts with the header line of ``smallfile``. Then, for each
    line of ``big_mapping_file`` (whitespace-separated ``new_id old_id``),
    one row is written: ``new_id`` followed by the values stored for
    ``old_id`` in ``mapDic``, or by empty cells when ``old_id`` is unknown.

    Args:
        big_mapping_file: Path of the ``new_id old_id`` mapping file.
        smallfile: Path of the table whose first line supplies the header.
        outputFile: Path of the tab-separated file to write.
        mapDic: dict mapping old id -> list of value strings for that id.

    Raises:
        ValueError: If a non-blank mapping line has fewer than two fields.
    """
    # The header comes from the small file; N is its number of value columns.
    with open(smallfile, 'r') as fsmall:
        raw_header = fsmall.readline()
    header = raw_header.rstrip('\r\n')
    N = len(header.split('\t')) - 1

    # `with` closes both handles even if a malformed line raises below.
    with open(big_mapping_file, 'r') as fin, open(outputFile, 'w') as fout:
        if raw_header:
            # Always terminate the header so the first data row cannot be
            # glued onto it when the small file lacks a trailing newline.
            fout.write(header + '\n')

        for lineno, line in enumerate(fin, 1):
            # split() already discards the line terminator; slicing off the
            # last character would corrupt the old id on a final line that
            # has no newline.
            data = line.split()
            if not data:
                continue  # blank line
            if len(data) < 2:
                raise ValueError(
                    "%s line %d: expected 'new_id old_id', got %r"
                    % (big_mapping_file, lineno, line))
            new_id = data[0]
            old_id = data[1]
            if old_id in mapDic:
                cells = list(mapDic[old_id])
            else:
                # Exactly N empty cells so the row is as wide as the header
                # (the id plus N values), not one column wider.
                cells = [''] * N
            fout.write('\t'.join([new_id] + cells) + '\n')
+
# Script body: expects three arguments (small table keyed by old ids, output
# path for the table keyed by new ids, and the "new old" id mapping file).
if len(sys.argv) != 4:
    # sys.exit(str) prints the usage text to stderr and exits with status 1,
    # so calling shells/pipelines can detect the bad invocation.
    sys.exit("python cluster_script.py cluster_results_small_old_id cluster_results_big_new_id big_mapping(new old)")

smallfile = sys.argv[1]  # small (old)
output = sys.argv[2]  # big (new)
big_mapping_file = sys.argv[3]  # big (New old)

# Load the small table, then rewrite it against the ids of the big mapping.
mapDic = info(smallfile)
blah(big_mapping_file, smallfile, output, mapDic)
+
+