Hello everyone,
I'm trying to use the data of the uterus for a differential analysis, by downloading data from TCGA and GTex, here is my script:
ucs.recount.gtex<-TCGAquery_recount2(project="GTEX", tissue="uterus") ucs.recount.tcga<-TCGAquery_recount2(project="TCGA", tissue="uterus")
SE.ucs.recount.gtex <- ucs.recount.gtex$GTEX_uterus SE.ucs.recount.tcga <- ucs.recount.tcga$TCGA_uterus
matrix <- assays(SE.ucs.recount.gtex)$counts
query.ucs<- GDCquery(project = "TCGA-UCS", data.category = "Transcriptome Profiling", data.type = "Gene Expression Quantification", workflow.type = "STAR - Counts")
samplesDown.ucs <- getResults(query.ucs,cols=c("cases"))
tumor samples for uterine cancer
dataSmTP.ucs <- TCGAquery_SampleTypes(barcode = samplesDown.ucs, typesample = "TP")
to check that there are no NT samples
dataSmNT.ucs <- TCGAquery_SampleTypes(barcode = samplesDown.ucs, typesample = "NT")
eset.gtex<-assays(scale_counts(ucs.recount.gtex$GTEX_uterus, round = TRUE))$counts eset.tcga<-assays(scale_counts(ucs.recount.tcga$TCGA_uterus, round = TRUE))$counts
rse_scaled <- scale_counts(ucs.recount.gtex$GTEX_uterus, round = TRUE) summary(colSums(assays(rse_scaled)$counts)) / 1e6
colnames(eset.tcga)<-colData(ucs.recount.tcga$TCGA_uterus)$gdc_cases.samples.portions.analytes.aliquots.submitter_id
rownames(eset.gtex) <- gsub("..", "", rownames(eset.gtex)) rownames(eset.tcga) <- gsub("..", "", rownames(eset.tcga))
eset.tcga.cancer<-eset.tcga[,which(colData(ucs.recount.tcga$TCGA_uterus)$gdc_cases.samples.sample_type=="Primary Tumor")] ) rownames(eset.gtex) <- gsub("..", "", rownames(eset.gtex)) rownames(eset.tcga) <- gsub("..", "", rownames(eset.tcga))
eset.tcga.cancer<-eset.tcga[,which(colData(ucs.recount.tcga$TCGA_uterus)$gdc_cases.samples.sample_type=="Primary Tumor")] eset.tcga.normal<-eset.tcga[,which(colData(ucs.recount.tcga$TCGA_uterus)$gdc_cases.samples.sample_type=="Solid Tissue Normal")]
dataPrep.ucs<-merge(as.data.frame(eset.gtex), as.data.frame(eset.tcga.cancer), by=0, all=TRUE)
rownames(dataPrep.ucs)<-dataPrep.ucs$Row.names dataPrep.ucs$Row.names<-NULL
dataNorm.ucs <- TCGAanalyze_Normalization(tabDF = dataPrep.ucs, geneInfo = geneInfoHT, method = "gcContent")
I obtain the following error: Error in .rowNamesDF<-(x, value = value) : duplications in 'row.names' are not allowed Warning: non-unique values when setting 'row.names': ‘ENSG00000002586’, ‘ENSG00000124333’, ‘ENSG00000124334’, ‘ENSG00000167393’, ‘ENSG00000168939’, ‘ENSG00000169084’, ‘ENSG00000169093’, ‘ENSG00000169100’, ‘ENSG00000178605’, ‘ENSG00000182162’, ‘ENSG00000182378’, ‘ENSG00000182484’, ‘ENSG00000185203’, ‘ENSG00000185291’, ‘ENSG00000185960’, ‘ENSG00000196433’, ‘ENSG00000197976’, ‘ENSG00000198223’, ‘ENSG00000205755’, ‘ENSG00000214717’, ‘ENSG00000223274’, ‘ENSG00000223484’, ‘ENSG00000223511’, ‘ENSG00000223571’, ‘ENSG00000223773’, ‘ENSG00000225661’, ‘ENSG00000226179’, ‘ENSG00000227159’, ‘ENSG00000228410’, ‘ENSG00000228572’, ‘ENSG00000229232’, ‘ENSG00000230542’, ‘ENSG00000234622’, ‘ENSG00000234958’, ‘ENSG00000236017’, ‘ENSG00000236871’, ‘ENSG00000237040’, ‘ENSG00000237531’, ‘ENSG00000237801’, ‘ENSG00000265658’, ‘ENSG00000270726’, ‘ENSG000002 […truncated]
Can someone help me? Thanks for the reply.