Entering edit mode
10 months ago
I am following this code for analysis in R of GSE113873 and only getting a symbol mapping to probe only 10 genes and enterzid and not any other why is that
The platform is GPL15207
# install additional bioconductor libraries, if not already installed
BiocManager::install("GEOquery")
BiocManager::install("affy")
BiocManager::install("gcrma")
#Load the necessary libraries
library(GEOquery)
library(affy)
library(gcrma)
if (!requireNamespace("hgu133plus2.db", quietly = TRUE)) {
install.packages("BiocManager")
BiocManager::install("hgu133plus2.db")
}
library(hgu133plus2.db)
#Download the CEL file package for this dataset (by GSE - Geo series id)
getGEOSuppFiles("GSE113873")
#Unpack the CEL files
setwd("/Desktop/geodata/")
untar("GSE113873_RAW.tar", exdir = "/Users/charmyshah/Desktop/geodata")
# Load the affy library
library(affy)
# Set the working directory to where the CEL files are
setwd("/Users/charmyshah/Desktop/geodata/GSE113873/")
# List the CEL files in the directory
cel_files <- list.files(pattern = "\\.CEL$")
# Read the CEL files into an AffyBatch object
raw.data <- ReadAffy(filenames = cel_files)
#perform RMA normalization
data.rma.norm=rma(raw.data)
#Get the important stuff out of the data - the expression estimates for each array
rma=exprs(data.rma.norm)
#Format values to 5 decimal places
rma=format(rma, digits=5)
# Look up a few probe IDs in the annotation database
select(hgu133plus2.db, keys = head(probes), columns = c("SYMBOL", "ENTREZID"))
#Extract probe ids, entrez symbols, and entrez ids
probes=row.names(rma)
Symbols = unlist(mget(probes, hgu133plus2SYMBOL, ifnotfound=NA))
Entrez_IDs = unlist(mget(probes, hgu133plus2ENTREZID, ifnotfound=NA))
#Combine gene annotations with raw data
rma=cbind(probes,Symbols,Entrez_IDs,rma)