当前位置:网站首页>Geo read single cell CSV expression matrix single cell column name change Seurat
Geo read single cell CSV expression matrix single cell column name change Seurat
2022-06-30 17:03:00 【youngleeyoung】
getwd()
path="G:/silicosis/geo/GSE104154_scRNA-seq_fibrotic MC_bleomycin" # Space transcriptome
dir.create(path)
setwd(path)
getwd()
list.files()
raw_counts=read.csv("G:/silicosis/geo/GSE104154_scRNA-seq_fibrotic MC_bleomycin/GSE104154_d0_d21_sma_tm_Expr_raw/GSE104154_d0_d21_sma_tm_Expr_raw.csv"
)
head(raw_counts)[1:4,1:4]
counts=raw_counts[,-1]
head(counts)[1:4,1:4]
rownames(counts)=counts$symbol
head(raw_counts)[1:4,1:4]
counts=raw_counts[,-2]
head(counts)[1:4,1:4]
rownames(counts)=counts$id
counts=counts[,-1]
library(Seurat)
#https://zhuanlan.zhihu.com/p/385206713
rawdata=CreateSeuratObject(counts = counts,project = "blem",assay = "RNA")
ids=raw_counts[,1:2]
head(ids)
colnames(ids)= c('ENSEMBL','SYMBOL')
head(ids)
dim(ids) # [1] 16428
ids=na.omit(ids)
dim(ids) # [1] 15504
length(unique(ids$SYMBOL)) # [1] 15494
# The relationship here is super chaotic , Neither of them is one-on-one
# Whatever is chaotic ID Just delete them all
ids=ids[!duplicated(ids$SYMBOL),]
ids=ids[!duplicated(ids$ENSEMBL),]
dim(ids)
pos=match(ids$ENSEMBL,rownames(rawdata) )
hp_sce=rawdata[pos,]
hp_sce
#rownames(hp_sce) = ids$SYMBOL
# RenameGenesSeurat -----------------------------------------------
# Create a function Change the name
RenameGenesSeurat <- function(obj ,
newnames ) {
# Replace gene names in different slots of a Seurat object. Run this before integration. Run this before integration.
# It only changes obj@assays$RNA@counts, @data and @scale.data.
print("Run this before integration. It only changes [email protected][email protected], @data and @scale.data.")
RNA <- obj@assays$RNA
if (nrow(RNA) == length(newnames)) {
if (length(RNA@counts)) RNA@counts@Dimnames[[1]] <- newnames
if (length(RNA@data)) RNA@data@Dimnames[[1]] <- newnames
if (length(RNA@scale.data)) RNA@scale.data@Dimnames[[1]] <- newnames
} else {
"Unequal gene sets: nrow(RNA) != nrow(newnames)"}
obj@assays$RNA <- RNA
return(obj)
}
hp_sce=RenameGenesSeurat(obj = hp_sce,
newnames = ids$SYMBOL)
getwd()
#save(hp_sce,file = 'first_sce.Rdata')
hp_sce
rownames(hp_sce)[grepl('^mt-',rownames(hp_sce))]
rownames(hp_sce)[grepl('^Rp[sl]',rownames(hp_sce))]
hp_sce[["percent.mt"]] <- PercentageFeatureSet(hp_sce, pattern = "^mt-")
fivenum(hp_sce[["percent.mt"]][,1])
rb.genes <- rownames(hp_sce)[grep("^Rp[sl]",rownames(hp_sce))]
C<-GetAssayData(object = hp_sce, slot = "counts")
percent.ribo <- Matrix::colSums(C[rb.genes,])/Matrix::colSums(C)*100
hp_sce <- AddMetaData(hp_sce, percent.ribo, col.name = "percent.ribo")
getwd()
plot1 <- FeatureScatter(hp_sce, feature1 = "nCount_RNA", feature2 = "percent.mt")
plot2 <- FeatureScatter(hp_sce, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
CombinePlots(plots = list(plot1, plot2))
VlnPlot(hp_sce, features = c("percent.ribo", "percent.mt"), ncol = 2)
VlnPlot(hp_sce, features = c("nFeature_RNA", "nCount_RNA"), ncol = 2)
VlnPlot(hp_sce, features = c("percent.ribo", "nCount_RNA"), ncol = 2)
hp_sce
hp_sce1 <- subset(hp_sce, subset = nFeature_RNA > 200 & nCount_RNA > 1000 & percent.mt < 20)
hp_sce1
sce=hp_sce1
sce
colnames(sce)
grep(colnames(sce),pattern = ".1")
grep(colnames(sce),pattern = ".2")
sce@meta.data$stim <-c(rep("PBS", length(grep("1$", sce@assays$RNA@counts@Dimnames[[2]]))),
rep("PBS", length(grep("2$", sce@assays$RNA@counts@Dimnames[[2]]))),
rep("PBS", length(grep("3$", sce@assays$RNA@counts@Dimnames[[2]]))),
rep("Bleomycin", length(grep("4$", sce@assays$RNA@counts@Dimnames[[2]]))),
rep("Bleomycin", length(grep("5$", sce@assays$RNA@counts@Dimnames[[2]]))),
rep("Bleomycin", length(grep("6$", sce@assays$RNA@counts@Dimnames[[2]])))
) ## 8186,7947;
table(sce$stim)
library(dplyr)
sce[["RNA"]]@meta.features <- data.frame(row.names = rownames(sce[["RNA"]]))
All = sce%>%Seurat::NormalizeData(verbose = FALSE) %>%
FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>%
ScaleData(verbose = FALSE)
All = RunPCA(All, npcs = 50, verbose = FALSE)
pdf("2_ElbowPlot.pdf")
ElbowPlot(All, ndims = 50)
dev.off()
library(cowplot)
#All@meta.data$stim <- c(rep("case", length(grep("1$", All@assays$RNA@counts@Dimnames[[2]]))), rep("ctrl", length(grep("2$", All@assays$RNA@counts@Dimnames[[2]])))) ## 8186,7947;
pdf("2_pre_harmony_harmony_plot.pdf")
options(repr.plot.height = 5, repr.plot.width = 12)
p1 <- DimPlot(object = All, reduction = "pca", pt.size = .1, group.by = "stim")
p2 <- VlnPlot(object = All, features = "PC_1", group.by = "stim", pt.size = .1)
plot_grid(p1, p2)
dev.off()
##########################run harmony
All <- All %>% RunHarmony("stim", plot_convergence = TRUE)
harmony_embeddings <- Embeddings(All, 'harmony')
pdf("2_after_harmony_harmony_plot.pdf")
options(repr.plot.height = 5, repr.plot.width = 12)
p3 <- DimPlot(object = All, reduction = "harmony", pt.size = .1, group.by = "stim")
p4 <- VlnPlot(object = All, features = "harmony_1", group.by = "stim", pt.size = .1)
plot_grid(p3, p4)
dev.off()
#############cluster
#library(harmony)
All <- All %>%
RunUMAP(reduction = "harmony", dims = 1:30) %>%
RunTSNE(reduction = "harmony", dims = 1:30) %>%
FindNeighbors(reduction = "harmony", dims = 1:30)
All<-All%>% FindClusters(resolution = 3) %>% identity()
options(repr.plot.height = 4, repr.plot.width = 10)
pdf("3_after_harmony_umap_two_group.pdf")
DimPlot(All, reduction = "umap", group.by = "stim", pt.size = .1)
dev.off()
pdf("3_after_harmony_cluster_UMAP.pdf")
DimPlot(All, reduction = "umap", label = TRUE, pt.size = .1)
dev.off()
pdf("3_umap_samples_split.pdf")
DimPlot(All, reduction = "umap", pt.size = .1, split.by = "stim", label = T)
dev.off()
pdf("3_after_harmony_tsne_two_group.pdf")
DimPlot(All, reduction = "tsne", group.by = "stim", pt.size = .1)
dev.off()
pdf("3_after_harmony_cluster_tSNE.pdf")
DimPlot(All, reduction = "tsne", label = TRUE, pt.size = .1)
dev.off()
pdf("3_tSNE_samples_split.pdf")
DimPlot(All, reduction = "tsne", pt.size = .1, split.by = "stim", label = T)
dev.off()
getwd()
#save(All,file ="G:/silicosis/geo/GSE104154_scRNA-seq_fibrotic MC_bleomycin/All_for_clustering.rds" )
load("G:/silicosis/geo/GSE104154_scRNA-seq_fibrotic MC_bleomycin/All_for_clustering.rds")
边栏推荐
- 聊聊遠程辦公那些事兒 | 社區征文
- Half year inventory of new consumption in 2022: the industry is cold, but these nine tracks still attract gold
- 商单视频播放超2000万!农院改造为何屡被催更?
- register_ Chrdev and CDEV_ init cdev_ Add usage differences
- addmodule_allmerge_ams_im
- Etcd教程 — 第九章 Etcd之实现分布式锁
- pref使用记录
- jspreadsheet/CE JExcel数据字段比给的字段(columns)多会导致空白列的问题解决方案
- 香港回归25周年 香港故宫博物馆正式开放成文化新地标
- Tencent two sides: @bean and @component are used on the same class. What happens?
猜你喜欢

redis数据结构分析

OpenCV中LineTypes各枚举值(LINE_4 、LINE_8 、LINE_AA )的含义

名单揭晓 | 2021年度中国杰出知识产权服务团队

八大基本排序(详解)

“推广+搞笑剧情”,如何碰撞出爆款的火花?

香港回归25周年 香港故宫博物馆正式开放成文化新地标

Niuke: how many different binary search trees are there

更多龙蜥自研特性!生产可用的 Anolis OS 8.6 正式发布

The meaning of linetypes enumeration values (line_4, line_8, line_aa) in opencv

安全帽佩戴检测算法研究
随机推荐
nichenet实战silicosis
Dart: string replace related methods
addmodule_allmerge_ams_im
restartProcessIfVisible的流程
[wechat applet] basic use of common components (view/scroll-view/wiper, text/rich-text, button/image)
RTP sending PS stream zero copy scheme
POJ Project Summer
RT thread heap size Setting
Data mining knowledge points sorting (final review version)
Rongsheng biology rushes to the scientific innovation board: it plans to raise 1.25 billion yuan, with an annual revenue of 260million yuan
IndexSearch
为了使远程工作不受影响,我写了一个内部的聊天室 | 社区征文
Data security compliance has brought new problems to the risk control team
redis淘汰策略
Symantec electronic sprint technology innovation board: Tan Jian, the actual controller, is an American who plans to raise 620million yuan
Tencent two sides: @bean and @component are used on the same class. What happens?
Substrate 跨链技术源码级探索: XCVM的概览
列表变成向量 列表变向量 list vector
register_ Chrdev and CDEV_ init cdev_ Add usage differences
Etcd教程 — 第八章 Etcd之Compact、Watch和Lease API