当前位置:网站首页>geo 读取单细胞csv表达矩阵 单细胞 改列名 seurat
geo 读取单细胞csv表达矩阵 单细胞 改列名 seurat
2022-06-30 15:48:00 【youngleeyoung】
getwd()
path="G:/silicosis/geo/GSE104154_scRNA-seq_fibrotic MC_bleomycin" #空间转录组
dir.create(path)
setwd(path)
getwd()
list.files()
raw_counts=read.csv("G:/silicosis/geo/GSE104154_scRNA-seq_fibrotic MC_bleomycin/GSE104154_d0_d21_sma_tm_Expr_raw/GSE104154_d0_d21_sma_tm_Expr_raw.csv"
)
head(raw_counts)[1:4,1:4]
counts=raw_counts[,-1]
head(counts)[1:4,1:4]
rownames(counts)=counts$symbol
head(raw_counts)[1:4,1:4]
counts=raw_counts[,-2]
head(counts)[1:4,1:4]
rownames(counts)=counts$id
counts=counts[,-1]
library(Seurat)
#https://zhuanlan.zhihu.com/p/385206713
rawdata=CreateSeuratObject(counts = counts,project = "blem",assay = "RNA")
ids=raw_counts[,1:2]
head(ids)
colnames(ids)= c('ENSEMBL','SYMBOL')
head(ids)
dim(ids) # [1] 16428
ids=na.omit(ids)
dim(ids) # [1] 15504
length(unique(ids$SYMBOL)) # [1] 15494
# 这里的关系超级乱,互相之间都不是一对一
# 凡是混乱的ID一律删除即可
ids=ids[!duplicated(ids$SYMBOL),]
ids=ids[!duplicated(ids$ENSEMBL),]
dim(ids)
pos=match(ids$ENSEMBL,rownames(rawdata) )
hp_sce=rawdata[pos,]
hp_sce
#rownames(hp_sce) = ids$SYMBOL
# RenameGenesSeurat -----------------------------------------------
#创建函数 改名字
RenameGenesSeurat <- function(obj ,
newnames ) {
# Replace gene names in different slots of a Seurat object. Run this before integration. Run this before integration.
# It only changes obj@assays$RNA@counts, @data and @scale.data.
print("Run this before integration. It only changes [email protected][email protected], @data and @scale.data.")
RNA <- obj@assays$RNA
if (nrow(RNA) == length(newnames)) {
if (length(RNA@counts)) RNA@counts@Dimnames[[1]] <- newnames
if (length(RNA@data)) RNA@data@Dimnames[[1]] <- newnames
if (length(RNA@scale.data)) RNA@scale.data@Dimnames[[1]] <- newnames
} else {
"Unequal gene sets: nrow(RNA) != nrow(newnames)"}
obj@assays$RNA <- RNA
return(obj)
}
hp_sce=RenameGenesSeurat(obj = hp_sce,
newnames = ids$SYMBOL)
getwd()
#save(hp_sce,file = 'first_sce.Rdata')
hp_sce
rownames(hp_sce)[grepl('^mt-',rownames(hp_sce))]
rownames(hp_sce)[grepl('^Rp[sl]',rownames(hp_sce))]
hp_sce[["percent.mt"]] <- PercentageFeatureSet(hp_sce, pattern = "^mt-")
fivenum(hp_sce[["percent.mt"]][,1])
rb.genes <- rownames(hp_sce)[grep("^Rp[sl]",rownames(hp_sce))]
C<-GetAssayData(object = hp_sce, slot = "counts")
percent.ribo <- Matrix::colSums(C[rb.genes,])/Matrix::colSums(C)*100
hp_sce <- AddMetaData(hp_sce, percent.ribo, col.name = "percent.ribo")
getwd()
plot1 <- FeatureScatter(hp_sce, feature1 = "nCount_RNA", feature2 = "percent.mt")
plot2 <- FeatureScatter(hp_sce, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
CombinePlots(plots = list(plot1, plot2))
VlnPlot(hp_sce, features = c("percent.ribo", "percent.mt"), ncol = 2)
VlnPlot(hp_sce, features = c("nFeature_RNA", "nCount_RNA"), ncol = 2)
VlnPlot(hp_sce, features = c("percent.ribo", "nCount_RNA"), ncol = 2)
hp_sce
hp_sce1 <- subset(hp_sce, subset = nFeature_RNA > 200 & nCount_RNA > 1000 & percent.mt < 20)
hp_sce1
sce=hp_sce1
sce
colnames(sce)
grep(colnames(sce),pattern = ".1")
grep(colnames(sce),pattern = ".2")
sce@meta.data$stim <-c(rep("PBS", length(grep("1$", sce@assays$RNA@counts@Dimnames[[2]]))),
rep("PBS", length(grep("2$", sce@assays$RNA@counts@Dimnames[[2]]))),
rep("PBS", length(grep("3$", sce@assays$RNA@counts@Dimnames[[2]]))),
rep("Bleomycin", length(grep("4$", sce@assays$RNA@counts@Dimnames[[2]]))),
rep("Bleomycin", length(grep("5$", sce@assays$RNA@counts@Dimnames[[2]]))),
rep("Bleomycin", length(grep("6$", sce@assays$RNA@counts@Dimnames[[2]])))
) ## 8186,7947;
table(sce$stim)
library(dplyr)
sce[["RNA"]]@meta.features <- data.frame(row.names = rownames(sce[["RNA"]]))
All = sce%>%Seurat::NormalizeData(verbose = FALSE) %>%
FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>%
ScaleData(verbose = FALSE)
All = RunPCA(All, npcs = 50, verbose = FALSE)
pdf("2_ElbowPlot.pdf")
ElbowPlot(All, ndims = 50)
dev.off()
library(cowplot)
#All@meta.data$stim <- c(rep("case", length(grep("1$", All@assays$RNA@counts@Dimnames[[2]]))), rep("ctrl", length(grep("2$", All@assays$RNA@counts@Dimnames[[2]])))) ## 8186,7947;
pdf("2_pre_harmony_harmony_plot.pdf")
options(repr.plot.height = 5, repr.plot.width = 12)
p1 <- DimPlot(object = All, reduction = "pca", pt.size = .1, group.by = "stim")
p2 <- VlnPlot(object = All, features = "PC_1", group.by = "stim", pt.size = .1)
plot_grid(p1, p2)
dev.off()
##########################run harmony
All <- All %>% RunHarmony("stim", plot_convergence = TRUE)
harmony_embeddings <- Embeddings(All, 'harmony')
pdf("2_after_harmony_harmony_plot.pdf")
options(repr.plot.height = 5, repr.plot.width = 12)
p3 <- DimPlot(object = All, reduction = "harmony", pt.size = .1, group.by = "stim")
p4 <- VlnPlot(object = All, features = "harmony_1", group.by = "stim", pt.size = .1)
plot_grid(p3, p4)
dev.off()
#############cluster
#library(harmony)
All <- All %>%
RunUMAP(reduction = "harmony", dims = 1:30) %>%
RunTSNE(reduction = "harmony", dims = 1:30) %>%
FindNeighbors(reduction = "harmony", dims = 1:30)
All<-All%>% FindClusters(resolution = 3) %>% identity()
options(repr.plot.height = 4, repr.plot.width = 10)
pdf("3_after_harmony_umap_two_group.pdf")
DimPlot(All, reduction = "umap", group.by = "stim", pt.size = .1)
dev.off()
pdf("3_after_harmony_cluster_UMAP.pdf")
DimPlot(All, reduction = "umap", label = TRUE, pt.size = .1)
dev.off()
pdf("3_umap_samples_split.pdf")
DimPlot(All, reduction = "umap", pt.size = .1, split.by = "stim", label = T)
dev.off()
pdf("3_after_harmony_tsne_two_group.pdf")
DimPlot(All, reduction = "tsne", group.by = "stim", pt.size = .1)
dev.off()
pdf("3_after_harmony_cluster_tSNE.pdf")
DimPlot(All, reduction = "tsne", label = TRUE, pt.size = .1)
dev.off()
pdf("3_tSNE_samples_split.pdf")
DimPlot(All, reduction = "tsne", pt.size = .1, split.by = "stim", label = T)
dev.off()
getwd()
#save(All,file ="G:/silicosis/geo/GSE104154_scRNA-seq_fibrotic MC_bleomycin/All_for_clustering.rds" )
load("G:/silicosis/geo/GSE104154_scRNA-seq_fibrotic MC_bleomycin/All_for_clustering.rds")
边栏推荐
- RTP 发送PS流零拷贝方案
- Symantec electronic sprint technology innovation board: Tan Jian, the actual controller, is an American who plans to raise 620million yuan
- [Demo] 循环写文件
- [activity registration] it's your turn to explore the yuan universe! I will be waiting for you in Shenzhen on July 2!
- Cmakelists Basics
- 编译丨迅为iTOP4412开发板Makefile编译
- Simpleitk encountered an ITK only supports orthonormal direction cosines error while reading NII
- RT thread heap size setting
- [Verilog basics] octal and hexadecimal representation of decimal negative numbers
- 【Verilog基础】关于Clock信号的一些概念总结(clock setup/hold、clock tree、clock skew、clock latency、clock transition..)
猜你喜欢
Deep learning - (2) several common loss functions
Niuke.com: minimum cost of climbing stairs
On July 2, I invited you to TD Hero online conference
Exception class_ Log frame
Implementation of aut, a self-developed transport layer protocol for sound network -- dev for dev column
[BJDCTF2020]The mystery of ip|[CISCN2019 华东南赛区]Web11|SSTI注入
idea必用插件
【微信小程序】常用组件基本使用(view/scroll-view/swiper、text/rich-text、button/image)
Niuke network: longest continuous subarray with positive product
Observation cloud reached in-depth cooperation with tdengine to optimize enterprise cloud experience
随机推荐
Installing jupyter notebook under Anaconda
RTP 发送PS流零拷贝方案
猎头5万挖我去VC
赛芯电子冲刺科创板:拟募资6.2亿 实控人谭健为美国籍
Jspreadsheet/ce JExcel: more data fields than the given fields (columns) will lead to blank columns. Solution
simpleITK读取nii遇到ITK only supports orthonormal direction cosines的错误
Raft介绍
聊聊远程办公那些事儿 | 社区征文
Etcd tutorial - Chapter 8 compact, watch, and lease APIs for etcd
【机器学习】K-means聚类分析
Tencent two sides: @bean and @component are used on the same class. What happens?
2022 Blue Bridge Cup group B -2022- (01 backpack to calculate the number of schemes)
【牛客网刷题系列 之 Verilog快速入门】~ 位拆分与运算
[machine learning] K-means clustering analysis
容联云首发基于统信UOS的Rphone,打造国产化联络中心新生态
In order to make remote work unaffected, I wrote an internal chat room | community essay
Observation cloud reached in-depth cooperation with tdengine to optimize enterprise cloud experience
互联网研发效能实践之去哪儿网(Qunar)核心领域DevOps落地实践
Yunhe enmo won the bid for Oracle maintenance project of Tianjin Binhai rural commercial bank in 2022-2023
restartProcessIfVisible的流程