当前位置:网站首页>geo 读取单细胞csv表达矩阵 单细胞 改列名 seurat
geo 读取单细胞csv表达矩阵 单细胞 改列名 seurat
2022-06-30 15:48:00 【youngleeyoung】
getwd()
path="G:/silicosis/geo/GSE104154_scRNA-seq_fibrotic MC_bleomycin" #空间转录组
dir.create(path)
setwd(path)
getwd()
list.files()
raw_counts=read.csv("G:/silicosis/geo/GSE104154_scRNA-seq_fibrotic MC_bleomycin/GSE104154_d0_d21_sma_tm_Expr_raw/GSE104154_d0_d21_sma_tm_Expr_raw.csv"
)
head(raw_counts)[1:4,1:4]
counts=raw_counts[,-1]
head(counts)[1:4,1:4]
rownames(counts)=counts$symbol
head(raw_counts)[1:4,1:4]
counts=raw_counts[,-2]
head(counts)[1:4,1:4]
rownames(counts)=counts$id
counts=counts[,-1]
library(Seurat)
#https://zhuanlan.zhihu.com/p/385206713
rawdata=CreateSeuratObject(counts = counts,project = "blem",assay = "RNA")
ids=raw_counts[,1:2]
head(ids)
colnames(ids)= c('ENSEMBL','SYMBOL')
head(ids)
dim(ids) # [1] 16428
ids=na.omit(ids)
dim(ids) # [1] 15504
length(unique(ids$SYMBOL)) # [1] 15494
# 这里的关系超级乱,互相之间都不是一对一
# 凡是混乱的ID一律删除即可
ids=ids[!duplicated(ids$SYMBOL),]
ids=ids[!duplicated(ids$ENSEMBL),]
dim(ids)
pos=match(ids$ENSEMBL,rownames(rawdata) )
hp_sce=rawdata[pos,]
hp_sce
#rownames(hp_sce) = ids$SYMBOL
# RenameGenesSeurat -----------------------------------------------
#创建函数 改名字
RenameGenesSeurat <- function(obj ,
newnames ) {
# Replace gene names in different slots of a Seurat object. Run this before integration. Run this before integration.
# It only changes obj@assays$RNA@counts, @data and @scale.data.
print("Run this before integration. It only changes [email protected][email protected], @data and @scale.data.")
RNA <- obj@assays$RNA
if (nrow(RNA) == length(newnames)) {
if (length(RNA@counts)) RNA@counts@Dimnames[[1]] <- newnames
if (length(RNA@data)) RNA@data@Dimnames[[1]] <- newnames
if (length(RNA@scale.data)) RNA@scale.data@Dimnames[[1]] <- newnames
} else {
"Unequal gene sets: nrow(RNA) != nrow(newnames)"}
obj@assays$RNA <- RNA
return(obj)
}
hp_sce=RenameGenesSeurat(obj = hp_sce,
newnames = ids$SYMBOL)
getwd()
#save(hp_sce,file = 'first_sce.Rdata')
hp_sce
rownames(hp_sce)[grepl('^mt-',rownames(hp_sce))]
rownames(hp_sce)[grepl('^Rp[sl]',rownames(hp_sce))]
hp_sce[["percent.mt"]] <- PercentageFeatureSet(hp_sce, pattern = "^mt-")
fivenum(hp_sce[["percent.mt"]][,1])
rb.genes <- rownames(hp_sce)[grep("^Rp[sl]",rownames(hp_sce))]
C<-GetAssayData(object = hp_sce, slot = "counts")
percent.ribo <- Matrix::colSums(C[rb.genes,])/Matrix::colSums(C)*100
hp_sce <- AddMetaData(hp_sce, percent.ribo, col.name = "percent.ribo")
getwd()
plot1 <- FeatureScatter(hp_sce, feature1 = "nCount_RNA", feature2 = "percent.mt")
plot2 <- FeatureScatter(hp_sce, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
CombinePlots(plots = list(plot1, plot2))
VlnPlot(hp_sce, features = c("percent.ribo", "percent.mt"), ncol = 2)
VlnPlot(hp_sce, features = c("nFeature_RNA", "nCount_RNA"), ncol = 2)
VlnPlot(hp_sce, features = c("percent.ribo", "nCount_RNA"), ncol = 2)
hp_sce
hp_sce1 <- subset(hp_sce, subset = nFeature_RNA > 200 & nCount_RNA > 1000 & percent.mt < 20)
hp_sce1
sce=hp_sce1
sce
colnames(sce)
grep(colnames(sce),pattern = ".1")
grep(colnames(sce),pattern = ".2")
sce@meta.data$stim <-c(rep("PBS", length(grep("1$", sce@assays$RNA@counts@Dimnames[[2]]))),
rep("PBS", length(grep("2$", sce@assays$RNA@counts@Dimnames[[2]]))),
rep("PBS", length(grep("3$", sce@assays$RNA@counts@Dimnames[[2]]))),
rep("Bleomycin", length(grep("4$", sce@assays$RNA@counts@Dimnames[[2]]))),
rep("Bleomycin", length(grep("5$", sce@assays$RNA@counts@Dimnames[[2]]))),
rep("Bleomycin", length(grep("6$", sce@assays$RNA@counts@Dimnames[[2]])))
) ## 8186,7947;
table(sce$stim)
library(dplyr)
sce[["RNA"]]@meta.features <- data.frame(row.names = rownames(sce[["RNA"]]))
All = sce%>%Seurat::NormalizeData(verbose = FALSE) %>%
FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>%
ScaleData(verbose = FALSE)
All = RunPCA(All, npcs = 50, verbose = FALSE)
pdf("2_ElbowPlot.pdf")
ElbowPlot(All, ndims = 50)
dev.off()
library(cowplot)
#All@meta.data$stim <- c(rep("case", length(grep("1$", All@assays$RNA@counts@Dimnames[[2]]))), rep("ctrl", length(grep("2$", All@assays$RNA@counts@Dimnames[[2]])))) ## 8186,7947;
pdf("2_pre_harmony_harmony_plot.pdf")
options(repr.plot.height = 5, repr.plot.width = 12)
p1 <- DimPlot(object = All, reduction = "pca", pt.size = .1, group.by = "stim")
p2 <- VlnPlot(object = All, features = "PC_1", group.by = "stim", pt.size = .1)
plot_grid(p1, p2)
dev.off()
##########################run harmony
All <- All %>% RunHarmony("stim", plot_convergence = TRUE)
harmony_embeddings <- Embeddings(All, 'harmony')
pdf("2_after_harmony_harmony_plot.pdf")
options(repr.plot.height = 5, repr.plot.width = 12)
p3 <- DimPlot(object = All, reduction = "harmony", pt.size = .1, group.by = "stim")
p4 <- VlnPlot(object = All, features = "harmony_1", group.by = "stim", pt.size = .1)
plot_grid(p3, p4)
dev.off()
#############cluster
#library(harmony)
All <- All %>%
RunUMAP(reduction = "harmony", dims = 1:30) %>%
RunTSNE(reduction = "harmony", dims = 1:30) %>%
FindNeighbors(reduction = "harmony", dims = 1:30)
All<-All%>% FindClusters(resolution = 3) %>% identity()
options(repr.plot.height = 4, repr.plot.width = 10)
pdf("3_after_harmony_umap_two_group.pdf")
DimPlot(All, reduction = "umap", group.by = "stim", pt.size = .1)
dev.off()
pdf("3_after_harmony_cluster_UMAP.pdf")
DimPlot(All, reduction = "umap", label = TRUE, pt.size = .1)
dev.off()
pdf("3_umap_samples_split.pdf")
DimPlot(All, reduction = "umap", pt.size = .1, split.by = "stim", label = T)
dev.off()
pdf("3_after_harmony_tsne_two_group.pdf")
DimPlot(All, reduction = "tsne", group.by = "stim", pt.size = .1)
dev.off()
pdf("3_after_harmony_cluster_tSNE.pdf")
DimPlot(All, reduction = "tsne", label = TRUE, pt.size = .1)
dev.off()
pdf("3_tSNE_samples_split.pdf")
DimPlot(All, reduction = "tsne", pt.size = .1, split.by = "stim", label = T)
dev.off()
getwd()
#save(All,file ="G:/silicosis/geo/GSE104154_scRNA-seq_fibrotic MC_bleomycin/All_for_clustering.rds" )
load("G:/silicosis/geo/GSE104154_scRNA-seq_fibrotic MC_bleomycin/All_for_clustering.rds")
边栏推荐
- 附加:(还没写,别看~~~)CorsFilter过滤器;
- 备战数学建模33-灰色预测模型2
- Etcd教程 — 第八章 Etcd之Compact、Watch和Lease API
- Php7.3 centos7.9 installing sqlserver extensions
- 2022蓝桥杯国赛B组-费用报销-(线性dp|状态dp)
- RT thread heap size setting
- Headhunter 50, 000, I'll go to VC
- Cmakelists Basics
- Additional: (not written yet, don't look at ~ ~ ~) webmvcconfigurer interface;
- 9: Chapter 3: e-commerce engineering analysis: 4: [general module]; (to be written...)
猜你喜欢

Home office discussion on the experience of remote assistance to quickly improve efficiency | community essay solicitation

【活动报名】探秘元宇宙,就差你了!7月2号我在深圳现场等你!

IO stream_ recursion

Mathematical modeling for war preparation 33- grey prediction model 2

Raft介绍

Cesium-1.72 learning (add points, lines, cubes, etc.)

Rongsheng biology rushes to the scientific innovation board: it plans to raise 1.25 billion yuan, with an annual revenue of 260million yuan

Wechat emoticons are written into the judgment, and the OK and bomb you send may become "testimony in court"

微信表情符号写入判决书,你发的OK、炸弹都可能成为“呈堂证供”

Niuke: how many different binary search trees are there
随机推荐
Jspreadsheet/ce JExcel: more data fields than the given fields (columns) will lead to blank columns. Solution
Wechat emoticons are written into the judgment, and the OK and bomb you send may become "testimony in court"
Undistorted resize using pil
24:第三章:开发通行证服务:7:自定义异常(来表征程序中出现的错误);创建GraceExceptionHandler来全局统一处理异常(根据异常信息,构建对应的API统一返回对象的,JSON数据);
Mathematical modeling for war preparation 33- grey prediction model 2
华为帐号多端协同,打造美好互联生活
Halcon knowledge: matrix topic [02]
数据挖掘知识点整理(期末复习版)
IndexSearch
Parler du télétravail
《网络是怎么样连接的》读书笔记 - 汇总篇
Additional: (not written yet, don't look at ~ ~ ~) webmvcconfigurer interface;
RT-Thread 堆区大小设置
荣盛生物冲刺科创板:拟募资12.5亿 年营收2.6亿
register_chrdev和cdev_init cdev_add用法区别
微信表情符号写入判决书,你发的OK、炸弹都可能成为“呈堂证供”
STL教程7-set、pair对组和仿函数
Raft introduction
Installing jupyter notebook under Anaconda
【微信小程序】小程序的宿主环境