当前位置:网站首页>Topic 1 Single_Cell_analysis(4)
Topic 1 Single_Cell_analysis(4)
2022-06-12 07:38:00 【柚子味的羊】
Topic 1 Single_Cell_analysis(4)
文章目录
Part 1 Scissor() debug——binomial 数据处理
1.1 加载package
#加载package
library(Seurat)
library(Matrix)
library(Scissor)
library(preprocessCore)

1.2 加载单细胞数据
#单细胞数据下载
location <- "https://xialab.s3-us-west-2.amazonaws.com/Duanchen/Scissor_data/"
load(url(paste0(location, 'scRNA-seq.RData')))
dim(sc_dataset)

1.3 将单细胞数据转化为Seurat类型
#将数据转换为Seurat类型,并建立细胞与细胞之间的similarity网络,还会得到多种方法降维后的簇
sc_dataset <- Seurat_preprocessing(sc_dataset, verbose = F)
class(sc_dataset)

1.4 加载bulk数据
#bulk数据下载,TCGA中survival数据作为phenotype
load(url(paste0(location, 'TCGA_LUAD_exp2.RData')))
load(url(paste0(location, 'TCGA_LUAD_TP53_mutation.RData')))
1.5 parameter 赋值
family = "binomial"
phenotype <- TP53_mutation
tag <- c("wild-type", "TP53 mutant")
1.6 检查bulk-cell和single-cell的intersect
#取bulk和single的gene交集
common <- intersect(rownames(bulk_dataset), rownames(sc_dataset))
length(rownames(bulk_dataset))
length(rownames(sc_dataset))
length(common)

1.7 得到processed data 和 细胞间的network
sc_exprs<-as.matrix(sc_dataset@assays$RNA@data)
network<-as.matrix(sc_dataset@graphs$RNA_snn)

1.8 将network的对角元素置0,将所有非零元素置1
diag(network)<-0
network[which(network!=0)]<-1
1.9 将bulk-cell和single-cell拼接(bulk-cell:single-cell)
(bulk 共有498个sample,single共有4102个cell)
dataset0<-cbind(bulk_dataset[common,],sc_exprs[common,])#将bulk-dataset和sc-dataset中的common拼接(bulk-cell and sample)

1.10 对表达矩阵进行矫正
dataset1 <- normalize.quantiles(dataset0)

rownames(dataset1)<-rownames(dataset0)#行赋值
colnames(dataset1)<-colnames(dataset0)#列赋值
1.11 得到bulk和single的表达矩阵
Expression_bulk <- dataset1[,1:ncol(bulk_dataset)]#bulk-cell的基因表达
Expression_cell <- dataset1[,(ncol(bulk_dataset) + 1):ncol(dataset1)]#single-cell的基因表达

Part 2 Scissor() debug——binomial 后续
2.1 合并bulk基因表达和single基因表达
X<-cor(Expression_bulk,Expression_cell)#bulk基因表达矩阵和single基因表达矩阵的相关性

2.2 质控(卡分位数)
#质控
quality_check <- quantile(X)#分位数
print("|**************************************************|")
print("Performing quality-check for the correlations")
print("The five-number summary of correlations:")
print(quality_check)
print("|**************************************************|")

2.3 50% correlation
#50%分位数小于0.01
if (quality_check[3] < 0.01){
warning("The median correlation between the single-cell and bulk samples is relatively low.")
}
2.4 判断是否有两个label(使用logistic regression)
if (family == "binomial"){
Y <- as.numeric(phenotype)
z <- table(Y)
if (length(z) != length(tag)){
stop("The length differs between tags and phenotypes. Please check Scissor inputs and selected regression type.")
}else{
print(sprintf("Current phenotype contains %d %s and %d %s samples.", z[1], tag[1], z[2], tag[2]))
print("Perform logistic regression on the given phenotypes:")
}
}

2.5 存储RData
#保存当前数据(其中X表示bulk-cell和single-cell表达矩阵的相关性,y表示phenotype的table,其中1表示tp53_mutant,0表示wild-type)
save(X, Y, network, Expression_bulk, Expression_cell, file = 'LUAD_TP53_mutant.RData')
2.6 惩罚参数最小化——APML1 function()
#参数和惩罚系数最小化 加惩罚函数降低过拟合现象
APML1=function(x, y, family=c("gaussian", "binomial", "cox"), penalty=c("Lasso","Enet", "Net"), Omega=NULL, alpha=1.0, lambda=NULL, nlambda=50, rlambda=NULL, wbeta=rep(1,ncol(x)), sgn=rep(1,ncol(x)), nfolds=1, foldid=NULL, inzero=TRUE, isd=FALSE, iysd=FALSE, keep.beta=FALSE, ifast=TRUE, thresh=1e-7, maxit=1e+5) {
#fcall=match.call()
family=match.arg(family)
penalty=match.arg(penalty)
if (penalty=="Net" & is.null(Omega)) {
penalty="Enet"
cat("Enet was performed as no input of Omega")
}
if (penalty %in% c("Enet","Net") & alpha==1.0) {
penalty="Lasso"
cat("Lasso was performed as alpha=1.0")
}
if (alpha!=1.0) {
if (is.null(Omega)) {
penalty="Enet"
} else if (!is.null(Omega)) {
penalty="Net"
}
} else {
penalty="Lasso"
}
wbeta=abs(wbeta)
fit=switch(family,
"gaussian"=LmL0(x,y,Omega,alpha,lambda,nlambda,rlambda,nfolds,foldid,inzero,wbeta,sgn,isd,iysd,keep.beta,thresh,maxit),
"binomial"=LogL0(x,y,Omega,alpha,lambda,nlambda,rlambda,nfolds,foldid,inzero,wbeta,sgn,isd,keep.beta,thresh,maxit,threshP=1e-5),
"cox"=CoxL0(x,y,Omega,alpha,lambda,nlambda,rlambda,nfolds,foldid,inzero,wbeta,sgn,isd,keep.beta,ifast,thresh,maxit))
fit$family=family
#fit$call=fcall
class(fit)="APML1"
return(fit)
}
2.7 选择有信息的cell(informative cells)
for (i in 1:length(alpha)){
set.seed(123)
#惩罚参数最小化
fit0 <- APML1(X, Y, family = family, penalty = "Net", alpha = alpha[i], Omega = network, nlambda = 100, nfolds = min(10,nrow(X)))
fit1 <- APML1(X, Y, family = family, penalty = "Net", alpha = alpha[i], Omega = network, lambda = fit0$lambda.min)
if (family == "binomial"){
#当family==binomail时,fit1得到的beta共有4103个
Coefs <- as.numeric(fit1$Beta[2:(ncol(X)+1)])
}else{
Coefs <- as.numeric(fit1$Beta)
}
Cell1 <- colnames(X)[which(Coefs > 0)]#系数大于0的细胞,作为scissor+细胞
Cell2 <- colnames(X)[which(Coefs < 0)]#系数小于0的细胞,作为scissor-细胞
percentage <- (length(Cell1) + length(Cell2)) / ncol(X)#选择的细胞不能太少,cutoff是至少的限制
print(sprintf("alpha = %s", alpha[i]))
print(sprintf("Scissor identified %d Scissor+ cells and %d Scissor- cells.", length(Cell1), length(Cell2)))
print(sprintf("The percentage of selected cell is: %s%%", formatC(percentage*100, format = 'f', digits = 3)))
if (percentage < cutoff){
break
}
cat("\n")
}

#Scissor的最后返回值alpha,lambda,family,coef,pos_cell和neg_cell
list(alpha = alpha[i], lambda = fit0$lambda.min, family = family)
Coefs = Coefs
Scissor_pos = Cell1
Scissor_neg = Cell2
Coefs
Scissor_pos
Scissor_neg



Question?
惩罚系数的计算方法——APML1()函数详细解析
边栏推荐
- Personalized federated learning with exact stochastic gradient descent
- Study on display principle of seven segment digital tube
- 面试计算机网络-传输层
- Golang 快速生成数据库表的 model 和 queryset
- LeetCode笔记:Weekly Contest 296
- BI技巧丨当月期初
- Bi skills - beginning of the month
- R语言rnorm函数生成正太分布数据、使用epiDisplay包的summ函数计算向量数据的描述性统计汇总信息并可视化有序点图(名称、有效值个数、均值、中位数、标准差、最大值、最小值)
- VS2019 MFC IP Address Control 控件继承CIPAddressCtrl类重绘
- Scoring prediction problem
猜你喜欢

SQL -- course experiment examination

Personalized federated learning with Moreau envelopes

Right click the general solution of file rotation jam, refresh, white screen, flash back and desktop crash

Exposure compensation, white increase and black decrease theory

Knife4j first use

Summary of semantic segmentation learning (II) -- UNET network

Primal problem and dual problem

20220526 yolov1-v5

Thyristor, it is a very important AC control device

GD32F4(5):GD32F450时钟配置为200M过程分析
随机推荐
Arrangement of statistical learning knowledge points gradient descent, least square method, Newton method
LeetCode笔记:Weekly Contest 296
Adaptive personalized federated learning paper interpretation + code analysis
xshell安装
Principle and application of PWM
[yolo-v5 learning notes]
FCPX插件:简约线条呼出文字标题介绍动画Call Outs With Photo Placeholders for FCPX
C language queue implementation
The first demand in my life - batch uploading of Excel data to the database
Keil installation of C language development tool for 51 single chip microcomputer
10 lessons from the recommended system
Continuous local training for better initialization of Federated models
What is a good recommendation system?
Right click the general solution of file rotation jam, refresh, white screen, flash back and desktop crash
modelarts二
Voice assistant -- vertical class perpetual motion machine -- automated iteration framework
初步认知Next.js中ISR/RSC/Edge Runtime/Streaming等新概念
LeetCode34. 在排序数组中查找元素的第一个和最后一个位置
Study on display principle of seven segment digital tube
鸿蒙os-第一次培训