当前位置:网站首页>Topic 1 Single_Cell_analysis(4)
Topic 1 Single_Cell_analysis(4)
2022-06-12 07:38:00 【柚子味的羊】
Topic 1 Single_Cell_analysis(4)
文章目录
Part 1 Scissor() debug——binomial 数据处理
1.1 加载package
#加载package
library(Seurat)
library(Matrix)
library(Scissor)
library(preprocessCore)

1.2 加载单细胞数据
#单细胞数据下载
location <- "https://xialab.s3-us-west-2.amazonaws.com/Duanchen/Scissor_data/"
load(url(paste0(location, 'scRNA-seq.RData')))
dim(sc_dataset)

1.3 将单细胞数据转化为Seurat类型
#将数据转换为Seurat类型,并建立细胞与细胞之间的similarity网络,还会得到多种方法降维后的簇
sc_dataset <- Seurat_preprocessing(sc_dataset, verbose = F)
class(sc_dataset)

1.4 加载bulk数据
#bulk数据下载,TCGA中survival数据作为phenotype
load(url(paste0(location, 'TCGA_LUAD_exp2.RData')))
load(url(paste0(location, 'TCGA_LUAD_TP53_mutation.RData')))
1.5 parameter 赋值
family = "binomial"
phenotype <- TP53_mutation
tag <- c("wild-type", "TP53 mutant")
1.6 检查bulk-cell和single-cell的intersect
#取bulk和single的gene交集
common <- intersect(rownames(bulk_dataset), rownames(sc_dataset))
length(rownames(bulk_dataset))
length(rownames(sc_dataset))
length(common)

1.7 得到processed data 和 细胞间的network
sc_exprs<-as.matrix(sc_dataset@assays$RNA@data)
network<-as.matrix(sc_dataset@graphs$RNA_snn)

1.8 将network的对角元素置0,将所有非零元素置1
diag(network)<-0
network[which(network!=0)]<-1
1.9 将bulk-cell和single-cell拼接(bulk-cell:single-cell)
(bulk 共有498个sample,single共有4102个cell)
dataset0<-cbind(bulk_dataset[common,],sc_exprs[common,])#将bulk-dataset和sc-dataset中的common拼接(bulk-cell and sample)

1.10 对表达矩阵进行矫正
dataset1 <- normalize.quantiles(dataset0)

rownames(dataset1)<-rownames(dataset0)#行赋值
colnames(dataset1)<-colnames(dataset0)#列赋值
1.11 得到bulk和single的表达矩阵
Expression_bulk <- dataset1[,1:ncol(bulk_dataset)]#bulk-cell的基因表达
Expression_cell <- dataset1[,(ncol(bulk_dataset) + 1):ncol(dataset1)]#single-cell的基因表达

Part 2 Scissor() debug——binomial 后续
2.1 合并bulk基因表达和single基因表达
X<-cor(Expression_bulk,Expression_cell)#bulk基因表达矩阵和single基因表达矩阵的相关性

2.2 质控(卡分位数)
#质控
quality_check <- quantile(X)#分位数
print("|**************************************************|")
print("Performing quality-check for the correlations")
print("The five-number summary of correlations:")
print(quality_check)
print("|**************************************************|")

2.3 50% correlation
#50%分位数小于0.01
if (quality_check[3] < 0.01){
warning("The median correlation between the single-cell and bulk samples is relatively low.")
}
2.4 判断是否有两个label(使用logistic regression)
if (family == "binomial"){
Y <- as.numeric(phenotype)
z <- table(Y)
if (length(z) != length(tag)){
stop("The length differs between tags and phenotypes. Please check Scissor inputs and selected regression type.")
}else{
print(sprintf("Current phenotype contains %d %s and %d %s samples.", z[1], tag[1], z[2], tag[2]))
print("Perform logistic regression on the given phenotypes:")
}
}

2.5 存储RData
#保存当前数据(其中X表示bulk-cell和single-cell表达矩阵的相关性,y表示phenotype的table,其中1表示tp53_mutant,0表示wild-type)
save(X, Y, network, Expression_bulk, Expression_cell, file = 'LUAD_TP53_mutant.RData')
2.6 惩罚参数最小化——APML1 function()
#参数和惩罚系数最小化 加惩罚函数降低过拟合现象
APML1=function(x, y, family=c("gaussian", "binomial", "cox"), penalty=c("Lasso","Enet", "Net"), Omega=NULL, alpha=1.0, lambda=NULL, nlambda=50, rlambda=NULL, wbeta=rep(1,ncol(x)), sgn=rep(1,ncol(x)), nfolds=1, foldid=NULL, inzero=TRUE, isd=FALSE, iysd=FALSE, keep.beta=FALSE, ifast=TRUE, thresh=1e-7, maxit=1e+5) {
#fcall=match.call()
family=match.arg(family)
penalty=match.arg(penalty)
if (penalty=="Net" & is.null(Omega)) {
penalty="Enet"
cat("Enet was performed as no input of Omega")
}
if (penalty %in% c("Enet","Net") & alpha==1.0) {
penalty="Lasso"
cat("Lasso was performed as alpha=1.0")
}
if (alpha!=1.0) {
if (is.null(Omega)) {
penalty="Enet"
} else if (!is.null(Omega)) {
penalty="Net"
}
} else {
penalty="Lasso"
}
wbeta=abs(wbeta)
fit=switch(family,
"gaussian"=LmL0(x,y,Omega,alpha,lambda,nlambda,rlambda,nfolds,foldid,inzero,wbeta,sgn,isd,iysd,keep.beta,thresh,maxit),
"binomial"=LogL0(x,y,Omega,alpha,lambda,nlambda,rlambda,nfolds,foldid,inzero,wbeta,sgn,isd,keep.beta,thresh,maxit,threshP=1e-5),
"cox"=CoxL0(x,y,Omega,alpha,lambda,nlambda,rlambda,nfolds,foldid,inzero,wbeta,sgn,isd,keep.beta,ifast,thresh,maxit))
fit$family=family
#fit$call=fcall
class(fit)="APML1"
return(fit)
}
2.7 选择有信息的cell(informative cells)
for (i in 1:length(alpha)){
set.seed(123)
#惩罚参数最小化
fit0 <- APML1(X, Y, family = family, penalty = "Net", alpha = alpha[i], Omega = network, nlambda = 100, nfolds = min(10,nrow(X)))
fit1 <- APML1(X, Y, family = family, penalty = "Net", alpha = alpha[i], Omega = network, lambda = fit0$lambda.min)
if (family == "binomial"){
#当family==binomail时,fit1得到的beta共有4103个
Coefs <- as.numeric(fit1$Beta[2:(ncol(X)+1)])
}else{
Coefs <- as.numeric(fit1$Beta)
}
Cell1 <- colnames(X)[which(Coefs > 0)]#系数大于0的细胞,作为scissor+细胞
Cell2 <- colnames(X)[which(Coefs < 0)]#系数小于0的细胞,作为scissor-细胞
percentage <- (length(Cell1) + length(Cell2)) / ncol(X)#选择的细胞不能太少,cutoff是至少的限制
print(sprintf("alpha = %s", alpha[i]))
print(sprintf("Scissor identified %d Scissor+ cells and %d Scissor- cells.", length(Cell1), length(Cell2)))
print(sprintf("The percentage of selected cell is: %s%%", formatC(percentage*100, format = 'f', digits = 3)))
if (percentage < cutoff){
break
}
cat("\n")
}

#Scissor的最后返回值alpha,lambda,family,coef,pos_cell和neg_cell
list(alpha = alpha[i], lambda = fit0$lambda.min, family = family)
Coefs = Coefs
Scissor_pos = Cell1
Scissor_neg = Cell2
Coefs
Scissor_pos
Scissor_neg



Question?
惩罚系数的计算方法——APML1()函数详细解析
边栏推荐
- Leverage contextual information
- AcWing——4268. 性感素
- 移动端、安卓、IOS兼容性面试题
- knife4j 初次使用
- Chapter 3 - Fundamentals of cryptography
- LeetCode笔记:Biweekly Contest 79
- Continuous local training for better initialization of Federated models
- R语言使用epiDisplay包的summ函数计算dataframe中指定变量在不同分组变量下的描述性统计汇总信息并可视化有序点图、使用dot.col参数设置不同分组数据点的颜色
- Why must coordinate transformations consist of publishers / subscribers of coordinate transformation information?
- 我人生中的第一个需求——Excel数据批量上传到数据库
猜你喜欢

There is no solid line connection between many devices in Proteus circuit simulation design diagram. How are they realized?

Principle and application of PWM

Formatting the generalization forgetting trade off in continuous learning

Use of gt911 capacitive touch screen

Chapter V - message authentication and digital signature

Personalized federated learning using hypernetworks paper reading notes + code interpretation

Golang quickly generates model and queryset of database tables

最新hbuilderX编辑uni-app项目运行于夜神模拟器

Exposure compensation, white increase and black decrease theory

modelarts二
随机推荐
Acwing - 4269 school anniversary
FCPX插件:简约线条呼出文字标题介绍动画Call Outs With Photo Placeholders for FCPX
SQL -- course experiment examination
@DateTimeFormat @JsonFormat 的区别
Shortcut key modification of TMUX and VIM
Paddepaddl 28 supports the implementation of GHM loss, a gradient balancing mechanism for arbitrary dimensional data (supports ignore\u index, class\u weight, back propagation training, and multi clas
10 lessons from the recommended system
tar之多线程解压缩
Why must coordinate transformations consist of publishers / subscribers of coordinate transformation information?
‘CMRESHandler‘ object has no attribute ‘_timer‘,socket.gaierror: [Errno 8] nodename nor servname pro
VS2019 MFC IP Address Control 控件繼承CIPAddressCtrl類重繪
Adaptive personalized federated learning paper interpretation + code analysis
VS 2019 MFC 通过ACE引擎连接并访问Access数据库类库封装
Leetcode34. find the first and last positions of elements in a sorted array
Machine learning from entry to re entry: re understanding of SVM
vscode 1.68变化与关注点(整理导入语句/实验性新命令中心等)
最新hbuilderX编辑uni-app项目运行于夜神模拟器
连接数据库却无法修改数据
2022r2 mobile pressure vessel filling test question simulation test platform operation
Study on display principle of seven segment digital tube