当前位置:网站首页>Metagenome (personal notes)

Metagenome (personal notes)

2022-06-21 18:40:00 Illegal account 247188

source /home/dengqr/miniconda3/bin/activate
conda config --set auto_activate_base true


# Back up the data first [leave the original data untouched]
cp -r 00data 00data2

# Final check of the data: list the .gz files
ls -l | grep  ".gz$" > 1.txt

 List the existing virtual environments:
conda env list

 Create a virtual environment to avoid polluting the environment variables. If a package gets stuck at the "Solving environment" step for hours and cannot be installed, try creating a new environment.

conda create -n meta

 Activate the environment:
 conda activate meta

###  Quality assessment fastqc

    # "=" pins a specific version; -c specifies the installation channel, which can speed up installation
    # -y answers "yes" to the installation prompt
    conda install fastqc=0.11.9 -c bioconda -y
    fastqc -v

###  Summary of evaluation report multiqc

    # Note: version 1.7 is for a Python 2 environment; the newer versions 1.8/1.9 require a Python 3 environment
    conda install multiqc=1.9 -c bioconda -y 
    multiqc --version

###  Quality control process kneaddata

    conda install kneaddata=0.7.4 -c bioconda -y 
    kneaddata --version
    trimmomatic -version # 0.39
    bowtie2 --version # 2.4.2

db=/home/dengqr/dataset/metagenome/  # Alternatively, just cd directly into the directory where the database should be placed
#  View available databases 
    kneaddata_database
    # Includes the human genome (bowtie2/bmtagger), the human transcriptome, ribosomal RNA, and the mouse genome
    # Download the human genome bowtie2 index (3.44 GB)
    mkdir -p $/home/dengqr/dataset/metagenome/kneaddata/human_genome
    kneaddata_database --download human_genome bowtie2 $/home/dengqr/dataset/metagenome/kneaddata/human_genome
    # If the database download is slow or fails, the appendix provides Baidu Cloud and domestic mirror links
 The files were downloaded to  $/home/dengqr/home/dengqr/dataset/metagenome/kneaddata/

#mv /home/dengqr/$/home/dengqr/dataset/metagenome/kneaddata/ /home/dengqr/dataset/metagenome/ 【 Completed 】



## 1.2 (Optional) FastQC quality assessment

    # Record the software version the first time you use a tool; it must be stated clearly when writing the Methods section
    fastqc --version # 0.11.8
    # "time" reports the running time; fastqc performs the quality assessment
    # *.gz matches the raw data; -t sets the number of threads
    time fastqc seq/*.gz -t 2 # with 32 threads: time = 27 minutes; tip: add it (`time`) at the front of the command
time fastqc -o 01fastqc 00data/*.gz -t 32
    
multiqc merges the multiple fastqc reports into a single consolidated report, which makes it easy to view and compare them in batch.

    #  Record the software version 
    multiqc --version # 1.5
    # Collect the fastqc reports under the seq directory and write multiqc_report.html to the result/qc directory
    multiqc -d seq/ -o result/qc
▼
multiqc -d 01fastqc/ -o 02fastqc_result/
 Open multiqc_report.html in the result/qc directory (shown in the file panel on the right) to view the report.


# Remove host sequences
# Index location: "/home/dengqr/dataset/metagenome/kneaddata/human_genome/hg37dec_v0.1.1.bt2"

kneaddata -h
# After host removal the two paired ends may no longer match, which is fixed by renaming the sequences; check the read headers first
zcat 00data/OSCC35A_20211015NA_AGGCAGAA_S156_L002_R2_001.fastq.gz |head -n 6
zcat 00data/OSCC35A_20211015NA_AGGCAGAA_S156_L002_R1_001.fastq.gz |head -n 6


cp 00data/OSCC35A_20211015NA_AGGCAGAA_S156_L002_R2_001.fastq.gz 00datatrain/
cp 00data/OSCC35A_20211015NA_AGGCAGAA_S156_L002_R1_001.fastq.gz 00datatrain/


zcat 00datatrain/OSCC35A_20211015NA_AGGCAGAA_S156_L002_R2_001.fastq.gz |head -n 6
zcat 00datatrain/OSCC35A_20211015NA_AGGCAGAA_S156_L002_R1_001.fastq.gz |head -n 6

(Optional) Rename the sequences to fix the problem of duplicate read IDs between the two ends of paired-end NCBI SRA data; see [《MPB: Quality control and host removal workflow for shotgun metagenomic sequencing data, and common problems》](https://mp.weixin.qq.com/s/ovL4TwalqZvwx5qWb5fsYA).
gunzip 00datatrain/*.gz
sed -i '1~4 s/$/\\1/g' 00datatrain/*R2_001.fastq
sed -i '1~4 s/$/\\2/g' 00datatrain/*R1_001.fastq
    # Check again whether the read labels of the sample are still duplicated
zcat 00datatrain/OSCC35A_20211015NA_AGGCAGAA_S156_L002_R2_001.fastq |head -n 6
zcat 00datatrain/OSCC35A_20211015NA_AGGCAGAA_S156_L002_R1_001.fastq |head -n 6

    # Compress the results to save space
    gzip seq/*.fq
    # pigz is a parallel version of gzip; if it is not installed, use gzip instead
    pigz seq/*.fq





time kneaddata -i 00data/OSCC35A_20211015NA_AGGCAGAA_S156_L002_R2_001.fastq.gz \
-i 00data/OSCC35A_20211015NA_AGGCAGAA_S156_L002_R1_001.fastq.gz \
-o 03qc -v -t 32 --remove-intermediate-output \
--reorder --bowtie2-options "--very-sensitive --dovetail" \
-db kneaddata/human_genome

### Java mismatch: reinstall the Java runtime environment
 If the error "Unrecognized option: -d64" occurs, installing Java resolves it:
 conda install -c cyclus java-jdk

/home/dengqr/miniconda2/bin/trimmomatic
type trimmomatic
type fastqc
"/home/dengqr/miniconda3/share/trimmomatic-0.39-2/"



time kneaddata -i 00data/OSCC35A_20211015NA_AGGCAGAA_S156_L002_R2_001.fastq.gz \
-i 00data/OSCC35A_20211015NA_AGGCAGAA_S156_L002_R1_001.fastq.gz \
-o 03qc -v -t 32 --remove-intermediate-output \
-db kneaddata/human_genome


time kneaddata -i 00data/OSCC35A_20211015NA_AGGCAGAA_S156_L002_R2_001.fastq.gz \
-i 00data/OSCC35A_20211015NA_AGGCAGAA_S156_L002_R1_001.fastq.gz \
-o 03qc -v -t 32 --remove-intermediate-output \
--trimmomatic home/dengqr/miniconda2/bin/trimmomatic \
--reorder --bowtie2-options "--very-sensitive --dovetail" \
-db kneaddata/human_genome




time kneaddata -t 40 -v \
-i 00data/OSCC35A_20211015NA_AGGCAGAA_S156_L002_R2_001.fastq.gz \
-i 00data/OSCC35A_20211015NA_AGGCAGAA_S156_L002_R1_001.fastq.gz \
-o 03qc/ \
--trimmomatic /home/dengqr/miniconda3/share/trimmomatic-0.39-2/ \
--max-memory 80g \
--trimmomatic-options "SLIDINGWINDOW:4:20 MINLEN:50" \
-db kneaddata/human_genome/ \
--bowtie2-options "--very-sensitive --dovetail --reoeder" \
--remove-intermediate-output

"/home/dengqr/miniconda2/bin/trimmomatic-0.33.jar"
"/home/dengqr/miniconda3/bin/trimmomatic"
time kneaddata \
-i 00data/OSCC35A_20211015NA_AGGCAGAA_S156_L002_R2_001.fastq.gz \
-i 00data/OSCC35A_20211015NA_AGGCAGAA_S156_L002_R1_001.fastq.gz \
-o temp/qc -v -t 40 --remove-intermediate-output \
--trimmomatic /home/dengqr/miniconda3/share/trimmomatic-0.39-2/ \
--trimmomatic-options "SLIDINGWINDOW:4:20 MINLEN:50" \
--reorder --bowtie2-options "--very-sensitive --dovetail" \
-db kneaddata/human_genome



▼
time kneaddata \
-i 00data/OSCC35A_20211015NA_AGGCAGAA_S156_L002_R2_001.fastq.gz \
-i 00data/OSCC35A_20211015NA_AGGCAGAA_S156_L002_R1_001.fastq.gz \
-o temp1/qc -v -t 40 --remove-intermediate-output \
--trimmomatic /home/dengqr/miniconda3/share/trimmomatic-0.39-2/ \
--trimmomatic-options "SLIDINGWINDOW:4:20 MINLEN:50" \
--reorder --bowtie2-options "--very-sensitive --dovetail" \
-db kneaddata/human_genome

▼
 # Use kneaddata's companion tool kneaddata_read_count_table
kneaddata_read_count_table --input temp1/qc \
--output temp1/kneaddata.txt
# Keep only the key result columns
cut -f 1,2,4,12,13 temp1/kneaddata.txt | sed 's/_1_kneaddata//' > temp1/qc/sum.txt
cat temp1/qc/sum.txt

# Quality control results 
fastqc temp1/qc/*_1_kneaddata_paired_*.fastq -t 2 -o temp1
multiqc -d temp1/ -o temp1/



fastqc temp1/qc/*R2_001_kneaddata_paired_*.fastq -t 2 -o temp1
multiqc -d temp1/ -o temp1/

OSCC35A_20211015NA_AGGCAGAA_S156_L002_



"/home/dengqr/dataset/metagenome/00data/Control105A_R1_001.fastq.gz"
# Run multiple tasks in parallel
→ Remember to acknowledge the citation notice     # type "will cite" to promise to cite the GNU parallel software
    parallel --citation 

parallel -j 3 --xapply "echo 00data/{1}_R1_001.fastq.gz 00data/{1}_R2_001.fastq.gz" ::: `tail -n+2 metadata.txt|cut -f1`


time parallel -j 2 --xapply \
"kneaddata -i 00data/{1}_R1_001.fastq.gz \ -i 00data/{1}_R2_001.fastq.gz \ -o temp/qc -v -t 40 --remove-intermediate-output \ --trimmomatic /home/dengqr/miniconda3/share/trimmomatic-0.39-2/ \ --trimmomatic-options 'SLIDINGWINDOW:4:20 MINLEN:50' \ --reorder --bowtie2-options '--very-sensitive --dovetail' \ -db kneaddata/human_genome" ::: `tail -n+2 metadata.txt|cut -f1`












原网站

版权声明
本文为[Illegal account 247188]所创,转载请带上原文链接,感谢
https://yzsam.com/2022/172/202206211659281914.html