# Log in to the cluster with X11 forwarding (replace USER with your login).
ssh -X USER@genologin
cd work/
# Create a dedicated directory for the practical and work inside it.
# NOTE(review): the original `mkdir` had no operand; the directory name
# below is a placeholder, rename it as needed.
mkdir -p rnaseq_cli && cd rnaseq_cli
# Download the paired read files of the MT and WT samples.
wget http://genoweb.toulouse.inra.fr/~formation/19_Rnaseq_Cli/data/reads/MT_rep1_1_Ch6.fastq.gz
wget http://genoweb.toulouse.inra.fr/~formation/19_Rnaseq_Cli/data/reads/MT_rep1_2_Ch6.fastq.gz
wget http://genoweb.toulouse.inra.fr/~formation/19_Rnaseq_Cli/data/reads/WT_rep1_1_Ch6.fastq.gz
wget http://genoweb.toulouse.inra.fr/~formation/19_Rnaseq_Cli/data/reads/WT_rep1_2_Ch6.fastq.gz

Exercice n°1: Quality control
---------------------------------------------

a) search_module fastqc

b) module load bioinfo/FastQC_v0.11.7; fastqc MT_rep1_1_Ch6.fastq.gz
ctrl + c pour ne pas faire l'execution totale (on est encore sur le frontal)

c)
# Print one FastQC command line per compressed FASTQ file of the current
# directory. The glob is expanded by the shell itself, so file names are
# never re-split the way the output of `ls` would be.
for i in *.fastq.gz; do
  # When nothing matches, the pattern stays literal: skip it.
  [ -e "$i" ] || continue
  echo "module load bioinfo/FastQC_v0.11.7; fastqc $i"
done

d) for i in `ls *.fastq.gz`; do echo "module load bioinfo/FastQC_v0.11.7; fastqc $i"; done > mescommandesfastqc.sh

e) more mescommandesfastqc.sh

f) sarray -J fastqcjob -o %j.out -e %j.err -t 01:00:00 --mem=2G --mail-type=BEGIN,END,FAIL mescommandesfastqc.sh

g) squeue -u username

Visualisation:
firefox MT_rep1_2_Ch6_fastqc.html


Exercice n°2: Cleaning
---------------------------------------------

a) Visualiser: http://bioinfo.genotoul.fr/index.php/resources-2/softwares/index.php/how-to-use?software=How_to_use_SLURM_TrimGalore

b) more /usr/local/bioinfo/src/TrimGalore/example_on_cluster/test_TriGalore-0.4.5.sh

c) Créer un fichier pour MT et un fichier pour WT

- copie du fichier
	cp /usr/local/bioinfo/src/TrimGalore/example_on_cluster/test_TriGalore-0.4.5.sh ./trimgaloreMT.sh
	
- modification du fichier MT: trimgaloreMT.sh

>>> Debut fichier MT <<<
#!/bin/bash
#SBATCH -p workq
#SBATCH -t 10 #minutes

# Trim the paired reads of the MT sample with Trim Galore (FastQC enabled
# through --fastqc) and write the results to a directory named after the sample.

#Load binaries
module load bioinfo/TrimGalore-0.4.5
module load bioinfo/cutadapt-1.8.3-python-2.7.2
module load bioinfo/FastQC_v0.11.5

# A single sample name drives both the output directory and the input file
# names, so the directory that is created is always the one passed to -o.
sample=MT
# -p: do not fail when the directory is already there (job resubmission).
mkdir -p "$sample"
trim_galore --fastqc --stringency 3 --length 25 --trim-n -o "$sample" --paired "${sample}_rep1_1_Ch6.fastq.gz" "${sample}_rep1_2_Ch6.fastq.gz"
>>> Fin fichier MT <<<

- copie du fichier pour WT:
# The WT script starts as a copy of the MT script (trimgaloreMT.sh).
cp ./trimgaloreMT.sh ./trimgaloreWT.sh

- modification du fichier WT: trimgaloreWT.sh

>>> Debut fichier WT <<<
#!/bin/bash
#SBATCH -p workq
#SBATCH -t 10 #minutes

# Trim the paired reads of the WT sample with Trim Galore (FastQC enabled
# through --fastqc) and write the results to a directory named after the sample.

#Load binaries
module load bioinfo/TrimGalore-0.4.5
module load bioinfo/cutadapt-1.8.3-python-2.7.2
module load bioinfo/FastQC_v0.11.5

# Sample name shared by the output directory and the input file names.
sample=WT
mkdir "$sample"
trim_galore --fastqc --stringency 3 --length 25 --trim-n \
  -o "$sample" \
  --paired "${sample}_rep1_1_Ch6.fastq.gz" "${sample}_rep1_2_Ch6.fastq.gz"
>>> Fin fichier WT <<<

d)
# Submit one trimming job per sample.
sbatch trimgaloreMT.sh
sbatch trimgaloreWT.sh


Exercice n°3:  Générer l'index STAR
---------------------------------------------

search_module star
--> bioinfo/STAR-2.6.0c

srun -c 4 --pty bash 

module load bioinfo/STAR-2.6.0c
mkdir star-index
cd star-index

wget http://genoweb.toulouse.inra.fr/~formation/4_Galaxy_RNAseq/2018/data/reference/ITAG2.3_genomic_Ch6.fasta
wget http://genoweb.toulouse.inra.fr/~formation/4_Galaxy_RNAseq/2018/data/reference/ITAG_pre2.3_gene_models_Ch6.gtf

STAR --runMode genomeGenerate --genomeDir . --genomeFastaFiles ITAG2.3_genomic_Ch6.fasta --runThreadN 4 --sjdbGTFfile ITAG_pre2.3_gene_models_Ch6.gtf
ls 
exit

Exercice n°4:  Réaliser les alignements épissés
---------------------------------------------

cd ..

creation fichier scriptSTAR_MT.sh qui contient :
>>> Debut fichier MT <<<
#!/bin/bash
#SBATCH -J starMT
#SBATCH -c 4
#SBATCH -o star_MT.out
#SBATCH -e star_MT.err
#SBATCH -t 01:00:00
#SBATCH --mem 5G
#SBATCH --mail-type=BEGIN,END,FAIL

# Align the paired reads of the MT sample with STAR against the index
# stored in star-index.

# Start from a clean module environment, then load STAR.
module purge
module load bioinfo/STAR-2.6.0c

sample=MT               # read-file prefix, also passed as --outFileNamePrefix
index_dir=star-index    # directory holding the STAR index and the GTF file
threads=4               # same value as the -c request above

STAR \
  --genomeDir "$index_dir" \
  --readFilesIn "${sample}_rep1_1_Ch6.fastq.gz" "${sample}_rep1_2_Ch6.fastq.gz" \
  --outFileNamePrefix "$sample" \
  --sjdbGTFfile "./${index_dir}/ITAG_pre2.3_gene_models_Ch6.gtf" \
  --readFilesCommand zcat \
  --alignIntronMin 20 \
  --alignIntronMax 500000 \
  --outSAMtype BAM SortedByCoordinate \
  --outSAMstrandField intronMotif \
  --alignSoftClipAtReferenceEnds No \
  --outSAMattrIHstart 0 \
  --outFilterType BySJout \
  --outFilterIntronMotifs RemoveNoncanonical \
  --quantMode TranscriptomeSAM GeneCounts \
  --outSAMattributes All \
  --runThreadN "$threads"
>>> Fin fichier MT <<<

creation fichier scriptSTAR_WT.sh qui contient :
>>> Debut fichier WT <<<
#!/bin/bash
#SBATCH -J starWT
#SBATCH -c 4
#SBATCH -o star_WT.out
#SBATCH -e star_WT.err
#SBATCH -t 01:00:00
#SBATCH --mem 5G
#SBATCH --mail-type=BEGIN,END,FAIL

# Align the paired reads of the WT sample with STAR against the index
# stored in star-index.

# Start from a clean module environment, then load STAR.
module purge
module load bioinfo/STAR-2.6.0c

sample=WT               # read-file prefix, also passed as --outFileNamePrefix
index_dir=star-index    # directory holding the STAR index and the GTF file
threads=4               # same value as the -c request above

STAR \
  --genomeDir "$index_dir" \
  --readFilesIn "${sample}_rep1_1_Ch6.fastq.gz" "${sample}_rep1_2_Ch6.fastq.gz" \
  --outFileNamePrefix "$sample" \
  --sjdbGTFfile "./${index_dir}/ITAG_pre2.3_gene_models_Ch6.gtf" \
  --readFilesCommand zcat \
  --alignIntronMin 20 \
  --alignIntronMax 500000 \
  --outSAMtype BAM SortedByCoordinate \
  --outSAMstrandField intronMotif \
  --alignSoftClipAtReferenceEnds No \
  --outSAMattrIHstart 0 \
  --outFilterType BySJout \
  --outFilterIntronMotifs RemoveNoncanonical \
  --quantMode TranscriptomeSAM GeneCounts \
  --outSAMattributes All \
  --runThreadN "$threads"
>>> Fin fichier WT <<<

#execution
sbatch scriptSTAR_WT.sh
sbatch scriptSTAR_MT.sh

Comme pour FastQC, vous pouvez créer un fichier de commandes à soumettre avec sarray ; voici la boucle qui permet de récupérer le nom du fichier sans l'extension :
ls *.fastq.gz | paste - - | perl -lane '($ech)=$F[0]=~/(.*).fastq.gz/; print "module load XXX; STAR --genomeDir star-index --readFilesIn $F[0] $F[1] --outFileNamePrefix $ech ...."' > mesSTARcommandes.sh

Exercice n°5:  Visualisation
---------------------------------------------
srun -c 4 --pty bash 
# Load samtools before its first use; no samtools module has been loaded
# at this point of the session (same module as the one used for the merge step).
module load bioinfo/samtools-1.8
# Index both sorted BAM files, using the 4 CPUs requested with srun.
samtools index -@ 4 WTAligned.sortedByCoord.out.bam
samtools index -@ 4 MTAligned.sortedByCoord.out.bam


Exercice n°6: Recherche de nouveaux transcrits
---------------------------------------------

cd star-index/
cut -f 9 ITAG_pre2.3_gene_models_Ch6.gtf | cut -d ';' -f 2 | sort -u | wc
--> 2813 transcrits
cd ..

module load bioinfo/cufflinks-2.2.1
module load bioinfo/samtools-1.8
samtools merge merge.bam MTAligned.sortedByCoord.out.bam WTAligned.sortedByCoord.out.bam

cufflinks -g star-index/ITAG_pre2.3_gene_models_Ch6.gtf --library-type fr-unstranded -p 4 merge.bam

cut -f 9 transcripts.gtf | cut -d ';' -f 2 | sort -u | wc
--> 4388 transcrits

cuffcompare -r star-index/ITAG_pre2.3_gene_models_Ch6.gtf transcripts.gtf 

Exercice n°7: estimation de l'expression:
---------------------------------------------

module load bioinfo/RSEM-1.3.0

a) mkdir rsemGenomeDir
rsem-prepare-reference --gtf star-index/ITAG_pre2.3_gene_models_Ch6.gtf star-index/ITAG2.3_genomic_Ch6.fasta rsemGenomeIndex

b) rsem-calculate-expression --alignments --no-bam-output --estimate-rspd --calc-ci --seed 12345 -p 4 --ci-memory 3000 --paired-end WTAligned.toTranscriptome.out.bam rsemGenomeIndex QuantWT
rsem-calculate-expression --alignments --no-bam-output --estimate-rspd --calc-ci --seed 12345 -p 4 --ci-memory 3000 --paired-end MTAligned.toTranscriptome.out.bam rsemGenomeIndex QuantMT

c) /usr/local/bioinfo/Scripts/bin/merge_cols.py -f QuantMT.genes.results,QuantWT.genes.results -n MT,WT -c 5 -o matrice.txt

Exercice n°7 (Optionnel)
---------------------------------------------
module load bioinfo/subread-1.6.0
featureCounts -a transcripts.gtf -o new_transcript_featureCounts.txt -Q 20 --minOverlap 10 WTAligned.sortedByCoord.out.bam MTAligned.sortedByCoord.out.bam


Exercice n°8: Stats
---------------------------------------------
# Personal R library directory. $HOME is used instead of "~" because the
# shell does not expand a tilde inside double quotes.
export R_LIBS="$HOME/work/Rlib"
# -p: no error when the directory already exists.
mkdir -p "$R_LIBS"

srun -c 4 --pty bash 
mkdir stats
cd stats/
wget http://www.nathalievilla.org/doc/gz/RNAseq_data.tar.gz
tar -xvzf RNAseq_data.tar.gz
cd RNAseq_data/count_table_files/
module load system/R-3.5.1
Rscript /usr/local/bioinfo/Scripts/bin/Normalization.R -f count_table.tsv -o ./normalization
 -> copie en local et exploration des résultats.
Rscript /usr/local/bioinfo/Scripts/bin/DEG.R -f count_table.tsv -n ./normalization/RLE_info.txt -o DEG --pool1 untreated1,untreated2,untreated3,untreated4 --pool2=treated1,treated2,treated3 --filter TRUE --alpha 0.05 --correct BH --MAplots TRUE
 -> copie en local et exploration des résultats.

Rscript /usr/local/bioinfo/Scripts/bin/GOEnrichment.R -f fb.go --fileFormat twoColumns -i DEG/resDEG.csv -o GOEnrichment -a classic -t fisher