git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/r3Cseq@98703 bc3139a8-67e5-0310-9ffc-ced21a209358
| ... | ... |
@@ -25,12 +25,13 @@ |
| 25 | 25 |
\newcommand{\Rdata}[1]{{\texttt{#1}}}
|
| 26 | 26 |
\newcommand{\Rpackage}[1]{{\textit{#1}}}
|
| 27 | 27 |
|
| 28 |
-\author{Supat Thongjuea \footnote{Bergen Center for Computational Science, Bergen, Norway}}
|
|
| 28 |
+\author{Supat Thongjuea \footnote{The Weatherall Institute of Molecular
|
|
| 29 |
+Medicine, University of Oxford, UK}} |
|
| 29 | 30 |
|
| 30 | 31 |
\title{\textsf{r3Cseq: an R package for the discovery of long-range genomic interactions with
|
| 31 | 32 |
chromosome conformation capture and next-generation sequencing data}} |
| 32 | 33 |
|
| 33 |
-\date{October 28, 2012}
|
|
| 34 |
+\date{January 26, 2015}
|
|
| 34 | 35 |
|
| 35 | 36 |
\begin{document}
|
| 36 | 37 |
<<setup, echo=FALSE>>= |
| ... | ... |
@@ -195,10 +196,13 @@ be used to replace 'mm9\_ref\_chr01.fa' to 'chr1'. |
| 195 | 196 |
\section{Getting started}
|
| 196 | 197 |
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
| 197 | 198 |
The 3C-seq data generated by \cite{Ralph:2011} will be used for the demonstration.
|
| 198 |
-The current version of r3Cseq supports mouse and human genome. |
|
| 199 |
+The current version of r3Cseq supports mouse, human, and rat genomes. |
|
| 199 | 200 |
Therefore, the package requires one of the following \Rpackage{BSgenome}
|
| 200 |
-packages to be installed;\Rpackage{BSgenome.Mmusculus.UCSC.mm9.masked},
|
|
| 201 |
-\Rpackage{BSgenome.Hsapiens.UCSC.hg18.masked}, and \Rpackage{BSgenome.Hsapiens.UCSC.hg19.masked}.
|
|
| 201 |
+packages to be installed;\Rpackage{BSgenome.Mmusculus.UCSC.mm9.masked},
|
|
| 202 |
+\Rpackage{BSgenome.Mmusculus.UCSC.mm10.masked},
|
|
| 203 |
+\Rpackage{BSgenome.Hsapiens.UCSC.hg18.masked},
|
|
| 204 |
+\Rpackage{BSgenome.Hsapiens.UCSC.hg19.masked}, and
|
|
| 205 |
+\Rpackage{BSgenome.Rnorvegicus.UCSC.rn5.masked}.
|
|
| 202 | 206 |
|
| 203 | 207 |
Loading the \Rpackage{r3Cseq} package into R.
|
| 204 | 208 |
<<>>= |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/r3Cseq@86623 bc3139a8-67e5-0310-9ffc-ced21a209358
| ... | ... |
@@ -197,8 +197,8 @@ be used to replace 'mm9\_ref\_chr01.fa' to 'chr1'. |
| 197 | 197 |
The 3C-seq data generated by \cite{Ralph:2011} will be used for the demonstration.
|
| 198 | 198 |
The current version of r3Cseq supports mouse and human genome. |
| 199 | 199 |
Therefore, the package requires one of the followings \Rpackage{BSgenome}
|
| 200 |
-packages to be installed;\Rpackage{BSgenome.Mmusculsu.UCSC.mm9},
|
|
| 201 |
-\Rpackage{BSgenome.Hsapiens.UCSC.hg18}, and \Rpackage{BSgenome.Hsapiens.UCSC.hg19}.
|
|
| 200 |
+packages to be installed;\Rpackage{BSgenome.Mmusculus.UCSC.mm9.masked},
|
|
| 201 |
+\Rpackage{BSgenome.Hsapiens.UCSC.hg18.masked}, and \Rpackage{BSgenome.Hsapiens.UCSC.hg19.masked}.
|
|
| 202 | 202 |
|
| 203 | 203 |
Loading the \Rpackage{r3Cseq} package into R.
|
| 204 | 204 |
<<>>= |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/r3Cseq@83157 bc3139a8-67e5-0310-9ffc-ced21a209358
| ... | ... |
@@ -343,7 +343,7 @@ head(detected_genes) |
| 343 | 343 |
\Robject{RangedData} to the bedGraph format, which simply upload to the UCSC
|
| 344 | 344 |
genome browser. |
| 345 | 345 |
<<>>= |
| 346 |
-export3Cseq2bedGraph(my3Cseq.obj) |
|
| 346 |
+#export3Cseq2bedGraph(my3Cseq.obj) |
|
| 347 | 347 |
@ |
| 348 | 348 |
\subsection{Summary report}
|
| 349 | 349 |
\Rfunction{generate3CseqReport} function generates the summary report
|
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/r3Cseq@82291 bc3139a8-67e5-0310-9ffc-ced21a209358
| 1 | 1 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,379 @@ |
| 1 |
+%\VignetteIndexEntry{r3Cseq}
|
|
| 2 |
+%\VignetteKeywords{r3Cseq}
|
|
| 3 |
+%\VignettePackage{r3Cseq}
|
|
| 4 |
+ |
|
| 5 |
+%documentclass[12pt, a4paper]{article}
|
|
| 6 |
+\documentclass[12pt]{article}
|
|
| 7 |
+ |
|
| 8 |
+\usepackage{amsmath}
|
|
| 9 |
+\usepackage{hyperref}
|
|
| 10 |
+\usepackage[authoryear,round]{natbib}
|
|
| 11 |
+ |
|
| 12 |
+\textwidth=6.2in |
|
| 13 |
+\textheight=8.5in |
|
| 14 |
+%\parskip=.3cm |
|
| 15 |
+\oddsidemargin=.1in |
|
| 16 |
+\evensidemargin=.1in |
|
| 17 |
+\headheight=-.3in |
|
| 18 |
+ |
|
| 19 |
+\newcommand{\scscst}{\scriptscriptstyle}
|
|
| 20 |
+\newcommand{\scst}{\scriptstyle}
|
|
| 21 |
+\newcommand{\Rfunction}[1]{{\texttt{#1}}}
|
|
| 22 |
+\newcommand{\Rcode}[1]{{\texttt{#1}}}
|
|
| 23 |
+\newcommand{\Rparameter}[1]{{\texttt{#1}}}
|
|
| 24 |
+\newcommand{\Robject}[1]{{\texttt{#1}}}
|
|
| 25 |
+\newcommand{\Rdata}[1]{{\texttt{#1}}}
|
|
| 26 |
+\newcommand{\Rpackage}[1]{{\textit{#1}}}
|
|
| 27 |
+ |
|
| 28 |
+\author{Supat Thongjuea \footnote{Bergen Center for Computational Science, Bergen, Norway}}
|
|
| 29 |
+ |
|
| 30 |
+\title{\textsf{r3Cseq: an R package for the discovery of long-range genomic interactions with
|
|
| 31 |
+chromosome conformation capture and next-generation sequencing data}} |
|
| 32 |
+ |
|
| 33 |
+\date{October 28, 2012}
|
|
| 34 |
+ |
|
| 35 |
+\begin{document}
|
|
| 36 |
+<<setup, echo=FALSE>>= |
|
| 37 |
+options(width = 60) |
|
| 38 |
+olocale=Sys.setlocale(locale="C") |
|
| 39 |
+@ |
|
| 40 |
+\maketitle |
|
| 41 |
+ |
|
| 42 |
+\tableofcontents |
|
| 43 |
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
| 44 |
+\begin{abstract}
|
|
| 45 |
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
| 46 |
+The coupling of chromosome conformation capture (3C)-based methods and next-generation sequencing (NGS) enables high-throughput detection |
|
| 47 |
+of long-range genomic interactions via the generation of novel ligation products between DNA sequences that are closely juxtaposed |
|
| 48 |
+in vivo. These interactions may involve promoter regions, enhancers and other regulatory and structural elements of chromosomes, and |
|
| 49 |
+can reveal key details in the regulation of gene expression. 3C-seq is a variant of the method for the detection of interactions |
|
| 50 |
+between one chosen genomic element (viewpoint) and the rest of the genome. We present an R/Bioconductor package called \Rpackage{r3Cseq}, designed to perform
|
|
| 51 |
+3C-seq data analysis in a number of different experimental designs, with or without a control experiment. The package can also be used to perform |
|
| 52 |
+data analysis for the experiment with replicates. The package provides functions to perform 3C-seq data normalization, statistical analysis for cis/trans |
|
| 53 |
+interactions and visualization to facilitate the identification of genomic regions that physically interact with the given viewpoints of interest. |
|
| 54 |
+The r3Cseq package greatly facilitates hypothesis generation and the interpretation of experimental results. |
|
| 55 |
+\end{abstract}
|
|
| 56 |
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
| 57 |
+\section{Introduction}
|
|
| 58 |
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
| 59 |
+This vignette describes how to use the \Rpackage{r3Cseq} package.
|
|
| 60 |
+\Rpackage{r3Cseq} is a Bioconductor-compliant R package designed to facilitate the
|
|
| 61 |
+identification of interaction regions generated by chromosome conformation capture |
|
| 62 |
+and next-generation sequencing (3C-seq). The fundamental principles of 3C-seq are briefly described in |
|
| 63 |
+the following \cite{Soler:2010} (Figure~\ref{fig:3CseqProcedures}): isolated cells are treated with a cross-linking agent
|
|
| 64 |
+to preserve in vivo nuclear proximity between DNA sequences. The DNA isolated from these |
|
| 65 |
+cells is then digested using a primary restriction enzyme, typically a 6-base pairs cutting |
|
| 66 |
+enzyme such as HindIII, EcoRI or BamHI. The digested product is then ligated under dilute conditions |
|
| 67 |
+to favor intra-molecular over inter-molecular ligation events. This digested and ligated chromatin yields composite |
|
| 68 |
+sequences representing (distal) genomic regions that are in close physical proximity in the cell nucleus. |
|
| 69 |
+The digested and ligated chromatin is then de-crosslinked and subjected to a second restriction digest |
|
| 70 |
+using either Nla III or Dpn II (a 4-cutter) as a secondary restriction enzyme to decrease the fragment sizes. |
|
| 71 |
+The resulting digested DNA is then ligated again under diluted conditions, creating small circular fragments. |
|
| 72 |
+These fragments are inverse PCR-amplified using primers specific for a genomic region of interest (e.g.\ promoter, |
|
| 73 |
+enhancer, or any other element potentially involved in long-range interactions), termed the ``viewpoint''. |
|
| 74 |
+The amplified fragments are then sequenced using massively parallel high-throughput sequencing. |
|
| 75 |
+The 3C-seq procedure produces hybrid DNA molecules that are a combination of viewpoint-specific primers |
|
| 76 |
+followed by sequences derived from the ligated interaction fragments. As such, these composite sequences are |
|
| 77 |
+unmappable and need to be trimmed to remove the viewpoint sequences, thus leaving only the capture sequence fragments |
|
| 78 |
+for mapping. After trimming, reads are mapped against a reference genome using alignment software such as Bowtie. |
|
| 79 |
+A mapped read file generated by the mapping software is then transformed to the BAM file and analyzed |
|
| 80 |
+by using \Rpackage{r3Cseq} package.
|
|
| 81 |
+ |
|
| 82 |
+\begin{figure}
|
|
| 83 |
+\centering |
|
| 84 |
+\includegraphics{images/3CseqProcedures.png}
|
|
| 85 |
+\caption{3C-seq procedures}
|
|
| 86 |
+\label{fig:3CseqProcedures}
|
|
| 87 |
+\end{figure}
|
|
| 88 |
+ |
|
| 89 |
+\Rpackage{r3Cseq} package is built on, and extends the functionality of, Bioconductor packages such as \Rpackage{GenomicRanges},
|
|
| 90 |
+\Rpackage{BSgenome}, \Rpackage{Rsamtools}, and \Rpackage{rtracklayer}. The package provides classes and
|
|
| 91 |
+methods to facilitate single-end reads, which are generated by the next-generation sequencing. The package can perform |
|
| 92 |
+data analysis on both single input file (single lane from one experiment) and two input files from an experiment and a control. |
|
| 93 |
+The package also provides a class and functions to perform 3C-seq data analysis from replicates (see working with replicates section). |
|
| 94 |
+The key features workflow of \Rpackage{r3Cseq} is depicted in the following figure (Figure~\ref{fig:r3CseqFlowChart}).
|
|
| 95 |
+ |
|
| 96 |
+\begin{figure}
|
|
| 97 |
+\centering |
|
| 98 |
+\includegraphics{images/r3CseqFlowChart.png}
|
|
| 99 |
+\caption{r3Cseq key features workflow}
|
|
| 100 |
+\label{fig:r3CseqFlowChart}
|
|
| 101 |
+\end{figure}
|
|
| 102 |
+ |
|
| 103 |
+\Rpackage{r3Cseq} analysis workflow starts from the class initialization.
|
|
| 104 |
+There are two classes found in this package. One is \Robject{r3Cseq} class that
|
|
| 105 |
+is designed to support a single experiment in both with and without a control experiment. |
|
| 106 |
+Another is \Robject{r3CseqInBatch} class that is designed to support analysis with replicates.
|
|
| 107 |
+To initialize the class both \Robject{r3Cseq} and \Robject{r3CseqInBatch},
|
|
| 108 |
+a user gives the input parameters for example the input file name, |
|
| 109 |
+genome assembly version, primary restriction fragment name and so on |
|
| 110 |
+(see more details in the manual page of \Robject{r3Cseq} and \Robject{r3CseqInBatch}).
|
|
| 111 |
+The class will then be created and is ready to perform 3C-seq analysis. |
|
| 112 |
+The \Rfunction{getRawReads} and \Rfunction{getBatchRawReads} functions can next be used to read in the BAM files.
|
|
| 113 |
+It may take a few minutes for data processing depending on the size of the input BAM files and |
|
| 114 |
+the speed of CPU and the size of the RAM of a computer that performs analysis. |
|
| 115 |
+To run the \Rfunction{getBatchRawReads} function for replicates, a user might need a powerful computer server.
|
|
| 116 |
+\Rfunction{getRawReads} function reads in aligned reads from input BAM files
|
|
| 117 |
+and transforms aligned reads to the \Robject{GRanges} objects that can be stored in the r3Cseq object, whereas
|
|
| 118 |
+\Rfunction{getBatchRawReads} processes the data in batch and stores the aligned reads \Robject{GRanges}
|
|
| 119 |
+in the R files (.rdata). To count number of reads preparing for downstream analysis, \Robject{r3Cseq}
|
|
| 120 |
+provides two ways to count number of reads per region; 1) count number of reads per |
|
| 121 |
+restriction fragments, using the function \Rfunction{getReadCountPerRestrictionFragment} and 2)
|
|
| 122 |
+count the number of reads per non-overlapping window size, using function \Rfunction{getReadCountPerWindow},
|
|
| 123 |
+whereas \Rfunction{getBatchReadCountPerRestrictionFragment} and \Rfunction{getBatchReadCountPerWindow} can do
|
|
| 124 |
+the same for replicates. \Rpackage{r3Cseq} provides \Rfunction{calculateRPM} and \Rfunction{calculateBatchRPM}
|
|
| 125 |
+functions to calculate reads per million per restriction fragment size (RPM) as normalized interaction frequency values. |
|
| 126 |
+There are two methods to calculate RPM, which are described in the r3Cseq paper \cite{Supat:2012}.
|
|
| 127 |
+After data normalization, \Rfunction{getInteractions} and \Rfunction{getBatchInteractions} will be performed
|
|
| 128 |
+to identify candidate interactions. Statistical analysis for both cis and trans interactions is described in the |
|
| 129 |
+r3Cseq paper \cite{Supat:2012}. \Rpackage{r3Cseq} provides functions \Rfunction{export3CseqRawReads2bedGraph},
|
|
| 130 |
+\Rfunction{export3Cseq2bedGraph}, \Rfunction{exportInteractions2text}, and \Rfunction{exportBatchInteractions2text}
|
|
| 131 |
+to export raw reads and all identified interactions to the bedGraph file format, which is simply uploaded to the UCSC genome browser. |
|
| 132 |
+The package also provides functionalities for plotting to show data analysis result of interaction regions. |
|
| 133 |
+Plotting functions consist of \Rfunction{plotOverviewInteractions},
|
|
| 134 |
+\Rfunction{plotInteractionsPerChromosome},
|
|
| 135 |
+\Rfunction{plotInteractionsNearViewpoint},
|
|
| 136 |
+and \Rfunction{plotDomainogramNearViewpoint}.
|
|
| 137 |
+These functions will be demonstrated in the visualization of 3C-seq data section. |
|
| 138 |
+ |
|
| 139 |
+Here is a list of some of its most important functions. |
|
| 140 |
+ |
|
| 141 |
+\begin{enumerate}
|
|
| 142 |
+\item |
|
| 143 |
+\Rfunction{getRawReads}: a function to read in BAM files.
|
|
| 144 |
+\item |
|
| 145 |
+\Rfunction{getBatchRawReads}: a function to read in multiple BAM files for replicates.
|
|
| 146 |
+\item |
|
| 147 |
+\Rfunction{getReadCountPerRestrictionFragment} : a function to count the number of reads per restriction
|
|
| 148 |
+fragment. A user has to specify the name of restriction enzyme. The package |
|
| 149 |
+will then automatically generate the genome-wide restriction fragments and counts how many 3C-seq reads |
|
| 150 |
+are mapped into that particular restriction fragments. |
|
| 151 |
+\item |
|
| 152 |
+\Rfunction{getBatchReadCountPerRestrictionFragment} : Similar to \Rfunction{getReadCountPerRestrictionFragment}
|
|
| 153 |
+using it for replicates |
|
| 154 |
+\item |
|
| 155 |
+\Rfunction{getReadCountPerWindow} : a function to count the number of reads per defined non-overlapping window size.
|
|
| 156 |
+A user has to specify the window size of interest. |
|
| 157 |
+The package will then automatically generate the genome-wide windows and counts how many 3C-seq reads |
|
| 158 |
+are mapped into that particular windows. |
|
| 159 |
+\item |
|
| 160 |
+\Rfunction{getBatchReadCountPerWindow} : similar to \Rfunction{getReadCountPerWindow} using for replicates
|
|
| 161 |
+\item |
|
| 162 |
+\Rfunction{calculateRPM} : a function to calculate reads per million (RPM) per each restriction fragment
|
|
| 163 |
+\item |
|
| 164 |
+\Rfunction{calculateBatchRPM} : similar to \Rfunction{calculateRPM} using for replicates
|
|
| 165 |
+\item |
|
| 166 |
+\Rfunction{getInteractions} : a function to perform statistical analysis to identify candidate interactions
|
|
| 167 |
+\item |
|
| 168 |
+\Rfunction{getBatchInteractions} : similar to \Rfunction{getInteractions} using for replicates
|
|
| 169 |
+\item |
|
| 170 |
+\emph{Visualization} : the package contains functions for visualizing the interaction regions with
|
|
| 171 |
+the powerful plotting facilities. These functions are \Rfunction{plotOverviewInteractions},
|
|
| 172 |
+\Rfunction{plotInteractionsNearViewpoint},\Rfunction{plotInteractionsPerChromosome},
|
|
| 173 |
+and \Rfunction{plotDomainogramNearViewpoint}.
|
|
| 174 |
+\item |
|
| 175 |
+\emph{Data export} : the package contains functions to export the data into
|
|
| 176 |
+tab-delimited text format, which can be easily uploaded to the UCSC |
|
| 177 |
+genome browser for further visualization and exploration. Currently it supports |
|
| 178 |
+the bedGraph format. These functions are \Rfunction{export3CseqRawReads2bedGraph},
|
|
| 179 |
+\Rfunction{export3Cseq2bedGraph}, \Rfunction{exportInteractions2text},
|
|
| 180 |
+\Rfunction{exportBatchInteractions2text}. The package can also generate a summary report in PDF format by
|
|
| 181 |
+\Rfunction{generate3CseqReport} function.
|
|
| 182 |
+\end{enumerate}
|
|
| 183 |
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
| 184 |
+\section{Preparation input files for r3Cseq}
|
|
| 185 |
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
| 186 |
+The required input file for \Rpackage{r3Cseq} package is the BAM file, obtained as an output from
|
|
| 187 |
+the mapping software. The represented identifier for a reference genome shown in each input BAM file is important |
|
| 188 |
+to run \Rpackage{r3Cseq} properly. The represented identifier for each chromosome must be
|
|
| 189 |
+in "chr[1..19XYM]" format for the mouse reference genome and "chr[1..22XYM]" format for the |
|
| 190 |
+human reference genome. Therefore, before using \Rpackage{r3Cseq} package, a user has to check the identifier for the reference
|
|
| 191 |
+genome. If the identifier for each chromosome found in the mapped file |
|
| 192 |
+is not in a proper format for example 'mm9\_ref\_chr01.fa', the Unix command like 'sed' might |
|
| 193 |
+be used to replace 'mm9\_ref\_chr01.fa' to 'chr1'. |
|
| 194 |
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
| 195 |
+\section{Getting started}
|
|
| 196 |
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
| 197 |
+The 3C-seq data generated by \cite{Ralph:2011} will be used for the demonstration.
|
|
| 198 |
+The current version of r3Cseq supports mouse and human genome. |
|
| 199 |
+Therefore, the package requires one of the following \Rpackage{BSgenome}
|
|
| 200 |
+packages to be installed: \Rpackage{BSgenome.Mmusculus.UCSC.mm9},
|
|
| 201 |
+\Rpackage{BSgenome.Hsapiens.UCSC.hg18}, and \Rpackage{BSgenome.Hsapiens.UCSC.hg19}.
|
|
| 202 |
+ |
|
| 203 |
+Loading the \Rpackage{r3Cseq} package into R.
|
|
| 204 |
+<<>>= |
|
| 205 |
+library(r3Cseq) |
|
| 206 |
+@ |
|
| 207 |
+There are 2 data sets found in the package. |
|
| 208 |
+<<>>= |
|
| 209 |
+data(Myb_prom_FL) |
|
| 210 |
+data(Myb_prom_FB) |
|
| 211 |
+@ |
|
| 212 |
+\begin{enumerate}
|
|
| 213 |
+\item |
|
| 214 |
+\Rdata{Myb\_prom\_FL}, the 3C-seq data contains the aligned reads of
|
|
| 215 |
+the Myb promoter interactions signal in fetal liver. |
|
| 216 |
+It was stored in the \Robject{GRanges} object processed by the \Rpackage{Rsamtools} package.
|
|
| 217 |
+ |
|
| 218 |
+\item |
|
| 219 |
+\Rdata{Myb\_prom\_FB}, the 3C-seq data contains the aligned reads of Myb promoter interactions signal in fetal brain.
|
|
| 220 |
+\end{enumerate}
|
|
| 221 |
+We will perform \Rpackage{r3Cseq} to discover interaction regions,
|
|
| 222 |
+which possibly interact with the promoter region of Myb gene in both fetal liver and brain \cite{Ralph:2011}.
|
|
| 223 |
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
| 224 |
+\subsection{r3Cseq object initialization}
|
|
| 225 |
+In this section, we will analyze 3C-seq data, which were derived from fetal liver (expressing high levels of Myb) |
|
| 226 |
+and fetal brain (expressing low levels of Myb). The latter will be used as a negative control. |
|
| 227 |
+More examples of r3Cseq data analysis can be found at the official r3Cseq |
|
| 228 |
+website \url{http://r3cseq.genereg.net}. We firstly initialized the r3Cseq object.
|
|
| 229 |
+<<>>= |
|
| 230 |
+my3Cseq.obj<-new("r3Cseq",organismName='mm9',isControlInvolved=TRUE,
|
|
| 231 |
+viewpoint_chromosome='chr10',viewpoint_primer_forward='TCTTTGTTTGATGGCATCTGTT', |
|
| 232 |
+viewpoint_primer_reverse='AAAGGGGAGGAGAAGGAGGT',expLabel="Myb_prom_FL", |
|
| 233 |
+contrLabel="MYb_prom_FB",restrictionEnzyme='HindIII') |
|
| 234 |
+@ |
|
| 235 |
+Definition of input parameters is described in the \Robject{r3Cseq} help page.
|
|
| 236 |
+We next add raw reads from Myb\_prom\_FL and Myb\_prom\_FB to the existing \Robject{my3Cseq.obj}.
|
|
| 237 |
+<<>>= |
|
| 238 |
+expRawData(my3Cseq.obj)<-exp.GRanges |
|
| 239 |
+contrRawData(my3Cseq.obj)<-contr.GRanges |
|
| 240 |
+@ |
|
| 241 |
+Type \Robject{my3Cseq.obj} to see the r3Cseq object:
|
|
| 242 |
+<<>>= |
|
| 243 |
+my3Cseq.obj |
|
| 244 |
+@ |
|
| 245 |
+\subsection{Getting reads per restriction fragments/user defined window size}
|
|
| 246 |
+To get the number of reads per restriction fragment, function \Rfunction{getReadCountPerRestrictionFragment}
|
|
| 247 |
+will be performed. |
|
| 248 |
+<<>>= |
|
| 249 |
+getReadCountPerRestrictionFragment(my3Cseq.obj) |
|
| 250 |
+@ |
|
| 251 |
+The package provides the function \Rfunction{getReadCountPerWindow} to count number of reads
|
|
| 252 |
+per non-overlapping window size defined by a user. |
|
| 253 |
+ |
|
| 254 |
+\subsection{Normalization}
|
|
| 255 |
+ |
|
| 256 |
+We next perform normalization. |
|
| 257 |
+<<>>= |
|
| 258 |
+calculateRPM(my3Cseq.obj) |
|
| 259 |
+@ |
|
| 260 |
+\subsection{Getting interaction regions}
|
|
| 261 |
+After normalization, the \Rfunction{getInteractions} function will be performed.
|
|
| 262 |
+<<>>= |
|
| 263 |
+getInteractions(my3Cseq.obj,fdr=0.05) |
|
| 264 |
+@ |
|
| 265 |
+In order to see the result of interaction regions, two functions \Rfunction{expInteractionRegions} and \Rfunction{contrInteractionRegions}
|
|
| 266 |
+need to be used to access the slot of r3Cseq object. |
|
| 267 |
+To get the result of interaction regions for the experiment, \Rfunction{expInteractionRegions} will be performed.
|
|
| 268 |
+<<>>= |
|
| 269 |
+fetal.liver.interactions<-expInteractionRegions(my3Cseq.obj) |
|
| 270 |
+fetal.liver.interactions |
|
| 271 |
+@ |
|
| 272 |
+To get the result of interaction regions for the control, \Rfunction{contrInteractionRegions} will be performed.
|
|
| 273 |
+<<>>= |
|
| 274 |
+fetal.brain.interactions<-contrInteractionRegions(my3Cseq.obj) |
|
| 275 |
+fetal.brain.interactions |
|
| 276 |
+@ |
|
| 277 |
+\subsection{Getting the viewpoint information}
|
|
| 278 |
+To see the viewpoint information, \Rfunction{getViewpoint} function can be used.
|
|
| 279 |
+\Rfunction{getViewpoint} will return the RangedData object of the viewpoint information.
|
|
| 280 |
+<<>>= |
|
| 281 |
+viewpoint<-getViewpoint(my3Cseq.obj) |
|
| 282 |
+viewpoint |
|
| 283 |
+@ |
|
| 284 |
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
| 285 |
+\section{Visualization of 3C-seq data}
|
|
| 286 |
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
| 287 |
+\Rpackage{r3Cseq} package provides visualization functions.
|
|
| 288 |
+These functions are \Rfunction{plotOverviewInteractions}, \Rfunction{plotInteractionsNearViewpoint},
|
|
| 289 |
+\Rfunction{plotInteractionsPerChromosome}, and \Rfunction{plotDomainogramNearViewpoint}.
|
|
| 290 |
+ |
|
| 291 |
+\subsection{The overview plot of interactions}
|
|
| 292 |
+\Rfunction{plotOverviewInteractions} function shows the overview of interaction regions distributed across genome.
|
|
| 293 |
+ |
|
| 294 |
+<<plotOverviewInteractions,fig=TRUE,height=8,width=12,eps=FALSE,include=FALSE>>= |
|
| 295 |
+plotOverviewInteractions(my3Cseq.obj) |
|
| 296 |
+@ |
|
| 297 |
+\begin{figure}
|
|
| 298 |
+\centering |
|
| 299 |
+\includegraphics{r3Cseq-plotOverviewInteractions}
|
|
| 300 |
+\caption{Distribution of interaction regions across genome}
|
|
| 301 |
+\end{figure}
|
|
| 302 |
+ |
|
| 303 |
+\subsection{Plot of interactions in cis}
|
|
| 304 |
+\Rfunction{plotInteractionsNearViewpoint} function shows the zoom in of interaction regions located close to the viewpoint.
|
|
| 305 |
+<<plotInteractionsNearViewpoint,fig=TRUE,height=8,width=12,eps=FALSE,include=FALSE>>= |
|
| 306 |
+plotInteractionsNearViewpoint(my3Cseq.obj) |
|
| 307 |
+@ |
|
| 308 |
+\begin{figure}
|
|
| 309 |
+\centering |
|
| 310 |
+\includegraphics{r3Cseq-plotInteractionsNearViewpoint}
|
|
| 311 |
+\caption{Zoom in interaction regions near the viewpoint}
|
|
| 312 |
+\end{figure}
|
|
| 313 |
+ |
|
| 314 |
+\subsection{Plot of interactions in each selected chromosome}
|
|
| 315 |
+\Rfunction{plotInteractionsPerChromosome} function shows the interaction regions found in the chromosome10.
|
|
| 316 |
+ |
|
| 317 |
+<<plotInteractionsPerChromosome,fig=TRUE,height=8,width=12,eps=FALSE,include=FALSE>>= |
|
| 318 |
+plotInteractionsPerChromosome(my3Cseq.obj,"chr10") |
|
| 319 |
+@ |
|
| 320 |
+ |
|
| 321 |
+\begin{figure}
|
|
| 322 |
+\centering |
|
| 323 |
+\includegraphics{r3Cseq-plotInteractionsPerChromosome}
|
|
| 324 |
+\caption{Distribution of interaction regions across chromosome 10}
|
|
| 325 |
+\end{figure}
|
|
| 326 |
+ |
|
| 327 |
+\subsection{Domainogram of interactions}
|
|
| 328 |
+\Rfunction{plotDomainogramNearViewpoint} function shows the domainogram of interactions found in cis.
|
|
| 329 |
+This function may take several minutes to produce domainograms. We therefore skip this command to produce plots for the vignette. |
|
| 330 |
+You can see the example of the plots and find more details at \url{http://r3Cseq.genereg.net}.
|
|
| 331 |
+<<>>= |
|
| 332 |
+#plotDomainogramNearViewpoint(my3Cseq.obj) |
|
| 333 |
+@ |
|
| 334 |
+\subsection{Associate interaction signals to the Refseq genes}
|
|
| 335 |
+\Rfunction{getExpInteractionsInRefseq} and \Rfunction{getContrInteractionsInRefseq} functions can be used to detect the list of
|
|
| 336 |
+genes that contain significant interaction signals in their proximity. |
|
| 337 |
+<<>>= |
|
| 338 |
+detected_genes<-getExpInteractionsInRefseq(my3Cseq.obj) |
|
| 339 |
+head(detected_genes) |
|
| 340 |
+@ |
|
| 341 |
+\subsection{Export interactions to the bedGraph format}
|
|
| 342 |
+\Rfunction{export3Cseq2bedGraph} function exports all interactions from the
|
|
| 343 |
+\Robject{RangedData} to the bedGraph format, which can simply be uploaded to the UCSC
|
|
| 344 |
+genome browser. |
|
| 345 |
+<<>>= |
|
| 346 |
+export3Cseq2bedGraph(my3Cseq.obj) |
|
| 347 |
+@ |
|
| 348 |
+\subsection{Summary report}
|
|
| 349 |
+\Rfunction{generate3CseqReport} function generates the summary report
|
|
| 350 |
+from \Rpackage{r3Cseq} analysis results. The report contains
|
|
| 351 |
+a pdf file for all plots and text files of interaction regions. |
|
| 352 |
+This function may take several minutes to produce the report. |
|
| 353 |
+We therefore skip this command during the vignette creation. |
|
| 354 |
+<<>>= |
|
| 355 |
+#generate3CseqReport(my3Cseq.obj) |
|
| 356 |
+@ |
|
| 357 |
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
| 358 |
+\section{Working with replicates}
|
|
| 359 |
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
| 360 |
+The example of how to work with replicates can be found at |
|
| 361 |
+\url{http://r3cseq.genereg.net/}.
|
|
| 362 |
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
| 363 |
+\section{r3Cseq website}
|
|
| 364 |
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
| 365 |
+We have developed the website \url{http://r3cseq.genereg.net}.
|
|
| 366 |
+The website provides more details of \Rpackage{r3Cseq} analysis pipeline.
|
|
| 367 |
+The example data sets and the current version of \Rpackage{r3Cseq}
|
|
| 368 |
+package can be downloaded from the website. |
|
| 369 |
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
| 370 |
+\section{Session Info}
|
|
| 371 |
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
| 372 |
+<<>>= |
|
| 373 |
+sessionInfo() |
|
| 374 |
+@ |
|
| 375 |
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
| 376 |
+%\newpage |
|
| 377 |
+\bibliographystyle{apalike}
|
|
| 378 |
+\bibliography{r3Cseq}
|
|
| 379 |
+\end{document}
|