import React, { useEffect } from 'react'
import { connect } from 'react-redux';
// import PropTypes from 'prop-types';
// import { withStyles } from '@material-ui/core/styles';
import { makeStyles } from '@material-ui/core/styles';
import Typography from '@material-ui/core/Typography';
// import MUIDataTable from "mui-datatables";
import Grid from '@material-ui/core/Grid';
// import { BlockMath } from 'react-katex';

import TableContainer from '@material-ui/core/TableContainer';

// import Divider from '@material-ui/core/Divider';
import Table from '@material-ui/core/Table';
import TableBody from '@material-ui/core/TableBody';
import TableCell from '@material-ui/core/TableCell';
import TableHead from '@material-ui/core/TableHead';
import TableRow from '@material-ui/core/TableRow';

// import TableNewSuper from '../components/TableNewSuper';
// import ContentsProtocol from '../components/ContentsProtocol'
import Accordion from '@material-ui/core/Accordion';
import AccordionSummary from '@material-ui/core/AccordionSummary';
import AccordionDetails from '@material-ui/core/AccordionDetails';
import ExpandMoreIcon from '@material-ui/icons/ExpandMore';

import { handleHeader } from '../actions/generalActions';
import protocolBasic from './svg/protocolBasic.svg'
import n_norm2 from './svg/n-norm.svg'
import n_obs from './svg/n-obs.svg'
import wins from './svg/wins.svg'
import mus_wins from './svg/mus_wins.svg'

/**
 * Style hook for this page, built with Material-UI's `makeStyles`.
 *
 * Exposes three class keys:
 *  - `root`    — applied to the outermost <div> of the page component;
 *  - `table`   — applied to the annotation <Table>;
 *  - `divider` — a thin black horizontal rule (not referenced in the part of
 *                the file visible here).
 */
const useStyles = makeStyles((theme) => {
  // Start from the theme's gutter mixin, then layer the page-specific
  // spacing on top; later keys override anything the mixin provides.
  const root = {
    ...theme.mixins.gutters(),
    // paddingTop: theme.spacing(3),
    paddingBottom: theme.spacing(2),
    flexGrow: 1,
    marginTop: theme.spacing(7),
  };

  const table = {
    minWidth: 650,
    marginTop: theme.spacing(1),
    marginBottom: theme.spacing(3),
  };

  const divider = {
    marginTop: 20,
    marginBottom: 20,
    width: '100%',
    height: 2,
    backgroundColor: 'black',
  };

  return { root, table, divider };
});

/**
 * Packs the five positional cell values of an annotation-table row into a
 * plain record.
 *
 * The table body renders the fields in this order under the headers
 * "Annotation", "Source", "Additional download tool",
 * "Number of genes (RNAs) in the annotation" and "Description".
 *
 * @param {*} annotation  - content of the "Annotation" cell
 * @param {*} from        - content of the "Source" cell
 * @param {*} how         - content of the "Additional download tool" cell
 * @param {*} number      - content of the "Number of genes (RNAs)" cell
 * @param {*} description - content of the "Description" cell
 * @returns {{annotation: *, from: *, how: *, number: *, description: *}}
 *   a new object holding the arguments under the same names
 */
function createData(annotation, from, how, number, description) {
  const row = {
    annotation: annotation,
    from: from,
    how: how,
    number: number,
    description: description,
  };
  return row;
}
  
  const rows = [
    createData(
        <b>gencode</b>, 
        <a href="http://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_35/" target="_blank" rel="noopener noreferrer">GENCODE v35</a>, 
        'NaN', 
        '60619', 
        'The file "gencode.v35.basic.annotation.gtf" has been filtered as follows: third column = "gene".'),
    createData(
        <b>vlinc</b>, 
        <a href="https://www.ncbi.nlm.nih.gov/pubmed/23876380" target="_blank" rel="noopener noreferrer">PMID: 23876380</a>, 
        'NaN', 
        '2762', 
        'We transferred the coordinates of the genes from hg19 to hg38 using the liftOver tool (UCSC Genome Browser, with standard parameters).'),
    createData(
        <b>GB_snomirna</b>, 
        "UCSC Genome Browser", 
        <a href="https://genome.ucsc.edu/cgi-bin/hgTables?hgsid=1408074705_ozZlcwBmYr0T4YgKz1U6d8ihBvY7&clade=mammal&org=Human&db=hg38&hgta_group=allTables&hgta_track=hg38&hgta_table=wgRna&hgta_regionType=genome&position=chrX%3A1-15%2C527%2C057&hgta_outputType=wigData&hgta_outFileName=" target="_blank" rel="noopener noreferrer">wgRna table</a>, 
        '2320', 
        'NaN'),
    createData(
        <b>GB_trna</b>, 
        "UCSC Genome Browser", 
        <a href="https://genome.ucsc.edu/cgi-bin/hgTables?hgsid=1408074705_ozZlcwBmYr0T4YgKz1U6d8ihBvY7&clade=mammal&org=Human&db=hg38&hgta_group=allTables&hgta_track=hg38&hgta_table=tRNAs&hgta_regionType=genome&position=chrX%3A1-15%2C527%2C057&hgta_outputType=primaryTable&hgta_outFileName=" target="_blank" rel="noopener noreferrer">tRNAs table</a>, 
        '629', 
        'NaN'),
    createData(
        <b>GB_repM</b>, 
        "UCSC Genome Browser", 
        <a href="https://genome.ucsc.edu/cgi-bin/hgTables?hgsid=1408074705_ozZlcwBmYr0T4YgKz1U6d8ihBvY7&clade=mammal&org=Human&db=hg38&hgta_group=allTables&hgta_track=hg38&hgta_table=rmsk&hgta_regionType=genome&position=chrX%3A1-15%2C527%2C057&hgta_outputType=primaryTable&hgta_outFileName=" target="_blank" rel="noopener noreferrer">rmsk table</a>, 
        '11408', 
        'Filtering by the repClass column, selected values: snRNA, rRNA, scRNA, tRNA, RNA, srpRNA.'),
    createData(
        <b>from_article</b>, 
        'PMIDs: 26307088, 30628890, 31819156', 
        'NaN',
        '3', 
        '"DACOR1", "lnc-NR2F1_short", "Linc-ASEN" were not found in any of the above gene annotations, but there are "one-to-all" data for these RNAs.'),
    createData(
        <b>Xrna_human</b>, 
        "PMIDs: GRID (28922346), iMARGI (30718424), Red-C (32479626), iMARGI_HUVEC (33060583)", 
        'NaN',
        '155127', 
        'De novo assembled transcripts for those RNA-parts that were not annotated by any gene from the general gene annotation.'),
    createData(
        <b>RNA-Chrom DB</b>, 
        "PMIDs: 24360279, 26878240, 33852864, 28132025", 
        'NaN',
        '2', 
        'Some experiments have studied multicopy RNAs true parental genes of which we cannot determine. In such cases, we have introduced a group name for the corresponding multicopy RNAs: "7SK RNAs" and "ERV-9 RNAs".'),
  ];

  const rows_2 = [
    createData(
        <b>gencode</b>, 
        <a href="http://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_mouse/release_M25/" target="_blank" rel="noopener noreferrer">GENCODE M25</a>, 
        'NaN', 
        '55364', 
        'The file "gencode.vM25.basic.annotation.gtf" has been filtered as follows: third column = "gene".'),
    createData(
        <b>GB_trna</b>, 
        "UCSC Genome Browser", 
        <a href="https://genome.ucsc.edu/cgi-bin/hgTables?hgsid=1408074705_ozZlcwBmYr0T4YgKz1U6d8ihBvY7&clade=mammal&org=Mouse&db=mm10&hgta_group=allTables&hgta_track=mm10&hgta_table=tRNAs&hgta_regionType=genome&position=chr12%3A56%2C694%2C976-56%2C714%2C605&hgta_outputType=primaryTable&hgta_outFileName=" target="_blank" rel="noopener noreferrer">tRNAs table</a>, 
        '434', 
        'NaN'),
    createData(
        <b>GB_repM</b>, 
        "UCSC Genome Browser", 
        <a href="https://genome.ucsc.edu/cgi-bin/hgTables?hgsid=1408074705_ozZlcwBmYr0T4YgKz1U6d8ihBvY7&clade=mammal&org=Mouse&db=0&hgta_group=allTables&hgta_track=hg38&hgta_table=rmsk&hgta_regionType=genome&position=&hgta_outputType=primaryTable&hgta_outFileName=" target="_blank" rel="noopener noreferrer">rmsk table</a>, 
        '18770', 
        'Filtering by the repClass column, selected values: snRNA, rRNA, scRNA, tRNA, RNA, srpRNA.'),
    createData(
        <b>from_article</b>, 
        'PMIDs:  29241531, 25772072, 33293420, 33782403', 
        'NaN',
        '4', 
        '"Panct1", "LncHSC-2", "LncMyoD", "HOXBLINC" were not found in any of the above gene annotations, but there are "one-to-all" data for these RNAs.'),
    createData(
        <b>Xrna_mouse</b>, 
        "PMIDs: GRID (28922346), RADICL (32094342)", 
        'NaN',
        '14333', 
        'De novo assembled transcripts for those RNA-parts that were not annotated by any gene from the general gene annotation.'),
    createData(
        <b>RNA-Chrom DB</b>, 
        "PMIDs: 25259926, 28666128, 28692038, 31426913, 33658714, 32160538, 27984727, 28820723, 26878240, 23771028, 33505026", 
        'NaN',
        '9', 
        'Some experiments have studied multicopy RNAs true parental genes of which we cannot determine. In such cases, we have introduced a group name for the corresponding multicopy RNAs: "U1 RNAs", "TERRA RNAs", "PAR-TERRA RNAs", "LINE1 RNAs", "IAP RNAs", "B2 RNAs", "7SK RNAs", "116HG RNAs", "IAPEz-int RNAs".'),
  ]                                                                                      




const BasicProtocol = (props) => {
  const classes = useStyles();
  document.title = "Data processing";

  useEffect(() => {
    props.handleHeader(': data processing')
  }, [])
  
  return (
    <div className={classes.root}>
        <TableContainer style={{maxHeight: window.innerHeight*0.9}}>
            <Typography id="Data-processing" variant="h5" style={{fontWeight: 'bold', textAlign: "center", marginBottom: 15, marginTop: 10}}>
                In this work, we preprocessed raw reads using a universal data processing protocol:
            </Typography> 
      {/* In this work, we used only raw reads provided by the authors of publications. */}
            <div style={{marginBottom: 10, textAlign: 'center', marginTop: 40}}>
                <img src={protocolBasic} alt="protocolBasic" width='50%'/>
            </div>
            <Typography variant={"subtitle1"} style={{textAlign: 'justify', marginBottom: 15, marginTop: 5}}>
                To find out summary statistics of the data processing protocol on a particular experiment, click on <b>"Exp.ID"</b> and go to <b>"Metadata"</b> page.
            </Typography>
            {/* <Grid container spasing={3}>
                <Grid item xs={6} style={{marginTop: 40}}>
                    <Typography variant="h5" style={{fontWeight: 'bold', textAlign: "center"}}>
                        All-to-all
                    </Typography> 
                </Grid>
                <Grid item xs={6} style={{marginTop: 40}}>
                    <Typography variant="h5" style={{fontWeight: 'bold', textAlign: "center"}}>
                        All-to-all
                    </Typography> 
                </Grid>
            </Grid> */}
            <Accordion style={{marginTop: 10}}>
                <AccordionSummary
                    expandIcon={<ExpandMoreIcon />}
                    aria-controls="panel1a-content"
                    id="panel1a-header"
                    style={{backgroundColor: '#b8b9ba'}}
                >
                    <Typography variant={"subtitle3"}><b>Raw Data</b></Typography>
                </AccordionSummary>
                <AccordionDetails style={{backgroundColor: '#f5f6f7'}}>                                           
                    <Typography variant={"subtitle1"} style={{textAlign: 'justify'}}>
                        <ul>
                            <li><b>RNA-DNA interaction data</b> was downloaded primarily from the <a href="https://www.ncbi.nlm.nih.gov/geo/" target="_blank" rel="noopener noreferrer">Gene Expression Omnibus</a>. The <span style={{color: 'red',fontWeight: 'bold'}}>RNA-Chrom
                            </span> database <a href="/experiments" target="_blank" rel="noopener noreferrer">contains</a> data for more than 50 human and mouse RNAs in case of "one-to-all" methods and for thousands of RNAs in case of "all-to-all" methods.</li>
                            {/* <li>Up to the stage <b>"Annotation of RNA-parts of contacts"</b>, all replicas were processed <b>independently</b>.</li> */}
                            <li>Further, all replicas were processed independently starting from <b>"Remove duplicates"</b> step and up to <b>"BlackList"</b> step.</li>
                        </ul>
                    </Typography>      
                </AccordionDetails>
            </Accordion>
            <Accordion style={{marginTop: 5}}>
                <AccordionSummary
                    expandIcon={<ExpandMoreIcon />}
                    aria-controls="panel1a-content"
                    id="panel1a-header"
                    style={{backgroundColor: '#b8b9ba'}}
                >
                    <Typography variant={"subtitle3"}><b>Remove duplicates</b></Typography>
                </AccordionSummary>
                <AccordionDetails style={{backgroundColor: '#f5f6f7'}}>  
                    <Grid container spasing={3}>
                        <Grid item xs={6} style={{marginTop: 15}}>
                            <div style={{marginLeft: 1, width: '99%'}}>
                                <Typography variant={"h6"} style={{fontWeight: 'bold', textAlign: 'center'}}>
                                    All-to-all
                                </Typography>
                                <Typography variant={"subtitle1"} style={{textAlign: 'justify'}}>
                                    <ul>
                                        <li>Possible PCR duplicates of RNA-DNA pairs of reads were removed via <a href="https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0052249"target="_blank" rel="noopener noreferrer"><b>Fastuniq</b></a> tool.</li>
                                        <li>For iMARGI data sets we followed the original paper authors’ recommendations to perform this step after <b>"Add restriction site"</b> step.</li>
                                    </ul>
                                </Typography>   
                            </div>        
                        </Grid>
                        <Grid item xs={6} style={{marginTop: 15}}>
                            <div style={{marginLeft: 1, width: '99%'}}>
                                <Typography variant={"h6"} style={{fontWeight: 'bold', textAlign: 'center'}}>
                                    One-to-all
                                </Typography>
                                <Typography variant={"subtitle1"} style={{textAlign: 'justify'}}>
                                    <ul>
                                        <li>Possible PCR duplicates were removed via <a href="https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0052249"target="_blank" rel="noopener noreferrer"><b>Fastuniq</b></a> and <a href="https://doi.org/10.1371/journal.pone.0163962" target="_blank" rel="noopener noreferrer"><b>SeqKit rmdup</b></a> for paired-end and single-end read data respectively.</li>
                                    </ul>
                                </Typography> 
                            </div>                                 
                        </Grid>
                    </Grid>                                              
                </AccordionDetails>
            </Accordion>
            <Grid container spasing={3}>
                <Grid item xs={6}>
                    <Accordion style={{marginTop: 5}}>
                        <AccordionSummary
                            expandIcon={<ExpandMoreIcon />}
                            aria-controls="panel1a-content"
                            id="panel1a-header"
                            style={{backgroundColor: '#b8b9ba'}}
                        >
                            <Typography variant={"subtitle3"}><b>Add restriction site</b></Typography>
                        </AccordionSummary>
                        <AccordionDetails style={{backgroundColor: '#f5f6f7'}}>   
                            <div>
                                <Typography variant={"h6"} style={{fontWeight: 'bold', textAlign: 'center'}}>
                                    Only for all-to-all data
                                </Typography>
                                <Typography variant={"subtitle1"} style={{textAlign: 'justify'}}>
                                    <ul>
                                        <li><b>iMARGI</b>: following protocol described in the original <a href="https://pubmed.ncbi.nlm.nih.gov/30718424/" target="_blank" rel="noopener noreferrer">paper</a>, read pairs were filtered out if the 5′-most two bases of their DNA end (Read 2) were not CT. In addition, the first two bases of the RNA end (Read 1) were removed, as they are random nucleotides.</li>
                                        <li><b>iMARGI HUVEC</b>: same as first iMARGI.</li>
                                        <li><b>GRID</b>: "AG" halves of the restriction site on 3’ end of DNA reads were restored to "AGCT". Sequencing qualities for added "CT" were copied from "AG". Reads without "AG" on 3’ end were removed.</li>
                                        <li><b>RADICL</b>: no additional steps in this category.</li>
                                        <li><b>Red-C</b>: complex procedure described in the original Red-С <a href="https://pubmed.ncbi.nlm.nih.gov/32479626/" target="_blank" rel="noopener noreferrer">paper</a>.</li>
                                    </ul>
                                </Typography>      
                            </div>
                        </AccordionDetails>
                    </Accordion>
                </Grid>
                <Grid item xs={6}>
                    {null}
                </Grid>
            </Grid>
            <Accordion style={{marginTop: 5}}>
                <AccordionSummary
                    expandIcon={<ExpandMoreIcon />}
                    aria-controls="panel1a-content"
                    id="panel1a-header"
                    style={{backgroundColor: '#b8b9ba'}}
                >
                    <Typography variant={"subtitle3"}><b>Trimmomatic</b></Typography>
                </AccordionSummary>
                <AccordionDetails style={{backgroundColor: '#f5f6f7'}}>                                           
                    <Typography variant={"subtitle1"} style={{textAlign: 'justify'}}>
                        <ul>
                            <li>For all data, we used <b>TRIMMOMATIC</b> with parameters <b>"window size" = 5, "quality threshold" = 26, "minlen" = 14</b> (SLIDINGWINDOW:5:26 MINLEN:14) for the detection of low-quality position in each forward and reverse read.</li>
                            <li>Few low-quality cases with more than 50% of data discarded after trimming were re-trimmed with more permissive parameters: SLIDINGWINDOW:5:22 LEADING:22 MINLEN:14</li>
                            <li>We ran <b>fastQC</b> twice for sequencing quality control: immediately after downloading the raw data and after trimming it.</li>
                        </ul>
                    </Typography>      
                </AccordionDetails>
            </Accordion>
            <Accordion style={{marginTop: 5}}>
                <AccordionSummary
                    expandIcon={<ExpandMoreIcon />}
                    aria-controls="panel1a-content"
                    id="panel1a-header"
                    style={{backgroundColor: '#b8b9ba'}}
                >
                    <Typography variant={"subtitle3"}><b>Mapping</b></Typography>
                </AccordionSummary>
                <AccordionDetails style={{backgroundColor: '#f5f6f7'}}>  
                    <Grid container spasing={3}>
                        <Grid item xs={6} style={{marginTop: 15}}>
                            <div style={{marginLeft: 1, width: '99%'}}>
                                <Typography variant={"h6"} style={{fontWeight: 'bold', textAlign: 'center'}}>
                                    All-to-all
                                </Typography>
                                <Typography variant={"subtitle1"} style={{textAlign: 'justify'}}>
                                    <ul>
                                        <li>DNA-parts and RNA-parts of contacts were independently mapped to the genome (hg38 and mm10) with <a href="https://pubmed.ncbi.nlm.nih.gov/31375807/" target="_blank" rel="noopener noreferrer"><b>HISAT2</b></a> program (version 2.1.0) (parameters for DNA-parts: <b>-k 100 --no-spliced-alignment --no-softclip</b>, for RNA-parts: <b>-k 100 --no-softclip --dta-cufflinks --known-splicesite-infile</b>). 
                                        Splice site annotations for the respective genomes were obtained using the "hisat2_extract_splice_sites.py" script [<a href="https://pubmed.ncbi.nlm.nih.gov/31375807/" target="_blank" rel="noopener noreferrer">PMID: 31375807</a>].
                                        </li>
                                        <li>SAM files were filtered for unique mappings with at most 2 mismatches relative to the reference genome.</li>
                                        <li>
                                            <div>
                                                <Accordion>
                                                    <AccordionSummary
                                                        expandIcon={<ExpandMoreIcon />}
                                                        aria-controls="panel1a-content"
                                                        id="panel1a-header"
                                                        style={{backgroundColor: '#b8b9ba'}}
                                                    >
                                                        <Typography variant={"subtitle1"} style={{fontWeight: 'bold'}}>Determination of orientation of the RNA-parts of the reads</Typography>
                                                    </AccordionSummary>
                                                    <AccordionDetails style={{backgroundColor: '#f5f6f7'}}>
                                                        <Typography variant="subtitle1">
                                                            &emsp;&emsp;During the data processing steps we noticed that RNA-parts of the contacts in a number of experiments could represent not those parts of genes sequences, from which the corresponding RNAs were transcribed, but reverse complements of those sequences. In other words, In some experiments, the “forward” strand of the cDNA read part could be sequenced, and in others — the “reverse” one.
                                                            <br/>&emsp;&emsp;In order to determine whether that hypothesis was true,  an experiment was carried out based on the following assumption: in any viable cell line, the genes of ribosomal proteins must be highly expressed, and it is likely that a significant part of the data we have is precisely the messenger RNAs of these proteins contacting with chromatin on their way to nuclear pores. For each data set, we can select the RNA-parts of the contacts that were aligned within the coordinates of the genes of ribosomal proteins on both chains, and then calculate the fractions of reads aligned on the gene chain and the chain complementary to the gene (Figure 1).
                                                            <br/>&emsp;&emsp;If more reads were mapped to the gene chain than to its complementary one, then during sequencing, the “correct” cDNA chain was read corresponding to the sequence of RNA in contact with chromatin, and vice versa. <b>The RNA-parts from experiments that have “wrong” cDNA chain sequences needed to be reversed before future analysis, although it was not obviously stated in any of the original papers.</b>
                                                            <br/>&emsp;&emsp;Notably, when looking at human data, the following strands were read: the “correct” cDNA strand (in the case of the <b>Red-C</b> experiment); the reverse strand (for <b>GRID-seq</b> and <b>iMARGI</b> experiments). Whereas in the case of <b>MARGI</b>, it seems that mostly a random strand was read, and the orientations of the RNA-parts of the contacts were lost (Figure 1). It can be seen that for some <b>MARGI</b> data sets (SRR5278097, SRR5278097, SRR5278100, SRR5278102) strands were uniquely determined. However, due to the low gene coverage of ribosomal proteins in these data sets and to the loss of RNA-parts orientations in other <b>MARGI</b> data sets, we decided to exclude the <b>MARGI</b> experiment from any further analysis.
                                                            <br/>&emsp;&emsp;Looking at mouse data, we can see that the <b>mouse GRID-seq</b> behaves like a <b>human GRID-seq</b>, and <b>RADICL-seq</b> behaves like <b>Red-C</b> (Figure 2).
                                                            <br/>
                                                            <br/>
                                                            <img src={wins} alt="wins" style={{border: '1px solid #000000'}} width='100%'/>
                                                            <span style={{fontWeight: 'bold'}}>Figure 1.</span> Estimation of orientation of RNA-parts of the contacts for human data sets on a subsample of 53 ribosomal protein-coding genes (see article for details).
                                                            <br/>
                                                            <br/>
                                                            <img src={mus_wins} alt="mus_wins" style={{border: '1px solid #000000'}} width='100%'/>
                                                            <span style={{fontWeight: 'bold'}}>Figure 2.</span> Estimation of orientation of RNA-parts of the contacts for mouse data sets on a subsample of 51 ribosomal protein-coding genes (see article for details).
                                                        </Typography>
                                                    </AccordionDetails>
                                                </Accordion>
                                            </div>
                                        </li>
                                    </ul>
                                </Typography>   
                            </div>        
                        </Grid>
                        <Grid item xs={6} style={{marginTop: 15}}>
                            <div style={{marginLeft: 1, width: '99%'}}>
                                <Typography variant={"h6"} style={{fontWeight: 'bold', textAlign: 'center'}}>
                                    One-to-all
                                </Typography>
                                <Typography variant={"subtitle1"} style={{textAlign: 'justify'}}>
                                    <ul>
                                        <li>Paired-end reads (DNA-parts of contacts) were aligned as pairs with <a href="https://pubmed.ncbi.nlm.nih.gov/31375807/" target="_blank" rel="noopener noreferrer"><b>HISAT2</b></a> program (version 2.1.0) (<b>-k 100 --no-spliced-alignment --no-softclip --no-discordant --no-mixed</b> flags to filter out improperly paired reads during the alignment step), and single-end reads were also mapped by the <b>hisat2</b> program (<b>-k 100 --no-spliced-alignment --no-softclip</b>).</li>
                                        <li>SAM files were filtered for unique mappings with at most 2 mismatches relative to the reference genome.</li>
                                    </ul>
                                </Typography> 
                            </div>                                 
                        </Grid>
                    </Grid>
                </AccordionDetails>
            </Accordion>
            <Grid container spasing={3}>
                <Grid item xs={6}>
                    <Accordion style={{marginTop: 5}}>
                        <AccordionSummary
                            expandIcon={<ExpandMoreIcon />}
                            aria-controls="panel1a-content"
                            id="panel1a-header"
                            style={{backgroundColor: '#b8b9ba'}}
                        >
                            <Typography variant={"subtitle3"}><b>CIGAR</b></Typography>
                        </AccordionSummary>
                        <AccordionDetails style={{backgroundColor: '#f5f6f7'}}>     
                            <div>
                                <Typography variant={"h6"} style={{fontWeight: 'bold', textAlign: 'center'}}>
                                    Only for all-to-all data
                                </Typography>                                      
                                <Typography variant={"subtitle1"} style={{textAlign: 'justify'}}>
                                &emsp;&emsp;To search for and process reads with splicing, corresponding to RNA-parts of the contacts, the mapping information presented in <b>the CIGAR field</b> was analysed.<br/>
                                &emsp;&emsp;Reads can be mapped in three ways:
                                    <ul>
                                        <li>with a complete match with the reference genome along the entire length of the read (CIGAR of the <b>"25M"</b> type, where M — match). <b>Such reads went on without changes</b>;</li>
                                        <li>containing one missing interval (CIGAR of the form <b>"30M65N10M"</b>, where M — match, N — skipped region). <b>For such reads, the longest section mapped without breaks was left</b>;</li>
                                        <li>more complex mapping options (reads with complex splicing): multiple missing intervals (CIGAR of the form: <b>"8M1113N56M79N8M"</b>), mapping with insertions or deletions. <b>All such reads were removed</b>.</li>
                                    </ul>
                                </Typography>      
                            </div>
                        </AccordionDetails>
                    </Accordion>
                </Grid>
                <Grid item xs={6}>
                    {null}
                </Grid>
            </Grid>
            <Accordion style={{marginTop: 5}}>
                <AccordionSummary
                    expandIcon={<ExpandMoreIcon />}
                    aria-controls="panel1a-content"
                    id="panel1a-header"
                    style={{backgroundColor: '#b8b9ba'}}
                >
                    <Typography variant={"subtitle3"}><b>BlackList</b></Typography>
                </AccordionSummary>
                <AccordionDetails style={{backgroundColor: '#f5f6f7'}}>                                           
                    <Typography variant={"subtitle1"} style={{textAlign: 'justify'}}>
                        <ul>
                            <li>The authors of the RADICL-seq protocol [<a href="https://pubmed.ncbi.nlm.nih.gov/32094342/" target="_blank" rel="noopener noreferrer">PMID:  32094342</a>] propose to exclude from the analysis those contacts DNA-parts of which fall into regions from the <b>ENCODE BlackList</b> for hg38 and mm10 (accession: <a href="https://www.encodeproject.org/annotations/ENCSR636HFF/" target="_blank" rel="noopener noreferrer">ENCSR636HFF</a>); and so we did.</li>
                            <li><b>NB!</b> For one-to-all data, "input" was not filtered by the BlackList to avoid edge effects.</li>
                        </ul>
                    </Typography>      
                </AccordionDetails>
            </Accordion>
            <Accordion style={{marginTop: 5}}>
                <AccordionSummary
                    expandIcon={<ExpandMoreIcon />}
                    aria-controls="panel1a-content"
                    id="panel1a-header"
                    style={{backgroundColor: '#b8b9ba'}}
                >
                    <Typography variant={"subtitle3"}><b>Annotation of RNA-parts of contacts</b>  (all replicas are already merged)</Typography>
                </AccordionSummary>
                <AccordionDetails style={{backgroundColor: '#f5f6f7'}}>  
                    <Grid container spasing={3}>
                    <Typography variant={"subtitle1"} style={{textAlign: 'justify'}}>
                        &emsp;&emsp;<b>The replicas have been merged</b> to increase the amount of data and coverage.
                        In the <span style={{color: 'red',fontWeight: 'bold'}}>RNA-Chrom</span> database we use a <b>general gene annotation collected from</b> various sources (for hg38 and mm10 genomes):
                        <ul>
                            <li><b>For human data sets</b> (only those genes that belong to canonical chromosomes):</li>
                            <TableContainer> {/* style={{width: `calc(100% - 200px)`}} */}
                                <Table className={classes.table} size="small" aria-label="a dense table">
                                    <TableHead>
                                        <TableRow>
                                            <TableCell align="center" style={{whiteSpace: 'wrap', padding: 3, border: '1px solid #000000', backgroundColor: '#009688', color: 'white'}}>Annotation</TableCell>
                                            <TableCell align="center" style={{whiteSpace: 'wrap', padding: 3, border: '1px solid #000000', backgroundColor: '#009688', color: 'white'}}>Source</TableCell>
                                            <TableCell align="center" style={{whiteSpace: 'wrap', padding: 3, border: '1px solid #000000', backgroundColor: '#009688', color: 'white'}}>Additional download tool</TableCell>
                                            <TableCell align="center" style={{whiteSpace: 'wrap', padding: 3, border: '1px solid #000000', backgroundColor: '#009688', color: 'white'}}>Number of genes (RNAs) in the annotation</TableCell>
                                            <TableCell align="center" style={{whiteSpace: 'wrap', padding: 3, border: '1px solid #000000', backgroundColor: '#009688', color: 'white'}}>Description</TableCell>
                                        </TableRow>
                                    </TableHead>
                                    <TableBody>
                                    {rows.map((row, index) => (
                                        <TableRow key={row.annotation + index}>
                                            <TableCell component="th" scope="row" style={{whiteSpace: 'wrap',padding: 3, backgroundColor: 'white', border: '1px solid #000000'}}>
                                                {row.annotation}
                                            </TableCell>
                                            <TableCell align="left" style={{whiteSpace: 'wrap',padding: 3, backgroundColor: 'white', border: '1px solid #000000'}}>{row.from}</TableCell>
                                            <TableCell align="left" style={{whiteSpace: 'wrap',padding: 3, backgroundColor: 'white', border: '1px solid #000000'}}>{row.how}</TableCell>
                                            <TableCell align="left" style={{whiteSpace: 'wrap',padding: 3, backgroundColor: 'white', border: '1px solid #000000'}}>{row.number}</TableCell>
                                            <TableCell align="left" style={{whiteSpace: 'wrap',padding: 3, backgroundColor: 'white', border: '1px solid #000000'}}>{row.description}</TableCell>
                                        </TableRow>
                                    ))}
                                    </TableBody>
                                </Table>
                            </TableContainer>
                            <li><b>For murine data sets</b> (only those genes that belong to canonical chromosomes):</li>
                            <TableContainer> {/* style={{width: `calc(100% - 200px)`}} */}
                                <Table className={classes.table} size="small" aria-label="a dense table">
                                    <TableHead>
                                        <TableRow>
                                            <TableCell align="center" style={{whiteSpace: 'wrap', padding: 3, border: '1px solid #000000', backgroundColor: '#009688', color: 'white'}}>Annotation</TableCell>
                                            <TableCell align="center" style={{whiteSpace: 'wrap', padding: 3, border: '1px solid #000000', backgroundColor: '#009688', color: 'white'}}>Source</TableCell>
                                            <TableCell align="center" style={{whiteSpace: 'wrap', padding: 3, border: '1px solid #000000', backgroundColor: '#009688', color: 'white'}}>Additional download tool</TableCell>
                                            <TableCell align="center" style={{whiteSpace: 'wrap', padding: 3, border: '1px solid #000000', backgroundColor: '#009688', color: 'white'}}>Number of genes (RNAs) in the annotation</TableCell>
                                            <TableCell align="center" style={{whiteSpace: 'wrap', padding: 3, border: '1px solid #000000', backgroundColor: '#009688', color: 'white'}}>Description</TableCell>
                                        </TableRow>
                                    </TableHead>
                                    <TableBody>
                                    {rows_2.map((row, index) => (
                                        <TableRow key={row.annotation + index}>
                                            <TableCell component="th" scope="row" style={{whiteSpace: 'wrap',padding: 3, backgroundColor: 'white', border: '1px solid #000000'}}>
                                                {row.annotation}
                                            </TableCell>
                                            <TableCell align="left" style={{whiteSpace: 'wrap',padding: 3, backgroundColor: 'white', border: '1px solid #000000'}}>{row.from}</TableCell>
                                            <TableCell align="left" style={{whiteSpace: 'wrap',padding: 3, backgroundColor: 'white', border: '1px solid #000000'}}>{row.how}</TableCell>
                                            <TableCell align="left" style={{whiteSpace: 'wrap',padding: 3, backgroundColor: 'white', border: '1px solid #000000'}}>{row.number}</TableCell>
                                            <TableCell align="left" style={{whiteSpace: 'wrap',padding: 3, backgroundColor: 'white', border: '1px solid #000000'}}>{row.description}</TableCell>
                                        </TableRow>
                                    ))}
                                    </TableBody>
                                </Table>
                            </TableContainer>
                        </ul>
                    </Typography>
                        <Typography variant={"subtitle1"} style={{textAlign: 'justify'}}>
                            &emsp;&emsp;For all annotations, the following columns are left: "chr", "start", "end", "strand", gene_type", "gene_name".<br/>
                            &emsp;&emsp;If the gene names repeated (for example, the "WASIR1" gene was found twice in the gencode annotation), 
                            then a serial number was assigned to them so that <b>all gene names in the database were unique</b> ("WASIR1_1" and 
                            "WASIR1_2", respectively). All of these genes (RNAs) with contact metrics can be found in the <b>"from RNA"</b> analysis 
                            by clicking on <b>"Browse all RNAs"</b>.
                        </Typography>
                        <Grid item xs={6} style={{marginTop: 15}}>
                            <div style={{marginLeft: 1, width: '99%'}}>
                                <Typography variant={"h6"} style={{fontWeight: 'bold', textAlign: 'center'}}>
                                    All-to-all
                                </Typography>
                                <Typography variant={"subtitle1"} style={{textAlign: 'justify'}}>
                                    <ul>
                                        <li>In case the RNA-part of the contact intersects a gene by at least 1 nucleotide, this RNA-part is assigned to this gene. If the RNA-part of the contact intersects more than one gene at the same strand, this RNA-part is assigned to the gene showing the highest coverage by RNA-parts, which is determined as the total number of RNA-parts mapped to the gene, normalized to the gene length. Only contacts with RNA-parts that intersect genes (including X-RNAs) were used (<b>"After RNA-part annotation"</b> on the <b>Metadata page</b>). The others were named <b>"Singletons"</b> and were not used.</li>
                                    </ul>
                                </Typography>
                            </div>   
                        </Grid>      
                        <Grid item xs={6} style={{marginTop: 15}}>
                            <div style={{marginLeft: 1, width: '99%'}}>
                                <Typography variant={"h6"} style={{fontWeight: 'bold', textAlign: 'center'}}>
                                    One-to-all
                                </Typography>
                                <Typography variant={"subtitle1"} style={{textAlign: 'justify'}}>
                                    <ul>
                                        <li>Since, unlike "all-to-all" data, "one-to-all" data does not have any information on the RNA-parts of contacts, 
                                            we assigned to each DNA-part from the experiment the coordinates of the corresponding RNA gene studied 
                                            in the corresponding experiment.</li>
                                    </ul>
                                </Typography>
                            </div>   
                        </Grid>      
                    </Grid>                                            
                </AccordionDetails>
            </Accordion>
            <Grid container spasing={3}>
                <Grid item xs={6}>
                    <Accordion style={{marginTop: 5}}>
                        <AccordionSummary
                            expandIcon={<ExpandMoreIcon />}
                            aria-controls="panel1a-content"
                            id="panel1a-header"
                            style={{backgroundColor: '#b8b9ba'}}
                        >
                            <Typography variant={"subtitle3"}><b>X-RNAs assembly</b></Typography>
                        </AccordionSummary>
                        <AccordionDetails style={{backgroundColor: '#f5f6f7'}}>   
                            <div>
                                <Typography variant={"h6"} style={{fontWeight: 'bold', textAlign: 'center'}}>
                                    Only for all-to-all data
                                </Typography>                                        
                                <Typography variant={"subtitle1"} style={{textAlign: 'justify'}}>
                                    &emsp;&emsp;A substantial amount of RNA-parts was not annotated by any of the used gene annotations. 
                                    Some of these unknown parts may belong to unknown ncRNAs. 
                                    Transcripts not corresponding to any known gene (from GENCODE database: annotation version 35 for human and 25 for mice) were assembled using <a href="https://pubmed.ncbi.nlm.nih.gov/25690850/" target="_blank" rel="noopener noreferrer"><b>StringTie</b></a> and then filtered by several criteria, e.g. length, distance to the closest known gene on the same strand, conservation on different taxonomic levels and high coverage. 
                                    We called the group of transcripts that passed all the filters "X-RNAs". 
                                    To the X-RNAs the unique IDs were assigned based on their genome location.
                                    E.g. <b>X_1_13_a_hg38</b> is the X-RNA located on chromosome 1 of the human genome (version hg38) in the 13th bin (each chromosome was divided into bins of 10,000 bp). 
                                    And the letter "a" indicates that the source gene of this X-RNA is the first in the bin relative to the beginning of the corresponding chromosome.
                                </Typography>    
                            </div>  
                        </AccordionDetails>
                    </Accordion>
                </Grid>
                <Grid item xs={6}>
                    {null}
                </Grid>
            </Grid>
            <Accordion style={{marginTop: 5}}>
                <AccordionSummary
                    expandIcon={<ExpandMoreIcon />}
                    aria-controls="panel1a-content"
                    id="panel1a-header"
                    style={{backgroundColor: '#b8b9ba'}}
                >
                    <Typography variant={"subtitle3"}><b>Contacts normalization</b> (all replicas are already merged)</Typography>
                </AccordionSummary>
                <AccordionDetails style={{backgroundColor: '#f5f6f7'}}> 
                    <Grid container spasing={3}>
                        <Typography variant={"subtitle1"} style={{textAlign: 'justify'}}>
                            <br/>There are 4 types of normalization in our data base:
                            <ul>
                            <li><b>Normalized</b> — background-normalized contacts.</li>
                            <li><b>Raw</b> — not-normalized contacts.</li>
                            <li><b>Norm. & in peaks (one-to-all)</b> — background-normalized contacts crossing MACS2 peaks (only available for one-to-all data).</li>
                            <li><b>Raw & in peaks (one-to-all)</b> — not-normalized contacts crossing MACS2 peaks (only available for one-to-all data).</li>
                            </ul>
                        </Typography>
                        <Grid item xs={6} style={{marginTop: 15}}>
                            <div style={{marginLeft: 1, width: '99%'}}>
                                <Typography variant={"h6"} style={{fontWeight: 'bold', textAlign: 'center'}}>
                                    All-to-all
                                </Typography>
                                <Typography variant={"subtitle1"} style={{textAlign: 'justify'}}>
                                    <ul>
                                        <li><b>n-reads (raw)</b> = 1, because the contact corresponds to 1 pair of RNA- and DNA-parts.</li>
                                        <li><b>n-reads (normalized)</b></li>
                                            <ul>
                                                <li><b>n-reads (background)</b>. 
                                                    To account for the level of background ligation in the experimental procedure, we estimated the total number of mRNAs trans-contacts with each genomic site (not with the parental chromosome). 
                                                    The genome was divided into 500 bp bins, and for each bin we summed up the number of trans-contacts made with this bin by protein-coding mRNAs (the 50 most contacting and 1000 least contacting mRNAs were removed for each data set). 
                                                    We smoothed the obtained signal using <a href="https://github.com/favorov/stereogene/tree/master/www/StereoGene_read_me.doc" target="_blank" rel="noopener noreferrer"><b>StereoGene (v.2.20)</b></a> (parameters: bin=500, wSize=1000000, flankSize=10000, kernelSigma=3000, kernelType=NORMAL) and used it as a background signal – <b>n-reads (background)</b>.</li>
                                                <li>Then we normalized each <b>n-reads (raw)</b> by the "value of the background signal" (<b>n-reads (background)</b>) 
                                                    in the genomic coordinate where the DNA-parts were mapped. 
                                                    To work with DNA-parts mapped to the regions with zero value of the background signal, 
                                                    we added the pseudocount to the <b>n-reads (background)</b>. 
                                                    And thus, we obtained <b>n-reads (normalized)</b>. 
                                                    This normalization ensures that the sum of the normalized values is equal to the number of reads in the experiment.</li>
                                            </ul>
                                        {/* <img src={n_obs} alt="n_obs" style={{marginTop: 15, textAlign: 'centr'}} width='50%'/> \textbf{n-reads (without norm.)} = 1 */}
                                        <div style={{textAlign: 'center', border: '1px solid #000000'}}>
                                            <img src={n_norm2} alt="n_norm" style={{marginTop: 15, marginBottom: 10}} width='100%'/> {/* \textbf{n-reads (normalized)}=\frac{\text{n-reads (raw)}}{\text{n-reads (background) + 0.5}}*\frac{\sum \text{n-reads (raw)}}{{\sum \frac{\text{n-reads (raw)}}{\text{n-reads (background) + 0.5}}}} */}
                                        </div>
                                    </ul>
                                </Typography>
                            </div> 
                        </Grid>      
                        <Grid item xs={6} style={{marginTop: 15}}>
                            <div style={{marginLeft: 1, width: '99%'}}>
                                <Typography variant={"h6"} style={{fontWeight: 'bold', textAlign: 'center'}}>
                                    One-to-all
                                </Typography>
                                <Typography variant={"subtitle1"} style={{textAlign: 'justify'}}>
                                    <ul>
                                        <li><b>n-reads (raw)</b> = 1, because the contact corresponds to 1 pair of RNA- and DNA-parts.</li>
                                        <li><b>n-reads (normalized)</b></li>
                                            <ul>
                                                <li><b>n-reads (background)</b>. 
                                                    The genome was divided into 500 bp bins, and for each bin we summed up the number of input-contacts whose centers fell into the corresponding bin (if there was no input library for a particular experiment, the background was made constant, that is, we assigned exactly one contact to each bin). 
                                                    We smoothed the obtained signal using <a href="https://github.com/favorov/stereogene/tree/master/www/StereoGene_read_me.doc" target="_blank" rel="noopener noreferrer"><b>StereoGene (v.2.20)</b></a> (parameters: bin=500, wSize=1000000, flankSize=10000, kernelSigma=3000, kernelType=NORMAL) and used it as a background signal – <b>n-reads (background)</b>.</li>
                                                <li>Then we normalized each <b>n-reads (raw)</b> by the "value of the background signal" (<b>n-reads (background)</b>) 
                                                    in the genomic coordinate where the DNA-parts were mapped. 
                                                    To work with DNA-parts mapped to the regions with zero value of the background signal, 
                                                    we added the pseudocount to the <b>n-reads (background)</b>. 
                                                    And thus, we obtained <b>n-reads (normalized)</b>. 
                                                    This normalization ensures that the sum of the normalized values is equal to the number of reads in the experiment.</li>
                                            </ul>
                                        {/* <img src={n_obs} alt="n_obs" style={{marginTop: 15, textAlign: 'centr'}} width='50%'/> \textbf{n-reads (without norm.)} = 1 */}
                                        <div style={{textAlign: 'center', border: '1px solid #000000'}}>
                                            <img src={n_norm2} alt="n_norm2" style={{marginTop: 15, marginBottom: 10}} width='100%'/> {/* \textbf{n-reads (normalized)}=\frac{\text{n-reads (raw)}}{\text{n-reads (background) + 0.5}}*\frac{\sum \text{n-reads (raw)}}{{\sum \frac{\text{n-reads (raw)}}{\text{n-reads (background) + 0.5}}}} */}
                                        </div>
                                        <li><b>n-reads (norm./raw & in peaks)</b></li>
                                            <ul>
                                                <li>In addition to the appropriate normalization, only those contacts are considered, the DNA-parts of which intersected the peaks by at least 1 bp.</li>
                                                <li><b>Peak calling with <a href="https://hbctraining.github.io/Intro-to-ChIPseq/lessons/05_peak_calling_macs.html" target="_blank" rel="noopener noreferrer">MACS2</a></b>. On the merged replicas, we launch MACS2 with the following parameters: -Q 0.05 -FORMAT BED(if Single-end) / BEDPE(if Paired-end)</li>
                                            </ul>
                                    </ul>
                                </Typography>      
                            </div>                                           
                        </Grid>  
                    </Grid>    
                </AccordionDetails>
            </Accordion>
            <Accordion style={{marginTop: 5}}>
                <AccordionSummary
                    expandIcon={<ExpandMoreIcon />}
                    aria-controls="panel1a-content"
                    id="panel1a-header"
                    style={{backgroundColor: '#b8b9ba'}}
                >
                    <Typography variant={"subtitle3"}><b>Annotation of DNA-parts of contacts</b></Typography>
                </AccordionSummary>
                <AccordionDetails style={{backgroundColor: '#f5f6f7'}}>                                           
                    <Typography variant={"subtitle1"} style={{textAlign: 'justify'}}>
                        &emsp;&emsp;For each gene we calculated the following five intervals: <b>gene body</b>, <b>5 Kb upstream</b> and <b>5 Kb downstream</b> from the gene, <b>from 5 to 50 Kb upstream</b> and <b>from 5 
                        to 50 Kb downstream</b> from the gene according to the gene strand. 
                        Hence, the intervals don't intersect.
                        DNA-parts of the contacts were annotated by all the intervals. <b>The coordinates of the DNA-part and the corresponding intervals must intersect by at least 1 nucleotide.</b>
                    </Typography>     
                </AccordionDetails>
            </Accordion>
            {/* <Accordion style={{marginTop: 5}}>
                <AccordionSummary
                    expandIcon={<ExpandMoreIcon />}
                    aria-controls="panel1a-content"
                    id="panel1a-header"
                    style={{backgroundColor: '#b8b9ba'}}
                >
                    <Typography variant={"subtitle3"}><b>Database</b></Typography>
                </AccordionSummary>
                <AccordionDetails style={{backgroundColor: '#f5f6f7'}}>                                           
                    <Typography variant={"subtitle1"} style={{textAlign: 'justify'}}>
                        Processing a particular RNA (for example XIST) with chromatin data (data type "One vs all")
                        <ul>
                            <li>All steps before alignment are the same like in <b>Processing genome-wide RNA-chromatin interactome data</b></li>
                        </ul>
                    </Typography>      
                </AccordionDetails>
            </Accordion>                 */}
        </TableContainer>
    </div>
  );
};

/**
 * Redux state-to-props mapping for this page.
 *
 * No store values are mapped: the result is always an empty props object.
 * The (unused) `state` parameter is kept so the function's signature and
 * arity stay exactly as before.
 *
 * @param {Object} state - Redux store state (ignored).
 * @returns {Object} A fresh empty object on every call.
 */
const mapStateToProps = (state) => ({});

// Default export: BasicProtocol wrapped by react-redux `connect`, called with
// mapStateToProps and an object containing the `handleHeader` action creator.
export default connect(mapStateToProps, { handleHeader })(BasicProtocol);