import React from 'react';

function AboutSourcesVda() {
  return (
    <div id="section12">
      <h3>Variant-Disease Associations</h3>
      <p>
        The variant-disease information in DISGENET is organized according to two types of source databases:
      </p>
      <ul>
        <li className="bullet-item">
          CURATED: VDAs from
          <a className="tt" target="blank" href="https://www.uniprot.org/">
            {' '}
            UniProt/SwissProt
          </a>
          ,
          <a className="tt" target="blank" href="https://www.ebi.ac.uk/gwas/">
            {' '}
            ClinVar
          </a>
          ,
          <a className="tt" target="blank" href="https://phewascatalog.org/">
            {' '}
            Phewas Catalog
          </a>
          , and the
          <a className="tt" target="blank" href="https://www.ebi.ac.uk/gwas/">
            {' '}
            NHGRI-EBI GWAS Catalog
          </a>
          .
        </li>
        <li className="bullet-item">
          BIOBANK: VDAs from GWAS performed on biobanks. We include GWAS studies performed on{' '}
          <a className="tt" target="blank" href="https://www.finngen.fi/en">
            {' '}
            Finngen
          </a>{' '}
          and{' '}
          <a className="tt" target="blank" href="https://pheweb.org/UKB-TOPMed/">
            {' '}
            UK Biobank
          </a>
          .
          <ul>
            <li className="bullet-item">
              <a className="tt" target="blank" href="https://finngen.gitbook.io/documentation">
                {' '}
                FinnGen GWAS
              </a>
              : We use{' '}
              <a
                className="tt"
                target="blank"
                href="https://www.finngen.fi/en/results-based-full-finngen-cohort-500000-participants-released"
              >
                {' '}
                Release 12 (November, 2024)
              </a>{' '}
              GWAS summary statistics. R12 consists of:
              <ul>
                <li className="bullet-item">
                  A total of 500,348 individuals, including 282,064 females and 218,284 males
                </li>
                <li className="bullet-item">2,502 health endpoints</li>
                <li className="bullet-item">{'>'} 21 M variants </li>
              </ul>
              Phenotype data in FinnGen comes from national health registers covering the entire lifespan of
              the study subjects.{' '}
              <a
                className="tt"
                target="blank"
                href="https://www.finngen.fi/en/researchers/clinical-endpoints"
              >
                {' '}
                Clinical disease endpoints
              </a>{' '}
              are obtained by combining data from different registries, such as ICD-10 codes, drug
              prescription data and causes of death. These endpoints were mapped to UMLS CUIs. <br />
              We selected a p-value threshold of 10-6. The final FinnGen GWAS dataset integrated into DISGENET
              comprises 2.7M VDA between 2,026 clinical endpoints and 636,644 SNPs.
            </li>
          </ul>
          <li className="bullet-item">
            <a className="tt" target="blank" href="UK Biobank GWAS/PheWAS">
              {' '}
              UK Biobank GWAS/PheWAS
            </a>{' '}
            : The UK Biobank GWAS/PheWAS dataset includes genome-wide associations for EHR-derived ICD billing
            codes from the White British participants of the UK Biobank. Phenotypes were classified into 1,419
            broad PheWAS codes with counts ranging from 51 – 78,000 cases and 167,000 – 407,000 controls. All
            individuals were imputed using TOPMed, resulting in ~57 million variants after filtering for
            MAF≥0.005%. Analyses on binary outcomes were conducted using SAIGE, adjusting for genetic
            relatedness, sex, birth year and the first 4 principal components. Phenotypes are represented
            using Phecodes, and were mapped to UMLS CUIs.{' '}
          </li>
        </li>
        We selected a p-value threshold of 10<sup>-6</sup>. The final UK Biobank GWAS dataset integrated into
        DISGENET comprises 545,400 VDA between 1,330 clinical endpoints and 213,328 SNPs.
        <li className="bullet-item">
          TEXT MINING HUMAN: VDAs identified by text mining from the scientific literature.
        </li>
        <li className="bullet-item">
          ALL: VDAs from previous sources and VDAs obtained via text mining approaches.
        </li>
      </ul>
    </div>
  );
}

// Default export: the component is this module's only public binding.
export default AboutSourcesVda;
