/**
 * Display metadata for a single data source.
 */
export type DataSource = {
  /** Full name of the data source. */
  title: string;
  /** Abbreviated name of the data source. */
  short_title: string;
  /** Description as a JSX element, so it can carry markup (e.g. links) rather than plain text only. */
  description: JSX.Element;
};

/**
 * Dictionary of {@link DataSource} entries indexed by a string or numeric key.
 *
 * Because this is an open index signature, the compiler types every lookup as
 * `DataSource` even for keys that are not present (unless
 * `noUncheckedIndexedAccess` is enabled), so callers should guard against
 * `undefined` when the key is not known to exist.
 */
export type DataSourceDictionary = Record<string | number, DataSource>;

/**
 * Static table of display metadata (title, short title, description) for every
 * known data source.
 *
 * Entries are keyed either by a short string identifier (e.g. `sra`, `uniprot`)
 * or by a number.
 * NOTE(review): the numeric keys presumably mirror database ids assigned
 * elsewhere (backend) — confirm before renumbering or reusing any of them.
 *
 * Formatting note for descriptions: JSX discards whitespace that is adjacent to
 * a line break, so when a link (or text) continues on the next source line an
 * explicit `{' '}` is required to keep the words separated in the rendered
 * output.
 */
export const dataSources: DataSourceDictionary = {
  // --- Entries addressed by string identifier ---
  sra: {
    title: 'Sequence Read Archive',
    short_title: 'SRA',
    description: <>
      SRA is the largest publicly available repository of high throughput raw sequencing data.
      The archive contains data from all branches of life as well as metagenomic and environmental surveys.
      </>
  },
  mgyp: {
    title: 'mgy by ebi.ac.uk (next)',
    short_title: 'MGYP',
    description: <>
      MGnify provides data derived from sequencing microbial populations that are present in particular environments.
      </>
  },
  uniprot: {
    title: 'uniprot by ebi.ac.uk',
    short_title: 'uniprot',
    description: <>
      UniProt provides high-quality protein sequences and functional information as well as links out to many databases.
      </>
  },
  alphafolddb: {
    title: 'AlphaFold Protein Structure Database',
    short_title: 'alphafold',
    description: <>
      Protein structures predicted using AlphaFold.
      </>
  },
  rcsb: {
    title: 'RCSB PDB Protein Data Bank',
    short_title: 'rcsb',
    description: <>
      RCSB PDB including calculated structures.
      </>
  },
  esmatlas: {
    title: 'ESM Metagenomic Atlas',
    short_title: 'esmatlas',
    description: <>
      An open atlas of hundreds of millions predicted metagenomic protein structures.
      </>
  },
  proteineer: {
    title: 'Structure Predicted by Proteineer',
    short_title: 'proteineer',
    description: <>
      These protein structures were calculated by Proteineer.
      </>
  },
  // --- Entries addressed by numeric key ---
  1: {
    title: 'Broad Institute',
    short_title: 'Broad Institute',
    description: <>
      All publicly available sequences from the Broad Institute
      </>
  },
  2: {
    title: 'eggnog 4',
    short_title: 'eggnog4',
    description: <>
      A hierarchical, functionally and phylogenetically annotated orthology resource Version 4
      </>
  },
  3: {
    title: 'eggnog 5',
    short_title: 'eggnog5',
    description: <>
      A hierarchical, functionally and phylogenetically annotated orthology resource Version 5
      </>
  },
  4: {
    title: 'genbank',
    short_title: 'genbank',
    description: <>
      Complete NCBI Genbank database.
      </>
  },
  5: {
    title: 'transportDB2',
    short_title: 'transportDB2',
    description: <>
      A relational database describing the predicted cytoplasmic membrane transport protein complement for organisms
      whose complete genome sequences are available.
      </>
  },
  6: {
    title: 'MEROPS',
    short_title: 'MEROPS',
    description: <>
      The <a href={'https://doi.org/10.1093/nar/gkx1134'} target="_blank" rel="noreferrer">MEROPS</a> database is an information resource for peptidases
      (also termed proteases, proteinases and proteolytic enzymes) and the proteins that inhibit them.
      </>
  },
  7: {
    title: 'MGENES',
    short_title: 'KEGG MGENES',
    description: <>
      KEGG MGENES is a collection of gene catalogs from large scale environmental sequencing studies.
      MGENES links sequences, functions and environmental parameters based on the KEGG reference pathway database.
      MGENES also provides publicly available reference gene catalogs generated from metagenomes as well as complete
      genomes with the use of sequence clustering methods.
      </>
  },
  8: {
    title: 'patents by ebi.ac.uk',
    short_title: 'patents',
    // Each line below ends in text and the next one starts with a link, so an
    // explicit {' '} is needed to keep a space between them when rendered.
    description: <>
      <a href={'https://www.ebi.ac.uk/patentdata/proteins'} target="_blank" rel="noreferrer">Patent proteins</a> cover sequences of{' '}
      <a href={'https://www.epo.org/'} target="_blank" rel="noreferrer">EPO (European Patent Office)</a> proteins,{' '}
      <a href={'https://www.jpo.go.jp/'} target="_blank" rel="noreferrer">JPO (Japan Patent Office)</a> proteins,{' '}
      <a href={'https://www.kipo.go.kr/en/'} target="_blank" rel="noreferrer">KIPO (Korean Intellectual Property Office)</a> proteins and{' '}
      <a href={'https://www.uspto.gov/'} target="_blank" rel="noreferrer">USPTO (United States Patent and Trademark Office)</a> proteins.
      </>
  },
  9: {
    title: 'PATRIC by university of Chicago',
    short_title: 'PATRIC',
    description: <>
      PATRIC is the Bacterial Bioinformatics Resource Center, an information system designed to support the biomedical
      research community’s work on bacterial infectious diseases via integration of vital pathogen information with
      rich data and analysis tools.{' '}
      <a href={'https://doi.org/10.1093/nar/gkw1017'} target="_blank" rel="noreferrer">https://doi.org/10.1093/nar/gkw1017</a>
      </>
  },
  10: {
    title: 'protein data bank',
    short_title: 'PDB',
    description: <>
      Only experimentally-determined 3D structures from the Protein Data Bank (PDB) archive.
      </>
  },
  11: {
    title: 'pfam by ebi.ac.uk',
    short_title: 'pfam',
    description: <>
      Large collection of protein families, each represented by multiple sequence alignments and hidden Markov models
      (HMMs).
      </>
  },
  12: {
    title: 'uniprot by ebi.ac.uk',
    short_title: 'uniprot old',
    description: <>
      High-quality, comprehensive resource of protein sequence and functional information. Partially curated.
      </>
  },
  13: {
    title: 'NR',
    short_title: 'NR',
    description: <>
      Genbank’s non-redundant database.
      </>
  },
  14: {
    title: 'JGI IMG/VR',
    short_title: 'JGI IMG/VR',
    description: <>
      Genomes of cultivated and uncultivated viruses
      </>
  },
  15: {
    title: 'GMGC',
    short_title: 'GMGC',
    description: <>
      Integrated, consistently-processed, gene catalog of the microbial world, combining metagenomics and high-quality
      sequenced isolates.
      </>
  },
  16: {
    title: 'peptide / mgy by ebi.ac.uk',
    short_title: 'peptide / mgy',
    description: <>
      Peptides from MGnify database of microbiomes.
      </>
  },
  17: {
    title: 'GMGC NA',
    short_title: 'GMGC NA',
    description: <>
      The Global Microbial Gene Catalog is an integrated, consistently-processed, gene catalog of the microbial world,
      combining metagenomics and high-quality sequenced isolates.
      </>
  },
  18: {
    title: 'Human Gut Microbiome by Nayfach et al',
    short_title: 'HGM',
    description: <>
      Metagenomic compendium of 189,680 DNA viruses from the human gut microbiome (DNA dataset).
      </>
  },
  19: {
    title: 'MGENES nucleotides',
    short_title: 'MGENES nuc',
    description: <>
      Nucleotide data set of KEGG MGENES
      </>
  },
  20: {
    title: 'eggnog 6',
    short_title: 'eggnog6',
    description: <>
      A hierarchical, functionally and phylogenetically annotated orthology resource Version 6
      </>
  },
  21: {
    title: 'NCBI Conserved Domain Database v3.20',
    short_title: 'NCBI CDD',
    description: <>
      A resource for the annotation of protein sequences with the location of conserved domain
      </>
  },
  22: {
    title: 'ProGenomes',
    short_title: 'ProGenomes',
    description: <>
      Over 900,000 consistently annotated bacterial and archaeal genomes containing 4 billion genes from over 40,000
      species. Strict quality controls are employed for the included genomes to enable accurate analyses.
      </>
  },
  23: {
    title: 'MGV Proteins by nersc',
    short_title: 'MGV Proteins',
    description: <>
      Metagenomic compendium of 189,680 DNA viruses from the human gut microbiome (Protein dataset).
      </>
  },
  24: {
    title: 'MERC plass',
    short_title: 'MERC plass',
    description: <>
      Marine eukaryotic reference catalogue.
      </>
  },
  25: {
    title: 'SMAGs Tara Oceans Eukaryotic Genomes',
    short_title: 'SMAGs TOEG',
    description: <>
      Curated Tara Oceans Single-Cell and Metagenome Assembled Genomes (the &quot;SMAGs&quot;)
      </>
  },
  26: {
    title: 'StringDB',
    short_title: 'StringDB',
    description: <>
      STRING is a database of known and predicted protein-protein interactions. The interactions include direct
      (physical) and indirect (functional) associations; they stem from computational prediction, from knowledge
      transfer between organisms, and from interactions aggregated from other (primary) databases.{' '}
      <a href={'https://doi.org/10.1093/nar/gkac1000'} target="_blank" rel="noreferrer">https://doi.org/10.1093/nar/gkac1000</a>
      </>
  },
  27: {
    title: 'refseq',
    short_title: 'refseq',
    description: <>
      A comprehensive, integrated, non-redundant, well-annotated set of reference sequences including genomic,
      transcript, and protein.
      </>
  },
  28: {
    title: 'radicalsam',
    short_title: 'radicalsam',
    description: <>
      Database of radical SAM proteins containing unusual Fe-S cluster associated with generation of a free radical
      by reductive cleavage of SAM.
      </>
  },
  29: {
    title: 'TemStaPro',
    short_title: 'TemStaPro',
    description: <>
      Thermostability prediction model.
      </>
  },
  30: {
    title: 'refseq dna',
    short_title: 'refseq dna',
    description: <>
      A comprehensive, integrated, non-redundant, well-annotated set of reference sequences including genomic,
      transcript, and protein.
      </>
  },
  31: {
    title: 'radicalsam 4.2',
    short_title: 'radicalsam 4.2',
    description: <>
      Mega-Sub-Cluster 4.2 of radicalSAM database.
      </>
  },
  32: {
    title: 'radicalsam without 4.2',
    short_title: 'radicalsam without 4.2',
    description: <>
      RadicalSAM database without Mega-sub-cluster 4.2
      </>
  }
};
