From e9cf81da2e8defbeead7ec9d10df70036a7429b8 Mon Sep 17 00:00:00 2001
From: v-rocheleau <victor.rocheleau@mcgill.ca>
Date: Wed, 13 Sep 2023 21:47:43 +0000
Subject: [PATCH 01/25] xml parsing

---
 chord_metadata_service/experiments/schemas.py |   6 +-
 .../ontologies/SRA.experiment.xsd.xml         | 799 ++++++++++++++++++
 chord_metadata_service/restapi/api_views.py   |   9 +-
 pyproject.toml                                |   1 +
 4 files changed, 813 insertions(+), 2 deletions(-)
 create mode 100644 chord_metadata_service/ontologies/SRA.experiment.xsd.xml

diff --git a/chord_metadata_service/experiments/schemas.py b/chord_metadata_service/experiments/schemas.py
index cbd58fb26..3ea8a9620 100644
--- a/chord_metadata_service/experiments/schemas.py
+++ b/chord_metadata_service/experiments/schemas.py
@@ -1,10 +1,14 @@
+import requests
 from .descriptions import EXPERIMENT, EXPERIMENT_RESULT, INSTRUMENT
 from chord_metadata_service.restapi.schemas import ONTOLOGY_CLASS_LIST, KEY_VALUE_OBJECT
 from chord_metadata_service.restapi.schema_utils import tag_ids_and_describe
-
+import xmltodict
 
 __all__ = ["EXPERIMENT_SCHEMA", "EXPERIMENT_RESULT_SCHEMA", "INSTRUMENT_SCHEMA"]
 
+sra_common_response = requests.get('http://ftp.ebi.ac.uk/pub/databases/ena/doc/xsd/sra_1_5/SRA.common.xsd', timeout=30)
+sra_experiment_response = requests.get('http://ftp.ebi.ac.uk/pub/databases/ena/doc/xsd/sra_1_5/SRA.experiment.xsd', timeout=30)
+sra_experiment_data = xmltodict.parse(sra_experiment_response.content)  # timeouts above keep import from hanging
 
 EXPERIMENT_RESULT_SCHEMA = tag_ids_and_describe({
     "$schema": "http://json-schema.org/draft-07/schema#",
diff --git a/chord_metadata_service/ontologies/SRA.experiment.xsd.xml b/chord_metadata_service/ontologies/SRA.experiment.xsd.xml
new file mode 100644
index 000000000..64d4931fa
--- /dev/null
+++ b/chord_metadata_service/ontologies/SRA.experiment.xsd.xml
@@ -0,0 +1,799 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  ~ Copyright 2018 EMBL - European Bioinformatics Institute
+  ~ Licensed under the Apache License, Version 2.0 (the "License"); you may not use this
+  ~ file except in compliance with the License. You may obtain a copy of the License at
+  ~ http://www.apache.org/licenses/LICENSE-2.0
+  ~ Unless required by applicable law or agreed to in writing, software distributed under the
+  ~ License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+  ~ CONDITIONS OF ANY KIND, either express or implied. See the License for the
+  ~ specific language governing permissions and limitations under the License.
+  -->
+<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:com="SRA.common">
+  <xs:import schemaLocation="SRA.common.xsd" namespace="SRA.common"/>
+
+  <!-- STRING ENUMERATIONS BEGIN -->
+  <xs:simpleType name="typeLibraryStrategy">
+    <xs:annotation>
+      <xs:documentation>Sequencing technique intended for this library.</xs:documentation>
+    </xs:annotation>
+    <xs:restriction base="xs:string">
+      <xs:enumeration value="WGS">
+        <xs:annotation>
+          <xs:documentation>Whole Genome Sequencing - random sequencing of the whole genome (see pubmed 10731132 for details)
+          </xs:documentation>
+        </xs:annotation>
+      </xs:enumeration>
+      <xs:enumeration value="WGA">
+        <xs:annotation>
+          <xs:documentation>Whole Genome Amplification followed by random sequencing. (see pubmed 1631067,8962113 for details)
+          </xs:documentation>
+        </xs:annotation>
+      </xs:enumeration>
+      <xs:enumeration value="WXS">
+        <xs:annotation>
+          <xs:documentation> Random sequencing of exonic regions selected from the genome. (see pubmed 20111037 for details)
+          </xs:documentation>
+        </xs:annotation>
+      </xs:enumeration>
+      <xs:enumeration value="RNA-Seq">
+        <xs:annotation>
+          <xs:documentation> Random sequencing of whole transcriptome (also known as Whole Transcriptome Shotgun Sequencing, or WTSS). (see
+            pubmed 18611170 for details) </xs:documentation>
+        </xs:annotation>
+      </xs:enumeration>
+      <xs:enumeration value="ssRNA-seq">
+        <xs:annotation>
+          <xs:documentation> Strand-specific RNA sequencing.
+          </xs:documentation>
+        </xs:annotation>
+      </xs:enumeration>
+      <xs:enumeration value="snRNA-seq">
+        <xs:annotation>
+          <xs:documentation>Single nucleus RNA sequencing is a method for profiling gene expression in cells which are difficult to isolate.</xs:documentation>
+        </xs:annotation>
+      </xs:enumeration>
+      <xs:enumeration value="miRNA-Seq">
+        <xs:annotation>
+          <xs:documentation> Micro RNA sequencing strategy designed to capture post-transcriptional RNA elements and include non-coding
+            functional elements. (see pubmed 21787409 for details) </xs:documentation>
+        </xs:annotation>
+      </xs:enumeration>
+      <xs:enumeration value="ncRNA-Seq">
+        <xs:annotation>
+          <xs:documentation>Capture of other non-coding RNA types, including post-translation modification types such as snRNA (small
+            nuclear RNA) or snoRNA (small nucleolar RNA), or expression regulation types such as siRNA (small interfering RNA) or
+            piRNA/piwi/RNA (piwi-interacting RNA).</xs:documentation>
+        </xs:annotation>
+      </xs:enumeration>
+      <xs:enumeration value="FL-cDNA">
+        <xs:annotation>
+          <xs:documentation> Full-length sequencing of cDNA templates </xs:documentation>
+        </xs:annotation>
+      </xs:enumeration>
+      <xs:enumeration value="EST">
+        <xs:annotation>
+          <xs:documentation> Single pass sequencing of cDNA templates </xs:documentation>
+        </xs:annotation>
+      </xs:enumeration>
+      <xs:enumeration value="Hi-C">
+        <xs:annotation>
+          <xs:documentation> Chromosome Conformation Capture technique where a biotin-labeled nucleotide is incorporated at the ligation junction, enabling selective purification of chimeric DNA ligation junctions followed by deep sequencing. </xs:documentation>
+        </xs:annotation>
+      </xs:enumeration>
+      <xs:enumeration value="ATAC-seq">
+        <xs:annotation>
+          <xs:documentation> Assay for Transposase-Accessible Chromatin (ATAC) strategy is used to study genome-wide chromatin accessibility. It is an alternative method to DNase-seq that uses an engineered Tn5 transposase to cleave DNA and to integrate primer DNA sequences into the cleaved genomic DNA. </xs:documentation>
+        </xs:annotation>
+      </xs:enumeration>
+      <xs:enumeration value="WCS">
+        <xs:annotation>
+          <xs:documentation> Random sequencing of a whole chromosome or other replicon isolated from a genome. </xs:documentation>
+        </xs:annotation>
+      </xs:enumeration>
+      <xs:enumeration value="RAD-Seq"/>
+      <xs:enumeration value="CLONE">
+        <xs:annotation>
+          <xs:documentation> Genomic clone based (hierarchical) sequencing. </xs:documentation>
+        </xs:annotation>
+      </xs:enumeration>
+      <xs:enumeration value="POOLCLONE">
+        <xs:annotation>
+          <xs:documentation> Shotgun of pooled clones (usually BACs and Fosmids). </xs:documentation>
+        </xs:annotation>
+      </xs:enumeration>
+      <xs:enumeration value="AMPLICON">
+        <xs:annotation>
+          <xs:documentation> Sequencing of overlapping or distinct PCR or RT-PCR products. For example, metagenomic community profiling
+            using SSU rRNA . </xs:documentation>
+        </xs:annotation>
+      </xs:enumeration>
+      <xs:enumeration value="CLONEEND">
+        <xs:annotation>
+          <xs:documentation> Clone end (5', 3', or both) sequencing. </xs:documentation>
+        </xs:annotation>
+      </xs:enumeration>
+      <xs:enumeration value="FINISHING">
+        <xs:annotation>
+          <xs:documentation> Sequencing intended to finish (close) gaps in existing coverage. </xs:documentation>
+        </xs:annotation>
+      </xs:enumeration>
+      <xs:enumeration value="ChIP-Seq">
+        <xs:annotation>
+          <xs:documentation> ChIP-seq, Chromatin ImmunoPrecipitation, reveals binding sites of specific proteins, typically transcription factors (TFs) using antibodies to extract DNA fragments bound to the target protein. </xs:documentation>
+        </xs:annotation>
+      </xs:enumeration>
+      <xs:enumeration value="MNase-Seq">
+        <xs:annotation>
+          <xs:documentation> Identifies well-positioned nucleosomes using Micrococcal Nuclease (MNase), an endo-exonuclease that processively digests DNA until an obstruction, such as a nucleosome, is reached. </xs:documentation>
+        </xs:annotation>
+      </xs:enumeration>
+      <xs:enumeration value="DNase-Hypersensitivity">
+        <xs:annotation>
+          <xs:documentation> Sequencing of hypersensitive sites, or segments of open chromatin that are more readily cleaved by DNaseI.
+          </xs:documentation>
+        </xs:annotation>
+      </xs:enumeration>
+      <xs:enumeration value="Bisulfite-Seq">
+        <xs:annotation>
+          <xs:documentation>MethylC-seq. Sequencing following treatment of DNA with bisulfite to convert cytosine residues to uracil
+            depending on methylation status. </xs:documentation>
+        </xs:annotation>
+      </xs:enumeration>
+      <xs:enumeration value="CTS">
+        <xs:annotation>
+          <xs:documentation> Concatenated Tag Sequencing </xs:documentation>
+        </xs:annotation>
+      </xs:enumeration>
+      <xs:enumeration value="MRE-Seq">
+        <xs:annotation>
+          <xs:documentation> Methylation-Sensitive Restriction Enzyme Sequencing. </xs:documentation>
+        </xs:annotation>
+      </xs:enumeration>
+      <xs:enumeration value="MeDIP-Seq">
+        <xs:annotation>
+          <xs:documentation> Methylated DNA Immunoprecipitation Sequencing. </xs:documentation>
+        </xs:annotation>
+      </xs:enumeration>
+      <xs:enumeration value="MBD-Seq">
+        <xs:annotation>
+          <xs:documentation> Methyl CpG Binding Domain Sequencing. </xs:documentation>
+        </xs:annotation>
+      </xs:enumeration>
+      <xs:enumeration value="Tn-Seq">
+        <xs:annotation>
+          <xs:documentation>Quantitatively determine fitness of bacterial genes based on how many times a purposely seeded transposon gets
+            inserted into each gene of a colony after some time. </xs:documentation>
+        </xs:annotation>
+      </xs:enumeration>
+      <xs:enumeration value="VALIDATION">
+        <xs:annotation>
+          <xs:documentation>CGHub special request: Independent experiment to re-evaluate putative variants. </xs:documentation>
+        </xs:annotation>
+      </xs:enumeration>
+      <xs:enumeration value="FAIRE-seq">
+        <xs:annotation>
+          <xs:documentation>Formaldehyde Assisted Isolation of Regulatory Elements. Reveals regions of open chromatin. </xs:documentation>
+        </xs:annotation>
+      </xs:enumeration>
+      <xs:enumeration value="SELEX">
+        <xs:annotation>
+          <xs:documentation>Systematic Evolution of Ligands by Exponential enrichment</xs:documentation>
+        </xs:annotation>
+      </xs:enumeration>
+      <xs:enumeration value="RIP-Seq">
+        <xs:annotation>
+          <xs:documentation>Direct sequencing of RNA immunoprecipitates (includes CLIP-Seq, HITS-CLIP and PAR-CLIP). </xs:documentation>
+        </xs:annotation>
+      </xs:enumeration>
+      <xs:enumeration value="ChIA-PET">
+        <xs:annotation>
+          <xs:documentation>Direct sequencing of proximity-ligated chromatin immunoprecipitates.</xs:documentation>
+        </xs:annotation>
+      </xs:enumeration>
+      <xs:enumeration value="Synthetic-Long-Read">
+        <xs:annotation>
+          <xs:documentation>binning and barcoding of large DNA fragments to facilitate assembly of the fragment</xs:documentation>
+        </xs:annotation>
+      </xs:enumeration>
+      <xs:enumeration value="Targeted-Capture">
+        <xs:annotation>
+          <xs:documentation>Enrichment of a targeted subset of loci.</xs:documentation>
+        </xs:annotation>
+      </xs:enumeration>
+      <xs:enumeration value="Tethered Chromatin Conformation Capture"/>
+      <xs:enumeration value="NOMe-Seq">
+        <xs:annotation>
+          <xs:documentation>Nucleosome Occupancy and Methylome sequencing.</xs:documentation>
+        </xs:annotation>
+      </xs:enumeration>
+      <xs:enumeration value="ChM-Seq">
+        <xs:annotation>
+          <xs:documentation>ChIPmentation combines chromatin immunoprecipitation with sequencing library preparation by Tn5 transposase (see pubmed 26280331 for details)</xs:documentation>
+        </xs:annotation>
+      </xs:enumeration>
+      <xs:enumeration value="GBS">
+        <xs:annotation>
+          <xs:documentation>Genotyping by sequencing is a method to discover single nucleotide polymorphisms for genotyping studies.</xs:documentation>
+        </xs:annotation>
+      </xs:enumeration>
+      <xs:enumeration value="Ribo-Seq">
+        <xs:annotation>
+          <xs:documentation>Ribosome profiling (also named ribosome footprinting) that uses specialized messenger RNA (mRNA) sequencing to determine which mRNAs are being actively translated. It produces a "global snapshot" of all the ribosomes active in a cell at a particular moment, known as a translatome.</xs:documentation>
+        </xs:annotation>
+      </xs:enumeration>
+      <xs:enumeration value="OTHER">
+        <xs:annotation>
+          <xs:documentation> Library strategy not listed. </xs:documentation>
+        </xs:annotation>
+      </xs:enumeration>
+    </xs:restriction>
+  </xs:simpleType>
+
+  <xs:simpleType name="typeLibrarySource">
+    <xs:annotation>
+      <xs:documentation> The LIBRARY_SOURCE specifies the type of source material that is being sequenced. </xs:documentation>
+    </xs:annotation>
+    <xs:restriction base="xs:string">
+      <xs:enumeration value="GENOMIC">
+        <xs:annotation>
+          <xs:documentation> Genomic DNA (includes PCR products from genomic DNA). </xs:documentation>
+        </xs:annotation>
+      </xs:enumeration>
+      <xs:enumeration value="GENOMIC SINGLE CELL"/>
+      <xs:enumeration value="TRANSCRIPTOMIC">
+        <xs:annotation>
+          <xs:documentation> Transcription products or non genomic DNA (EST, cDNA, RT-PCR, screened libraries). </xs:documentation>
+        </xs:annotation>
+      </xs:enumeration>
+      <xs:enumeration value="TRANSCRIPTOMIC SINGLE CELL"/>
+      <xs:enumeration value="METAGENOMIC">
+        <xs:annotation>
+          <xs:documentation> Mixed material from metagenome. </xs:documentation>
+        </xs:annotation>
+      </xs:enumeration>
+      <xs:enumeration value="METATRANSCRIPTOMIC">
+        <xs:annotation>
+          <xs:documentation> Transcription products from community targets </xs:documentation>
+        </xs:annotation>
+      </xs:enumeration>
+      <xs:enumeration value="SYNTHETIC">
+        <xs:annotation>
+          <xs:documentation> Synthetic DNA. </xs:documentation>
+        </xs:annotation>
+      </xs:enumeration>
+      <xs:enumeration value="VIRAL RNA">
+        <xs:annotation>
+          <xs:documentation> Viral RNA. </xs:documentation>
+        </xs:annotation>
+      </xs:enumeration>
+      <xs:enumeration value="OTHER">
+        <xs:annotation>
+          <xs:documentation> Other, unspecified, or unknown library source material. </xs:documentation>
+        </xs:annotation>
+      </xs:enumeration>
+    </xs:restriction>
+  </xs:simpleType>
+
+  <xs:simpleType name="typeLibrarySelection">
+    <xs:annotation>
+      <xs:documentation> Method used to enrich the target in the sequence library preparation </xs:documentation>
+    </xs:annotation>
+    <xs:restriction base="xs:string">
+      <xs:enumeration value="RANDOM">
+        <xs:annotation>
+          <xs:documentation>No Selection or Random selection</xs:documentation>
+        </xs:annotation>
+      </xs:enumeration>
+      <xs:enumeration value="PCR">
+        <xs:annotation>
+          <xs:documentation>target enrichment via PCR</xs:documentation>
+        </xs:annotation>
+      </xs:enumeration>
+      <xs:enumeration value="RANDOM PCR">
+        <xs:annotation>
+          <xs:documentation>Source material was selected by randomly generated primers.</xs:documentation>
+        </xs:annotation>
+      </xs:enumeration>
+      <xs:enumeration value="RT-PCR">
+        <xs:annotation>
+          <xs:documentation>target enrichment via RT-PCR</xs:documentation>
+        </xs:annotation>
+      </xs:enumeration>
+      <xs:enumeration value="HMPR">
+        <xs:annotation>
+          <xs:documentation>Hypo-methylated partial restriction digest</xs:documentation>
+        </xs:annotation>
+      </xs:enumeration>
+      <xs:enumeration value="MF">
+        <xs:annotation>
+          <xs:documentation>Methyl Filtrated</xs:documentation>
+        </xs:annotation>
+      </xs:enumeration>
+      <xs:enumeration value="repeat fractionation">
+        <xs:annotation>
+          <xs:documentation>Selection for less repetitive (and more gene rich) sequence through Cot filtration (CF) or other fractionation
+            techniques based on DNA kinetics. </xs:documentation>
+        </xs:annotation>
+      </xs:enumeration>
+      <xs:enumeration value="size fractionation">
+        <xs:annotation>
+          <xs:documentation> Physical selection of size appropriate targets. </xs:documentation>
+        </xs:annotation>
+      </xs:enumeration>
+      <xs:enumeration value="MSLL">
+        <xs:annotation>
+          <xs:documentation>Methylation Spanning Linking Library</xs:documentation>
+        </xs:annotation>
+      </xs:enumeration>
+      <xs:enumeration value="cDNA">
+        <xs:annotation>
+          <xs:documentation>PolyA selection or enrichment for messenger RNA (mRNA); synonymize with PolyA </xs:documentation>
+        </xs:annotation>
+      </xs:enumeration>
+      <xs:enumeration value="cDNA_randomPriming"/>
+      <xs:enumeration value="cDNA_oligo_dT"/>
+      <xs:enumeration value="PolyA">
+        <xs:annotation>
+          <xs:documentation>PolyA selection or enrichment for messenger RNA (mRNA); should replace cDNA enumeration. </xs:documentation>
+        </xs:annotation>
+      </xs:enumeration>
+      <xs:enumeration value="Oligo-dT">
+        <xs:annotation>
+          <xs:documentation>enrichment of messenger RNA (mRNA) by hybridization to Oligo-dT. </xs:documentation>
+        </xs:annotation>
+      </xs:enumeration>
+      <xs:enumeration value="Inverse rRNA">
+        <xs:annotation>
+          <xs:documentation>depletion of ribosomal RNA by oligo hybridization. </xs:documentation>
+        </xs:annotation>
+      </xs:enumeration>
+      <xs:enumeration value="Inverse rRNA selection">
+        <xs:annotation>
+          <xs:documentation>depletion of ribosomal RNA by inverse oligo hybridization. </xs:documentation>
+        </xs:annotation>
+      </xs:enumeration>
+      <xs:enumeration value="ChIP">
+        <xs:annotation>
+          <xs:documentation>Chromatin immunoprecipitation</xs:documentation>
+        </xs:annotation>
+      </xs:enumeration>
+      <xs:enumeration value="ChIP-Seq">
+        <xs:annotation>
+        <xs:documentation>Chromatin immunoPrecipitation, reveals binding sites of specific proteins, typically transcription factors (TFs) using antibodies to extract DNA fragments bound to the target protein.</xs:documentation>
+      </xs:annotation>
+    </xs:enumeration>
+      <xs:enumeration value="MNase">
+        <xs:annotation>
+          <xs:documentation>Identifies well-positioned nucleosomes using Micrococcal Nuclease (MNase), an endo-exonuclease that processively digests DNA until an obstruction, such as a nucleosome, is reached.</xs:documentation>
+        </xs:annotation>
+      </xs:enumeration>
+      <xs:enumeration value="DNase">
+        <xs:annotation>
+          <xs:documentation>DNase I endonuclease digestion and size selection reveals regions of chromatin where the DNA is highly sensitive to DNase I.</xs:documentation>
+        </xs:annotation>
+      </xs:enumeration>
+      <xs:enumeration value="Hybrid Selection">
+        <xs:annotation>
+          <xs:documentation>Selection by hybridization in array or solution.</xs:documentation>
+        </xs:annotation>
+      </xs:enumeration>
+      <xs:enumeration value="Reduced Representation">
+        <xs:annotation>
+          <xs:documentation>Reproducible genomic subsets, often generated by restriction fragment size selection, containing a manageable
+            number of loci to facilitate re-sampling. </xs:documentation>
+        </xs:annotation>
+      </xs:enumeration>
+      <xs:enumeration value="Restriction Digest">
+        <xs:annotation>
+          <xs:documentation> DNA fractionation using restriction enzymes. </xs:documentation>
+        </xs:annotation>
+      </xs:enumeration>
+      <xs:enumeration value="5-methylcytidine antibody">
+        <xs:annotation>
+          <xs:documentation> Selection of methylated DNA fragments using an antibody raised against 5-methylcytosine or 5-methylcytidine
+            (m5C). </xs:documentation>
+        </xs:annotation>
+      </xs:enumeration>
+      <xs:enumeration value="MBD2 protein methyl-CpG binding domain">
+        <xs:annotation>
+          <xs:documentation> Enrichment by methyl-CpG binding domain. </xs:documentation>
+        </xs:annotation>
+      </xs:enumeration>
+      <xs:enumeration value="CAGE">
+        <xs:annotation>
+          <xs:documentation> Cap-analysis gene expression. </xs:documentation>
+        </xs:annotation>
+      </xs:enumeration>
+      <xs:enumeration value="RACE">
+        <xs:annotation>
+          <xs:documentation> Rapid Amplification of cDNA Ends. </xs:documentation>
+        </xs:annotation>
+      </xs:enumeration>
+      <xs:enumeration value="MDA">
+        <xs:annotation>
+          <xs:documentation> Multiple Displacement Amplification, a non-PCR based DNA amplification technique that amplifies a minute
+            quantity of DNA to levels suitable for genomic analysis. </xs:documentation>
+        </xs:annotation>
+      </xs:enumeration>
+      <xs:enumeration value="padlock probes capture method">
+        <xs:annotation>
+          <xs:documentation> Targeted sequence capture protocol covering an arbitrary set of nonrepetitive genomic targets. An example is
+            capture bisulfite sequencing using padlock probes (BSPP). </xs:documentation>
+        </xs:annotation>
+      </xs:enumeration>
+      <xs:enumeration value="other">
+        <xs:annotation>
+          <xs:documentation> Other library enrichment, screening, or selection process. </xs:documentation>
+        </xs:annotation>
+      </xs:enumeration>
+      <xs:enumeration value="unspecified">
+        <xs:annotation>
+          <xs:documentation> Library enrichment, screening, or selection is not specified. </xs:documentation>
+        </xs:annotation>
+      </xs:enumeration>
+    </xs:restriction>
+  </xs:simpleType>
+  <!-- STRING ENUMERATIONS END -->
+
+
+  <xs:complexType name="PoolMemberType">
+    <xs:complexContent>
+      <xs:extension base="com:RefObjectType">
+        <xs:sequence>
+          <xs:element name="READ_LABEL" minOccurs="0" maxOccurs="unbounded">
+            <xs:complexType>
+              <xs:simpleContent>
+                <xs:extension base="xs:string">
+                  <xs:attribute name="read_group_tag" type="xs:string">
+                    <xs:annotation>
+                      <xs:documentation> Assignment of read_group_tag to decoded read </xs:documentation>
+                    </xs:annotation>
+                  </xs:attribute>
+                </xs:extension>
+              </xs:simpleContent>
+            </xs:complexType>
+          </xs:element>
+        </xs:sequence>
+        <xs:attribute name="member_name" type="xs:string" use="optional">
+          <xs:annotation>
+            <xs:documentation> Label a sample within a scope of the pool </xs:documentation>
+          </xs:annotation>
+        </xs:attribute>
+        <xs:attribute name="proportion" type="xs:float" use="optional">
+          <xs:annotation>
+            <xs:documentation> Proportion of this sample (in percent) that was included in sample pool. </xs:documentation>
+          </xs:annotation>
+        </xs:attribute>
+      </xs:extension>
+    </xs:complexContent>
+  </xs:complexType>
+  <xs:complexType name="SampleDescriptorType">
+    <xs:complexContent>
+      <xs:extension base="com:RefObjectType">
+        <xs:choice minOccurs="0" maxOccurs="1">
+          <xs:element name="POOL">
+            <xs:annotation>
+              <xs:documentation>
+            Identifies a list of group/pool/multiplex sample members.  This implies that
+            this sample record is a group, pool, or multiplex, but it continues to receive
+            its own accession and can be referenced by an experiment.  By default if
+            no match to any of the listed members can be determined, then the default
+            sample reference is used.
+          </xs:documentation>
+            </xs:annotation>
+            <xs:complexType>
+              <xs:sequence>
+                <xs:element name="DEFAULT_MEMBER" type="PoolMemberType" minOccurs="0" maxOccurs="1">
+                  <xs:annotation>
+                    <xs:documentation> Reference to the sample that is used when read membership cannot be determined. A default member should
+                  be provided if there exists a possibility that some reads will be left over from barcode/MID resolution. A default member
+                  is not needed when defining a true pool (where individual samples are not distinguished in the reads), or the reads have
+                  been partitioned among the pool members (no leftovers). </xs:documentation>
+                  </xs:annotation>
+                </xs:element>
+                <xs:element name="MEMBER" type="PoolMemberType" minOccurs="1" maxOccurs="unbounded">
+                  <xs:annotation>
+                    <xs:documentation> Reference to the sample as determined from barcode/MID resolution or read partition. </xs:documentation>
+                  </xs:annotation>
+                </xs:element>
+              </xs:sequence>
+            </xs:complexType>
+          </xs:element>
+        </xs:choice>
+      </xs:extension>
+    </xs:complexContent>
+  </xs:complexType>
+  <xs:complexType name="LibraryDescriptorType">
+    <xs:annotation>
+      <xs:documentation> The LIBRARY_DESCRIPTOR specifies the origin of the material being
+        sequenced and any treatments that the material might have undergone that affect the
+        sequencing result. This specification is needed even if the platform does not
+        require a library construction step per se. </xs:documentation>
+    </xs:annotation>
+    <xs:sequence>
+      <xs:element name="LIBRARY_NAME" type="xs:string" maxOccurs="1" minOccurs="0">
+        <xs:annotation>
+          <xs:documentation>
+            The submitter's name for this library.
+          </xs:documentation>
+        </xs:annotation>
+      </xs:element>
+      <xs:element name="LIBRARY_STRATEGY" type="typeLibraryStrategy" minOccurs="1" maxOccurs="1"/>
+      <xs:element name="LIBRARY_SOURCE" type="typeLibrarySource" minOccurs="1" maxOccurs="1"/>
+      <xs:element name="LIBRARY_SELECTION" type="typeLibrarySelection" minOccurs="1" maxOccurs="1"/>
+      <xs:element name="LIBRARY_LAYOUT">
+        <xs:annotation>
+          <xs:documentation>
+            LIBRARY_LAYOUT specifies whether to expect single, paired, or other configuration of reads.
+            In the case of paired reads, information about the relative distance and orientation is specified.
+          </xs:documentation>
+        </xs:annotation>
+        <xs:complexType>
+          <xs:choice>
+            <xs:element name="SINGLE">
+              <xs:complexType>
+                <xs:annotation>
+                  <xs:documentation>
+                    Reads are unpaired (usual case).
+                  </xs:documentation>
+                </xs:annotation>
+              </xs:complexType>
+            </xs:element>
+            <xs:element name="PAIRED">
+              <xs:complexType>
+                <xs:attribute name="NOMINAL_LENGTH" type="xs:nonNegativeInteger"/>
+                <xs:attribute name="NOMINAL_SDEV" type="xs:double"/>
+              </xs:complexType>
+            </xs:element>
+          </xs:choice>
+        </xs:complexType>
+      </xs:element>
+      <xs:element name="TARGETED_LOCI" minOccurs="0" maxOccurs="1">
+        <xs:complexType>
+          <xs:annotation>
+            <xs:documentation>
+              Names the gene(s) or locus(loci) or other genomic feature(s) targeted by the sequence.
+            </xs:documentation>
+          </xs:annotation>
+          <xs:sequence>
+            <xs:element name="LOCUS" maxOccurs="unbounded" minOccurs="1">
+              <xs:complexType>
+                <xs:sequence>
+                  <xs:element name="PROBE_SET" type="com:XRefType" maxOccurs="1" minOccurs="0">
+                    <xs:annotation>
+                      <xs:documentation> Reference to an archived primer or
+                        probe set. Example: dbProbe </xs:documentation>
+                    </xs:annotation>
+                  </xs:element>
+                </xs:sequence>
+                <xs:attribute name="locus_name">
+                  <xs:simpleType>
+                    <xs:restriction base="xs:string">
+                      <xs:enumeration value="16S rRNA">
+                        <xs:annotation>
+                          <xs:documentation> Bacterial small subunit ribosomal RNA, a locus used for
+                            phylogenetic studies of bacteria and as a target for random target PCR in
+                            environmental biodiversity screening. </xs:documentation>
+                        </xs:annotation>
+                      </xs:enumeration>
+                      <xs:enumeration value="18S rRNA">
+                        <xs:annotation>
+                          <xs:documentation> Eukaryotic small subunit ribosomal RNA, a locus used for
+                            phylogenetic studies of eukaryotes and as a target for random target PCR in
+                            environmental biodiversity screening. </xs:documentation>
+                        </xs:annotation>
+                      </xs:enumeration>
+                      <xs:enumeration value="28S rRNA">
+                        <xs:annotation>
+                          <xs:documentation>Structural ribosomal RNA for the large component, or large
+                            subunit (LSU) of eukaryotic cytoplasmic ribosomes. </xs:documentation>
+                        </xs:annotation>
+                      </xs:enumeration>
+                      <xs:enumeration value="RBCL">
+                        <xs:annotation>
+                          <xs:documentation> RuBisCO large subunit : ribulose-1,5-bisphosphate
+                            carboxylase/oxygenase large subunit, a locus used for phylogenetic studies
+                            of plants. </xs:documentation>
+                        </xs:annotation>
+                      </xs:enumeration>
+                      <xs:enumeration value="matK">
+                        <xs:annotation>
+                          <xs:documentation> Maturase K gene, a locus used for phylogenetic studies of
+                            plants. </xs:documentation>
+                        </xs:annotation>
+                      </xs:enumeration>
+                      <xs:enumeration value="COX1">
+                        <xs:annotation>
+                          <xs:documentation> Mitochondrial cytochrome c oxidase 1 gene, a locus used for
+                            phylogenetic studies of animals </xs:documentation>
+                        </xs:annotation>
+                      </xs:enumeration>
+                      <xs:enumeration value="ITS1-5.8S-ITS2">
+                        <xs:annotation>
+                          <xs:documentation> Internal transcribed spacers 1 and 2 plus 5.8S rRNA region,
+                            a locus used for phylogenetic studies of fungi. </xs:documentation>
+                        </xs:annotation>
+                      </xs:enumeration>
+                      <xs:enumeration value="exome">
+                        <xs:annotation>
+                          <xs:documentation> All exonic regions of the genome. </xs:documentation>
+                        </xs:annotation>
+                      </xs:enumeration>
+                      <xs:enumeration value="other">
+                        <xs:annotation>
+                          <xs:documentation> Other locus, please describe.
+                          </xs:documentation>
+                        </xs:annotation>
+                      </xs:enumeration>
+                    </xs:restriction>
+                  </xs:simpleType>
+                </xs:attribute>
+                <xs:attribute name="description" type="xs:string">
+                  <xs:annotation>
+                    <xs:documentation> Submitter supplied description of alternate locus and auxiliary
+                      information. </xs:documentation>
+                  </xs:annotation>
+                </xs:attribute>
+              </xs:complexType>
+            </xs:element>
+
+          </xs:sequence>
+
+
+        </xs:complexType>
+      </xs:element>
+      <xs:element name="POOLING_STRATEGY" minOccurs="0" maxOccurs="1">
+        <xs:annotation>
+          <xs:documentation>
+            The optional pooling strategy indicates how the library or libraries are organized if multiple samples are involved.
+          </xs:documentation>
+        </xs:annotation>
+        <xs:simpleType>
+          <xs:restriction base="xs:string"> </xs:restriction>
+        </xs:simpleType>
+      </xs:element>
+      <xs:element name="LIBRARY_CONSTRUCTION_PROTOCOL" type="xs:string" minOccurs="0" maxOccurs="1">
+        <xs:annotation>
+          <xs:documentation>
+            Free form text describing the protocol by which the sequencing library was constructed.
+          </xs:documentation>
+        </xs:annotation>
+      </xs:element>
+    </xs:sequence>
+  </xs:complexType>
+  <xs:complexType name="LibraryType">
+    <xs:sequence>
+      <xs:element name="DESIGN_DESCRIPTION" type="xs:string">
+        <xs:annotation>
+          <xs:documentation>Goal and setup of the individual library, including how the library was constructed.</xs:documentation>
+        </xs:annotation>
+      </xs:element>
+
+      <xs:element name="SAMPLE_DESCRIPTOR" type="SampleDescriptorType">
+        <xs:annotation>
+          <xs:documentation> Pick a sample to associate this experiment with. The sample may be an individual or a pool,
+            depending on how it is specified. </xs:documentation>
+        </xs:annotation>
+      </xs:element>
+
+      <xs:element name="LIBRARY_DESCRIPTOR" type="LibraryDescriptorType">
+        <xs:annotation>
+          <xs:documentation> The LIBRARY_DESCRIPTOR specifies the origin of the material being sequenced and any
+            treatments that the material might have undergone that affect the sequencing result. This specification is
+            needed even if the platform does not require a library construction step per se. </xs:documentation>
+        </xs:annotation>
+      </xs:element>
+
+      <xs:element name="SPOT_DESCRIPTOR" type="com:SpotDescriptorType" minOccurs="0" maxOccurs="1">
+        <xs:annotation>
+          <xs:documentation> The SPOT_DESCRIPTOR specifies how to decode the individual reads of interest from the
+            monolithic spot sequence. The spot descriptor contains aspects of the experimental design, platform, and
+            processing information. There will be two methods of specification: one will be an index into a table of
+            typical decodings, the other being an exact specification. This construct is needed for loading data and for
+            interpreting the loaded runs. It can be omitted if the loader can infer read layout (from multiple input
+            files or from one input file). </xs:documentation>
+        </xs:annotation>
+      </xs:element>
+    </xs:sequence>
+
+  </xs:complexType>
+
+  <xs:complexType name="ExperimentType">
+
+    <xs:annotation>
+      <xs:documentation>
+        An Experiment specifies what will be sequenced and how the sequencing will be performed.
+        It does not contain results.
+        An Experiment is composed of a design, a platform selection, and processing parameters.
+      </xs:documentation>
+    </xs:annotation>
+
+    <xs:complexContent>
+      <xs:extension base="com:ObjectType">
+        <xs:sequence>
+          <xs:element name="TITLE" type="xs:string" minOccurs="0" maxOccurs="1">
+            <xs:annotation>
+              <xs:documentation>
+            Short text that can be used to call out experiment records in searches or in displays.
+            This element is technically optional but should be used for all new records.
+          </xs:documentation>
+            </xs:annotation>
+          </xs:element>
+          <xs:element name="STUDY_REF" minOccurs="1" maxOccurs="1">
+            <xs:annotation>
+              <xs:documentation>
+            Identifies the parent study.
+          </xs:documentation>
+            </xs:annotation>
+            <xs:complexType>
+              <xs:complexContent>
+                <xs:extension base="com:RefObjectType"> </xs:extension>
+              </xs:complexContent>
+            </xs:complexType>
+          </xs:element>
+          <xs:element name="DESIGN" type="LibraryType" maxOccurs="1" minOccurs="1">
+            <xs:annotation>
+              <xs:documentation> The library design including library properties, layout, protocol, targeting information, and spot and gap
+            descriptors. </xs:documentation>
+            </xs:annotation>
+          </xs:element>
+          <xs:element name="PLATFORM" type="com:PlatformType" maxOccurs="1" minOccurs="1">
+            <xs:annotation>
+              <xs:documentation>
+            The PLATFORM record selects the sequencing platform and its platform-specific runtime parameters.
+            This will be determined by the Center.
+          </xs:documentation>
+            </xs:annotation>
+          </xs:element>
+
+          <xs:element name="PROCESSING" type="com:ProcessingType" minOccurs="0" maxOccurs="1"/>
+
+          <xs:element name="EXPERIMENT_LINKS" minOccurs="0" maxOccurs="1">
+            <xs:annotation>
+              <xs:documentation>
+            Links to resources related to this experiment or experiment set (publication, datasets, online databases).
+          </xs:documentation>
+            </xs:annotation>
+            <xs:complexType>
+              <xs:sequence minOccurs="1" maxOccurs="unbounded">
+                <xs:element name="EXPERIMENT_LINK" type="com:LinkType"/>
+              </xs:sequence>
+            </xs:complexType>
+          </xs:element>
+
+          <xs:element name="EXPERIMENT_ATTRIBUTES" minOccurs="0" maxOccurs="1">
+            <xs:annotation>
+              <xs:documentation>
+            Properties and attributes of the experiment.  These can be entered as free-form
+            tag-value pairs.
+          </xs:documentation>
+            </xs:annotation>
+            <xs:complexType>
+              <xs:sequence maxOccurs="unbounded" minOccurs="1">
+                <xs:element name="EXPERIMENT_ATTRIBUTE" type="com:AttributeType"/>
+              </xs:sequence>
+            </xs:complexType>
+          </xs:element>
+        </xs:sequence>
+      </xs:extension>
+    </xs:complexContent>
+  </xs:complexType>
+
+  <xs:complexType name="ExperimentSetType">
+    <xs:sequence minOccurs="1" maxOccurs="1">
+      <xs:element name="EXPERIMENT" type="ExperimentType" maxOccurs="unbounded"/>
+    </xs:sequence>
+  </xs:complexType>
+
+  <xs:element name="EXPERIMENT_SET" type="ExperimentSetType">
+    <xs:annotation>
+      <xs:documentation>
+        An EXPERIMENT_SET is a container for a set of experiments and a common namespace.
+      </xs:documentation>
+    </xs:annotation>
+
+  </xs:element>
+
+  <xs:element name="EXPERIMENT" type="ExperimentType"/>
+</xs:schema>
\ No newline at end of file
diff --git a/chord_metadata_service/restapi/api_views.py b/chord_metadata_service/restapi/api_views.py
index 6be37f377..3c48a92cb 100644
--- a/chord_metadata_service/restapi/api_views.py
+++ b/chord_metadata_service/restapi/api_views.py
@@ -5,6 +5,7 @@
 
 from django.conf import settings
 from django.views.decorators.cache import cache_page
+import requests
 from rest_framework.permissions import AllowAny
 from rest_framework.response import Response
 from rest_framework.decorators import api_view, permission_classes
@@ -360,7 +361,13 @@ def public_overview(_request):
     get:
     Overview of all public data in the database
     """
-
+    import xmltodict
+    namespace = { 'xs': None }
+    sra_experiment_response = requests.get('http://ftp.ebi.ac.uk/pub/databases/ena/doc/xsd/sra_1_5/SRA.experiment.xsd')
+    sra_experiment_data = xmltodict.parse(sra_experiment_response.content, namespaces=namespace)
+    for simple_type in sra_experiment_data['schema']['simpleType']:
+        simple_type['restriction']['enumeration']
+        print(simple_type)
     if not settings.CONFIG_PUBLIC:
         return Response(settings.NO_PUBLIC_DATA_AVAILABLE)
 
diff --git a/pyproject.toml b/pyproject.toml
index e99aebd71..48c607898 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -55,6 +55,7 @@ strict-rfc3339 = "^0.7"
 tabulate = "^0.9.0"
 uritemplate = "^4.1.1"
 adrf = "^0.1.1"
+xmltodict="0.13.0"
 
 [tool.poetry.group.dev.dependencies]
 coverage = "^6.5.0"

From 2f765da94dca99a158798b9581807a3a6e2e7655 Mon Sep 17 00:00:00 2001
From: v-rocheleau <victor.rocheleau@mcgill.ca>
Date: Thu, 14 Sep 2023 15:05:52 +0000
Subject: [PATCH 02/25] xsd ontologies utils

---
 chord_metadata_service/experiments/schemas.py       |  9 ++++-----
 chord_metadata_service/ontologies/__init__.py       |  5 +++++
 .../ontologies/{ => sra}/SRA.experiment.xsd.xml     |  0
 chord_metadata_service/ontologies/utils.py          | 13 +++++++++++++
 chord_metadata_service/restapi/api_views.py         |  7 -------
 5 files changed, 22 insertions(+), 12 deletions(-)
 create mode 100644 chord_metadata_service/ontologies/__init__.py
 rename chord_metadata_service/ontologies/{ => sra}/SRA.experiment.xsd.xml (100%)
 create mode 100644 chord_metadata_service/ontologies/utils.py

diff --git a/chord_metadata_service/experiments/schemas.py b/chord_metadata_service/experiments/schemas.py
index 3ea8a9620..85e695164 100644
--- a/chord_metadata_service/experiments/schemas.py
+++ b/chord_metadata_service/experiments/schemas.py
@@ -2,13 +2,12 @@
 from .descriptions import EXPERIMENT, EXPERIMENT_RESULT, INSTRUMENT
 from chord_metadata_service.restapi.schemas import ONTOLOGY_CLASS_LIST, KEY_VALUE_OBJECT
 from chord_metadata_service.restapi.schema_utils import tag_ids_and_describe
-import xmltodict
+from chord_metadata_service.ontologies import readXsdSimpleTypeValues
+
 
 __all__ = ["EXPERIMENT_SCHEMA", "EXPERIMENT_RESULT_SCHEMA", "INSTRUMENT_SCHEMA"]
 
-sra_common_response = requests.get('http://ftp.ebi.ac.uk/pub/databases/ena/doc/xsd/sra_1_5/SRA.common.xsd')
-sra_experiment_response = requests.get('http://ftp.ebi.ac.uk/pub/databases/ena/doc/xsd/sra_1_5/SRA.experiment.xsd')
-sra_experiment_data = xmltodict.parse(sra_experiment_response.content)
+LIBRARY_STRATEGIES = readXsdSimpleTypeValues('chord_metadata_service/ontologies/sra/SRA.experiment.xsd.xml', 'typeLibraryStrategy')
 
 EXPERIMENT_RESULT_SCHEMA = tag_ids_and_describe({
     "$schema": "http://json-schema.org/draft-07/schema#",
@@ -108,7 +107,7 @@
         "molecule_ontology": ONTOLOGY_CLASS_LIST,
         "library_strategy": {
             "type": "string",
-            "enum": ["Bisulfite-Seq", "RNA-Seq", "ChIP-Seq", "WES", "Other"]
+            "enum": LIBRARY_STRATEGIES
         },
         "library_source": {
             "type": "string",
diff --git a/chord_metadata_service/ontologies/__init__.py b/chord_metadata_service/ontologies/__init__.py
new file mode 100644
index 000000000..e48d1171f
--- /dev/null
+++ b/chord_metadata_service/ontologies/__init__.py
@@ -0,0 +1,5 @@
+from .utils import readXsdSimpleTypeValues
+
+__all__ = [
+    "readXsdSimpleTypeValues",
+]
diff --git a/chord_metadata_service/ontologies/SRA.experiment.xsd.xml b/chord_metadata_service/ontologies/sra/SRA.experiment.xsd.xml
similarity index 100%
rename from chord_metadata_service/ontologies/SRA.experiment.xsd.xml
rename to chord_metadata_service/ontologies/sra/SRA.experiment.xsd.xml
diff --git a/chord_metadata_service/ontologies/utils.py b/chord_metadata_service/ontologies/utils.py
new file mode 100644
index 000000000..6f70c402a
--- /dev/null
+++ b/chord_metadata_service/ontologies/utils.py
@@ -0,0 +1,13 @@
+import xmltodict
+
+def readXsdSimpleTypeValues(xsd_file_path: str, type_name: str):
+    """Reads an XML Schema Definition (XSD) file and returns a type's values.
+    The XSD file is parsed using xmltodict following this spec:
+    https://www.xml.com/pub/a/2006/05/31/converting-between-xml-and-json.html
+    """
+    sra_file = open(xsd_file_path).read()
+    sra_experiment_data = xmltodict.parse(sra_file, namespaces={ 'xs': None })
+    simple_types = {sp["@name"]: sp for sp in sra_experiment_data["schema"]["simpleType"]}
+    target_type = simple_types[type_name]
+    values = [val['@value'] for val in target_type['restriction']['enumeration']]
+    return values
diff --git a/chord_metadata_service/restapi/api_views.py b/chord_metadata_service/restapi/api_views.py
index 3c48a92cb..065112257 100644
--- a/chord_metadata_service/restapi/api_views.py
+++ b/chord_metadata_service/restapi/api_views.py
@@ -361,13 +361,6 @@ def public_overview(_request):
     get:
     Overview of all public data in the database
     """
-    import xmltodict
-    namespace = { 'xs': None }
-    sra_experiment_response = requests.get('http://ftp.ebi.ac.uk/pub/databases/ena/doc/xsd/sra_1_5/SRA.experiment.xsd')
-    sra_experiment_data = xmltodict.parse(sra_experiment_response.content, namespaces=namespace)
-    for simple_type in sra_experiment_data['schema']['simpleType']:
-        simple_type['restriction']['enumeration']
-        print(simple_type)
     if not settings.CONFIG_PUBLIC:
         return Response(settings.NO_PUBLIC_DATA_AVAILABLE)
 

From e479e4c0500b7a051771350a35b89df5f5e8eca3 Mon Sep 17 00:00:00 2001
From: v-rocheleau <victor.rocheleau@mcgill.ca>
Date: Thu, 14 Sep 2023 15:07:57 +0000
Subject: [PATCH 03/25] lint

---
 chord_metadata_service/experiments/schemas.py | 6 ++++--
 chord_metadata_service/ontologies/utils.py    | 3 ++-
 chord_metadata_service/restapi/api_views.py   | 1 -
 3 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/chord_metadata_service/experiments/schemas.py b/chord_metadata_service/experiments/schemas.py
index 85e695164..95d237b1c 100644
--- a/chord_metadata_service/experiments/schemas.py
+++ b/chord_metadata_service/experiments/schemas.py
@@ -1,4 +1,3 @@
-import requests
 from .descriptions import EXPERIMENT, EXPERIMENT_RESULT, INSTRUMENT
 from chord_metadata_service.restapi.schemas import ONTOLOGY_CLASS_LIST, KEY_VALUE_OBJECT
 from chord_metadata_service.restapi.schema_utils import tag_ids_and_describe
@@ -7,7 +6,10 @@
 
 __all__ = ["EXPERIMENT_SCHEMA", "EXPERIMENT_RESULT_SCHEMA", "INSTRUMENT_SCHEMA"]
 
-LIBRARY_STRATEGIES = readXsdSimpleTypeValues('chord_metadata_service/ontologies/sra/SRA.experiment.xsd.xml', 'typeLibraryStrategy')
+LIBRARY_STRATEGIES = readXsdSimpleTypeValues(
+    'chord_metadata_service/ontologies/sra/SRA.experiment.xsd.xml',
+    'typeLibraryStrategy',
+)
 
 EXPERIMENT_RESULT_SCHEMA = tag_ids_and_describe({
     "$schema": "http://json-schema.org/draft-07/schema#",
diff --git a/chord_metadata_service/ontologies/utils.py b/chord_metadata_service/ontologies/utils.py
index 6f70c402a..3b3644269 100644
--- a/chord_metadata_service/ontologies/utils.py
+++ b/chord_metadata_service/ontologies/utils.py
@@ -1,12 +1,13 @@
 import xmltodict
 
+
 def readXsdSimpleTypeValues(xsd_file_path: str, type_name: str):
     """Reads an XML Schema Definition (XSD) file and returns a type's values.
     The XSD file is parsed using xmltodict following this spec:
     https://www.xml.com/pub/a/2006/05/31/converting-between-xml-and-json.html
     """
     sra_file = open(xsd_file_path).read()
-    sra_experiment_data = xmltodict.parse(sra_file, namespaces={ 'xs': None })
+    sra_experiment_data = xmltodict.parse(sra_file, namespaces={'xs': None})
     simple_types = {sp["@name"]: sp for sp in sra_experiment_data["schema"]["simpleType"]}
     target_type = simple_types[type_name]
     values = [val['@value'] for val in target_type['restriction']['enumeration']]
diff --git a/chord_metadata_service/restapi/api_views.py b/chord_metadata_service/restapi/api_views.py
index 065112257..6a1151c2a 100644
--- a/chord_metadata_service/restapi/api_views.py
+++ b/chord_metadata_service/restapi/api_views.py
@@ -5,7 +5,6 @@
 
 from django.conf import settings
 from django.views.decorators.cache import cache_page
-import requests
 from rest_framework.permissions import AllowAny
 from rest_framework.response import Response
 from rest_framework.decorators import api_view, permission_classes

From 26becc31e2624d2d60c20746de34a2b3a08fbc14 Mon Sep 17 00:00:00 2001
From: v-rocheleau <victor.rocheleau@mcgill.ca>
Date: Thu, 14 Sep 2023 15:35:54 +0000
Subject: [PATCH 04/25] experiment library strategy data migration

---
 .../experiments/migrations/0009_v4_1_0.py       | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)
 create mode 100644 chord_metadata_service/experiments/migrations/0009_v4_1_0.py

diff --git a/chord_metadata_service/experiments/migrations/0009_v4_1_0.py b/chord_metadata_service/experiments/migrations/0009_v4_1_0.py
new file mode 100644
index 000000000..458f41c90
--- /dev/null
+++ b/chord_metadata_service/experiments/migrations/0009_v4_1_0.py
@@ -0,0 +1,17 @@
+from django.db import migrations
+
+
+def set_experiment_library_strategy(apps, _schema_editor):
+    Experiment = apps.get_model("experiments", "Experiment")
+    for exp in Experiment.objects.filter(library_strategy="WES"):
+        exp.library_strategy = "WXS"
+        exp.save()
+
+class Migration(migrations.Migration):
+    dependencies = [
+        ('experiments', '0007_v4_0_0'),
+    ]
+
+    operations = [
+        migrations.RunPython(set_experiment_library_strategy)
+    ]

From 762d23aaca6a04ba8607d3e41bab73559f4c3b7f Mon Sep 17 00:00:00 2001
From: v-rocheleau <victor.rocheleau@mcgill.ca>
Date: Thu, 14 Sep 2023 16:08:29 +0000
Subject: [PATCH 05/25] xsd dir name change

---
 chord_metadata_service/experiments/schemas.py                   | 2 +-
 .../ontologies/{sra => xsd}/SRA.experiment.xsd.xml              | 0
 2 files changed, 1 insertion(+), 1 deletion(-)
 rename chord_metadata_service/ontologies/{sra => xsd}/SRA.experiment.xsd.xml (100%)

diff --git a/chord_metadata_service/experiments/schemas.py b/chord_metadata_service/experiments/schemas.py
index 95d237b1c..3b0206b3b 100644
--- a/chord_metadata_service/experiments/schemas.py
+++ b/chord_metadata_service/experiments/schemas.py
@@ -7,7 +7,7 @@
 __all__ = ["EXPERIMENT_SCHEMA", "EXPERIMENT_RESULT_SCHEMA", "INSTRUMENT_SCHEMA"]
 
 LIBRARY_STRATEGIES = readXsdSimpleTypeValues(
-    'chord_metadata_service/ontologies/sra/SRA.experiment.xsd.xml',
+    'chord_metadata_service/ontologies/xsd/SRA.experiment.xsd.xml',
     'typeLibraryStrategy',
 )
 
diff --git a/chord_metadata_service/ontologies/sra/SRA.experiment.xsd.xml b/chord_metadata_service/ontologies/xsd/SRA.experiment.xsd.xml
similarity index 100%
rename from chord_metadata_service/ontologies/sra/SRA.experiment.xsd.xml
rename to chord_metadata_service/ontologies/xsd/SRA.experiment.xsd.xml

From a32bd01f405816f170fc95c058aae513bca7513b Mon Sep 17 00:00:00 2001
From: Victor Rocheleau <victor.rocheleau@mcgill.ca>
Date: Thu, 14 Sep 2023 12:17:35 -0400
Subject: [PATCH 06/25] fix poetry.lock

---
 poetry.lock | 150 +++++++++++++---------------------------------------
 1 file changed, 38 insertions(+), 112 deletions(-)

diff --git a/poetry.lock b/poetry.lock
index e2649a291..4ab834f22 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,10 +1,9 @@
-# This file is automatically @generated by Poetry and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand.
 
 [[package]]
 name = "adrf"
 version = "0.1.2"
 description = "Async support for Django REST framework"
-category = "main"
 optional = false
 python-versions = ">=3.8"
 files = [
@@ -21,7 +20,6 @@ djangorestframework = ">=3.14.0"
 name = "aiohttp"
 version = "3.8.5"
 description = "Async http client/server framework (asyncio)"
-category = "main"
 optional = false
 python-versions = ">=3.6"
 files = [
@@ -130,7 +128,6 @@ speedups = ["Brotli", "aiodns", "cchardet"]
 name = "aiosignal"
 version = "1.3.1"
 description = "aiosignal: a list of registered asynchronous callbacks"
-category = "main"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -145,7 +142,6 @@ frozenlist = ">=1.1.0"
 name = "arrow"
 version = "1.2.3"
 description = "Better dates & times for Python"
-category = "main"
 optional = false
 python-versions = ">=3.6"
 files = [
@@ -160,7 +156,6 @@ python-dateutil = ">=2.7.0"
 name = "asgiref"
 version = "3.7.2"
 description = "ASGI specs, helper code, and adapters"
-category = "main"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -178,7 +173,6 @@ tests = ["mypy (>=0.800)", "pytest", "pytest-asyncio"]
 name = "async-property"
 version = "0.2.2"
 description = "Python decorator for async properties."
-category = "main"
 optional = false
 python-versions = "*"
 files = [
@@ -190,7 +184,6 @@ files = [
 name = "async-timeout"
 version = "4.0.3"
 description = "Timeout context manager for asyncio programs"
-category = "main"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -202,7 +195,6 @@ files = [
 name = "attrs"
 version = "23.1.0"
 description = "Classes Without Boilerplate"
-category = "main"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -221,7 +213,6 @@ tests-no-zope = ["cloudpickle", "hypothesis", "mypy (>=1.1.1)", "pympler", "pyte
 name = "backports-zoneinfo"
 version = "0.2.1"
 description = "Backport of the standard library zoneinfo module"
-category = "main"
 optional = false
 python-versions = ">=3.6"
 files = [
@@ -250,7 +241,6 @@ tzdata = ["tzdata"]
 name = "bento-lib"
 version = "6.1.0"
 description = "A set of common utilities and helpers for Bento platform services."
-category = "main"
 optional = false
 python-versions = ">=3.8.1"
 files = [
@@ -278,7 +268,6 @@ quart = ["quart (>=0.18.4,<0.19)"]
 name = "cachetools"
 version = "5.3.1"
 description = "Extensible memoizing collections and decorators"
-category = "dev"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -290,7 +279,6 @@ files = [
 name = "certifi"
 version = "2023.7.22"
 description = "Python package for providing Mozilla's CA Bundle."
-category = "main"
 optional = false
 python-versions = ">=3.6"
 files = [
@@ -302,7 +290,6 @@ files = [
 name = "cffi"
 version = "1.15.1"
 description = "Foreign Function Interface for Python calling C code."
-category = "main"
 optional = false
 python-versions = "*"
 files = [
@@ -379,7 +366,6 @@ pycparser = "*"
 name = "chardet"
 version = "5.2.0"
 description = "Universal encoding detector for Python 3"
-category = "dev"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -391,7 +377,6 @@ files = [
 name = "charset-normalizer"
 version = "3.2.0"
 description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet."
-category = "main"
 optional = false
 python-versions = ">=3.7.0"
 files = [
@@ -476,7 +461,6 @@ files = [
 name = "colorama"
 version = "0.4.6"
 description = "Cross-platform colored terminal text."
-category = "dev"
 optional = false
 python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7"
 files = [
@@ -488,7 +472,6 @@ files = [
 name = "coverage"
 version = "6.5.0"
 description = "Code coverage measurement for Python"
-category = "dev"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -551,7 +534,6 @@ toml = ["tomli"]
 name = "cryptography"
 version = "41.0.3"
 description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers."
-category = "main"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -595,41 +577,35 @@ test-randomorder = ["pytest-randomly"]
 
 [[package]]
 name = "debugpy"
-version = "1.7.0"
+version = "1.8.0"
 description = "An implementation of the Debug Adapter Protocol for Python"
-category = "dev"
 optional = false
-python-versions = ">=3.7"
+python-versions = ">=3.8"
 files = [
-    {file = "debugpy-1.7.0-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:17ad9a681aca1704c55b9a5edcb495fa8f599e4655c9872b7f9cf3dc25890d48"},
-    {file = "debugpy-1.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1285920a3f9a75f5d1acf59ab1b9da9ae6eb9a05884cd7674f95170c9cafa4de"},
-    {file = "debugpy-1.7.0-cp310-cp310-win32.whl", hash = "sha256:a6f43a681c5025db1f1c0568069d1d1bad306a02e7c36144912b26d9c90e4724"},
-    {file = "debugpy-1.7.0-cp310-cp310-win_amd64.whl", hash = "sha256:9e9571d831ad3c75b5fb6f3efcb71c471cf2a74ba84af6ac1c79ce00683bed4b"},
-    {file = "debugpy-1.7.0-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:538765a41198aa88cc089295b39c7322dd598f9ef1d52eaae12145c63bf9430a"},
-    {file = "debugpy-1.7.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c7e8cf91f8f3f9b5fad844dd88427b85d398bda1e2a0cd65d5a21312fcbc0c6f"},
-    {file = "debugpy-1.7.0-cp311-cp311-win32.whl", hash = "sha256:18a69f8e142a716310dd0af6d7db08992aed99e2606108732efde101e7c65e2a"},
-    {file = "debugpy-1.7.0-cp311-cp311-win_amd64.whl", hash = "sha256:7515a5ba5ee9bfe956685909c5f28734c1cecd4ee813523363acfe3ca824883a"},
-    {file = "debugpy-1.7.0-cp37-cp37m-macosx_11_0_x86_64.whl", hash = "sha256:bc8da67ade39d9e75608cdb8601d07e63a4e85966e0572c981f14e2cf42bcdef"},
-    {file = "debugpy-1.7.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a5036e918c6ba8fc4c4f1fd0207d81db634431a02f0dc2ba51b12fd793c8c9de"},
-    {file = "debugpy-1.7.0-cp37-cp37m-win32.whl", hash = "sha256:d5be95b3946a4d7b388e45068c7b75036ac5a610f41014aee6cafcd5506423ad"},
-    {file = "debugpy-1.7.0-cp37-cp37m-win_amd64.whl", hash = "sha256:0e90314a078d4e3f009520c8387aba8f74c3034645daa7a332a3d1bb81335756"},
-    {file = "debugpy-1.7.0-cp38-cp38-macosx_11_0_x86_64.whl", hash = "sha256:1565fd904f9571c430adca597771255cff4f92171486fced6f765dcbdfc8ec8d"},
-    {file = "debugpy-1.7.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6516f36a2e95b3be27f171f12b641e443863f4ad5255d0fdcea6ae0be29bb912"},
-    {file = "debugpy-1.7.0-cp38-cp38-win32.whl", hash = "sha256:2b0e489613bc066051439df04c56777ec184b957d6810cb65f235083aef7a0dc"},
-    {file = "debugpy-1.7.0-cp38-cp38-win_amd64.whl", hash = "sha256:7bf0b4bbd841b2397b6a8de15da9227f1164f6d43ceee971c50194eaed930a9d"},
-    {file = "debugpy-1.7.0-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:ad22e1095b9977af432465c1e09132ba176e18df3834b1efcab1a449346b350b"},
-    {file = "debugpy-1.7.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f625e427f21423e5874139db529e18cb2966bdfcc1cb87a195538c5b34d163d1"},
-    {file = "debugpy-1.7.0-cp39-cp39-win32.whl", hash = "sha256:18bca8429d6632e2d3435055416d2d88f0309cc39709f4f6355c8d412cc61f24"},
-    {file = "debugpy-1.7.0-cp39-cp39-win_amd64.whl", hash = "sha256:dc8a12ac8b97ef3d6973c6679a093138c7c9b03eb685f0e253269a195f651559"},
-    {file = "debugpy-1.7.0-py2.py3-none-any.whl", hash = "sha256:f6de2e6f24f62969e0f0ef682d78c98161c4dca29e9fb05df4d2989005005502"},
-    {file = "debugpy-1.7.0.zip", hash = "sha256:676911c710e85567b17172db934a71319ed9d995104610ce23fd74a07f66e6f6"},
+    {file = "debugpy-1.8.0-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:7fb95ca78f7ac43393cd0e0f2b6deda438ec7c5e47fa5d38553340897d2fbdfb"},
+    {file = "debugpy-1.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ef9ab7df0b9a42ed9c878afd3eaaff471fce3fa73df96022e1f5c9f8f8c87ada"},
+    {file = "debugpy-1.8.0-cp310-cp310-win32.whl", hash = "sha256:a8b7a2fd27cd9f3553ac112f356ad4ca93338feadd8910277aff71ab24d8775f"},
+    {file = "debugpy-1.8.0-cp310-cp310-win_amd64.whl", hash = "sha256:5d9de202f5d42e62f932507ee8b21e30d49aae7e46d5b1dd5c908db1d7068637"},
+    {file = "debugpy-1.8.0-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:ef54404365fae8d45cf450d0544ee40cefbcb9cb85ea7afe89a963c27028261e"},
+    {file = "debugpy-1.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:60009b132c91951354f54363f8ebdf7457aeb150e84abba5ae251b8e9f29a8a6"},
+    {file = "debugpy-1.8.0-cp311-cp311-win32.whl", hash = "sha256:8cd0197141eb9e8a4566794550cfdcdb8b3db0818bdf8c49a8e8f8053e56e38b"},
+    {file = "debugpy-1.8.0-cp311-cp311-win_amd64.whl", hash = "sha256:a64093656c4c64dc6a438e11d59369875d200bd5abb8f9b26c1f5f723622e153"},
+    {file = "debugpy-1.8.0-cp38-cp38-macosx_11_0_x86_64.whl", hash = "sha256:b05a6b503ed520ad58c8dc682749113d2fd9f41ffd45daec16e558ca884008cd"},
+    {file = "debugpy-1.8.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3c6fb41c98ec51dd010d7ed650accfd07a87fe5e93eca9d5f584d0578f28f35f"},
+    {file = "debugpy-1.8.0-cp38-cp38-win32.whl", hash = "sha256:46ab6780159eeabb43c1495d9c84cf85d62975e48b6ec21ee10c95767c0590aa"},
+    {file = "debugpy-1.8.0-cp38-cp38-win_amd64.whl", hash = "sha256:bdc5ef99d14b9c0fcb35351b4fbfc06ac0ee576aeab6b2511702e5a648a2e595"},
+    {file = "debugpy-1.8.0-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:61eab4a4c8b6125d41a34bad4e5fe3d2cc145caecd63c3fe953be4cc53e65bf8"},
+    {file = "debugpy-1.8.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:125b9a637e013f9faac0a3d6a82bd17c8b5d2c875fb6b7e2772c5aba6d082332"},
+    {file = "debugpy-1.8.0-cp39-cp39-win32.whl", hash = "sha256:57161629133113c97b387382045649a2b985a348f0c9366e22217c87b68b73c6"},
+    {file = "debugpy-1.8.0-cp39-cp39-win_amd64.whl", hash = "sha256:e3412f9faa9ade82aa64a50b602544efcba848c91384e9f93497a458767e6926"},
+    {file = "debugpy-1.8.0-py2.py3-none-any.whl", hash = "sha256:9c9b0ac1ce2a42888199df1a1906e45e6f3c9555497643a85e0bf2406e3ffbc4"},
+    {file = "debugpy-1.8.0.zip", hash = "sha256:12af2c55b419521e33d5fb21bd022df0b5eb267c3e178f1d374a63a2a6bdccd0"},
 ]
 
 [[package]]
 name = "distlib"
 version = "0.3.7"
 description = "Distribution utilities"
-category = "dev"
 optional = false
 python-versions = "*"
 files = [
@@ -641,7 +617,6 @@ files = [
 name = "django"
 version = "4.2.5"
 description = "A high-level Python web framework that encourages rapid development and clean, pragmatic design."
-category = "main"
 optional = false
 python-versions = ">=3.8"
 files = [
@@ -663,7 +638,6 @@ bcrypt = ["bcrypt"]
 name = "django-autocomplete-light"
 version = "3.9.7"
 description = "Fresh autocompletes for Django"
-category = "main"
 optional = false
 python-versions = "*"
 files = [
@@ -684,7 +658,6 @@ tags = ["django-taggit"]
 name = "django-cors-headers"
 version = "3.14.0"
 description = "django-cors-headers is a Django application for handling the server headers required for Cross-Origin Resource Sharing (CORS)."
-category = "main"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -699,7 +672,6 @@ Django = ">=3.2"
 name = "django-filter"
 version = "22.1"
 description = "Django-filter is a reusable Django application for allowing users to filter querysets dynamically."
-category = "main"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -714,7 +686,6 @@ Django = ">=3.2"
 name = "djangorestframework"
 version = "3.14.0"
 description = "Web APIs for Django, made easy."
-category = "main"
 optional = false
 python-versions = ">=3.6"
 files = [
@@ -730,7 +701,6 @@ pytz = "*"
 name = "djangorestframework-camel-case"
 version = "1.4.2"
 description = "Camel case JSON support for Django REST framework."
-category = "main"
 optional = false
 python-versions = ">=3.5"
 files = [
@@ -741,7 +711,6 @@ files = [
 name = "drf-spectacular"
 version = "0.25.1"
 description = "Sane and flexible OpenAPI 3 schema generation for Django REST framework"
-category = "main"
 optional = false
 python-versions = ">=3.6"
 files = [
@@ -765,7 +734,6 @@ sidecar = ["drf-spectacular-sidecar"]
 name = "elasticsearch"
 version = "7.8.1"
 description = "Python client for Elasticsearch"
-category = "main"
 optional = false
 python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, <4"
 files = [
@@ -787,7 +755,6 @@ requests = ["requests (>=2.4.0,<3.0.0)"]
 name = "fhirclient"
 version = "3.2.0"
 description = "A flexible client for FHIR servers supporting the SMART on FHIR protocol"
-category = "main"
 optional = false
 python-versions = "*"
 files = [
@@ -801,28 +768,24 @@ requests = "*"
 
 [[package]]
 name = "filelock"
-version = "3.12.3"
+version = "3.12.4"
 description = "A platform independent file lock."
-category = "dev"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "filelock-3.12.3-py3-none-any.whl", hash = "sha256:f067e40ccc40f2b48395a80fcbd4728262fab54e232e090a4063ab804179efeb"},
-    {file = "filelock-3.12.3.tar.gz", hash = "sha256:0ecc1dd2ec4672a10c8550a8182f1bd0c0a5088470ecd5a125e45f49472fac3d"},
+    {file = "filelock-3.12.4-py3-none-any.whl", hash = "sha256:08c21d87ded6e2b9da6728c3dff51baf1dcecf973b768ef35bcbc3447edb9ad4"},
+    {file = "filelock-3.12.4.tar.gz", hash = "sha256:2e6f249f1f3654291606e046b09f1fd5eac39b360664c27f5aad072012f8bcbd"},
 ]
 
-[package.dependencies]
-typing-extensions = {version = ">=4.7.1", markers = "python_version < \"3.11\""}
-
 [package.extras]
 docs = ["furo (>=2023.7.26)", "sphinx (>=7.1.2)", "sphinx-autodoc-typehints (>=1.24)"]
 testing = ["covdefaults (>=2.3)", "coverage (>=7.3)", "diff-cover (>=7.7)", "pytest (>=7.4)", "pytest-cov (>=4.1)", "pytest-mock (>=3.11.1)", "pytest-timeout (>=2.1)"]
+typing = ["typing-extensions (>=4.7.1)"]
 
 [[package]]
 name = "flake8"
 version = "6.1.0"
 description = "the modular source code checker: pep8 pyflakes and co"
-category = "dev"
 optional = false
 python-versions = ">=3.8.1"
 files = [
@@ -839,7 +802,6 @@ pyflakes = ">=3.1.0,<3.2.0"
 name = "fqdn"
 version = "1.5.1"
 description = "Validates fully-qualified domain names against RFC 1123, so that they are acceptable to modern bowsers"
-category = "main"
 optional = false
 python-versions = ">=2.7, !=3.0, !=3.1, !=3.2, !=3.3, !=3.4, <4"
 files = [
@@ -851,7 +813,6 @@ files = [
 name = "frozenlist"
 version = "1.4.0"
 description = "A list-like structure which implements collections.abc.MutableSequence"
-category = "main"
 optional = false
 python-versions = ">=3.8"
 files = [
@@ -922,7 +883,6 @@ files = [
 name = "idna"
 version = "3.4"
 description = "Internationalized Domain Names in Applications (IDNA)"
-category = "main"
 optional = false
 python-versions = ">=3.5"
 files = [
@@ -934,7 +894,6 @@ files = [
 name = "importlib-resources"
 version = "6.0.1"
 description = "Read resources from Python packages"
-category = "main"
 optional = false
 python-versions = ">=3.8"
 files = [
@@ -953,7 +912,6 @@ testing = ["pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)",
 name = "inflection"
 version = "0.5.1"
 description = "A port of Ruby on Rails inflector to Python"
-category = "main"
 optional = false
 python-versions = ">=3.5"
 files = [
@@ -965,7 +923,6 @@ files = [
 name = "isodate"
 version = "0.6.1"
 description = "An ISO 8601 date/time/duration parser and formatter"
-category = "main"
 optional = false
 python-versions = "*"
 files = [
@@ -980,7 +937,6 @@ six = "*"
 name = "isoduration"
 version = "20.11.0"
 description = "Operations with ISO 8601 durations"
-category = "main"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -995,18 +951,17 @@ arrow = ">=0.15.0"
 name = "jsonpointer"
 version = "2.4"
 description = "Identify specific nodes in a JSON document (RFC 6901)"
-category = "main"
 optional = false
 python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*"
 files = [
     {file = "jsonpointer-2.4-py2.py3-none-any.whl", hash = "sha256:15d51bba20eea3165644553647711d150376234112651b4f1811022aecad7d7a"},
+    {file = "jsonpointer-2.4.tar.gz", hash = "sha256:585cee82b70211fa9e6043b7bb89db6e1aa49524340dde8ad6b63206ea689d88"},
 ]
 
 [[package]]
 name = "jsonschema"
 version = "4.17.3"
 description = "An implementation of JSON Schema validation for Python"
-category = "main"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -1036,7 +991,6 @@ format-nongpl = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-
 name = "markupsafe"
 version = "2.1.3"
 description = "Safely add untrusted strings to HTML/XML markup."
-category = "main"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -1106,7 +1060,6 @@ files = [
 name = "mccabe"
 version = "0.7.0"
 description = "McCabe checker, plugin for flake8"
-category = "dev"
 optional = false
 python-versions = ">=3.6"
 files = [
@@ -1118,7 +1071,6 @@ files = [
 name = "multidict"
 version = "6.0.4"
 description = "multidict implementation"
-category = "main"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -1202,7 +1154,6 @@ files = [
 name = "packaging"
 version = "23.1"
 description = "Core utilities for Python packages"
-category = "dev"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -1214,7 +1165,6 @@ files = [
 name = "pkgutil-resolve-name"
 version = "1.3.10"
 description = "Resolve a name to an object."
-category = "main"
 optional = false
 python-versions = ">=3.6"
 files = [
@@ -1226,7 +1176,6 @@ files = [
 name = "platformdirs"
 version = "3.10.0"
 description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"."
-category = "dev"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -1242,7 +1191,6 @@ test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.4)", "pytest-co
 name = "pluggy"
 version = "1.3.0"
 description = "plugin and hook calling mechanisms for python"
-category = "dev"
 optional = false
 python-versions = ">=3.8"
 files = [
@@ -1258,7 +1206,6 @@ testing = ["pytest", "pytest-benchmark"]
 name = "psycopg2-binary"
 version = "2.9.7"
 description = "psycopg2 - Python-PostgreSQL Database Adapter"
-category = "main"
 optional = false
 python-versions = ">=3.6"
 files = [
@@ -1328,7 +1275,6 @@ files = [
 name = "pycodestyle"
 version = "2.11.0"
 description = "Python style guide checker"
-category = "dev"
 optional = false
 python-versions = ">=3.8"
 files = [
@@ -1340,7 +1286,6 @@ files = [
 name = "pycparser"
 version = "2.21"
 description = "C parser in Python"
-category = "main"
 optional = false
 python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
 files = [
@@ -1352,7 +1297,6 @@ files = [
 name = "pyflakes"
 version = "3.1.0"
 description = "passive checker of Python programs"
-category = "dev"
 optional = false
 python-versions = ">=3.8"
 files = [
@@ -1364,7 +1308,6 @@ files = [
 name = "pyjwt"
 version = "2.8.0"
 description = "JSON Web Token implementation in Python"
-category = "main"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -1385,7 +1328,6 @@ tests = ["coverage[toml] (==5.0.4)", "pytest (>=6.0.0,<7.0.0)"]
 name = "pyparsing"
 version = "3.1.1"
 description = "pyparsing module - Classes and methods to define and execute parsing grammars"
-category = "main"
 optional = false
 python-versions = ">=3.6.8"
 files = [
@@ -1400,7 +1342,6 @@ diagrams = ["jinja2", "railroad-diagrams"]
 name = "pyproject-api"
 version = "1.6.1"
 description = "API to interact with the python pyproject.toml based projects"
-category = "dev"
 optional = false
 python-versions = ">=3.8"
 files = [
@@ -1420,7 +1361,6 @@ testing = ["covdefaults (>=2.3)", "pytest (>=7.4)", "pytest-cov (>=4.1)", "pytes
 name = "pyrsistent"
 version = "0.19.3"
 description = "Persistent/Functional/Immutable data structures"
-category = "main"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -1457,7 +1397,6 @@ files = [
 name = "python-dateutil"
 version = "2.8.2"
 description = "Extensions to the standard Python datetime module"
-category = "main"
 optional = false
 python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7"
 files = [
@@ -1472,7 +1411,6 @@ six = ">=1.5"
 name = "python-dotenv"
 version = "0.21.1"
 description = "Read key-value pairs from a .env file and set them as environment variables"
-category = "main"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -1487,7 +1425,6 @@ cli = ["click (>=5.0)"]
 name = "pytz"
 version = "2023.3.post1"
 description = "World timezone definitions, modern and historical"
-category = "main"
 optional = false
 python-versions = "*"
 files = [
@@ -1499,7 +1436,6 @@ files = [
 name = "pyyaml"
 version = "6.0.1"
 description = "YAML parser and emitter for Python"
-category = "main"
 optional = false
 python-versions = ">=3.6"
 files = [
@@ -1559,7 +1495,6 @@ files = [
 name = "rdflib"
 version = "6.3.2"
 description = "RDFLib is a Python library for working with RDF, a simple yet powerful language for representing information."
-category = "main"
 optional = false
 python-versions = ">=3.7,<4.0"
 files = [
@@ -1581,7 +1516,6 @@ networkx = ["networkx (>=2.0.0,<3.0.0)"]
 name = "redis"
 version = "4.6.0"
 description = "Python client for Redis database and key-value store"
-category = "main"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -1600,7 +1534,6 @@ ocsp = ["cryptography (>=36.0.1)", "pyopenssl (==20.0.1)", "requests (>=2.26.0)"
 name = "requests"
 version = "2.31.0"
 description = "Python HTTP for Humans."
-category = "main"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -1622,7 +1555,6 @@ use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"]
 name = "rfc3339-validator"
 version = "0.1.4"
 description = "A pure python RFC3339 validator"
-category = "main"
 optional = false
 python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
 files = [
@@ -1637,7 +1569,6 @@ six = "*"
 name = "rfc3987"
 version = "1.3.8"
 description = "Parsing and validation of URIs (RFC 3986) and IRIs (RFC 3987)"
-category = "main"
 optional = false
 python-versions = "*"
 files = [
@@ -1649,7 +1580,6 @@ files = [
 name = "six"
 version = "1.16.0"
 description = "Python 2 and 3 compatibility utilities"
-category = "main"
 optional = false
 python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*"
 files = [
@@ -1661,7 +1591,6 @@ files = [
 name = "sqlparse"
 version = "0.4.4"
 description = "A non-validating SQL parser."
-category = "main"
 optional = false
 python-versions = ">=3.5"
 files = [
@@ -1678,7 +1607,6 @@ test = ["pytest", "pytest-cov"]
 name = "strict-rfc3339"
 version = "0.7"
 description = "Strict, simple, lightweight RFC3339 functions"
-category = "main"
 optional = false
 python-versions = "*"
 files = [
@@ -1689,7 +1617,6 @@ files = [
 name = "tabulate"
 version = "0.9.0"
 description = "Pretty-print tabular data"
-category = "main"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -1704,7 +1631,6 @@ widechars = ["wcwidth"]
 name = "tomli"
 version = "2.0.1"
 description = "A lil' TOML parser"
-category = "dev"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -1716,7 +1642,6 @@ files = [
 name = "tox"
 version = "4.11.3"
 description = "tox is a generic virtualenv management and test command line tool"
-category = "dev"
 optional = false
 python-versions = ">=3.8"
 files = [
@@ -1744,7 +1669,6 @@ testing = ["build[virtualenv] (>=0.10)", "covdefaults (>=2.3)", "detect-test-pol
 name = "typing-extensions"
 version = "4.7.1"
 description = "Backported and Experimental Type Hints for Python 3.7+"
-category = "main"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -1756,7 +1680,6 @@ files = [
 name = "tzdata"
 version = "2023.3"
 description = "Provider of IANA time zone data"
-category = "main"
 optional = false
 python-versions = ">=2"
 files = [
@@ -1768,7 +1691,6 @@ files = [
 name = "uri-template"
 version = "1.3.0"
 description = "RFC 6570 URI Template Processor"
-category = "main"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -1783,7 +1705,6 @@ dev = ["flake8", "flake8-annotations", "flake8-bandit", "flake8-bugbear", "flake
 name = "uritemplate"
 version = "4.1.1"
 description = "Implementation of RFC 6570 URI Templates"
-category = "main"
 optional = false
 python-versions = ">=3.6"
 files = [
@@ -1795,7 +1716,6 @@ files = [
 name = "urllib3"
 version = "2.0.4"
 description = "HTTP library with thread-safe connection pooling, file post, and more."
-category = "main"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -1813,7 +1733,6 @@ zstd = ["zstandard (>=0.18.0)"]
 name = "virtualenv"
 version = "20.24.5"
 description = "Virtual Python Environment builder"
-category = "dev"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -1834,7 +1753,6 @@ test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess
 name = "webcolors"
 version = "1.13"
 description = "A library for working with the color formats defined by HTML and CSS."
-category = "main"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -1850,7 +1768,6 @@ tests = ["pytest", "pytest-cov"]
 name = "werkzeug"
 version = "2.3.7"
 description = "The comprehensive WSGI web application library."
-category = "main"
 optional = false
 python-versions = ">=3.8"
 files = [
@@ -1864,11 +1781,21 @@ MarkupSafe = ">=2.1.1"
 [package.extras]
 watchdog = ["watchdog (>=2.3)"]
 
+[[package]]
+name = "xmltodict"
+version = "0.13.0"
+description = "Makes working with XML feel like you are working with JSON"
+optional = false
+python-versions = ">=3.4"
+files = [
+    {file = "xmltodict-0.13.0-py2.py3-none-any.whl", hash = "sha256:aa89e8fd76320154a40d19a0df04a4695fb9dc5ba977cbb68ab3e4eb225e7852"},
+    {file = "xmltodict-0.13.0.tar.gz", hash = "sha256:341595a488e3e01a85a9d8911d8912fd922ede5fecc4dce437eb4b6c8d037e56"},
+]
+
 [[package]]
 name = "yarl"
 version = "1.9.2"
 description = "Yet another URL library"
-category = "main"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -1956,7 +1883,6 @@ multidict = ">=4.0"
 name = "zipp"
 version = "3.16.2"
 description = "Backport of pathlib-compatible object wrapper for zip files"
-category = "main"
 optional = false
 python-versions = ">=3.8"
 files = [
@@ -1971,4 +1897,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.8.1"
-content-hash = "c332f4a015b9d342e05f093fa68a17766ec676a714551c19c7e7ebae2e2930db"
+content-hash = "abe23a4eb83ecba4865f7fbccd5409ac2d79f46cc9306b3d3a61500c94702e61"

From 1390066c393d5dee495d7e22f406d3951b930381 Mon Sep 17 00:00:00 2001
From: v-rocheleau <victor.rocheleau@mcgill.ca>
Date: Thu, 14 Sep 2023 16:26:45 +0000
Subject: [PATCH 07/25] code clean

---
 chord_metadata_service/experiments/schemas.py |  4 ++--
 chord_metadata_service/ontologies/__init__.py |  4 ++--
 chord_metadata_service/ontologies/utils.py    | 10 ++++++----
 chord_metadata_service/restapi/api_views.py   |  1 +
 4 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/chord_metadata_service/experiments/schemas.py b/chord_metadata_service/experiments/schemas.py
index 3b0206b3b..4a6bfb9d2 100644
--- a/chord_metadata_service/experiments/schemas.py
+++ b/chord_metadata_service/experiments/schemas.py
@@ -1,12 +1,12 @@
 from .descriptions import EXPERIMENT, EXPERIMENT_RESULT, INSTRUMENT
 from chord_metadata_service.restapi.schemas import ONTOLOGY_CLASS_LIST, KEY_VALUE_OBJECT
 from chord_metadata_service.restapi.schema_utils import tag_ids_and_describe
-from chord_metadata_service.ontologies import readXsdSimpleTypeValues
+from chord_metadata_service.ontologies import read_xsd_simple_type_values
 
 
 __all__ = ["EXPERIMENT_SCHEMA", "EXPERIMENT_RESULT_SCHEMA", "INSTRUMENT_SCHEMA"]
 
-LIBRARY_STRATEGIES = readXsdSimpleTypeValues(
+LIBRARY_STRATEGIES = read_xsd_simple_type_values(
     'chord_metadata_service/ontologies/xsd/SRA.experiment.xsd.xml',
     'typeLibraryStrategy',
 )
diff --git a/chord_metadata_service/ontologies/__init__.py b/chord_metadata_service/ontologies/__init__.py
index e48d1171f..64c3158e0 100644
--- a/chord_metadata_service/ontologies/__init__.py
+++ b/chord_metadata_service/ontologies/__init__.py
@@ -1,5 +1,5 @@
-from .utils import readXsdSimpleTypeValues
+from .utils import read_xsd_simple_type_values
 
 __all__ = [
-    "readXsdSimpleTypeValues",
+    "read_xsd_simple_type_values",
 ]
diff --git a/chord_metadata_service/ontologies/utils.py b/chord_metadata_service/ontologies/utils.py
index 3b3644269..be5a7ca84 100644
--- a/chord_metadata_service/ontologies/utils.py
+++ b/chord_metadata_service/ontologies/utils.py
@@ -1,14 +1,16 @@
+from typing import List
 import xmltodict
 
 
-def readXsdSimpleTypeValues(xsd_file_path: str, type_name: str):
+def read_xsd_simple_type_values(xsd_file_path: str, type_name: str) -> List[str]:
     """Reads an XML Schema Definition (XSD) file and returns a type's values.
     The XSD file is parsed using xmltodict following this spec:
     https://www.xml.com/pub/a/2006/05/31/converting-between-xml-and-json.html
     """
-    sra_file = open(xsd_file_path).read()
-    sra_experiment_data = xmltodict.parse(sra_file, namespaces={'xs': None})
-    simple_types = {sp["@name"]: sp for sp in sra_experiment_data["schema"]["simpleType"]}
+    with open(xsd_file_path, 'r') as file:
+        xsd_file = file.read()
+    xsd_data = xmltodict.parse(xsd_file, namespaces={'xs': None})
+    simple_types = {sp["@name"]: sp for sp in xsd_data["schema"]["simpleType"]}
     target_type = simple_types[type_name]
     values = [val['@value'] for val in target_type['restriction']['enumeration']]
     return values
diff --git a/chord_metadata_service/restapi/api_views.py b/chord_metadata_service/restapi/api_views.py
index 6a1151c2a..0586cbc1a 100644
--- a/chord_metadata_service/restapi/api_views.py
+++ b/chord_metadata_service/restapi/api_views.py
@@ -360,6 +360,7 @@ def public_overview(_request):
     get:
     Overview of all public data in the database
     """
+    
     if not settings.CONFIG_PUBLIC:
         return Response(settings.NO_PUBLIC_DATA_AVAILABLE)
 

From d8d9a85e74ec819c97ca1a7cc3d6b300d8177b37 Mon Sep 17 00:00:00 2001
From: v-rocheleau <victor.rocheleau@mcgill.ca>
Date: Thu, 14 Sep 2023 16:29:10 +0000
Subject: [PATCH 08/25] lint

---
 chord_metadata_service/restapi/api_views.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/chord_metadata_service/restapi/api_views.py b/chord_metadata_service/restapi/api_views.py
index 0586cbc1a..6be37f377 100644
--- a/chord_metadata_service/restapi/api_views.py
+++ b/chord_metadata_service/restapi/api_views.py
@@ -360,7 +360,7 @@ def public_overview(_request):
     get:
     Overview of all public data in the database
     """
-    
+
     if not settings.CONFIG_PUBLIC:
         return Response(settings.NO_PUBLIC_DATA_AVAILABLE)
 

From d030cb6760904e7b3c7d2a8cf1e9dfea7cbba1b8 Mon Sep 17 00:00:00 2001
From: v-rocheleau <victor.rocheleau@mcgill.ca>
Date: Thu, 14 Sep 2023 20:09:56 +0000
Subject: [PATCH 09/25] read library selection, add doc

---
 README.md                                     | 15 +++++++++
 .../experiments/migrations/0009_v4_1_0.py     | 31 ++++++++++++++++---
 chord_metadata_service/experiments/schemas.py | 17 +++++++---
 chord_metadata_service/ontologies/__init__.py |  3 +-
 chord_metadata_service/ontologies/utils.py    |  9 +++++-
 pyproject.toml                                |  1 +
 6 files changed, 64 insertions(+), 12 deletions(-)

diff --git a/README.md b/README.md
index 208f0b891..3b22a3048 100644
--- a/README.md
+++ b/README.md
@@ -67,6 +67,21 @@ Katsu Metadata Service is a service to store epigenomic metadata.
 7. Rest api service handles all generic functionality shared among other services
 
 
+## Schemas
+### Clinical Data
+
+Katsu implements the [Phenopacket V1.0.0](https://phenopacket-schema.readthedocs.io/en/1.0.0/) schema for clinical data.
+
+The schema definition for the phenopacket object is located in [chord_metadata_service/phenopackets/schemas.py](https://github.com/bento-platform/katsu/blob/4ab3c55d6052994ef69b188fb872261c47de24e0/chord_metadata_service/phenopackets/schemas.py#L336).
+
+### Experiments
+
+Katsu's experiment schemas are based on the IHEC [metadata specification](https://github.com/IHEC/ihec-ecosystems/blob/master/docs/metadata/2.0/Ihec_metadata_specification.md#experiments), which is itself based on the EBI/SRA schemas.
+
+The value options for `library_strategy` and `library_selection` are read from [chord_metadata_service/ontologies/xsd/SRA.experiment.xsd.xml](./chord_metadata_service/ontologies/xsd/SRA.experiment.xsd.xml), downloaded from the EBI's [SRA v1.5 database](http://ftp.ebi.ac.uk/pub/databases/ena/doc/xsd/sra_1_5/).
+
+The `SRA.experiment.xsd.xml` file is licensed under the Apache License, Version 2.0; the full copyright text is included in the file's header.
+
 ## REST API highlights
 
 * Swagger schema docs can be found 
diff --git a/chord_metadata_service/experiments/migrations/0009_v4_1_0.py b/chord_metadata_service/experiments/migrations/0009_v4_1_0.py
index 458f41c90..0a1e60580 100644
--- a/chord_metadata_service/experiments/migrations/0009_v4_1_0.py
+++ b/chord_metadata_service/experiments/migrations/0009_v4_1_0.py
@@ -1,11 +1,32 @@
+from typing import List
 from django.db import migrations
 
+LIB_STRATEGY_CONVERSIONS: List[tuple[str, str]] = [
+    # Convert WES -> WXS ...
+    ("WES", "WXS"),
+    ("Other", "OTHER")
+]
 
-def set_experiment_library_strategy(apps, _schema_editor):
+LIB_SELECTION_CONVERIONS: List[tuple[str, str]] = [
+    ("Random", "RANDOM"),
+    ("Random PCR", "RANDOM PCR"),
+    ("Exome capture", "other"), # 'Exome capture' no longer supported
+    ("Other", "other"),
+]
+
+def set_experiment_library(apps, _schema_editor):
     Experiment = apps.get_model("experiments", "Experiment")
-    for exp in Experiment.objects.filter(library_strategy="WES"):
-        exp.library_strategy = "WXS"
-        exp.save()
+    for (old_val, new_val) in LIB_STRATEGY_CONVERSIONS:
+        # Modify library_strategy if necessary
+        for exp in Experiment.objects.filter(library_strategy=old_val):
+            exp.library_strategy = new_val
+            exp.save()
+    
+    for (old_val, new_val) in LIB_SELECTION_CONVERIONS:
+        # Modify library_selection if necessary
+        for exp in Experiment.objects.filter(library_selection=old_val):
+            exp.library_selection = new_val
+            exp.save()
 
 class Migration(migrations.Migration):
     dependencies = [
@@ -13,5 +34,5 @@ class Migration(migrations.Migration):
     ]
 
     operations = [
-        migrations.RunPython(set_experiment_library_strategy)
+        migrations.RunPython(set_experiment_library)
     ]
diff --git a/chord_metadata_service/experiments/schemas.py b/chord_metadata_service/experiments/schemas.py
index 4a6bfb9d2..225a58eee 100644
--- a/chord_metadata_service/experiments/schemas.py
+++ b/chord_metadata_service/experiments/schemas.py
@@ -1,14 +1,21 @@
 from .descriptions import EXPERIMENT, EXPERIMENT_RESULT, INSTRUMENT
 from chord_metadata_service.restapi.schemas import ONTOLOGY_CLASS_LIST, KEY_VALUE_OBJECT
 from chord_metadata_service.restapi.schema_utils import tag_ids_and_describe
-from chord_metadata_service.ontologies import read_xsd_simple_type_values
-
+from chord_metadata_service.ontologies import read_xsd_simple_type_values, SRA_EXPERIMENT_FILE_NAME
 
 __all__ = ["EXPERIMENT_SCHEMA", "EXPERIMENT_RESULT_SCHEMA", "INSTRUMENT_SCHEMA"]
 
+# Experiment library strategy options are read from the EBI xsd file
 LIBRARY_STRATEGIES = read_xsd_simple_type_values(
-    'chord_metadata_service/ontologies/xsd/SRA.experiment.xsd.xml',
-    'typeLibraryStrategy',
+    SRA_EXPERIMENT_FILE_NAME,
+    "typeLibraryStrategy",
+)
+
+
+# Experiment library selection options are read from the EBI xsd file
+LIBRARY_SELECTION = read_xsd_simple_type_values(
+    SRA_EXPERIMENT_FILE_NAME,
+    "typeLibrarySelection",
 )
 
 EXPERIMENT_RESULT_SCHEMA = tag_ids_and_describe({
@@ -118,7 +125,7 @@
         },
         "library_selection": {
             "type": "string",
-            "enum": ["Random", "PCR", "Random PCR", "RT-PCR", "MF", "Exome capture", "Other"]
+            "enum": LIBRARY_SELECTION
         },
         "library_layout": {
             "type": "string",
diff --git a/chord_metadata_service/ontologies/__init__.py b/chord_metadata_service/ontologies/__init__.py
index 64c3158e0..b05e2bc07 100644
--- a/chord_metadata_service/ontologies/__init__.py
+++ b/chord_metadata_service/ontologies/__init__.py
@@ -1,5 +1,6 @@
-from .utils import read_xsd_simple_type_values
+from .utils import read_xsd_simple_type_values, SRA_EXPERIMENT_FILE_NAME
 
 __all__ = [
     "read_xsd_simple_type_values",
+    "SRA_EXPERIMENT_FILE_NAME",
 ]
diff --git a/chord_metadata_service/ontologies/utils.py b/chord_metadata_service/ontologies/utils.py
index be5a7ca84..163bd690a 100644
--- a/chord_metadata_service/ontologies/utils.py
+++ b/chord_metadata_service/ontologies/utils.py
@@ -1,14 +1,21 @@
+import os
 from typing import List
 import xmltodict
+from pathlib import Path
 
+XSD_ONTOLOGIES_PATH = Path("chord_metadata_service/ontologies/xsd/")
+SRA_EXPERIMENT_FILE_NAME = "SRA.experiment.xsd.xml"
 
-def read_xsd_simple_type_values(xsd_file_path: str, type_name: str) -> List[str]:
+
+def read_xsd_simple_type_values(xsd_file_name: str, type_name: str) -> List[str]:
     """Reads an XML Schema Definition (XSD) file and returns a type's values.
     The XSD file is parsed using xmltodict following this spec:
     https://www.xml.com/pub/a/2006/05/31/converting-between-xml-and-json.html
     """
+    xsd_file_path = os.path.join(XSD_ONTOLOGIES_PATH, xsd_file_name)
     with open(xsd_file_path, 'r') as file:
         xsd_file = file.read()
+
     xsd_data = xmltodict.parse(xsd_file, namespaces={'xs': None})
     simple_types = {sp["@name"]: sp for sp in xsd_data["schema"]["simpleType"]}
     target_type = simple_types[type_name]
diff --git a/pyproject.toml b/pyproject.toml
index 48c607898..8ad54c29e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -21,6 +21,7 @@ include = [
     "chord_metadata_service/chord/tests/*.json",
     "chord_metadata_service/dats/*",
     "chord_metadata_service/mcode/tests/*.json",
+    "chord_metadata_service/ontologies/xsd/*.xml",
     "chord_metadata_service/restapi/tests/*.json",
 ]
 repository = "https://github.com/bento-platform/katsu"

From 48ff585b41939f3e7f6a0cb43ba17b8d8c9317b1 Mon Sep 17 00:00:00 2001
From: v-rocheleau <victor.rocheleau@mcgill.ca>
Date: Thu, 14 Sep 2023 20:26:49 +0000
Subject: [PATCH 10/25] fix test data

---
 chord_metadata_service/chord/tests/example_experiment.json  | 4 ++--
 .../chord/tests/example_experiment_bad_biosample.json       | 4 ++--
 .../experiments/tests/example_experiments.json              | 6 +++---
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/chord_metadata_service/chord/tests/example_experiment.json b/chord_metadata_service/chord/tests/example_experiment.json
index 8f6868db4..e263eeca1 100644
--- a/chord_metadata_service/chord/tests/example_experiment.json
+++ b/chord_metadata_service/chord/tests/example_experiment.json
@@ -13,7 +13,7 @@
       ],
       "library_strategy": "ChIP-Seq",
       "library_source": "Genomic",
-      "library_selection": "Random",
+      "library_selection": "RANDOM",
       "library_layout": "Single",
       "extraction_protocol": "NGS",
       "molecule": "genomic DNA",
@@ -75,4 +75,4 @@
       "url": "http://purl.obolibrary.org/obo/so.owl"
     }
   ]
-}
\ No newline at end of file
+}
diff --git a/chord_metadata_service/chord/tests/example_experiment_bad_biosample.json b/chord_metadata_service/chord/tests/example_experiment_bad_biosample.json
index b1f051378..931f15521 100644
--- a/chord_metadata_service/chord/tests/example_experiment_bad_biosample.json
+++ b/chord_metadata_service/chord/tests/example_experiment_bad_biosample.json
@@ -13,7 +13,7 @@
       ],
       "library_strategy": "ChIP-Seq",
       "library_source": "Genomic",
-      "library_selection": "Random",
+      "library_selection": "RANDOM",
       "library_layout": "Single",
       "extraction_protocol": "NGS",
       "molecule": "genomic DNA",
@@ -75,4 +75,4 @@
       "url": "http://purl.obolibrary.org/obo/so.owl"
     }
   ]
-}
\ No newline at end of file
+}
diff --git a/chord_metadata_service/experiments/tests/example_experiments.json b/chord_metadata_service/experiments/tests/example_experiments.json
index e888df91b..a36023f59 100644
--- a/chord_metadata_service/experiments/tests/example_experiments.json
+++ b/chord_metadata_service/experiments/tests/example_experiments.json
@@ -13,7 +13,7 @@
       ],
       "library_strategy": "ChIP-Seq",
       "library_source": "Genomic",
-      "library_selection": "Random",
+      "library_selection": "RANDOM",
       "library_layout": "Single",
       "extraction_protocol": "NGS",
       "molecule": "genomic DNA",
@@ -77,7 +77,7 @@
       ],
       "library_strategy": "ChIP-Seq",
       "library_source": "Genomic",
-      "library_selection": "Random",
+      "library_selection": "RANDOM",
       "library_layout": "Single",
       "extraction_protocol": "NGS",
       "molecule": "genomic DNA",
@@ -139,4 +139,4 @@
       "url": "http://purl.obolibrary.org/obo/so.owl"
     }
   ]
-}
\ No newline at end of file
+}

From 2a93fe4a1f50419a41ad367d72662ebb9d5b5263 Mon Sep 17 00:00:00 2001
From: v-rocheleau <victor.rocheleau@mcgill.ca>
Date: Thu, 14 Sep 2023 20:30:59 +0000
Subject: [PATCH 11/25] fix migration type

---
 .../experiments/migrations/0009_v4_1_0.py                 | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/chord_metadata_service/experiments/migrations/0009_v4_1_0.py b/chord_metadata_service/experiments/migrations/0009_v4_1_0.py
index 0a1e60580..53fe3d4ca 100644
--- a/chord_metadata_service/experiments/migrations/0009_v4_1_0.py
+++ b/chord_metadata_service/experiments/migrations/0009_v4_1_0.py
@@ -1,13 +1,13 @@
-from typing import List
+from typing import List, Tuple
 from django.db import migrations
 
-LIB_STRATEGY_CONVERSIONS: List[tuple[str, str]] = [
+LIB_STRATEGY_CONVERSIONS: List[Tuple[str, str]] = [
     # Convert WES -> WXS ...
     ("WES", "WXS"),
-    ("Other", "OTHER")
+    ("Other", "OTHER"),
 ]
 
-LIB_SELECTION_CONVERIONS: List[tuple[str, str]] = [
+LIB_SELECTION_CONVERIONS: List[Tuple[str, str]] = [
     ("Random", "RANDOM"),
     ("Random PCR", "RANDOM PCR"),
     ("Exome capture", "other"), # 'Exome capture' no longer supported

From eb93de01c10802547afb96aae2f6af3bdcfd2cd3 Mon Sep 17 00:00:00 2001
From: Victor Rocheleau <victor.rocheleau@mcgill.ca>
Date: Fri, 15 Sep 2023 14:43:01 +0000
Subject: [PATCH 12/25] migration fix, lint

---
 .../experiments/migrations/0009_v4_1_0.py                   | 4 +++-
 chord_metadata_service/ontologies/utils.py                  | 6 +++---
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/chord_metadata_service/experiments/migrations/0009_v4_1_0.py b/chord_metadata_service/experiments/migrations/0009_v4_1_0.py
index 53fe3d4ca..37fc2c25d 100644
--- a/chord_metadata_service/experiments/migrations/0009_v4_1_0.py
+++ b/chord_metadata_service/experiments/migrations/0009_v4_1_0.py
@@ -10,10 +10,11 @@
 LIB_SELECTION_CONVERIONS: List[Tuple[str, str]] = [
     ("Random", "RANDOM"),
     ("Random PCR", "RANDOM PCR"),
-    ("Exome capture", "other"), # 'Exome capture' no longer supported
+    ("Exome capture", "Hybrid Selection"), # 'Exome capture' no longer supported
     ("Other", "other"),
 ]
 
+
 def set_experiment_library(apps, _schema_editor):
     Experiment = apps.get_model("experiments", "Experiment")
     for (old_val, new_val) in LIB_STRATEGY_CONVERSIONS:
@@ -28,6 +29,7 @@ def set_experiment_library(apps, _schema_editor):
             exp.library_selection = new_val
             exp.save()
 
+
 class Migration(migrations.Migration):
     dependencies = [
         ('experiments', '0007_v4_0_0'),
diff --git a/chord_metadata_service/ontologies/utils.py b/chord_metadata_service/ontologies/utils.py
index 163bd690a..36a28e9f8 100644
--- a/chord_metadata_service/ontologies/utils.py
+++ b/chord_metadata_service/ontologies/utils.py
@@ -13,11 +13,11 @@ def read_xsd_simple_type_values(xsd_file_name: str, type_name: str) -> List[str]
     https://www.xml.com/pub/a/2006/05/31/converting-between-xml-and-json.html
     """
     xsd_file_path = os.path.join(XSD_ONTOLOGIES_PATH, xsd_file_name)
-    with open(xsd_file_path, 'r') as file:
+    with open(xsd_file_path, "r") as file:
         xsd_file = file.read()
 
-    xsd_data = xmltodict.parse(xsd_file, namespaces={'xs': None})
+    xsd_data = xmltodict.parse(xsd_file, namespaces={"xs": None})
     simple_types = {sp["@name"]: sp for sp in xsd_data["schema"]["simpleType"]}
     target_type = simple_types[type_name]
-    values = [val['@value'] for val in target_type['restriction']['enumeration']]
+    values = [val["@value"] for val in target_type["restriction"]["enumeration"]]
     return values

From fae4540257fa14a7943f5113c0359df2c100a8a0 Mon Sep 17 00:00:00 2001
From: Victor Rocheleau <victor.rocheleau@mcgill.ca>
Date: Mon, 18 Sep 2023 18:48:17 +0000
Subject: [PATCH 13/25] add validation error descriptions to IngestError

---
 .../chord/ingest/exceptions.py                | 24 ++++++++++++++++++-
 .../chord/ingest/experiments.py               | 20 +++++++++-------
 .../chord/ingest/phenopackets.py              | 10 ++++----
 chord_metadata_service/chord/ingest/schema.py |  4 ++--
 chord_metadata_service/chord/ingest/views.py  |  3 ++-
 .../chord/tests/test_ingest.py                | 20 ++++++++--------
 6 files changed, 55 insertions(+), 26 deletions(-)

diff --git a/chord_metadata_service/chord/ingest/exceptions.py b/chord_metadata_service/chord/ingest/exceptions.py
index bea8da514..0211650c7 100644
--- a/chord_metadata_service/chord/ingest/exceptions.py
+++ b/chord_metadata_service/chord/ingest/exceptions.py
@@ -1,7 +1,29 @@
+from typing import List
+from jsonschema.exceptions import ValidationError
+
 __all__ = [
     "IngestError",
 ]
 
 
+def parse_validation_errors(errors: List[ValidationError]):
+    error_descriptions = {}
+    for error in errors:
+        field_path = ".".join(error.schema_path)
+        error_descriptions[field_path] = {
+            "faulty_value": error.instance,
+            "valid_options": error.validator_value,
+            "field_schema": error.schema,
+            "message": error.message,
+        }
+    return error_descriptions
+
+
 class IngestError(Exception):
-    pass
+
+    def __init__(self, schema_validation_errors=[], message="An error occured during ingestion."):
+
+        errors_descriptions = parse_validation_errors(schema_validation_errors)
+
+        self.validation_errors = errors_descriptions
+        self.message = message
diff --git a/chord_metadata_service/chord/ingest/experiments.py b/chord_metadata_service/chord/ingest/experiments.py
index 4fbcd60a9..13dc340d7 100644
--- a/chord_metadata_service/chord/ingest/experiments.py
+++ b/chord_metadata_service/chord/ingest/experiments.py
@@ -56,12 +56,14 @@ def create_experiment_result(er: dict) -> em.ExperimentResult:
 
 def validate_experiment(experiment_data, idx: Optional[int] = None) -> None:
     # Validate experiment data against experiments schema.
-    validation = schema_validation(experiment_data, EXPERIMENT_SCHEMA)
-    if not validation:
+    val_errors = schema_validation(experiment_data, EXPERIMENT_SCHEMA)
+    if val_errors:
         # TODO: Report more precise errors
         raise IngestError(
-            f"Failed schema validation for experiment{(' ' + str(idx)) if idx is not None else ''} "
-            f"(check Katsu logs for more information)")
+            schema_validation_errors=val_errors,
+            message=f"Failed schema validation for experiment{(' ' + str(idx)) if idx is not None else ''} "
+                    f"(check Katsu logs for more information)"
+        )
 
 
 def ingest_experiment(
@@ -164,12 +166,14 @@ def ingest_derived_experiment_results(json_data: list[dict]) -> list[em.Experime
     # First, validate all experiment results with the schema before creating anything in the database.
 
     for idx, exp_result in enumerate(json_data):
-        validation = schema_validation(exp_result, EXPERIMENT_RESULT_SCHEMA)
-        if not validation:
+        val_errors = schema_validation(exp_result, EXPERIMENT_RESULT_SCHEMA)
+        if val_errors:
             # TODO: Report more precise errors
             raise IngestError(
-                f"Failed schema validation for experiment result {idx} "
-                f"(check Katsu logs for more information)")
+                schema_validation_errors=val_errors,
+                message=f"Failed schema validation for experiment result {idx} "
+                        f"(check Katsu logs for more information)"
+            )
 
     # If everything passes, perform the actual ingestion next.
 
diff --git a/chord_metadata_service/chord/ingest/phenopackets.py b/chord_metadata_service/chord/ingest/phenopackets.py
index 690160c87..45bdff20e 100644
--- a/chord_metadata_service/chord/ingest/phenopackets.py
+++ b/chord_metadata_service/chord/ingest/phenopackets.py
@@ -60,12 +60,14 @@ def validate_phenopacket(phenopacket_data: dict[str, Any],
                          schema: dict = PHENOPACKET_SCHEMA,
                          idx: Optional[int] = None) -> None:
     # Validate phenopacket data against phenopackets schema.
-    validation = schema_validation(phenopacket_data, schema)
-    if not validation:
+    val_errors = schema_validation(phenopacket_data, schema)
+    if val_errors:
         # TODO: Report more precise errors
         raise IngestError(
-            f"Failed schema validation for phenopacket{(' ' + str(idx)) if idx is not None else ''} "
-            f"(check Katsu logs for more information)")
+            schema_validation_errors=val_errors,
+            message=f"Failed schema validation for phenopacket{(' ' + str(idx)) if idx is not None else ''} "
+                    f"(check Katsu logs for more information)"
+        )
 
 
 def update_or_create_subject(subject: dict) -> pm.Individual:
diff --git a/chord_metadata_service/chord/ingest/schema.py b/chord_metadata_service/chord/ingest/schema.py
index 538c5c4ed..7cb4fcc5a 100644
--- a/chord_metadata_service/chord/ingest/schema.py
+++ b/chord_metadata_service/chord/ingest/schema.py
@@ -10,10 +10,10 @@ def schema_validation(obj, schema):
     try:
         v.validate(obj)
         logger.info("JSON schema validation passed.")
-        return True
+        return None
     except jsonschema.exceptions.ValidationError:
         errors = [e for e in v.iter_errors(obj)]
         logger.info("JSON schema validation failed.")
         for i, error in enumerate(errors, 1):
             logger.error(f"{i} Validation error in {'.'.join(str(v) for v in error.path)}: {error.message}")
-        return False
+        return errors
diff --git a/chord_metadata_service/chord/ingest/views.py b/chord_metadata_service/chord/ingest/views.py
index c89f8aad1..cf35f236b 100644
--- a/chord_metadata_service/chord/ingest/views.py
+++ b/chord_metadata_service/chord/ingest/views.py
@@ -46,7 +46,8 @@ def ingest_into_dataset(request, dataset_id: str, workflow_id: str):
             WORKFLOW_INGEST_FUNCTION_MAP[workflow_id](request.data, dataset_id)
 
     except IngestError as e:
-        return Response(errors.bad_request_error(f"Encountered ingest error: {e}"), status=400)
+        # return Response(errors.bad_request_error(e.validation_errors), status=400)
+        return Response(errors.bad_request_error(f"Encountered ingest error: {e.validation_errors}"), status=400)
 
     except ValidationError as e:
         return Response(errors.bad_request_error(
diff --git a/chord_metadata_service/chord/tests/test_ingest.py b/chord_metadata_service/chord/tests/test_ingest.py
index d93832186..dd0bf335b 100644
--- a/chord_metadata_service/chord/tests/test_ingest.py
+++ b/chord_metadata_service/chord/tests/test_ingest.py
@@ -168,22 +168,22 @@ def test_reingesting_updating_phenopackets_json(self):
     def test_phenopackets_validation(self):
         # check invalid phenopacket, must fail validation & validate_phenopacket must raise
 
-        validation = schema_validation(EXAMPLE_INGEST_INVALID_PHENOPACKET, PHENOPACKET_SCHEMA)
-        self.assertEqual(validation, False)
+        val_errs = schema_validation(EXAMPLE_INGEST_INVALID_PHENOPACKET, PHENOPACKET_SCHEMA)
+        self.assertTrue(len(val_errs) > 0)
         with self.assertRaises(IngestError):
             validate_phenopacket(EXAMPLE_INGEST_INVALID_PHENOPACKET)
         with self.assertRaises(IngestError):
             ingest_phenopacket(EXAMPLE_INGEST_INVALID_PHENOPACKET, "dummy", validate=True)
 
         # valid phenopacket passes validation & doesn't raise
-        validation_2 = schema_validation(EXAMPLE_INGEST_PHENOPACKET, PHENOPACKET_SCHEMA)
-        self.assertEqual(validation_2, True)
+        val_errors_2 = schema_validation(EXAMPLE_INGEST_PHENOPACKET, PHENOPACKET_SCHEMA)
+        self.assertEqual(val_errors_2, None)
         validate_phenopacket(EXAMPLE_INGEST_PHENOPACKET)
 
         # valid experiments pass validation
         for exp in EXAMPLE_INGEST_EXPERIMENT["experiments"]:
-            validation_3 = schema_validation(exp, EXPERIMENT_SCHEMA)
-            self.assertEqual(validation_3, True)
+            val_errors_3 = schema_validation(exp, EXPERIMENT_SCHEMA)
+            self.assertEqual(val_errors_3, None)
 
     def test_ingesting_experiments_json(self):
         # ingest phenopackets data in order to match to biosample ids
@@ -220,8 +220,8 @@ def test_ingesting_experiments_json(self):
     def test_ingesting_invalid_experiment_json(self):
         # check invalid experiment, must fail validation
         for exp in EXAMPLE_INGEST_INVALID_EXPERIMENT["experiments"]:
-            validation = schema_validation(exp, EXPERIMENT_SCHEMA)
-            self.assertEqual(validation, False)
+            val_errs = schema_validation(exp, EXPERIMENT_SCHEMA)
+            self.assertTrue(len(val_errs) > 0)
             with self.assertRaises(IngestError):
                 validate_experiment(exp)
             with self.assertRaises(IngestError):
@@ -229,8 +229,8 @@ def test_ingesting_invalid_experiment_json(self):
 
         # check valid experiment, must pass validation
         for exp in EXAMPLE_INGEST_EXPERIMENT["experiments"]:
-            validation_2 = schema_validation(exp, EXPERIMENT_SCHEMA)
-            self.assertEqual(validation_2, True)
+            val_errs_2 = schema_validation(exp, EXPERIMENT_SCHEMA)
+            self.assertEqual(val_errs_2, None)
 
     def test_ingesting_experiment_results_json(self):
         # ingest list of experiments

From 0c1cb1bb9b4997f1625663a0198bc256637021dc Mon Sep 17 00:00:00 2001
From: Victor Rocheleau <victor.rocheleau@mcgill.ca>
Date: Mon, 18 Sep 2023 22:04:52 +0000
Subject: [PATCH 14/25] ingest response format

---
 .../chord/ingest/exceptions.py                | 11 ++--
 chord_metadata_service/chord/ingest/views.py  | 65 +++++++++++++++----
 2 files changed, 59 insertions(+), 17 deletions(-)

diff --git a/chord_metadata_service/chord/ingest/exceptions.py b/chord_metadata_service/chord/ingest/exceptions.py
index 0211650c7..859ec6011 100644
--- a/chord_metadata_service/chord/ingest/exceptions.py
+++ b/chord_metadata_service/chord/ingest/exceptions.py
@@ -7,21 +7,22 @@
 
 
 def parse_validation_errors(errors: List[ValidationError]):
-    error_descriptions = {}
+    error_descriptions = []
     for error in errors:
-        field_path = ".".join(error.schema_path)
-        error_descriptions[field_path] = {
+        schema_path = ".".join(error.schema_path)
+        error_descriptions.append({
+            "schema_path": schema_path,
             "faulty_value": error.instance,
             "valid_options": error.validator_value,
             "field_schema": error.schema,
             "message": error.message,
-        }
+        })
     return error_descriptions
 
 
 class IngestError(Exception):
 
-    def __init__(self, schema_validation_errors=[], message="An error occured during ingestion."):
+    def __init__(self, schema_validation_errors: List[ValidationError]=[], message="An error occured during ingestion."):
 
         errors_descriptions = parse_validation_errors(schema_validation_errors)
 
diff --git a/chord_metadata_service/chord/ingest/views.py b/chord_metadata_service/chord/ingest/views.py
index cf35f236b..9ea6a20a7 100644
--- a/chord_metadata_service/chord/ingest/views.py
+++ b/chord_metadata_service/chord/ingest/views.py
@@ -10,6 +10,7 @@
 from rest_framework.decorators import api_view, permission_classes
 from rest_framework.permissions import AllowAny
 from rest_framework.response import Response
+from typing import List
 
 from bento_lib.schemas.bento import BENTO_INGEST_SCHEMA
 from bento_lib.responses import errors
@@ -26,18 +27,53 @@
 logger = logging.getLogger(__name__)
 
 
+class IngestResponseBuilder:
+
+    def __init__(self, workflow_id: str, dataset_id: str):
+        self.workflow_id = workflow_id
+        self.dataset_id = dataset_id
+        self.success = False
+        self.errors = [] 
+        self.warnings = []
+
+    def set_success(self, success: bool):
+        self.success = success
+
+    def add_error(self, error):
+        self.errors.append(error)
+
+    def add_errors(self, errors: List[any]):
+        self.errors.extend(errors)
+
+    def add_warning(self, warnings: List[any]):
+        self.warnings.extend(warnings)
+
+    def as_response(self, status_code: int):
+        body = {
+            "success": self.success,
+            "warnings": self.warnings,
+            "errors": self.errors,
+        }
+        logger.info(f"Finished {self.workflow_id} ingest request for dataset {self.dataset_id}", body)
+        return Response(body, status=status_code)
+
+
 @api_view(["POST"])
 @permission_classes([AllowAny])
 def ingest_into_dataset(request, dataset_id: str, workflow_id: str):
     logger.info(f"Received a {workflow_id} ingest request for dataset {dataset_id}.")
 
+    response_builder = IngestResponseBuilder(workflow_id=workflow_id, dataset_id=dataset_id)
+
     # Check that the workflow exists
     if workflow_id not in WORKFLOW_INGEST_FUNCTION_MAP:
-        return Response(errors.bad_request_error(f"Ingestion workflow ID {workflow_id} does not exist"), status=400)
+        response_builder.add_error(f"Ingestion workflow ID {workflow_id} does not exist")
+        return response_builder.as_response(400)
 
     if dataset_id not in DATASET_ID_OVERRIDES:
         if not Dataset.objects.filter(identifier=dataset_id).exists():
-            return Response(errors.bad_request_error(f"Dataset with ID {dataset_id} does not exist"), status=400)
+            response_builder.add_error(f"Dataset with ID {dataset_id} does not exist")
+            return response_builder.as_response(400)
         dataset_id = str(uuid.UUID(dataset_id))  # Normalize dataset ID to UUID's str format.
 
     try:
@@ -46,18 +82,23 @@ def ingest_into_dataset(request, dataset_id: str, workflow_id: str):
             WORKFLOW_INGEST_FUNCTION_MAP[workflow_id](request.data, dataset_id)
 
     except IngestError as e:
-        # return Response(errors.bad_request_error(e.validation_errors), status=400)
-        return Response(errors.bad_request_error(f"Encountered ingest error: {e.validation_errors}"), status=400)
+        if e.validation_errors:
+            response_builder.add_errors(e.validation_errors)
+        else:
+            response_builder.add_error(e.message)
+        return response_builder.as_response(400)
 
     except ValidationError as e:
-        return Response(errors.bad_request_error(
-            "Encountered validation errors during ingestion",
-            *(e.error_list if hasattr(e, "error_list") else e.error_dict.items()),
-        ))
+        response_builder.add_errors(e.error_list if hasattr(e, "error_list") else e.error_dict.items())
+        return response_builder.as_response(400)
 
     except Exception as e:
         # Encountered some other error from the ingestion attempt, return a somewhat detailed message
-        logger.error(f"Encountered an exception while processing an ingest attempt:\n{traceback.format_exc()}")
-        return Response(errors.internal_server_error(f"Encountered an exception while processing an ingest attempt "
-                                                     f"(error: {repr(e)}"), status=500)
-    return Response(status=204)
+        error_message = f"Encountered an exception while processing an ingest attempt:\n{traceback.format_exc()}"
+        logger.error(error_message)
+        response_builder.add_error(error_message)
+        return response_builder.as_response(500)
+    
+    # return Response(status=204)
+    response_builder.set_success(True)
+    return response_builder.as_response(204)

From 9f8ff0c0618c007f11c64a8cf33405bdb6d940c2 Mon Sep 17 00:00:00 2001
From: Victor Rocheleau <victor.rocheleau@mcgill.ca>
Date: Tue, 19 Sep 2023 21:11:05 +0000
Subject: [PATCH 15/25] ingestion error responds with warnings on schema
 changes

---
 .../chord/ingest/exceptions.py                | 99 +++++++++++++++++--
 .../chord/ingest/experiments.py               |  3 +-
 chord_metadata_service/chord/ingest/views.py  | 28 +++---
 .../experiments/migrations/0009_v4_1_0.py     | 18 +---
 chord_metadata_service/experiments/schemas.py | 21 ++++
 5 files changed, 134 insertions(+), 35 deletions(-)

diff --git a/chord_metadata_service/chord/ingest/exceptions.py b/chord_metadata_service/chord/ingest/exceptions.py
index 859ec6011..b40468dda 100644
--- a/chord_metadata_service/chord/ingest/exceptions.py
+++ b/chord_metadata_service/chord/ingest/exceptions.py
@@ -1,30 +1,111 @@
-from typing import List
+from typing import List, Optional
 from jsonschema.exceptions import ValidationError
+from chord_metadata_service import __version__
+from chord_metadata_service.experiments.schemas import EXPERIMENT_SCHEMA_CHANGES
+from chord_metadata_service.chord.data_types import DATA_TYPE_EXPERIMENT, DATA_TYPE_PHENOPACKET
 
 __all__ = [
     "IngestError",
 ]
 
 
-def parse_validation_errors(errors: List[ValidationError]):
+DATA_TYPE_SCHEMA_CHANGES = {
+    DATA_TYPE_EXPERIMENT: EXPERIMENT_SCHEMA_CHANGES,
+    DATA_TYPE_PHENOPACKET: None
+}
+
+
+def parse_validation_errors(errors: List[ValidationError]) -> Optional[List[dict]]:
+    """
+    Accepts a list of jsonschema ValidationError and converts them to a client error format.
+
+    Parameters:
+        errors (List[ValidationError]): errors raised by jsonschema during validation
+    Returns:
+        List[dict]:
+            dict:
+                schema_path (str): Schema path string (e.g "properties.library_strategy")
+                faulty_value (str | obj): The value at the schema_path causing the error
+                property_schema (dict): JSON schema of the property (includes valid options)
+                message (str): The ValidationError.message
+    """
     error_descriptions = []
     for error in errors:
         schema_path = ".".join(error.schema_path)
         error_descriptions.append({
             "schema_path": schema_path,
             "faulty_value": error.instance,
-            "valid_options": error.validator_value,
-            "field_schema": error.schema,
             "message": error.message,
+            "property_schema": error.schema,
         })
-    return error_descriptions
+    return error_descriptions if len(error_descriptions) else None
 
 
-class IngestError(Exception):
+def parse_property_warnings(data: dict, prop_name: str, property_changes: List[tuple]) -> Optional[dict]:
+    """
+    Return a warning dict for the first deprecated value matching data[prop_name], else None.
+    Every (old_value, new_value) pair is checked; a missing property yields no warning.
+    """
+    if prop_name not in data:
+        return None
+    value = data[prop_name]
+    for (old_value, new_value) in property_changes:
+        # Exact match first (also covers dicts), then case-insensitive match for strings (enums)
+        matches = value == old_value or (
+            isinstance(value, str) and isinstance(old_value, str) and value.lower() == old_value.lower()
+        )
+        if matches:
+            return {
+                "property_name": prop_name,
+                "property_value": value,
+                "deprecated_value": old_value,
+                "suggested_replacement": new_value,
+            }
+    return None
+
 
-    def __init__(self, schema_validation_errors: List[ValidationError]=[], message="An error occured during ingestion."):
+def parse_schema_warnings(data: dict, schema: dict) -> Optional[List[dict]]:
+    """
+    Schema warnings are issued on Katsu releases that include schema changes.
+    Warnings are returned to highlight schema changes that may be the root cause of an IngestionError.
+
+    Parameters:
+        data (dict): the data submitted for ingestion
+
+    Returns:
+        List[dict]:
+            dict:
+                property_name (str): The name of the property
+                property_value (str | dict)
+                deprecated_value (str | dict): The deprecated property option
+                suggested_replacement (str | dict): The new suggested property option
+    """
+    if not data or not schema:
+        return None
+
+    data_type = schema.get("$id", "").split(":")[-1]
+    applicable_changes = DATA_TYPE_SCHEMA_CHANGES.get(data_type, None)
+
+    if not applicable_changes or __version__ not in applicable_changes:
+        # Skip if data type's schema is not affected in current Katsu version
+        return None
+
+    warnings = []
+    for (prop_name, changes) in applicable_changes[__version__].get("properties", {}).items():
+        property_warning = parse_property_warnings(data=data, prop_name=prop_name, property_changes=changes)
+        if property_warning:
+            warnings.append(property_warning)
+    return warnings if len(warnings) else None
+
+
+class IngestError(Exception):
 
-        errors_descriptions = parse_validation_errors(schema_validation_errors)
+    def __init__(self,
+                 data: dict = None,
+                 schema: dict = None,
+                 schema_validation_errors: List[ValidationError] = [],
+                 message="An error occured during ingestion."):
 
-        self.validation_errors = errors_descriptions
+        self.validation_errors = parse_validation_errors(schema_validation_errors)
+        self.schema_warnings = parse_schema_warnings(data=data, schema=schema)
         self.message = message
diff --git a/chord_metadata_service/chord/ingest/experiments.py b/chord_metadata_service/chord/ingest/experiments.py
index 13dc340d7..1437b4a09 100644
--- a/chord_metadata_service/chord/ingest/experiments.py
+++ b/chord_metadata_service/chord/ingest/experiments.py
@@ -58,8 +58,9 @@ def validate_experiment(experiment_data, idx: Optional[int] = None) -> None:
     # Validate experiment data against experiments schema.
     val_errors = schema_validation(experiment_data, EXPERIMENT_SCHEMA)
     if val_errors:
-        # TODO: Report more precise errors
         raise IngestError(
+            data=experiment_data,
+            schema=EXPERIMENT_SCHEMA,
             schema_validation_errors=val_errors,
             message=f"Failed schema validation for experiment{(' ' + str(idx)) if idx is not None else ''} "
                     f"(check Katsu logs for more information)"
diff --git a/chord_metadata_service/chord/ingest/views.py b/chord_metadata_service/chord/ingest/views.py
index 9ea6a20a7..e0c7a5aca 100644
--- a/chord_metadata_service/chord/ingest/views.py
+++ b/chord_metadata_service/chord/ingest/views.py
@@ -13,7 +13,6 @@
 from typing import List
 
 from bento_lib.schemas.bento import BENTO_INGEST_SCHEMA
-from bento_lib.responses import errors
 
 from . import WORKFLOW_INGEST_FUNCTION_MAP
 from .exceptions import IngestError
@@ -33,7 +32,7 @@ def __init__(self, workflow_id: str, dataset_id: str):
         self.workflow_id = workflow_id
         self.dataset_id = dataset_id
         self.success = False
-        self.errors = [] 
+        self.errors = []
         self.warnings = []
 
     def set_success(self, success: bool):
@@ -42,12 +41,21 @@ def set_success(self, success: bool):
     def add_error(self, error):
         self.errors.append(error)
 
-    def add_errors(self, errors: List[any]):
+    def add_errors(self, errors: List):
         self.errors.extend(errors)
 
-    def add_warning(self, warnings: List[any]):
+    def add_warning(self, warnings: List):
         self.warnings.extend(warnings)
 
+    def add_ingest_error(self, error: IngestError):
+        if error.validation_errors:
+            self.add_errors(error.validation_errors)
+        else:
+            self.add_error(error.message)
+
+        if error.schema_warnings:
+            self.warnings.extend(error.schema_warnings)
+
     def as_response(self, status_code: int):
         body = {
             "success": self.success,
@@ -82,10 +90,7 @@ def ingest_into_dataset(request, dataset_id: str, workflow_id: str):
             WORKFLOW_INGEST_FUNCTION_MAP[workflow_id](request.data, dataset_id)
 
     except IngestError as e:
-        if e.validation_errors:
-            response_builder.add_errors(e.validation_errors)
-        else:
-            response_builder.add_error(e.message)
+        response_builder.add_ingest_error(e)
         return response_builder.as_response(400)
 
     except ValidationError as e:
@@ -94,11 +99,10 @@ def ingest_into_dataset(request, dataset_id: str, workflow_id: str):
 
     except Exception as e:
         # Encountered some other error from the ingestion attempt, return a somewhat detailed message
-        error_message = f"Encountered an exception while processing an ingest attempt:\n{traceback.format_exc()}"
-        logger.error(error_message)
-        response_builder.add_error(error_message)
+        logger.error(f"Encountered an exception while processing an ingest attempt:\n{traceback.format_exc()}")
+        response_builder.add_error(f"Encountered an exception while processing an ingest attempt (error: {repr(e)})")
         return response_builder.as_response(500)
-    
+
     # return Response(status=204)
     response_builder.set_success(True)
     return response_builder.as_response(204)
diff --git a/chord_metadata_service/experiments/migrations/0009_v4_1_0.py b/chord_metadata_service/experiments/migrations/0009_v4_1_0.py
index 37fc2c25d..532935887 100644
--- a/chord_metadata_service/experiments/migrations/0009_v4_1_0.py
+++ b/chord_metadata_service/experiments/migrations/0009_v4_1_0.py
@@ -1,18 +1,10 @@
 from typing import List, Tuple
 from django.db import migrations
+from chord_metadata_service.experiments.schemas import EXPERIMENT_SCHEMA_CHANGES
 
-LIB_STRATEGY_CONVERSIONS: List[Tuple[str, str]] = [
-    # Convert WES -> WXS ...
-    ("WES", "WXS"),
-    ("Other", "OTHER"),
-]
-
-LIB_SELECTION_CONVERIONS: List[Tuple[str, str]] = [
-    ("Random", "RANDOM"),
-    ("Random PCR", "RANDOM PCR"),
-    ("Exome capture", "Hybrid Selection"), # 'Exome capture' no longer supported
-    ("Other", "other"),
-]
+V4_1_0_PROPERTIES = EXPERIMENT_SCHEMA_CHANGES["4.1.0"]["properties"]
+LIB_STRATEGY_CONVERSIONS = V4_1_0_PROPERTIES["library_strategy"]
+LIB_SELECTION_CONVERIONS = V4_1_0_PROPERTIES["library_selection"]
 
 
 def set_experiment_library(apps, _schema_editor):
@@ -22,7 +14,7 @@ def set_experiment_library(apps, _schema_editor):
         for exp in Experiment.objects.filter(library_strategy=old_val):
             exp.library_strategy = new_val
             exp.save()
-    
+
     for (old_val, new_val) in LIB_SELECTION_CONVERIONS:
         # Modify library_selection if necessary
         for exp in Experiment.objects.filter(library_selection=old_val):
diff --git a/chord_metadata_service/experiments/schemas.py b/chord_metadata_service/experiments/schemas.py
index 225a58eee..fe81c4cc7 100644
--- a/chord_metadata_service/experiments/schemas.py
+++ b/chord_metadata_service/experiments/schemas.py
@@ -155,3 +155,24 @@
     },
     "required": ["id", "experiment_type"]
 }, EXPERIMENT)
+
+
+"""
+Dictionary of schema changes for warnings.
+"""
+EXPERIMENT_SCHEMA_CHANGES = {
+    "4.1.0": {
+        "properties": {
+            "library_strategy": [
+                    ("WES", "WXS"),
+                    ("Other", "OTHER"),
+            ],
+            "library_selection": [
+                    ("Random", "RANDOM"),
+                    ("Random PCR", "RANDOM PCR"),
+                    ("Exome capture", "Hybrid Selection"),
+                    ("Other", "other"),
+            ]
+        }
+    }
+}

From 2194a080ffb34bb56e42ec375f49befad318eba1 Mon Sep 17 00:00:00 2001
From: v-rocheleau <victor.rocheleau@mcgill.ca>
Date: Wed, 20 Sep 2023 17:11:31 +0000
Subject: [PATCH 16/25] update api ingestion tests

---
 .../chord/ingest/exceptions.py                |  10 +-
 .../chord/ingest/experiments.py               |   8 +-
 .../chord/ingest/phenopackets.py              |   6 +-
 chord_metadata_service/chord/ingest/views.py  |   8 +-
 .../tests/example_invalid_experiment.json     |   6 +-
 .../chord/tests/test_api_ingest.py            | 107 +++++++++++++++++-
 .../chord/workflows/wdls/experiments_json.wdl |   2 +-
 .../workflows/wdls/phenopackets_json.wdl      |   2 +-
 8 files changed, 124 insertions(+), 25 deletions(-)

diff --git a/chord_metadata_service/chord/ingest/exceptions.py b/chord_metadata_service/chord/ingest/exceptions.py
index b40468dda..431e41242 100644
--- a/chord_metadata_service/chord/ingest/exceptions.py
+++ b/chord_metadata_service/chord/ingest/exceptions.py
@@ -79,6 +79,7 @@ def parse_schema_warnings(data: dict, schema: dict) -> Optional[List[dict]]:
                 property_value (str | dict)
                 deprecated_value (str | dict): The deprecated property option
                 suggested_replacement (str | dict): The new suggested property option
+                version (str): The Katsu release version associated with the schema change
     """
     if not data or not schema:
         return None
@@ -91,10 +92,11 @@ def parse_schema_warnings(data: dict, schema: dict) -> Optional[List[dict]]:
         return None
 
     warnings = []
-    for (prop_name, changes) in applicable_changes[__version__].get("properties", {}).items():
-        property_warning = parse_property_warnings(data=data, prop_name=prop_name, property_changes=changes)
-        if property_warning:
-            warnings.append(property_warning)
+    for (version, version_changes) in applicable_changes.items():
+        for (prop_name, changes) in version_changes.get("properties", {}).items():
+            if property_warning:= parse_property_warnings(data, prop_name, changes):
+                property_warning["version"] = version
+                warnings.append(property_warning)
     return warnings if len(warnings) else None
 
 
diff --git a/chord_metadata_service/chord/ingest/experiments.py b/chord_metadata_service/chord/ingest/experiments.py
index 1437b4a09..03a7aaf3c 100644
--- a/chord_metadata_service/chord/ingest/experiments.py
+++ b/chord_metadata_service/chord/ingest/experiments.py
@@ -56,8 +56,7 @@ def create_experiment_result(er: dict) -> em.ExperimentResult:
 
 def validate_experiment(experiment_data, idx: Optional[int] = None) -> None:
     # Validate experiment data against experiments schema.
-    val_errors = schema_validation(experiment_data, EXPERIMENT_SCHEMA)
-    if val_errors:
+    if val_errors:= schema_validation(experiment_data, EXPERIMENT_SCHEMA):
         raise IngestError(
             data=experiment_data,
             schema=EXPERIMENT_SCHEMA,
@@ -149,6 +148,11 @@ def ingest_experiments_workflow(json_data, dataset_id: str) -> list[em.Experimen
 
     exps = json_data.get("experiments", [])
 
+    if len(exps) == 0:
+        # If empty experiments array
+        # Validate an empty json to raise an IngestError with validation details
+        validate_experiment({})
+
     # First, validate all experiments with the schema before creating anything in the database.
     for idx, exp in enumerate(exps):
         validate_experiment(exp, idx)
diff --git a/chord_metadata_service/chord/ingest/phenopackets.py b/chord_metadata_service/chord/ingest/phenopackets.py
index 45bdff20e..14b87f65c 100644
--- a/chord_metadata_service/chord/ingest/phenopackets.py
+++ b/chord_metadata_service/chord/ingest/phenopackets.py
@@ -60,10 +60,10 @@ def validate_phenopacket(phenopacket_data: dict[str, Any],
                          schema: dict = PHENOPACKET_SCHEMA,
                          idx: Optional[int] = None) -> None:
     # Validate phenopacket data against phenopackets schema.
-    val_errors = schema_validation(phenopacket_data, schema)
-    if val_errors:
-        # TODO: Report more precise errors
+    if val_errors:= schema_validation(phenopacket_data, schema):
         raise IngestError(
+            data=phenopacket_data,
+            schema=PHENOPACKET_SCHEMA,
             schema_validation_errors=val_errors,
             message=f"Failed schema validation for phenopacket{(' ' + str(idx)) if idx is not None else ''} "
                     f"(check Katsu logs for more information)"
diff --git a/chord_metadata_service/chord/ingest/views.py b/chord_metadata_service/chord/ingest/views.py
index e0c7a5aca..2441d89e2 100644
--- a/chord_metadata_service/chord/ingest/views.py
+++ b/chord_metadata_service/chord/ingest/views.py
@@ -44,9 +44,6 @@ def add_error(self, error):
     def add_errors(self, errors: List):
         self.errors.extend(errors)
 
-    def add_warning(self, warnings: List):
-        self.warnings.extend(warnings)
-
     def add_ingest_error(self, error: IngestError):
         if error.validation_errors:
             self.add_errors(error.validation_errors)
@@ -56,7 +53,7 @@ def add_ingest_error(self, error: IngestError):
         if error.schema_warnings:
             self.warnings.extend(error.schema_warnings)
 
-    def as_response(self, status_code: int):
+    def as_response(self, status_code: int) -> Response:
         body = {
             "success": self.success,
             "warnings": self.warnings,
@@ -103,6 +100,5 @@ def ingest_into_dataset(request, dataset_id: str, workflow_id: str):
         response_builder.add_error(f"Encountered an exception while processing an ingest attempt (error: {repr(e)})")
         return response_builder.as_response(500)
 
-    # return Response(status=204)
     response_builder.set_success(True)
-    return response_builder.as_response(204)
+    return response_builder.as_response(201)
diff --git a/chord_metadata_service/chord/tests/example_invalid_experiment.json b/chord_metadata_service/chord/tests/example_invalid_experiment.json
index 437d238cf..0e47f9b72 100644
--- a/chord_metadata_service/chord/tests/example_invalid_experiment.json
+++ b/chord_metadata_service/chord/tests/example_invalid_experiment.json
@@ -11,9 +11,9 @@
           "label": "ChIP-seq"
         }
       ],
-      "library_strategy": "ChIP-Seq",
+      "library_strategy": "WES",
       "library_source": "Genomic",
-      "library_selection": "Random",
+      "library_selection": "random",
       "library_layout": "Single",
       "extraction_protocol": "NGS",
       "molecule": "genomic DNA",
@@ -75,4 +75,4 @@
       "url": "http://purl.obolibrary.org/obo/so.owl"
     }
   ]
-}
\ No newline at end of file
+}
diff --git a/chord_metadata_service/chord/tests/test_api_ingest.py b/chord_metadata_service/chord/tests/test_api_ingest.py
index 53f50bcb2..15cbb2f10 100644
--- a/chord_metadata_service/chord/tests/test_api_ingest.py
+++ b/chord_metadata_service/chord/tests/test_api_ingest.py
@@ -3,6 +3,7 @@
 from django.urls import reverse
 from rest_framework import status
 from rest_framework.test import APITestCase
+from chord_metadata_service.chord.tests.example_ingest import EXAMPLE_INGEST_EXPERIMENT, EXAMPLE_INGEST_INVALID_EXPERIMENT, EXAMPLE_INGEST_INVALID_PHENOPACKET, EXAMPLE_INGEST_PHENOPACKET
 
 from chord_metadata_service.restapi.tests.utils import load_local_json
 from .constants import VALID_PROJECT_1, valid_dataset_1
@@ -63,14 +64,20 @@ def test_phenopackets_ingest(self):
             reverse("ingest-into-dataset", args=(self.dataset["identifier"], "phenopackets_json_invalid")),
             content_type="application/json",
         )
+        c = r.json()
         self.assertEqual(r.status_code, status.HTTP_400_BAD_REQUEST)
+        self.assertEqual(c["success"], False)
+        self.assertEqual(len(c["errors"]), 1)
 
         # No ingestion body
         r = self.client.post(
             reverse("ingest-into-dataset", args=(self.dataset["identifier"], "phenopackets_json")),
             content_type="application/json",
         )
+        c = r.json()
         self.assertEqual(r.status_code, status.HTTP_400_BAD_REQUEST)
+        self.assertEqual(c["success"], False)
+        self.assertEqual(len(c["errors"]), 1)
 
         # Bad ingestion body JSON
         r = self.client.post(
@@ -78,22 +85,112 @@ def test_phenopackets_ingest(self):
             content_type="application/json",
             data="\{\}\}",  # noqa: W605
         )
+        c = r.json()
         self.assertEqual(r.status_code, status.HTTP_500_INTERNAL_SERVER_ERROR)
+        self.assertEqual(c["success"], False)
+        self.assertEqual(len(c["errors"]), 1) # 1 required property
 
         # Invalid phenopacket JSON validation
-        invalid_phenopacket = load_local_json("example_invalid_phenopacket.json")
         r = self.client.post(
             reverse("ingest-into-dataset", args=(self.dataset["identifier"], "phenopackets_json")),
             content_type="application/json",
-            data=json.dumps(invalid_phenopacket),
+            data=json.dumps(EXAMPLE_INGEST_INVALID_PHENOPACKET),
         )
+        c = r.json()
         self.assertEqual(r.status_code, status.HTTP_400_BAD_REQUEST)
+        self.assertEqual(c["success"], False)
+        self.assertEqual(len(c["errors"]), 2)
 
         # Success
-        valid_phenopacket = load_local_json("example_phenopacket.json")
         r = self.client.post(
             reverse("ingest-into-dataset", args=(self.dataset["identifier"], "phenopackets_json")),
             content_type="application/json",
-            data=json.dumps(valid_phenopacket),
+            data=json.dumps(EXAMPLE_INGEST_PHENOPACKET),
         )
-        self.assertEqual(r.status_code, status.HTTP_204_NO_CONTENT)
+        c = r.json()
+        self.assertEqual(c["success"], True)
+        self.assertEqual(len(c["errors"]), 0)
+        self.assertEqual(len(c["warnings"]), 0)
+        self.assertEqual(r.status_code, status.HTTP_201_CREATED)
+
+
+    def test_experiments_ingest_failures(self):
+        # Invalid workflow ID
+        r = self.client.post(
+            reverse("ingest-into-dataset", args=(self.dataset["identifier"], "experiments_json_invalid")),
+            content_type="application/json",
+        )
+        c = r.json()
+        self.assertEqual(r.status_code, status.HTTP_400_BAD_REQUEST)
+        self.assertEqual(c["success"], False)
+        self.assertEqual(len(c["errors"]), 1)
+
+        # No ingestion body
+        r = self.client.post(
+            reverse("ingest-into-dataset", args=(self.dataset["identifier"], "experiments_json")),
+            content_type="application/json",
+        )
+        c = r.json()
+        self.assertEqual(r.status_code, status.HTTP_400_BAD_REQUEST)
+        self.assertEqual(c["success"], False)
+        self.assertEqual(len(c["errors"]), 2) # 2 required properties
+
+        # Bad ingestion body JSON
+        r = self.client.post(
+            reverse("ingest-into-dataset", args=(self.dataset["identifier"], "experiments_json")),
+            content_type="application/json",
+            data="\{\}\}",  # noqa: W605
+        )
+        c = r.json()
+        self.assertEqual(r.status_code, status.HTTP_500_INTERNAL_SERVER_ERROR)
+        self.assertEqual(c["success"], False)
+        self.assertEqual(len(c["errors"]), 1)
+
+        # Invalid experiments JSON validation
+        r = self.client.post(
+            reverse("ingest-into-dataset", args=(self.dataset["identifier"], "experiments_json")),
+            content_type="application/json",
+            data=json.dumps(EXAMPLE_INGEST_INVALID_EXPERIMENT),
+        )
+        c = r.json()
+        self.assertEqual(r.status_code, status.HTTP_400_BAD_REQUEST)
+        self.assertEqual(c["success"], False)
+        self.assertEqual(len(c["errors"]), 4)
+        
+        # Two of the errors concern experiment schema changes
+        warnings = c["warnings"]
+        self.assertEqual(len(warnings), 2)
+        warned_properties = [schema_warning["property_name"] for schema_warning in warnings]
+        self.assertTrue("library_selection" in warned_properties)
+        self.assertTrue("library_strategy" in warned_properties)
+
+        # Biosample not present
+        r = self.client.post(
+            reverse("ingest-into-dataset", args=(self.dataset["identifier"], "experiments_json")),
+            content_type="application/json",
+            data=json.dumps(EXAMPLE_INGEST_EXPERIMENT),
+        )
+        c = r.json()
+        self.assertEqual(c["success"], False)
+        self.assertEqual(len(c["errors"]), 1)
+        self.assertEqual(r.status_code, status.HTTP_500_INTERNAL_SERVER_ERROR)
+    
+    def test_experiment_ingest_success(self):
+        # Create the required phenopacket with a biosample first
+        r = self.client.post(
+            reverse("ingest-into-dataset", args=(self.dataset["identifier"], "phenopackets_json")),
+            content_type="application/json",
+            data=json.dumps(EXAMPLE_INGEST_PHENOPACKET),
+        )
+
+        # Ingest experiment
+        r = self.client.post(
+            reverse("ingest-into-dataset", args=(self.dataset["identifier"], "experiments_json")),
+            content_type="application/json",
+            data=json.dumps(EXAMPLE_INGEST_EXPERIMENT),
+        )
+        c = r.json()
+        self.assertEqual(c["success"], True)
+        self.assertEqual(len(c["errors"]), 0)
+        self.assertEqual(len(c["warnings"]), 0)
+        self.assertEqual(r.status_code, status.HTTP_201_CREATED)
diff --git a/chord_metadata_service/chord/workflows/wdls/experiments_json.wdl b/chord_metadata_service/chord/workflows/wdls/experiments_json.wdl
index 881545a0c..eeb26318a 100644
--- a/chord_metadata_service/chord/workflows/wdls/experiments_json.wdl
+++ b/chord_metadata_service/chord/workflows/wdls/experiments_json.wdl
@@ -37,7 +37,7 @@ task ingest_task {
             -H "Authorization: Bearer ~{token}" \
             --data "@~{json_document}" \
             "~{katsu_url}/ingest/~{dataset_id}/experiments_json")
-        if [[ "${RESPONSE}" != "204" ]]
+        if [[ "${RESPONSE}" != "201" ]]
         then
             echo "Error: Metadata service replied with ${RESPONSE}" 1>&2  # to stderr
             exit 1
diff --git a/chord_metadata_service/chord/workflows/wdls/phenopackets_json.wdl b/chord_metadata_service/chord/workflows/wdls/phenopackets_json.wdl
index f0e05c18d..f423fb673 100644
--- a/chord_metadata_service/chord/workflows/wdls/phenopackets_json.wdl
+++ b/chord_metadata_service/chord/workflows/wdls/phenopackets_json.wdl
@@ -37,7 +37,7 @@ task ingest_task {
             -H "Authorization: Bearer ~{token}" \
             --data "@~{json_document}" \
             "~{katsu_url}/ingest/~{dataset_id}/phenopackets_json")
-        if [[ "${RESPONSE}" != "204" ]]
+        if [[ "${RESPONSE}" != "201" ]]
         then
             echo "Error: Metadata service replied with ${RESPONSE}" 1>&2  # to stderr
             exit 1

From 948f6bcae3b9e97ee7b4727fbdcc1a01bc3ba8aa Mon Sep 17 00:00:00 2001
From: v-rocheleau <victor.rocheleau@mcgill.ca>
Date: Wed, 20 Sep 2023 17:14:39 +0000
Subject: [PATCH 17/25] lint

---
 chord_metadata_service/chord/ingest/exceptions.py  |  2 +-
 chord_metadata_service/chord/ingest/experiments.py |  2 +-
 .../chord/ingest/phenopackets.py                   |  2 +-
 .../chord/tests/test_api_ingest.py                 | 14 ++++++--------
 4 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/chord_metadata_service/chord/ingest/exceptions.py b/chord_metadata_service/chord/ingest/exceptions.py
index 431e41242..954118629 100644
--- a/chord_metadata_service/chord/ingest/exceptions.py
+++ b/chord_metadata_service/chord/ingest/exceptions.py
@@ -94,7 +94,7 @@ def parse_schema_warnings(data: dict, schema: dict) -> Optional[List[dict]]:
     warnings = []
     for (version, version_changes) in applicable_changes.items():
         for (prop_name, changes) in version_changes.get("properties", {}).items():
-            if property_warning:= parse_property_warnings(data, prop_name, changes):
+            if property_warning := parse_property_warnings(data, prop_name, changes):
                 property_warning["version"] = version
                 warnings.append(property_warning)
     return warnings if len(warnings) else None
diff --git a/chord_metadata_service/chord/ingest/experiments.py b/chord_metadata_service/chord/ingest/experiments.py
index 03a7aaf3c..df9713547 100644
--- a/chord_metadata_service/chord/ingest/experiments.py
+++ b/chord_metadata_service/chord/ingest/experiments.py
@@ -56,7 +56,7 @@ def create_experiment_result(er: dict) -> em.ExperimentResult:
 
 def validate_experiment(experiment_data, idx: Optional[int] = None) -> None:
     # Validate experiment data against experiments schema.
-    if val_errors:= schema_validation(experiment_data, EXPERIMENT_SCHEMA):
+    if val_errors := schema_validation(experiment_data, EXPERIMENT_SCHEMA):
         raise IngestError(
             data=experiment_data,
             schema=EXPERIMENT_SCHEMA,
diff --git a/chord_metadata_service/chord/ingest/phenopackets.py b/chord_metadata_service/chord/ingest/phenopackets.py
index 14b87f65c..133b71bf0 100644
--- a/chord_metadata_service/chord/ingest/phenopackets.py
+++ b/chord_metadata_service/chord/ingest/phenopackets.py
@@ -60,7 +60,7 @@ def validate_phenopacket(phenopacket_data: dict[str, Any],
                          schema: dict = PHENOPACKET_SCHEMA,
                          idx: Optional[int] = None) -> None:
     # Validate phenopacket data against phenopackets schema.
-    if val_errors:= schema_validation(phenopacket_data, schema):
+    if val_errors := schema_validation(phenopacket_data, schema):
         raise IngestError(
             data=phenopacket_data,
             schema=PHENOPACKET_SCHEMA,
diff --git a/chord_metadata_service/chord/tests/test_api_ingest.py b/chord_metadata_service/chord/tests/test_api_ingest.py
index 15cbb2f10..ced50a5c0 100644
--- a/chord_metadata_service/chord/tests/test_api_ingest.py
+++ b/chord_metadata_service/chord/tests/test_api_ingest.py
@@ -3,9 +3,8 @@
 from django.urls import reverse
 from rest_framework import status
 from rest_framework.test import APITestCase
-from chord_metadata_service.chord.tests.example_ingest import EXAMPLE_INGEST_EXPERIMENT, EXAMPLE_INGEST_INVALID_EXPERIMENT, EXAMPLE_INGEST_INVALID_PHENOPACKET, EXAMPLE_INGEST_PHENOPACKET
-
-from chord_metadata_service.restapi.tests.utils import load_local_json
+from chord_metadata_service.chord.tests.example_ingest import EXAMPLE_INGEST_EXPERIMENT, \
+    EXAMPLE_INGEST_INVALID_EXPERIMENT, EXAMPLE_INGEST_INVALID_PHENOPACKET, EXAMPLE_INGEST_PHENOPACKET
 from .constants import VALID_PROJECT_1, valid_dataset_1
 from ..workflows.metadata import METADATA_WORKFLOWS
 
@@ -88,7 +87,7 @@ def test_phenopackets_ingest(self):
         c = r.json()
         self.assertEqual(r.status_code, status.HTTP_500_INTERNAL_SERVER_ERROR)
         self.assertEqual(c["success"], False)
-        self.assertEqual(len(c["errors"]), 1) # 1 required property
+        self.assertEqual(len(c["errors"]), 1)  # 1 error: the exception raised for the malformed body
 
         # Invalid phenopacket JSON validation
         r = self.client.post(
@@ -113,7 +112,6 @@ def test_phenopackets_ingest(self):
         self.assertEqual(len(c["warnings"]), 0)
         self.assertEqual(r.status_code, status.HTTP_201_CREATED)
 
-
     def test_experiments_ingest_failures(self):
         # Invalid workflow ID
         r = self.client.post(
@@ -133,7 +131,7 @@ def test_experiments_ingest_failures(self):
         c = r.json()
         self.assertEqual(r.status_code, status.HTTP_400_BAD_REQUEST)
         self.assertEqual(c["success"], False)
-        self.assertEqual(len(c["errors"]), 2) # 2 required properties
+        self.assertEqual(len(c["errors"]), 2)  # 2 required properties
 
         # Bad ingestion body JSON
         r = self.client.post(
@@ -156,7 +154,7 @@ def test_experiments_ingest_failures(self):
         self.assertEqual(r.status_code, status.HTTP_400_BAD_REQUEST)
         self.assertEqual(c["success"], False)
         self.assertEqual(len(c["errors"]), 4)
-        
+
         # Two of the errors concern experiment schema changes
         warnings = c["warnings"]
         self.assertEqual(len(warnings), 2)
@@ -174,7 +172,7 @@ def test_experiments_ingest_failures(self):
         self.assertEqual(c["success"], False)
         self.assertEqual(len(c["errors"]), 1)
         self.assertEqual(r.status_code, status.HTTP_500_INTERNAL_SERVER_ERROR)
-    
+
     def test_experiment_ingest_success(self):
         # Create the required phenopacket with a biosample first
         r = self.client.post(

From db378d249986c3565cfeb19ec674dca87996cd97 Mon Sep 17 00:00:00 2001
From: v-rocheleau <victor.rocheleau@mcgill.ca>
Date: Wed, 20 Sep 2023 18:36:41 +0000
Subject: [PATCH 18/25] add ingestion tests, exp workflow payload schema

---
 .../chord/ingest/exceptions.py                |  2 +-
 .../chord/ingest/experiments.py               | 23 ++++--
 chord_metadata_service/chord/ingest/views.py  | 13 ++--
 .../example_experiment bad_resource.json      | 78 +++++++++++++++++++
 .../chord/tests/example_ingest.py             |  1 +
 .../chord/tests/test_api_ingest.py            | 28 ++++++-
 chord_metadata_service/experiments/schemas.py | 21 +++++
 7 files changed, 150 insertions(+), 16 deletions(-)
 create mode 100644 chord_metadata_service/chord/tests/example_experiment bad_resource.json

diff --git a/chord_metadata_service/chord/ingest/exceptions.py b/chord_metadata_service/chord/ingest/exceptions.py
index 954118629..a359d36fe 100644
--- a/chord_metadata_service/chord/ingest/exceptions.py
+++ b/chord_metadata_service/chord/ingest/exceptions.py
@@ -60,7 +60,7 @@ def parse_property_warnings(data: dict, prop_name: str, property_changes: List[t
             if value.lower() == old_value.lower():
                 return property_warning
 
-        # Only warn when mecessary
+        # Only warn when necessary
         return None
 
 
diff --git a/chord_metadata_service/chord/ingest/experiments.py b/chord_metadata_service/chord/ingest/experiments.py
index df9713547..183c668c7 100644
--- a/chord_metadata_service/chord/ingest/experiments.py
+++ b/chord_metadata_service/chord/ingest/experiments.py
@@ -4,7 +4,8 @@
 
 from chord_metadata_service.chord.models import Dataset
 from chord_metadata_service.experiments import models as em
-from chord_metadata_service.experiments.schemas import EXPERIMENT_SCHEMA, EXPERIMENT_RESULT_SCHEMA
+from chord_metadata_service.experiments.schemas import EXPERIMENT_SCHEMA, \
+    EXPERIMENT_RESULT_SCHEMA, EXPERIMENT_WORKFLOW_SCHEMA
 from chord_metadata_service.phenopackets import models as pm
 
 from typing import Optional
@@ -66,6 +67,16 @@ def validate_experiment(experiment_data, idx: Optional[int] = None) -> None:
         )
 
 
+def validate_experiment_workflow(json_data: dict) -> None:
+    if val_errors := schema_validation(json_data, EXPERIMENT_WORKFLOW_SCHEMA):
+        raise IngestError(
+            data=json_data,
+            schema=EXPERIMENT_WORKFLOW_SCHEMA,
+            schema_validation_errors=val_errors,
+            message="Failed schema validation for experiments ingestion workflow payload.",
+        )
+
+
 def ingest_experiment(
     experiment_data: dict,
     dataset_id: str,
@@ -141,6 +152,9 @@ def ingest_experiment(
 
 
 def ingest_experiments_workflow(json_data, dataset_id: str) -> list[em.Experiment]:
+    # First, validate the workflow's json_data
+    validate_experiment_workflow(json_data)
+
     dataset = Dataset.objects.get(identifier=dataset_id)
 
     for rs in json_data.get("resources", []):
@@ -148,12 +162,7 @@ def ingest_experiments_workflow(json_data, dataset_id: str) -> list[em.Experimen
 
     exps = json_data.get("experiments", [])
 
-    if len(exps) == 0:
-        # If empty experiments array
-        # Validate an empty json to raise an IngestError with validation details
-        validate_experiment({})
-
-    # First, validate all experiments with the schema before creating anything in the database.
+    # Second, validate all experiments with the schema before creating anything in the database.
     for idx, exp in enumerate(exps):
         validate_experiment(exp, idx)
 
diff --git a/chord_metadata_service/chord/ingest/views.py b/chord_metadata_service/chord/ingest/views.py
index 2441d89e2..d587e216e 100644
--- a/chord_metadata_service/chord/ingest/views.py
+++ b/chord_metadata_service/chord/ingest/views.py
@@ -7,6 +7,7 @@
 from django.core.exceptions import ValidationError
 from django.db import transaction
 from jsonschema import Draft7Validator
+from rest_framework import status
 from rest_framework.decorators import api_view, permission_classes
 from rest_framework.permissions import AllowAny
 from rest_framework.response import Response
@@ -73,12 +74,12 @@ def ingest_into_dataset(request, dataset_id: str, workflow_id: str):
     # Check that the workflow exists
     if workflow_id not in WORKFLOW_INGEST_FUNCTION_MAP:
         response_builder.add_error(f"Ingestion workflow ID {workflow_id} does not exist")
-        return response_builder.as_response(400)
+        return response_builder.as_response(status.HTTP_400_BAD_REQUEST)
 
     if dataset_id not in DATASET_ID_OVERRIDES:
         if not Dataset.objects.filter(identifier=dataset_id).exists():
             response_builder.add_error(f"Dataset with ID {dataset_id} does not exist")
-            return response_builder.as_response(400)
+            return response_builder.as_response(status.HTTP_400_BAD_REQUEST)
         dataset_id = str(uuid.UUID(dataset_id))  # Normalize dataset ID to UUID's str format.
 
     try:
@@ -88,17 +89,17 @@ def ingest_into_dataset(request, dataset_id: str, workflow_id: str):
 
     except IngestError as e:
         response_builder.add_ingest_error(e)
-        return response_builder.as_response(400)
+        return response_builder.as_response(status.HTTP_400_BAD_REQUEST)
 
     except ValidationError as e:
         response_builder.add_errors(e.error_list if hasattr(e, "error_list") else e.error_dict.items())
-        return response_builder.as_response(400)
+        return response_builder.as_response(status.HTTP_400_BAD_REQUEST)
 
     except Exception as e:
         # Encountered some other error from the ingestion attempt, return a somewhat detailed message
         logger.error(f"Encountered an exception while processing an ingest attempt:\n{traceback.format_exc()}")
         response_builder.add_error(f"Encountered an exception while processing an ingest attempt (error: {repr(e)})")
-        return response_builder.as_response(500)
+        return response_builder.as_response(status.HTTP_500_INTERNAL_SERVER_ERROR)
 
     response_builder.set_success(True)
-    return response_builder.as_response(201)
+    return response_builder.as_response(status.HTTP_201_CREATED)
diff --git a/chord_metadata_service/chord/tests/example_experiment bad_resource.json b/chord_metadata_service/chord/tests/example_experiment bad_resource.json
new file mode 100644
index 000000000..910f42417
--- /dev/null
+++ b/chord_metadata_service/chord/tests/example_experiment bad_resource.json	
@@ -0,0 +1,78 @@
+{
+  "experiments": [
+    {
+      "id": "experiment:1",
+      "biosample": "sample1",
+      "study_type": "Epigenomics",
+      "experiment_type": "Other",
+      "experiment_ontology": [
+        {
+          "id": "http://www.ebi.ac.uk/efo/EFO_0002692",
+          "label": "ChIP-seq"
+        }
+      ],
+      "library_strategy": "ChIP-Seq",
+      "library_source": "Genomic",
+      "library_selection": "RANDOM",
+      "library_layout": "Single",
+      "extraction_protocol": "NGS",
+      "molecule": "genomic DNA",
+      "molecule_ontology": [
+        {
+          "id": "SO:0000991",
+          "label": "genomic DNA"
+        }
+      ],
+      "experiment_results": [
+        {
+          "identifier": "sample1_01",
+          "description": "test",
+          "filename": "sample1_01.vcf.gz",
+          "file_format": "VCF",
+          "data_output_type": "Derived data",
+          "usage": "Visualized",
+          "creation_date": "01-09-2021",
+          "created_by": "Admin",
+          "extra_properties": {
+            "test": "test"
+          }
+        },
+        {
+          "identifier": "sample1_02",
+          "description": "test2",
+          "filename": "sample1_02.vcf.gz",
+          "file_format": "CRAM",
+          "data_output_type": "Raw data",
+          "usage": "Visualized",
+          "creation_date": "01-09-2021",
+          "created_by": "Admin",
+          "extra_properties": {
+            "test": "test"
+          }
+        }
+      ],
+      "instrument": {
+        "identifier": "instrument:01",
+        "platform": "Illumina",
+        "description": "Test description",
+        "model": "Illumina HiSeq 4000",
+        "extra_properties": {
+          "date": "2021-06-21"
+        }
+      },
+      "extra_properties": {
+        "date_uploaded": "2021-03-16"
+      }
+    }
+  ],
+  "resources": [
+    {
+      "name": "Sequence types and features ontology",
+      "version": "THIS_VALUE_BREAKS_VALIDATION",
+      "namespace_prefix": "SO",
+      "id": "SO:2021-02-16",
+      "iri_prefix": "http://purl.obolibrary.org/obo/so.owl#",
+      "url": "http://purl.obolibrary.org/obo/so.owl"
+    }
+  ]
+}
diff --git a/chord_metadata_service/chord/tests/example_ingest.py b/chord_metadata_service/chord/tests/example_ingest.py
index 00ea712b8..6e812db42 100644
--- a/chord_metadata_service/chord/tests/example_ingest.py
+++ b/chord_metadata_service/chord/tests/example_ingest.py
@@ -16,6 +16,7 @@
 
 EXAMPLE_INGEST_EXPERIMENT = load_local_json("example_experiment.json")
 EXAMPLE_INGEST_EXPERIMENT_BAD_BIOSAMPLE = load_local_json("example_experiment_bad_biosample.json")
+EXAMPLE_INGEST_EXPERIMENT_BAD_RESOURCE = load_local_json("example_experiment bad_resource.json")
 
 EXAMPLE_INGEST_INVALID_EXPERIMENT = load_local_json("example_invalid_experiment.json")
 EXAMPLE_INGEST_EXPERIMENT_RESULT = load_local_json("example_derived_experiment_result.json")
diff --git a/chord_metadata_service/chord/tests/test_api_ingest.py b/chord_metadata_service/chord/tests/test_api_ingest.py
index ced50a5c0..02addf6c3 100644
--- a/chord_metadata_service/chord/tests/test_api_ingest.py
+++ b/chord_metadata_service/chord/tests/test_api_ingest.py
@@ -1,10 +1,12 @@
 import json
+import uuid
 
 from django.urls import reverse
 from rest_framework import status
 from rest_framework.test import APITestCase
 from chord_metadata_service.chord.tests.example_ingest import EXAMPLE_INGEST_EXPERIMENT, \
-    EXAMPLE_INGEST_INVALID_EXPERIMENT, EXAMPLE_INGEST_INVALID_PHENOPACKET, EXAMPLE_INGEST_PHENOPACKET
+    EXAMPLE_INGEST_EXPERIMENT_BAD_RESOURCE, EXAMPLE_INGEST_INVALID_EXPERIMENT, \
+    EXAMPLE_INGEST_INVALID_PHENOPACKET, EXAMPLE_INGEST_PHENOPACKET
 from .constants import VALID_PROJECT_1, valid_dataset_1
 from ..workflows.metadata import METADATA_WORKFLOWS
 
@@ -131,7 +133,7 @@ def test_experiments_ingest_failures(self):
         c = r.json()
         self.assertEqual(r.status_code, status.HTTP_400_BAD_REQUEST)
         self.assertEqual(c["success"], False)
-        self.assertEqual(len(c["errors"]), 2)  # 2 required properties
+        self.assertEqual(len(c["errors"]), 1)
 
         # Bad ingestion body JSON
         r = self.client.post(
@@ -173,6 +175,28 @@ def test_experiments_ingest_failures(self):
         self.assertEqual(len(c["errors"]), 1)
         self.assertEqual(r.status_code, status.HTTP_500_INTERNAL_SERVER_ERROR)
 
+        # Wrong dataset id
+        r = self.client.post(
+            reverse("ingest-into-dataset", args=(uuid.uuid4(), "experiments_json")),
+            content_type="application/json",
+            data=json.dumps(EXAMPLE_INGEST_EXPERIMENT),
+        )
+        c = r.json()
+        self.assertEqual(c["success"], False)
+        self.assertEqual(len(c["errors"]), 1)
+        self.assertEqual(r.status_code, status.HTTP_400_BAD_REQUEST)
+
+        # Invalid resource
+        r = self.client.post(
+            reverse("ingest-into-dataset", args=(self.dataset["identifier"], "experiments_json")),
+            content_type="application/json",
+            data=json.dumps(EXAMPLE_INGEST_EXPERIMENT_BAD_RESOURCE),
+        )
+        c = r.json()
+        self.assertEqual(c["success"], False)
+        self.assertEqual(len(c["errors"]), 1)
+        self.assertEqual(r.status_code, status.HTTP_400_BAD_REQUEST)
+
     def test_experiment_ingest_success(self):
         # Create the required phenopacket with a biosample first
         r = self.client.post(
diff --git a/chord_metadata_service/experiments/schemas.py b/chord_metadata_service/experiments/schemas.py
index fe81c4cc7..c288f3c07 100644
--- a/chord_metadata_service/experiments/schemas.py
+++ b/chord_metadata_service/experiments/schemas.py
@@ -157,6 +157,27 @@
 }, EXPERIMENT)
 
 
+EXPERIMENT_WORKFLOW_SCHEMA = {
+    "$schema": "http://json-schema.org/draft-07/schema#",
+    "$id": "katsu:experiments:experiment_workflow_schema",
+    "title": "Experiment workflow schema",
+    "description": "Schema that describes the shape "
+                   "of an experiment workflow ingestion",
+    "type": "object",
+    "properties": {
+        "experiments": {
+            "type": "array",
+            "items": {"type": "object"},
+            "minItems": 1,
+        },
+        "resources": {
+            "type": "array",
+            "items": {"type": "object"},
+        }
+    },
+    "required": ["experiments"]
+}
+
 """
 Dictionary of schema changes for warnings.
 """

From 881f792ca57e049d3d93c91be0d27cc52a9cc509 Mon Sep 17 00:00:00 2001
From: v-rocheleau <victor.rocheleau@mcgill.ca>
Date: Wed, 20 Sep 2023 18:40:37 +0000
Subject: [PATCH 19/25] infer success from status code

---
 chord_metadata_service/chord/ingest/views.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/chord_metadata_service/chord/ingest/views.py b/chord_metadata_service/chord/ingest/views.py
index d587e216e..a6f669987 100644
--- a/chord_metadata_service/chord/ingest/views.py
+++ b/chord_metadata_service/chord/ingest/views.py
@@ -32,13 +32,9 @@ class IngestResponseBuilder:
     def __init__(self, workflow_id: str, dataset_id: str):
         self.workflow_id = workflow_id
         self.dataset_id = dataset_id
-        self.success = False
         self.errors = []
         self.warnings = []
 
-    def set_success(self, success: bool):
-        self.success = success
-
     def add_error(self, error):
         self.errors.append(error)
 
@@ -56,7 +52,7 @@ def add_ingest_error(self, error: IngestError):
 
     def as_response(self, status_code: int) -> Response:
         body = {
-            "success": self.success,
+            "success": status_code < status.HTTP_400_BAD_REQUEST,
             "warnings": self.warnings,
             "errors": self.errors,
         }

From b35997332ae496f1df1ec17ec3b79a1543a33d68 Mon Sep 17 00:00:00 2001
From: v-rocheleau <victor.rocheleau@mcgill.ca>
Date: Wed, 20 Sep 2023 18:42:28 +0000
Subject: [PATCH 20/25] remove line call

---
 chord_metadata_service/chord/ingest/views.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/chord_metadata_service/chord/ingest/views.py b/chord_metadata_service/chord/ingest/views.py
index a6f669987..cb65e23dd 100644
--- a/chord_metadata_service/chord/ingest/views.py
+++ b/chord_metadata_service/chord/ingest/views.py
@@ -97,5 +97,4 @@ def ingest_into_dataset(request, dataset_id: str, workflow_id: str):
         response_builder.add_error(f"Encountered an exception while processing an ingest attempt (error: {repr(e)})")
         return response_builder.as_response(status.HTTP_500_INTERNAL_SERVER_ERROR)
 
-    response_builder.set_success(True)
     return response_builder.as_response(status.HTTP_201_CREATED)

From 9b58dc4efbeb5cc2b1dee0c36a508426bbab93a2 Mon Sep 17 00:00:00 2001
From: v-rocheleau <victor.rocheleau@mcgill.ca>
Date: Wed, 20 Sep 2023 18:49:57 +0000
Subject: [PATCH 21/25] get ingestion warnings from derived experiment results
 ingestion

---
 chord_metadata_service/chord/ingest/experiments.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/chord_metadata_service/chord/ingest/experiments.py b/chord_metadata_service/chord/ingest/experiments.py
index 183c668c7..c852840ee 100644
--- a/chord_metadata_service/chord/ingest/experiments.py
+++ b/chord_metadata_service/chord/ingest/experiments.py
@@ -184,6 +184,8 @@ def ingest_derived_experiment_results(json_data: list[dict]) -> list[em.Experime
         if val_errors:
             # TODO: Report more precise errors
             raise IngestError(
+                data=exp_result,
+                schema=EXPERIMENT_RESULT_SCHEMA,
                 schema_validation_errors=val_errors,
                 message=f"Failed schema validation for experiment result {idx} "
                         f"(check Katsu logs for more information)"

From dd0f66c9fb0154c4277314dee51db0805527b208 Mon Sep 17 00:00:00 2001
From: v-rocheleau <victor.rocheleau@mcgill.ca>
Date: Wed, 20 Sep 2023 21:19:32 +0000
Subject: [PATCH 22/25] save ingest report to file and output

---
 .../chord/workflows/wdls/experiments_json.wdl        | 12 +++++++++---
 .../chord/workflows/wdls/phenopackets_json.wdl       |  8 +++++++-
 2 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/chord_metadata_service/chord/workflows/wdls/experiments_json.wdl b/chord_metadata_service/chord/workflows/wdls/experiments_json.wdl
index eeb26318a..b8a1427d6 100644
--- a/chord_metadata_service/chord/workflows/wdls/experiments_json.wdl
+++ b/chord_metadata_service/chord/workflows/wdls/experiments_json.wdl
@@ -15,12 +15,14 @@ workflow experiments_json {
             json_document = json_document,
             katsu_url = katsu_url,
             dataset_id = dataset_id,
-            token = secret__access_token
+            token = secret__access_token,
+            ingest_report = "~{run_dir}/ingest_report.json"
     }
 
     output {
         File stdout = ingest_task.txt_output
         File stderr = ingest_task.err_output
+        File ingest_report = ingest_task.ingest_report
     }
 }
 
@@ -30,14 +32,17 @@ task ingest_task {
         String katsu_url
         String dataset_id
         String token
+        String ingest_report
     }
     command <<<
         RESPONSE=$(curl -X POST -k -s -w "%{http_code}" \
             -H "Content-Type: application/json" \
             -H "Authorization: Bearer ~{token}" \
             --data "@~{json_document}" \
-            "~{katsu_url}/ingest/~{dataset_id}/experiments_json")
-        if [[ "${RESPONSE}" != "201" ]]
+            -o "~{ingest_report}" \
+            "~{katsu_url}/ingest/~{dataset_id}/experiments_json")
+
+        if [[ "${RESPONSE}" != "201" ]]  # -o sends the body to the report file; only the status code is captured
         then
             echo "Error: Metadata service replied with ${RESPONSE}" 1>&2  # to stderr
             exit 1
@@ -48,5 +53,6 @@ task ingest_task {
     output {
         File txt_output = stdout()
         File err_output = stderr()
+        File ingest_report = "~{ingest_report}"
     }
 }
diff --git a/chord_metadata_service/chord/workflows/wdls/phenopackets_json.wdl b/chord_metadata_service/chord/workflows/wdls/phenopackets_json.wdl
index f423fb673..80c6d08d0 100644
--- a/chord_metadata_service/chord/workflows/wdls/phenopackets_json.wdl
+++ b/chord_metadata_service/chord/workflows/wdls/phenopackets_json.wdl
@@ -15,12 +15,14 @@ workflow phenopackets_json {
             json_document = json_document,
             katsu_url = katsu_url,
             dataset_id = dataset_id,
-            token = secret__access_token
+            token = secret__access_token,
+            ingest_report = "~{run_dir}/ingest_report.json"
     }
 
     output {
         File stdout = ingest_task.txt_output
         File stderr = ingest_task.err_output
+        File ingest_report = ingest_task.ingest_report  # response body saved by ingest_task
     }
 }
 
@@ -30,13 +32,16 @@ task ingest_task {
         String katsu_url
         String dataset_id
         String token
+        String ingest_report
     }
     command <<<
         RESPONSE=$(curl -X POST -k -s -w "%{http_code}" \
             -H "Content-Type: application/json" \
             -H "Authorization: Bearer ~{token}" \
             --data "@~{json_document}" \
+            -o "~{ingest_report}" \
             "~{katsu_url}/ingest/~{dataset_id}/phenopackets_json")
+
         if [[ "${RESPONSE}" != "201" ]]
         then
             echo "Error: Metadata service replied with ${RESPONSE}" 1>&2  # to stderr
@@ -48,5 +53,6 @@ task ingest_task {
     output {
         File txt_output = stdout()
         File err_output = stderr()
+        File ingest_report = "~{ingest_report}"
     }
 }

From f9fa0f9930875a170eb5a273db248f84a25bebb2 Mon Sep 17 00:00:00 2001
From: Victor Rocheleau <victor.rocheleau@mcgill.ca>
Date: Thu, 18 Jan 2024 16:30:23 -0500
Subject: [PATCH 23/25] fix migrations, lint

---
 chord_metadata_service/chord/ingest/schema.py                   | 2 --
 .../experiments/migrations/{0009_v4_1_0.py => 0010_v6_2_0.py}   | 2 +-
 chord_metadata_service/phenopackets/schemas.py                  | 2 +-
 3 files changed, 2 insertions(+), 4 deletions(-)
 rename chord_metadata_service/experiments/migrations/{0009_v4_1_0.py => 0010_v6_2_0.py} (96%)

diff --git a/chord_metadata_service/chord/ingest/schema.py b/chord_metadata_service/chord/ingest/schema.py
index dbf531a64..577ad1e5a 100644
--- a/chord_metadata_service/chord/ingest/schema.py
+++ b/chord_metadata_service/chord/ingest/schema.py
@@ -1,4 +1,3 @@
-from ctypes import Array
 from jsonschema import Draft7Validator
 from jsonschema.exceptions import ValidationError
 
@@ -32,4 +31,3 @@ def schema_validation(obj, schema, registry=None):
         for i, error in enumerate(errors, 1):
             logger.error(f"{i} Validation error in {'.'.join(str(v) for v in error.path)}: {error.message}")
         return errors
-            
diff --git a/chord_metadata_service/experiments/migrations/0009_v4_1_0.py b/chord_metadata_service/experiments/migrations/0010_v6_2_0.py
similarity index 96%
rename from chord_metadata_service/experiments/migrations/0009_v4_1_0.py
rename to chord_metadata_service/experiments/migrations/0010_v6_2_0.py
index 532935887..edadedcad 100644
--- a/chord_metadata_service/experiments/migrations/0009_v4_1_0.py
+++ b/chord_metadata_service/experiments/migrations/0010_v6_2_0.py
@@ -24,7 +24,7 @@ def set_experiment_library(apps, _schema_editor):
 
 class Migration(migrations.Migration):
     dependencies = [
-        ('experiments', '0007_v4_0_0'),
+        ('experiments', '0009_v6_0_0'),
     ]
 
     operations = [
diff --git a/chord_metadata_service/phenopackets/schemas.py b/chord_metadata_service/phenopackets/schemas.py
index 51be759b9..a5dc4d042 100644
--- a/chord_metadata_service/phenopackets/schemas.py
+++ b/chord_metadata_service/phenopackets/schemas.py
@@ -658,7 +658,7 @@
     "required": ["id", "meta_data"],
 }, descriptions.PHENOPACKET)
 
-VRS_REF_RESOURCE = Resource.from_contents(contents=vrs_schema_definitions, default_specification=DRAFT_07)
+VRS_REF_RESOURCE = Resource.from_contents(contents=vrs_schema_definitions)
 VRS_REF_REGISTRY = VRS_REF_RESOURCE @ Registry()
 
 resolver = VRS_REF_REGISTRY.resolver()

From 4148d36823c04432a7838ff6f98df1b0f0e701ea Mon Sep 17 00:00:00 2001
From: Victor Rocheleau <victor.rocheleau@mcgill.ca>
Date: Thu, 18 Jan 2024 16:59:49 -0500
Subject: [PATCH 24/25] schema changes version update

---
 chord_metadata_service/chord/ingest/exceptions.py           | 2 +-
 .../experiments/migrations/0010_v6_2_0.py                   | 6 +++---
 chord_metadata_service/experiments/schemas.py               | 2 +-
 pyproject.toml                                              | 2 +-
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/chord_metadata_service/chord/ingest/exceptions.py b/chord_metadata_service/chord/ingest/exceptions.py
index a359d36fe..7f362a715 100644
--- a/chord_metadata_service/chord/ingest/exceptions.py
+++ b/chord_metadata_service/chord/ingest/exceptions.py
@@ -84,7 +84,7 @@ def parse_schema_warnings(data: dict, schema: dict) -> Optional[List[dict]]:
     if not data or not schema:
         return None
 
-    data_type = schema.get("$id", "").split(":")[-1]
+    data_type = schema.get("$id", "").split("/")[-1]
     applicable_changes = DATA_TYPE_SCHEMA_CHANGES.get(data_type, None)
 
     if not applicable_changes or __version__ not in applicable_changes:
diff --git a/chord_metadata_service/experiments/migrations/0010_v6_2_0.py b/chord_metadata_service/experiments/migrations/0010_v6_2_0.py
index edadedcad..c8754cc66 100644
--- a/chord_metadata_service/experiments/migrations/0010_v6_2_0.py
+++ b/chord_metadata_service/experiments/migrations/0010_v6_2_0.py
@@ -2,9 +2,9 @@
 from django.db import migrations
 from chord_metadata_service.experiments.schemas import EXPERIMENT_SCHEMA_CHANGES
 
-V4_1_0_PROPERTIES = EXPERIMENT_SCHEMA_CHANGES["4.1.0"]["properties"]
-LIB_STRATEGY_CONVERSIONS = V4_1_0_PROPERTIES["library_strategy"]
-LIB_SELECTION_CONVERIONS = V4_1_0_PROPERTIES["library_selection"]
+V6_2_0_PROPERTIES = EXPERIMENT_SCHEMA_CHANGES["6.2.0"]["properties"]
+LIB_STRATEGY_CONVERSIONS = V6_2_0_PROPERTIES["library_strategy"]
+LIB_SELECTION_CONVERIONS = V6_2_0_PROPERTIES["library_selection"]
 
 
 def set_experiment_library(apps, _schema_editor):
diff --git a/chord_metadata_service/experiments/schemas.py b/chord_metadata_service/experiments/schemas.py
index 2c7724ef5..92b780ea0 100644
--- a/chord_metadata_service/experiments/schemas.py
+++ b/chord_metadata_service/experiments/schemas.py
@@ -187,7 +187,7 @@
 Dictionary of schema changes for warnings.
 """
 EXPERIMENT_SCHEMA_CHANGES = {
-    "4.1.0": {
+    "6.2.0": {
         "properties": {
             "library_strategy": [
                     ("WES", "WXS"),
diff --git a/pyproject.toml b/pyproject.toml
index e4d4cae3f..3160308fa 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
 
 [tool.poetry]
 name = "chord_metadata_service"  # can be renamed to katsu if inner module directory is renamed too
-version = "6.0.0"
+version = "6.2.0"
 description = "An implementation of a clin/pheno metadata store for the Bento platform."
 authors = [
     "Ksenia Zaytseva",

From bbef730d1db8042583083ce4d70d5e0071c6a929 Mon Sep 17 00:00:00 2001
From: v-rocheleau <victor.rocheleau@mcgill.ca>
Date: Fri, 19 Jan 2024 12:19:06 -0500
Subject: [PATCH 25/25] fix api ingest tests

---
 .../example_experiment bad_resource.json      | 78 -------------------
 .../chord/tests/example_ingest.py             |  1 -
 .../chord/tests/test_api_ingest.py            | 13 ++--
 3 files changed, 6 insertions(+), 86 deletions(-)
 delete mode 100644 chord_metadata_service/chord/tests/example_experiment bad_resource.json

diff --git a/chord_metadata_service/chord/tests/example_experiment bad_resource.json b/chord_metadata_service/chord/tests/example_experiment bad_resource.json
deleted file mode 100644
index 910f42417..000000000
--- a/chord_metadata_service/chord/tests/example_experiment bad_resource.json	
+++ /dev/null
@@ -1,78 +0,0 @@
-{
-  "experiments": [
-    {
-      "id": "experiment:1",
-      "biosample": "sample1",
-      "study_type": "Epigenomics",
-      "experiment_type": "Other",
-      "experiment_ontology": [
-        {
-          "id": "http://www.ebi.ac.uk/efo/EFO_0002692",
-          "label": "ChIP-seq"
-        }
-      ],
-      "library_strategy": "ChIP-Seq",
-      "library_source": "Genomic",
-      "library_selection": "RANDOM",
-      "library_layout": "Single",
-      "extraction_protocol": "NGS",
-      "molecule": "genomic DNA",
-      "molecule_ontology": [
-        {
-          "id": "SO:0000991",
-          "label": "genomic DNA"
-        }
-      ],
-      "experiment_results": [
-        {
-          "identifier": "sample1_01",
-          "description": "test",
-          "filename": "sample1_01.vcf.gz",
-          "file_format": "VCF",
-          "data_output_type": "Derived data",
-          "usage": "Visualized",
-          "creation_date": "01-09-2021",
-          "created_by": "Admin",
-          "extra_properties": {
-            "test": "test"
-          }
-        },
-        {
-          "identifier": "sample1_02",
-          "description": "test2",
-          "filename": "sample1_02.vcf.gz",
-          "file_format": "CRAM",
-          "data_output_type": "Raw data",
-          "usage": "Visualized",
-          "creation_date": "01-09-2021",
-          "created_by": "Admin",
-          "extra_properties": {
-            "test": "test"
-          }
-        }
-      ],
-      "instrument": {
-        "identifier": "instrument:01",
-        "platform": "Illumina",
-        "description": "Test description",
-        "model": "Illumina HiSeq 4000",
-        "extra_properties": {
-          "date": "2021-06-21"
-        }
-      },
-      "extra_properties": {
-        "date_uploaded": "2021-03-16"
-      }
-    }
-  ],
-  "resources": [
-    {
-      "name": "Sequence types and features ontology",
-      "version": "THIS_VALUE_BREAKS_VALIDATION",
-      "namespace_prefix": "SO",
-      "id": "SO:2021-02-16",
-      "iri_prefix": "http://purl.obolibrary.org/obo/so.owl#",
-      "url": "http://purl.obolibrary.org/obo/so.owl"
-    }
-  ]
-}
diff --git a/chord_metadata_service/chord/tests/example_ingest.py b/chord_metadata_service/chord/tests/example_ingest.py
index 4511f6527..3d57e6024 100644
--- a/chord_metadata_service/chord/tests/example_ingest.py
+++ b/chord_metadata_service/chord/tests/example_ingest.py
@@ -16,7 +16,6 @@
 
 EXAMPLE_INGEST_EXPERIMENT = load_local_json("example_experiment.json")
 EXAMPLE_INGEST_EXPERIMENT_BAD_BIOSAMPLE = load_local_json("example_experiment_bad_biosample.json")
-EXAMPLE_INGEST_EXPERIMENT_BAD_RESOURCE = load_local_json("example_experiment bad_resource.json")
 
 EXAMPLE_INGEST_INVALID_EXPERIMENT = load_local_json("example_invalid_experiment.json")
 EXAMPLE_INGEST_EXPERIMENT_RESULT = load_local_json("example_derived_experiment_result.json")
diff --git a/chord_metadata_service/chord/tests/test_api_ingest.py b/chord_metadata_service/chord/tests/test_api_ingest.py
index e080d5ef5..e90bc4196 100644
--- a/chord_metadata_service/chord/tests/test_api_ingest.py
+++ b/chord_metadata_service/chord/tests/test_api_ingest.py
@@ -5,8 +5,7 @@
 from rest_framework import status
 from rest_framework.test import APITestCase
 from chord_metadata_service.chord.tests.example_ingest import EXAMPLE_INGEST_EXPERIMENT, \
-    EXAMPLE_INGEST_EXPERIMENT_BAD_RESOURCE, EXAMPLE_INGEST_INVALID_EXPERIMENT, \
-    EXAMPLE_INGEST_INVALID_PHENOPACKET, EXAMPLE_INGEST_PHENOPACKET
+    EXAMPLE_INGEST_INVALID_EXPERIMENT, EXAMPLE_INGEST_INVALID_PHENOPACKET, EXAMPLE_INGEST_PHENOPACKET
 from .constants import VALID_PROJECT_1, valid_dataset_1
 from ..workflows.metadata import workflow_set, WORKFLOW_PHENOPACKETS_JSON
 
@@ -75,7 +74,7 @@ def test_phenopackets_ingest(self):
         c = r.json()
         self.assertEqual(r.status_code, status.HTTP_400_BAD_REQUEST)
         self.assertEqual(c["success"], False)
-        self.assertEqual(len(c["errors"]), 1)
+        self.assertEqual(len(c["errors"]), 2)  # 2 required properties missing: 'id' and 'meta_data'
 
         # Bad ingestion body JSON
         r = self.client.post(
@@ -97,7 +96,7 @@ def test_phenopackets_ingest(self):
         c = r.json()
         self.assertEqual(r.status_code, status.HTTP_400_BAD_REQUEST)
         self.assertEqual(c["success"], False)
-        self.assertEqual(len(c["errors"]), 2)
+        self.assertEqual(len(c["errors"]), 1)  # missing required phenopacket ID
 
         # Success
         r = self.client.post(
@@ -183,16 +182,16 @@ def test_experiments_ingest_failures(self):
         self.assertEqual(len(c["errors"]), 1)
         self.assertEqual(r.status_code, status.HTTP_400_BAD_REQUEST)
 
-        # Invalid resource
+        # Missing biosample
         r = self.client.post(
             reverse("ingest-into-dataset", args=(self.dataset["identifier"], "experiments_json")),
             content_type="application/json",
-            data=json.dumps(EXAMPLE_INGEST_EXPERIMENT_BAD_RESOURCE),
+            data=json.dumps(EXAMPLE_INGEST_EXPERIMENT),
         )
         c = r.json()
         self.assertEqual(c["success"], False)
         self.assertEqual(len(c["errors"]), 1)
-        self.assertEqual(r.status_code, status.HTTP_400_BAD_REQUEST)
+        self.assertEqual(r.status_code, status.HTTP_500_INTERNAL_SERVER_ERROR)
 
     def test_experiment_ingest_success(self):
         # Create the required phenopacket with a biosample first