From e9cf81da2e8defbeead7ec9d10df70036a7429b8 Mon Sep 17 00:00:00 2001 From: v-rocheleau Date: Wed, 13 Sep 2023 21:47:43 +0000 Subject: [PATCH 01/25] xml parsing --- chord_metadata_service/experiments/schemas.py | 6 +- .../ontologies/SRA.experiment.xsd.xml | 799 ++++++++++++++++++ chord_metadata_service/restapi/api_views.py | 9 +- pyproject.toml | 1 + 4 files changed, 813 insertions(+), 2 deletions(-) create mode 100644 chord_metadata_service/ontologies/SRA.experiment.xsd.xml diff --git a/chord_metadata_service/experiments/schemas.py b/chord_metadata_service/experiments/schemas.py index cbd58fb26..3ea8a9620 100644 --- a/chord_metadata_service/experiments/schemas.py +++ b/chord_metadata_service/experiments/schemas.py @@ -1,10 +1,14 @@ +import requests from .descriptions import EXPERIMENT, EXPERIMENT_RESULT, INSTRUMENT from chord_metadata_service.restapi.schemas import ONTOLOGY_CLASS_LIST, KEY_VALUE_OBJECT from chord_metadata_service.restapi.schema_utils import tag_ids_and_describe - +import xmltodict __all__ = ["EXPERIMENT_SCHEMA", "EXPERIMENT_RESULT_SCHEMA", "INSTRUMENT_SCHEMA"] +sra_common_response = requests.get('http://ftp.ebi.ac.uk/pub/databases/ena/doc/xsd/sra_1_5/SRA.common.xsd') +sra_experiment_response = requests.get('http://ftp.ebi.ac.uk/pub/databases/ena/doc/xsd/sra_1_5/SRA.experiment.xsd') +sra_experiment_data = xmltodict.parse(sra_experiment_response.content) EXPERIMENT_RESULT_SCHEMA = tag_ids_and_describe({ "$schema": "http://json-schema.org/draft-07/schema#", diff --git a/chord_metadata_service/ontologies/SRA.experiment.xsd.xml b/chord_metadata_service/ontologies/SRA.experiment.xsd.xml new file mode 100644 index 000000000..64d4931fa --- /dev/null +++ b/chord_metadata_service/ontologies/SRA.experiment.xsd.xml @@ -0,0 +1,799 @@ + + + + + + + + + Sequencing technique intended for this library. + + + + + Whole Genome Sequencing - random sequencing of the whole genome (see pubmed 10731132 for details) + + + + + + Whole Genome Amplification followed by random sequencing. (see pubmed 1631067,8962113 for details) + + + + + + Random sequencing of exonic regions selected from the genome. (see pubmed 20111037 for details) + + + + + + Random sequencing of whole transcriptome, also known as Whole Transcriptome Shotgun Sequencing, or WTSS). (see + pubmed 18611170 for details) + + + + + Strand-specific RNA sequencing. + + + + + + Single nucleus RNA sequencing is a method for profiling gene expression in cells which are difficult to isolate. + + + + + Micro RNA sequencing strategy designed to capture post-transcriptional RNA elements and include non-coding + functional elements. (see pubmed 21787409 for details) + + + + + Capture of other non-coding RNA types, including post-translation modification types such as snRNA (small + nuclear RNA) or snoRNA (small nucleolar RNA), or expression regulation types such as siRNA (small interfering RNA) or + piRNA/piwi/RNA (piwi-interacting RNA). + + + + + Full-length sequencing of cDNA templates + + + + + Single pass sequencing of cDNA templates + + + + + Chromosome Conformation Capture technique where a biotin-labeled nucleotide is incorporated at the ligation junction, enabling selective purification of chimeric DNA ligation junctions followed by deep sequencing. + + + + + Assay for Transposase-Accessible Chromatin (ATAC) strategy is used to study genome-wide chromatin accessibility. alternative method to DNase-seq that uses an engineered Tn5 transposase to cleave DNA and to integrate primer DNA sequences into the cleaved genomic DNA. + + + + + Random sequencing of a whole chromosome or other replicon isolated from a genome. + + + + + + Genomic clone based (hierarchical) sequencing. + + + + + Shotgun of pooled clones (usually BACs and Fosmids). + + + + + Sequencing of overlapping or distinct PCR or RT-PCR products. For example, metagenomic community profiling + using SSU rRNA . + + + + + Clone end (5', 3', or both) sequencing. + + + + + Sequencing intended to finish (close) gaps in existing coverage. + + + + + ChIP-seq, Chromatin ImmunoPrecipitation, reveals binding sites of specific proteins, typically transcription factors (TFs) using antibodies to extract DNA fragments bound to the target protein. + + + + + Identifies well-positioned nucleosomes. uses Micrococcal Nuclease (MNase) is an endo-exonuclease that processively digests DNA until an obstruction, such as a nucleosome, is reached. + + + + + Sequencing of hypersensitive sites, or segments of open chromatin that are more readily cleaved by DNaseI. + + + + + + MethylC-seq. Sequencing following treatment of DNA with bisulfite to convert cytosine residues to uracil + depending on methylation status. + + + + + Concatenated Tag Sequencing + + + + + Methylation-Sensitive Restriction Enzyme Sequencing. + + + + + Methylated DNA Immunoprecipitation Sequencing. + + + + + Methyl CpG Binding Domain Sequencing. + + + + + Quantitatively determine fitness of bacterial genes based on how many times a purposely seeded transposon gets + inserted into each gene of a colony after some time. + + + + + CGHub special request: Independent experiment to re-evaluate putative variants. + + + + + Formaldehyde Assisted Isolation of Regulatory Elements. Reveals regions of open chromatin. + + + + + Systematic Evolution of Ligands by Exponential enrichment + + + + + Direct sequencing of RNA immunoprecipitates (includes CLIP-Seq, HITS-CLIP and PAR-CLIP). + + + + + Direct sequencing of proximity-ligated chromatin immunoprecipitates. + + + + + binning and barcoding of large DNA fragments to facilitate assembly of the fragment + + + + + Enrichment of a targeted subset of loci. + + + + + + Nucleosome Occupancy and Methylome sequencing. + + + + + ChIPmentation combines chromatin immunoprecipitation with sequencing library preparation by Tn5 transposase (see pubmed 26280331 for details) + + + + + Genotyping by sequencing is a method to discover single nucleotide polymorphisms for genotyping studies. + + + + + Ribosome profiling (also named ribosome footprinting) that uses specialized messenger RNA (mRNA) sequencing to determine which mRNAs are being actively translated. It produces a "global snapshot" of all the ribosomes active in a cell at a particular moment, known as a translatome. + + + + + Library strategy not listed. + + + + + + + + The LIBRARY_SOURCE specifies the type of source material that is being sequenced. + + + + + Genomic DNA (includes PCR products from genomic DNA). + + + + + + Transcription products or non genomic DNA (EST, cDNA, RT-PCR, screened libraries). + + + + + + Mixed material from metagenome. + + + + + Transcription products from community targets + + + + + Synthetic DNA. + + + + + Viral RNA. + + + + + Other, unspecified, or unknown library source material. + + + + + + + + Method used to enrich the target in the sequence library preparation + + + + + No Selection or Random selection + + + + + target enrichment via PCR + + + + + Source material was selected by randomly generated primers. + + + + + target enrichment via + + + + + Hypo-methylated partial restriction digest + + + + + Methyl Filtrated + + + + + Selection for less repetitive (and more gene rich) sequence through Cot filtration (CF) or other fractionation + techniques based on DNA kinetics. + + + + + Physical selection of size appropriate targets. + + + + + Methylation Spanning Linking Library + + + + + PolyA selection or enrichment for messenger RNA (mRNA); synonymize with PolyA + + + + + + + PolyA selection or enrichment for messenger RNA (mRNA); should replace cDNA enumeration. + + + + + enrichment of messenger RNA (mRNA) by hybridization to Oligo-dT. + + + + + depletion of ribosomal RNA by oligo hybridization. + + + + + depletion of ribosomal RNA by inverse oligo hybridization. + + + + + Chromatin immunoprecipitation + + + + + Chromatin immunoPrecipitation, reveals binding sites of specific proteins, typically transcription factors (TFs) using antibodies to extract DNA fragments bound to the target protein. + + + + + Identifies well-positioned nucleosomes. uses Micrococcal Nuclease (MNase) is an endo-exonuclease that processively digests DNA until an obstruction, such as a nucleosome, is reached. + + + + + DNase I endonuclease digestion and size selection reveals regions of chromatin where the DNA is highly sensitive to DNase I. + + + + + Selection by hybridization in array or solution. + + + + + Reproducible genomic subsets, often generated by restriction fragment size selection, containing a manageable + number of loci to facilitate re-sampling. + + + + + DNA fractionation using restriction enzymes. + + + + + Selection of methylated DNA fragments using an antibody raised against 5-methylcytosine or 5-methylcytidine + (m5C). + + + + + Enrichment by methyl-CpG binding domain. + + + + + Cap-analysis gene expression. + + + + + Rapid Amplification of cDNA Ends. + + + + + Multiple Displacement Amplification, a non-PCR based DNA amplification technique that amplifies a minute + quantifies of DNA to levels suitable for genomic analysis. + + + + + Targeted sequence capture protocol covering an arbitrary set of nonrepetitive genomics targets. An example is + capture bisulfite sequencing using padlock probes (BSPP). + + + + + Other library enrichment, screening, or selection process. + + + + + Library enrichment, screening, or selection is not specified. + + + + + + + + + + + + + + + + + + Assignment of read_group_tag to decoded read + + + + + + + + + + Label a sample within a scope of the pool + + + + + Proportion of this sample (in percent) that was included in sample pool. + + + + + + + + + + + + + Identifies a list of group/pool/multiplex sample members. This implies that + this sample record is a group, pool, or multiplex, but it continues to receive + its own accession and can be referenced by an experiment. By default if + no match to any of the listed members can be determined, then the default + sample reference is used. + + + + + + + Reference to the sample that is used when read membership cannot be determined. A default member should + be provided if there exists a possibility that some reads will be left over from barcode/MID resolution. A default member + is not needed when defining a true pool (where individual samples are not distinguished in the reads), or the reads have + been partitioned among the pool members (no leftovers). + + + + + Reference to the sample as determined from barcode/MID resolution or read partition. + + + + + + + + + + + + The LIBRARY_DESCRIPTOR specifies the origin of the material being + sequenced and any treatments that the material might have undergone that affect the + sequencing result. This specification is needed even if the platform does not + require a library construction step per se. + + + + + + The submitter's name for this library. + + + + + + + + + + LIBRARY_LAYOUT specifies whether to expect single, paired, or other configuration of reads. + In the case of paired reads, information about the relative distance and orientation is specified. + + + + + + + + + Reads are unpaired (usual case). + + + + + + + + + + + + + + + + + + Names the gene(s) or locus(loci) or other genomic feature(s) targeted by the sequence. + + + + + + + + + Reference to an archived primer or + probe set. Example: dbProbe + + + + + + + + + Bacterial small subunit ribosomal RNA, a locus used for + phylogenetic studies of bacteria and as a target for random target PCR in + environmental biodiversity screening. + + + + + Eukaryotic small subunit ribosomal RNA, a locus used for + phylogenetic studies of eukaryotes and as a target for random target PCR in + environmental biodiversity screening. + + + + + Structural ribosomal RNA for the large component, or large + subunit (LSU) of eukaryotic cytoplasmic ribosomes.. + + + + + RuBisCO large subunit : ribulose-1,5-bisphosphate + carboxylase/oxygenase large subunit, a locus used for phylogenetic studies + of plants. + + + + + Maturase K gene, a locus used for phylogenetic studies of + plants. + + + + + Mitochondrial cytochrome c oxidase 1 gene, a locus used for + phylogenetic studies of animals + + + + + Internal transcribed spacers 1 and 2 plus 5.8S rRNA region, + a locus used for phylogenetic studies of fungi. + + + + + All exonic regions of the genome. + + + + + Other locus, please describe. + + + + + + + + + Submitter supplied description of alternate locus and auxiliary + information. + + + + + + + + + + + + + + The optional pooling strategy indicates how the library or libraries are organized if multiple samples are involved. + + + + + + + + + + Free form text describing the protocol by which the sequencing library was constructed. + + + + + + + + + + Goal and setup of the individual library including library was constructed. + + + + + + Pick a sample to associate this experiment with. The sample may be an individual or a pool, + depending on how it is specified. + + + + + + The LIBRARY_DESCRIPTOR specifies the origin of the material being sequenced and any + treatments that the material might have undergone that affect the sequencing result. This specification is + needed even if the platform does not require a library construction step per se. + + + + + + The SPOT_DESCRIPTOR specifies how to decode the individual reads of interest from the + monolithic spot sequence. The spot descriptor contains aspects of the experimental design, platform, and + processing information. There will be two methods of specification: one will be an index into a table of + typical decodings, the other being an exact specification. This construct is needed for loading data and for + interpreting the loaded runs. It can be omitted if the loader can infer read layout (from multiple input + files or from one input files). + + + + + + + + + + + An Experiment specifies of what will be sequenced and how the sequencing will be performed. + It does not contain results. + An Experiment is composed of a design, a platform selection, and processing parameters. + + + + + + + + + + Short text that can be used to call out experiment records in searches or in displays. + This element is technically optional but should be used for all new records. + + + + + + + Identifies the parent study. + + + + + + + + + + + The library design including library properties, layout, protocol, targeting information, and spot and gap + descriptors. + + + + + + The PLATFORM record selects which sequencing platform and platform-specific runtime parameters. + This will be determined by the Center. + + + + + + + + + + Links to resources related to this experiment or experiment set (publication, datasets, online databases). + + + + + + + + + + + + + Properties and attributes of the experiment. These can be entered as free-form + tag-value pairs. + + + + + + + + + + + + + + + + + + + + + + + An EXPERMENT_SET is a container for a set of experiments and a common namespace. + + + + + + + \ No newline at end of file diff --git a/chord_metadata_service/restapi/api_views.py b/chord_metadata_service/restapi/api_views.py index 6be37f377..3c48a92cb 100644 --- a/chord_metadata_service/restapi/api_views.py +++ b/chord_metadata_service/restapi/api_views.py @@ -5,6 +5,7 @@ from django.conf import settings from django.views.decorators.cache import cache_page +import requests from rest_framework.permissions import AllowAny from rest_framework.response import Response from rest_framework.decorators import api_view, permission_classes @@ -360,7 +361,13 @@ def public_overview(_request): get: Overview of all public data in the database """ - + import xmltodict + namespace = { 'xs': None } + sra_experiment_response = requests.get('http://ftp.ebi.ac.uk/pub/databases/ena/doc/xsd/sra_1_5/SRA.experiment.xsd') + sra_experiment_data = xmltodict.parse(sra_experiment_response.content, namespaces=namespace) + for simple_type in sra_experiment_data['schema']['simpleType']: + simple_type['restriction']['enumeration'] + print(simple_type) if not settings.CONFIG_PUBLIC: return Response(settings.NO_PUBLIC_DATA_AVAILABLE) diff --git a/pyproject.toml b/pyproject.toml index e99aebd71..48c607898 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -55,6 +55,7 @@ strict-rfc3339 = "^0.7" tabulate = "^0.9.0" uritemplate = "^4.1.1" adrf = "^0.1.1" +xmltodict="0.13.0" [tool.poetry.group.dev.dependencies] coverage = "^6.5.0" From 2f765da94dca99a158798b9581807a3a6e2e7655 Mon Sep 17 00:00:00 2001 From: v-rocheleau Date: Thu, 14 Sep 2023 15:05:52 +0000 Subject: [PATCH 02/25] xsd ontologies utils --- chord_metadata_service/experiments/schemas.py | 9 ++++----- chord_metadata_service/ontologies/__init__.py | 5 +++++ .../ontologies/{ => sra}/SRA.experiment.xsd.xml | 0 chord_metadata_service/ontologies/utils.py | 13 +++++++++++++ chord_metadata_service/restapi/api_views.py | 7 ------- 5 files changed, 22 insertions(+), 12 deletions(-) create mode 100644 chord_metadata_service/ontologies/__init__.py rename chord_metadata_service/ontologies/{ => sra}/SRA.experiment.xsd.xml (100%) create mode 100644 chord_metadata_service/ontologies/utils.py diff --git a/chord_metadata_service/experiments/schemas.py b/chord_metadata_service/experiments/schemas.py index 3ea8a9620..85e695164 100644 --- a/chord_metadata_service/experiments/schemas.py +++ b/chord_metadata_service/experiments/schemas.py @@ -2,13 +2,12 @@ from .descriptions import EXPERIMENT, EXPERIMENT_RESULT, INSTRUMENT from chord_metadata_service.restapi.schemas import ONTOLOGY_CLASS_LIST, KEY_VALUE_OBJECT from chord_metadata_service.restapi.schema_utils import tag_ids_and_describe -import xmltodict +from chord_metadata_service.ontologies import readXsdSimpleTypeValues + __all__ = ["EXPERIMENT_SCHEMA", "EXPERIMENT_RESULT_SCHEMA", "INSTRUMENT_SCHEMA"] -sra_common_response = requests.get('http://ftp.ebi.ac.uk/pub/databases/ena/doc/xsd/sra_1_5/SRA.common.xsd') -sra_experiment_response = requests.get('http://ftp.ebi.ac.uk/pub/databases/ena/doc/xsd/sra_1_5/SRA.experiment.xsd') -sra_experiment_data = xmltodict.parse(sra_experiment_response.content) +LIBRARY_STRATEGIES = readXsdSimpleTypeValues('chord_metadata_service/ontologies/sra/SRA.experiment.xsd.xml', 'typeLibraryStrategy') EXPERIMENT_RESULT_SCHEMA = tag_ids_and_describe({ "$schema": "http://json-schema.org/draft-07/schema#", @@ -108,7 +107,7 @@ "molecule_ontology": ONTOLOGY_CLASS_LIST, "library_strategy": { "type": "string", - "enum": ["Bisulfite-Seq", "RNA-Seq", "ChIP-Seq", "WES", "Other"] + "enum": LIBRARY_STRATEGIES }, "library_source": { "type": "string", diff --git a/chord_metadata_service/ontologies/__init__.py b/chord_metadata_service/ontologies/__init__.py new file mode 100644 index 000000000..e48d1171f --- /dev/null +++ b/chord_metadata_service/ontologies/__init__.py @@ -0,0 +1,5 @@ +from .utils import readXsdSimpleTypeValues + +__all__ = [ + "readXsdSimpleTypeValues", +] diff --git a/chord_metadata_service/ontologies/SRA.experiment.xsd.xml b/chord_metadata_service/ontologies/sra/SRA.experiment.xsd.xml similarity index 100% rename from chord_metadata_service/ontologies/SRA.experiment.xsd.xml rename to chord_metadata_service/ontologies/sra/SRA.experiment.xsd.xml diff --git a/chord_metadata_service/ontologies/utils.py b/chord_metadata_service/ontologies/utils.py new file mode 100644 index 000000000..6f70c402a --- /dev/null +++ b/chord_metadata_service/ontologies/utils.py @@ -0,0 +1,13 @@ +import xmltodict + +def readXsdSimpleTypeValues(xsd_file_path: str, type_name: str): + """Reads an XML Schema Definition (XSD) file and returns a type's values. + The XSD file is parsed using xmltodict following this spec: + https://www.xml.com/pub/a/2006/05/31/converting-between-xml-and-json.html + """ + sra_file = open(xsd_file_path).read() + sra_experiment_data = xmltodict.parse(sra_file, namespaces={ 'xs': None }) + simple_types = {sp["@name"]: sp for sp in sra_experiment_data["schema"]["simpleType"]} + target_type = simple_types[type_name] + values = [val['@value'] for val in target_type['restriction']['enumeration']] + return values diff --git a/chord_metadata_service/restapi/api_views.py b/chord_metadata_service/restapi/api_views.py index 3c48a92cb..065112257 100644 --- a/chord_metadata_service/restapi/api_views.py +++ b/chord_metadata_service/restapi/api_views.py @@ -361,13 +361,6 @@ def public_overview(_request): get: Overview of all public data in the database """ - import xmltodict - namespace = { 'xs': None } - sra_experiment_response = requests.get('http://ftp.ebi.ac.uk/pub/databases/ena/doc/xsd/sra_1_5/SRA.experiment.xsd') - sra_experiment_data = xmltodict.parse(sra_experiment_response.content, namespaces=namespace) - for simple_type in sra_experiment_data['schema']['simpleType']: - simple_type['restriction']['enumeration'] - print(simple_type) if not settings.CONFIG_PUBLIC: return Response(settings.NO_PUBLIC_DATA_AVAILABLE) From e479e4c0500b7a051771350a35b89df5f5e8eca3 Mon Sep 17 00:00:00 2001 From: v-rocheleau Date: Thu, 14 Sep 2023 15:07:57 +0000 Subject: [PATCH 03/25] lint --- chord_metadata_service/experiments/schemas.py | 6 ++++-- chord_metadata_service/ontologies/utils.py | 3 ++- chord_metadata_service/restapi/api_views.py | 1 - 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/chord_metadata_service/experiments/schemas.py b/chord_metadata_service/experiments/schemas.py index 85e695164..95d237b1c 100644 --- a/chord_metadata_service/experiments/schemas.py +++ b/chord_metadata_service/experiments/schemas.py @@ -1,4 +1,3 @@ -import requests from .descriptions import EXPERIMENT, EXPERIMENT_RESULT, INSTRUMENT from chord_metadata_service.restapi.schemas import ONTOLOGY_CLASS_LIST, KEY_VALUE_OBJECT from chord_metadata_service.restapi.schema_utils import tag_ids_and_describe @@ -7,7 +6,10 @@ __all__ = ["EXPERIMENT_SCHEMA", "EXPERIMENT_RESULT_SCHEMA", "INSTRUMENT_SCHEMA"] -LIBRARY_STRATEGIES = readXsdSimpleTypeValues('chord_metadata_service/ontologies/sra/SRA.experiment.xsd.xml', 'typeLibraryStrategy') +LIBRARY_STRATEGIES = readXsdSimpleTypeValues( + 'chord_metadata_service/ontologies/sra/SRA.experiment.xsd.xml', + 'typeLibraryStrategy', +) EXPERIMENT_RESULT_SCHEMA = tag_ids_and_describe({ "$schema": "http://json-schema.org/draft-07/schema#", diff --git a/chord_metadata_service/ontologies/utils.py b/chord_metadata_service/ontologies/utils.py index 6f70c402a..3b3644269 100644 --- a/chord_metadata_service/ontologies/utils.py +++ b/chord_metadata_service/ontologies/utils.py @@ -1,12 +1,13 @@ import xmltodict + def readXsdSimpleTypeValues(xsd_file_path: str, type_name: str): """Reads an XML Schema Definition (XSD) file and returns a type's values. The XSD file is parsed using xmltodict following this spec: https://www.xml.com/pub/a/2006/05/31/converting-between-xml-and-json.html """ sra_file = open(xsd_file_path).read() - sra_experiment_data = xmltodict.parse(sra_file, namespaces={ 'xs': None }) + sra_experiment_data = xmltodict.parse(sra_file, namespaces={'xs': None}) simple_types = {sp["@name"]: sp for sp in sra_experiment_data["schema"]["simpleType"]} target_type = simple_types[type_name] values = [val['@value'] for val in target_type['restriction']['enumeration']] diff --git a/chord_metadata_service/restapi/api_views.py b/chord_metadata_service/restapi/api_views.py index 065112257..6a1151c2a 100644 --- a/chord_metadata_service/restapi/api_views.py +++ b/chord_metadata_service/restapi/api_views.py @@ -5,7 +5,6 @@ from django.conf import settings from django.views.decorators.cache import cache_page -import requests from rest_framework.permissions import AllowAny from rest_framework.response import Response from rest_framework.decorators import api_view, permission_classes From 26becc31e2624d2d60c20746de34a2b3a08fbc14 Mon Sep 17 00:00:00 2001 From: v-rocheleau Date: Thu, 14 Sep 2023 15:35:54 +0000 Subject: [PATCH 04/25] experiment library strategy data migration --- .../experiments/migrations/0009_v4_1_0.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 chord_metadata_service/experiments/migrations/0009_v4_1_0.py diff --git a/chord_metadata_service/experiments/migrations/0009_v4_1_0.py b/chord_metadata_service/experiments/migrations/0009_v4_1_0.py new file mode 100644 index 000000000..458f41c90 --- /dev/null +++ b/chord_metadata_service/experiments/migrations/0009_v4_1_0.py @@ -0,0 +1,17 @@ +from django.db import migrations + + +def set_experiment_library_strategy(apps, _schema_editor): + Experiment = apps.get_model("experiments", "Experiment") + for exp in Experiment.objects.filter(library_strategy="WES"): + exp.library_strategy = "WXS" + exp.save() + +class Migration(migrations.Migration): + dependencies = [ + ('experiments', '0007_v4_0_0'), + ] + + operations = [ + migrations.RunPython(set_experiment_library_strategy) + ] From 762d23aaca6a04ba8607d3e41bab73559f4c3b7f Mon Sep 17 00:00:00 2001 From: v-rocheleau Date: Thu, 14 Sep 2023 16:08:29 +0000 Subject: [PATCH 05/25] xsd dir name change --- chord_metadata_service/experiments/schemas.py | 2 +- .../ontologies/{sra => xsd}/SRA.experiment.xsd.xml | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename chord_metadata_service/ontologies/{sra => xsd}/SRA.experiment.xsd.xml (100%) diff --git a/chord_metadata_service/experiments/schemas.py b/chord_metadata_service/experiments/schemas.py index 95d237b1c..3b0206b3b 100644 --- a/chord_metadata_service/experiments/schemas.py +++ b/chord_metadata_service/experiments/schemas.py @@ -7,7 +7,7 @@ __all__ = ["EXPERIMENT_SCHEMA", "EXPERIMENT_RESULT_SCHEMA", "INSTRUMENT_SCHEMA"] LIBRARY_STRATEGIES = readXsdSimpleTypeValues( - 'chord_metadata_service/ontologies/sra/SRA.experiment.xsd.xml', + 'chord_metadata_service/ontologies/xsd/SRA.experiment.xsd.xml', 'typeLibraryStrategy', ) diff --git a/chord_metadata_service/ontologies/sra/SRA.experiment.xsd.xml b/chord_metadata_service/ontologies/xsd/SRA.experiment.xsd.xml similarity index 100% rename from chord_metadata_service/ontologies/sra/SRA.experiment.xsd.xml rename to chord_metadata_service/ontologies/xsd/SRA.experiment.xsd.xml From a32bd01f405816f170fc95c058aae513bca7513b Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Thu, 14 Sep 2023 12:17:35 -0400 Subject: [PATCH 06/25] fix poetry.lock --- poetry.lock | 150 +++++++++++++--------------------------------------- 1 file changed, 38 insertions(+), 112 deletions(-) diff --git a/poetry.lock b/poetry.lock index e2649a291..4ab834f22 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,10 +1,9 @@ -# This file is automatically @generated by Poetry and should not be changed by hand. +# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand. [[package]] name = "adrf" version = "0.1.2" description = "Async support for Django REST framework" -category = "main" optional = false python-versions = ">=3.8" files = [ @@ -21,7 +20,6 @@ djangorestframework = ">=3.14.0" name = "aiohttp" version = "3.8.5" description = "Async http client/server framework (asyncio)" -category = "main" optional = false python-versions = ">=3.6" files = [ @@ -130,7 +128,6 @@ speedups = ["Brotli", "aiodns", "cchardet"] name = "aiosignal" version = "1.3.1" description = "aiosignal: a list of registered asynchronous callbacks" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -145,7 +142,6 @@ frozenlist = ">=1.1.0" name = "arrow" version = "1.2.3" description = "Better dates & times for Python" -category = "main" optional = false python-versions = ">=3.6" files = [ @@ -160,7 +156,6 @@ python-dateutil = ">=2.7.0" name = "asgiref" version = "3.7.2" description = "ASGI specs, helper code, and adapters" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -178,7 +173,6 @@ tests = ["mypy (>=0.800)", "pytest", "pytest-asyncio"] name = "async-property" version = "0.2.2" description = "Python decorator for async properties." -category = "main" optional = false python-versions = "*" files = [ @@ -190,7 +184,6 @@ files = [ name = "async-timeout" version = "4.0.3" description = "Timeout context manager for asyncio programs" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -202,7 +195,6 @@ files = [ name = "attrs" version = "23.1.0" description = "Classes Without Boilerplate" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -221,7 +213,6 @@ tests-no-zope = ["cloudpickle", "hypothesis", "mypy (>=1.1.1)", "pympler", "pyte name = "backports-zoneinfo" version = "0.2.1" description = "Backport of the standard library zoneinfo module" -category = "main" optional = false python-versions = ">=3.6" files = [ @@ -250,7 +241,6 @@ tzdata = ["tzdata"] name = "bento-lib" version = "6.1.0" description = "A set of common utilities and helpers for Bento platform services." -category = "main" optional = false python-versions = ">=3.8.1" files = [ @@ -278,7 +268,6 @@ quart = ["quart (>=0.18.4,<0.19)"] name = "cachetools" version = "5.3.1" description = "Extensible memoizing collections and decorators" -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -290,7 +279,6 @@ files = [ name = "certifi" version = "2023.7.22" description = "Python package for providing Mozilla's CA Bundle." -category = "main" optional = false python-versions = ">=3.6" files = [ @@ -302,7 +290,6 @@ files = [ name = "cffi" version = "1.15.1" description = "Foreign Function Interface for Python calling C code." -category = "main" optional = false python-versions = "*" files = [ @@ -379,7 +366,6 @@ pycparser = "*" name = "chardet" version = "5.2.0" description = "Universal encoding detector for Python 3" -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -391,7 +377,6 @@ files = [ name = "charset-normalizer" version = "3.2.0" description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." -category = "main" optional = false python-versions = ">=3.7.0" files = [ @@ -476,7 +461,6 @@ files = [ name = "colorama" version = "0.4.6" description = "Cross-platform colored terminal text." -category = "dev" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" files = [ @@ -488,7 +472,6 @@ files = [ name = "coverage" version = "6.5.0" description = "Code coverage measurement for Python" -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -551,7 +534,6 @@ toml = ["tomli"] name = "cryptography" version = "41.0.3" description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -595,41 +577,35 @@ test-randomorder = ["pytest-randomly"] [[package]] name = "debugpy" -version = "1.7.0" +version = "1.8.0" description = "An implementation of the Debug Adapter Protocol for Python" -category = "dev" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "debugpy-1.7.0-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:17ad9a681aca1704c55b9a5edcb495fa8f599e4655c9872b7f9cf3dc25890d48"}, - {file = "debugpy-1.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1285920a3f9a75f5d1acf59ab1b9da9ae6eb9a05884cd7674f95170c9cafa4de"}, - {file = "debugpy-1.7.0-cp310-cp310-win32.whl", hash = "sha256:a6f43a681c5025db1f1c0568069d1d1bad306a02e7c36144912b26d9c90e4724"}, - {file = "debugpy-1.7.0-cp310-cp310-win_amd64.whl", hash = "sha256:9e9571d831ad3c75b5fb6f3efcb71c471cf2a74ba84af6ac1c79ce00683bed4b"}, - {file = "debugpy-1.7.0-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:538765a41198aa88cc089295b39c7322dd598f9ef1d52eaae12145c63bf9430a"}, - {file = "debugpy-1.7.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c7e8cf91f8f3f9b5fad844dd88427b85d398bda1e2a0cd65d5a21312fcbc0c6f"}, - {file = "debugpy-1.7.0-cp311-cp311-win32.whl", hash = "sha256:18a69f8e142a716310dd0af6d7db08992aed99e2606108732efde101e7c65e2a"}, - {file = "debugpy-1.7.0-cp311-cp311-win_amd64.whl", hash = "sha256:7515a5ba5ee9bfe956685909c5f28734c1cecd4ee813523363acfe3ca824883a"}, - {file = "debugpy-1.7.0-cp37-cp37m-macosx_11_0_x86_64.whl", hash = "sha256:bc8da67ade39d9e75608cdb8601d07e63a4e85966e0572c981f14e2cf42bcdef"}, - {file = "debugpy-1.7.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a5036e918c6ba8fc4c4f1fd0207d81db634431a02f0dc2ba51b12fd793c8c9de"}, - {file = "debugpy-1.7.0-cp37-cp37m-win32.whl", hash = "sha256:d5be95b3946a4d7b388e45068c7b75036ac5a610f41014aee6cafcd5506423ad"}, - {file = "debugpy-1.7.0-cp37-cp37m-win_amd64.whl", hash = "sha256:0e90314a078d4e3f009520c8387aba8f74c3034645daa7a332a3d1bb81335756"}, - {file = "debugpy-1.7.0-cp38-cp38-macosx_11_0_x86_64.whl", hash = "sha256:1565fd904f9571c430adca597771255cff4f92171486fced6f765dcbdfc8ec8d"}, - {file = "debugpy-1.7.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6516f36a2e95b3be27f171f12b641e443863f4ad5255d0fdcea6ae0be29bb912"}, - {file = "debugpy-1.7.0-cp38-cp38-win32.whl", hash = "sha256:2b0e489613bc066051439df04c56777ec184b957d6810cb65f235083aef7a0dc"}, - {file = "debugpy-1.7.0-cp38-cp38-win_amd64.whl", hash = "sha256:7bf0b4bbd841b2397b6a8de15da9227f1164f6d43ceee971c50194eaed930a9d"}, - {file = "debugpy-1.7.0-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:ad22e1095b9977af432465c1e09132ba176e18df3834b1efcab1a449346b350b"}, - {file = "debugpy-1.7.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f625e427f21423e5874139db529e18cb2966bdfcc1cb87a195538c5b34d163d1"}, - {file = "debugpy-1.7.0-cp39-cp39-win32.whl", hash = "sha256:18bca8429d6632e2d3435055416d2d88f0309cc39709f4f6355c8d412cc61f24"}, - {file = "debugpy-1.7.0-cp39-cp39-win_amd64.whl", hash = "sha256:dc8a12ac8b97ef3d6973c6679a093138c7c9b03eb685f0e253269a195f651559"}, - {file = "debugpy-1.7.0-py2.py3-none-any.whl", hash = "sha256:f6de2e6f24f62969e0f0ef682d78c98161c4dca29e9fb05df4d2989005005502"}, - {file = "debugpy-1.7.0.zip", hash = "sha256:676911c710e85567b17172db934a71319ed9d995104610ce23fd74a07f66e6f6"}, + {file = "debugpy-1.8.0-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:7fb95ca78f7ac43393cd0e0f2b6deda438ec7c5e47fa5d38553340897d2fbdfb"}, + {file = "debugpy-1.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ef9ab7df0b9a42ed9c878afd3eaaff471fce3fa73df96022e1f5c9f8f8c87ada"}, + {file = "debugpy-1.8.0-cp310-cp310-win32.whl", hash = "sha256:a8b7a2fd27cd9f3553ac112f356ad4ca93338feadd8910277aff71ab24d8775f"}, + {file = "debugpy-1.8.0-cp310-cp310-win_amd64.whl", hash = "sha256:5d9de202f5d42e62f932507ee8b21e30d49aae7e46d5b1dd5c908db1d7068637"}, + {file = "debugpy-1.8.0-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:ef54404365fae8d45cf450d0544ee40cefbcb9cb85ea7afe89a963c27028261e"}, + {file = "debugpy-1.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:60009b132c91951354f54363f8ebdf7457aeb150e84abba5ae251b8e9f29a8a6"}, + {file = "debugpy-1.8.0-cp311-cp311-win32.whl", hash = "sha256:8cd0197141eb9e8a4566794550cfdcdb8b3db0818bdf8c49a8e8f8053e56e38b"}, + {file = "debugpy-1.8.0-cp311-cp311-win_amd64.whl", hash = "sha256:a64093656c4c64dc6a438e11d59369875d200bd5abb8f9b26c1f5f723622e153"}, + {file = "debugpy-1.8.0-cp38-cp38-macosx_11_0_x86_64.whl", hash = "sha256:b05a6b503ed520ad58c8dc682749113d2fd9f41ffd45daec16e558ca884008cd"}, + {file = "debugpy-1.8.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3c6fb41c98ec51dd010d7ed650accfd07a87fe5e93eca9d5f584d0578f28f35f"}, + {file = "debugpy-1.8.0-cp38-cp38-win32.whl", hash = "sha256:46ab6780159eeabb43c1495d9c84cf85d62975e48b6ec21ee10c95767c0590aa"}, + {file = "debugpy-1.8.0-cp38-cp38-win_amd64.whl", hash = "sha256:bdc5ef99d14b9c0fcb35351b4fbfc06ac0ee576aeab6b2511702e5a648a2e595"}, + {file = "debugpy-1.8.0-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:61eab4a4c8b6125d41a34bad4e5fe3d2cc145caecd63c3fe953be4cc53e65bf8"}, + {file = "debugpy-1.8.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:125b9a637e013f9faac0a3d6a82bd17c8b5d2c875fb6b7e2772c5aba6d082332"}, + {file = "debugpy-1.8.0-cp39-cp39-win32.whl", hash = "sha256:57161629133113c97b387382045649a2b985a348f0c9366e22217c87b68b73c6"}, + {file = "debugpy-1.8.0-cp39-cp39-win_amd64.whl", hash = "sha256:e3412f9faa9ade82aa64a50b602544efcba848c91384e9f93497a458767e6926"}, + {file = "debugpy-1.8.0-py2.py3-none-any.whl", hash = "sha256:9c9b0ac1ce2a42888199df1a1906e45e6f3c9555497643a85e0bf2406e3ffbc4"}, + {file = "debugpy-1.8.0.zip", hash = "sha256:12af2c55b419521e33d5fb21bd022df0b5eb267c3e178f1d374a63a2a6bdccd0"}, ] [[package]] name = "distlib" version = "0.3.7" description = "Distribution utilities" -category = "dev" optional = false python-versions = "*" files = [ @@ -641,7 +617,6 @@ files = [ name = "django" version = "4.2.5" description = "A high-level Python web framework that encourages rapid development and clean, pragmatic design." -category = "main" optional = false python-versions = ">=3.8" files = [ @@ -663,7 +638,6 @@ bcrypt = ["bcrypt"] name = "django-autocomplete-light" version = "3.9.7" description = "Fresh autocompletes for Django" -category = "main" optional = false python-versions = "*" files = [ @@ -684,7 +658,6 @@ tags = ["django-taggit"] name = "django-cors-headers" version = "3.14.0" description = "django-cors-headers is a Django application for handling the server headers required for Cross-Origin Resource Sharing (CORS)." -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -699,7 +672,6 @@ Django = ">=3.2" name = "django-filter" version = "22.1" description = "Django-filter is a reusable Django application for allowing users to filter querysets dynamically." -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -714,7 +686,6 @@ Django = ">=3.2" name = "djangorestframework" version = "3.14.0" description = "Web APIs for Django, made easy." -category = "main" optional = false python-versions = ">=3.6" files = [ @@ -730,7 +701,6 @@ pytz = "*" name = "djangorestframework-camel-case" version = "1.4.2" description = "Camel case JSON support for Django REST framework." -category = "main" optional = false python-versions = ">=3.5" files = [ @@ -741,7 +711,6 @@ files = [ name = "drf-spectacular" version = "0.25.1" description = "Sane and flexible OpenAPI 3 schema generation for Django REST framework" -category = "main" optional = false python-versions = ">=3.6" files = [ @@ -765,7 +734,6 @@ sidecar = ["drf-spectacular-sidecar"] name = "elasticsearch" version = "7.8.1" description = "Python client for Elasticsearch" -category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, <4" files = [ @@ -787,7 +755,6 @@ requests = ["requests (>=2.4.0,<3.0.0)"] name = "fhirclient" version = "3.2.0" description = "A flexible client for FHIR servers supporting the SMART on FHIR protocol" -category = "main" optional = false python-versions = "*" files = [ @@ -801,28 +768,24 @@ requests = "*" [[package]] name = "filelock" -version = "3.12.3" +version = "3.12.4" description = "A platform independent file lock." -category = "dev" optional = false python-versions = ">=3.8" files = [ - {file = "filelock-3.12.3-py3-none-any.whl", hash = "sha256:f067e40ccc40f2b48395a80fcbd4728262fab54e232e090a4063ab804179efeb"}, - {file = "filelock-3.12.3.tar.gz", hash = "sha256:0ecc1dd2ec4672a10c8550a8182f1bd0c0a5088470ecd5a125e45f49472fac3d"}, + {file = "filelock-3.12.4-py3-none-any.whl", hash = "sha256:08c21d87ded6e2b9da6728c3dff51baf1dcecf973b768ef35bcbc3447edb9ad4"}, + {file = "filelock-3.12.4.tar.gz", hash = "sha256:2e6f249f1f3654291606e046b09f1fd5eac39b360664c27f5aad072012f8bcbd"}, ] -[package.dependencies] -typing-extensions = {version = ">=4.7.1", markers = "python_version < \"3.11\""} - [package.extras] docs = ["furo (>=2023.7.26)", "sphinx (>=7.1.2)", "sphinx-autodoc-typehints (>=1.24)"] testing = ["covdefaults (>=2.3)", "coverage (>=7.3)", "diff-cover (>=7.7)", "pytest (>=7.4)", "pytest-cov (>=4.1)", "pytest-mock (>=3.11.1)", "pytest-timeout (>=2.1)"] +typing = ["typing-extensions (>=4.7.1)"] [[package]] name = "flake8" version = "6.1.0" description = "the modular source code checker: pep8 pyflakes and co" -category = "dev" optional = false python-versions = ">=3.8.1" files = [ @@ -839,7 +802,6 @@ pyflakes = ">=3.1.0,<3.2.0" name = "fqdn" version = "1.5.1" description = "Validates fully-qualified domain names against RFC 1123, so that they are acceptable to modern bowsers" -category = "main" optional = false python-versions = ">=2.7, !=3.0, !=3.1, !=3.2, !=3.3, !=3.4, <4" files = [ @@ -851,7 +813,6 @@ files = [ name = "frozenlist" version = "1.4.0" description = "A list-like structure which implements collections.abc.MutableSequence" -category = "main" optional = false python-versions = ">=3.8" files = [ @@ -922,7 +883,6 @@ files = [ name = "idna" version = "3.4" description = "Internationalized Domain Names in Applications (IDNA)" -category = "main" optional = false python-versions = ">=3.5" files = [ @@ -934,7 +894,6 @@ files = [ name = "importlib-resources" version = "6.0.1" description = "Read resources from Python packages" -category = "main" optional = false python-versions = ">=3.8" files = [ @@ -953,7 +912,6 @@ testing = ["pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", name = "inflection" version = "0.5.1" description = "A port of Ruby on Rails inflector to Python" -category = "main" optional = false python-versions = ">=3.5" files = [ @@ -965,7 +923,6 @@ files = [ name = "isodate" version = "0.6.1" description = "An ISO 8601 date/time/duration parser and formatter" -category = "main" optional = false python-versions = "*" files = [ @@ -980,7 +937,6 @@ six = "*" name = "isoduration" version = "20.11.0" description = "Operations with ISO 8601 durations" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -995,18 +951,17 @@ arrow = ">=0.15.0" name = "jsonpointer" version = "2.4" description = "Identify specific nodes in a JSON document (RFC 6901)" -category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*" files = [ {file = "jsonpointer-2.4-py2.py3-none-any.whl", hash = "sha256:15d51bba20eea3165644553647711d150376234112651b4f1811022aecad7d7a"}, + {file = "jsonpointer-2.4.tar.gz", hash = "sha256:585cee82b70211fa9e6043b7bb89db6e1aa49524340dde8ad6b63206ea689d88"}, ] [[package]] name = "jsonschema" version = "4.17.3" description = "An implementation of JSON Schema validation for Python" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1036,7 +991,6 @@ format-nongpl = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339- name = "markupsafe" version = "2.1.3" description = "Safely add untrusted strings to HTML/XML markup." -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1106,7 +1060,6 @@ files = [ name = "mccabe" version = "0.7.0" description = "McCabe checker, plugin for flake8" -category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -1118,7 +1071,6 @@ files = [ name = "multidict" version = "6.0.4" description = "multidict implementation" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1202,7 +1154,6 @@ files = [ name = "packaging" version = "23.1" description = "Core utilities for Python packages" -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1214,7 +1165,6 @@ files = [ name = "pkgutil-resolve-name" version = "1.3.10" description = "Resolve a name to an object." -category = "main" optional = false python-versions = ">=3.6" files = [ @@ -1226,7 +1176,6 @@ files = [ name = "platformdirs" version = "3.10.0" description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1242,7 +1191,6 @@ test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.4)", "pytest-co name = "pluggy" version = "1.3.0" description = "plugin and hook calling mechanisms for python" -category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -1258,7 +1206,6 @@ testing = ["pytest", "pytest-benchmark"] name = "psycopg2-binary" version = "2.9.7" description = "psycopg2 - Python-PostgreSQL Database Adapter" -category = "main" optional = false python-versions = ">=3.6" files = [ @@ -1328,7 +1275,6 @@ files = [ name = "pycodestyle" version = "2.11.0" description = "Python style guide checker" -category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -1340,7 +1286,6 @@ files = [ name = "pycparser" version = "2.21" description = "C parser in Python" -category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -1352,7 +1297,6 @@ files = [ name = "pyflakes" version = "3.1.0" description = "passive checker of Python programs" -category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -1364,7 +1308,6 @@ files = [ name = "pyjwt" version = "2.8.0" description = "JSON Web Token implementation in Python" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1385,7 +1328,6 @@ tests = ["coverage[toml] (==5.0.4)", "pytest (>=6.0.0,<7.0.0)"] name = "pyparsing" version = "3.1.1" description = "pyparsing module - Classes and methods to define and execute parsing grammars" -category = "main" optional = false python-versions = ">=3.6.8" files = [ @@ -1400,7 +1342,6 @@ diagrams = ["jinja2", "railroad-diagrams"] name = "pyproject-api" version = "1.6.1" description = "API to interact with the python pyproject.toml based projects" -category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -1420,7 +1361,6 @@ testing = ["covdefaults (>=2.3)", "pytest (>=7.4)", "pytest-cov (>=4.1)", "pytes name = "pyrsistent" version = "0.19.3" description = "Persistent/Functional/Immutable data structures" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1457,7 +1397,6 @@ files = [ name = "python-dateutil" version = "2.8.2" description = "Extensions to the standard Python datetime module" -category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" files = [ @@ -1472,7 +1411,6 @@ six = ">=1.5" name = "python-dotenv" version = "0.21.1" description = "Read key-value pairs from a .env file and set them as environment variables" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1487,7 +1425,6 @@ cli = ["click (>=5.0)"] name = "pytz" version = "2023.3.post1" description = "World timezone definitions, modern and historical" -category = "main" optional = false python-versions = "*" files = [ @@ -1499,7 +1436,6 @@ files = [ name = "pyyaml" version = "6.0.1" description = "YAML parser and emitter for Python" -category = "main" optional = false python-versions = ">=3.6" files = [ @@ -1559,7 +1495,6 @@ files = [ name = "rdflib" version = "6.3.2" description = "RDFLib is a Python library for working with RDF, a simple yet powerful language for representing information." -category = "main" optional = false python-versions = ">=3.7,<4.0" files = [ @@ -1581,7 +1516,6 @@ networkx = ["networkx (>=2.0.0,<3.0.0)"] name = "redis" version = "4.6.0" description = "Python client for Redis database and key-value store" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1600,7 +1534,6 @@ ocsp = ["cryptography (>=36.0.1)", "pyopenssl (==20.0.1)", "requests (>=2.26.0)" name = "requests" version = "2.31.0" description = "Python HTTP for Humans." -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1622,7 +1555,6 @@ use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] name = "rfc3339-validator" version = "0.1.4" description = "A pure python RFC3339 validator" -category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" files = [ @@ -1637,7 +1569,6 @@ six = "*" name = "rfc3987" version = "1.3.8" description = "Parsing and validation of URIs (RFC 3986) and IRIs (RFC 3987)" -category = "main" optional = false python-versions = "*" files = [ @@ -1649,7 +1580,6 @@ files = [ name = "six" version = "1.16.0" description = "Python 2 and 3 compatibility utilities" -category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" files = [ @@ -1661,7 +1591,6 @@ files = [ name = "sqlparse" version = "0.4.4" description = "A non-validating SQL parser." -category = "main" optional = false python-versions = ">=3.5" files = [ @@ -1678,7 +1607,6 @@ test = ["pytest", "pytest-cov"] name = "strict-rfc3339" version = "0.7" description = "Strict, simple, lightweight RFC3339 functions" -category = "main" optional = false python-versions = "*" files = [ @@ -1689,7 +1617,6 @@ files = [ name = "tabulate" version = "0.9.0" description = "Pretty-print tabular data" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1704,7 +1631,6 @@ widechars = ["wcwidth"] name = "tomli" version = "2.0.1" description = "A lil' TOML parser" -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1716,7 +1642,6 @@ files = [ name = "tox" version = "4.11.3" description = "tox is a generic virtualenv management and test command line tool" -category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -1744,7 +1669,6 @@ testing = ["build[virtualenv] (>=0.10)", "covdefaults (>=2.3)", "detect-test-pol name = "typing-extensions" version = "4.7.1" description = "Backported and Experimental Type Hints for Python 3.7+" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1756,7 +1680,6 @@ files = [ name = "tzdata" version = "2023.3" description = "Provider of IANA time zone data" -category = "main" optional = false python-versions = ">=2" files = [ @@ -1768,7 +1691,6 @@ files = [ name = "uri-template" version = "1.3.0" description = "RFC 6570 URI Template Processor" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1783,7 +1705,6 @@ dev = ["flake8", "flake8-annotations", "flake8-bandit", "flake8-bugbear", "flake name = "uritemplate" version = "4.1.1" description = "Implementation of RFC 6570 URI Templates" -category = "main" optional = false python-versions = ">=3.6" files = [ @@ -1795,7 +1716,6 @@ files = [ name = "urllib3" version = "2.0.4" description = "HTTP library with thread-safe connection pooling, file post, and more." -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1813,7 +1733,6 @@ zstd = ["zstandard (>=0.18.0)"] name = "virtualenv" version = "20.24.5" description = "Virtual Python Environment builder" -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1834,7 +1753,6 @@ test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess name = "webcolors" version = "1.13" description = "A library for working with the color formats defined by HTML and CSS." -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1850,7 +1768,6 @@ tests = ["pytest", "pytest-cov"] name = "werkzeug" version = "2.3.7" description = "The comprehensive WSGI web application library." -category = "main" optional = false python-versions = ">=3.8" files = [ @@ -1864,11 +1781,21 @@ MarkupSafe = ">=2.1.1" [package.extras] watchdog = ["watchdog (>=2.3)"] +[[package]] +name = "xmltodict" +version = "0.13.0" +description = "Makes working with XML feel like you are working with JSON" +optional = false +python-versions = ">=3.4" +files = [ + {file = "xmltodict-0.13.0-py2.py3-none-any.whl", hash = "sha256:aa89e8fd76320154a40d19a0df04a4695fb9dc5ba977cbb68ab3e4eb225e7852"}, + {file = "xmltodict-0.13.0.tar.gz", hash = "sha256:341595a488e3e01a85a9d8911d8912fd922ede5fecc4dce437eb4b6c8d037e56"}, +] + [[package]] name = "yarl" version = "1.9.2" description = "Yet another URL library" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1956,7 +1883,6 @@ multidict = ">=4.0" name = "zipp" version = "3.16.2" description = "Backport of pathlib-compatible object wrapper for zip files" -category = "main" optional = false python-versions = ">=3.8" files = [ @@ -1971,4 +1897,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = "^3.8.1" -content-hash = "c332f4a015b9d342e05f093fa68a17766ec676a714551c19c7e7ebae2e2930db" +content-hash = "abe23a4eb83ecba4865f7fbccd5409ac2d79f46cc9306b3d3a61500c94702e61" From 1390066c393d5dee495d7e22f406d3951b930381 Mon Sep 17 00:00:00 2001 From: v-rocheleau Date: Thu, 14 Sep 2023 16:26:45 +0000 Subject: [PATCH 07/25] code clean --- chord_metadata_service/experiments/schemas.py | 4 ++-- chord_metadata_service/ontologies/__init__.py | 4 ++-- chord_metadata_service/ontologies/utils.py | 10 ++++++---- chord_metadata_service/restapi/api_views.py | 1 + 4 files changed, 11 insertions(+), 8 deletions(-) diff --git a/chord_metadata_service/experiments/schemas.py b/chord_metadata_service/experiments/schemas.py index 3b0206b3b..4a6bfb9d2 100644 --- a/chord_metadata_service/experiments/schemas.py +++ b/chord_metadata_service/experiments/schemas.py @@ -1,12 +1,12 @@ from .descriptions import EXPERIMENT, EXPERIMENT_RESULT, INSTRUMENT from chord_metadata_service.restapi.schemas import ONTOLOGY_CLASS_LIST, KEY_VALUE_OBJECT from chord_metadata_service.restapi.schema_utils import tag_ids_and_describe -from chord_metadata_service.ontologies import readXsdSimpleTypeValues +from chord_metadata_service.ontologies import read_xsd_simple_type_values __all__ = ["EXPERIMENT_SCHEMA", "EXPERIMENT_RESULT_SCHEMA", "INSTRUMENT_SCHEMA"] -LIBRARY_STRATEGIES = readXsdSimpleTypeValues( +LIBRARY_STRATEGIES = read_xsd_simple_type_values( 'chord_metadata_service/ontologies/xsd/SRA.experiment.xsd.xml', 'typeLibraryStrategy', ) diff --git a/chord_metadata_service/ontologies/__init__.py b/chord_metadata_service/ontologies/__init__.py index e48d1171f..64c3158e0 100644 --- a/chord_metadata_service/ontologies/__init__.py +++ b/chord_metadata_service/ontologies/__init__.py @@ -1,5 +1,5 @@ -from .utils import readXsdSimpleTypeValues +from .utils import read_xsd_simple_type_values __all__ = [ - "readXsdSimpleTypeValues", + "read_xsd_simple_type_values", ] diff --git a/chord_metadata_service/ontologies/utils.py b/chord_metadata_service/ontologies/utils.py index 3b3644269..be5a7ca84 100644 --- a/chord_metadata_service/ontologies/utils.py +++ b/chord_metadata_service/ontologies/utils.py @@ -1,14 +1,16 @@ +from typing import List import xmltodict -def readXsdSimpleTypeValues(xsd_file_path: str, type_name: str): +def read_xsd_simple_type_values(xsd_file_path: str, type_name: str) -> List[str]: """Reads an XML Schema Definition (XSD) file and returns a type's values. The XSD file is parsed using xmltodict following this spec: https://www.xml.com/pub/a/2006/05/31/converting-between-xml-and-json.html """ - sra_file = open(xsd_file_path).read() - sra_experiment_data = xmltodict.parse(sra_file, namespaces={'xs': None}) - simple_types = {sp["@name"]: sp for sp in sra_experiment_data["schema"]["simpleType"]} + with open(xsd_file_path, 'r') as file: + xsd_file = file.read() + xsd_data = xmltodict.parse(xsd_file, namespaces={'xs': None}) + simple_types = {sp["@name"]: sp for sp in xsd_data["schema"]["simpleType"]} target_type = simple_types[type_name] values = [val['@value'] for val in target_type['restriction']['enumeration']] return values diff --git a/chord_metadata_service/restapi/api_views.py b/chord_metadata_service/restapi/api_views.py index 6a1151c2a..0586cbc1a 100644 --- a/chord_metadata_service/restapi/api_views.py +++ b/chord_metadata_service/restapi/api_views.py @@ -360,6 +360,7 @@ def public_overview(_request): get: Overview of all public data in the database """ + if not settings.CONFIG_PUBLIC: return Response(settings.NO_PUBLIC_DATA_AVAILABLE) From d8d9a85e74ec819c97ca1a7cc3d6b300d8177b37 Mon Sep 17 00:00:00 2001 From: v-rocheleau Date: Thu, 14 Sep 2023 16:29:10 +0000 Subject: [PATCH 08/25] lint --- chord_metadata_service/restapi/api_views.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chord_metadata_service/restapi/api_views.py b/chord_metadata_service/restapi/api_views.py index 0586cbc1a..6be37f377 100644 --- a/chord_metadata_service/restapi/api_views.py +++ b/chord_metadata_service/restapi/api_views.py @@ -360,7 +360,7 @@ def public_overview(_request): get: Overview of all public data in the database """ - + if not settings.CONFIG_PUBLIC: return Response(settings.NO_PUBLIC_DATA_AVAILABLE) From d030cb6760904e7b3c7d2a8cf1e9dfea7cbba1b8 Mon Sep 17 00:00:00 2001 From: v-rocheleau Date: Thu, 14 Sep 2023 20:09:56 +0000 Subject: [PATCH 09/25] read library selection, add doc --- README.md | 15 +++++++++ .../experiments/migrations/0009_v4_1_0.py | 31 ++++++++++++++++--- chord_metadata_service/experiments/schemas.py | 17 +++++++--- chord_metadata_service/ontologies/__init__.py | 3 +- chord_metadata_service/ontologies/utils.py | 9 +++++- pyproject.toml | 1 + 6 files changed, 64 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 208f0b891..3b22a3048 100644 --- a/README.md +++ b/README.md @@ -67,6 +67,21 @@ Katsu Metadata Service is a service to store epigenomic metadata. 7. Rest api service handles all generic functionality shared among other services +## Schemas +### Clinical Data + +Katsu implements the [Phenopacket V1.0.0](https://phenopacket-schema.readthedocs.io/en/1.0.0/) schema for clinical data. + +The schema definition for the phenopacket object is located in [chord_metadata_service/phenopackets/schemas.py](https://github.com/bento-platform/katsu/blob/4ab3c55d6052994ef69b188fb872261c47de24e0/chord_metadata_service/phenopackets/schemas.py#L336). + +### Experiments + +Katsu's experiments schemas are based on the IHEC [schema](https://github.com/IHEC/ihec-ecosystems/blob/master/docs/metadata/2.0/Ihec_metadata_specification.md#experiments), which is based on EBI/SRA schemas. + +The value options for `library_strategy` and `library_selection` are read from [chord_metadata_service/ontologies/xsd/SRA.experiment.xsd.xml](./chord_metadata_service/ontologies/xsd/SRA.experiment.xsd.xml), downloaded from the EBI's [SRA v1.5 database](http://ftp.ebi.ac.uk/pub/databases/ena/doc/xsd/sra_1_5/). + +The `SRA.experiment.xsd.xml` file is licensed under Apache License V2.0, the full copyright text is included in the file's header. + ## REST API highlights * Swagger schema docs can be found diff --git a/chord_metadata_service/experiments/migrations/0009_v4_1_0.py b/chord_metadata_service/experiments/migrations/0009_v4_1_0.py index 458f41c90..0a1e60580 100644 --- a/chord_metadata_service/experiments/migrations/0009_v4_1_0.py +++ b/chord_metadata_service/experiments/migrations/0009_v4_1_0.py @@ -1,11 +1,32 @@ +from typing import List from django.db import migrations +LIB_STRATEGY_CONVERSIONS: List[tuple[str, str]] = [ + # Convert WES -> WXS ... + ("WES", "WXS"), + ("Other", "OTHER") +] -def set_experiment_library_strategy(apps, _schema_editor): +LIB_SELECTION_CONVERIONS: List[tuple[str, str]] = [ + ("Random", "RANDOM"), + ("Random PCR", "RANDOM PCR"), + ("Exome capture", "other"), # 'Exome capture' no longer supported + ("Other", "other"), +] + +def set_experiment_library(apps, _schema_editor): Experiment = apps.get_model("experiments", "Experiment") - for exp in Experiment.objects.filter(library_strategy="WES"): - exp.library_strategy = "WXS" - exp.save() + for (old_val, new_val) in LIB_STRATEGY_CONVERSIONS: + # Modify library_strategy if necessary + for exp in Experiment.objects.filter(library_strategy=old_val): + exp.library_strategy = new_val + exp.save() + + for (old_val, new_val) in LIB_SELECTION_CONVERIONS: + # Modify library_selection if necessary + for exp in Experiment.objects.filter(library_selection=old_val): + exp.library_selection = new_val + exp.save() class Migration(migrations.Migration): dependencies = [ @@ -13,5 +34,5 @@ class Migration(migrations.Migration): ] operations = [ - migrations.RunPython(set_experiment_library_strategy) + migrations.RunPython(set_experiment_library) ] diff --git a/chord_metadata_service/experiments/schemas.py b/chord_metadata_service/experiments/schemas.py index 4a6bfb9d2..225a58eee 100644 --- a/chord_metadata_service/experiments/schemas.py +++ b/chord_metadata_service/experiments/schemas.py @@ -1,14 +1,21 @@ from .descriptions import EXPERIMENT, EXPERIMENT_RESULT, INSTRUMENT from chord_metadata_service.restapi.schemas import ONTOLOGY_CLASS_LIST, KEY_VALUE_OBJECT from chord_metadata_service.restapi.schema_utils import tag_ids_and_describe -from chord_metadata_service.ontologies import read_xsd_simple_type_values - +from chord_metadata_service.ontologies import read_xsd_simple_type_values, SRA_EXPERIMENT_FILE_NAME __all__ = ["EXPERIMENT_SCHEMA", "EXPERIMENT_RESULT_SCHEMA", "INSTRUMENT_SCHEMA"] +# Experiment library strategy options are read from the EBI xsd file LIBRARY_STRATEGIES = read_xsd_simple_type_values( - 'chord_metadata_service/ontologies/xsd/SRA.experiment.xsd.xml', - 'typeLibraryStrategy', + SRA_EXPERIMENT_FILE_NAME, + "typeLibraryStrategy", +) + + +# Experiment library selection options are read from the EBI xsd file +LIBRARY_SELECTION = read_xsd_simple_type_values( + SRA_EXPERIMENT_FILE_NAME, + "typeLibrarySelection", ) EXPERIMENT_RESULT_SCHEMA = tag_ids_and_describe({ @@ -118,7 +125,7 @@ }, "library_selection": { "type": "string", - "enum": ["Random", "PCR", "Random PCR", "RT-PCR", "MF", "Exome capture", "Other"] + "enum": LIBRARY_SELECTION }, "library_layout": { "type": "string", diff --git a/chord_metadata_service/ontologies/__init__.py b/chord_metadata_service/ontologies/__init__.py index 64c3158e0..b05e2bc07 100644 --- a/chord_metadata_service/ontologies/__init__.py +++ b/chord_metadata_service/ontologies/__init__.py @@ -1,5 +1,6 @@ -from .utils import read_xsd_simple_type_values +from .utils import read_xsd_simple_type_values, SRA_EXPERIMENT_FILE_NAME __all__ = [ "read_xsd_simple_type_values", + "SRA_EXPERIMENT_FILE_NAME", ] diff --git a/chord_metadata_service/ontologies/utils.py b/chord_metadata_service/ontologies/utils.py index be5a7ca84..163bd690a 100644 --- a/chord_metadata_service/ontologies/utils.py +++ b/chord_metadata_service/ontologies/utils.py @@ -1,14 +1,21 @@ +import os from typing import List import xmltodict +from pathlib import Path +XSD_ONTOLOGIES_PATH = Path("chord_metadata_service/ontologies/xsd/") +SRA_EXPERIMENT_FILE_NAME = "SRA.experiment.xsd.xml" -def read_xsd_simple_type_values(xsd_file_path: str, type_name: str) -> List[str]: + +def read_xsd_simple_type_values(xsd_file_name: str, type_name: str) -> List[str]: """Reads an XML Schema Definition (XSD) file and returns a type's values. The XSD file is parsed using xmltodict following this spec: https://www.xml.com/pub/a/2006/05/31/converting-between-xml-and-json.html """ + xsd_file_path = os.path.join(XSD_ONTOLOGIES_PATH, xsd_file_name) with open(xsd_file_path, 'r') as file: xsd_file = file.read() + xsd_data = xmltodict.parse(xsd_file, namespaces={'xs': None}) simple_types = {sp["@name"]: sp for sp in xsd_data["schema"]["simpleType"]} target_type = simple_types[type_name] diff --git a/pyproject.toml b/pyproject.toml index 48c607898..8ad54c29e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,6 +21,7 @@ include = [ "chord_metadata_service/chord/tests/*.json", "chord_metadata_service/dats/*", "chord_metadata_service/mcode/tests/*.json", + "chord_metadata_service/ontologies/xsd/*.xml", "chord_metadata_service/restapi/tests/*.json", ] repository = "https://github.com/bento-platform/katsu" From 48ff585b41939f3e7f6a0cb43ba17b8d8c9317b1 Mon Sep 17 00:00:00 2001 From: v-rocheleau Date: Thu, 14 Sep 2023 20:26:49 +0000 Subject: [PATCH 10/25] fix test data --- chord_metadata_service/chord/tests/example_experiment.json | 4 ++-- .../chord/tests/example_experiment_bad_biosample.json | 4 ++-- .../experiments/tests/example_experiments.json | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/chord_metadata_service/chord/tests/example_experiment.json b/chord_metadata_service/chord/tests/example_experiment.json index 8f6868db4..e263eeca1 100644 --- a/chord_metadata_service/chord/tests/example_experiment.json +++ b/chord_metadata_service/chord/tests/example_experiment.json @@ -13,7 +13,7 @@ ], "library_strategy": "ChIP-Seq", "library_source": "Genomic", - "library_selection": "Random", + "library_selection": "RANDOM", "library_layout": "Single", "extraction_protocol": "NGS", "molecule": "genomic DNA", @@ -75,4 +75,4 @@ "url": "http://purl.obolibrary.org/obo/so.owl" } ] -} \ No newline at end of file +} diff --git a/chord_metadata_service/chord/tests/example_experiment_bad_biosample.json b/chord_metadata_service/chord/tests/example_experiment_bad_biosample.json index b1f051378..931f15521 100644 --- a/chord_metadata_service/chord/tests/example_experiment_bad_biosample.json +++ b/chord_metadata_service/chord/tests/example_experiment_bad_biosample.json @@ -13,7 +13,7 @@ ], "library_strategy": "ChIP-Seq", "library_source": "Genomic", - "library_selection": "Random", + "library_selection": "RANDOM", "library_layout": "Single", "extraction_protocol": "NGS", "molecule": "genomic DNA", @@ -75,4 +75,4 @@ "url": "http://purl.obolibrary.org/obo/so.owl" } ] -} \ No newline at end of file +} diff --git a/chord_metadata_service/experiments/tests/example_experiments.json b/chord_metadata_service/experiments/tests/example_experiments.json index e888df91b..a36023f59 100644 --- a/chord_metadata_service/experiments/tests/example_experiments.json +++ b/chord_metadata_service/experiments/tests/example_experiments.json @@ -13,7 +13,7 @@ ], "library_strategy": "ChIP-Seq", "library_source": "Genomic", - "library_selection": "Random", + "library_selection": "RANDOM", "library_layout": "Single", "extraction_protocol": "NGS", "molecule": "genomic DNA", @@ -77,7 +77,7 @@ ], "library_strategy": "ChIP-Seq", "library_source": "Genomic", - "library_selection": "Random", + "library_selection": "RANDOM", "library_layout": "Single", "extraction_protocol": "NGS", "molecule": "genomic DNA", @@ -139,4 +139,4 @@ "url": "http://purl.obolibrary.org/obo/so.owl" } ] -} \ No newline at end of file +} From 2a93fe4a1f50419a41ad367d72662ebb9d5b5263 Mon Sep 17 00:00:00 2001 From: v-rocheleau Date: Thu, 14 Sep 2023 20:30:59 +0000 Subject: [PATCH 11/25] fix migration type --- .../experiments/migrations/0009_v4_1_0.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/chord_metadata_service/experiments/migrations/0009_v4_1_0.py b/chord_metadata_service/experiments/migrations/0009_v4_1_0.py index 0a1e60580..53fe3d4ca 100644 --- a/chord_metadata_service/experiments/migrations/0009_v4_1_0.py +++ b/chord_metadata_service/experiments/migrations/0009_v4_1_0.py @@ -1,13 +1,13 @@ -from typing import List +from typing import List, Tuple from django.db import migrations -LIB_STRATEGY_CONVERSIONS: List[tuple[str, str]] = [ +LIB_STRATEGY_CONVERSIONS: List[Tuple[str, str]] = [ # Convert WES -> WXS ... ("WES", "WXS"), - ("Other", "OTHER") + ("Other", "OTHER"), ] -LIB_SELECTION_CONVERIONS: List[tuple[str, str]] = [ +LIB_SELECTION_CONVERIONS: List[Tuple[str, str]] = [ ("Random", "RANDOM"), ("Random PCR", "RANDOM PCR"), ("Exome capture", "other"), # 'Exome capture' no longer supported From eb93de01c10802547afb96aae2f6af3bdcfd2cd3 Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Fri, 15 Sep 2023 14:43:01 +0000 Subject: [PATCH 12/25] migration fix, lint --- .../experiments/migrations/0009_v4_1_0.py | 4 +++- chord_metadata_service/ontologies/utils.py | 6 +++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/chord_metadata_service/experiments/migrations/0009_v4_1_0.py b/chord_metadata_service/experiments/migrations/0009_v4_1_0.py index 53fe3d4ca..37fc2c25d 100644 --- a/chord_metadata_service/experiments/migrations/0009_v4_1_0.py +++ b/chord_metadata_service/experiments/migrations/0009_v4_1_0.py @@ -10,10 +10,11 @@ LIB_SELECTION_CONVERIONS: List[Tuple[str, str]] = [ ("Random", "RANDOM"), ("Random PCR", "RANDOM PCR"), - ("Exome capture", "other"), # 'Exome capture' no longer supported + ("Exome capture", "Hybrid Selection"), # 'Exome capture' no longer supported ("Other", "other"), ] + def set_experiment_library(apps, _schema_editor): Experiment = apps.get_model("experiments", "Experiment") for (old_val, new_val) in LIB_STRATEGY_CONVERSIONS: @@ -28,6 +29,7 @@ def set_experiment_library(apps, _schema_editor): exp.library_selection = new_val exp.save() + class Migration(migrations.Migration): dependencies = [ ('experiments', '0007_v4_0_0'), diff --git a/chord_metadata_service/ontologies/utils.py b/chord_metadata_service/ontologies/utils.py index 163bd690a..36a28e9f8 100644 --- a/chord_metadata_service/ontologies/utils.py +++ b/chord_metadata_service/ontologies/utils.py @@ -13,11 +13,11 @@ def read_xsd_simple_type_values(xsd_file_name: str, type_name: str) -> List[str] https://www.xml.com/pub/a/2006/05/31/converting-between-xml-and-json.html """ xsd_file_path = os.path.join(XSD_ONTOLOGIES_PATH, xsd_file_name) - with open(xsd_file_path, 'r') as file: + with open(xsd_file_path, "r") as file: xsd_file = file.read() - xsd_data = xmltodict.parse(xsd_file, namespaces={'xs': None}) + xsd_data = xmltodict.parse(xsd_file, namespaces={"xs": None}) simple_types = {sp["@name"]: sp for sp in xsd_data["schema"]["simpleType"]} target_type = simple_types[type_name] - values = [val['@value'] for val in target_type['restriction']['enumeration']] + values = [val["@value"] for val in target_type["restriction"]["enumeration"]] return values From fae4540257fa14a7943f5113c0359df2c100a8a0 Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Mon, 18 Sep 2023 18:48:17 +0000 Subject: [PATCH 13/25] add validation error descriptions to IngestError --- .../chord/ingest/exceptions.py | 24 ++++++++++++++++++- .../chord/ingest/experiments.py | 20 +++++++++------- .../chord/ingest/phenopackets.py | 10 ++++---- chord_metadata_service/chord/ingest/schema.py | 4 ++-- chord_metadata_service/chord/ingest/views.py | 3 ++- .../chord/tests/test_ingest.py | 20 ++++++++-------- 6 files changed, 55 insertions(+), 26 deletions(-) diff --git a/chord_metadata_service/chord/ingest/exceptions.py b/chord_metadata_service/chord/ingest/exceptions.py index bea8da514..0211650c7 100644 --- a/chord_metadata_service/chord/ingest/exceptions.py +++ b/chord_metadata_service/chord/ingest/exceptions.py @@ -1,7 +1,29 @@ +from typing import List +from jsonschema.exceptions import ValidationError + __all__ = [ "IngestError", ] +def parse_validation_errors(errors: List[ValidationError]): + error_descriptions = {} + for error in errors: + field_path = ".".join(error.schema_path) + error_descriptions[field_path] = { + "faulty_value": error.instance, + "valid_options": error.validator_value, + "field_schema": error.schema, + "message": error.message, + } + return error_descriptions + + class IngestError(Exception): - pass + + def __init__(self, schema_validation_errors=[], message="An error occured during ingestion."): + + errors_descriptions = parse_validation_errors(schema_validation_errors) + + self.validation_errors = errors_descriptions + self.message = message diff --git a/chord_metadata_service/chord/ingest/experiments.py b/chord_metadata_service/chord/ingest/experiments.py index 4fbcd60a9..13dc340d7 100644 --- a/chord_metadata_service/chord/ingest/experiments.py +++ b/chord_metadata_service/chord/ingest/experiments.py @@ -56,12 +56,14 @@ def create_experiment_result(er: dict) -> em.ExperimentResult: def validate_experiment(experiment_data, idx: Optional[int] = None) -> None: # Validate experiment data against experiments schema. - validation = schema_validation(experiment_data, EXPERIMENT_SCHEMA) - if not validation: + val_errors = schema_validation(experiment_data, EXPERIMENT_SCHEMA) + if val_errors: # TODO: Report more precise errors raise IngestError( - f"Failed schema validation for experiment{(' ' + str(idx)) if idx is not None else ''} " - f"(check Katsu logs for more information)") + schema_validation_errors=val_errors, + message=f"Failed schema validation for experiment{(' ' + str(idx)) if idx is not None else ''} " + f"(check Katsu logs for more information)" + ) def ingest_experiment( @@ -164,12 +166,14 @@ def ingest_derived_experiment_results(json_data: list[dict]) -> list[em.Experime # First, validate all experiment results with the schema before creating anything in the database. for idx, exp_result in enumerate(json_data): - validation = schema_validation(exp_result, EXPERIMENT_RESULT_SCHEMA) - if not validation: + val_errors = schema_validation(exp_result, EXPERIMENT_RESULT_SCHEMA) + if val_errors: # TODO: Report more precise errors raise IngestError( - f"Failed schema validation for experiment result {idx} " - f"(check Katsu logs for more information)") + schema_validation_errors=val_errors, + message=f"Failed schema validation for experiment result {idx} " + f"(check Katsu logs for more information)" + ) # If everything passes, perform the actual ingestion next. diff --git a/chord_metadata_service/chord/ingest/phenopackets.py b/chord_metadata_service/chord/ingest/phenopackets.py index 690160c87..45bdff20e 100644 --- a/chord_metadata_service/chord/ingest/phenopackets.py +++ b/chord_metadata_service/chord/ingest/phenopackets.py @@ -60,12 +60,14 @@ def validate_phenopacket(phenopacket_data: dict[str, Any], schema: dict = PHENOPACKET_SCHEMA, idx: Optional[int] = None) -> None: # Validate phenopacket data against phenopackets schema. - validation = schema_validation(phenopacket_data, schema) - if not validation: + val_errors = schema_validation(phenopacket_data, schema) + if val_errors: # TODO: Report more precise errors raise IngestError( - f"Failed schema validation for phenopacket{(' ' + str(idx)) if idx is not None else ''} " - f"(check Katsu logs for more information)") + schema_validation_errors=val_errors, + message=f"Failed schema validation for phenopacket{(' ' + str(idx)) if idx is not None else ''} " + f"(check Katsu logs for more information)" + ) def update_or_create_subject(subject: dict) -> pm.Individual: diff --git a/chord_metadata_service/chord/ingest/schema.py b/chord_metadata_service/chord/ingest/schema.py index 538c5c4ed..7cb4fcc5a 100644 --- a/chord_metadata_service/chord/ingest/schema.py +++ b/chord_metadata_service/chord/ingest/schema.py @@ -10,10 +10,10 @@ def schema_validation(obj, schema): try: v.validate(obj) logger.info("JSON schema validation passed.") - return True + return None except jsonschema.exceptions.ValidationError: errors = [e for e in v.iter_errors(obj)] logger.info("JSON schema validation failed.") for i, error in enumerate(errors, 1): logger.error(f"{i} Validation error in {'.'.join(str(v) for v in error.path)}: {error.message}") - return False + return errors diff --git a/chord_metadata_service/chord/ingest/views.py b/chord_metadata_service/chord/ingest/views.py index c89f8aad1..cf35f236b 100644 --- a/chord_metadata_service/chord/ingest/views.py +++ b/chord_metadata_service/chord/ingest/views.py @@ -46,7 +46,8 @@ def ingest_into_dataset(request, dataset_id: str, workflow_id: str): WORKFLOW_INGEST_FUNCTION_MAP[workflow_id](request.data, dataset_id) except IngestError as e: - return Response(errors.bad_request_error(f"Encountered ingest error: {e}"), status=400) + # return Response(errors.bad_request_error(e.validation_errors), status=400) + return Response(errors.bad_request_error(f"Encountered ingest error: {e.validation_errors}"), status=400) except ValidationError as e: return Response(errors.bad_request_error( diff --git a/chord_metadata_service/chord/tests/test_ingest.py b/chord_metadata_service/chord/tests/test_ingest.py index d93832186..dd0bf335b 100644 --- a/chord_metadata_service/chord/tests/test_ingest.py +++ b/chord_metadata_service/chord/tests/test_ingest.py @@ -168,22 +168,22 @@ def test_reingesting_updating_phenopackets_json(self): def test_phenopackets_validation(self): # check invalid phenopacket, must fail validation & validate_phenopacket must raise - validation = schema_validation(EXAMPLE_INGEST_INVALID_PHENOPACKET, PHENOPACKET_SCHEMA) - self.assertEqual(validation, False) + val_errs = schema_validation(EXAMPLE_INGEST_INVALID_PHENOPACKET, PHENOPACKET_SCHEMA) + self.assertTrue(len(val_errs) > 0) with self.assertRaises(IngestError): validate_phenopacket(EXAMPLE_INGEST_INVALID_PHENOPACKET) with self.assertRaises(IngestError): ingest_phenopacket(EXAMPLE_INGEST_INVALID_PHENOPACKET, "dummy", validate=True) # valid phenopacket passes validation & doesn't raise - validation_2 = schema_validation(EXAMPLE_INGEST_PHENOPACKET, PHENOPACKET_SCHEMA) - self.assertEqual(validation_2, True) + val_errors_2 = schema_validation(EXAMPLE_INGEST_PHENOPACKET, PHENOPACKET_SCHEMA) + self.assertEqual(val_errors_2, None) validate_phenopacket(EXAMPLE_INGEST_PHENOPACKET) # valid experiments pass validation for exp in EXAMPLE_INGEST_EXPERIMENT["experiments"]: - validation_3 = schema_validation(exp, EXPERIMENT_SCHEMA) - self.assertEqual(validation_3, True) + val_errors_3 = schema_validation(exp, EXPERIMENT_SCHEMA) + self.assertEqual(val_errors_3, None) def test_ingesting_experiments_json(self): # ingest phenopackets data in order to match to biosample ids @@ -220,8 +220,8 @@ def test_ingesting_experiments_json(self): def test_ingesting_invalid_experiment_json(self): # check invalid experiment, must fail validation for exp in EXAMPLE_INGEST_INVALID_EXPERIMENT["experiments"]: - validation = schema_validation(exp, EXPERIMENT_SCHEMA) - self.assertEqual(validation, False) + val_errs = schema_validation(exp, EXPERIMENT_SCHEMA) + self.assertTrue(len(val_errs) > 0) with self.assertRaises(IngestError): validate_experiment(exp) with self.assertRaises(IngestError): @@ -229,8 +229,8 @@ def test_ingesting_invalid_experiment_json(self): # check valid experiment, must pass validation for exp in EXAMPLE_INGEST_EXPERIMENT["experiments"]: - validation_2 = schema_validation(exp, EXPERIMENT_SCHEMA) - self.assertEqual(validation_2, True) + val_errs_2 = schema_validation(exp, EXPERIMENT_SCHEMA) + self.assertEqual(val_errs_2, None) def test_ingesting_experiment_results_json(self): # ingest list of experiments From 0c1cb1bb9b4997f1625663a0198bc256637021dc Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Mon, 18 Sep 2023 22:04:52 +0000 Subject: [PATCH 14/25] ingest response format --- .../chord/ingest/exceptions.py | 11 ++-- chord_metadata_service/chord/ingest/views.py | 65 +++++++++++++++---- 2 files changed, 59 insertions(+), 17 deletions(-) diff --git a/chord_metadata_service/chord/ingest/exceptions.py b/chord_metadata_service/chord/ingest/exceptions.py index 0211650c7..859ec6011 100644 --- a/chord_metadata_service/chord/ingest/exceptions.py +++ b/chord_metadata_service/chord/ingest/exceptions.py @@ -7,21 +7,22 @@ def parse_validation_errors(errors: List[ValidationError]): - error_descriptions = {} + error_descriptions = [] for error in errors: - field_path = ".".join(error.schema_path) - error_descriptions[field_path] = { + schema_path = ".".join(error.schema_path) + error_descriptions.append({ + "schema_path": schema_path, "faulty_value": error.instance, "valid_options": error.validator_value, "field_schema": error.schema, "message": error.message, - } + }) return error_descriptions class IngestError(Exception): - def __init__(self, schema_validation_errors=[], message="An error occured during ingestion."): + def __init__(self, schema_validation_errors: List[ValidationError]=[], message="An error occured during ingestion."): errors_descriptions = parse_validation_errors(schema_validation_errors) diff --git a/chord_metadata_service/chord/ingest/views.py b/chord_metadata_service/chord/ingest/views.py index cf35f236b..9ea6a20a7 100644 --- a/chord_metadata_service/chord/ingest/views.py +++ b/chord_metadata_service/chord/ingest/views.py @@ -10,6 +10,7 @@ from rest_framework.decorators import api_view, permission_classes from rest_framework.permissions import AllowAny from rest_framework.response import Response +from typing import List from bento_lib.schemas.bento import BENTO_INGEST_SCHEMA from bento_lib.responses import errors @@ -26,18 +27,53 @@ logger = logging.getLogger(__name__) +class IngestResponseBuilder: + + def __init__(self, workflow_id: str, dataset_id: str): + self.workflow_id = workflow_id + self.dataset_id = dataset_id + self.success = False + self.errors = [] + self.warnings = [] + + def set_success(self, success: bool): + self.success = success + + def add_error(self, error): + self.errors.append(error) + + def add_errors(self, errors: List[any]): + self.errors.extend(errors) + + def add_warning(self, warnings: List[any]): + self.warnings.extend(warnings) + + def as_response(self, status_code: int): + body = { + "success": self.success, + "warnings": self.warnings, + "errors": self.errors, + } + logger.info(f"Finished {self.workflow_id} ingest request for dataset {self.dataset_id}", body) + return Response(body, status=status_code) + + @api_view(["POST"]) @permission_classes([AllowAny]) def ingest_into_dataset(request, dataset_id: str, workflow_id: str): logger.info(f"Received a {workflow_id} ingest request for dataset {dataset_id}.") + response_builder = IngestResponseBuilder(workflow_id=workflow_id, dataset_id=dataset_id) + # Check that the workflow exists if workflow_id not in WORKFLOW_INGEST_FUNCTION_MAP: - return Response(errors.bad_request_error(f"Ingestion workflow ID {workflow_id} does not exist"), status=400) + response_builder.add_error(f"Ingestion workflow ID {workflow_id} does not exist") + return response_builder.as_response(400) if dataset_id not in DATASET_ID_OVERRIDES: if not Dataset.objects.filter(identifier=dataset_id).exists(): - return Response(errors.bad_request_error(f"Dataset with ID {dataset_id} does not exist"), status=400) + response_builder.add_error(f"Dataset with ID {dataset_id} does not exist") + return response_builder.as_response(400) dataset_id = str(uuid.UUID(dataset_id)) # Normalize dataset ID to UUID's str format. try: @@ -46,18 +82,23 @@ def ingest_into_dataset(request, dataset_id: str, workflow_id: str): WORKFLOW_INGEST_FUNCTION_MAP[workflow_id](request.data, dataset_id) except IngestError as e: - # return Response(errors.bad_request_error(e.validation_errors), status=400) - return Response(errors.bad_request_error(f"Encountered ingest error: {e.validation_errors}"), status=400) + if e.validation_errors: + response_builder.add_errors(e.validation_errors) + else: + response_builder.add_error(e.message) + return response_builder.as_response(400) except ValidationError as e: - return Response(errors.bad_request_error( - "Encountered validation errors during ingestion", - *(e.error_list if hasattr(e, "error_list") else e.error_dict.items()), - )) + response_builder.add_errors(e.error_list if hasattr(e, "error_list") else e.error_dict.items()) + return response_builder.as_response(400) except Exception as e: # Encountered some other error from the ingestion attempt, return a somewhat detailed message - logger.error(f"Encountered an exception while processing an ingest attempt:\n{traceback.format_exc()}") - return Response(errors.internal_server_error(f"Encountered an exception while processing an ingest attempt " - f"(error: {repr(e)}"), status=500) - return Response(status=204) + error_message = f"Encountered an exception while processing an ingest attempt:\n{traceback.format_exc()}" + logger.error(error_message) + response_builder.add_error(error_message) + return response_builder.as_response(500) + + # return Response(status=204) + response_builder.set_success(True) + return response_builder.as_response(204) From 9f8ff0c0618c007f11c64a8cf33405bdb6d940c2 Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Tue, 19 Sep 2023 21:11:05 +0000 Subject: [PATCH 15/25] ingestion error responds with warnings on schema changes --- .../chord/ingest/exceptions.py | 99 +++++++++++++++++-- .../chord/ingest/experiments.py | 3 +- chord_metadata_service/chord/ingest/views.py | 28 +++--- .../experiments/migrations/0009_v4_1_0.py | 18 +--- chord_metadata_service/experiments/schemas.py | 21 ++++ 5 files changed, 134 insertions(+), 35 deletions(-) diff --git a/chord_metadata_service/chord/ingest/exceptions.py b/chord_metadata_service/chord/ingest/exceptions.py index 859ec6011..b40468dda 100644 --- a/chord_metadata_service/chord/ingest/exceptions.py +++ b/chord_metadata_service/chord/ingest/exceptions.py @@ -1,30 +1,111 @@ -from typing import List +from typing import List, Optional from jsonschema.exceptions import ValidationError +from chord_metadata_service import __version__ +from chord_metadata_service.experiments.schemas import EXPERIMENT_SCHEMA_CHANGES +from chord_metadata_service.chord.data_types import DATA_TYPE_EXPERIMENT, DATA_TYPE_PHENOPACKET __all__ = [ "IngestError", ] -def parse_validation_errors(errors: List[ValidationError]): +DATA_TYPE_SCHEMA_CHANGES = { + DATA_TYPE_EXPERIMENT: EXPERIMENT_SCHEMA_CHANGES, + DATA_TYPE_PHENOPACKET: None +} + + +def parse_validation_errors(errors: List[ValidationError]) -> Optional[List[dict]]: + """ + Accepts a list of jsonschema ValidationError and converts them to a client error format. + + Parameters: + errors (List[ValidationError]): errors raised by jsonschema during validation + Returns: + List[dict]: + dict: + schema_path (str): Schema path string (e.g "properties.library_strategy") + faulty_value (str | obj): The value at the schema_path causing the error + property_schema (dict): JSON schema of the property (includes valid options) + message (str): The ValidationError.message + """ error_descriptions = [] for error in errors: schema_path = ".".join(error.schema_path) error_descriptions.append({ "schema_path": schema_path, "faulty_value": error.instance, - "valid_options": error.validator_value, - "field_schema": error.schema, "message": error.message, + "property_schema": error.schema, }) - return error_descriptions + return error_descriptions if len(error_descriptions) else None -class IngestError(Exception): +def parse_property_warnings(data: dict, prop_name: str, property_changes: List[tuple]) -> Optional[dict]: + for (old_value, new_value) in property_changes: + value = data[prop_name] + property_warning = { + "property_name": prop_name, + "property_value": value, + "deprecated_value": old_value, + "suggested_replacement": new_value, + } + + if value == old_value: + # Naive comparison for dicts + return property_warning + + if isinstance(value, str) and isinstance(old_value, str): + # Lower case comparison for string values (JSON schema enum) + if value.lower() == old_value.lower(): + return property_warning + + # Only warn when mecessary + return None + - def __init__(self, schema_validation_errors: List[ValidationError]=[], message="An error occured during ingestion."): +def parse_schema_warnings(data: dict, schema: dict) -> Optional[List[dict]]: + """ + Schema warnings are issued on Katsu releases that include schema changes. + Warnings are returned to highlight schema changes that may be the root cause of an IngestionError. + + Parameters: + data (dict): the data submitted for ingestion + + Returns: + List[dict]: + dict: + property_name (str): The name of the property + property_value (str | dict) + deprecated_value (str | dict): The deprecated property option + suggested_replacement (str | dict): The new suggested property option + """ + if not data or not schema: + return None + + data_type = schema.get("$id", "").split(":")[-1] + applicable_changes = DATA_TYPE_SCHEMA_CHANGES.get(data_type, None) + + if not applicable_changes or __version__ not in applicable_changes: + # Skip if data type's schema is not affected in current Katsu version + return None + + warnings = [] + for (prop_name, changes) in applicable_changes[__version__].get("properties", {}).items(): + property_warning = parse_property_warnings(data=data, prop_name=prop_name, property_changes=changes) + if property_warning: + warnings.append(property_warning) + return warnings if len(warnings) else None + + +class IngestError(Exception): - errors_descriptions = parse_validation_errors(schema_validation_errors) + def __init__(self, + data: dict = None, + schema: dict = None, + schema_validation_errors: List[ValidationError] = [], + message="An error occured during ingestion."): - self.validation_errors = errors_descriptions + self.validation_errors = parse_validation_errors(schema_validation_errors) + self.schema_warnings = parse_schema_warnings(data=data, schema=schema) self.message = message diff --git a/chord_metadata_service/chord/ingest/experiments.py b/chord_metadata_service/chord/ingest/experiments.py index 13dc340d7..1437b4a09 100644 --- a/chord_metadata_service/chord/ingest/experiments.py +++ b/chord_metadata_service/chord/ingest/experiments.py @@ -58,8 +58,9 @@ def validate_experiment(experiment_data, idx: Optional[int] = None) -> None: # Validate experiment data against experiments schema. val_errors = schema_validation(experiment_data, EXPERIMENT_SCHEMA) if val_errors: - # TODO: Report more precise errors raise IngestError( + data=experiment_data, + schema=EXPERIMENT_SCHEMA, schema_validation_errors=val_errors, message=f"Failed schema validation for experiment{(' ' + str(idx)) if idx is not None else ''} " f"(check Katsu logs for more information)" diff --git a/chord_metadata_service/chord/ingest/views.py b/chord_metadata_service/chord/ingest/views.py index 9ea6a20a7..e0c7a5aca 100644 --- a/chord_metadata_service/chord/ingest/views.py +++ b/chord_metadata_service/chord/ingest/views.py @@ -13,7 +13,6 @@ from typing import List from bento_lib.schemas.bento import BENTO_INGEST_SCHEMA -from bento_lib.responses import errors from . import WORKFLOW_INGEST_FUNCTION_MAP from .exceptions import IngestError @@ -33,7 +32,7 @@ def __init__(self, workflow_id: str, dataset_id: str): self.workflow_id = workflow_id self.dataset_id = dataset_id self.success = False - self.errors = [] + self.errors = [] self.warnings = [] def set_success(self, success: bool): @@ -42,12 +41,21 @@ def set_success(self, success: bool): def add_error(self, error): self.errors.append(error) - def add_errors(self, errors: List[any]): + def add_errors(self, errors: List): self.errors.extend(errors) - def add_warning(self, warnings: List[any]): + def add_warning(self, warnings: List): self.warnings.extend(warnings) + def add_ingest_error(self, error: IngestError): + if error.validation_errors: + self.add_errors(error.validation_errors) + else: + self.add_error(error.message) + + if error.schema_warnings: + self.warnings.extend(error.schema_warnings) + def as_response(self, status_code: int): body = { "success": self.success, @@ -82,10 +90,7 @@ def ingest_into_dataset(request, dataset_id: str, workflow_id: str): WORKFLOW_INGEST_FUNCTION_MAP[workflow_id](request.data, dataset_id) except IngestError as e: - if e.validation_errors: - response_builder.add_errors(e.validation_errors) - else: - response_builder.add_error(e.message) + response_builder.add_ingest_error(e) return response_builder.as_response(400) except ValidationError as e: @@ -94,11 +99,10 @@ def ingest_into_dataset(request, dataset_id: str, workflow_id: str): except Exception as e: # Encountered some other error from the ingestion attempt, return a somewhat detailed message - error_message = f"Encountered an exception while processing an ingest attempt:\n{traceback.format_exc()}" - logger.error(error_message) - response_builder.add_error(error_message) + logger.error(f"Encountered an exception while processing an ingest attempt:\n{traceback.format_exc()}") + response_builder.add_error(f"Encountered an exception while processing an ingest attempt (error: {repr(e)})") return response_builder.as_response(500) - + # return Response(status=204) response_builder.set_success(True) return response_builder.as_response(204) diff --git a/chord_metadata_service/experiments/migrations/0009_v4_1_0.py b/chord_metadata_service/experiments/migrations/0009_v4_1_0.py index 37fc2c25d..532935887 100644 --- a/chord_metadata_service/experiments/migrations/0009_v4_1_0.py +++ b/chord_metadata_service/experiments/migrations/0009_v4_1_0.py @@ -1,18 +1,10 @@ from typing import List, Tuple from django.db import migrations +from chord_metadata_service.experiments.schemas import EXPERIMENT_SCHEMA_CHANGES -LIB_STRATEGY_CONVERSIONS: List[Tuple[str, str]] = [ - # Convert WES -> WXS ... - ("WES", "WXS"), - ("Other", "OTHER"), -] - -LIB_SELECTION_CONVERIONS: List[Tuple[str, str]] = [ - ("Random", "RANDOM"), - ("Random PCR", "RANDOM PCR"), - ("Exome capture", "Hybrid Selection"), # 'Exome capture' no longer supported - ("Other", "other"), -] +V4_1_0_PROPERTIES = EXPERIMENT_SCHEMA_CHANGES["4.1.0"]["properties"] +LIB_STRATEGY_CONVERSIONS = V4_1_0_PROPERTIES["library_strategy"] +LIB_SELECTION_CONVERIONS = V4_1_0_PROPERTIES["library_selection"] def set_experiment_library(apps, _schema_editor): @@ -22,7 +14,7 @@ def set_experiment_library(apps, _schema_editor): for exp in Experiment.objects.filter(library_strategy=old_val): exp.library_strategy = new_val exp.save() - + for (old_val, new_val) in LIB_SELECTION_CONVERIONS: # Modify library_selection if necessary for exp in Experiment.objects.filter(library_selection=old_val): diff --git a/chord_metadata_service/experiments/schemas.py b/chord_metadata_service/experiments/schemas.py index 225a58eee..fe81c4cc7 100644 --- a/chord_metadata_service/experiments/schemas.py +++ b/chord_metadata_service/experiments/schemas.py @@ -155,3 +155,24 @@ }, "required": ["id", "experiment_type"] }, EXPERIMENT) + + +""" +Dictionary of schema changes for warnings. +""" +EXPERIMENT_SCHEMA_CHANGES = { + "4.1.0": { + "properties": { + "library_strategy": [ + ("WES", "WXS"), + ("Other", "OTHER"), + ], + "library_selection": [ + ("Random", "RANDOM"), + ("Random PCR", "RANDOM PCR"), + ("Exome capture", "Hybrid Selection"), + ("Other", "other"), + ] + } + } +} From 2194a080ffb34bb56e42ec375f49befad318eba1 Mon Sep 17 00:00:00 2001 From: v-rocheleau Date: Wed, 20 Sep 2023 17:11:31 +0000 Subject: [PATCH 16/25] update api ingestion tests --- .../chord/ingest/exceptions.py | 10 +- .../chord/ingest/experiments.py | 8 +- .../chord/ingest/phenopackets.py | 6 +- chord_metadata_service/chord/ingest/views.py | 8 +- .../tests/example_invalid_experiment.json | 6 +- .../chord/tests/test_api_ingest.py | 107 +++++++++++++++++- .../chord/workflows/wdls/experiments_json.wdl | 2 +- .../workflows/wdls/phenopackets_json.wdl | 2 +- 8 files changed, 124 insertions(+), 25 deletions(-) diff --git a/chord_metadata_service/chord/ingest/exceptions.py b/chord_metadata_service/chord/ingest/exceptions.py index b40468dda..431e41242 100644 --- a/chord_metadata_service/chord/ingest/exceptions.py +++ b/chord_metadata_service/chord/ingest/exceptions.py @@ -79,6 +79,7 @@ def parse_schema_warnings(data: dict, schema: dict) -> Optional[List[dict]]: property_value (str | dict) deprecated_value (str | dict): The deprecated property option suggested_replacement (str | dict): The new suggested property option + version (str): The Katsu release version associated with the schema change """ if not data or not schema: return None @@ -91,10 +92,11 @@ def parse_schema_warnings(data: dict, schema: dict) -> Optional[List[dict]]: return None warnings = [] - for (prop_name, changes) in applicable_changes[__version__].get("properties", {}).items(): - property_warning = parse_property_warnings(data=data, prop_name=prop_name, property_changes=changes) - if property_warning: - warnings.append(property_warning) + for (version, version_changes) in applicable_changes.items(): + for (prop_name, changes) in version_changes.get("properties", {}).items(): + if property_warning:= parse_property_warnings(data, prop_name, changes): + property_warning["version"] = version + warnings.append(property_warning) return warnings if len(warnings) else None diff --git a/chord_metadata_service/chord/ingest/experiments.py b/chord_metadata_service/chord/ingest/experiments.py index 1437b4a09..03a7aaf3c 100644 --- a/chord_metadata_service/chord/ingest/experiments.py +++ b/chord_metadata_service/chord/ingest/experiments.py @@ -56,8 +56,7 @@ def create_experiment_result(er: dict) -> em.ExperimentResult: def validate_experiment(experiment_data, idx: Optional[int] = None) -> None: # Validate experiment data against experiments schema. - val_errors = schema_validation(experiment_data, EXPERIMENT_SCHEMA) - if val_errors: + if val_errors:= schema_validation(experiment_data, EXPERIMENT_SCHEMA): raise IngestError( data=experiment_data, schema=EXPERIMENT_SCHEMA, @@ -149,6 +148,11 @@ def ingest_experiments_workflow(json_data, dataset_id: str) -> list[em.Experimen exps = json_data.get("experiments", []) + if len(exps) == 0: + # If empty experiments array + # Validate an empty json to raise an IngestError with validation details + validate_experiment({}) + # First, validate all experiments with the schema before creating anything in the database. for idx, exp in enumerate(exps): validate_experiment(exp, idx) diff --git a/chord_metadata_service/chord/ingest/phenopackets.py b/chord_metadata_service/chord/ingest/phenopackets.py index 45bdff20e..14b87f65c 100644 --- a/chord_metadata_service/chord/ingest/phenopackets.py +++ b/chord_metadata_service/chord/ingest/phenopackets.py @@ -60,10 +60,10 @@ def validate_phenopacket(phenopacket_data: dict[str, Any], schema: dict = PHENOPACKET_SCHEMA, idx: Optional[int] = None) -> None: # Validate phenopacket data against phenopackets schema. - val_errors = schema_validation(phenopacket_data, schema) - if val_errors: - # TODO: Report more precise errors + if val_errors:= schema_validation(phenopacket_data, schema): raise IngestError( + data=phenopacket_data, + schema=PHENOPACKET_SCHEMA, schema_validation_errors=val_errors, message=f"Failed schema validation for phenopacket{(' ' + str(idx)) if idx is not None else ''} " f"(check Katsu logs for more information)" diff --git a/chord_metadata_service/chord/ingest/views.py b/chord_metadata_service/chord/ingest/views.py index e0c7a5aca..2441d89e2 100644 --- a/chord_metadata_service/chord/ingest/views.py +++ b/chord_metadata_service/chord/ingest/views.py @@ -44,9 +44,6 @@ def add_error(self, error): def add_errors(self, errors: List): self.errors.extend(errors) - def add_warning(self, warnings: List): - self.warnings.extend(warnings) - def add_ingest_error(self, error: IngestError): if error.validation_errors: self.add_errors(error.validation_errors) @@ -56,7 +53,7 @@ def add_ingest_error(self, error: IngestError): if error.schema_warnings: self.warnings.extend(error.schema_warnings) - def as_response(self, status_code: int): + def as_response(self, status_code: int) -> Response: body = { "success": self.success, "warnings": self.warnings, @@ -103,6 +100,5 @@ def ingest_into_dataset(request, dataset_id: str, workflow_id: str): response_builder.add_error(f"Encountered an exception while processing an ingest attempt (error: {repr(e)})") return response_builder.as_response(500) - # return Response(status=204) response_builder.set_success(True) - return response_builder.as_response(204) + return response_builder.as_response(201) diff --git a/chord_metadata_service/chord/tests/example_invalid_experiment.json b/chord_metadata_service/chord/tests/example_invalid_experiment.json index 437d238cf..0e47f9b72 100644 --- a/chord_metadata_service/chord/tests/example_invalid_experiment.json +++ b/chord_metadata_service/chord/tests/example_invalid_experiment.json @@ -11,9 +11,9 @@ "label": "ChIP-seq" } ], - "library_strategy": "ChIP-Seq", + "library_strategy": "WES", "library_source": "Genomic", - "library_selection": "Random", + "library_selection": "random", "library_layout": "Single", "extraction_protocol": "NGS", "molecule": "genomic DNA", @@ -75,4 +75,4 @@ "url": "http://purl.obolibrary.org/obo/so.owl" } ] -} \ No newline at end of file +} diff --git a/chord_metadata_service/chord/tests/test_api_ingest.py b/chord_metadata_service/chord/tests/test_api_ingest.py index 53f50bcb2..15cbb2f10 100644 --- a/chord_metadata_service/chord/tests/test_api_ingest.py +++ b/chord_metadata_service/chord/tests/test_api_ingest.py @@ -3,6 +3,7 @@ from django.urls import reverse from rest_framework import status from rest_framework.test import APITestCase +from chord_metadata_service.chord.tests.example_ingest import EXAMPLE_INGEST_EXPERIMENT, EXAMPLE_INGEST_INVALID_EXPERIMENT, EXAMPLE_INGEST_INVALID_PHENOPACKET, EXAMPLE_INGEST_PHENOPACKET from chord_metadata_service.restapi.tests.utils import load_local_json from .constants import VALID_PROJECT_1, valid_dataset_1 @@ -63,14 +64,20 @@ def test_phenopackets_ingest(self): reverse("ingest-into-dataset", args=(self.dataset["identifier"], "phenopackets_json_invalid")), content_type="application/json", ) + c = r.json() self.assertEqual(r.status_code, status.HTTP_400_BAD_REQUEST) + self.assertEqual(c["success"], False) + self.assertEqual(len(c["errors"]), 1) # No ingestion body r = self.client.post( reverse("ingest-into-dataset", args=(self.dataset["identifier"], "phenopackets_json")), content_type="application/json", ) + c = r.json() self.assertEqual(r.status_code, status.HTTP_400_BAD_REQUEST) + self.assertEqual(c["success"], False) + self.assertEqual(len(c["errors"]), 1) # Bad ingestion body JSON r = self.client.post( @@ -78,22 +85,112 @@ def test_phenopackets_ingest(self): content_type="application/json", data="\{\}\}", # noqa: W605 ) + c = r.json() self.assertEqual(r.status_code, status.HTTP_500_INTERNAL_SERVER_ERROR) + self.assertEqual(c["success"], False) + self.assertEqual(len(c["errors"]), 1) # 1 required property # Invalid phenopacket JSON validation - invalid_phenopacket = load_local_json("example_invalid_phenopacket.json") r = self.client.post( reverse("ingest-into-dataset", args=(self.dataset["identifier"], "phenopackets_json")), content_type="application/json", - data=json.dumps(invalid_phenopacket), + data=json.dumps(EXAMPLE_INGEST_INVALID_PHENOPACKET), ) + c = r.json() self.assertEqual(r.status_code, status.HTTP_400_BAD_REQUEST) + self.assertEqual(c["success"], False) + self.assertEqual(len(c["errors"]), 2) # Success - valid_phenopacket = load_local_json("example_phenopacket.json") r = self.client.post( reverse("ingest-into-dataset", args=(self.dataset["identifier"], "phenopackets_json")), content_type="application/json", - data=json.dumps(valid_phenopacket), + data=json.dumps(EXAMPLE_INGEST_PHENOPACKET), ) - self.assertEqual(r.status_code, status.HTTP_204_NO_CONTENT) + c = r.json() + self.assertEqual(c["success"], True) + self.assertEqual(len(c["errors"]), 0) + self.assertEqual(len(c["warnings"]), 0) + self.assertEqual(r.status_code, status.HTTP_201_CREATED) + + + def test_experiments_ingest_failures(self): + # Invalid workflow ID + r = self.client.post( + reverse("ingest-into-dataset", args=(self.dataset["identifier"], "experiments_json_invalid")), + content_type="application/json", + ) + c = r.json() + self.assertEqual(r.status_code, status.HTTP_400_BAD_REQUEST) + self.assertEqual(c["success"], False) + self.assertEqual(len(c["errors"]), 1) + + # No ingestion body + r = self.client.post( + reverse("ingest-into-dataset", args=(self.dataset["identifier"], "experiments_json")), + content_type="application/json", + ) + c = r.json() + self.assertEqual(r.status_code, status.HTTP_400_BAD_REQUEST) + self.assertEqual(c["success"], False) + self.assertEqual(len(c["errors"]), 2) # 2 required properties + + # Bad ingestion body JSON + r = self.client.post( + reverse("ingest-into-dataset", args=(self.dataset["identifier"], "experiments_json")), + content_type="application/json", + data="\{\}\}", # noqa: W605 + ) + c = r.json() + self.assertEqual(r.status_code, status.HTTP_500_INTERNAL_SERVER_ERROR) + self.assertEqual(c["success"], False) + self.assertEqual(len(c["errors"]), 1) + + # Invalid experiments JSON validation + r = self.client.post( + reverse("ingest-into-dataset", args=(self.dataset["identifier"], "experiments_json")), + content_type="application/json", + data=json.dumps(EXAMPLE_INGEST_INVALID_EXPERIMENT), + ) + c = r.json() + self.assertEqual(r.status_code, status.HTTP_400_BAD_REQUEST) + self.assertEqual(c["success"], False) + self.assertEqual(len(c["errors"]), 4) + + # Two of the errors concern experiment schema changes + warnings = c["warnings"] + self.assertEqual(len(warnings), 2) + warned_properties = [schema_warning["property_name"] for schema_warning in warnings] + self.assertTrue("library_selection" in warned_properties) + self.assertTrue("library_strategy" in warned_properties) + + # Biosample not present + r = self.client.post( + reverse("ingest-into-dataset", args=(self.dataset["identifier"], "experiments_json")), + content_type="application/json", + data=json.dumps(EXAMPLE_INGEST_EXPERIMENT), + ) + c = r.json() + self.assertEqual(c["success"], False) + self.assertEqual(len(c["errors"]), 1) + self.assertEqual(r.status_code, status.HTTP_500_INTERNAL_SERVER_ERROR) + + def test_experiment_ingest_success(self): + # Create the required phenopacket with a biosample first + r = self.client.post( + reverse("ingest-into-dataset", args=(self.dataset["identifier"], "phenopackets_json")), + content_type="application/json", + data=json.dumps(EXAMPLE_INGEST_PHENOPACKET), + ) + + # Ingest experiment + r = self.client.post( + reverse("ingest-into-dataset", args=(self.dataset["identifier"], "experiments_json")), + content_type="application/json", + data=json.dumps(EXAMPLE_INGEST_EXPERIMENT), + ) + c = r.json() + self.assertEqual(c["success"], True) + self.assertEqual(len(c["errors"]), 0) + self.assertEqual(len(c["warnings"]), 0) + self.assertEqual(r.status_code, status.HTTP_201_CREATED) diff --git a/chord_metadata_service/chord/workflows/wdls/experiments_json.wdl b/chord_metadata_service/chord/workflows/wdls/experiments_json.wdl index 881545a0c..eeb26318a 100644 --- a/chord_metadata_service/chord/workflows/wdls/experiments_json.wdl +++ b/chord_metadata_service/chord/workflows/wdls/experiments_json.wdl @@ -37,7 +37,7 @@ task ingest_task { -H "Authorization: Bearer ~{token}" \ --data "@~{json_document}" \ "~{katsu_url}/ingest/~{dataset_id}/experiments_json") - if [[ "${RESPONSE}" != "204" ]] + if [[ "${RESPONSE}" != "201" ]] then echo "Error: Metadata service replied with ${RESPONSE}" 1>&2 # to stderr exit 1 diff --git a/chord_metadata_service/chord/workflows/wdls/phenopackets_json.wdl b/chord_metadata_service/chord/workflows/wdls/phenopackets_json.wdl index f0e05c18d..f423fb673 100644 --- a/chord_metadata_service/chord/workflows/wdls/phenopackets_json.wdl +++ b/chord_metadata_service/chord/workflows/wdls/phenopackets_json.wdl @@ -37,7 +37,7 @@ task ingest_task { -H "Authorization: Bearer ~{token}" \ --data "@~{json_document}" \ "~{katsu_url}/ingest/~{dataset_id}/phenopackets_json") - if [[ "${RESPONSE}" != "204" ]] + if [[ "${RESPONSE}" != "201" ]] then echo "Error: Metadata service replied with ${RESPONSE}" 1>&2 # to stderr exit 1 From 948f6bcae3b9e97ee7b4727fbdcc1a01bc3ba8aa Mon Sep 17 00:00:00 2001 From: v-rocheleau Date: Wed, 20 Sep 2023 17:14:39 +0000 Subject: [PATCH 17/25] lint --- chord_metadata_service/chord/ingest/exceptions.py | 2 +- chord_metadata_service/chord/ingest/experiments.py | 2 +- .../chord/ingest/phenopackets.py | 2 +- .../chord/tests/test_api_ingest.py | 14 ++++++-------- 4 files changed, 9 insertions(+), 11 deletions(-) diff --git a/chord_metadata_service/chord/ingest/exceptions.py b/chord_metadata_service/chord/ingest/exceptions.py index 431e41242..954118629 100644 --- a/chord_metadata_service/chord/ingest/exceptions.py +++ b/chord_metadata_service/chord/ingest/exceptions.py @@ -94,7 +94,7 @@ def parse_schema_warnings(data: dict, schema: dict) -> Optional[List[dict]]: warnings = [] for (version, version_changes) in applicable_changes.items(): for (prop_name, changes) in version_changes.get("properties", {}).items(): - if property_warning:= parse_property_warnings(data, prop_name, changes): + if property_warning := parse_property_warnings(data, prop_name, changes): property_warning["version"] = version warnings.append(property_warning) return warnings if len(warnings) else None diff --git a/chord_metadata_service/chord/ingest/experiments.py b/chord_metadata_service/chord/ingest/experiments.py index 03a7aaf3c..df9713547 100644 --- a/chord_metadata_service/chord/ingest/experiments.py +++ b/chord_metadata_service/chord/ingest/experiments.py @@ -56,7 +56,7 @@ def create_experiment_result(er: dict) -> em.ExperimentResult: def validate_experiment(experiment_data, idx: Optional[int] = None) -> None: # Validate experiment data against experiments schema. - if val_errors:= schema_validation(experiment_data, EXPERIMENT_SCHEMA): + if val_errors := schema_validation(experiment_data, EXPERIMENT_SCHEMA): raise IngestError( data=experiment_data, schema=EXPERIMENT_SCHEMA, diff --git a/chord_metadata_service/chord/ingest/phenopackets.py b/chord_metadata_service/chord/ingest/phenopackets.py index 14b87f65c..133b71bf0 100644 --- a/chord_metadata_service/chord/ingest/phenopackets.py +++ b/chord_metadata_service/chord/ingest/phenopackets.py @@ -60,7 +60,7 @@ def validate_phenopacket(phenopacket_data: dict[str, Any], schema: dict = PHENOPACKET_SCHEMA, idx: Optional[int] = None) -> None: # Validate phenopacket data against phenopackets schema. - if val_errors:= schema_validation(phenopacket_data, schema): + if val_errors := schema_validation(phenopacket_data, schema): raise IngestError( data=phenopacket_data, schema=PHENOPACKET_SCHEMA, diff --git a/chord_metadata_service/chord/tests/test_api_ingest.py b/chord_metadata_service/chord/tests/test_api_ingest.py index 15cbb2f10..ced50a5c0 100644 --- a/chord_metadata_service/chord/tests/test_api_ingest.py +++ b/chord_metadata_service/chord/tests/test_api_ingest.py @@ -3,9 +3,8 @@ from django.urls import reverse from rest_framework import status from rest_framework.test import APITestCase -from chord_metadata_service.chord.tests.example_ingest import EXAMPLE_INGEST_EXPERIMENT, EXAMPLE_INGEST_INVALID_EXPERIMENT, EXAMPLE_INGEST_INVALID_PHENOPACKET, EXAMPLE_INGEST_PHENOPACKET - -from chord_metadata_service.restapi.tests.utils import load_local_json +from chord_metadata_service.chord.tests.example_ingest import EXAMPLE_INGEST_EXPERIMENT, \ + EXAMPLE_INGEST_INVALID_EXPERIMENT, EXAMPLE_INGEST_INVALID_PHENOPACKET, EXAMPLE_INGEST_PHENOPACKET from .constants import VALID_PROJECT_1, valid_dataset_1 from ..workflows.metadata import METADATA_WORKFLOWS @@ -88,7 +87,7 @@ def test_phenopackets_ingest(self): c = r.json() self.assertEqual(r.status_code, status.HTTP_500_INTERNAL_SERVER_ERROR) self.assertEqual(c["success"], False) - self.assertEqual(len(c["errors"]), 1) # 1 required property + self.assertEqual(len(c["errors"]), 1) # 1 required property # Invalid phenopacket JSON validation r = self.client.post( @@ -113,7 +112,6 @@ def test_phenopackets_ingest(self): self.assertEqual(len(c["warnings"]), 0) self.assertEqual(r.status_code, status.HTTP_201_CREATED) - def test_experiments_ingest_failures(self): # Invalid workflow ID r = self.client.post( @@ -133,7 +131,7 @@ def test_experiments_ingest_failures(self): c = r.json() self.assertEqual(r.status_code, status.HTTP_400_BAD_REQUEST) self.assertEqual(c["success"], False) - self.assertEqual(len(c["errors"]), 2) # 2 required properties + self.assertEqual(len(c["errors"]), 2) # 2 required properties # Bad ingestion body JSON r = self.client.post( @@ -156,7 +154,7 @@ def test_experiments_ingest_failures(self): self.assertEqual(r.status_code, status.HTTP_400_BAD_REQUEST) self.assertEqual(c["success"], False) self.assertEqual(len(c["errors"]), 4) - + # Two of the errors concern experiment schema changes warnings = c["warnings"] self.assertEqual(len(warnings), 2) @@ -174,7 +172,7 @@ def test_experiments_ingest_failures(self): self.assertEqual(c["success"], False) self.assertEqual(len(c["errors"]), 1) self.assertEqual(r.status_code, status.HTTP_500_INTERNAL_SERVER_ERROR) - + def test_experiment_ingest_success(self): # Create the required phenopacket with a biosample first r = self.client.post( From db378d249986c3565cfeb19ec674dca87996cd97 Mon Sep 17 00:00:00 2001 From: v-rocheleau Date: Wed, 20 Sep 2023 18:36:41 +0000 Subject: [PATCH 18/25] add ingestion tests, exp workflow payload schema --- .../chord/ingest/exceptions.py | 2 +- .../chord/ingest/experiments.py | 23 ++++-- chord_metadata_service/chord/ingest/views.py | 13 ++-- .../example_experiment bad_resource.json | 78 +++++++++++++++++++ .../chord/tests/example_ingest.py | 1 + .../chord/tests/test_api_ingest.py | 28 ++++++- chord_metadata_service/experiments/schemas.py | 21 +++++ 7 files changed, 150 insertions(+), 16 deletions(-) create mode 100644 chord_metadata_service/chord/tests/example_experiment bad_resource.json diff --git a/chord_metadata_service/chord/ingest/exceptions.py b/chord_metadata_service/chord/ingest/exceptions.py index 954118629..a359d36fe 100644 --- a/chord_metadata_service/chord/ingest/exceptions.py +++ b/chord_metadata_service/chord/ingest/exceptions.py @@ -60,7 +60,7 @@ def parse_property_warnings(data: dict, prop_name: str, property_changes: List[t if value.lower() == old_value.lower(): return property_warning - # Only warn when mecessary + # Only warn when necessary return None diff --git a/chord_metadata_service/chord/ingest/experiments.py b/chord_metadata_service/chord/ingest/experiments.py index df9713547..183c668c7 100644 --- a/chord_metadata_service/chord/ingest/experiments.py +++ b/chord_metadata_service/chord/ingest/experiments.py @@ -4,7 +4,8 @@ from chord_metadata_service.chord.models import Dataset from chord_metadata_service.experiments import models as em -from chord_metadata_service.experiments.schemas import EXPERIMENT_SCHEMA, EXPERIMENT_RESULT_SCHEMA +from chord_metadata_service.experiments.schemas import EXPERIMENT_SCHEMA, \ + EXPERIMENT_RESULT_SCHEMA, EXPERIMENT_WORKFLOW_SCHEMA from chord_metadata_service.phenopackets import models as pm from typing import Optional @@ -66,6 +67,16 @@ def validate_experiment(experiment_data, idx: Optional[int] = None) -> None: ) +def validate_experiment_workflow(json_data: dict) -> None: + if val_errors := schema_validation(json_data, EXPERIMENT_WORKFLOW_SCHEMA): + raise IngestError( + data=json_data, + schema=EXPERIMENT_WORKFLOW_SCHEMA, + schema_validation_errors=val_errors, + message="Failed schema validation for experiments ingestion workflow payload.", + ) + + def ingest_experiment( experiment_data: dict, dataset_id: str, @@ -141,6 +152,9 @@ def ingest_experiment( def ingest_experiments_workflow(json_data, dataset_id: str) -> list[em.Experiment]: + # First, validate the workflow's json_data + validate_experiment_workflow(json_data) + dataset = Dataset.objects.get(identifier=dataset_id) for rs in json_data.get("resources", []): @@ -148,12 +162,7 @@ def ingest_experiments_workflow(json_data, dataset_id: str) -> list[em.Experimen exps = json_data.get("experiments", []) - if len(exps) == 0: - # If empty experiments array - # Validate an empty json to raise an IngestError with validation details - validate_experiment({}) - - # First, validate all experiments with the schema before creating anything in the database. + # Second, validate all experiments with the schema before creating anything in the database. for idx, exp in enumerate(exps): validate_experiment(exp, idx) diff --git a/chord_metadata_service/chord/ingest/views.py b/chord_metadata_service/chord/ingest/views.py index 2441d89e2..d587e216e 100644 --- a/chord_metadata_service/chord/ingest/views.py +++ b/chord_metadata_service/chord/ingest/views.py @@ -7,6 +7,7 @@ from django.core.exceptions import ValidationError from django.db import transaction from jsonschema import Draft7Validator +from rest_framework import status from rest_framework.decorators import api_view, permission_classes from rest_framework.permissions import AllowAny from rest_framework.response import Response @@ -73,12 +74,12 @@ def ingest_into_dataset(request, dataset_id: str, workflow_id: str): # Check that the workflow exists if workflow_id not in WORKFLOW_INGEST_FUNCTION_MAP: response_builder.add_error(f"Ingestion workflow ID {workflow_id} does not exist") - return response_builder.as_response(400) + return response_builder.as_response(status.HTTP_400_BAD_REQUEST) if dataset_id not in DATASET_ID_OVERRIDES: if not Dataset.objects.filter(identifier=dataset_id).exists(): response_builder.add_error(f"Dataset with ID {dataset_id} does not exist") - return response_builder.as_response(400) + return response_builder.as_response(status.HTTP_400_BAD_REQUEST) dataset_id = str(uuid.UUID(dataset_id)) # Normalize dataset ID to UUID's str format. try: @@ -88,17 +89,17 @@ def ingest_into_dataset(request, dataset_id: str, workflow_id: str): except IngestError as e: response_builder.add_ingest_error(e) - return response_builder.as_response(400) + return response_builder.as_response(status.HTTP_400_BAD_REQUEST) except ValidationError as e: response_builder.add_errors(e.error_list if hasattr(e, "error_list") else e.error_dict.items()) - return response_builder.as_response(400) + return response_builder.as_response(status.HTTP_400_BAD_REQUEST) except Exception as e: # Encountered some other error from the ingestion attempt, return a somewhat detailed message logger.error(f"Encountered an exception while processing an ingest attempt:\n{traceback.format_exc()}") response_builder.add_error(f"Encountered an exception while processing an ingest attempt (error: {repr(e)})") - return response_builder.as_response(500) + return response_builder.as_response(status.HTTP_500_INTERNAL_SERVER_ERROR) response_builder.set_success(True) - return response_builder.as_response(201) + return response_builder.as_response(status.HTTP_201_CREATED) diff --git a/chord_metadata_service/chord/tests/example_experiment bad_resource.json b/chord_metadata_service/chord/tests/example_experiment bad_resource.json new file mode 100644 index 000000000..910f42417 --- /dev/null +++ b/chord_metadata_service/chord/tests/example_experiment bad_resource.json @@ -0,0 +1,78 @@ +{ + "experiments": [ + { + "id": "experiment:1", + "biosample": "sample1", + "study_type": "Epigenomics", + "experiment_type": "Other", + "experiment_ontology": [ + { + "id": "http://www.ebi.ac.uk/efo/EFO_0002692", + "label": "ChIP-seq" + } + ], + "library_strategy": "ChIP-Seq", + "library_source": "Genomic", + "library_selection": "RANDOM", + "library_layout": "Single", + "extraction_protocol": "NGS", + "molecule": "genomic DNA", + "molecule_ontology": [ + { + "id": "SO:0000991", + "label": "genomic DNA" + } + ], + "experiment_results": [ + { + "identifier": "sample1_01", + "description": "test", + "filename": "sample1_01.vcf.gz", + "file_format": "VCF", + "data_output_type": "Derived data", + "usage": "Visualized", + "creation_date": "01-09-2021", + "created_by": "Admin", + "extra_properties": { + "test": "test" + } + }, + { + "identifier": "sample1_02", + "description": "test2", + "filename": "sample1_02.vcf.gz", + "file_format": "CRAM", + "data_output_type": "Raw data", + "usage": "Visualized", + "creation_date": "01-09-2021", + "created_by": "Admin", + "extra_properties": { + "test": "test" + } + } + ], + "instrument": { + "identifier": "instrument:01", + "platform": "Illumina", + "description": "Test description", + "model": "Illumina HiSeq 4000", + "extra_properties": { + "date": "2021-06-21" + } + }, + "extra_properties": { + "date_uploaded": "2021-03-16" + } + } + ], + "resources": [ + { + "name": "Sequence types and features ontology", + "version": "THIS_VALUE_BREAKS_VALIDATION", + "namespace_prefix": "SO", + "id": "SO:2021-02-16", + "iri_prefix": "http://purl.obolibrary.org/obo/so.owl#", + "url": "http://purl.obolibrary.org/obo/so.owl" + } + ] +} diff --git a/chord_metadata_service/chord/tests/example_ingest.py b/chord_metadata_service/chord/tests/example_ingest.py index 00ea712b8..6e812db42 100644 --- a/chord_metadata_service/chord/tests/example_ingest.py +++ b/chord_metadata_service/chord/tests/example_ingest.py @@ -16,6 +16,7 @@ EXAMPLE_INGEST_EXPERIMENT = load_local_json("example_experiment.json") EXAMPLE_INGEST_EXPERIMENT_BAD_BIOSAMPLE = load_local_json("example_experiment_bad_biosample.json") +EXAMPLE_INGEST_EXPERIMENT_BAD_RESOURCE = load_local_json("example_experiment bad_resource.json") EXAMPLE_INGEST_INVALID_EXPERIMENT = load_local_json("example_invalid_experiment.json") EXAMPLE_INGEST_EXPERIMENT_RESULT = load_local_json("example_derived_experiment_result.json") diff --git a/chord_metadata_service/chord/tests/test_api_ingest.py b/chord_metadata_service/chord/tests/test_api_ingest.py index ced50a5c0..02addf6c3 100644 --- a/chord_metadata_service/chord/tests/test_api_ingest.py +++ b/chord_metadata_service/chord/tests/test_api_ingest.py @@ -1,10 +1,12 @@ import json +import uuid from django.urls import reverse from rest_framework import status from rest_framework.test import APITestCase from chord_metadata_service.chord.tests.example_ingest import EXAMPLE_INGEST_EXPERIMENT, \ - EXAMPLE_INGEST_INVALID_EXPERIMENT, EXAMPLE_INGEST_INVALID_PHENOPACKET, EXAMPLE_INGEST_PHENOPACKET + EXAMPLE_INGEST_EXPERIMENT_BAD_RESOURCE, EXAMPLE_INGEST_INVALID_EXPERIMENT, \ + EXAMPLE_INGEST_INVALID_PHENOPACKET, EXAMPLE_INGEST_PHENOPACKET from .constants import VALID_PROJECT_1, valid_dataset_1 from ..workflows.metadata import METADATA_WORKFLOWS @@ -131,7 +133,7 @@ def test_experiments_ingest_failures(self): c = r.json() self.assertEqual(r.status_code, status.HTTP_400_BAD_REQUEST) self.assertEqual(c["success"], False) - self.assertEqual(len(c["errors"]), 2) # 2 required properties + self.assertEqual(len(c["errors"]), 1) # Bad ingestion body JSON r = self.client.post( @@ -173,6 +175,28 @@ def test_experiments_ingest_failures(self): self.assertEqual(len(c["errors"]), 1) self.assertEqual(r.status_code, status.HTTP_500_INTERNAL_SERVER_ERROR) + # Wrong dataset id + r = self.client.post( + reverse("ingest-into-dataset", args=(uuid.uuid4(), "experiments_json")), + content_type="application/json", + data=json.dumps(EXAMPLE_INGEST_EXPERIMENT), + ) + c = r.json() + self.assertEqual(c["success"], False) + self.assertEqual(len(c["errors"]), 1) + self.assertEqual(r.status_code, status.HTTP_400_BAD_REQUEST) + + # Invalid resource + r = self.client.post( + reverse("ingest-into-dataset", args=(self.dataset["identifier"], "experiments_json")), + content_type="application/json", + data=json.dumps(EXAMPLE_INGEST_EXPERIMENT_BAD_RESOURCE), + ) + c = r.json() + self.assertEqual(c["success"], False) + self.assertEqual(len(c["errors"]), 1) + self.assertEqual(r.status_code, status.HTTP_400_BAD_REQUEST) + def test_experiment_ingest_success(self): # Create the required phenopacket with a biosample first r = self.client.post( diff --git a/chord_metadata_service/experiments/schemas.py b/chord_metadata_service/experiments/schemas.py index fe81c4cc7..c288f3c07 100644 --- a/chord_metadata_service/experiments/schemas.py +++ b/chord_metadata_service/experiments/schemas.py @@ -157,6 +157,27 @@ }, EXPERIMENT) +EXPERIMENT_WORKFLOW_SCHEMA = { + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "katsu:experiments:experiment_workflow_schema", + "title": "Experiment workflow schema", + "description": "Schema that describes the shape \ + of an experiment workflow ingestion", + "type": "object", + "properties": { + "experiments": { + "type": "array", + "items": {"type": "object"}, + "minItems": 1, + }, + "resources": { + "type": "array", + "items": {"type": "object"}, + } + }, + "required": ["experiments"] +} + """ Dictionary of schema changes for warnings. """ From 881f792ca57e049d3d93c91be0d27cc52a9cc509 Mon Sep 17 00:00:00 2001 From: v-rocheleau Date: Wed, 20 Sep 2023 18:40:37 +0000 Subject: [PATCH 19/25] infer success from status code --- chord_metadata_service/chord/ingest/views.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/chord_metadata_service/chord/ingest/views.py b/chord_metadata_service/chord/ingest/views.py index d587e216e..a6f669987 100644 --- a/chord_metadata_service/chord/ingest/views.py +++ b/chord_metadata_service/chord/ingest/views.py @@ -32,13 +32,9 @@ class IngestResponseBuilder: def __init__(self, workflow_id: str, dataset_id: str): self.workflow_id = workflow_id self.dataset_id = dataset_id - self.success = False self.errors = [] self.warnings = [] - def set_success(self, success: bool): - self.success = success - def add_error(self, error): self.errors.append(error) @@ -56,7 +52,7 @@ def add_ingest_error(self, error: IngestError): def as_response(self, status_code: int) -> Response: body = { - "success": self.success, + "success": status_code < status.HTTP_400_BAD_REQUEST, "warnings": self.warnings, "errors": self.errors, } From b35997332ae496f1df1ec17ec3b79a1543a33d68 Mon Sep 17 00:00:00 2001 From: v-rocheleau Date: Wed, 20 Sep 2023 18:42:28 +0000 Subject: [PATCH 20/25] remove line call --- chord_metadata_service/chord/ingest/views.py | 1 - 1 file changed, 1 deletion(-) diff --git a/chord_metadata_service/chord/ingest/views.py b/chord_metadata_service/chord/ingest/views.py index a6f669987..cb65e23dd 100644 --- a/chord_metadata_service/chord/ingest/views.py +++ b/chord_metadata_service/chord/ingest/views.py @@ -97,5 +97,4 @@ def ingest_into_dataset(request, dataset_id: str, workflow_id: str): response_builder.add_error(f"Encountered an exception while processing an ingest attempt (error: {repr(e)})") return response_builder.as_response(status.HTTP_500_INTERNAL_SERVER_ERROR) - response_builder.set_success(True) return response_builder.as_response(status.HTTP_201_CREATED) From 9b58dc4efbeb5cc2b1dee0c36a508426bbab93a2 Mon Sep 17 00:00:00 2001 From: v-rocheleau Date: Wed, 20 Sep 2023 18:49:57 +0000 Subject: [PATCH 21/25] get ingestion warnings from derived experiment results ingestion --- chord_metadata_service/chord/ingest/experiments.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/chord_metadata_service/chord/ingest/experiments.py b/chord_metadata_service/chord/ingest/experiments.py index 183c668c7..c852840ee 100644 --- a/chord_metadata_service/chord/ingest/experiments.py +++ b/chord_metadata_service/chord/ingest/experiments.py @@ -184,6 +184,8 @@ def ingest_derived_experiment_results(json_data: list[dict]) -> list[em.Experime if val_errors: # TODO: Report more precise errors raise IngestError( + data=exp_result, + schema=EXPERIMENT_RESULT_SCHEMA, schema_validation_errors=val_errors, message=f"Failed schema validation for experiment result {idx} " f"(check Katsu logs for more information)" From dd0f66c9fb0154c4277314dee51db0805527b208 Mon Sep 17 00:00:00 2001 From: v-rocheleau Date: Wed, 20 Sep 2023 21:19:32 +0000 Subject: [PATCH 22/25] save ingest report to file and output --- .../chord/workflows/wdls/experiments_json.wdl | 12 +++++++++--- .../chord/workflows/wdls/phenopackets_json.wdl | 8 +++++++- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/chord_metadata_service/chord/workflows/wdls/experiments_json.wdl b/chord_metadata_service/chord/workflows/wdls/experiments_json.wdl index eeb26318a..b8a1427d6 100644 --- a/chord_metadata_service/chord/workflows/wdls/experiments_json.wdl +++ b/chord_metadata_service/chord/workflows/wdls/experiments_json.wdl @@ -15,12 +15,14 @@ workflow experiments_json { json_document = json_document, katsu_url = katsu_url, dataset_id = dataset_id, - token = secret__access_token + token = secret__access_token, + ingest_report = "~{run_dir}/ingest_report.json" } output { File stdout = ingest_task.txt_output File stderr = ingest_task.err_output + File ingest_report = ingest_task.ingest_report } } @@ -30,14 +32,17 @@ task ingest_task { String katsu_url String dataset_id String token + String ingest_report } command <<< RESPONSE=$(curl -X POST -k -s -w "%{http_code}" \ -H "Content-Type: application/json" \ -H "Authorization: Bearer ~{token}" \ --data "@~{json_document}" \ - "~{katsu_url}/ingest/~{dataset_id}/experiments_json") - if [[ "${RESPONSE}" != "201" ]] + -o "~{ingest_report}" \ + "~{katsu_url}/ingest/~{dataset_id}/experiments_json" | jq) + + if [[ "${RESPONSE}" != true ]] then echo "Error: Metadata service replied with ${RESPONSE}" 1>&2 # to stderr exit 1 @@ -48,5 +53,6 @@ task ingest_task { output { File txt_output = stdout() File err_output = stderr() + File ingest_report = "~{ingest_report}" } } diff --git a/chord_metadata_service/chord/workflows/wdls/phenopackets_json.wdl b/chord_metadata_service/chord/workflows/wdls/phenopackets_json.wdl index f423fb673..80c6d08d0 100644 --- a/chord_metadata_service/chord/workflows/wdls/phenopackets_json.wdl +++ b/chord_metadata_service/chord/workflows/wdls/phenopackets_json.wdl @@ -15,12 +15,14 @@ workflow phenopackets_json { json_document = json_document, katsu_url = katsu_url, dataset_id = dataset_id, - token = secret__access_token + token = secret__access_token, + ingest_report = "~{run_dir}/ingest_report.json" } output { File stdout = ingest_task.txt_output File stderr = ingest_task.err_output + File stderr = ingest_task.err_output } } @@ -30,13 +32,16 @@ task ingest_task { String katsu_url String dataset_id String token + String ingest_report } command <<< RESPONSE=$(curl -X POST -k -s -w "%{http_code}" \ -H "Content-Type: application/json" \ -H "Authorization: Bearer ~{token}" \ --data "@~{json_document}" \ + -o "~{ingest_report}" \ "~{katsu_url}/ingest/~{dataset_id}/phenopackets_json") + if [[ "${RESPONSE}" != "201" ]] then echo "Error: Metadata service replied with ${RESPONSE}" 1>&2 # to stderr @@ -48,5 +53,6 @@ task ingest_task { output { File txt_output = stdout() File err_output = stderr() + File ingest_report = "~{ingest_report}" } } From f9fa0f9930875a170eb5a273db248f84a25bebb2 Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Thu, 18 Jan 2024 16:30:23 -0500 Subject: [PATCH 23/25] fix migrations, lint --- chord_metadata_service/chord/ingest/schema.py | 2 -- .../experiments/migrations/{0009_v4_1_0.py => 0010_v6_2_0.py} | 2 +- chord_metadata_service/phenopackets/schemas.py | 2 +- 3 files changed, 2 insertions(+), 4 deletions(-) rename chord_metadata_service/experiments/migrations/{0009_v4_1_0.py => 0010_v6_2_0.py} (96%) diff --git a/chord_metadata_service/chord/ingest/schema.py b/chord_metadata_service/chord/ingest/schema.py index dbf531a64..577ad1e5a 100644 --- a/chord_metadata_service/chord/ingest/schema.py +++ b/chord_metadata_service/chord/ingest/schema.py @@ -1,4 +1,3 @@ -from ctypes import Array from jsonschema import Draft7Validator from jsonschema.exceptions import ValidationError @@ -32,4 +31,3 @@ def schema_validation(obj, schema, registry=None): for i, error in enumerate(errors, 1): logger.error(f"{i} Validation error in {'.'.join(str(v) for v in error.path)}: {error.message}") return errors - diff --git a/chord_metadata_service/experiments/migrations/0009_v4_1_0.py b/chord_metadata_service/experiments/migrations/0010_v6_2_0.py similarity index 96% rename from chord_metadata_service/experiments/migrations/0009_v4_1_0.py rename to chord_metadata_service/experiments/migrations/0010_v6_2_0.py index 532935887..edadedcad 100644 --- a/chord_metadata_service/experiments/migrations/0009_v4_1_0.py +++ b/chord_metadata_service/experiments/migrations/0010_v6_2_0.py @@ -24,7 +24,7 @@ def set_experiment_library(apps, _schema_editor): class Migration(migrations.Migration): dependencies = [ - ('experiments', '0007_v4_0_0'), + ('experiments', '0009_v6_0_0'), ] operations = [ diff --git a/chord_metadata_service/phenopackets/schemas.py b/chord_metadata_service/phenopackets/schemas.py index 51be759b9..a5dc4d042 100644 --- a/chord_metadata_service/phenopackets/schemas.py +++ b/chord_metadata_service/phenopackets/schemas.py @@ -658,7 +658,7 @@ "required": ["id", "meta_data"], }, descriptions.PHENOPACKET) -VRS_REF_RESOURCE = Resource.from_contents(contents=vrs_schema_definitions, default_specification=DRAFT_07) +VRS_REF_RESOURCE = Resource.from_contents(contents=vrs_schema_definitions) VRS_REF_REGISTRY = VRS_REF_RESOURCE @ Registry() resolver = VRS_REF_REGISTRY.resolver() From 4148d36823c04432a7838ff6f98df1b0f0e701ea Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Thu, 18 Jan 2024 16:59:49 -0500 Subject: [PATCH 24/25] schema changes version update --- chord_metadata_service/chord/ingest/exceptions.py | 2 +- .../experiments/migrations/0010_v6_2_0.py | 6 +++--- chord_metadata_service/experiments/schemas.py | 2 +- pyproject.toml | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/chord_metadata_service/chord/ingest/exceptions.py b/chord_metadata_service/chord/ingest/exceptions.py index a359d36fe..7f362a715 100644 --- a/chord_metadata_service/chord/ingest/exceptions.py +++ b/chord_metadata_service/chord/ingest/exceptions.py @@ -84,7 +84,7 @@ def parse_schema_warnings(data: dict, schema: dict) -> Optional[List[dict]]: if not data or not schema: return None - data_type = schema.get("$id", "").split(":")[-1] + data_type = schema.get("$id", "").split("/")[-1] applicable_changes = DATA_TYPE_SCHEMA_CHANGES.get(data_type, None) if not applicable_changes or __version__ not in applicable_changes: diff --git a/chord_metadata_service/experiments/migrations/0010_v6_2_0.py b/chord_metadata_service/experiments/migrations/0010_v6_2_0.py index edadedcad..c8754cc66 100644 --- a/chord_metadata_service/experiments/migrations/0010_v6_2_0.py +++ b/chord_metadata_service/experiments/migrations/0010_v6_2_0.py @@ -2,9 +2,9 @@ from django.db import migrations from chord_metadata_service.experiments.schemas import EXPERIMENT_SCHEMA_CHANGES -V4_1_0_PROPERTIES = EXPERIMENT_SCHEMA_CHANGES["4.1.0"]["properties"] -LIB_STRATEGY_CONVERSIONS = V4_1_0_PROPERTIES["library_strategy"] -LIB_SELECTION_CONVERIONS = V4_1_0_PROPERTIES["library_selection"] +V6_2_0_PROPERTIES = EXPERIMENT_SCHEMA_CHANGES["6.2.0"]["properties"] +LIB_STRATEGY_CONVERSIONS = V6_2_0_PROPERTIES["library_strategy"] +LIB_SELECTION_CONVERIONS = V6_2_0_PROPERTIES["library_selection"] def set_experiment_library(apps, _schema_editor): diff --git a/chord_metadata_service/experiments/schemas.py b/chord_metadata_service/experiments/schemas.py index 2c7724ef5..92b780ea0 100644 --- a/chord_metadata_service/experiments/schemas.py +++ b/chord_metadata_service/experiments/schemas.py @@ -187,7 +187,7 @@ Dictionary of schema changes for warnings. """ EXPERIMENT_SCHEMA_CHANGES = { - "4.1.0": { + "6.2.0": { "properties": { "library_strategy": [ ("WES", "WXS"), diff --git a/pyproject.toml b/pyproject.toml index e4d4cae3f..3160308fa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api" [tool.poetry] name = "chord_metadata_service" # can be renamed to katsu if inner module directory is renamed too -version = "6.0.0" +version = "6.2.0" description = "An implementation of a clin/pheno metadata store for the Bento platform." authors = [ "Ksenia Zaytseva", From bbef730d1db8042583083ce4d70d5e0071c6a929 Mon Sep 17 00:00:00 2001 From: v-rocheleau Date: Fri, 19 Jan 2024 12:19:06 -0500 Subject: [PATCH 25/25] fix api ingest tests --- .../example_experiment bad_resource.json | 78 ------------------- .../chord/tests/example_ingest.py | 1 - .../chord/tests/test_api_ingest.py | 13 ++-- 3 files changed, 6 insertions(+), 86 deletions(-) delete mode 100644 chord_metadata_service/chord/tests/example_experiment bad_resource.json diff --git a/chord_metadata_service/chord/tests/example_experiment bad_resource.json b/chord_metadata_service/chord/tests/example_experiment bad_resource.json deleted file mode 100644 index 910f42417..000000000 --- a/chord_metadata_service/chord/tests/example_experiment bad_resource.json +++ /dev/null @@ -1,78 +0,0 @@ -{ - "experiments": [ - { - "id": "experiment:1", - "biosample": "sample1", - "study_type": "Epigenomics", - "experiment_type": "Other", - "experiment_ontology": [ - { - "id": "http://www.ebi.ac.uk/efo/EFO_0002692", - "label": "ChIP-seq" - } - ], - "library_strategy": "ChIP-Seq", - "library_source": "Genomic", - "library_selection": "RANDOM", - "library_layout": "Single", - "extraction_protocol": "NGS", - "molecule": "genomic DNA", - "molecule_ontology": [ - { - "id": "SO:0000991", - "label": "genomic DNA" - } - ], - "experiment_results": [ - { - "identifier": "sample1_01", - "description": "test", - "filename": "sample1_01.vcf.gz", - "file_format": "VCF", - "data_output_type": "Derived data", - "usage": "Visualized", - "creation_date": "01-09-2021", - "created_by": "Admin", - "extra_properties": { - "test": "test" - } - }, - { - "identifier": "sample1_02", - "description": "test2", - "filename": "sample1_02.vcf.gz", - "file_format": "CRAM", - "data_output_type": "Raw data", - "usage": "Visualized", - "creation_date": "01-09-2021", - "created_by": "Admin", - "extra_properties": { - "test": "test" - } - } - ], - "instrument": { - "identifier": "instrument:01", - "platform": "Illumina", - "description": "Test description", - "model": "Illumina HiSeq 4000", - "extra_properties": { - "date": "2021-06-21" - } - }, - "extra_properties": { - "date_uploaded": "2021-03-16" - } - } - ], - "resources": [ - { - "name": "Sequence types and features ontology", - "version": "THIS_VALUE_BREAKS_VALIDATION", - "namespace_prefix": "SO", - "id": "SO:2021-02-16", - "iri_prefix": "http://purl.obolibrary.org/obo/so.owl#", - "url": "http://purl.obolibrary.org/obo/so.owl" - } - ] -} diff --git a/chord_metadata_service/chord/tests/example_ingest.py b/chord_metadata_service/chord/tests/example_ingest.py index 4511f6527..3d57e6024 100644 --- a/chord_metadata_service/chord/tests/example_ingest.py +++ b/chord_metadata_service/chord/tests/example_ingest.py @@ -16,7 +16,6 @@ EXAMPLE_INGEST_EXPERIMENT = load_local_json("example_experiment.json") EXAMPLE_INGEST_EXPERIMENT_BAD_BIOSAMPLE = load_local_json("example_experiment_bad_biosample.json") -EXAMPLE_INGEST_EXPERIMENT_BAD_RESOURCE = load_local_json("example_experiment bad_resource.json") EXAMPLE_INGEST_INVALID_EXPERIMENT = load_local_json("example_invalid_experiment.json") EXAMPLE_INGEST_EXPERIMENT_RESULT = load_local_json("example_derived_experiment_result.json") diff --git a/chord_metadata_service/chord/tests/test_api_ingest.py b/chord_metadata_service/chord/tests/test_api_ingest.py index e080d5ef5..e90bc4196 100644 --- a/chord_metadata_service/chord/tests/test_api_ingest.py +++ b/chord_metadata_service/chord/tests/test_api_ingest.py @@ -5,8 +5,7 @@ from rest_framework import status from rest_framework.test import APITestCase from chord_metadata_service.chord.tests.example_ingest import EXAMPLE_INGEST_EXPERIMENT, \ - EXAMPLE_INGEST_EXPERIMENT_BAD_RESOURCE, EXAMPLE_INGEST_INVALID_EXPERIMENT, \ - EXAMPLE_INGEST_INVALID_PHENOPACKET, EXAMPLE_INGEST_PHENOPACKET + EXAMPLE_INGEST_INVALID_EXPERIMENT, EXAMPLE_INGEST_INVALID_PHENOPACKET, EXAMPLE_INGEST_PHENOPACKET from .constants import VALID_PROJECT_1, valid_dataset_1 from ..workflows.metadata import workflow_set, WORKFLOW_PHENOPACKETS_JSON @@ -75,7 +74,7 @@ def test_phenopackets_ingest(self): c = r.json() self.assertEqual(r.status_code, status.HTTP_400_BAD_REQUEST) self.assertEqual(c["success"], False) - self.assertEqual(len(c["errors"]), 1) + self.assertEqual(len(c["errors"]), 2) # 2 required properties missing: 'id' and 'meta_data' # Bad ingestion body JSON r = self.client.post( @@ -97,7 +96,7 @@ def test_phenopackets_ingest(self): c = r.json() self.assertEqual(r.status_code, status.HTTP_400_BAD_REQUEST) self.assertEqual(c["success"], False) - self.assertEqual(len(c["errors"]), 2) + self.assertEqual(len(c["errors"]), 1) # missing required phenopacket ID # Success r = self.client.post( @@ -183,16 +182,16 @@ def test_experiments_ingest_failures(self): self.assertEqual(len(c["errors"]), 1) self.assertEqual(r.status_code, status.HTTP_400_BAD_REQUEST) - # Invalid resource + # Missing biosample r = self.client.post( reverse("ingest-into-dataset", args=(self.dataset["identifier"], "experiments_json")), content_type="application/json", - data=json.dumps(EXAMPLE_INGEST_EXPERIMENT_BAD_RESOURCE), + data=json.dumps(EXAMPLE_INGEST_EXPERIMENT), ) c = r.json() self.assertEqual(c["success"], False) self.assertEqual(len(c["errors"]), 1) - self.assertEqual(r.status_code, status.HTTP_400_BAD_REQUEST) + self.assertEqual(r.status_code, status.HTTP_500_INTERNAL_SERVER_ERROR) def test_experiment_ingest_success(self): # Create the required phenopacket with a biosample first