diff --git a/chord_metadata_service/chord/api_views.py b/chord_metadata_service/chord/api_views.py index 3886ce984..1c05e3b90 100644 --- a/chord_metadata_service/chord/api_views.py +++ b/chord_metadata_service/chord/api_views.py @@ -31,6 +31,7 @@ from chord_metadata_service.restapi.utils import response_optionally_as_attachment from .models import Project, Dataset, ProjectJsonSchema +from .related_fields import DATASET_PREFETCH, PROJECT_PREFETCH from .serializers import ( ProjectJsonSchemaSerializer, ProjectSerializer, @@ -74,7 +75,7 @@ class ProjectViewSet(CHORDPublicModelViewSet): Create a new project """ - queryset = Project.objects.all().order_by("identifier") + queryset = Project.objects.all().order_by("identifier").prefetch_related(*PROJECT_PREFETCH) serializer_class = ProjectSerializer @async_to_sync @@ -130,7 +131,7 @@ class DatasetViewSet(CHORDPublicModelViewSet): serializer_class = DatasetSerializer renderer_classes = tuple(CHORDPublicModelViewSet.renderer_classes) + (JSONLDDatasetRenderer, RDFDatasetRenderer,) - queryset = Dataset.objects.all().order_by("title") + queryset = Dataset.objects.all().order_by("title").prefetch_related(*DATASET_PREFETCH) @action(detail=True, methods=['get']) def dats(self, request: DrfRequest, *_args, **_kwargs): diff --git a/chord_metadata_service/chord/related_fields.py b/chord_metadata_service/chord/related_fields.py new file mode 100644 index 000000000..543a247d9 --- /dev/null +++ b/chord_metadata_service/chord/related_fields.py @@ -0,0 +1,3 @@ +DATASET_PREFETCH = ("additional_resources",) + +PROJECT_PREFETCH = ("project_schemas", "datasets", *(f"datasets__{p}" for p in DATASET_PREFETCH)) diff --git a/chord_metadata_service/chord/views_search.py b/chord_metadata_service/chord/views_search.py index a414a64f8..6766ebfd9 100644 --- a/chord_metadata_service/chord/views_search.py +++ b/chord_metadata_service/chord/views_search.py @@ -28,14 +28,12 @@ from chord_metadata_service.discovery.scope import ValidatedDiscoveryScope, get_request_discovery_scope -from chord_metadata_service.experiments.api_views import EXPERIMENT_SELECT_REL, EXPERIMENT_PREFETCH from chord_metadata_service.experiments.models import Experiment from chord_metadata_service.experiments.serializers import ExperimentSerializer from chord_metadata_service.experiments.summaries import dt_experiment_summary from chord_metadata_service.logger import logger as katsu_logger -from chord_metadata_service.phenopackets.api_views import PHENOPACKET_SELECT_REL, PHENOPACKET_PREFETCH from chord_metadata_service.phenopackets.models import Phenopacket from chord_metadata_service.phenopackets.serializers import PhenopacketSerializer from chord_metadata_service.phenopackets.summaries import dt_phenopacket_summary @@ -90,23 +88,26 @@ async def experiment_query_results( ): # TODO: possibly a quite inefficient way of doing things... # TODO: Prefetch related biosample or no? - queryset = Experiment.get_model_scoped_queryset(scope).filter( + queryset = Experiment.get_model_scoped_queryset(scope, prefetch_and_select_related="top_level").filter( id__in=await sync_to_async(data_type_results)(query, params, "id", logger)) output_format = options.get("output") if options else None if output_format == OUTPUT_FORMAT_VALUES_LIST: return get_values_list(queryset, options) - return queryset.select_related(*EXPERIMENT_SELECT_REL).prefetch_related(*EXPERIMENT_PREFETCH) + return queryset async def phenopacket_query_results( scope: ValidatedDiscoveryScope, query: sql.Composable, params, logger: BoundLogger, options: dict | None = None ): - queryset = Phenopacket.get_model_scoped_queryset(scope).filter( - id__in=await sync_to_async(data_type_results)(query, params, "id", logger)) - output_format = options.get("output") if options else None + + queryset = ( + Phenopacket.get_model_scoped_queryset(scope, prefetch_and_select_related="top_level") + .filter(id__in=await sync_to_async(data_type_results)(query, params, "id", logger)) + ) + if output_format == OUTPUT_FORMAT_VALUES_LIST: return get_values_list(queryset, options) @@ -136,7 +137,7 @@ async def phenopacket_query_results( return results else: - return queryset.select_related(*PHENOPACKET_SELECT_REL).prefetch_related(*PHENOPACKET_PREFETCH) + return queryset QUERY_RESULTS_FN: dict[ diff --git a/chord_metadata_service/discovery/api_views.py b/chord_metadata_service/discovery/api_views.py index 796f8da7d..ba26d2590 100644 --- a/chord_metadata_service/discovery/api_views.py +++ b/chord_metadata_service/discovery/api_views.py @@ -39,7 +39,7 @@ from .filtering import discovery_filter_queryset from .full_text_search import full_text_search_vector from .matches import DISCOVERY_ENTITY_TO_MATCH_FN, DISCOVERY_ENTITY_TO_CSV_RENDERER -from .model_lookups import DISCOVERY_ENTITY_NAMES_TO_DATA_TYPE +from .model_lookups import DISCOVERY_ENTITY_NAMES_TO_DATA_TYPE, DISCOVERY_ENTITY_NAMES_TO_MODEL from .pydantic_models import ( DiscoveryFieldResponse, DiscoveryFieldResponses, @@ -195,7 +195,10 @@ async def get_query_queryset_and_queried_entities( entity: DiscoveryEntity, lg: BoundLogger | None = None, validate_field: bool = True, + for_full_response: bool = False, ) -> tuple[QuerySet, frozenset[DiscoveryEntity]]: + lg = (lg or self._logger).bind(entity=entity) + # We use an async lock here to prevent executing the same entity query multiple times if we have parallel async # requests happening (liable to happen with field-level data collection in discovery_field_response, where we do # an asyncio.gather across all the fields). @@ -203,9 +206,8 @@ async def get_query_queryset_and_queried_entities( # "promise"/awaitable if one already exists. async with self._queryset_locks[entity]: if entity not in self._queryset_cache: - await (lg or self._logger).adebug( + await lg.adebug( "QueryHelper executing query", - entity=entity, query=self._query, cache_keys=tuple(self._queryset_cache.keys()), ) @@ -213,7 +215,16 @@ async def get_query_queryset_and_queried_entities( entity, lg, validate_field=validate_field ) - return self._queryset_cache[entity] + qs, es = self._queryset_cache[entity] + if for_full_response: + await lg.adebug("adding full response prefetch/select_related") + m = DISCOVERY_ENTITY_NAMES_TO_MODEL[entity] + qs = ( + qs.prefetch_related(*m.get_prefetch(top_level=True)) + .select_related(*m.get_select_related()) + ) + + return qs, es async def _get_entity_counts(self) -> EntityCounts: """ @@ -671,7 +682,7 @@ async def discovery_matches( try: query = DiscoveryQuery.from_drf_request(request) qh = QueryHelper(query, scope, dt_permissions, lg) - queryset, _ = await qh.get_query_queryset_and_queried_entities(queried_entity) + queryset, _ = await qh.get_query_queryset_and_queried_entities(queried_entity, for_full_response=True) queryset = queryset.order_by("pk") except ValidationError as e: return await dres.django_validation_error( diff --git a/chord_metadata_service/discovery/scopeable_model.py b/chord_metadata_service/discovery/scopeable_model.py index ad8764974..3fcb6e668 100644 --- a/chord_metadata_service/discovery/scopeable_model.py +++ b/chord_metadata_service/discovery/scopeable_model.py @@ -29,6 +29,22 @@ def get_scope_filters() -> ModelScopeFilters: # pragma: no cover """ pass + @staticmethod + def get_select_related() -> tuple[str, ...]: + """ + Returns a tuple of Django-formatted field paths to pass to .select_related(...) when querying this model for + "deep" access/serialization purposes. + """ + return () + + @staticmethod + def get_prefetch(top_level: bool) -> tuple[str, ...]: + """ + Returns a tuple of Django-formatted field paths to pass to .prefetch_related(...) when querying this model for + "deep" access/serialization purposes. + """ + return () + async def scope_contains_object(self, scope: ValidatedDiscoveryScope) -> bool: """ Returns whether the scoped queryset for the model and the passed scope contains this particular object. @@ -36,47 +52,87 @@ async def scope_contains_object(self, scope: ValidatedDiscoveryScope) -> bool: """ return await self.get_model_scoped_queryset(scope).filter(pk=self.pk).aexists() + @staticmethod + def _query_for_one_or_more_paths_to_the_same_field(field: str | tuple[str, ...], value: str) -> Q: + """ + Helper utility for get_model_scoped_queryset(...). Builds a Django Q object using one or more paths to a field + holding the same semantic information (e.g., one or multiple paths to the dataset ID field) that should be + filtered to a specific value (e.g., a specific dataset ID). + """ + q: Q + if isinstance(field, tuple): + # If filter is a tuple, the field contains multiple filters that are ORed together. This is useful for, + # e.g., the Resource model, where there are multiple possible paths one can take from the object to the + # parent dataset(s). + q = Q(**{field[0]: value}) + for fq in field[1:]: + q = q | Q(**{fq: value}) + else: + # Just one filter to get the scoped queryset + q = Q(**{field: value}) + return q + @classmethod - def get_model_scoped_queryset(cls, scope: ValidatedDiscoveryScope) -> QuerySet: + def get_model_scoped_queryset( + cls, + scope: ValidatedDiscoveryScope, + # what related model fields to prefetch/select when building the queryset + # scope_only: only perform prefetches related to scope (`dataset` in most cases, to access dataset.project_id) + # nested: TODO + # top_level: TODO + prefetch_and_select_related: Literal["scope_only", "nested", "top_level"] = "scope_only", + ) -> QuerySet: """ Returns a queryset (and subset) of objects of this model which belong to the passed scope. This method uses the defined get_scope_filters() function to narrow the queryset. """ + # We will progressively build up the queryset by adding prefetch_related/select_related/filters as needed given + # the current scope and level of detail required by the caller. + qs = cls.objects.distinct() + class_scope_filters_and_prefetches = cls.get_scope_filters() base_prefetch_related = class_scope_filters_and_prefetches["base_prefetch_related"] + data_prefetch_related = ( + cls.get_prefetch(top_level=prefetch_and_select_related == "top_level") + if prefetch_and_select_related != "scope_only" + else () + ) + + prefetch_related: list[str] = [*base_prefetch_related, *data_prefetch_related] + + should_select_related = prefetch_and_select_related != "scope_only" filter_scope: PublicScopeFilterKeys - if scope.dataset_id: + value: str + if (dataset_id := scope.dataset_id) is not None: filter_scope = "dataset" - value = scope.dataset_id - elif scope.project_id and not scope.dataset_id: + value = dataset_id + elif (project_id := scope.project_id) is not None: # and dataset_id is None, because of the above branch filter_scope = "project" - value = scope.project_id - else: - return cls.objects.distinct().prefetch_related(*base_prefetch_related) + value = project_id + else: # node-level scope - no filtering to be done, so just return the queryset + qs = qs.prefetch_related(*prefetch_related) + if should_select_related: + qs = qs.select_related(*cls.get_select_related()) + return qs scope_filter_spec = class_scope_filters_and_prefetches[filter_scope] - prefetch = ( - *base_prefetch_related, - *(p for p in scope_filter_spec.get("prefetch_related", ()) if p not in base_prefetch_related) - ) + prefetch_related.extend(p for p in scope_filter_spec.get("prefetch_related", ()) if p not in prefetch_related) - filter_query = scope_filter_spec["filter"] - if isinstance(filter_query, tuple): - # If filter is a tuple, the field contains multiple filters that are ORed together. This is useful for, - # e.g., the Resource model, where there are multiple possible paths one can take from the object to the - # parent dataset(s). - obj_q = Q(**{filter_query[0]: value}) - for fq in filter_query[1:]: - obj_q = obj_q | Q(**{fq: value}) - else: - # Just one filter to get the scoped queryset - obj_q = Q(**{filter_query: value}) + # We now have all prefetch_related/select_related fields we need based on the current parameters, so we can add + # them to the queryset: + qs = qs.prefetch_related(*prefetch_related) + if should_select_related: + qs = qs.select_related(*cls.get_select_related()) + + # Finally, we need to build a filter query for the current discovery scope: + filter_query = cls._query_for_one_or_more_paths_to_the_same_field(scope_filter_spec["filter"], value) - return cls.objects.distinct().prefetch_related(*prefetch).filter(obj_q) + # ... and we can return the finalized queryset: + return qs.filter(filter_query) # Common model scope filters for phenopacket + experiment, which share a top-level dataset property. diff --git a/chord_metadata_service/experiments/api_views.py b/chord_metadata_service/experiments/api_views.py index fdc377f4b..d3d6f6bc9 100644 --- a/chord_metadata_service/experiments/api_views.py +++ b/chord_metadata_service/experiments/api_views.py @@ -22,6 +22,7 @@ from .serializers import ExperimentSerializer, ExperimentResultSerializer from .models import Experiment, ExperimentResult +from .related_fields import EXPERIMENT_PREFETCH, EXPERIMENT_SELECT_REL from .schemas import EXPERIMENT_SCHEMA, experiment_resolver, experiment_base_uri from .filters import ExperimentFilter, ExperimentResultFilter @@ -33,16 +34,6 @@ ] -EXPERIMENT_SELECT_REL = ( - "instrument", -) - -EXPERIMENT_PREFETCH = ( - "experiment_results", - "biosample__individual" -) - - class ExperimentViewSet(BentoAuthzScopedModelViewSet): """ get: @@ -63,13 +54,8 @@ class ExperimentViewSet(BentoAuthzScopedModelViewSet): @async_to_sync async def get_queryset(self): - return ( - Experiment - .get_model_scoped_queryset(await get_request_discovery_scope(self.request)) - .select_related(*EXPERIMENT_SELECT_REL) - .prefetch_related(*EXPERIMENT_PREFETCH) - .order_by("id") - ) + scope = await get_request_discovery_scope(self.request) + return Experiment.get_model_scoped_queryset(scope, prefetch_and_select_related="top_level").order_by("id") class ExperimentBatchViewSet(BentoAuthzScopedModelGenericListViewSet): @@ -92,12 +78,14 @@ class ExperimentBatchViewSet(BentoAuthzScopedModelGenericListViewSet): async def _get_filtered_queryset(self, ids_list: list[str] | None = None): # We pre-filter experiments to the scope. This way, if they specify an ID outside the scope, it's just ignored # - the requester won't even know if it exists. - queryset = Experiment.get_model_scoped_queryset(await get_request_discovery_scope(self.request)) + queryset = Experiment.get_model_scoped_queryset( + await get_request_discovery_scope(self.request), prefetch_and_select_related="top_level" + ) if ids_list: queryset = queryset.filter(id__in=ids_list) - return queryset.select_related(*EXPERIMENT_SELECT_REL).prefetch_related(*EXPERIMENT_PREFETCH).order_by("id") + return queryset.order_by("id") @async_to_sync async def get_queryset(self): @@ -142,7 +130,9 @@ class ExperimentResultViewSet(BentoAuthzScopedModelViewSet): async def get_queryset(self): return ( ExperimentResult - .get_model_scoped_queryset(await get_request_discovery_scope(self.request)) + .get_model_scoped_queryset( + await get_request_discovery_scope(self.request), prefetch_and_select_related="top_level" + ) .order_by("id") ) diff --git a/chord_metadata_service/experiments/models.py b/chord_metadata_service/experiments/models.py index 0fd5201f8..434c28a4b 100644 --- a/chord_metadata_service/experiments/models.py +++ b/chord_metadata_service/experiments/models.py @@ -17,6 +17,7 @@ from chord_metadata_service.phenopackets.models import Biosample from . import descriptions as d +from .related_fields import EXPERIMENT_PREFETCH, EXPERIMENT_SELECT_REL from .validators import file_index_list_validator __all__ = ["Experiment", "ExperimentResult", "Instrument"] @@ -41,6 +42,14 @@ class Experiment(BaseScopeableModel, BaseFTSModel, IndexableMixin): def get_scope_filters() -> ModelScopeFilters: return TOP_LEVEL_MODEL_SCOPE_FILTERS + @staticmethod + def get_select_related() -> tuple[str, ...]: + return EXPERIMENT_SELECT_REL + + @staticmethod + def get_prefetch(top_level: bool) -> tuple[str, ...]: + return EXPERIMENT_PREFETCH + id = CharField( primary_key=True, max_length=200, help_text=rec_help(d.EXPERIMENT, "id") ) diff --git a/chord_metadata_service/experiments/related_fields.py b/chord_metadata_service/experiments/related_fields.py new file mode 100644 index 000000000..12bbfebea --- /dev/null +++ b/chord_metadata_service/experiments/related_fields.py @@ -0,0 +1,8 @@ +EXPERIMENT_SELECT_REL = ( + "instrument", +) + +EXPERIMENT_PREFETCH = ( + "experiment_results", + "biosample__individual" +) diff --git a/chord_metadata_service/patients/api_views.py b/chord_metadata_service/patients/api_views.py index 78ef87b2c..aa6b934eb 100644 --- a/chord_metadata_service/patients/api_views.py +++ b/chord_metadata_service/patients/api_views.py @@ -30,10 +30,10 @@ from chord_metadata_service.discovery.stats import individual_biosample_tissue_stats, individual_experiment_type_stats from chord_metadata_service.discovery.utils import get_discovery_data_type_permissions from chord_metadata_service.logger import logger -from chord_metadata_service.phenopackets.api_views import ( +from chord_metadata_service.phenopackets.models import Phenopacket +from chord_metadata_service.phenopackets.related_fields import ( BIOSAMPLE_PREFETCH, BIOSAMPLE_SELECT_REL, PHENOPACKET_PREFETCH, PHENOPACKET_SELECT_REL ) -from chord_metadata_service.phenopackets.models import Phenopacket from chord_metadata_service.phenopackets.serializers import PhenopacketSerializer from chord_metadata_service.restapi.api_renderers import ( PhenopacketsRenderer, @@ -91,14 +91,7 @@ def permission_from_request(self, request: DrfRequest) -> Permission | None: @async_to_sync async def get_queryset(self): scope = await get_request_discovery_scope(self.request) - return ( - Individual.get_model_scoped_queryset(scope) - .prefetch_related( - *(f"biosamples__{p}" for p in BIOSAMPLE_PREFETCH), - *(f"phenopackets__{p}" for p in PHENOPACKET_PREFETCH if p != "subject"), - ) - .order_by("id") - ) + return Individual.get_model_scoped_queryset(scope, prefetch_and_select_related="top_level").order_by("id") def list(self, request, *args, **kwargs): if request.query_params.get("format") == OUTPUT_FORMAT_BENTO_SEARCH_RESULT: @@ -120,8 +113,7 @@ def list(self, request, *args, **kwargs): biosamples_experiments_details = get_biosamples_with_experiment_details(individual_ids) qs = ( Phenopacket - .get_model_scoped_queryset(scope) - .prefetch_related("dataset__project") + .get_model_scoped_queryset(scope, prefetch_and_select_related="top_level") .filter(subject__id__in=individual_ids) .values( "subject_id", @@ -157,10 +149,8 @@ def phenopackets(self, request: DrfRequest, *_args, **_kwargs): individual = self.get_object() phenopackets = ( - Phenopacket.get_model_scoped_queryset(scope) + Phenopacket.get_model_scoped_queryset(scope, prefetch_and_select_related="nested") .filter(subject=individual) - .prefetch_related(*PHENOPACKET_PREFETCH) - .select_related(*PHENOPACKET_SELECT_REL) .annotate(project=F("dataset__project_id")) .order_by("id") ) @@ -202,7 +192,7 @@ async def get_queryset(self): *(f"phenopackets__{p}" for p in PHENOPACKET_PREFETCH), *(f"phenopackets__{p}" for p in PHENOPACKET_SELECT_REL), ) - .select_related("vital_status") + .select_related(*Individual.get_select_related()) .filter(**filter_by_id) .order_by("id") ) diff --git a/chord_metadata_service/patients/models.py b/chord_metadata_service/patients/models.py index 552489f36..021e44dab 100644 --- a/chord_metadata_service/patients/models.py +++ b/chord_metadata_service/patients/models.py @@ -5,9 +5,11 @@ from chord_metadata_service.discovery.scopeable_model import BaseScopeableModel from chord_metadata_service.discovery.full_text_search import BaseFTSModel, ToFTSReprMixin from chord_metadata_service.discovery.types import ModelScopeFilters +from chord_metadata_service.phenopackets.related_fields import PHENOPACKET_PREFETCH from chord_metadata_service.restapi.models import BaseTimeStamp, IndexableMixin, SchemaType, BaseExtraProperties from chord_metadata_service.restapi.schema_ref import SchemaRefs from chord_metadata_service.restapi.validators import JsonSchemaValidator, ontology_validator +from .related_fields import INDIVIDUAL_SELECT_REL from .values import PatientStatus, Sex, KaryotypicSex @@ -47,6 +49,19 @@ def get_scope_filters() -> ModelScopeFilters: } } + @staticmethod + def get_select_related() -> tuple[str, ...]: + return INDIVIDUAL_SELECT_REL + + @staticmethod + def get_prefetch(top_level: bool) -> tuple[str, ...]: + if top_level: + return ( + "phenopackets", + *(f"phenopackets__{p}" for p in PHENOPACKET_PREFETCH if not p.startswith("subject")), + ) + return () + def get_project_id(self) -> str | None: if not self.phenopackets.count(): # Need to wait for phenopacket to exist diff --git a/chord_metadata_service/patients/related_fields.py b/chord_metadata_service/patients/related_fields.py new file mode 100644 index 000000000..9e743f60d --- /dev/null +++ b/chord_metadata_service/patients/related_fields.py @@ -0,0 +1 @@ +INDIVIDUAL_SELECT_REL = ("vital_status",) diff --git a/chord_metadata_service/phenopackets/api_views.py b/chord_metadata_service/phenopackets/api_views.py index 83a12a8f6..0ca2e922d 100644 --- a/chord_metadata_service/phenopackets/api_views.py +++ b/chord_metadata_service/phenopackets/api_views.py @@ -33,20 +33,6 @@ class PhenopacketsModelViewSet(BentoAuthzScopedModelViewSet): pagination_class = LargeResultsSetPagination -BIOSAMPLE_PREFETCH = ( - "phenotypic_features", - "experiments", - "experiments__experiment_results", - "experiments__instrument", -) - -BIOSAMPLE_SELECT_REL = ( - "individual", - "derived_from_id", - "location_collected", -) - - class BiosampleViewSet(PhenopacketsModelViewSet): """ get: @@ -64,12 +50,8 @@ class BiosampleViewSet(PhenopacketsModelViewSet): # required to have discovery-scope-enabled queryset here to use a BentoAuthzScopedModelViewSet-derived viewset @async_to_sync async def get_queryset(self): - return ( - m.Biosample.get_model_scoped_queryset(await get_request_discovery_scope(self.request)) - .prefetch_related(*BIOSAMPLE_PREFETCH) - .select_related(*BIOSAMPLE_SELECT_REL) - .order_by("id") - ) + scope = await get_request_discovery_scope(self.request) + return m.Biosample.get_model_scoped_queryset(scope, prefetch_and_select_related="top_level").order_by("id") class BiosampleBatchViewSet(BentoAuthzScopedModelGenericListViewSet): @@ -99,12 +81,13 @@ class BiosampleBatchViewSet(BentoAuthzScopedModelGenericListViewSet): async def _get_filtered_queryset(self, ids_list: list[str] | None = None): # We pre-filter biosamples to the scope. This way, if they specify an ID outside the scope, it's just ignored # - the requester won't even know if it exists. - queryset = m.Biosample.get_model_scoped_queryset(await get_request_discovery_scope(self.request)) + scope = await get_request_discovery_scope(self.request) + queryset = m.Biosample.get_model_scoped_queryset(scope, prefetch_and_select_related="top_level") if ids_list: queryset = queryset.filter(id__in=ids_list) - return queryset.prefetch_related(*BIOSAMPLE_PREFETCH).select_related(*BIOSAMPLE_SELECT_REL).order_by("id") + return queryset.order_by("id") def get_queryset(self): return self._get_filtered_queryset(ids_list=self.request.data.get("id", None)) @@ -128,28 +111,6 @@ def create(self, request, *args, **kwargs): return Response(serializer.data, status=status.HTTP_200_OK) -PHENOPACKET_PREFETCH = ( - *(f"biosamples__{p}" for p in BIOSAMPLE_PREFETCH), - *(f"biosamples__{p}" for p in BIOSAMPLE_SELECT_REL), - "meta_data__resources", - "diseases", - "phenotypic_features", - "interpretations", - "interpretations__diagnosis", - "interpretations__diagnosis__genomic_interpretations", - "interpretations__diagnosis__genomic_interpretations__biosample", - "interpretations__diagnosis__genomic_interpretations__subject", - "interpretations__diagnosis__genomic_interpretations__gene_descriptor", - "interpretations__diagnosis__genomic_interpretations__variant_interpretation__variation_descriptor", -) - -PHENOPACKET_SELECT_REL = ( - "dataset", - "subject", - "meta_data", -) - - class PhenopacketViewSet(PhenopacketsModelViewSet): """ get: @@ -167,10 +128,9 @@ class PhenopacketViewSet(PhenopacketsModelViewSet): # required to have discovery-scope-enabled queryset here to use a BentoAuthzScopedModelViewSet-derived viewset @async_to_sync async def get_queryset(self): + scope = await get_request_discovery_scope(self.request) return ( - m.Phenopacket.get_model_scoped_queryset(await get_request_discovery_scope(self.request)) - .prefetch_related(*PHENOPACKET_PREFETCH) - .select_related(*PHENOPACKET_SELECT_REL) + m.Phenopacket.get_model_scoped_queryset(scope, prefetch_and_select_related="top_level") .annotate(project=F("dataset__project_id")) .order_by("id") ) diff --git a/chord_metadata_service/phenopackets/models.py b/chord_metadata_service/phenopackets/models.py index 168d47425..dfdbb5007 100644 --- a/chord_metadata_service/phenopackets/models.py +++ b/chord_metadata_service/phenopackets/models.py @@ -18,6 +18,7 @@ ontology_list_validator ) from . import descriptions as d +from .related_fields import BIOSAMPLE_SELECT_REL, PHENOPACKET_SELECT_REL, PHENOPACKET_PREFETCH, BIOSAMPLE_PREFETCH from .utils import time_element_to_str from .validators import vrs_variation_validator @@ -176,6 +177,14 @@ def get_scope_filters() -> ModelScopeFilters: }, } + @staticmethod + def get_select_related() -> tuple[str, ...]: + return BIOSAMPLE_SELECT_REL + + @staticmethod + def get_prefetch(top_level: bool) -> tuple[str, ...]: + return BIOSAMPLE_PREFETCH + id = models.CharField(primary_key=True, max_length=200, help_text=rec_help(d.BIOSAMPLE, "id")) # if Individual instance is deleted Biosample instance is deleted too individual = models.ForeignKey( @@ -545,6 +554,14 @@ def schema_type(self) -> SchemaType: def get_scope_filters() -> ModelScopeFilters: return TOP_LEVEL_MODEL_SCOPE_FILTERS + @staticmethod + def get_select_related() -> tuple[str, ...]: + return PHENOPACKET_SELECT_REL + + @staticmethod + def get_prefetch(top_level: bool) -> tuple[str, ...]: + return PHENOPACKET_PREFETCH + def get_project_id(self) -> str | None: model = apps.get_model("chord.Project") try: diff --git a/chord_metadata_service/phenopackets/related_fields.py b/chord_metadata_service/phenopackets/related_fields.py new file mode 100644 index 000000000..c6b82d78e --- /dev/null +++ b/chord_metadata_service/phenopackets/related_fields.py @@ -0,0 +1,36 @@ +from chord_metadata_service.patients.related_fields import INDIVIDUAL_SELECT_REL + +BIOSAMPLE_PREFETCH = ( + "phenotypic_features", + "experiments", + "experiments__experiment_results", + "experiments__instrument", +) + +BIOSAMPLE_SELECT_REL = ( + "individual", + "derived_from_id", + "location_collected", +) + +PHENOPACKET_PREFETCH = ( + *(f"biosamples__{p}" for p in BIOSAMPLE_PREFETCH), + *(f"biosamples__{p}" for p in BIOSAMPLE_SELECT_REL), + *(f"subject__{p}" for p in INDIVIDUAL_SELECT_REL), + "meta_data__resources", + "diseases", + "phenotypic_features", + "interpretations", + "interpretations__diagnosis", + "interpretations__diagnosis__genomic_interpretations", + "interpretations__diagnosis__genomic_interpretations__biosample", + "interpretations__diagnosis__genomic_interpretations__subject", + "interpretations__diagnosis__genomic_interpretations__gene_descriptor", + "interpretations__diagnosis__genomic_interpretations__variant_interpretation__variation_descriptor", +) + +PHENOPACKET_SELECT_REL = ( + "dataset", + "subject", + "meta_data", +)