Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions chord_metadata_service/chord/api_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
from chord_metadata_service.restapi.utils import response_optionally_as_attachment

from .models import Project, Dataset, ProjectJsonSchema
from .related_fields import DATASET_PREFETCH, PROJECT_PREFETCH
from .serializers import (
ProjectJsonSchemaSerializer,
ProjectSerializer,
Expand Down Expand Up @@ -74,7 +75,7 @@ class ProjectViewSet(CHORDPublicModelViewSet):
Create a new project
"""

queryset = Project.objects.all().order_by("identifier")
queryset = Project.objects.all().order_by("identifier").prefetch_related(*PROJECT_PREFETCH)
serializer_class = ProjectSerializer

@async_to_sync
Expand Down Expand Up @@ -130,7 +131,7 @@ class DatasetViewSet(CHORDPublicModelViewSet):

serializer_class = DatasetSerializer
renderer_classes = tuple(CHORDPublicModelViewSet.renderer_classes) + (JSONLDDatasetRenderer, RDFDatasetRenderer,)
queryset = Dataset.objects.all().order_by("title")
queryset = Dataset.objects.all().order_by("title").prefetch_related(*DATASET_PREFETCH)

@action(detail=True, methods=['get'])
def dats(self, request: DrfRequest, *_args, **_kwargs):
Expand Down
3 changes: 3 additions & 0 deletions chord_metadata_service/chord/related_fields.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Relation paths handed to .prefetch_related(...) when loading Dataset rows.
DATASET_PREFETCH = ("additional_resources",)

# Relation paths handed to .prefetch_related(...) when loading Project rows: the project's own relations, followed by
# every dataset-level path re-rooted under "datasets__" so nested datasets get the same prefetches.
PROJECT_PREFETCH = ("project_schemas", "datasets") + tuple(f"datasets__{p}" for p in DATASET_PREFETCH)
17 changes: 9 additions & 8 deletions chord_metadata_service/chord/views_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,14 +28,12 @@

from chord_metadata_service.discovery.scope import ValidatedDiscoveryScope, get_request_discovery_scope

from chord_metadata_service.experiments.api_views import EXPERIMENT_SELECT_REL, EXPERIMENT_PREFETCH
from chord_metadata_service.experiments.models import Experiment
from chord_metadata_service.experiments.serializers import ExperimentSerializer
from chord_metadata_service.experiments.summaries import dt_experiment_summary

from chord_metadata_service.logger import logger as katsu_logger

from chord_metadata_service.phenopackets.api_views import PHENOPACKET_SELECT_REL, PHENOPACKET_PREFETCH
from chord_metadata_service.phenopackets.models import Phenopacket
from chord_metadata_service.phenopackets.serializers import PhenopacketSerializer
from chord_metadata_service.phenopackets.summaries import dt_phenopacket_summary
Expand Down Expand Up @@ -90,23 +88,26 @@ async def experiment_query_results(
):
# TODO: possibly a quite inefficient way of doing things...
# TODO: Prefetch related biosample or no?
queryset = Experiment.get_model_scoped_queryset(scope).filter(
queryset = Experiment.get_model_scoped_queryset(scope, prefetch_and_select_related="top_level").filter(
id__in=await sync_to_async(data_type_results)(query, params, "id", logger))

output_format = options.get("output") if options else None
if output_format == OUTPUT_FORMAT_VALUES_LIST:
return get_values_list(queryset, options)

return queryset.select_related(*EXPERIMENT_SELECT_REL).prefetch_related(*EXPERIMENT_PREFETCH)
return queryset


async def phenopacket_query_results(
scope: ValidatedDiscoveryScope, query: sql.Composable, params, logger: BoundLogger, options: dict | None = None
):
queryset = Phenopacket.get_model_scoped_queryset(scope).filter(
id__in=await sync_to_async(data_type_results)(query, params, "id", logger))

output_format = options.get("output") if options else None

queryset = (
Phenopacket.get_model_scoped_queryset(scope, prefetch_and_select_related="top_level")
.filter(id__in=await sync_to_async(data_type_results)(query, params, "id", logger))
)

if output_format == OUTPUT_FORMAT_VALUES_LIST:
return get_values_list(queryset, options)

Expand Down Expand Up @@ -136,7 +137,7 @@ async def phenopacket_query_results(

return results
else:
return queryset.select_related(*PHENOPACKET_SELECT_REL).prefetch_related(*PHENOPACKET_PREFETCH)
return queryset


QUERY_RESULTS_FN: dict[
Expand Down
21 changes: 16 additions & 5 deletions chord_metadata_service/discovery/api_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
from .filtering import discovery_filter_queryset
from .full_text_search import full_text_search_vector
from .matches import DISCOVERY_ENTITY_TO_MATCH_FN, DISCOVERY_ENTITY_TO_CSV_RENDERER
from .model_lookups import DISCOVERY_ENTITY_NAMES_TO_DATA_TYPE
from .model_lookups import DISCOVERY_ENTITY_NAMES_TO_DATA_TYPE, DISCOVERY_ENTITY_NAMES_TO_MODEL
from .pydantic_models import (
DiscoveryFieldResponse,
DiscoveryFieldResponses,
Expand Down Expand Up @@ -195,25 +195,36 @@ async def get_query_queryset_and_queried_entities(
entity: DiscoveryEntity,
lg: BoundLogger | None = None,
validate_field: bool = True,
for_full_response: bool = False,
) -> tuple[QuerySet, frozenset[DiscoveryEntity]]:
lg = (lg or self._logger).bind(entity=entity)

# We use an async lock here to prevent executing the same entity query multiple times if we have parallel async
# requests happening (liable to happen with field-level data collection in discovery_field_response, where we do
# an asyncio.gather across all the fields).
# Combining the lock with the caching mechanism means this is roughly equivalent to re-using the same
# "promise"/awaitable if one already exists.
async with self._queryset_locks[entity]:
if entity not in self._queryset_cache:
await (lg or self._logger).adebug(
await lg.adebug(
"QueryHelper executing query",
entity=entity,
query=self._query,
cache_keys=tuple(self._queryset_cache.keys()),
)
self._queryset_cache[entity] = await self._execute_discovery_query(
entity, lg, validate_field=validate_field
)

return self._queryset_cache[entity]
qs, es = self._queryset_cache[entity]
if for_full_response:
await lg.adebug("adding full response prefetch/select_related")
m = DISCOVERY_ENTITY_NAMES_TO_MODEL[entity]
qs = (
qs.prefetch_related(*m.get_prefetch(top_level=True))
.select_related(*m.get_select_related())
)

return qs, es

async def _get_entity_counts(self) -> EntityCounts:
"""
Expand Down Expand Up @@ -671,7 +682,7 @@ async def discovery_matches(
try:
query = DiscoveryQuery.from_drf_request(request)
qh = QueryHelper(query, scope, dt_permissions, lg)
queryset, _ = await qh.get_query_queryset_and_queried_entities(queried_entity)
queryset, _ = await qh.get_query_queryset_and_queried_entities(queried_entity, for_full_response=True)
queryset = queryset.order_by("pk")
except ValidationError as e:
return await dres.django_validation_error(
Expand Down
102 changes: 79 additions & 23 deletions chord_metadata_service/discovery/scopeable_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,54 +29,110 @@ def get_scope_filters() -> ModelScopeFilters: # pragma: no cover
"""
pass

@staticmethod
def get_select_related() -> tuple[str, ...]:
"""
Returns a tuple of Django-formatted field paths to pass to .select_related(...) when querying this model for
"deep" access/serialization purposes.
"""
return ()

@staticmethod
def get_prefetch(top_level: bool) -> tuple[str, ...]:
"""
Returns a tuple of Django-formatted field paths to pass to .prefetch_related(...) when querying this model for
"deep" access/serialization purposes.
"""
return ()

async def scope_contains_object(self, scope: ValidatedDiscoveryScope) -> bool:
    """
    Checks whether this object is a member of the model's queryset once it has been narrowed to `scope`.
    Handy as a permissions check: an object outside the scope simply is not found.
    """
    # Narrow to the scope first, then look for this exact row by primary key.
    scoped_objects = self.get_model_scoped_queryset(scope)
    this_object_in_scope = scoped_objects.filter(pk=self.pk)
    return await this_object_in_scope.aexists()

@staticmethod
def _query_for_one_or_more_paths_to_the_same_field(field: str | tuple[str, ...], value: str) -> Q:
    """
    Helper for get_model_scoped_queryset(...): builds the Django Q object that matches `value` against a field which
    may be reachable through one path or through several equivalent paths (for instance, more than one route from
    an object to its dataset ID).

    A plain string is a single lookup path. A tuple is several lookup paths whose conditions are ORed together,
    which suits models such as Resource that can reach their parent dataset(s) through more than one relation.
    """
    # Treat the single-path case as a one-element tuple so both cases share the same code below.
    lookup_paths = field if isinstance(field, tuple) else (field,)

    # Start from the first path, then OR in a condition for each remaining path.
    combined = Q(**{lookup_paths[0]: value})
    for extra_path in lookup_paths[1:]:
        combined = combined | Q(**{extra_path: value})
    return combined

@classmethod
def get_model_scoped_queryset(cls, scope: ValidatedDiscoveryScope) -> QuerySet:
def get_model_scoped_queryset(
cls,
scope: ValidatedDiscoveryScope,
# what related model fields to prefetch/select when building the queryset
# scope_only: only perform prefetches related to scope (`dataset` in most cases, to access dataset.project_id)
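# nested: scope prefetches, plus cls.get_prefetch(top_level=False) and select_related(*cls.get_select_related())
# top_level: same as nested, but the prefetch list comes from cls.get_prefetch(top_level=True)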
prefetch_and_select_related: Literal["scope_only", "nested", "top_level"] = "scope_only",
) -> QuerySet:
"""
Returns a queryset (and subset) of objects of this model which belong to the passed scope. This method uses the
defined get_scope_filters() function to narrow the queryset.
"""

# We will progressively build up the queryset by adding prefetch_related/select_related/filters as needed given
# the current scope and level of detail required by the caller.
qs = cls.objects.distinct()

class_scope_filters_and_prefetches = cls.get_scope_filters()

base_prefetch_related = class_scope_filters_and_prefetches["base_prefetch_related"]
data_prefetch_related = (
cls.get_prefetch(top_level=prefetch_and_select_related == "top_level")
if prefetch_and_select_related != "scope_only"
else ()
)

prefetch_related: list[str] = [*base_prefetch_related, *data_prefetch_related]

should_select_related = prefetch_and_select_related != "scope_only"

filter_scope: PublicScopeFilterKeys
if scope.dataset_id:
value: str
if (dataset_id := scope.dataset_id) is not None:
filter_scope = "dataset"
value = scope.dataset_id
elif scope.project_id and not scope.dataset_id:
value = dataset_id
elif (project_id := scope.project_id) is not None: # and dataset_id is None, because of the above branch
filter_scope = "project"
value = scope.project_id
else:
return cls.objects.distinct().prefetch_related(*base_prefetch_related)
value = project_id
else: # node-level scope - no filtering to be done, so just return the queryset
qs = qs.prefetch_related(*prefetch_related)
if should_select_related:
qs = qs.select_related(*cls.get_select_related())
return qs

scope_filter_spec = class_scope_filters_and_prefetches[filter_scope]

prefetch = (
*base_prefetch_related,
*(p for p in scope_filter_spec.get("prefetch_related", ()) if p not in base_prefetch_related)
)
prefetch_related.extend(p for p in scope_filter_spec.get("prefetch_related", ()) if p not in prefetch_related)

filter_query = scope_filter_spec["filter"]
if isinstance(filter_query, tuple):
# If filter is a tuple, the field contains multiple filters that are ORed together. This is useful for,
# e.g., the Resource model, where there are multiple possible paths one can take from the object to the
# parent dataset(s).
obj_q = Q(**{filter_query[0]: value})
for fq in filter_query[1:]:
obj_q = obj_q | Q(**{fq: value})
else:
# Just one filter to get the scoped queryset
obj_q = Q(**{filter_query: value})
# We now have all prefetch_related/select_related fields we need based on the current parameters, so we can add
# them to the queryset:
qs = qs.prefetch_related(*prefetch_related)
if should_select_related:
qs = qs.select_related(*cls.get_select_related())

# Finally, we need to build a filter query for the current discovery scope:
filter_query = cls._query_for_one_or_more_paths_to_the_same_field(scope_filter_spec["filter"], value)

return cls.objects.distinct().prefetch_related(*prefetch).filter(obj_q)
# ... and we can return the finalized queryset:
return qs.filter(filter_query)


# Common model scope filters for phenopacket + experiment, which share a top-level dataset property.
Expand Down
30 changes: 10 additions & 20 deletions chord_metadata_service/experiments/api_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

from .serializers import ExperimentSerializer, ExperimentResultSerializer
from .models import Experiment, ExperimentResult
from .related_fields import EXPERIMENT_PREFETCH, EXPERIMENT_SELECT_REL
from .schemas import EXPERIMENT_SCHEMA, experiment_resolver, experiment_base_uri
from .filters import ExperimentFilter, ExperimentResultFilter

Expand All @@ -33,16 +34,6 @@
]


EXPERIMENT_SELECT_REL = (
"instrument",
)

EXPERIMENT_PREFETCH = (
"experiment_results",
"biosample__individual"
)


class ExperimentViewSet(BentoAuthzScopedModelViewSet):
"""
get:
Expand All @@ -63,13 +54,8 @@ class ExperimentViewSet(BentoAuthzScopedModelViewSet):

@async_to_sync
async def get_queryset(self):
return (
Experiment
.get_model_scoped_queryset(await get_request_discovery_scope(self.request))
.select_related(*EXPERIMENT_SELECT_REL)
.prefetch_related(*EXPERIMENT_PREFETCH)
.order_by("id")
)
scope = await get_request_discovery_scope(self.request)
return Experiment.get_model_scoped_queryset(scope, prefetch_and_select_related="top_level").order_by("id")


class ExperimentBatchViewSet(BentoAuthzScopedModelGenericListViewSet):
Expand All @@ -92,12 +78,14 @@ class ExperimentBatchViewSet(BentoAuthzScopedModelGenericListViewSet):
async def _get_filtered_queryset(self, ids_list: list[str] | None = None):
# We pre-filter experiments to the scope. This way, if they specify an ID outside the scope, it's just ignored
# - the requester won't even know if it exists.
queryset = Experiment.get_model_scoped_queryset(await get_request_discovery_scope(self.request))
queryset = Experiment.get_model_scoped_queryset(
await get_request_discovery_scope(self.request), prefetch_and_select_related="top_level"
)

if ids_list:
queryset = queryset.filter(id__in=ids_list)

return queryset.select_related(*EXPERIMENT_SELECT_REL).prefetch_related(*EXPERIMENT_PREFETCH).order_by("id")
return queryset.order_by("id")

@async_to_sync
async def get_queryset(self):
Expand Down Expand Up @@ -142,7 +130,9 @@ class ExperimentResultViewSet(BentoAuthzScopedModelViewSet):
async def get_queryset(self):
return (
ExperimentResult
.get_model_scoped_queryset(await get_request_discovery_scope(self.request))
.get_model_scoped_queryset(
await get_request_discovery_scope(self.request), prefetch_and_select_related="top_level"
)
.order_by("id")
)

Expand Down
9 changes: 9 additions & 0 deletions chord_metadata_service/experiments/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from chord_metadata_service.phenopackets.models import Biosample

from . import descriptions as d
from .related_fields import EXPERIMENT_PREFETCH, EXPERIMENT_SELECT_REL
from .validators import file_index_list_validator

__all__ = ["Experiment", "ExperimentResult", "Instrument"]
Expand All @@ -41,6 +42,14 @@ class Experiment(BaseScopeableModel, BaseFTSModel, IndexableMixin):
def get_scope_filters() -> ModelScopeFilters:
return TOP_LEVEL_MODEL_SCOPE_FILTERS

@staticmethod
def get_select_related() -> tuple[str, ...]:
    """
    Returns the field paths to pass to .select_related(...) for experiments (the EXPERIMENT_SELECT_REL constant).
    """
    return EXPERIMENT_SELECT_REL

@staticmethod
def get_prefetch(top_level: bool) -> tuple[str, ...]:
    """
    Returns the field paths to pass to .prefetch_related(...) for experiments (the EXPERIMENT_PREFETCH constant).
    The same tuple is returned regardless of `top_level`; the argument is not used here.
    """
    return EXPERIMENT_PREFETCH

id = CharField(
primary_key=True, max_length=200, help_text=rec_help(d.EXPERIMENT, "id")
)
Expand Down
8 changes: 8 additions & 0 deletions chord_metadata_service/experiments/related_fields.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Field paths passed to .select_related(...) when loading experiments.
EXPERIMENT_SELECT_REL = ("instrument",)

# Field paths passed to .prefetch_related(...) when loading experiments.
EXPERIMENT_PREFETCH = ("experiment_results", "biosample__individual")
Loading