Skip to content
Open
Show file tree
Hide file tree
Changes from 10 commits
Commits
Show all changes
16 commits
Select commit Hold shift + click to select a range
6f0a0bf
feat(chord): validate translation constraints on create/update
SanjeevLakhwani Jun 8, 2026
a3f9391
style: fix E501 line too long in test_api_translations
SanjeevLakhwani Jun 8, 2026
8e9e57c
feat(chord): infer identifier and project on translation from URL
SanjeevLakhwani Jun 8, 2026
396f224
feat(chord): expose available translations in dataset serializer resp…
SanjeevLakhwani Jun 9, 2026
32ecd85
feat(chord): extend translation constraints to all shared fields
SanjeevLakhwani Jun 9, 2026
7f9f2b5
feat(chord): reject discovery config and immutable fields in translat…
SanjeevLakhwani Jun 10, 2026
a00bfac
feat(chord): allow omitting immutable fields in translations
SanjeevLakhwani Jun 10, 2026
8de4a1f
feat(chord): extend translation constraints to discovery and dac_id
SanjeevLakhwani Jun 10, 2026
d7cb3a0
fix(chord): update dac_id field name to pcgl_dac_id in immutable cons…
SanjeevLakhwani Jun 10, 2026
402fe68
docs(chord): add docstring to _roles_for helper
SanjeevLakhwani Jun 15, 2026
72576ba
fix(chord): discovery translation was forced to be not included, immu…
SanjeevLakhwani Jun 15, 2026
12f4f1b
Merge remote-tracking branch 'origin/develop' into feature/dataset-tr…
SanjeevLakhwani Jun 15, 2026
75e3880
chore: trigger CI
SanjeevLakhwani Jun 16, 2026
df31022
Revert "chore: trigger CI"
SanjeevLakhwani Jun 16, 2026
f57f90e
fix(dataset): add discovery to immutable fields to allow omission in …
SanjeevLakhwani Jun 23, 2026
d874052
fix(dataset): block translations from introducing fields absent in ca…
SanjeevLakhwani Jun 23, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions chord_metadata_service/chord/api_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,7 @@ def get_queryset(self):
project_id = self.request.query_params.get("project_id")
if project_id:
queryset = queryset.filter(project_id=project_id)
queryset = queryset.prefetch_related("translations")
language = _get_preferred_language(self.request)
if language != "en":
queryset = queryset.prefetch_related(
Expand Down Expand Up @@ -426,7 +427,7 @@ def _list():
)):
return forbidden(request)
authz.mark_authz_done(request)
serializer = DatasetTranslationSerializer(data=request.data)
serializer = DatasetTranslationSerializer(data=request.data, context=self.get_serializer_context())

def _create():
serializer.is_valid(raise_exception=True)
Expand Down Expand Up @@ -460,7 +461,7 @@ async def translation_detail(self, request, language, **kwargs):
if request.method == "DELETE":
await translation.adelete()
return Response(status=status.HTTP_204_NO_CONTENT)
serializer = DatasetTranslationSerializer(translation, data=request.data)
serializer = DatasetTranslationSerializer(translation, data=request.data, context=self.get_serializer_context())

def _update():
serializer.is_valid(raise_exception=True)
Expand Down
101 changes: 101 additions & 0 deletions chord_metadata_service/chord/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ def to_representation(self, instance):
data['created_at'] = instance.created_at
data['updated_at'] = instance.updated_at
data['counts_by_entity'] = self.get_counts_by_entity(instance)
data['translations'] = [t.language for t in instance.translations.all()]
return data

def get_counts_by_entity(self, obj):
Expand Down Expand Up @@ -126,6 +127,84 @@ def update(self, instance, validated_data):
return instance


def _roles_for(contact) -> list:
"""Return roles list for a contact, defaulting to empty list."""
return getattr(contact, "roles", []) or []


_IMMUTABLE_FIELDS = frozenset({"version", "release_date", "last_modified", "study_status", "study_context",
Comment thread
SanjeevLakhwani marked this conversation as resolved.
"discovery", "pcgl_dac_id"})


def _check_translation_constraints(translation: ProjectScopedDatasetModel):

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is done in serialization, but I feel like it should be done in the actual model save method instead to prevent invalid translations from ever being created, not just in the API (e.g., in a hypothetical CLI.) this is my general preference for MVC apps, to have validation closer to the data. however, if there's a reason this is not as good, i'm fine with being convinced otherwise.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The model is just taking a JSON field, I don't see how close to the model we can get with that. Given that in the past we discussed about the effort to be done in implementing this and was asked to be low. It is known that a tech debt is being added. My idea for the future for this is to separate main model from translation. Everything gets serialized into a different translation model, which allows to not store duplicates of data that shouldn't be and have validation against canonical.

"""
Validate three invariants that translations must respect vs. the canonical (English) dataset:
1. roles on primary_contact and each stakeholder cannot change.
2. no field present in canonical may be removed (set to None) in a translation;
for list fields, lengths must also match exactly.
3. non-translatable fields (version, release_date, last_modified, study_status,
study_context, discovery, dac_id) must equal the canonical value exactly if
provided; omitting them is allowed.
"""
try:
dataset = Dataset.objects.get(identifier=str(translation.identifier))
except Dataset.DoesNotExist:
return

canonical = dataset.to_schema()
errors = {}

# Rule 1: roles immutable
c_roles = _roles_for(canonical.primary_contact)
t_roles = _roles_for(translation.primary_contact)
if c_roles != t_roles:
errors["primary_contact"] = ["Roles cannot change in a translation."]

c_stakeholders = canonical.stakeholders or []
t_stakeholders = translation.stakeholders or []
for i, (c_sh, t_sh) in enumerate(zip(c_stakeholders, t_stakeholders)):
if _roles_for(c_sh) != _roles_for(t_sh):
errors.setdefault("stakeholders", []).append(
f"Stakeholder at index {i}: roles cannot change in a translation."
)

# Rule 2: translations cannot remove or add data to any shared field
# (immutable fields are exempt — omitting them is always allowed)
shared_fields = canonical.model_fields.keys() & translation.model_fields.keys()
for field in shared_fields:
if field in _IMMUTABLE_FIELDS:
continue

c_val = getattr(canonical, field, None)
t_val = getattr(translation, field, None)

if c_val is None or (isinstance(c_val, list) and len(c_val) == 0):
continue # canonical has nothing here; translation free to omit too

if t_val is None:
errors[field] = [f"Translation cannot remove '{field}' (present in canonical)."]
elif isinstance(c_val, list):
t_len = len(t_val) if isinstance(t_val, list) else 0
if t_len != len(c_val):
errors[field] = [
f"Translation must have the same number of items in '{field}' as canonical "
f"(canonical has {len(c_val)}, translation has {t_len})."
]

# Rule 3: if an immutable field is present in the translation it must match canonical exactly
for field in _IMMUTABLE_FIELDS:
c_val = getattr(canonical, field, None)
t_val = getattr(translation, field, None)
if t_val is not None and t_val != c_val:
Comment thread
SanjeevLakhwani marked this conversation as resolved.
errors[field] = [
f"'{field}' cannot change in a translation "
f"(expected {c_val!r}, got {t_val!r})."
]

if errors:
raise serializers.ValidationError(errors)


class DatasetTranslationSerializer(PydanticJSONBSerializer):
schema_class = ProjectScopedDatasetModel

Expand All @@ -134,6 +213,28 @@ class Meta:
fields = "__all__"
read_only_fields = ['created_at', 'updated_at', 'dataset']

def _resolve_dataset_id(self) -> str | None:
if self.instance is not None:
return str(self.instance.dataset_id)
view = self.context.get("view")
if view is not None:
return view.kwargs.get("identifier")
return None

def to_internal_value(self, data):
if "discovery" in data:
raise serializers.ValidationError({"discovery": ["Translations cannot include a discovery configuration."]})
dataset_id = self._resolve_dataset_id()
if dataset_id is not None:
try:
dataset = Dataset.objects.get(identifier=dataset_id)
data = {**data, "identifier": dataset_id, "project": str(dataset.project_id)}
except Dataset.DoesNotExist:
pass # view handles 404
result = super().to_internal_value(data)
_check_translation_constraints(self._validated_schema)
return result

def to_representation(self, instance):
data = super().to_representation(instance)
data['created_at'] = instance.created_at
Expand Down
Loading