-
Notifications
You must be signed in to change notification settings - Fork 8
feat(chord): validate translation constraints on create/update #705
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: develop
Are you sure you want to change the base?
Changes from 10 commits
6f0a0bf
a3f9391
8e9e57c
396f224
32ecd85
7f9f2b5
a00bfac
8de4a1f
d7cb3a0
402fe68
72576ba
12f4f1b
75e3880
df31022
f57f90e
d874052
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -89,6 +89,7 @@ def to_representation(self, instance): | |
| data['created_at'] = instance.created_at | ||
| data['updated_at'] = instance.updated_at | ||
| data['counts_by_entity'] = self.get_counts_by_entity(instance) | ||
| data['translations'] = [t.language for t in instance.translations.all()] | ||
| return data | ||
|
|
||
| def get_counts_by_entity(self, obj): | ||
|
|
@@ -126,6 +127,84 @@ def update(self, instance, validated_data): | |
| return instance | ||
|
|
||
|
|
||
| def _roles_for(contact) -> list: | ||
| """Return roles list for a contact, defaulting to empty list.""" | ||
| return getattr(contact, "roles", []) or [] | ||
|
|
||
|
|
||
| _IMMUTABLE_FIELDS = frozenset({"version", "release_date", "last_modified", "study_status", "study_context", | ||
| "discovery", "pcgl_dac_id"}) | ||
|
|
||
|
|
||
| def _check_translation_constraints(translation: ProjectScopedDatasetModel): | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this is done in serialization, but I feel like it should be done in the actual model save method instead to prevent invalid translations from ever being created, not just in the API (e.g., in a hypothetical CLI.) this is my general preference for MVC apps, to have validation closer to the data. however, if there's a reason this is not as good, i'm fine with being convinced otherwise.
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The model is just taking a JSON field, I don't see how close to the model we can get with that. Given that in the past we discussed about the effort to be done in implementing this and was asked to be low. It is known that a tech debt is being added. My idea for the future for this is to separate main model from translation. Everything gets serialized into a different translation model, which allows to not store duplicates of data that shouldn't be and have validation against canonical. |
||
| """ | ||
| Validate three invariants that translations must respect vs. the canonical (English) dataset: | ||
| 1. roles on primary_contact and each stakeholder cannot change. | ||
| 2. no field present in canonical may be removed (set to None) in a translation; | ||
| for list fields, lengths must also match exactly. | ||
| 3. non-translatable fields (version, release_date, last_modified, study_status, | ||
| study_context, discovery, dac_id) must equal the canonical value exactly if | ||
| provided; omitting them is allowed. | ||
| """ | ||
| try: | ||
| dataset = Dataset.objects.get(identifier=str(translation.identifier)) | ||
| except Dataset.DoesNotExist: | ||
| return | ||
|
|
||
| canonical = dataset.to_schema() | ||
| errors = {} | ||
|
|
||
| # Rule 1: roles immutable | ||
| c_roles = _roles_for(canonical.primary_contact) | ||
| t_roles = _roles_for(translation.primary_contact) | ||
| if c_roles != t_roles: | ||
| errors["primary_contact"] = ["Roles cannot change in a translation."] | ||
|
|
||
| c_stakeholders = canonical.stakeholders or [] | ||
| t_stakeholders = translation.stakeholders or [] | ||
| for i, (c_sh, t_sh) in enumerate(zip(c_stakeholders, t_stakeholders)): | ||
| if _roles_for(c_sh) != _roles_for(t_sh): | ||
| errors.setdefault("stakeholders", []).append( | ||
| f"Stakeholder at index {i}: roles cannot change in a translation." | ||
| ) | ||
|
|
||
| # Rule 2: translations cannot remove or add data to any shared field | ||
| # (immutable fields are exempt — omitting them is always allowed) | ||
| shared_fields = canonical.model_fields.keys() & translation.model_fields.keys() | ||
| for field in shared_fields: | ||
| if field in _IMMUTABLE_FIELDS: | ||
| continue | ||
|
|
||
| c_val = getattr(canonical, field, None) | ||
| t_val = getattr(translation, field, None) | ||
|
|
||
| if c_val is None or (isinstance(c_val, list) and len(c_val) == 0): | ||
| continue # canonical has nothing here; translation free to omit too | ||
|
|
||
| if t_val is None: | ||
| errors[field] = [f"Translation cannot remove '{field}' (present in canonical)."] | ||
| elif isinstance(c_val, list): | ||
| t_len = len(t_val) if isinstance(t_val, list) else 0 | ||
| if t_len != len(c_val): | ||
| errors[field] = [ | ||
| f"Translation must have the same number of items in '{field}' as canonical " | ||
| f"(canonical has {len(c_val)}, translation has {t_len})." | ||
| ] | ||
|
|
||
| # Rule 3: if an immutable field is present in the translation it must match canonical exactly | ||
| for field in _IMMUTABLE_FIELDS: | ||
| c_val = getattr(canonical, field, None) | ||
| t_val = getattr(translation, field, None) | ||
| if t_val is not None and t_val != c_val: | ||
|
SanjeevLakhwani marked this conversation as resolved.
|
||
| errors[field] = [ | ||
| f"'{field}' cannot change in a translation " | ||
| f"(expected {c_val!r}, got {t_val!r})." | ||
| ] | ||
|
|
||
| if errors: | ||
| raise serializers.ValidationError(errors) | ||
|
|
||
|
|
||
| class DatasetTranslationSerializer(PydanticJSONBSerializer): | ||
| schema_class = ProjectScopedDatasetModel | ||
|
|
||
|
|
@@ -134,6 +213,28 @@ class Meta: | |
| fields = "__all__" | ||
| read_only_fields = ['created_at', 'updated_at', 'dataset'] | ||
|
|
||
| def _resolve_dataset_id(self) -> str | None: | ||
| if self.instance is not None: | ||
| return str(self.instance.dataset_id) | ||
| view = self.context.get("view") | ||
| if view is not None: | ||
| return view.kwargs.get("identifier") | ||
| return None | ||
|
|
||
| def to_internal_value(self, data): | ||
| if "discovery" in data: | ||
| raise serializers.ValidationError({"discovery": ["Translations cannot include a discovery configuration."]}) | ||
| dataset_id = self._resolve_dataset_id() | ||
| if dataset_id is not None: | ||
| try: | ||
| dataset = Dataset.objects.get(identifier=dataset_id) | ||
| data = {**data, "identifier": dataset_id, "project": str(dataset.project_id)} | ||
| except Dataset.DoesNotExist: | ||
| pass # view handles 404 | ||
| result = super().to_internal_value(data) | ||
| _check_translation_constraints(self._validated_schema) | ||
| return result | ||
|
|
||
| def to_representation(self, instance): | ||
| data = super().to_representation(instance) | ||
| data['created_at'] = instance.created_at | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.