Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Restore published version emulation for validation purposes #1459

Merged
merged 1 commit into from
Jan 31, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 29 additions & 1 deletion dandiapi/api/services/metadata/__init__.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
from celery.utils.log import get_task_logger
import dandischema.exceptions
from dandischema.metadata import aggregate_assets_summary, validate
from django.conf import settings
from django.db import transaction
from django.utils import timezone
import jsonschema.exceptions

from dandiapi.api.models import Asset, Version
from dandiapi.api.services.metadata.exceptions import AssetHasBeenPublished, VersionHasBeenPublished
from dandiapi.api.services.publish import _build_publishable_version_from_draft

logger = get_task_logger(__name__)

Expand Down Expand Up @@ -83,6 +85,28 @@ def version_aggregate_assets_summary(version: Version):


def validate_version_metadata(*, version: Version) -> None:
def _build_validatable_version_metadata(version: Version) -> dict:
    """Return metadata suitable for validating ``version`` as if it were published.

    The metadata is taken from the publishable version built from the draft and
    then filled in with stand-in values for the fields that are not available at
    validation time: ``id``, ``url``, ``doi`` and ``assetsSummary``.
    """
    # Version.Status.VALID is used as a proxy for "this version is publishable",
    # so validation has to run against the PublishedDandiset schema. The stand-in
    # values below keep the validator from reporting errors we cannot satisfy yet
    # while still surfacing the errors that are actually useful.
    candidate = _build_publishable_version_from_draft(version)
    payload = candidate.metadata

    dandiset_id = candidate.dandiset.identifier
    version_tag = candidate.version
    payload['id'] = f'DANDI:{dandiset_id}/{version_tag}'
    payload['url'] = f'{settings.DANDI_WEB_APP_URL}/dandiset/{dandiset_id}/{version_tag}'
    # Hard-coded placeholder DOI; it is only used for validation.
    payload['doi'] = '10.80507/dandi.123456/0.123456.1234'

    # Only presence matters here: the stub summary reports 1 or 0 rather than
    # real totals.
    has_nonempty_blob = version.assets.filter(blob__size__gt=0).exists()
    has_any_asset = version.assets.exists()
    payload['assetsSummary'] = {
        'schemaKey': 'AssetsSummary',
        'numberOfBytes': 1 if has_nonempty_blob else 0,
        'numberOfFiles': 1 if has_any_asset else 0,
    }
    return payload

logger.info('Validating dandiset metadata for version %s', version.id)

# Published versions are immutable
Expand All @@ -101,7 +125,11 @@ def validate_version_metadata(*, version: Version) -> None:
version.save()

try:
validate(version.metadata, schema_key='Dandiset', json_validation=True)
validate(
_build_validatable_version_metadata(version),
schema_key='PublishedDandiset',
json_validation=True,
)
except dandischema.exceptions.ValidationError as e:
logger.info('Error while validating version %s', version.id)
version.status = Version.Status.INVALID
Expand Down
16 changes: 16 additions & 0 deletions dandiapi/api/tests/test_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,22 @@ def test_validate_version_metadata_malformed_license(draft_version: Version, ass
]


@pytest.mark.django_db
def test_validate_version_metadata_no_assets(
    draft_version: Version,
):
    """Validation must reject a draft version that has no publishable assets."""
    expected_errors = [
        {
            'field': 'assetsSummary',
            'message': 'A Dandiset containing no files or zero bytes is not publishable',
        }
    ]

    # Run the validation task directly; the version is expected to end up
    # INVALID (presumably the draft_version fixture carries no assets).
    tasks.validate_version_metadata_task(draft_version.id)
    draft_version.refresh_from_db()

    assert draft_version.status == Version.Status.INVALID
    assert draft_version.validation_errors == expected_errors


@pytest.mark.django_db
def test_publish_task(
api_client: APIClient,
Expand Down