Skip to content

Commit

Permalink
Merge pull request #1459 from dandi/fix-metadata-validation
Browse files Browse the repository at this point in the history
  • Loading branch information
danlamanna authored Jan 31, 2023
2 parents d27785d + 6209e16 commit d22220c
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 1 deletion.
30 changes: 29 additions & 1 deletion dandiapi/api/services/metadata/__init__.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
from celery.utils.log import get_task_logger
import dandischema.exceptions
from dandischema.metadata import aggregate_assets_summary, validate
from django.conf import settings
from django.db import transaction
from django.utils import timezone
import jsonschema.exceptions

from dandiapi.api.models import Asset, Version
from dandiapi.api.services.metadata.exceptions import AssetHasBeenPublished, VersionHasBeenPublished
from dandiapi.api.services.publish import _build_publishable_version_from_draft

logger = get_task_logger(__name__)

Expand Down Expand Up @@ -83,6 +85,28 @@ def version_aggregate_assets_summary(version: Version):


def validate_version_metadata(*, version: Version) -> None:
def _build_validatable_version_metadata(version: Version) -> dict:
    """Return draft metadata padded with stand-ins so it can be validated as published.

    Version.Status.VALID is a proxy for a version being publishable, so the
    metadata is validated against the PublishedDandiset schema. At validation
    time a draft still lacks ``id``, ``url``, ``doi`` and ``assetsSummary``;
    stand-in values are written for those keys so the validator reports the
    useful errors while ignoring the ones that cannot be satisfied yet.

    The returned dict is the ``metadata`` object of the publishable version
    built from ``version``; the stand-in keys are written into it in place.
    """
    candidate = _build_publishable_version_from_draft(version)
    payload = candidate.metadata

    dandiset_id = candidate.dandiset.identifier
    version_label = candidate.version

    payload['id'] = f'DANDI:{dandiset_id}/{version_label}'
    payload['url'] = f'{settings.DANDI_WEB_APP_URL}/dandiset/{dandiset_id}/{version_label}'
    # Fixed stand-in DOI; it is only written into the metadata handed to the validator.
    payload['doi'] = '10.80507/dandi.123456/0.123456.1234'

    # The summary only records whether any non-empty blob / any asset exists
    # (1 or 0), not real totals.
    has_bytes = version.assets.filter(blob__size__gt=0).exists()
    has_files = version.assets.exists()
    payload['assetsSummary'] = {
        'schemaKey': 'AssetsSummary',
        'numberOfBytes': 1 if has_bytes else 0,
        'numberOfFiles': 1 if has_files else 0,
    }
    return payload

logger.info('Validating dandiset metadata for version %s', version.id)

# Published versions are immutable
Expand All @@ -101,7 +125,11 @@ def validate_version_metadata(*, version: Version) -> None:
version.save()

try:
validate(version.metadata, schema_key='Dandiset', json_validation=True)
validate(
_build_validatable_version_metadata(version),
schema_key='PublishedDandiset',
json_validation=True,
)
except dandischema.exceptions.ValidationError as e:
logger.info('Error while validating version %s', version.id)
version.status = Version.Status.INVALID
Expand Down
16 changes: 16 additions & 0 deletions dandiapi/api/tests/test_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,22 @@ def test_validate_version_metadata_malformed_license(draft_version: Version, ass
]


@pytest.mark.django_db
def test_validate_version_metadata_no_assets(draft_version: Version):
    """The validation task marks the draft version INVALID with an assetsSummary error."""
    expected_errors = [
        {
            'field': 'assetsSummary',
            'message': 'A Dandiset containing no files or zero bytes is not publishable',
        }
    ]

    # Run validation, then reload the version to observe the status and errors it stored.
    tasks.validate_version_metadata_task(draft_version.id)
    draft_version.refresh_from_db()

    assert draft_version.status == Version.Status.INVALID
    assert draft_version.validation_errors == expected_errors


@pytest.mark.django_db
def test_publish_task(
api_client: APIClient,
Expand Down

0 comments on commit d22220c

Please sign in to comment.