diff --git a/changelog.d/20240930_171153_mzhiltso_validation_management_core.md b/changelog.d/20240930_171153_mzhiltso_validation_management_core.md
new file mode 100644
index 000000000000..73b4011bc459
--- /dev/null
+++ b/changelog.d/20240930_171153_mzhiltso_validation_management_core.md
@@ -0,0 +1,14 @@
+### Added
+
+- \[Server API\] An option to change the real frames used for honeypot frames in tasks with honeypots
+  ()
+- \[Server API\] New endpoints for validation configuration management in tasks and jobs:
+  `/api/tasks/{id}/validation_layout`, `/api/jobs/{id}/validation_layout`
+  ()
+
+### Changed
+- \[Server API\] Chunks in tasks can now be changed.
+  There are new API elements to check chunk relevance when chunks are cached:
+  `/api/tasks/{id}/data/meta` got a new field `chunks_updated_date`,
+  `/api/tasks/{id}/data/?type=chunk` got two new headers: `X-Updated-Date`, `X-Checksum`
+  ()
diff --git a/cvat-core/src/api.ts b/cvat-core/src/api.ts
index fe3975217ce4..5f624ad0e8ae 100644
--- a/cvat-core/src/api.ts
+++ b/cvat-core/src/api.ts
@@ -26,6 +26,7 @@ import QualityReport from './quality-report';
 import QualityConflict from './quality-conflict';
 import QualitySettings from './quality-settings';
 import AnalyticsReport from './analytics-report';
+import ValidationLayout from './validation-layout';
 import { Request } from './request';

 import * as enums from './enums';
@@ -426,6 +427,7 @@ function build(): CVATCore {
             QualityReport,
             Request,
             FramesMetaData,
+            ValidationLayout,
         },
         utils: {
             mask2Rle,
diff --git a/cvat-core/src/frames.ts b/cvat-core/src/frames.ts
index dda847cf7a72..88c0097929cf 100644
--- a/cvat-core/src/frames.ts
+++ b/cvat-core/src/frames.ts
@@ -16,6 +16,7 @@ import { FieldUpdateTrigger } from './common';

 // frame storage by job id
 const frameDataCache: Record
             frameStep,
         },
+        chunksUpdatedDate: {
+            get: () => data.chunks_updated_date,
+        },
     }),
 );
@@ -592,6 +598,37 @@ function getFrameMeta(jobID, frame): SerializedFramesMetaData['frames'][0] {
     return frameMeta;
 }

+async function refreshJobCacheIfOutdated(jobID: number): Promise<void> {
+    const cached = frameDataCache[jobID];
+    if (!cached) {
+        throw new Error('Frame data cache is absent');
+    }
+
+    const META_DATA_RELOAD_PERIOD = 1 * 60 * 60 * 1000; // 1 hour
+    const isOutdated = (Date.now() - cached.metaFetchedTimestamp) > META_DATA_RELOAD_PERIOD;
+
+    if (isOutdated) {
+        // get metadata again if outdated
+        const meta = await getFramesMeta('job', jobID, true);
+        if (new Date(meta.chunksUpdatedDate) > new Date(cached.meta.chunksUpdatedDate)) {
+            // chunks were redefined, so the existing data is not relevant anymore
+            // currently we only rewrite the meta, remove all cached frames from the provider,
+            // and clear the cached context images
+            // other parameters (e.g. chunkSize) are not supposed to be changed
+            cached.meta = meta;
+            cached.provider.cleanup(Number.MAX_SAFE_INTEGER);
+            for (const frame of Object.keys(cached.contextCache)) {
+                for (const image of Object.values(cached.contextCache[+frame].data)) {
+                    // close images to release memory immediately
+                    image.close();
+                }
+            }
+            cached.contextCache = {};
+        }
+
+        cached.metaFetchedTimestamp = Date.now();
+    }
+}
+
 export function getContextImage(jobID: number, frame: number): Promise> {
     return new Promise>((resolve, reject) => {
         if (!(jobID in frameDataCache)) {
@@ -599,6 +636,7 @@ export function getContextImage(jobID: number, frame: number): Promise
            Promise,
 ): Promise {
-    if (!(jobID in frameDataCache)) {
+    const dataCacheExists = jobID in frameDataCache;
+
+    if (!dataCacheExists) {
         const blockType = chunkType === 'video' ? BlockType.MP4VIDEO : BlockType.ARCHIVE;

         const meta = await getFramesMeta('job', jobID);
@@ -718,6 +758,7 @@ export async function getFrame(
         frameDataCache[jobID] = {
             meta,
+            metaFetchedTimestamp: Date.now(),
             chunkSize,
             mode,
             startFrame,
@@ -743,6 +784,22 @@ export async function getFrame(
         };
     }

+    // Generally, the following functions may be affected if the job cache is outdated:
+    // - getFrame
+    // - getContextImage
+    // - getCachedChunks
+    // Following this logic, we should call refreshJobCacheIfOutdated from each of them.
+    // However, given the order in which these methods are usually called,
+    // that may lead to even more confusing behaviour.
+    //
+    // Usually the user first receives a frame, then ranges, and finally context images.
+    // In this case (extremely rare, but nevertheless possible) the user may get
+    // context images related to another frame:
+    // - if the cache gets outdated after the getFrame() call
+    // - and before the getContextImage() call
+    // - and both calls refer to the same frame, which is a refreshed honeypot frame with context images
+    // Thus, it is better to only call `refreshJobCacheIfOutdated` from getFrame()
+    await refreshJobCacheIfOutdated(jobID);
+
     const frameMeta = getFrameMeta(jobID, frame);
     frameDataCache[jobID].provider.setRenderSize(frameMeta.width, frameMeta.height);
     frameDataCache[jobID].decodeForward = isPlaying;
@@ -759,7 +816,7 @@
     });
 }

-export async function getDeletedFrames(instanceType: 'job' | 'task', id): Promise> {
+export async function getDeletedFrames(instanceType: 'job' | 'task', id: number): Promise> {
     if (instanceType === 'job') {
         const { meta } = frameDataCache[id];
         return meta.deletedFrames;
diff --git a/cvat-core/src/index.ts b/cvat-core/src/index.ts
index 4c68e2b23582..f361f194df73 100644
--- a/cvat-core/src/index.ts
+++ b/cvat-core/src/index.ts
@@ -32,6 +32,7 @@ import QualityConflict from './quality-conflict';
 import QualitySettings from './quality-settings';
 import AnalyticsReport from './analytics-report';
 import AnnotationGuide from './guide';
+import ValidationLayout from './validation-layout';
 import { Request } from './request';
 import BaseSingleFrameAction, { listActions, registerAction, runActions } from './annotations-actions';
 import {
@@ -215,6 +216,7 @@ export default interface CVATCore {
         AnalyticsReport: typeof AnalyticsReport;
         Request: typeof Request;
         FramesMetaData: typeof FramesMetaData;
+        ValidationLayout: typeof ValidationLayout;
     };
     utils: {
         mask2Rle: typeof mask2Rle;
diff --git a/cvat-core/src/server-proxy.ts b/cvat-core/src/server-proxy.ts
index 9e280ecf481a..7e8819808649 100644
--- a/cvat-core/src/server-proxy.ts
+++ b/cvat-core/src/server-proxy.ts
@@ -19,7 +19,7 @@
 import {
     SerializedInvitationData, SerializedCloudStorage, SerializedFramesMetaData, SerializedCollection,
     SerializedQualitySettingsData, APIQualitySettingsFilter, SerializedQualityConflictData, APIQualityConflictsFilter,
     SerializedQualityReportData, APIQualityReportsFilter, SerializedAnalyticsReport, APIAnalyticsReportFilter,
-    SerializedRequest,
+    SerializedRequest, SerializedValidationLayout,
 } from './server-response-types';
 import { PaginatedResource } from './core-types';
 import { Request } from './request';
@@ -1382,6 +1382,24 @@ async function deleteJob(jobID: number): Promise {
     }
 }

+const validationLayout = (instance: 'tasks' | 'jobs') => async (
+    id: number,
+): Promise<SerializedValidationLayout> => {
+    const { backendAPI } = config;
+
+    try {
+        const response = await Axios.get(`${backendAPI}/${instance}/${id}/validation_layout`, {
+            params: {
+                ...enableOrganization(),
+            },
+        });
+
+        return response.data;
+    } catch (errorData) {
+        throw generateError(errorData);
+    }
+};
+
 async function getUsers(filter = { page_size: 'all' }): Promise {
     const { backendAPI } = config;
@@ -2376,6 +2394,7 @@ export default Object.freeze({
         getPreview: getPreview('tasks'),
         backup: backupTask,
         restore: restoreTask,
+        validationLayout: validationLayout('tasks'),
     }),

     labels: Object.freeze({
@@ -2391,6 +2410,7 @@
         create: createJob,
         delete: deleteJob,
         exportDataset: exportDataset('jobs'),
+        validationLayout: validationLayout('jobs'),
     }),

     users: Object.freeze({
diff --git a/cvat-core/src/server-response-types.ts b/cvat-core/src/server-response-types.ts
index 5ec9e21d0b7d..5dd8cc3d54d2 100644
--- a/cvat-core/src/server-response-types.ts
+++ b/cvat-core/src/server-response-types.ts
@@ -459,6 +459,7 @@ export interface SerializedFramesMetaData {
     deleted_frames: number[];
     included_frames: number[];
     frame_filter: string;
+    chunks_updated_date: string;
     frames: {
         width: number;
         height: number;
@@ -522,3 +523,9 @@ export interface SerializedRequest {
     expiry_date?: string;
     owner?: any;
 }
+
+export interface SerializedValidationLayout {
+    honeypot_count?: number;
+    honeypot_frames?: number[];
+    honeypot_real_frames?: number[];
+}
diff --git a/cvat-core/src/session-implementation.ts b/cvat-core/src/session-implementation.ts
index e317bcdfb7cd..7901d99a99f1 100644
--- a/cvat-core/src/session-implementation.ts
+++ b/cvat-core/src/session-implementation.ts
@@ -27,7 +27,7 @@ import {
     decodePreview,
 } from './frames';
 import Issue from './issue';
-import { SerializedLabel, SerializedTask } from './server-response-types';
+import { SerializedLabel, SerializedTask, SerializedValidationLayout } from './server-response-types';
 import { checkInEnum, checkObjectType } from './common';
 import {
     getCollection, getSaver, clearAnnotations, getAnnotations,
@@ -37,6 +37,7 @@ import AnnotationGuide from './guide';
 import requestsManager from './requests-manager';
 import { Request } from './request';
 import User from './user';
+import ValidationLayout from './validation-layout';

 // must be called with task/job context
 async function deleteFrameWrapper(jobID, frame): Promise {
@@ -164,6 +165,19 @@ export function implementJob(Job: typeof JobClass): typeof JobClass {
         },
     });

+    Object.defineProperty(Job.prototype.validationLayout, 'implementation', {
+        value: async function validationLayoutImplementation(
+            this: JobClass,
+        ): ReturnType<typeof JobClass.prototype.validationLayout> {
+            const result = await serverProxy.jobs.validationLayout(this.id);
+            if (Object.keys(result).length) {
+                return new ValidationLayout(result as Required<SerializedValidationLayout>);
+            }
+
+            return null;
+        },
+    });
+
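A minimal usage sketch of the client API wired up here; the standalone function and the assumption that a `Job` instance is already available are illustrative, not part of this patch:

```ts
async function logHoneypotMapping(job: Job): Promise<void> {
    // resolves to null when the task has no validation layout configured
    const layout = await job.validationLayout();
    if (layout === null) return;

    // honeypotFrames and honeypotRealFrames are parallel arrays
    for (const frame of layout.honeypotFrames) {
        // getRealFrame() maps a honeypot frame to its validation-pool frame, or null
        console.log(frame, await layout.getRealFrame(frame));
    }
}
```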
     Object.defineProperty(Job.prototype.frames.get, 'implementation', {
         value: function getFrameImplementation(
             this: JobClass,
@@ -624,6 +638,19 @@ export function implementTask(Task: typeof TaskClass): typeof TaskClass {
         },
     });

+    Object.defineProperty(Task.prototype.validationLayout, 'implementation', {
+        value: async function validationLayoutImplementation(
+            this: TaskClass,
+        ): ReturnType<typeof TaskClass.prototype.validationLayout> {
+            const result = await serverProxy.tasks.validationLayout(this.id);
+            if (Object.keys(result).length) {
+                return new ValidationLayout(result as Required<SerializedValidationLayout>);
+            }
+
+            return null;
+        },
+    });
+
     Object.defineProperty(Task.prototype.save, 'implementation', {
         value: async function saveImplementation(
             this: TaskClass,
diff --git a/cvat-core/src/session.ts b/cvat-core/src/session.ts
index cef5af49431e..cf82aa9a050c 100644
--- a/cvat-core/src/session.ts
+++ b/cvat-core/src/session.ts
@@ -28,6 +28,7 @@ import { Request } from './request';
 import logger from './logger';
 import Issue from './issue';
 import ObjectState from './object-state';
+import ValidationLayout from './validation-layout';

 function buildDuplicatedAPI(prototype) {
     Object.defineProperties(prototype, {
@@ -685,6 +686,11 @@ export class Job extends Session {
         return result;
     }

+    async validationLayout(): Promise<ValidationLayout | null> {
+        const result = await PluginRegistry.apiWrapper.call(this, Job.prototype.validationLayout);
+        return result;
+    }
+
     async openIssue(issue: Issue, message: string): Promise {
         const result = await PluginRegistry.apiWrapper.call(this, Job.prototype.openIssue, issue, message);
         return result;
@@ -1179,6 +1185,11 @@ export class Task extends Session {
         const result = await PluginRegistry.apiWrapper.call(this, Task.prototype.guide);
         return result;
     }
+
+    async validationLayout(): Promise<ValidationLayout | null> {
+        const result = await PluginRegistry.apiWrapper.call(this, Task.prototype.validationLayout);
+        return result;
+    }
 }

 buildDuplicatedAPI(Job.prototype);
diff --git a/cvat-core/src/validation-layout.ts b/cvat-core/src/validation-layout.ts
new file mode 100644
index 000000000000..ba5a94aa03a9
--- /dev/null
+++ b/cvat-core/src/validation-layout.ts
@@ -0,0 +1,44 @@
+// Copyright (C) 2024 CVAT.ai Corporation
+//
+// SPDX-License-Identifier: MIT
+
+import { SerializedValidationLayout } from 'server-response-types';
+import PluginRegistry from './plugins';
+
+export default class ValidationLayout {
+    #honeypotFrames: number[];
+    #honeypotRealFrames: number[];
+
+    public constructor(data: Required<SerializedValidationLayout>) {
+        this.#honeypotFrames = [...data.honeypot_frames];
+        this.#honeypotRealFrames = [...data.honeypot_real_frames];
+    }
+
+    public get honeypotFrames() {
+        return [...this.#honeypotFrames];
+    }
+
+    public get honeypotRealFrames() {
+        return [...this.#honeypotRealFrames];
+    }
+
+    async getRealFrame(frame: number): Promise<number | null> {
+        const result = await PluginRegistry.apiWrapper.call(this, ValidationLayout.prototype.getRealFrame, frame);
+        return result;
+    }
+}
+
+Object.defineProperties(ValidationLayout.prototype.getRealFrame, {
+    implementation: {
+        writable: false,
+        enumerable: false,
+        value: function implementation(this: ValidationLayout, frame: number): number | null {
+            const index = this.honeypotFrames.indexOf(frame);
+            if (index !== -1) {
+                return this.honeypotRealFrames[index];
+            }
+
+            return null;
+        },
+    },
+});
diff --git a/cvat-ui/src/actions/annotation-actions.ts b/cvat-ui/src/actions/annotation-actions.ts
index e9af61adb7f9..42f52f0aed59 100644
--- a/cvat-ui/src/actions/annotation-actions.ts
+++ b/cvat-ui/src/actions/annotation-actions.ts
@@ -11,8 +11,8 @@ import {
RectDrawingMethod, CuboidDrawingMethod, Canvas, CanvasMode as Canvas2DMode, } from 'cvat-canvas-wrapper'; import { - getCore, MLModel, JobType, Job, - QualityConflict, ObjectState, JobState, + getCore, MLModel, JobType, Job, QualityConflict, + ObjectState, JobState, ValidationLayout, } from 'cvat-core-wrapper'; import logger, { EventScope } from 'cvat-logger'; import { getCVATStore } from 'cvat-store'; @@ -38,6 +38,7 @@ interface AnnotationsParameters { showGroundTruth: boolean; jobInstance: Job; groundTruthInstance: Job | null; + validationLayout: ValidationLayout | null; } const cvat = getCore(); @@ -58,7 +59,7 @@ export function receiveAnnotationsParameters(): AnnotationsParameters { player: { frame: { number: frame }, }, - job: { instance: jobInstance, groundTruthInstance }, + job: { instance: jobInstance, groundTruthInfo: { groundTruthInstance, validationLayout } }, }, settings: { workspace: { showAllInterpolationTracks }, @@ -71,6 +72,7 @@ export function receiveAnnotationsParameters(): AnnotationsParameters { frame, jobInstance: jobInstance as Job, groundTruthInstance, + validationLayout, showAllInterpolationTracks, showGroundTruth, }; @@ -261,8 +263,8 @@ async function fetchAnnotations(predefinedFrame?: number): Promise<{ maxZ: number; }> { const { - filters, frame, showAllInterpolationTracks, - jobInstance, showGroundTruth, groundTruthInstance, + filters, frame, showAllInterpolationTracks, jobInstance, + showGroundTruth, groundTruthInstance, validationLayout, } = receiveAnnotationsParameters(); const fetchFrame = typeof predefinedFrame === 'undefined' ? frame : predefinedFrame; @@ -272,8 +274,16 @@ async function fetchAnnotations(predefinedFrame?: number): Promise<{ if (jobInstance.type === JobType.GROUND_TRUTH) { states = wrapAnnotationsInGTJob(states); } else if (showGroundTruth && groundTruthInstance) { - const gtStates = await groundTruthInstance.annotations.get(fetchFrame, showAllInterpolationTracks, filters); - states.push(...gtStates); + let gtFrame: number | null = fetchFrame; + + if (validationLayout) { + gtFrame = await validationLayout.getRealFrame(gtFrame); + } + + if (gtFrame !== null) { + const gtStates = await groundTruthInstance.annotations.get(gtFrame, showAllInterpolationTracks, filters); + states.push(...gtStates); + } } const history = await jobInstance.actions.get(); @@ -364,7 +374,7 @@ export function removeAnnotationsAsync( dispatch(fetchAnnotationsAsync()); const state = getState(); - if (!state.annotation.job.groundTruthInstance) { + if (!state.annotation.job.groundTruthInfo.groundTruthInstance) { getCore().config.globalObjectsCounter = 0; } @@ -926,9 +936,11 @@ export function getJobAsync({ const colors = [...cvat.enums.colors]; let groundTruthJobFramesMeta = null; + let validationLayout = null; if (gtJob) { await gtJob.annotations.clear({ reload: true }); // fetch gt annotations from the server groundTruthJobFramesMeta = await cvat.frames.getMeta('job', gtJob.id); + validationLayout = await job.validationLayout(); } let conflicts: QualityConflict[] = []; @@ -950,6 +962,7 @@ export function getJobAsync({ queryParameters, groundTruthInstance: gtJob || null, groundTruthJobFramesMeta, + validationLayout, issues, conflicts, frameNumber, diff --git a/cvat-ui/src/cvat-core-wrapper.ts b/cvat-ui/src/cvat-core-wrapper.ts index 295135e23e15..dac86011953a 100644 --- a/cvat-ui/src/cvat-core-wrapper.ts +++ b/cvat-ui/src/cvat-core-wrapper.ts @@ -35,6 +35,7 @@ import Comment from 'cvat-core/src/comment'; import User from 'cvat-core/src/user'; import Organization, { 
Membership, Invitation } from 'cvat-core/src/organization'; import AnnotationGuide from 'cvat-core/src/guide'; +import ValidationLayout from 'cvat-core/src/validation-layout'; import AnalyticsReport, { AnalyticsEntryViewType, AnalyticsEntry } from 'cvat-core/src/analytics-report'; import { Dumper } from 'cvat-core/src/annotation-formats'; import { Event } from 'cvat-core/src/event'; @@ -105,6 +106,7 @@ export { ActionParameterType, FrameSelectionType, Request, + ValidationLayout, }; export type { diff --git a/cvat-ui/src/reducers/annotation-reducer.ts b/cvat-ui/src/reducers/annotation-reducer.ts index 54d94ee7ce82..cb53de5a63f0 100644 --- a/cvat-ui/src/reducers/annotation-reducer.ts +++ b/cvat-ui/src/reducers/annotation-reducer.ts @@ -56,13 +56,16 @@ const defaultState: AnnotationState = { openTime: null, labels: [], requestedId: null, - groundTruthJobFramesMeta: null, - groundTruthInstance: null, queryParameters: { initialOpenGuide: false, defaultLabel: null, defaultPointsCount: null, }, + groundTruthInfo: { + validationLayout: null, + groundTruthJobFramesMeta: null, + groundTruthInstance: null, + }, instance: null, attributes: {}, fetching: false, @@ -165,6 +168,7 @@ export default (state = defaultState, action: AnyAction): AnnotationState => { queryParameters, groundTruthInstance, groundTruthJobFramesMeta, + validationLayout, } = action.payload; const defaultLabel = job.labels.length ? job.labels[0] : null; @@ -207,8 +211,11 @@ export default (state = defaultState, action: AnyAction): AnnotationState => { acc[label.id] = label.attributes; return acc; }, {}), - groundTruthInstance, - groundTruthJobFramesMeta, + groundTruthInfo: { + validationLayout, + groundTruthInstance, + groundTruthJobFramesMeta, + }, queryParameters: { initialOpenGuide: queryParameters.initialOpenGuide, defaultLabel: queryParameters.defaultLabel, diff --git a/cvat-ui/src/reducers/index.ts b/cvat-ui/src/reducers/index.ts index 6da90eb551cd..85c471ef70e6 100644 --- a/cvat-ui/src/reducers/index.ts +++ b/cvat-ui/src/reducers/index.ts @@ -8,7 +8,7 @@ import { Canvas, RectDrawingMethod, CuboidDrawingMethod } from 'cvat-canvas-wrap import { Webhook, MLModel, Organization, Job, Task, Project, Label, User, QualityConflict, FramesMetaData, RQStatus, Event, Invitation, SerializedAPISchema, - Request, TargetMetric, + Request, TargetMetric, ValidationLayout, } from 'cvat-core-wrapper'; import { IntelligentScissors } from 'utils/opencv-wrapper/intelligent-scissors'; import { KeyMap, KeyMapItem } from 'utils/mousetrap-react'; @@ -728,8 +728,11 @@ export interface AnnotationState { defaultLabel: string | null; defaultPointsCount: number | null; }; - groundTruthJobFramesMeta: FramesMetaData | null; - groundTruthInstance: Job | null; + groundTruthInfo: { + validationLayout: ValidationLayout | null; + groundTruthJobFramesMeta: FramesMetaData | null; + groundTruthInstance: Job | null; + }, attributes: Record; fetching: boolean; saving: boolean; diff --git a/cvat/apps/engine/cache.py b/cvat/apps/engine/cache.py index 06bf1a2dac5e..b2a3437ddf3f 100644 --- a/cvat/apps/engine/cache.py +++ b/cvat/apps/engine/cache.py @@ -252,9 +252,7 @@ def remove_segment_chunk( self, db_segment: models.Segment, chunk_number: str, *, quality: str ) -> None: self._delete_cache_item( - self._make_segment_chunk_key( - db_segment=db_segment, chunk_number=chunk_number, quality=quality - ) + self._make_chunk_key(db_segment, chunk_number=chunk_number, quality=quality) ) def get_cloud_preview(self, db_storage: models.CloudStorage) -> Optional[DataWithMime]: @@ 
-473,22 +471,35 @@ def prepare_custom_masked_range_segment_chunk( # Optimize frame access if all the required frames are already cached # Otherwise we might need to download files. - # This is not needed for video tasks, as it will reduce performance - from cvat.apps.engine.frame_provider import FrameOutputType, TaskFrameProvider + # This is not needed for video tasks, as it will reduce performance, + # because of reading multiple files (chunks) + from cvat.apps.engine.frame_provider import FrameOutputType, make_frame_provider - task_frame_provider = TaskFrameProvider(db_task) + task_frame_provider = make_frame_provider(db_task) use_cached_data = False if db_task.mode != "interpolation": required_frame_set = set(frame_ids) - available_chunks = [ - self._has_key(self._make_chunk_key(db_segment, chunk_number, quality=quality)) - for db_segment in db_task.segment_set.filter(type=models.SegmentType.RANGE).all() - for chunk_number, _ in groupby( + available_chunks = [] + for db_segment in db_task.segment_set.filter(type=models.SegmentType.RANGE).all(): + segment_frame_provider = make_frame_provider(db_segment) + + for i, chunk_frames in groupby( sorted(required_frame_set.intersection(db_segment.frame_set)), - key=lambda frame: frame // db_data.chunk_size, - ) - ] + key=lambda abs_frame: ( + segment_frame_provider.validate_frame_number( + task_frame_provider.get_rel_frame_number(abs_frame) + )[1] + ), + ): + if not list(chunk_frames): + continue + + chunk_available = self._has_key( + self._make_chunk_key(db_segment, i, quality=quality) + ) + available_chunks.append(chunk_available) + use_cached_data = bool(available_chunks) and all(available_chunks) if hasattr(db_data, "video"): diff --git a/cvat/apps/engine/field_validation.py b/cvat/apps/engine/field_validation.py index bac48af4301b..bbfa58b5f3ea 100644 --- a/cvat/apps/engine/field_validation.py +++ b/cvat/apps/engine/field_validation.py @@ -20,19 +20,26 @@ def require_one_of_fields(data: dict[str, Any], keys: Sequence[str]) -> None: raise serializers.ValidationError(f"Only 1 of the fields {options} can be used") -def require_field(data: dict[str, Any], key: Sequence[str]) -> None: +def require_field(data: dict[str, Any], key: str) -> None: if key not in data: raise serializers.ValidationError(f'The "{key}" field is required') def require_one_of_values(data: dict[str, Any], key: str, values: Sequence[Any]) -> None: - if data[key] not in values: - raise serializers.ValidationError( - '"{}" must be one of {}'.format(key, ", ".join(f"{k}" for k in values)) - ) + assert values + if data.get(key) not in values: + if len(values) == 1: + raise serializers.ValidationError( + 'The "{}" field must be {}'.format(key, ", ".join(f"{k}" for k in values)) + ) + else: + raise serializers.ValidationError( + 'The "{}" field must be one of {}'.format(key, ", ".join(f"{k}" for k in values)) + ) -def validate_percent(value: float) -> float: + +def validate_share(value: float) -> float: if not 0 <= value <= 1: raise serializers.ValidationError("Value must be in the range [0; 1]") diff --git a/cvat/apps/engine/frame_provider.py b/cvat/apps/engine/frame_provider.py index b9c953626613..f397f0d568b1 100644 --- a/cvat/apps/engine/frame_provider.py +++ b/cvat/apps/engine/frame_provider.py @@ -9,6 +9,7 @@ import itertools import math from abc import ABCMeta, abstractmethod +from bisect import bisect from dataclasses import dataclass from enum import Enum, auto from io import BytesIO @@ -484,20 +485,28 @@ def unload(self): def __len__(self): return 
self._db_segment.frame_count - def validate_frame_number(self, frame_number: int) -> Tuple[int, int, int]: - frame_sequence = sorted(self._db_segment.frame_set) + def get_frame_index(self, frame_number: int) -> Optional[int]: + segment_frames = sorted(self._db_segment.frame_set) abs_frame_number = self._get_abs_frame_number(self._db_segment.task.data, frame_number) - if abs_frame_number not in frame_sequence: + frame_index = bisect(segment_frames, abs_frame_number) - 1 + if not ( + 0 <= frame_index < len(segment_frames) + and segment_frames[frame_index] == abs_frame_number + ): + return None + + return frame_index + + def validate_frame_number(self, frame_number: int) -> Tuple[int, int, int]: + frame_index = self.get_frame_index(frame_number) + if frame_index is None: raise ValidationError(f"Incorrect requested frame number: {frame_number}") - # TODO: maybe optimize search - chunk_number, frame_position = divmod( - frame_sequence.index(abs_frame_number), self._db_segment.task.data.chunk_size - ) + chunk_number, frame_position = divmod(frame_index, self._db_segment.task.data.chunk_size) return frame_number, chunk_number, frame_position def get_chunk_number(self, frame_number: int) -> int: - return int(frame_number) // self._db_segment.task.data.chunk_size + return self.get_frame_index(frame_number) // self._db_segment.task.data.chunk_size def find_matching_chunk(self, frames: Sequence[int]) -> Optional[int]: return next( diff --git a/cvat/apps/engine/migrations/0085_segment_chunks_updated_date.py b/cvat/apps/engine/migrations/0085_segment_chunks_updated_date.py new file mode 100644 index 000000000000..52342d7db774 --- /dev/null +++ b/cvat/apps/engine/migrations/0085_segment_chunks_updated_date.py @@ -0,0 +1,58 @@ +# Generated by Django 4.2.15 on 2024-09-25 13:52 + +from datetime import datetime +from django.db import migrations, models + + +def _get_0083_move_to_segment_chunks_migration_date(apps, schema_editor) -> datetime: + with schema_editor.connection.cursor() as cursor: + cursor.execute("""\ + SELECT applied + FROM django_migrations + WHERE app = %s AND name = %s + """, ['engine', '0083_move_to_segment_chunks']) + return cursor.fetchone()[0] + + +def init_chunks_updated_date(apps, schema_editor): + # The 0083 migration changed data distribution by chunks + migration_0083_date = _get_0083_move_to_segment_chunks_migration_date(apps, schema_editor) + + Segment = apps.get_model("engine", "Segment") + task_created_date_subquery = models.Subquery( + Segment.objects + .select_related("task") + .filter(pk=models.OuterRef("pk")) + .values('task__created_date')[:1] + ) + + Segment.objects.update( + chunks_updated_date=models.functions.Greatest( + task_created_date_subquery, migration_0083_date, + ) + ) + + +class Migration(migrations.Migration): + + dependencies = [ + ("engine", "0084_honeypot_support"), + ] + + operations = [ + migrations.AddField( + model_name="segment", + name="chunks_updated_date", + field=models.DateTimeField(default=None, null=True), + preserve_default=False, + ), + migrations.RunPython( + init_chunks_updated_date, + reverse_code=migrations.RunPython.noop, + ), + migrations.AlterField( + model_name="segment", + name="chunks_updated_date", + field=models.DateTimeField(null=False, auto_now_add=True), + ), + ] diff --git a/cvat/apps/engine/models.py b/cvat/apps/engine/models.py index 6e03def0fad6..647a0ada552a 100644 --- a/cvat/apps/engine/models.py +++ b/cvat/apps/engine/models.py @@ -5,6 +5,7 @@ from __future__ import annotations +import datetime import os import re import 
shutil @@ -610,6 +611,11 @@ def delete(self, using=None, keep_parents=False): clear_annotations_in_jobs(job_ids) super().delete(using, keep_parents) + def get_chunks_updated_date(self) -> datetime.datetime: + return self.segment_set.aggregate( + chunks_updated_date=models.Max('chunks_updated_date') + )['chunks_updated_date'] + # Redefined a couple of operation for FileSystemStorage to avoid renaming # or other side effects. class MyFileSystemStorage(FileSystemStorage): @@ -698,9 +704,9 @@ class Segment(models.Model): task = models.ForeignKey(Task, on_delete=models.CASCADE) # TODO: add related name start_frame = models.IntegerField() stop_frame = models.IntegerField() + chunks_updated_date = models.DateTimeField(null=False, auto_now_add=True) type = models.CharField(choices=SegmentType.choices(), default=SegmentType.RANGE, max_length=32) - # TODO: try to reuse this field for custom task segments (aka job_file_mapping) # SegmentType.SPECIFIC_FRAMES fields frames = IntArrayField(store_sorted=True, unique_values=True, default='', blank=True) diff --git a/cvat/apps/engine/permissions.py b/cvat/apps/engine/permissions.py index cd4cb671d95e..d01036fc9004 100644 --- a/cvat/apps/engine/permissions.py +++ b/cvat/apps/engine/permissions.py @@ -387,6 +387,8 @@ class Scopes(StrEnum): UPLOAD_DATA = 'upload:data' IMPORT_BACKUP = 'import:backup' EXPORT_BACKUP = 'export:backup' + VIEW_VALIDATION_LAYOUT = 'view:validation_layout' + UPDATE_VALIDATION_LAYOUT = 'update:validation_layout' @classmethod def create(cls, request, view, obj, iam_context): @@ -496,6 +498,8 @@ def get_scopes(request, view, obj) -> List[Scopes]: ('export_backup', 'GET'): Scopes.EXPORT_BACKUP, ('export_backup_v2', 'POST'): Scopes.EXPORT_BACKUP, ('preview', 'GET'): Scopes.VIEW, + ('validation_layout', 'GET'): Scopes.VIEW_VALIDATION_LAYOUT, + ('validation_layout', 'PATCH'): Scopes.UPDATE_VALIDATION_LAYOUT, }[(view.action, request.method)] scopes = [] @@ -616,6 +620,8 @@ class Scopes(StrEnum): VIEW_DATA = 'view:data' VIEW_METADATA = 'view:metadata' UPDATE_METADATA = 'update:metadata' + VIEW_VALIDATION_LAYOUT = 'view:validation_layout' + UPDATE_VALIDATION_LAYOUT = 'update:validation_layout' @classmethod def create(cls, request, view, obj, iam_context): @@ -711,6 +717,8 @@ def get_scopes(request, view, obj): ('dataset_export', 'GET'): Scopes.EXPORT_DATASET, ('export_dataset_v2', 'POST'): Scopes.EXPORT_DATASET if is_dataset_export(request) else Scopes.EXPORT_ANNOTATIONS, ('preview', 'GET'): Scopes.VIEW, + ('validation_layout', 'GET'): Scopes.VIEW_VALIDATION_LAYOUT, + ('validation_layout', 'PATCH'): Scopes.UPDATE_VALIDATION_LAYOUT, }[(view.action, request.method)] scopes = [] diff --git a/cvat/apps/engine/rules/jobs.rego b/cvat/apps/engine/rules/jobs.rego index 22b91a3a1050..8068f7d6fdf9 100644 --- a/cvat/apps/engine/rules/jobs.rego +++ b/cvat/apps/engine/rules/jobs.rego @@ -253,3 +253,23 @@ allow if { organizations.has_perm(organizations.WORKER) is_task_staff } + +allow if { + input.scope in {utils.VIEW_VALIDATION_LAYOUT, utils.UPDATE_VALIDATION_LAYOUT} + utils.is_sandbox + is_task_staff +} + +allow if { + input.scope in {utils.VIEW_VALIDATION_LAYOUT, utils.UPDATE_VALIDATION_LAYOUT} + input.auth.organization.id == input.resource.organization.id + organizations.has_perm(organizations.WORKER) + is_task_staff +} + +allow if { + input.scope in {utils.VIEW_VALIDATION_LAYOUT, utils.UPDATE_VALIDATION_LAYOUT} + input.auth.organization.id == input.resource.organization.id + organizations.has_perm(organizations.MAINTAINER) + utils.has_perm(utils.USER) 
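+    # unlike the rules above, organization maintainers do not need to be task staff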
+} diff --git a/cvat/apps/engine/rules/tasks.rego b/cvat/apps/engine/rules/tasks.rego index 7f7d592bdb01..99d126d2b443 100644 --- a/cvat/apps/engine/rules/tasks.rego +++ b/cvat/apps/engine/rules/tasks.rego @@ -183,7 +183,8 @@ filter := [] if { # Django Q object to filter list of entries allow if { input.scope in { utils.VIEW, utils.VIEW_ANNOTATIONS, utils.EXPORT_DATASET, utils.VIEW_METADATA, - utils.VIEW_DATA, utils.EXPORT_ANNOTATIONS, utils.EXPORT_BACKUP + utils.VIEW_DATA, utils.EXPORT_ANNOTATIONS, utils.EXPORT_BACKUP, + utils.VIEW_VALIDATION_LAYOUT } utils.is_sandbox is_task_staff @@ -192,7 +193,8 @@ allow if { allow if { input.scope in { utils.VIEW, utils.VIEW_ANNOTATIONS, utils.EXPORT_DATASET, utils.VIEW_METADATA, - utils.VIEW_DATA, utils.EXPORT_ANNOTATIONS, utils.EXPORT_BACKUP + utils.VIEW_DATA, utils.EXPORT_ANNOTATIONS, utils.EXPORT_BACKUP, + utils.VIEW_VALIDATION_LAYOUT } input.auth.organization.id == input.resource.organization.id utils.has_perm(utils.USER) @@ -202,7 +204,8 @@ allow if { allow if { input.scope in { utils.VIEW, utils.VIEW_ANNOTATIONS, utils.EXPORT_DATASET, utils.VIEW_METADATA, - utils.VIEW_DATA, utils.EXPORT_ANNOTATIONS, utils.EXPORT_BACKUP + utils.VIEW_DATA, utils.EXPORT_ANNOTATIONS, utils.EXPORT_BACKUP, + utils.VIEW_VALIDATION_LAYOUT } input.auth.organization.id == input.resource.organization.id organizations.has_perm(organizations.WORKER) @@ -212,7 +215,8 @@ allow if { allow if { input.scope in { utils.UPDATE_DESC, utils.UPDATE_ANNOTATIONS, utils.DELETE_ANNOTATIONS, - utils.UPLOAD_DATA, utils.UPDATE_METADATA, utils.IMPORT_ANNOTATIONS + utils.UPLOAD_DATA, utils.UPDATE_METADATA, utils.IMPORT_ANNOTATIONS, + utils.UPDATE_VALIDATION_LAYOUT } utils.is_sandbox is_task_staff @@ -222,7 +226,8 @@ allow if { allow if { input.scope in { utils.UPDATE_DESC, utils.UPDATE_ANNOTATIONS, utils.DELETE_ANNOTATIONS, - utils.UPLOAD_DATA, utils.UPDATE_METADATA, utils.IMPORT_ANNOTATIONS + utils.UPLOAD_DATA, utils.UPDATE_METADATA, utils.IMPORT_ANNOTATIONS, + utils.UPDATE_VALIDATION_LAYOUT } input.auth.organization.id == input.resource.organization.id utils.has_perm(utils.USER) @@ -232,7 +237,8 @@ allow if { allow if { input.scope in { utils.UPDATE_DESC, utils.UPDATE_ANNOTATIONS, utils.DELETE_ANNOTATIONS, - utils.UPLOAD_DATA, utils.UPDATE_METADATA, utils.IMPORT_ANNOTATIONS + utils.UPLOAD_DATA, utils.UPDATE_METADATA, utils.IMPORT_ANNOTATIONS, + utils.UPDATE_VALIDATION_LAYOUT } is_task_staff input.auth.organization.id == input.resource.organization.id @@ -305,4 +311,3 @@ allow if { organizations.has_perm(organizations.WORKER) is_project_staff } - diff --git a/cvat/apps/engine/serializers.py b/cvat/apps/engine/serializers.py index 3ba2a87da77c..6b858ceb25e4 100644 --- a/cvat/apps/engine/serializers.py +++ b/cvat/apps/engine/serializers.py @@ -3,6 +3,7 @@ # # SPDX-License-Identifier: MIT +from contextlib import closing import warnings from copy import copy from inspect import isclass @@ -24,6 +25,7 @@ from django.contrib.auth.models import User, Group from django.db import transaction from django.utils import timezone +from numpy import random from cvat.apps.dataset_manager.formats.utils import get_label_color from cvat.apps.engine.frame_provider import TaskFrameProvider @@ -651,6 +653,7 @@ class JobWriteSerializer(WriteOnceMixin, serializers.ModelSerializer): frames = serializers.ListField( child=serializers.IntegerField(min_value=0), required=False, + allow_empty=False, help_text=textwrap.dedent("""\ The list of frame ids. 
Applicable only to the "{}" frame selection method """.format(models.JobFrameSelectionMethod.MANUAL)) @@ -665,7 +668,7 @@ class JobWriteSerializer(WriteOnceMixin, serializers.ModelSerializer): ) frame_share = serializers.FloatField( required=False, - validators=[field_validation.validate_percent], + validators=[field_validation.validate_share], help_text=textwrap.dedent("""\ The share of frames included in the GT job. Applicable only to the "{}" frame selection method @@ -681,7 +684,7 @@ class JobWriteSerializer(WriteOnceMixin, serializers.ModelSerializer): ) frames_per_job_share = serializers.FloatField( required=False, - validators=[field_validation.validate_percent], + validators=[field_validation.validate_share], help_text=textwrap.dedent("""\ The share of frames included in the GT job from each annotation job. Applicable only to the "{}" frame selection method @@ -732,13 +735,9 @@ def validate(self, attrs): elif frame_selection_method == models.JobFrameSelectionMethod.MANUAL: field_validation.require_field(attrs, "frames") - frames = attrs['frames'] - if not frames: - raise serializers.ValidationError("The list of frames cannot be empty") - if ( - frame_selection_method != models.JobFrameSelectionMethod.MANUAL and - attrs.get('frames') + 'frames' in attrs and + frame_selection_method != models.JobFrameSelectionMethod.MANUAL ): raise serializers.ValidationError( '"frames" can only be used when "frame_selection_method" is "{}"'.format( @@ -795,7 +794,6 @@ def create(self, validated_data): # The RNG backend must not change to yield reproducible results, # so here we specify it explicitly - from numpy import random rng = random.Generator(random.MT19937(seed=seed)) if deprecated_seed is not None and frame_count < task_size: @@ -827,7 +825,6 @@ def create(self, validated_data): # The RNG backend must not change to yield reproducible results, # so here we specify it explicitly - from numpy import random rng = random.Generator(random.MT19937(seed=seed)) frames: list[int] = [] @@ -861,7 +858,7 @@ def create(self, validated_data): if invalid_ids: raise serializers.ValidationError( "The following frames do not exist in the task: {}".format( - format_list(tuple(map(str, invalid_ids))) + format_list(tuple(map(str, sorted(invalid_ids)))) ) ) @@ -933,6 +930,508 @@ class Meta: fields = ('url', 'id', 'assignee', 'status', 'stage', 'state', 'type') read_only_fields = fields +class JobValidationLayoutWriteSerializer(serializers.Serializer): + frame_selection_method = serializers.ChoiceField( + choices=models.JobFrameSelectionMethod.choices(), + required=True, + help_text=textwrap.dedent("""\ + The method to use for frame selection of new real frames for honeypots in the job + """) + ) + honeypot_real_frames = serializers.ListSerializer( + child=serializers.IntegerField(min_value=0), + required=False, + allow_empty=False, + help_text=textwrap.dedent("""\ + The list of frame ids. 
Applicable only to the "{}" frame selection method
+        """.format(models.JobFrameSelectionMethod.MANUAL))
+    )
+
+    def validate(self, attrs):
+        frame_selection_method = attrs["frame_selection_method"]
+        if frame_selection_method == models.JobFrameSelectionMethod.MANUAL:
+            field_validation.require_field(attrs, "honeypot_real_frames")
+        elif frame_selection_method == models.JobFrameSelectionMethod.RANDOM_UNIFORM:
+            pass
+        else:
+            assert False
+
+        if (
+            'honeypot_real_frames' in attrs and
+            frame_selection_method != models.JobFrameSelectionMethod.MANUAL
+        ):
+            raise serializers.ValidationError(
+                '"honeypot_real_frames" can only be used when '
+                f'"frame_selection_method" is "{models.JobFrameSelectionMethod.MANUAL}"'
+            )
+
+        return super().validate(attrs)
+
+    @transaction.atomic
+    def update(self, instance: models.Job, validated_data: dict[str, Any]) -> models.Job:
+        from cvat.apps.engine.cache import MediaCache
+        from cvat.apps.engine.frame_provider import FrameQuality, JobFrameProvider, prepare_chunk
+        from cvat.apps.dataset_manager.task import JobAnnotation, AnnotationManager
+
+        db_job = instance
+        db_segment = db_job.segment
+        db_task = db_segment.task
+        db_data = db_task.data
+
+        if not (
+            hasattr(db_job.segment.task.data, 'validation_layout') and
+            db_job.segment.task.data.validation_layout.mode == models.ValidationMode.GT_POOL
+        ):
+            raise serializers.ValidationError(
+                "Honeypots can only be modified if the task "
+                f"validation mode is '{models.ValidationMode.GT_POOL}'"
+            )
+
+        if db_job.type == models.JobType.GROUND_TRUTH:
+            raise serializers.ValidationError(
+                f"Honeypots cannot exist in {models.JobType.GROUND_TRUTH} jobs"
+            )
+
+        frame_step = db_data.get_frame_step()
+
+        def _to_rel_frame(abs_frame: int) -> int:
+            return (abs_frame - db_data.start_frame) // frame_step
+
+        all_task_frames: dict[int, models.Image] = {
+            _to_rel_frame(frame.frame): frame
+            for frame in db_data.images.all()
+        }
+        task_honeypot_frames = set(
+            frame_id
+            for frame_id, frame in all_task_frames.items()
+            if frame.is_placeholder
+        )
+        segment_frame_set = set(map(_to_rel_frame, db_segment.frame_set))
+        segment_honeypots = sorted(segment_frame_set & task_honeypot_frames)
+
+        deleted_task_frames = db_data.deleted_frames
+        task_all_validation_frames = set(map(_to_rel_frame, db_task.gt_job.segment.frame_set))
+        task_active_validation_frames = task_all_validation_frames.difference(deleted_task_frames)
+
+        segment_honeypots_count = len(segment_honeypots)
+
+        frame_selection_method = validated_data['frame_selection_method']
+        if frame_selection_method == models.JobFrameSelectionMethod.MANUAL:
+            requested_frames: list[int] = validated_data['honeypot_real_frames']
+            requested_inactive_frames: set[int] = set()
+            requested_normal_frames: set[int] = set()
+            for requested_validation_frame in requested_frames:
+                if requested_validation_frame not in task_all_validation_frames:
+                    requested_normal_frames.add(requested_validation_frame)
+                    continue
+
+                if requested_validation_frame not in task_active_validation_frames:
+                    requested_inactive_frames.add(requested_validation_frame)
+                    continue
+
+            if requested_normal_frames:
+                raise serializers.ValidationError(
+                    "Could not update honeypot frames: "
+                    "frames {} are not from the validation pool".format(
+                        format_list(tuple(map(str, sorted(requested_normal_frames))))
+                    )
+                )
+
+            if requested_inactive_frames:
+                raise serializers.ValidationError(
+                    "Could not update honeypot frames: "
+                    "frames {} are disabled.
Restore them in the validation pool first".format( + format_list(tuple(map(str, sorted(requested_inactive_frames)))) + ) + ) + + if len(requested_frames) != segment_honeypots_count: + raise serializers.ValidationError( + "Could not update honeypot frames: " + "the number of honeypots must remain the same. " + "Requested {}, current {}".format( + len(requested_frames), segment_honeypots_count + ) + ) + + elif frame_selection_method == models.JobFrameSelectionMethod.RANDOM_UNIFORM: + if len(task_active_validation_frames) < segment_honeypots_count: + raise serializers.ValidationError( + "Can't select validation frames: " + "the remaining number of validation frames ({}) " + "is less than the number of honeypots in a job ({}). " + "Try to restore some validation frames".format( + len(task_active_validation_frames), segment_honeypots_count + ) + ) + + # Guarantee uniformness by using a known distribution + # overall task honeypot distribution is not guaranteed though + rng = random.Generator(random.MT19937()) + requested_frames = rng.choice( + tuple(task_active_validation_frames), size=segment_honeypots_count, + shuffle=False, replace=False + ).tolist() + else: + assert False + + # Replace validation frames in the job + old_honeypot_real_ids = [] + updated_db_frames = [] + for frame, requested_validation_frame in zip(segment_honeypots, requested_frames): + db_requested_frame = all_task_frames[requested_validation_frame] + db_segment_frame = all_task_frames[frame] + assert db_segment_frame.is_placeholder + + old_honeypot_real_ids.append(_to_rel_frame(db_segment_frame.real_frame)) + + # Change image in the current segment honeypot frame + db_segment_frame.path = db_requested_frame.path + db_segment_frame.width = db_requested_frame.width + db_segment_frame.height = db_requested_frame.height + db_segment_frame.real_frame = db_requested_frame.frame + db_segment_frame.related_files.set(db_requested_frame.related_files.all()) + + updated_db_frames.append(db_segment_frame) + + updated_validation_frames = [ + frame + for new_validation_frame, old_validation_frame, frame in zip( + requested_frames, old_honeypot_real_ids, segment_honeypots + ) + if new_validation_frame != old_validation_frame + ] + if updated_validation_frames: + models.Image.objects.bulk_update( + updated_db_frames, fields=['path', 'width', 'height', 'real_frame'] + ) + + # Remove annotations on changed validation frames + job_annotation = JobAnnotation(db_job.id) + job_annotation.init_from_db() + job_annotation_manager = AnnotationManager( + job_annotation.ir_data, dimension=db_task.dimension + ) + job_annotation_manager.clear_frames( + segment_frame_set.difference(updated_validation_frames) + ) + job_annotation.delete(job_annotation_manager.data) + + # Update chunks + task_frame_provider = TaskFrameProvider(db_task) + job_frame_provider = JobFrameProvider(db_job) + updated_segment_chunk_ids = set( + job_frame_provider.get_chunk_number(updated_segment_frame_id) + for updated_segment_frame_id in updated_validation_frames + ) + segment_frames = sorted(segment_frame_set) + segment_frame_map = dict(zip(segment_honeypots, requested_frames)) + + media_cache = MediaCache() + for chunk_id in sorted(updated_segment_chunk_ids): + chunk_frames = segment_frames[ + chunk_id * db_data.chunk_size : + (chunk_id + 1) * db_data.chunk_size + ] + + for quality in FrameQuality.__members__.values(): + def _write_updated_static_chunk(): + def _iterate_chunk_frames(): + for chunk_frame in chunk_frames: + db_frame = all_task_frames[chunk_frame] + 
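# honeypot positions map to their newly assigned validation frames;
+                                # all other frames map to themselves and are passed
+                                # through unchanged
+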
chunk_real_frame = segment_frame_map.get(chunk_frame, chunk_frame) + yield ( + task_frame_provider.get_frame( + chunk_real_frame, quality=quality + ).data, + os.path.basename(db_frame.path), + chunk_frame, + ) + + with closing(_iterate_chunk_frames()) as frame_iter: + chunk, _ = prepare_chunk( + frame_iter, quality=quality, db_task=db_task, dump_unchanged=True, + ) + + get_chunk_path = { + FrameQuality.COMPRESSED: db_data.get_compressed_segment_chunk_path, + FrameQuality.ORIGINAL: db_data.get_original_segment_chunk_path, + }[quality] + + with open(get_chunk_path(chunk_id, db_segment.id), 'wb') as f: + f.write(chunk.getvalue()) + + if db_data.storage_method == models.StorageMethodChoice.FILE_SYSTEM: + _write_updated_static_chunk() + + media_cache.remove_segment_chunk(db_segment, chunk_id, quality=quality) + + db_segment.chunks_updated_date = timezone.now() + db_segment.save(update_fields=['chunks_updated_date']) + + if updated_validation_frames or ( + # even if the randomly selected frames were the same as before, we should still + # consider it an update to the validation frames and restore them, if they were deleted + frame_selection_method == models.JobFrameSelectionMethod.RANDOM_UNIFORM + ): + if set(deleted_task_frames).intersection(updated_validation_frames): + db_data.deleted_frames = sorted( + set(deleted_task_frames).difference(updated_validation_frames) + ) + db_data.save(update_fields=['deleted_frames']) + + db_job.touch() + db_segment.job_set.exclude(id=db_job.id).update(updated_date=timezone.now()) + db_task.touch() + if db_task.project: + db_task.project.touch() + + return instance + +class JobValidationLayoutReadSerializer(serializers.Serializer): + honeypot_count = serializers.IntegerField(min_value=0, required=False) + honeypot_frames = serializers.ListField( + child=serializers.IntegerField(min_value=0), required=False, + help_text=textwrap.dedent("""\ + The list of frame ids for honeypots in the job + """) + ) + honeypot_real_frames = serializers.ListSerializer( + child=serializers.IntegerField(min_value=0), required=False, + help_text=textwrap.dedent("""\ + The list of real (validation) frame ids for honeypots in the job + """) + ) + + def to_representation(self, instance: models.Job): + validation_layout = getattr(instance.segment.task.data, 'validation_layout', None) + if not validation_layout: + return {} + + data = {} + + if validation_layout.mode == models.ValidationMode.GT_POOL: + db_segment = instance.segment + segment_frame_set = db_segment.frame_set + + db_data = db_segment.task.data + frame_step = db_data.get_frame_step() + + def _to_rel_frame(abs_frame: int) -> int: + return (abs_frame - db_data.start_frame) // frame_step + + segment_honeypot_frames = [] + for frame in db_segment.task.data.images.all(): + if not frame.is_placeholder: + continue + + if not frame.frame in segment_frame_set: + continue + + segment_honeypot_frames.append( + (_to_rel_frame(frame.frame), _to_rel_frame(frame.real_frame)) + ) + + segment_honeypot_frames.sort(key=lambda v: v[0]) + + data = { + 'honeypot_count': len(segment_honeypot_frames), + 'honeypot_frames': [v[0] for v in segment_honeypot_frames], + 'honeypot_real_frames': [v[1] for v in segment_honeypot_frames], + } + + return super().to_representation(data) + +class TaskValidationLayoutWriteSerializer(serializers.Serializer): + disabled_frames = serializers.ListField( + child=serializers.IntegerField(min_value=0), required=False, + help_text=textwrap.dedent("""\ + The list of frame ids to be excluded from validation + """) + ) + 
frame_selection_method = serializers.ChoiceField( + choices=models.JobFrameSelectionMethod.choices(), required=False, + help_text=textwrap.dedent("""\ + The method to use for frame selection of new real frames for honeypots in the task + """) + ) + honeypot_real_frames = serializers.ListField( + child=serializers.IntegerField(min_value=0), required=False, + help_text=textwrap.dedent("""\ + The list of frame ids. Applicable only to the "{}" frame selection method + """.format(models.JobFrameSelectionMethod.MANUAL)) + ) + + def validate(self, attrs): + frame_selection_method = attrs.get("frame_selection_method") + if frame_selection_method == models.JobFrameSelectionMethod.MANUAL: + field_validation.require_field(attrs, "honeypot_real_frames") + elif frame_selection_method == models.JobFrameSelectionMethod.RANDOM_UNIFORM: + pass + + if ( + 'honeypot_real_frames' in attrs and + frame_selection_method != models.JobFrameSelectionMethod.MANUAL + ): + raise serializers.ValidationError( + '"honeypot_real_frames" can only be used when ' + f'"frame_selection_method" is "{models.JobFrameSelectionMethod.MANUAL}"' + ) + + return super().validate(attrs) + + @transaction.atomic + def update(self, instance: models.Task, validated_data: dict[str, Any]) -> models.Task: + validation_layout = getattr(instance.data, 'validation_layout', None) + if not validation_layout: + raise serializers.ValidationError("Validation is not configured in the task") + + if 'disabled_frames' in validated_data: + requested_disabled_frames = validated_data['disabled_frames'] + unknown_requested_disabled_frames = ( + set(requested_disabled_frames).difference(validation_layout.frames) + ) + if unknown_requested_disabled_frames: + raise serializers.ValidationError( + "Unknown frames requested for exclusion from the validation set {}".format( + format_list(tuple(map(str, sorted(unknown_requested_disabled_frames)))) + ) + ) + + gt_job_meta_serializer = JobDataMetaWriteSerializer(instance.gt_job, { + "deleted_frames": requested_disabled_frames + }) + gt_job_meta_serializer.is_valid(raise_exception=True) + gt_job_meta_serializer.save() + + frame_selection_method = validated_data.get('frame_selection_method') + if frame_selection_method and not ( + validation_layout and + instance.data.validation_layout.mode == models.ValidationMode.GT_POOL + ): + raise serializers.ValidationError( + "Honeypots can only be modified if the task " + f"validation mode is '{models.ValidationMode.GT_POOL}'" + ) + + if frame_selection_method == models.JobFrameSelectionMethod.MANUAL: + requested_honeypot_real_frames = validated_data['honeypot_real_frames'] + + task_honeypot_abs_frames = ( + instance.data.images + .filter(is_placeholder=True) + .order_by('frame') + .values_list('frame', flat=True) + ) + + task_honeypot_frames_count = len(task_honeypot_abs_frames) + if task_honeypot_frames_count != len(requested_honeypot_real_frames): + raise serializers.ValidationError( + "Invalid size of 'honeypot_real_frames' array, " + f"expected {task_honeypot_frames_count}" + ) + + if frame_selection_method: + for db_job in ( + models.Job.objects.select_related("segment") + .filter(segment__task_id=instance.id, type=models.JobType.ANNOTATION) + .order_by("segment__start_frame") + .all() + ): + job_serializer_params = { + 'frame_selection_method': frame_selection_method + } + + if frame_selection_method == models.JobFrameSelectionMethod.MANUAL: + segment_frame_set = db_job.segment.frame_set + job_serializer_params['honeypot_real_frames'] = [ + requested_frame + for abs_frame, 
requested_frame in zip( + task_honeypot_abs_frames, requested_honeypot_real_frames + ) + if abs_frame in segment_frame_set + ] + + job_validation_layout_serializer = JobValidationLayoutWriteSerializer( + db_job, job_serializer_params + ) + job_validation_layout_serializer.is_valid(raise_exception=True) + job_validation_layout_serializer.save() + + return instance + +class TaskValidationLayoutReadSerializer(serializers.ModelSerializer): + validation_frames = serializers.ListField( + child=serializers.IntegerField(min_value=0), source='frames', required=False, + help_text=textwrap.dedent("""\ + The list of frame ids to be used for validation + """) + ) + disabled_frames = serializers.ListField( + child=serializers.IntegerField(min_value=0), required=False, + help_text=textwrap.dedent("""\ + The list of frame ids excluded from validation + """) + ) + honeypot_count = serializers.IntegerField(min_value=0, required=False) + honeypot_frames = serializers.ListField( + child=serializers.IntegerField(min_value=0), required=False, + help_text=textwrap.dedent("""\ + The list of frame ids for all honeypots in the task + """) + ) + honeypot_real_frames = serializers.ListField( + child=serializers.IntegerField(min_value=0), required=False, + help_text=textwrap.dedent("""\ + The list of real (validation) frame ids for all honeypots in the task + """) + ) + + class Meta: + model = models.ValidationLayout + fields = ( + 'mode', + 'frames_per_job_count', + 'validation_frames', + 'disabled_frames', + 'honeypot_count', + 'honeypot_frames', + 'honeypot_real_frames', + ) + read_only_fields = fields + extra_kwargs = { + 'mode': { 'allow_null': True }, + } + + def to_representation(self, instance: models.ValidationLayout): + if instance.mode == models.ValidationMode.GT_POOL: + db_data: models.Data = instance.task_data + frame_step = db_data.get_frame_step() + + def _to_rel_frame(abs_frame: int) -> int: + return (abs_frame - db_data.start_frame) // frame_step + + placeholder_queryset = models.Image.objects.filter( + data_id=instance.task_data_id, is_placeholder=True + ) + honeypot_count = placeholder_queryset.count() + + instance.honeypot_count = honeypot_count + + # TODO: make this information optional, if there are use cases with too big responses + instance.honeypot_frames = [] + instance.honeypot_real_frames = [] + for frame, real_frame in ( + placeholder_queryset + .order_by('frame') + .values_list('frame', 'real_frame') + .iterator(chunk_size=10000) + ): + instance.honeypot_frames.append(_to_rel_frame(frame)) + instance.honeypot_real_frames.append(_to_rel_frame(real_frame)) + + return super().to_representation(instance) + class SegmentSerializer(serializers.ModelSerializer): jobs = SimpleJobSerializer(many=True, source='job_set') frames = serializers.ListSerializer(child=serializers.IntegerField(), allow_empty=True) @@ -1064,7 +1563,7 @@ class ValidationParamsSerializer(serializers.ModelSerializer): ) frame_share = serializers.FloatField( required=False, - validators=[field_validation.validate_percent], + validators=[field_validation.validate_share], help_text=textwrap.dedent("""\ The share of frames to be included in the validation set. 
Applicable only to the "{}" frame selection method @@ -1080,7 +1579,7 @@ class ValidationParamsSerializer(serializers.ModelSerializer): ) frames_per_job_share = serializers.FloatField( required=False, - validators=[field_validation.validate_percent], + validators=[field_validation.validate_share], help_text=textwrap.dedent("""\ The share of frames to be included in the validation set from each annotation job. Applicable only to the "{}" frame selection method @@ -1140,8 +1639,8 @@ def validate(self, attrs): field_validation.require_field(attrs, "frames") if ( - attrs['frame_selection_method'] != models.JobFrameSelectionMethod.MANUAL and - attrs.get('frames') + 'frames' in attrs and + attrs['frame_selection_method'] != models.JobFrameSelectionMethod.MANUAL ): raise serializers.ValidationError( '"frames" can only be used when "frame_selection_method" is "{}"'.format( @@ -1803,10 +2302,12 @@ class DataMetaReadSerializer(serializers.ModelSerializer): help_text=textwrap.dedent("""\ A list of valid frame ids. The None value means all frames are included. """)) + chunks_updated_date = serializers.DateTimeField() class Meta: model = models.Data fields = ( + 'chunks_updated_date', 'chunk_size', 'size', 'image_quality', @@ -1819,11 +2320,17 @@ class Meta: ) read_only_fields = fields extra_kwargs = { + 'chunks_updated_date': { + 'help_text': textwrap.dedent("""\ + The date of the last chunk data update. + Chunks downloaded before this date are outdated and should be redownloaded. + """) + }, 'size': { 'help_text': textwrap.dedent("""\ The number of frames included. Deleted frames do not affect this value. """) - } + }, } class DataMetaWriteSerializer(serializers.ModelSerializer): @@ -1834,11 +2341,26 @@ class Meta: fields = ('deleted_frames',) def update(self, instance: models.Data, validated_data: dict[str, Any]) -> models.Data: - deleted_frames = validated_data['deleted_frames'] + requested_deleted_frames = validated_data['deleted_frames'] + + requested_deleted_frames_set = set(requested_deleted_frames) + if len(requested_deleted_frames_set) != len(requested_deleted_frames): + raise serializers.ValidationError("Deleted frames cannot repeat") + + unknown_requested_deleted_frames = ( + requested_deleted_frames_set.difference(range(instance.size)) + ) + if unknown_requested_deleted_frames: + raise serializers.ValidationError( + "Unknown frames {} requested for removal".format( + format_list(tuple(map(str, sorted(unknown_requested_deleted_frames)))) + ) + ) + validation_layout = getattr(instance, 'validation_layout', None) if validation_layout and validation_layout.mode == models.ValidationMode.GT_POOL: gt_frame_set = set(validation_layout.frames) - changed_deleted_frames = set(deleted_frames).difference(instance.deleted_frames) + changed_deleted_frames = requested_deleted_frames_set.difference(instance.deleted_frames) if not gt_frame_set.isdisjoint(changed_deleted_frames): raise serializers.ValidationError( f"When task validation mode is {models.ValidationMode.GT_POOL}, " @@ -1861,7 +2383,7 @@ def update(self, instance: models.Job, validated_data: dict[str, Any]) -> models db_task = db_segment.task db_data = db_task.data - deleted_frames = validated_data.get('deleted_frames') + deleted_frames = validated_data['deleted_frames'] task_frame_provider = TaskFrameProvider(db_task) segment_rel_frame_set = set( @@ -1874,11 +2396,11 @@ def update(self, instance: models.Job, validated_data: dict[str, Any]) -> models format_list(list(map(str, unknown_deleted_frames))) )) - updated_validation_frames = None - 
updated_task_frames = None + updated_deleted_validation_frames = None + updated_deleted_task_frames = None if instance.type == models.JobType.GROUND_TRUTH: - updated_validation_frames = deleted_frames + [ + updated_deleted_validation_frames = deleted_frames + [ f for f in db_data.validation_layout.disabled_frames if f not in segment_rel_frame_set @@ -1888,8 +2410,9 @@ def update(self, instance: models.Job, validated_data: dict[str, Any]) -> models # GT pool owns its frames, so we exclude them from the task, # together with the related honeypots in jobs updated_validation_abs_frame_set = set( - map(task_frame_provider.get_abs_frame_number, updated_validation_frames) + map(task_frame_provider.get_abs_frame_number, updated_deleted_validation_frames) ) + excluded_placeholder_frames = [ task_frame_provider.get_rel_frame_number(frame) for frame, real_frame in ( @@ -1900,25 +2423,25 @@ def update(self, instance: models.Job, validated_data: dict[str, Any]) -> models ) if real_frame in updated_validation_abs_frame_set ] - updated_task_frames = deleted_frames + excluded_placeholder_frames + updated_deleted_task_frames = deleted_frames + excluded_placeholder_frames elif db_data.validation_layout.mode == models.ValidationMode.GT: # Regular GT jobs only refer to the task frames, without data ownership pass else: assert False else: - updated_task_frames = deleted_frames + [ + updated_deleted_task_frames = deleted_frames + [ f for f in db_data.deleted_frames if f not in segment_rel_frame_set ] - if updated_validation_frames is not None: - db_data.validation_layout.disabled_frames = updated_validation_frames + if updated_deleted_validation_frames is not None: + db_data.validation_layout.disabled_frames = updated_deleted_validation_frames db_data.validation_layout.save(update_fields=['disabled_frames']) - if updated_task_frames is not None: - db_data.deleted_frames = updated_task_frames + if updated_deleted_task_frames is not None: + db_data.deleted_frames = updated_deleted_task_frames db_data.save(update_fields=['deleted_frames']) db_task.touch() diff --git a/cvat/apps/engine/views.py b/cvat/apps/engine/views.py index e4156c0ad30a..0dd84acab90f 100644 --- a/cvat/apps/engine/views.py +++ b/cvat/apps/engine/views.py @@ -3,20 +3,20 @@ # # SPDX-License-Identifier: MIT -from abc import ABCMeta, abstractmethod +import functools import itertools import os import os.path as osp import re import shutil -import functools - +import textwrap +import traceback +import zlib +from abc import ABCMeta, abstractmethod from contextlib import suppress from PIL import Image from types import SimpleNamespace from typing import Optional, Any, Dict, List, Union, cast, Callable, Mapping, Iterable -import traceback -import textwrap from collections import namedtuple from copy import copy from datetime import datetime @@ -28,7 +28,7 @@ from django.conf import settings from django.contrib.auth.models import User from django.db import IntegrityError, transaction -from django.db.models import Count +from django.db import models as django_models from django.db.models.query import Prefetch from django.http import HttpResponse, HttpRequest, HttpResponseNotFound, HttpResponseBadRequest from django.utils import timezone @@ -38,7 +38,7 @@ from drf_spectacular.types import OpenApiTypes from drf_spectacular.utils import ( - OpenApiParameter, OpenApiResponse, PolymorphicProxySerializer, + OpenApiExample, OpenApiParameter, OpenApiResponse, PolymorphicProxySerializer, extend_schema_view, extend_schema ) @@ -60,7 +60,7 @@ from
cvat.apps.dataset_manager.bindings import CvatImportError from cvat.apps.dataset_manager.serializers import DatasetFormatsSerializer from cvat.apps.engine.frame_provider import ( - IFrameProvider, TaskFrameProvider, JobFrameProvider, FrameQuality + DataWithMeta, IFrameProvider, TaskFrameProvider, JobFrameProvider, FrameQuality ) from cvat.apps.engine.filters import NonModelSimpleFilter, NonModelOrderingFilter, NonModelJsonLogicFilter from cvat.apps.engine.media_extractors import get_mime @@ -74,10 +74,11 @@ from cvat.apps.engine.serializers import ( AboutSerializer, AnnotationFileSerializer, BasicUserSerializer, DataMetaReadSerializer, DataMetaWriteSerializer, DataSerializer, FileInfoSerializer, - JobDataMetaWriteSerializer, JobReadSerializer, JobWriteSerializer, LabelSerializer, - LabeledDataSerializer, + JobDataMetaWriteSerializer, JobReadSerializer, JobWriteSerializer, + JobValidationLayoutReadSerializer, JobValidationLayoutWriteSerializer, + LabelSerializer, LabeledDataSerializer, ProjectReadSerializer, ProjectWriteSerializer, - RqStatusSerializer, TaskReadSerializer, TaskWriteSerializer, + RqStatusSerializer, TaskReadSerializer, TaskValidationLayoutReadSerializer, TaskValidationLayoutWriteSerializer, TaskWriteSerializer, UserSerializer, PluginsSerializer, IssueReadSerializer, AnnotationGuideReadSerializer, AnnotationGuideWriteSerializer, AssetReadSerializer, AssetWriteSerializer, @@ -115,6 +116,9 @@ _UPLOAD_PARSER_CLASSES = api_settings.DEFAULT_PARSER_CLASSES + [MultiPartParser] +_DATA_CHECKSUM_HEADER_NAME = 'X-Checksum' +_DATA_UPDATED_DATE_HEADER_NAME = 'X-Updated-Date' + @extend_schema(tags=['server']) class ServerViewSet(viewsets.ViewSet): serializer_class = None @@ -693,7 +697,11 @@ def __call__(self): try: if self.type == 'chunk': data = frame_provider.get_chunk(self.number, quality=self.quality) - return HttpResponse(data.data.getvalue(), content_type=data.mime) + return HttpResponse( + data.data.getvalue(), + content_type=data.mime, + headers=self._get_chunk_response_headers(data), + ) elif self.type == 'frame' or self.type == 'preview': if self.type == 'preview': data = frame_provider.get_preview() @@ -716,6 +724,23 @@ def __call__(self): '\n'.join([str(d) for d in ex.detail]) return Response(data=msg, status=ex.status_code) + @abstractmethod + def _get_chunk_response_headers(self, chunk_data: DataWithMeta) -> dict[str, str]: ... 
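+ + # The chunk checksum produced below is a cheap CRC32 fingerprint, not a cryptographic hash: + # CRC32 is seeded with the decimal payload length and then applied only to the first + # _CHUNK_HEADER_BYTES_LENGTH bytes, so a re-built chunk is detected whenever its size or + # leading bytes change, without hashing the whole payload.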
+ + _CHUNK_HEADER_BYTES_LENGTH = 1000 + "The number of significant bytes from the chunk header, used for checksum computation" + + def _get_chunk_checksum(self, chunk_data: DataWithMeta) -> str: + data = chunk_data.data.getbuffer() + size_checksum = zlib.crc32(str(len(data)).encode()) + return str(zlib.crc32(data[:self._CHUNK_HEADER_BYTES_LENGTH], size_checksum)) + + def _make_chunk_response_headers(self, checksum: str, updated_date: datetime) -> dict[str, str]: + return { + _DATA_CHECKSUM_HEADER_NAME: str(checksum or ''), + _DATA_UPDATED_DATE_HEADER_NAME: serializers.DateTimeField().to_representation(updated_date), + } + class _TaskDataGetter(_DataGetter): def __init__( self, @@ -731,6 +756,11 @@ def __init__( def _get_frame_provider(self) -> TaskFrameProvider: return TaskFrameProvider(self._db_task) + def _get_chunk_response_headers(self, chunk_data: DataWithMeta) -> dict[str, str]: + return self._make_chunk_response_headers( + self._get_chunk_checksum(chunk_data), self._db_task.get_chunks_updated_date(), + ) + class _JobDataGetter(_DataGetter): def __init__( @@ -785,10 +815,21 @@ def __call__(self): self.number, quality=self.quality, is_task_chunk=True ) - return HttpResponse(data.data.getvalue(), content_type=data.mime) + return HttpResponse( + data.data.getvalue(), + content_type=data.mime, + headers=self._get_chunk_response_headers(data), + ) else: return super().__call__() + def _get_chunk_response_headers(self, chunk_data: DataWithMeta) -> dict[str, str]: + return self._make_chunk_response_headers( + self._get_chunk_checksum(chunk_data), + self._db_job.segment.chunks_updated_date + ) + + @extend_schema(tags=['tasks']) @extend_schema_view( list=extend_schema( @@ -841,17 +882,18 @@ class TaskViewSet(viewsets.GenericViewSet, mixins.ListModelMixin, ).prefetch_related( 'segment_set__job_set', 'segment_set__job_set__assignee', - ).with_job_summary().all() + ).with_job_summary() lookup_fields = { 'project_name': 'project__name', 'owner': 'owner__username', 'assignee': 'assignee__username', 'tracker_link': 'bug_tracker', + 'validation_mode': 'data__validation_layout__mode', } search_fields = ( 'project_name', 'name', 'owner', 'status', 'assignee', - 'subset', 'mode', 'dimension', 'tracker_link' + 'subset', 'mode', 'dimension', 'tracker_link', 'validation_mode' ) filter_fields = list(search_fields) + ['id', 'project_id', 'updated_date'] filter_description = textwrap.dedent(""" @@ -1289,6 +1331,18 @@ def _handle_upload_backup(request): description="Specifies the quality level of the requested data"), OpenApiParameter('number', location=OpenApiParameter.QUERY, required=False, type=OpenApiTypes.INT, description="A unique number value identifying chunk or frame"), + OpenApiParameter( + _DATA_CHECKSUM_HEADER_NAME, + location=OpenApiParameter.HEADER, type=OpenApiTypes.STR, required=False, + response=[200], + description="Data checksum, applicable for chunks only", + ), + OpenApiParameter( + _DATA_UPDATED_DATE_HEADER_NAME, + location=OpenApiParameter.HEADER, type=OpenApiTypes.DATETIME, required=False, + response=[200], + description="Data update date, applicable for chunks only", + ) ], responses={ '200': OpenApiResponse(description='Data of a specific type'), @@ -1574,6 +1628,7 @@ def _get_rq_response(queue, job_id): def metadata(self, request, pk): self.get_object() #force to call check_object_permissions db_task = models.Task.objects.prefetch_related( + 'segment_set', Prefetch('data', queryset=models.Data.objects.select_related('video').prefetch_related( Prefetch('images', 
queryset=models.Image.objects.prefetch_related('related_files').order_by('frame')) )) @@ -1598,6 +1653,7 @@ def metadata(self, request, pk): db_data = db_task.data db_data.frames = frame_meta + db_data.chunks_updated_date = db_task.get_chunks_updated_date() serializer = DataMetaReadSerializer(db_data) return Response(serializer.data) @@ -1673,6 +1729,59 @@ def preview(self, request, pk): ) return data_getter() + @extend_schema( + methods=["GET"], + summary="Allows getting current validation configuration", + responses={ + '200': OpenApiResponse(TaskValidationLayoutReadSerializer), + }) + @extend_schema( + methods=["PATCH"], + summary="Allows updating current validation configuration", + request=TaskValidationLayoutWriteSerializer, + responses={ + '200': OpenApiResponse(TaskValidationLayoutReadSerializer), + }, + examples=[ + OpenApiExample("set honeypots to random validation frames", { + "frame_selection_method": models.JobFrameSelectionMethod.RANDOM_UNIFORM + }), + OpenApiExample("set honeypots manually", { + "frame_selection_method": models.JobFrameSelectionMethod.MANUAL, + "honeypot_real_frames": [10, 20, 22] + }), + OpenApiExample("disable validation frames", { + "disabled_frames": [4, 5, 8] + }), + OpenApiExample("restore all validation frames", { + "disabled_frames": [] + }), + ]) + @action(detail=True, methods=["GET", "PATCH"], url_path='validation_layout') + @transaction.atomic + def validation_layout(self, request, pk): + db_task = self.get_object() # call check_object_permissions as well + + validation_layout = getattr(db_task.data, 'validation_layout', None) + + if request.method == "PATCH": + if not validation_layout: + raise ValidationError( + "Task has no validation setup configured. " + "Validation must be initialized during task creation" + ) + + request_serializer = TaskValidationLayoutWriteSerializer(db_task, data=request.data) + request_serializer.is_valid(raise_exception=True) + validation_layout = request_serializer.save().data.validation_layout + + if not validation_layout: + response_serializer = TaskValidationLayoutReadSerializer(SimpleNamespace(mode=None)) + return Response(response_serializer.data, status=status.HTTP_200_OK) + + response_serializer = TaskValidationLayoutReadSerializer(validation_layout) + return Response(response_serializer.data, status=status.HTTP_200_OK) + @extend_schema(tags=['jobs']) @extend_schema_view( @@ -1681,7 +1790,43 @@ def preview(self, request, pk): request=JobWriteSerializer, responses={ '201': JobReadSerializer, # check JobWriteSerializer.to_representation - }), + }, + examples=[ + OpenApiExample("create gt job with random 10 frames", { + "type": models.JobType.GROUND_TRUTH, + "task_id": 42, + "frame_selection_method": models.JobFrameSelectionMethod.RANDOM_UNIFORM, + "frame_count": 10, + "random_seed": 1, + }), + OpenApiExample("create gt job with random 15% frames", { + "type": models.JobType.GROUND_TRUTH, + "task_id": 42, + "frame_selection_method": models.JobFrameSelectionMethod.RANDOM_UNIFORM, + "frame_share": 0.15, + "random_seed": 1, + }), + OpenApiExample("create gt job with 3 random frames in each job", { + "type": models.JobType.GROUND_TRUTH, + "task_id": 42, + "frame_selection_method": models.JobFrameSelectionMethod.RANDOM_PER_JOB, + "frames_per_job_count": 3, + "random_seed": 1, + }), + OpenApiExample("create gt job with 20% random frames in each job", { + "type": models.JobType.GROUND_TRUTH, + "task_id": 42, + "frame_selection_method": models.JobFrameSelectionMethod.RANDOM_PER_JOB, + "frames_per_job_share": 0.2, +
"random_seed": 1, + }), + OpenApiExample("create gt job with manual frame selection", { + "type": models.JobType.GROUND_TRUTH, + "task_id": 42, + "frame_selection_method": models.JobFrameSelectionMethod.MANUAL, + "frames": [1, 5, 10, 18], + }), + ]), retrieve=extend_schema( summary='Get job details', responses={ @@ -1717,7 +1862,7 @@ class JobViewSet(viewsets.GenericViewSet, mixins.ListModelMixin, mixins.CreateMo queryset = Job.objects.select_related('assignee', 'segment__task__data', 'segment__task__project', 'segment__task__annotation_guide', 'segment__task__project__annotation_guide', ).annotate( - Count('issues', distinct=True), + django_models.Count('issues', distinct=True), ).all() iam_organization_field = 'segment__task__organization' @@ -2161,6 +2306,7 @@ def metadata(self, request, pk): db_data.stop_frame = data_stop_frame db_data.size = len(segment_frame_set) db_data.included_frames = db_segment.frames or None + db_data.chunks_updated_date = db_segment.chunks_updated_date frame_meta = [{ 'width': item.width, @@ -2189,6 +2335,55 @@ def preview(self, request, pk): ) return data_getter() + @extend_schema( + methods=["GET"], + summary="Allows getting current validation configuration", + responses={ + '200': OpenApiResponse(JobValidationLayoutReadSerializer), + }) + @extend_schema( + methods=["PATCH"], + summary="Allows updating current validation configuration", + request=JobValidationLayoutWriteSerializer, + responses={ + '200': OpenApiResponse(JobValidationLayoutReadSerializer), + }, + examples=[ + OpenApiExample("set honeypots to random validation frames", { + "frame_selection_method": models.JobFrameSelectionMethod.RANDOM_UNIFORM + }), + OpenApiExample("set honeypots manually", { + "frame_selection_method": models.JobFrameSelectionMethod.MANUAL, + "honeypot_real_frames": [10, 20, 22] + }), + ]) + @action(detail=True, methods=["GET", "PATCH"], url_path='validation_layout') + @transaction.atomic + def validation_layout(self, request, pk): + self.get_object() # call check_object_permissions as well + + db_job = models.Job.objects.prefetch_related( + 'segment', + 'segment__task', + Prefetch('segment__task__data', + queryset=( + models.Data.objects + .select_related('video', 'validation_layout') + .prefetch_related( + Prefetch('images', queryset=models.Image.objects.order_by('frame')) + ) + ) + ) + ).get(pk=pk) + + if request.method == "PATCH": + request_serializer = JobValidationLayoutWriteSerializer(db_job, data=request.data) + request_serializer.is_valid(raise_exception=True) + db_job = request_serializer.save() + + response_serializer = JobValidationLayoutReadSerializer(db_job) + return Response(response_serializer.data, status=status.HTTP_200_OK) + @extend_schema(tags=['issues']) @extend_schema_view( retrieve=extend_schema( diff --git a/cvat/apps/iam/rules/utils.rego b/cvat/apps/iam/rules/utils.rego index 7371f2e35edb..101f28841d4c 100644 --- a/cvat/apps/iam/rules/utils.rego +++ b/cvat/apps/iam/rules/utils.rego @@ -58,6 +58,8 @@ IMPORT_DATASET := "import:dataset" IMPORT_BACKUP := "import:backup" EXPORT_BACKUP := "export:backup" UPDATE_ORG := "update:organization" +VIEW_VALIDATION_LAYOUT := "view:validation_layout" +UPDATE_VALIDATION_LAYOUT := "update:validation_layout" get_priority(privilege) := { diff --git a/cvat/schema.yml b/cvat/schema.yml index 5f30ace5fcc7..35b01a7e9c71 100644 --- a/cvat/schema.yml +++ b/cvat/schema.yml @@ -1916,6 +1916,50 @@ paths: application/json: schema: $ref: '#/components/schemas/JobWriteRequest' + examples: + CreateGtJobWithRandom10Frames: + value: + 
type: ground_truth + task_id: 42 + frame_selection_method: random_uniform + frame_count: 10 + random_seed: 1 + summary: create gt job with random 10 frames + CreateGtJobWithRandom15%Frames: + value: + type: ground_truth + task_id: 42 + frame_selection_method: random_uniform + frame_share: 0.15 + random_seed: 1 + summary: create gt job with random 15% frames + CreateGtJobWith3RandomFramesInEachJob: + value: + type: ground_truth + task_id: 42 + frame_selection_method: random_per_job + frames_per_job_count: 3 + random_seed: 1 + summary: create gt job with 3 random frames in each job + CreateGtJobWith20%RandomFramesInEachJob: + value: + type: ground_truth + task_id: 42 + frame_selection_method: random_per_job + frames_per_job_share: 0.2 + random_seed: 1 + summary: create gt job with 20% random frames in each job + CreateGtJobWithManualFrameSelection: + value: + type: ground_truth + task_id: 42 + frame_selection_method: manual + frames: + - 1 + - 5 + - 10 + - 18 + summary: create gt job with manual frame selection required: true security: - sessionAuth: [] @@ -2594,6 +2638,75 @@ paths: responses: '200': description: Job image preview + /api/jobs/{id}/validation_layout: + get: + operationId: jobs_retrieve_validation_layout + summary: Allows getting current validation configuration + parameters: + - in: path + name: id + schema: + type: integer + description: A unique integer value identifying this job. + required: true + tags: + - jobs + security: + - sessionAuth: [] + csrfAuth: [] + tokenAuth: [] + - signatureAuth: [] + - basicAuth: [] + responses: + '200': + content: + application/vnd.cvat+json: + schema: + $ref: '#/components/schemas/JobValidationLayoutRead' + description: '' + patch: + operationId: jobs_partial_update_validation_layout + summary: Allows updating current validation configuration + parameters: + - in: path + name: id + schema: + type: integer + description: A unique integer value identifying this job. + required: true + tags: + - jobs + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/PatchedJobValidationLayoutWriteRequest' + examples: + SetHoneypotsToRandomValidationFrames: + value: + frame_selection_method: random_uniform + summary: set honeypots to random validation frames + SetHoneypotsManually: + value: + frame_selection_method: manual + honeypot_real_frames: + - 10 + - 20 + - 22 + summary: set honeypots manually + security: + - sessionAuth: [] + csrfAuth: [] + tokenAuth: [] + - signatureAuth: [] + - basicAuth: [] + responses: + '200': + content: + application/vnd.cvat+json: + schema: + $ref: '#/components/schemas/JobValidationLayoutRead' + description: '' /api/labels: get: operationId: labels_list @@ -4953,7 +5066,7 @@ paths: Details about the syntax used can be found at the link: https://jsonlogic.com/ - Available filter_fields: ['project_name', 'name', 'owner', 'status', 'assignee', 'subset', 'mode', 'dimension', 'tracker_link', 'id', 'project_id', 'updated_date']. + Available filter_fields: ['project_name', 'name', 'owner', 'status', 'assignee', 'subset', 'mode', 'dimension', 'tracker_link', 'validation_mode', 'id', 'project_id', 'updated_date']. There are few examples for complex filtering tasks: @@ -5015,7 +5128,7 @@ paths: in: query description: 'A search term. 
Available search_fields: (''project_name'', ''name'', ''owner'', ''status'', ''assignee'', ''subset'', ''mode'', ''dimension'', - ''tracker_link'')' + ''tracker_link'', ''validation_mode'')' schema: type: string - name: sort @@ -5023,7 +5136,8 @@ paths: in: query description: 'Which field to use when ordering the results. Available ordering_fields: [''project_name'', ''name'', ''owner'', ''status'', ''assignee'', ''subset'', - ''mode'', ''dimension'', ''tracker_link'', ''id'', ''project_id'', ''updated_date'']' + ''mode'', ''dimension'', ''tracker_link'', ''validation_mode'', ''id'', + ''project_id'', ''updated_date'']' schema: type: string - name: status @@ -5045,6 +5159,14 @@ paths: description: A simple equality filter for the tracker_link field schema: type: string + - name: validation_mode + in: query + description: A simple equality filter for the validation_mode field + schema: + type: string + enum: + - gt + - gt_pool tags: - tasks security: @@ -5621,6 +5743,16 @@ paths: - basicAuth: [] responses: '200': + headers: + X-Checksum: + schema: + type: string + description: Data checksum, applicable for chunks only + X-Updated-Date: + schema: + type: string + format: date-time + description: Data update date, applicable for chunks only description: Data of a specific type post: operationId: tasks_create_data @@ -5989,6 +6121,86 @@ paths: schema: $ref: '#/components/schemas/RqStatus' description: '' + /api/tasks/{id}/validation_layout: + get: + operationId: tasks_retrieve_validation_layout + summary: Allows getting current validation configuration + parameters: + - in: path + name: id + schema: + type: integer + description: A unique integer value identifying this task. + required: true + tags: + - tasks + security: + - sessionAuth: [] + csrfAuth: [] + tokenAuth: [] + - signatureAuth: [] + - basicAuth: [] + responses: + '200': + content: + application/vnd.cvat+json: + schema: + $ref: '#/components/schemas/TaskValidationLayoutRead' + description: '' + patch: + operationId: tasks_partial_update_validation_layout + summary: Allows updating current validation configuration + parameters: + - in: path + name: id + schema: + type: integer + description: A unique integer value identifying this task. + required: true + tags: + - tasks + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/PatchedTaskValidationLayoutWriteRequest' + examples: + SetHoneypotsToRandomValidationFrames: + value: + frame_selection_method: random_uniform + summary: set honeypots to random validation frames + SetHoneypotsManually: + value: + frame_selection_method: manual + honeypot_real_frames: + - 10 + - 20 + - 22 + summary: set honeypots manually + DisableValidationFrames: + value: + disabled_frames: + - 4 + - 5 + - 8 + summary: disable validation frames + RestoreAllValidationFrames: + value: + disabled_frames: [] + summary: restore all validation frames + security: + - sessionAuth: [] + csrfAuth: [] + tokenAuth: [] + - signatureAuth: [] + - basicAuth: [] + responses: + '200': + content: + application/vnd.cvat+json: + schema: + $ref: '#/components/schemas/TaskValidationLayoutRead' + description: '' /api/tasks/backup/: post: operationId: tasks_create_backup @@ -7246,6 +7458,9 @@ components: DataMetaRead: type: object properties: + chunks_updated_date: + type: string + format: date-time chunk_size: type: integer readOnly: true @@ -7287,6 +7502,7 @@ components: description: | A list of valid frame ids. The None value means all frames are included. 
required: + - chunks_updated_date - deleted_frames - frames - image_quality @@ -8155,6 +8371,26 @@ components: description: |- * `annotation` - ANNOTATION * `ground_truth` - GROUND_TRUTH + JobValidationLayoutRead: + type: object + properties: + honeypot_count: + type: integer + minimum: 0 + honeypot_frames: + type: array + items: + type: integer + minimum: 0 + description: | + The list of frame ids for honeypots in the job + honeypot_real_frames: + type: array + items: + type: integer + minimum: 0 + description: | + The list of real (validation) frame ids for honeypots in the job JobWriteRequest: type: object properties: @@ -9251,6 +9487,26 @@ components: items: type: integer minimum: 0 + PatchedJobValidationLayoutWriteRequest: + type: object + properties: + frame_selection_method: + allOf: + - $ref: '#/components/schemas/FrameSelectionMethod' + description: |- + The method to use for frame selection of new real frames for honeypots in the job + + + * `random_uniform` - RANDOM_UNIFORM + * `random_per_job` - RANDOM_PER_JOB + * `manual` - MANUAL + honeypot_real_frames: + type: array + items: + type: integer + minimum: 0 + description: | + The list of frame ids. Applicable only to the "manual" frame selection method PatchedJobWriteRequest: type: object properties: @@ -9454,6 +9710,33 @@ components: compare_attributes: type: boolean description: Enables or disables annotation attribute comparison + PatchedTaskValidationLayoutWriteRequest: + type: object + properties: + disabled_frames: + type: array + items: + type: integer + minimum: 0 + description: | + The list of frame ids to be excluded from validation + frame_selection_method: + allOf: + - $ref: '#/components/schemas/FrameSelectionMethod' + description: |- + The method to use for frame selection of new real frames for honeypots in the task + + + * `random_uniform` - RANDOM_UNIFORM + * `random_per_job` - RANDOM_PER_JOB + * `manual` - MANUAL + honeypot_real_frames: + type: array + items: + type: integer + minimum: 0 + description: | + The list of frame ids. 
Applicable only to the "manual" frame selection method PatchedTaskWriteRequest: type: object properties: @@ -10569,6 +10852,50 @@ components: required: - jobs - labels + TaskValidationLayoutRead: + type: object + properties: + mode: + readOnly: true + nullable: true + oneOf: + - $ref: '#/components/schemas/ValidationMode' + - $ref: '#/components/schemas/NullEnum' + frames_per_job_count: + type: integer + readOnly: true + nullable: true + validation_frames: + type: array + items: + type: integer + minimum: 0 + description: | + The list of frame ids to be used for validation + disabled_frames: + type: array + items: + type: integer + minimum: 0 + description: | + The list of frame ids excluded from validation + honeypot_count: + type: integer + minimum: 0 + honeypot_frames: + type: array + items: + type: integer + minimum: 0 + description: | + The list of frame ids for all honeypots in the task + honeypot_real_frames: + type: array + items: + type: integer + minimum: 0 + description: | + The list of real (validation) frame ids for all honeypots in the task TaskWriteRequest: type: object properties: diff --git a/tests/python/rest_api/test_tasks.py b/tests/python/rest_api/test_tasks.py index 0a9ee61a124f..408e74d88959 100644 --- a/tests/python/rest_api/test_tasks.py +++ b/tests/python/rest_api/test_tasks.py @@ -289,15 +289,16 @@ def _get_endpoint(self, api_client: ApiClient) -> Endpoint: @pytest.mark.parametrize( "field", ( + "assignee", + "dimension", + "mode", "name", "owner", + "project_id", "status", - "assignee", "subset", - "mode", - "dimension", - "project_id", "tracker_link", + "validation_mode", ), ) def test_can_use_simple_filter_for_object_list(self, field): @@ -736,6 +737,7 @@ def interpolate(frame): @pytest.mark.usefixtures("restore_db_per_class") @pytest.mark.usefixtures("restore_redis_inmem_per_function") +@pytest.mark.usefixtures("restore_redis_ondisk_after_class") class TestGetTaskDataset: @staticmethod @@ -986,6 +988,7 @@ def test_uses_subset_name( @pytest.mark.usefixtures("restore_db_per_function") @pytest.mark.usefixtures("restore_cvat_data_per_function") @pytest.mark.usefixtures("restore_redis_ondisk_per_function") +@pytest.mark.usefixtures("restore_redis_ondisk_after_class") class TestPostTaskData: _USERNAME = "admin1" @@ -2275,10 +2278,13 @@ def _create_task(): self._USERNAME, spec=deepcopy(task_params), data=deepcopy(data_params) ) task_meta = json.loads(api_client.tasks_api.retrieve_data_meta(task_id)[1].data) - return task_meta + task_validation_layout = json.loads( + api_client.tasks_api.retrieve_validation_layout(task_id)[1].data + ) + return task_meta, task_validation_layout - task1_meta = _create_task() - task2_meta = _create_task() + task1_meta, task1_validation_layout = _create_task() + task2_meta, task2_validation_layout = _create_task() assert ( DeepDiff( @@ -2289,6 +2295,7 @@ def _create_task(): ) == {} ) + assert DeepDiff(task1_validation_layout, task2_validation_layout, ignore_order=False) == {} @parametrize( "frame_selection_method, method_params", @@ -2608,8 +2615,9 @@ def read_frame(self, i: int) -> Image.Image: @pytest.mark.usefixtures("restore_db_per_class") -@pytest.mark.usefixtures("restore_redis_ondisk_per_class") @pytest.mark.usefixtures("restore_cvat_data_per_class") +@pytest.mark.usefixtures("restore_redis_ondisk_per_class") +@pytest.mark.usefixtures("restore_redis_ondisk_after_class") class TestTaskData: _USERNAME = "admin1" @@ -2708,11 +2716,12 @@ def _uploaded_images_task_with_honeypots_and_segments_base( *, start_frame: Optional[int] = None, 
step: Optional[int] = None, + random_seed: int = 42, ) -> Generator[Tuple[_TaskSpec, int], None, None]: validation_params = models.DataRequestValidationParams._from_openapi_data( mode="gt_pool", frame_selection_method="random_uniform", - random_seed=42, + random_seed=random_seed, frame_count=5, frames_per_job_count=2, ) @@ -2782,6 +2791,44 @@ def fxt_uploaded_images_task_with_honeypots_and_segments_start_step( request, start_frame=start_frame, step=step ) + @fixture(scope="class") + @parametrize("random_seed", [1, 2, 5]) + def fxt_uploaded_images_task_with_honeypots_and_changed_real_frames( + self, request: pytest.FixtureRequest, random_seed: int + ) -> Generator[Tuple[_TaskSpec, int], None, None]: + with closing( + self._uploaded_images_task_with_honeypots_and_segments_base( + request, start_frame=2, step=3, random_seed=random_seed + ) + ) as gen_iter: + task_spec, task_id = next(gen_iter) + + with make_api_client(self._USERNAME) as api_client: + validation_layout, _ = api_client.tasks_api.retrieve_validation_layout(task_id) + validation_frames = validation_layout.validation_frames + + new_honeypot_real_frames = [ + validation_frames[(validation_frames.index(f) + 1) % len(validation_frames)] + for f in validation_layout.honeypot_real_frames + ] + api_client.tasks_api.partial_update_validation_layout( + task_id, + patched_task_validation_layout_write_request=( + models.PatchedTaskValidationLayoutWriteRequest( + frame_selection_method="manual", + honeypot_real_frames=new_honeypot_real_frames, + ) + ), + ) + + # Map each honeypot frame to its newly assigned real frame + frame_map = dict(zip(validation_layout.honeypot_frames, new_honeypot_real_frames)) + + _get_frame = task_spec._get_frame + task_spec._get_frame = lambda i: _get_frame(frame_map.get(i, i)) + + yield task_spec, task_id + def _uploaded_images_task_with_gt_and_segments_base( self, request: pytest.FixtureRequest, @@ -3000,6 +3047,7 @@ def _get_job_abs_frame_set(self, job_meta: models.DataMetaRead) -> Sequence[int] _tasks_with_honeypots_cases = [ fixture_ref("fxt_uploaded_images_task_with_honeypots_and_segments"), fixture_ref("fxt_uploaded_images_task_with_honeypots_and_segments_start_step"), + fixture_ref("fxt_uploaded_images_task_with_honeypots_and_changed_real_frames"), ] _tasks_with_simple_gt_job_cases = [ @@ -3706,6 +3758,7 @@ def test_work_with_task_containing_non_stable_cloud_storage_files( @pytest.mark.usefixtures("restore_redis_inmem_per_function") @pytest.mark.usefixtures("restore_redis_ondisk_per_class") +@pytest.mark.usefixtures("restore_redis_ondisk_after_class") class TestTaskBackups: @pytest.fixture(autouse=True) def setup( @@ -4140,23 +4193,42 @@ def test_placeholder_frames_are_not_present_in_task_annotation_export( assert False @parametrize("task, gt_job, annotation_jobs", [fixture_ref(fxt_task_with_honeypots)]) - def test_can_exclude_and_restore_gt_frames_via_job_meta( - self, admin_user, task, gt_job, annotation_jobs + @parametrize("method", ["gt_job_meta", "task_validation_layout"]) + def test_can_exclude_and_restore_gt_frames( + self, admin_user, task, gt_job, annotation_jobs, method: str ): with make_api_client(admin_user) as api_client: task_meta, _ = api_client.tasks_api.retrieve_data_meta(task["id"]) task_frames = [f.name for f in task_meta.frames] for deleted_gt_frames in [ [v] for v in range(gt_job["start_frame"], gt_job["stop_frame"] + 1)[:2] ] +
[[]]: - updated_gt_job_meta, _ = api_client.jobs_api.partial_update_data_meta( - gt_job["id"], - patched_job_data_meta_write_request=models.PatchedJobDataMetaWriteRequest( - deleted_frames=deleted_gt_frames - ), + if method == "gt_job_meta": + api_client.jobs_api.partial_update_data_meta( + gt_job["id"], + patched_job_data_meta_write_request=models.PatchedJobDataMetaWriteRequest( + deleted_frames=deleted_gt_frames + ), + ) + elif method == "task_validation_layout": + api_client.tasks_api.partial_update_validation_layout( + task["id"], + patched_task_validation_layout_write_request=( + models.PatchedTaskValidationLayoutWriteRequest( + disabled_frames=deleted_gt_frames + ) + ), + ) + else: + assert False + + updated_validation_layout, _ = api_client.tasks_api.retrieve_validation_layout( + task["id"] ) + assert updated_validation_layout.disabled_frames == deleted_gt_frames + updated_gt_job_meta, _ = api_client.jobs_api.retrieve_data_meta(gt_job["id"]) assert updated_gt_job_meta.deleted_frames == deleted_gt_frames # the excluded GT frames must be excluded from all the jobs with the same frame @@ -4168,12 +4240,14 @@ def test_can_exclude_and_restore_gt_frames_via_job_meta( ) for j in annotation_jobs: - updated_job_meta, _ = api_client.jobs_api.retrieve_data_meta(j["id"]) - assert [ + deleted_job_frames = [ i for i in updated_task_meta.deleted_frames if j["start_frame"] <= i <= j["stop_frame"] - ] == updated_job_meta.deleted_frames + ] + + updated_job_meta, _ = api_client.jobs_api.retrieve_data_meta(j["id"]) + assert deleted_job_frames == updated_job_meta.deleted_frames @parametrize("task, gt_job, annotation_jobs", [fixture_ref(fxt_task_with_honeypots)]) def test_can_delete_honeypot_frames_by_changing_job_meta_in_annotation_job( @@ -4242,6 +4316,192 @@ def test_can_restore_gt_frames_via_task_meta_only_if_all_frames_are_restored( ) assert updated_task_meta.deleted_frames == [] + @parametrize("task, gt_job, annotation_jobs", [fixture_ref(fxt_task_with_honeypots)]) + @parametrize("frame_selection_method", ["manual", "random_uniform"]) + def test_can_change_honeypot_frames_in_task( + self, admin_user, task, gt_job, annotation_jobs, frame_selection_method: str + ): + assert gt_job["stop_frame"] - gt_job["start_frame"] + 1 >= 2 + + with make_api_client(admin_user) as api_client: + gt_frame_set = range(gt_job["start_frame"], gt_job["stop_frame"] + 1) + old_validation_layout = json.loads( + api_client.tasks_api.retrieve_validation_layout(task["id"])[1].data + ) + + params = {"frame_selection_method": frame_selection_method} + + if frame_selection_method == "manual": + requested_honeypot_real_frames = [ + gt_frame_set[(old_real_frame + 1) % len(gt_frame_set)] + for old_real_frame in old_validation_layout["honeypot_real_frames"] + ] + + params["honeypot_real_frames"] = requested_honeypot_real_frames + + new_validation_layout = json.loads( + api_client.tasks_api.partial_update_validation_layout( + task["id"], + patched_task_validation_layout_write_request=( + models.PatchedTaskValidationLayoutWriteRequest(**params) + ), + )[1].data + ) + + new_honeypot_real_frames = new_validation_layout["honeypot_real_frames"] + + assert old_validation_layout["honeypot_count"] == len(new_honeypot_real_frames) + assert all(f in gt_frame_set for f in new_honeypot_real_frames) + + if frame_selection_method == "manual": + assert new_honeypot_real_frames == requested_honeypot_real_frames + + assert ( + DeepDiff( + old_validation_layout, + new_validation_layout, + exclude_regex_paths=[r"root\['honeypot_real_frames'\]\[\d+\]"], 
+ ) + == {} + ) + + @parametrize("task, gt_job, annotation_jobs", [fixture_ref(fxt_task_with_honeypots)]) + @parametrize("frame_selection_method", ["manual", "random_uniform"]) + def test_can_change_honeypot_frames_in_task_can_only_select_from_active_validation_frames( + self, admin_user, task, gt_job, annotation_jobs, frame_selection_method: str + ): + assert gt_job["stop_frame"] - gt_job["start_frame"] + 1 >= 2 + + with make_api_client(admin_user) as api_client: + old_validation_layout = json.loads( + api_client.tasks_api.retrieve_validation_layout(task["id"])[1].data + ) + + honeypots_per_job = old_validation_layout["frames_per_job_count"] + + gt_frame_set = range(gt_job["start_frame"], gt_job["stop_frame"] + 1) + active_gt_set = gt_frame_set[:honeypots_per_job] + + api_client.jobs_api.partial_update_data_meta( + gt_job["id"], + patched_job_data_meta_write_request=models.PatchedJobDataMetaWriteRequest( + deleted_frames=[f for f in gt_frame_set if f not in active_gt_set] + ), + ) + + params = {"frame_selection_method": frame_selection_method} + + if frame_selection_method == "manual": + requested_honeypot_real_frames = [ + active_gt_set[(old_real_frame + 1) % len(active_gt_set)] + for old_real_frame in old_validation_layout["honeypot_real_frames"] + ] + + params["honeypot_real_frames"] = requested_honeypot_real_frames + + _, response = api_client.tasks_api.partial_update_validation_layout( + task["id"], + patched_task_validation_layout_write_request=( + models.PatchedTaskValidationLayoutWriteRequest( + frame_selection_method="manual", + honeypot_real_frames=[ + next(f for f in gt_frame_set if f not in active_gt_set) + ] + * old_validation_layout["honeypot_count"], + ) + ), + _parse_response=False, + _check_status=False, + ) + assert response.status == HTTPStatus.BAD_REQUEST + assert b"are disabled. 
Restore them" in response.data + + new_validation_layout = json.loads( + api_client.tasks_api.partial_update_validation_layout( + task["id"], + patched_task_validation_layout_write_request=( + models.PatchedTaskValidationLayoutWriteRequest(**params) + ), + )[1].data + ) + + new_honeypot_real_frames = new_validation_layout["honeypot_real_frames"] + + assert old_validation_layout["honeypot_count"] == len(new_honeypot_real_frames) + assert all(f in active_gt_set for f in new_honeypot_real_frames) + + if frame_selection_method == "manual": + assert new_honeypot_real_frames == requested_honeypot_real_frames + else: + assert all( + [ + honeypots_per_job + == len( + set( + new_honeypot_real_frames[ + j * honeypots_per_job : (j + 1) * honeypots_per_job + ] + ) + ) + for j in range(len(annotation_jobs)) + ] + ), new_honeypot_real_frames + + @parametrize("task, gt_job, annotation_jobs", [fixture_ref(fxt_task_with_honeypots)]) + @parametrize("frame_selection_method", ["manual", "random_uniform"]) + def test_can_change_honeypot_frames_in_annotation_jobs( + self, admin_user, task, gt_job, annotation_jobs, frame_selection_method: str + ): + assert gt_job["stop_frame"] - gt_job["start_frame"] + 1 >= 2 + + with make_api_client(admin_user) as api_client: + gt_frame_set = range(gt_job["start_frame"], gt_job["stop_frame"] + 1) + + for annotation_job in annotation_jobs: + old_validation_layout = json.loads( + api_client.jobs_api.retrieve_validation_layout(annotation_job["id"])[1].data + ) + old_job_meta, _ = api_client.jobs_api.retrieve_data_meta(annotation_job["id"]) + + params = {"frame_selection_method": frame_selection_method} + + if frame_selection_method == "manual": + requested_honeypot_real_frames = [ + gt_frame_set[(gt_frame_set.index(old_real_frame) + 1) % len(gt_frame_set)] + for old_real_frame in old_validation_layout["honeypot_real_frames"] + ] + + params["honeypot_real_frames"] = requested_honeypot_real_frames + + new_validation_layout = json.loads( + api_client.jobs_api.partial_update_validation_layout( + annotation_job["id"], + patched_job_validation_layout_write_request=( + models.PatchedJobValidationLayoutWriteRequest(**params) + ), + )[1].data + ) + + new_honeypot_real_frames = new_validation_layout["honeypot_real_frames"] + + assert old_validation_layout["honeypot_count"] == len(new_honeypot_real_frames) + assert all(f in gt_frame_set for f in new_honeypot_real_frames) + + if frame_selection_method == "manual": + assert new_honeypot_real_frames == requested_honeypot_real_frames + + assert ( + DeepDiff( + old_validation_layout, + new_validation_layout, + exclude_regex_paths=[r"root\['honeypot_real_frames'\]\[\d+\]"], + ) + == {} + ) + + new_job_meta, _ = api_client.jobs_api.retrieve_data_meta(annotation_job["id"]) + assert new_job_meta.chunks_updated_date > old_job_meta.chunks_updated_date + @pytest.mark.usefixtures("restore_db_per_class") class TestGetTaskPreview: @@ -4308,6 +4568,7 @@ def test_task_unassigned_cannot_see_task_preview( @pytest.mark.usefixtures("restore_redis_ondisk_per_class") +@pytest.mark.usefixtures("restore_redis_ondisk_after_class") class TestUnequalJobs: @pytest.fixture(autouse=True) def setup(self, restore_db_per_function, tmp_path: Path, admin_user: str): diff --git a/tests/python/shared/assets/cvat_db/data.json b/tests/python/shared/assets/cvat_db/data.json index ee3850be4379..8a836d7d6958 100644 --- a/tests/python/shared/assets/cvat_db/data.json +++ b/tests/python/shared/assets/cvat_db/data.json @@ -5779,6 +5779,7 @@ "task": 2, "start_frame": 0, "stop_frame": 22, 
+ "chunks_updated_date": "2024-10-02T08:13:16.623Z", "type": "range", "frames": "[]" } @@ -5790,6 +5791,7 @@ "task": 5, "start_frame": 0, "stop_frame": 24, + "chunks_updated_date": "2024-10-02T08:13:16.623Z", "type": "range", "frames": "[]" } @@ -5801,6 +5803,7 @@ "task": 6, "start_frame": 0, "stop_frame": 0, + "chunks_updated_date": "2024-10-02T08:13:16.623Z", "type": "range", "frames": "[]" } @@ -5812,6 +5815,7 @@ "task": 7, "start_frame": 0, "stop_frame": 10, + "chunks_updated_date": "2024-10-02T08:13:16.623Z", "type": "range", "frames": "[]" } @@ -5823,6 +5827,7 @@ "task": 8, "start_frame": 0, "stop_frame": 13, + "chunks_updated_date": "2024-10-02T08:13:16.623Z", "type": "range", "frames": "[]" } @@ -5834,6 +5839,7 @@ "task": 9, "start_frame": 0, "stop_frame": 4, + "chunks_updated_date": "2024-10-02T08:13:16.623Z", "type": "range", "frames": "[]" } @@ -5845,6 +5851,7 @@ "task": 9, "start_frame": 5, "stop_frame": 9, + "chunks_updated_date": "2024-10-02T08:13:16.623Z", "type": "range", "frames": "[]" } @@ -5856,6 +5863,7 @@ "task": 9, "start_frame": 10, "stop_frame": 14, + "chunks_updated_date": "2024-10-02T08:13:16.623Z", "type": "range", "frames": "[]" } @@ -5867,6 +5875,7 @@ "task": 9, "start_frame": 15, "stop_frame": 19, + "chunks_updated_date": "2024-10-02T08:13:16.623Z", "type": "range", "frames": "[]" } @@ -5878,6 +5887,7 @@ "task": 11, "start_frame": 0, "stop_frame": 10, + "chunks_updated_date": "2024-10-02T08:13:16.623Z", "type": "range", "frames": "[]" } @@ -5889,6 +5899,7 @@ "task": 13, "start_frame": 0, "stop_frame": 4, + "chunks_updated_date": "2024-10-02T08:13:16.623Z", "type": "range", "frames": "[]" } @@ -5900,6 +5911,7 @@ "task": 14, "start_frame": 0, "stop_frame": 7, + "chunks_updated_date": "2024-10-02T08:13:16.623Z", "type": "range", "frames": "[]" } @@ -5911,6 +5923,7 @@ "task": 15, "start_frame": 0, "stop_frame": 24, + "chunks_updated_date": "2024-10-02T08:13:16.623Z", "type": "range", "frames": "[]" } @@ -5922,6 +5935,7 @@ "task": 17, "start_frame": 0, "stop_frame": 4, + "chunks_updated_date": "2024-10-02T08:13:16.623Z", "type": "range", "frames": "[]" } @@ -5933,6 +5947,7 @@ "task": 18, "start_frame": 0, "stop_frame": 1, + "chunks_updated_date": "2024-10-02T08:13:16.623Z", "type": "range", "frames": "[]" } @@ -5944,6 +5959,7 @@ "task": 19, "start_frame": 0, "stop_frame": 1, + "chunks_updated_date": "2024-10-02T08:13:16.623Z", "type": "range", "frames": "[]" } @@ -5955,6 +5971,7 @@ "task": 20, "start_frame": 0, "stop_frame": 1, + "chunks_updated_date": "2024-10-02T08:13:16.623Z", "type": "range", "frames": "[]" } @@ -5966,6 +5983,7 @@ "task": 21, "start_frame": 0, "stop_frame": 5, + "chunks_updated_date": "2024-10-02T08:13:16.623Z", "type": "range", "frames": "[]" } @@ -5977,6 +5995,7 @@ "task": 21, "start_frame": 6, "stop_frame": 9, + "chunks_updated_date": "2024-10-02T08:13:16.623Z", "type": "range", "frames": "[]" } @@ -5988,6 +6007,7 @@ "task": 22, "start_frame": 0, "stop_frame": 10, + "chunks_updated_date": "2024-10-02T08:13:16.623Z", "type": "range", "frames": "[]" } @@ -5999,6 +6019,7 @@ "task": 22, "start_frame": 0, "stop_frame": 10, + "chunks_updated_date": "2024-10-02T08:13:16.623Z", "type": "specific_frames", "frames": "0,1,2" } @@ -6010,6 +6031,7 @@ "task": 23, "start_frame": 0, "stop_frame": 4, + "chunks_updated_date": "2024-10-02T08:13:16.623Z", "type": "range", "frames": "[]" } @@ -6021,6 +6043,7 @@ "task": 23, "start_frame": 5, "stop_frame": 9, + "chunks_updated_date": "2024-10-02T08:13:16.623Z", "type": "range", "frames": "[]" } @@ -6032,6 +6055,7 
@@ "task": 23, "start_frame": 10, "stop_frame": 10, + "chunks_updated_date": "2024-10-02T08:13:16.623Z", "type": "range", "frames": "[]" } @@ -6043,6 +6067,7 @@ "task": 23, "start_frame": 0, "stop_frame": 10, + "chunks_updated_date": "2024-10-02T08:13:16.623Z", "type": "specific_frames", "frames": "4,5,7" } @@ -6054,6 +6079,7 @@ "task": 24, "start_frame": 0, "stop_frame": 0, + "chunks_updated_date": "2024-10-02T08:13:16.623Z", "type": "range", "frames": "[]" } @@ -6065,6 +6091,7 @@ "task": 25, "start_frame": 0, "stop_frame": 0, + "chunks_updated_date": "2024-10-02T08:13:16.623Z", "type": "range", "frames": "[]" } @@ -6076,6 +6103,7 @@ "task": 26, "start_frame": 0, "stop_frame": 0, + "chunks_updated_date": "2024-10-02T08:13:16.623Z", "type": "range", "frames": "[]" } @@ -6087,6 +6115,7 @@ "task": 27, "start_frame": 0, "stop_frame": 0, + "chunks_updated_date": "2024-10-02T08:13:16.623Z", "type": "range", "frames": "[]" } @@ -6098,6 +6127,7 @@ "task": 28, "start_frame": 0, "stop_frame": 0, + "chunks_updated_date": "2024-10-02T08:13:16.623Z", "type": "range", "frames": "[]" } @@ -6109,6 +6139,7 @@ "task": 29, "start_frame": 0, "stop_frame": 7, + "chunks_updated_date": "2024-10-02T08:13:16.623Z", "type": "range", "frames": "[]" } @@ -6120,6 +6151,7 @@ "task": 29, "start_frame": 8, "stop_frame": 15, + "chunks_updated_date": "2024-10-02T08:13:16.623Z", "type": "range", "frames": "[]" } @@ -6131,6 +6163,7 @@ "task": 29, "start_frame": 16, "stop_frame": 22, + "chunks_updated_date": "2024-10-02T08:13:16.623Z", "type": "range", "frames": "[]" } @@ -6142,6 +6175,55 @@ "task": 29, "start_frame": 23, "stop_frame": 28, + "chunks_updated_date": "2024-10-02T08:13:16.623Z", + "type": "range", + "frames": "[]" + } +}, +{ + "model": "engine.segment", + "pk": 38, + "fields": { + "task": 29, + "start_frame": 0, + "stop_frame": 7, + "chunks_updated_date": "2024-10-02T08:13:16.623Z", + "type": "range", + "frames": "[]" + } +}, +{ + "model": "engine.segment", + "pk": 39, + "fields": { + "task": 29, + "start_frame": 8, + "stop_frame": 15, + "chunks_updated_date": "2024-10-02T08:13:16.623Z", + "type": "range", + "frames": "[]" + } +}, +{ + "model": "engine.segment", + "pk": 40, + "fields": { + "task": 29, + "start_frame": 16, + "stop_frame": 22, + "chunks_updated_date": "2024-10-02T08:13:16.623Z", + "type": "range", + "frames": "[]" + } +}, +{ + "model": "engine.segment", + "pk": 41, + "fields": { + "task": 29, + "start_frame": 23, + "stop_frame": 28, + "chunks_updated_date": "2024-10-02T08:13:16.623Z", "type": "range", "frames": "[]" } diff --git a/tests/python/shared/fixtures/init.py b/tests/python/shared/fixtures/init.py index 98388c0d0cda..84e18110b0d6 100644 --- a/tests/python/shared/fixtures/init.py +++ b/tests/python/shared/fixtures/init.py @@ -634,3 +634,14 @@ def restore_redis_ondisk_per_class(request): docker_restore_redis_ondisk() else: kube_restore_redis_ondisk() + + +@pytest.fixture(scope="class") +def restore_redis_ondisk_after_class(request): + yield + + platform = request.config.getoption("--platform") + if platform == "local": + docker_restore_redis_ondisk() + else: + kube_restore_redis_ondisk()