Skip to content

Commit

Permalink
done setup for data validation
Browse files Browse the repository at this point in the history
  • Loading branch information
natek-1 committed Jul 27, 2024
1 parent f8edb52 commit 4bd4bf8
Show file tree
Hide file tree
Showing 5 changed files with 61 additions and 4 deletions.
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,5 @@ boto3==1.24.65
pyyaml==5.3.1
from-root==1.0.2
awscli==1.25.70
evidently==0.4.16
-e .
20 changes: 20 additions & 0 deletions src/forestCover/components/data_validation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import os
import sys
import json

import pandas as pd
from evidently.model_profile import Profile
from evidently.model_profile.sections import DataDriftProfileSection



from forestCover.entity.config_entity import DataValidationConfig
from forestCover.entity.artifacts_entity import DataValidationArtifact
from forestCover.constants import ARTIFACT_DIR, DATA_VALIDATION_DIR_NAME, DATA_VALIDATION_DIR_NAME_DRIFT_REPORT_NAME

from forestCover.utils.common import read_yaml
from forestCover.logger import logging
from forestCover.exception import CustomException

class DataValidation:
    """Placeholder for the data-validation pipeline component.

    The class currently defines no attributes or methods; it exists so that
    other modules can already import the name.
    """
24 changes: 23 additions & 1 deletion src/forestCover/constants/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,25 @@
import os
from from_root import from_root
from pathlib import Path

## file names

# Names of the raw dataset file and of the train/test split files.
FILE_NAME: str = "covertype.csv"
TRAIN_FILE_NAME: str = "train.csv"
TEST_FILE_NAME: str = "test.csv"

# Relative location of the schema file.  Defined exactly once (an earlier
# hard-coded "config/schema.yaml" assignment was immediately overwritten by
# this one) and annotated as ``str`` because os.path.join returns a str here,
# not a pathlib.Path.
SCHEMA_FILE_PATH: str = os.path.join("config", "schema.yaml")


# Directory for pipeline outputs: "artifacts" joined onto whatever
# from_root() returns (presumably the project root -- confirm against the
# from-root package documentation).
ARTIFACT_DIR = os.path.join(from_root(), "artifacts")

## Data Ingestion

# Directory / sub-directory names used by the data-ingestion step.
DATA_INGESTION_DIR: str = "DataIngestion"
DATA_INGESTION_FEATURE_STORE: str = "feature_store"
# NOTE(review): presumably the fraction of rows held out for the test split;
# confirm against the ingestion component that consumes it.
DATA_INGESTION_TRAIN_TEST_SPLIT: float = 0.2


## Data Validation

# Directory name for the data-validation step and the file name of its drift
# report.
DATA_VALIDATION_DIR_NAME: str = "data_validation"
DATA_VALIDATION_DIR_NAME_DRIFT_REPORT_NAME: str = "report.yaml"
9 changes: 8 additions & 1 deletion src/forestCover/entity/artifacts_entity.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,14 @@
from dataclasses import dataclass
from pathlib import Path


@dataclass
class DataIngestionArtifact:
    """Record of the file paths handed on by the data-ingestion step.

    Attributes:
        trained_file_path: Path of the training data file, as a string.
        test_file_path: Path of the test data file, as a string.
    """

    trained_file_path: str
    # Declared once only; a repeated ``test_file_path`` annotation adds no
    # field and only obscures the class layout.
    test_file_path: str

@dataclass
class DataValidationArtifact:
    """Result record for the data-validation step.

    Attributes:
        validation_status: Boolean outcome flag of the validation.
        message: Free-text message accompanying the outcome.
        drift_report_file_path: Location of the drift report file.
    """

    validation_status: bool
    message: str
    drift_report_file_path: Path
11 changes: 9 additions & 2 deletions src/forestCover/entity/config_entity.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@
from from_root import from_root
import os
from pathlib import Path
from forestCover.config import (ARTIFACT_DIR, DATA_INGESTION_DIR, DATA_INGESTION_FEATURE_STORE, TRAIN_FILE_NAME,
TEST_FILE_NAME, DATA_INGESTION_TRAIN_TEST_SPLIT)
from forestCover.constants import (ARTIFACT_DIR, DATA_INGESTION_DIR, DATA_INGESTION_FEATURE_STORE, TRAIN_FILE_NAME,
TEST_FILE_NAME, DATA_INGESTION_TRAIN_TEST_SPLIT, DATA_VALIDATION_DIR_NAME,
DATA_VALIDATION_DIR_NAME_DRIFT_REPORT_NAME)


@dataclass
Expand All @@ -13,3 +14,9 @@ class DataIngestionConfig:
train_file_path: Path = os.path.join(data_ingestion_dir, TRAIN_FILE_NAME)
test_file_path: Path = os.path.join(data_ingestion_dir, TEST_FILE_NAME)
train_test_ratio: float = DATA_INGESTION_TRAIN_TEST_SPLIT

@dataclass
class DataValidationConfig:
    """Default filesystem locations for the data-validation step.

    Both defaults are computed once, when the class body is executed.  The
    drift-file default is built from the *default* directory, so passing a
    different ``data_validation_dir`` to the constructor does not move it.

    Attributes:
        data_validation_dir: ``DATA_VALIDATION_DIR_NAME`` joined onto the
            value returned by ``from_root()``.
        data_validation_drift_file: The drift report file name joined onto
            the default ``data_validation_dir``.
    """

    # NOTE(review): the fields are annotated ``Path`` but the defaults are
    # built with os.path.join -- confirm whether consumers expect a str or a
    # pathlib.Path.
    # NOTE(review): this directory is rooted at from_root() rather than at
    # ARTIFACT_DIR (which this module also imports) -- confirm that is the
    # intended location.
    data_validation_dir: Path = os.path.join(
        from_root(), DATA_VALIDATION_DIR_NAME
    )
    data_validation_drift_file: Path = os.path.join(
        data_validation_dir, DATA_VALIDATION_DIR_NAME_DRIFT_REPORT_NAME
    )

0 comments on commit 4bd4bf8

Please sign in to comment.