Skip to content

Commit

Permalink
setup file for data ingestion
Browse files Browse the repository at this point in the history
  • Loading branch information
natek-1 committed Jul 27, 2024
1 parent ac3e420 commit 0c42f00
Show file tree
Hide file tree
Showing 4 changed files with 159 additions and 0 deletions.
118 changes: 118 additions & 0 deletions config/schema.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
# Column name -> dtype mapping for the dataset schema.
# Soil_Type7 is listed here because drop_columns refers to it, and the
# label column is spelled Cover_Type to match categorical_columns.
columns:
- Elevation: int
- Aspect: int
- Slope: int
- Horizontal_Distance_To_Hydrology: int
- Vertical_Distance_To_Hydrology: int
- Horizontal_Distance_To_Roadways: int
- Horizontal_Distance_To_Fire_Points: int
- Hillshade_9am: int
- Hillshade_Noon: int
- Hillshade_3pm: int
- Wilderness_Area1: int
- Wilderness_Area2: int
- Wilderness_Area3: int
- Wilderness_Area4: int
- Soil_Type1: int
- Soil_Type2: int
- Soil_Type3: int
- Soil_Type4: int
- Soil_Type5: int
- Soil_Type6: int
- Soil_Type7: int
- Soil_Type8: int
- Soil_Type9: int
- Soil_Type10: int
- Soil_Type11: int
- Soil_Type12: int
- Soil_Type13: int
- Soil_Type14: int
- Soil_Type15: int
- Soil_Type16: int
- Soil_Type17: int
- Soil_Type18: int
- Soil_Type19: int
- Soil_Type20: int
- Soil_Type21: int
- Soil_Type22: int
- Soil_Type23: int
- Soil_Type24: int
- Soil_Type25: int
- Soil_Type26: int
- Soil_Type27: int
- Soil_Type28: int
- Soil_Type29: int
- Soil_Type30: int
- Soil_Type31: int
- Soil_Type32: int
- Soil_Type33: int
- Soil_Type34: int
- Soil_Type35: int
- Soil_Type36: int
- Soil_Type37: int
- Soil_Type38: int
- Soil_Type39: int
- Soil_Type40: int
- Cover_Type: category


# Columns treated as numerical features.
# Soil_Type7, Soil_Type8, Soil_Type15 and Soil_Type36 do not appear here;
# they are the entries listed under drop_columns.
numerical_columns:
- Elevation
- Aspect
- Slope
- Horizontal_Distance_To_Hydrology
- Vertical_Distance_To_Hydrology
- Horizontal_Distance_To_Roadways
- Horizontal_Distance_To_Fire_Points
- Hillshade_9am
- Hillshade_Noon
- Hillshade_3pm
- Wilderness_Area1
- Wilderness_Area2
- Wilderness_Area3
- Wilderness_Area4
- Soil_Type1
- Soil_Type2
- Soil_Type3
- Soil_Type4
- Soil_Type5
- Soil_Type6
- Soil_Type9
- Soil_Type10
- Soil_Type11
- Soil_Type12
- Soil_Type13
- Soil_Type14
- Soil_Type16
- Soil_Type17
- Soil_Type18
- Soil_Type19
- Soil_Type20
- Soil_Type21
- Soil_Type22
- Soil_Type23
- Soil_Type24
- Soil_Type25
- Soil_Type26
- Soil_Type27
- Soil_Type28
- Soil_Type29
- Soil_Type30
- Soil_Type31
- Soil_Type32
- Soil_Type33
- Soil_Type34
- Soil_Type35
- Soil_Type37
- Soil_Type38
- Soil_Type39
- Soil_Type40

# Columns treated as categorical.
# NOTE(review): presumably the prediction target - confirm against the consuming code.
categorical_columns:
- Cover_Type

# Columns listed for removal; none of them appears in numerical_columns.
# NOTE(review): presumably dropped during preprocessing - confirm against the consuming code.
drop_columns:
- Soil_Type7
- Soil_Type8
- Soil_Type15
- Soil_Type36

19 changes: 19 additions & 0 deletions src/forestCover/config/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import os
from from_root import from_root
from pathlib import Path

## file names

# File names of the dataset and of the train/test files.
FILE_NAME: str = "covertype.csv"
TRAIN_FILE_NAME: str = "train.csv"
TEST_FILE_NAME: str = "test.csv"
# os.path.join returns a str, so the annotation is str rather than Path.
# The path is relative, so it resolves against the current working directory.
SCHEMA_FILE_PATH: str = os.path.join("config", "schema.yaml")


# "artifacts" directory under whatever from_root() returns
# (presumably the project root - confirm against the from_root package).
ARTIFACT_DIR: str = os.path.join(from_root(), "artifacts")

## Data Ingestion

# Directory and sub-directory names used by the data-ingestion configuration.
DATA_INGESTION_DIR: str = "DataIngestion"
DATA_INGESTION_FEATURE_STORE: str = "feature_store"
# NOTE(review): presumably the fraction of rows held out for the test split - confirm.
DATA_INGESTION_TRAIN_TEST_SPLIT: float = 0.2
7 changes: 7 additions & 0 deletions src/forestCover/entity/artifacts_entity.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from dataclasses import dataclass


@dataclass
class DataIngestionArtifact:
    """Pair of file paths describing a train/test split.

    Presumably the result handed on by the data-ingestion step; confirm
    against the code that constructs it.

    Attributes:
        trained_file_path: Path of the training file, as a string.
        test_file_path: Path of the test file, as a string.
    """

    trained_file_path: str
    test_file_path: str
15 changes: 15 additions & 0 deletions src/forestCover/entity/config_entity.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from dataclasses import dataclass
from from_root import from_root
import os
from pathlib import Path
from forestCover.constants import (ARTIFACT_DIR, DATA_INGESTION_DIR, DATA_INGESTION_FEATURE_STORE, TRAIN_FILE_NAME,
TEST_FILE_NAME, DATA_INGESTION_TRAIN_TEST_SPLIT)


@dataclass
class DataIngestionConfig:
    """Default filesystem locations and split ratio for data ingestion.

    All path fields hold plain strings, because os.path.join returns str.

    The derived defaults (feature_store_path, train_file_path,
    test_file_path) are computed once, when the class body is executed,
    from the default data_ingestion_dir. Passing a different
    data_ingestion_dir to the constructor does not re-derive them.

    Attributes:
        data_ingestion_dir: ARTIFACT_DIR joined with DATA_INGESTION_DIR.
        feature_store_path: data_ingestion_dir joined with
            DATA_INGESTION_FEATURE_STORE.
        train_file_path: data_ingestion_dir joined with TRAIN_FILE_NAME.
        test_file_path: data_ingestion_dir joined with TEST_FILE_NAME.
        train_test_ratio: Value of DATA_INGESTION_TRAIN_TEST_SPLIT.
    """

    data_ingestion_dir: str = os.path.join(ARTIFACT_DIR, DATA_INGESTION_DIR)
    # The class-level name data_ingestion_dir is still in scope here, so the
    # remaining defaults are built from it.
    feature_store_path: str = os.path.join(data_ingestion_dir, DATA_INGESTION_FEATURE_STORE)
    train_file_path: str = os.path.join(data_ingestion_dir, TRAIN_FILE_NAME)
    test_file_path: str = os.path.join(data_ingestion_dir, TEST_FILE_NAME)
    train_test_ratio: float = DATA_INGESTION_TRAIN_TEST_SPLIT

0 comments on commit 0c42f00

Please sign in to comment.