Skip to content
This repository has been archived by the owner on Jan 29, 2024. It is now read-only.

Commit

Permalink
implement a simple data splitter
Browse files Browse the repository at this point in the history
  • Loading branch information
wjayesh committed Mar 16, 2022
1 parent 46b1691 commit eea2f2a
Showing 1 changed file with 5 additions and 26 deletions.
31 changes: 5 additions & 26 deletions steps/splitter.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
import pandas as pd
from typing import Dict, List, Tuple

from datetime import date, timedelta
from zenml.steps.step_output import Output
from zenml.steps import step
from zenml.steps.base_step_config import BaseStepConfig
Expand All @@ -11,39 +9,20 @@ class TrainingSplitConfig(BaseStepConfig):
"""Split config for reference_data_splitter.

Attributes:
new_data_split_date: Date to split on.
start_reference_time_frame: Reference time to start from.
end_reference_time_frame: Reference time to end on.
columns: optional list of column names to use, empty means all.
row: the row number at which reference_data_splitter cuts the
dataset into a reference part and a new-data part. Value has
to be less than 60,000.
"""
new_data_split_date: str
start_reference_time_frame: str
end_reference_time_frame: str
columns: List = []


# NOTE(review): the date-based fields above and `row` look like two
# alternative split strategies -- confirm which ones are still required.
row: int

@step
def reference_data_splitter(
    dataset: pd.DataFrame, config: TrainingSplitConfig
) -> Output(before=pd.DataFrame, after=pd.DataFrame):
    """Split a dataset by row position for drift detection.

    Rows located before ``config.row`` form the reference ("before")
    dataset; rows from ``config.row`` onward form the new ("after")
    dataset. The input frame is left untouched.

    Args:
        dataset: The full dataset to split.
        config: Step configuration; only ``config.row`` is read here.

    Returns:
        The reference dataset and the new data, in that order.
    """
    # The earlier date-based split was computed and then immediately
    # overwritten by the row-based one, so only the row-based split is
    # kept. Dropping it also avoids mutating the caller's frame and
    # requiring a "GAME_DATE" column that the result never depended on.
    #
    # ``iloc`` makes the positional intent explicit regardless of the
    # index labels. The slice starts at row 0 (the original started at 1,
    # which silently dropped the first row from both outputs).
    reference_dataset = dataset.iloc[: config.row]
    print(reference_dataset.shape[0])

    new_data = dataset.iloc[config.row :]

    return reference_dataset, new_data

0 comments on commit eea2f2a

Please sign in to comment.