Skip to content

Commit

Permalink
parallel exec example with docker
Browse files Browse the repository at this point in the history
  • Loading branch information
shroominic committed Oct 30, 2024
1 parent d8e5d83 commit 67debe4
Showing 1 changed file with 84 additions and 0 deletions.
84 changes: 84 additions & 0 deletions examples/docker_parallel_execution.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
import asyncio
import time
from pathlib import Path

from codeboxapi import CodeBox


async def train_model(codebox: CodeBox, data_split: int) -> dict:
"""Train a model on a subset of data."""

file = Path("examples/assets/advertising.csv")
assert file.exists(), "Dataset file does not exist"

# Upload dataset
await codebox.aupload(file.name, file.read_bytes())

# Install required packages
await codebox.ainstall("pandas")
await codebox.ainstall("scikit-learn")

# Training code with different data splits
code = f"""
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
# Load and prepare data
data = pd.read_csv('advertising.csv')
X = data[['TV', 'Radio', 'Newspaper']]
y = data['Sales']
# Split with different random states for different data subsets
X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=0.3, random_state={data_split}
)
# Train model
model = LinearRegression()
model.fit(X_train, y_train)
# Evaluate
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f"Split {data_split}:")
print(f"MSE: {{mse:.4f}}")
print(f"R2: {{r2:.4f}}")
print(f"Coefficients: {{model.coef_.tolist()}}")
"""
result = await codebox.aexec(code)
return {"split": data_split, "output": result.text, "errors": result.errors}


async def main():
# Create multiple Docker instances
num_parallel = 4
codeboxes = [CodeBox(api_key="docker") for _ in range(num_parallel)]

# Create tasks for different data splits
tasks = []
for i, codebox in enumerate(codeboxes):
task = asyncio.create_task(train_model(codebox, i))
tasks.append(task)

# Execute and time the parallel processing
start_time = time.perf_counter()
results = await asyncio.gather(*tasks)
end_time = time.perf_counter()

# Print results
print(f"\nParallel execution completed in {end_time - start_time:.2f} seconds\n")
for result in results:
if not result["errors"]:
print(f"Results for {result['split']}:")
print(result["output"])
print("-" * 50)
else:
print(f"Error in split {result['split']}:", result["errors"])


if __name__ == "__main__":
asyncio.run(main())

0 comments on commit 67debe4

Please sign in to comment.