ML for Developers
This commit is contained in:
22
deploy/cluster_compute.yaml
Normal file
22
deploy/cluster_compute.yaml
Normal file
@@ -0,0 +1,22 @@
# Anyscale cluster compute configuration for Made With ML.
# Defines the cloud/region plus the head node and autoscaling GPU workers.
cloud: madewithml-us-east-2
region: us-east-2  # fixed: was "us-east2", which is not a valid AWS region name
head_node_type:
  name: head_node_type
  instance_type: m5.2xlarge  # 8 CPU, 0 GPU, 32 GB RAM
worker_node_types:
  - name: gpu_worker
    instance_type: g4dn.xlarge  # 4 CPU, 1 GPU, 16 GB RAM
    min_workers: 0  # scale to zero when idle
    max_workers: 1
    use_spot: false  # canonical lowercase YAML boolean (was "False")
aws:
  BlockDeviceMappings:
    - DeviceName: "/dev/sda1"
      Ebs:
        VolumeSize: 500
        DeleteOnTermination: true
  TagSpecifications:
    - ResourceType: instance
      Tags:
        - Key: as-feature-multi-zone
          Value: "true"  # quoted: consumer expects the string "true", not a boolean
12
deploy/cluster_env.yaml
Normal file
12
deploy/cluster_env.yaml
Normal file
@@ -0,0 +1,12 @@
# Anyscale cluster environment: base Ray image plus system and Python
# dependencies installed on every node at build time.
base_image: anyscale/ray:2.6.0-py310-cu118
env_vars: {}  # no build-time environment variables
debian_packages:
  - curl

python:
  pip_packages: []  # project requirements are installed in post_build_cmds instead
  conda_packages: []

post_build_cmds:
  - python3 -m pip install --upgrade pip setuptools wheel
  - python3 -m pip install -r https://raw.githubusercontent.com/GokuMohandas/Made-With-ML/main/requirements.txt
||||
54
deploy/jobs/workloads.sh
Normal file
54
deploy/jobs/workloads.sh
Normal file
@@ -0,0 +1,54 @@
#!/bin/bash
# End-to-end ML workloads: test data and code, train, evaluate, test the
# model, then persist results and the model registry to S3.
# Requires: GITHUB_USERNAME in the environment (set by the Anyscale job config).
export PYTHONPATH=$PYTHONPATH:$PWD
export RAY_AIR_REENABLE_DEPRECATED_SYNC_TO_HEAD_NODE=1
mkdir -p results  # -p: don't fail if the directory already exists (e.g. on re-run)

# Test data
export RESULTS_FILE=results/test_data_results.txt
export DATASET_LOC="https://raw.githubusercontent.com/GokuMohandas/Made-With-ML/main/datasets/dataset.csv"
pytest --dataset-loc=$DATASET_LOC tests/data --verbose --disable-warnings > $RESULTS_FILE
cat $RESULTS_FILE

# Test code
export RESULTS_FILE=results/test_code_results.txt
python -m pytest tests/code --verbose --disable-warnings > $RESULTS_FILE
cat $RESULTS_FILE

# Train
export EXPERIMENT_NAME="llm"
export RESULTS_FILE=results/training_results.json
export DATASET_LOC="https://raw.githubusercontent.com/GokuMohandas/Made-With-ML/main/datasets/dataset.csv"
export TRAIN_LOOP_CONFIG='{"dropout_p": 0.5, "lr": 1e-4, "lr_factor": 0.8, "lr_patience": 3}'
python madewithml/train.py \
    --experiment-name "$EXPERIMENT_NAME" \
    --dataset-loc "$DATASET_LOC" \
    --train-loop-config "$TRAIN_LOOP_CONFIG" \
    --num-workers 1 \
    --cpu-per-worker 10 \
    --gpu-per-worker 1 \
    --num-epochs 10 \
    --batch-size 256 \
    --results-fp $RESULTS_FILE

# Get and save run ID
export RUN_ID=$(python -c "import os; from madewithml import utils; d = utils.load_dict(os.getenv('RESULTS_FILE')); print(d['run_id'])")
export RUN_ID_FILE=results/run_id.txt
echo $RUN_ID > $RUN_ID_FILE  # used for serving later

# Evaluate
export RESULTS_FILE=results/evaluation_results.json
export HOLDOUT_LOC="https://raw.githubusercontent.com/GokuMohandas/Made-With-ML/main/datasets/holdout.csv"
python madewithml/evaluate.py \
    --run-id $RUN_ID \
    --dataset-loc $HOLDOUT_LOC \
    --results-fp $RESULTS_FILE

# Test model
export RESULTS_FILE=results/test_model_results.txt  # export added for consistency with the other RESULTS_FILE assignments
pytest --run-id=$RUN_ID tests/model --verbose --disable-warnings > $RESULTS_FILE
cat $RESULTS_FILE

# Save to S3
export MODEL_REGISTRY=$(python -c "from madewithml import config; print(config.MODEL_REGISTRY)")
aws s3 cp $MODEL_REGISTRY s3://madewithml/$GITHUB_USERNAME/mlflow/ --recursive
aws s3 cp results/ s3://madewithml/$GITHUB_USERNAME/results/ --recursive
||||
11
deploy/jobs/workloads.yaml
Normal file
11
deploy/jobs/workloads.yaml
Normal file
@@ -0,0 +1,11 @@
# Anyscale Job configuration: runs deploy/jobs/workloads.sh on the
# madewithml cluster environment and compute config.
name: workloads
project_id: prj_v9izs5t1d6b512ism8c5rkq4wm
cluster_env: madewithml-cluster-env
compute_config: madewithml-cluster-compute
runtime_env:
  working_dir: .
  upload_path: s3://madewithml/GokuMohandas/jobs  # <--- CHANGE USERNAME (case-sensitive)
  env_vars:
    GITHUB_USERNAME: GokuMohandas  # <--- CHANGE USERNAME (case-sensitive)
entrypoint: bash deploy/jobs/workloads.sh
max_retries: 0  # fail immediately; do not re-run the workloads on error
17
deploy/services/serve_model.py
Normal file
17
deploy/services/serve_model.py
Normal file
@@ -0,0 +1,17 @@
import os
import subprocess
import sys

sys.path.append(".")

from madewithml.config import MODEL_REGISTRY  # NOQA: E402
from madewithml.serve import ModelDeployment  # NOQA: E402

# Copy the user's model registry and results from S3 into the local workspace.
github_username = os.environ.get("GITHUB_USERNAME")
if not github_username:
    # Fail fast with a clear message instead of silently building
    # "s3://madewithml/None/..." paths and failing inside the aws CLI.
    raise RuntimeError("GITHUB_USERNAME environment variable must be set")
subprocess.check_output(["aws", "s3", "cp", f"s3://madewithml/{github_username}/mlflow/", str(MODEL_REGISTRY), "--recursive"])
subprocess.check_output(["aws", "s3", "cp", f"s3://madewithml/{github_username}/results/", "./", "--recursive"])

# Entrypoint: bind the Ray Serve deployment to the run ID persisted by the
# training job (first line of run_id.txt, copied down from S3 above).
with open("run_id.txt") as f:  # context manager: original left the file handle open
    run_id = f.readline().strip()
entrypoint = ModelDeployment.bind(run_id=run_id, threshold=0.9)
12
deploy/services/serve_model.yaml
Normal file
12
deploy/services/serve_model.yaml
Normal file
@@ -0,0 +1,12 @@
# Anyscale Service configuration: serves the trained model via Ray Serve
# using the entrypoint bound in deploy/services/serve_model.py.
name: madewithml
project_id: prj_v9izs5t1d6b512ism8c5rkq4wm
cluster_env: madewithml-cluster-env
compute_config: madewithml-cluster-compute
ray_serve_config:
  import_path: deploy.services.serve_model:entrypoint
  runtime_env:
    working_dir: .
    upload_path: s3://madewithml/GokuMohandas/services  # <--- CHANGE USERNAME (case-sensitive)
    env_vars:
      GITHUB_USERNAME: GokuMohandas  # <--- CHANGE USERNAME (case-sensitive)
rollout_strategy: ROLLOUT  # ROLLOUT or IN_PLACE
Reference in New Issue
Block a user