Merge pull request #248 from GokuMohandas/dev
updated cluster env and local catch for efs
This commit is contained in:
commit
66e8e43711
1
.gitignore
vendored
1
.gitignore
vendored
@ -4,6 +4,7 @@ stores/
|
|||||||
mlflow/
|
mlflow/
|
||||||
results/
|
results/
|
||||||
workspaces/
|
workspaces/
|
||||||
|
efs/
|
||||||
|
|
||||||
# VSCode
|
# VSCode
|
||||||
.vscode/
|
.vscode/
|
||||||
|
@ -2,7 +2,7 @@
|
|||||||
# See https://pre-commit.com/hooks.html for more hooks
|
# See https://pre-commit.com/hooks.html for more hooks
|
||||||
repos:
|
repos:
|
||||||
- repo: https://github.com/pre-commit/pre-commit-hooks
|
- repo: https://github.com/pre-commit/pre-commit-hooks
|
||||||
rev: v4.4.0
|
rev: v4.5.0
|
||||||
hooks:
|
hooks:
|
||||||
- id: trailing-whitespace
|
- id: trailing-whitespace
|
||||||
- id: end-of-file-fixer
|
- id: end-of-file-fixer
|
||||||
|
1
Makefile
1
Makefile
@ -12,6 +12,7 @@ style:
|
|||||||
# Cleaning
|
# Cleaning
|
||||||
.PHONY: clean
|
.PHONY: clean
|
||||||
clean: style
|
clean: style
|
||||||
|
python notebooks/clear_cell_nums.py
|
||||||
find . -type f -name "*.DS_Store" -ls -delete
|
find . -type f -name "*.DS_Store" -ls -delete
|
||||||
find . | grep -E "(__pycache__|\.pyc|\.pyo)" | xargs rm -rf
|
find . | grep -E "(__pycache__|\.pyc|\.pyo)" | xargs rm -rf
|
||||||
find . | grep -E ".pytest_cache" | xargs rm -rf
|
find . | grep -E ".pytest_cache" | xargs rm -rf
|
||||||
|
@ -83,7 +83,7 @@ We'll start by setting up our cluster with the environment and compute configura
|
|||||||
- Project: `madewithml`
|
- Project: `madewithml`
|
||||||
- Cluster environment name: `madewithml-cluster-env`
|
- Cluster environment name: `madewithml-cluster-env`
|
||||||
# Toggle `Select from saved configurations`
|
# Toggle `Select from saved configurations`
|
||||||
- Compute config: `madewithml-cluster-compute`
|
- Compute config: `madewithml-cluster-compute-g5.4xlarge`
|
||||||
```
|
```
|
||||||
|
|
||||||
> Alternatively, we can use the [CLI](https://docs.anyscale.com/reference/anyscale-cli) to create the workspace via `anyscale workspace create ...`
|
> Alternatively, we can use the [CLI](https://docs.anyscale.com/reference/anyscale-cli) to create the workspace via `anyscale workspace create ...`
|
||||||
@ -423,7 +423,7 @@ anyscale cluster-env build deploy/cluster_env.yaml --name $CLUSTER_ENV_NAME
|
|||||||
The compute configuration determines **what** resources our workloads will be executed on. We've already created this [compute configuration](./deploy/cluster_compute.yaml) for us but this is how we can create it ourselves.
|
The compute configuration determines **what** resources our workloads will be executed on. We've already created this [compute configuration](./deploy/cluster_compute.yaml) for us but this is how we can create it ourselves.
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
export CLUSTER_COMPUTE_NAME="madewithml-cluster-compute"
|
export CLUSTER_COMPUTE_NAME="madewithml-cluster-compute-g5.4xlarge"
|
||||||
anyscale cluster-compute create deploy/cluster_compute.yaml --name $CLUSTER_COMPUTE_NAME
|
anyscale cluster-compute create deploy/cluster_compute.yaml --name $CLUSTER_COMPUTE_NAME
|
||||||
```
|
```
|
||||||
|
|
||||||
|
@ -1,12 +1,12 @@
|
|||||||
cloud: madewithml-us-east-2
|
cloud: education-us-west-2
|
||||||
region: us-east2
|
region: us-west-2
|
||||||
head_node_type:
|
head_node_type:
|
||||||
name: head_node_type
|
name: head_node_type
|
||||||
instance_type: m5.2xlarge # 8 CPU, 0 GPU, 32 GB RAM
|
instance_type: g5.4xlarge
|
||||||
worker_node_types:
|
worker_node_types:
|
||||||
- name: gpu_worker
|
- name: gpu_worker
|
||||||
instance_type: g4dn.xlarge # 4 CPU, 1 GPU, 16 GB RAM
|
instance_type: g5.4xlarge
|
||||||
min_workers: 0
|
min_workers: 1
|
||||||
max_workers: 1
|
max_workers: 1
|
||||||
use_spot: False
|
use_spot: False
|
||||||
aws:
|
aws:
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
base_image: anyscale/ray:2.6.0-py310-cu118
|
base_image: anyscale/ray:2.7.0optimized-py310-cu118
|
||||||
env_vars: {}
|
env_vars: {}
|
||||||
debian_packages:
|
debian_packages:
|
||||||
- curl
|
- curl
|
||||||
|
@ -1,6 +1,5 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
export PYTHONPATH=$PYTHONPATH:$PWD
|
export PYTHONPATH=$PYTHONPATH:$PWD
|
||||||
export RAY_AIR_REENABLE_DEPRECATED_SYNC_TO_HEAD_NODE=1
|
|
||||||
mkdir results
|
mkdir results
|
||||||
|
|
||||||
# Test data
|
# Test data
|
||||||
|
@ -11,6 +11,11 @@ ROOT_DIR = Path(__file__).parent.parent.absolute()
|
|||||||
LOGS_DIR = Path(ROOT_DIR, "logs")
|
LOGS_DIR = Path(ROOT_DIR, "logs")
|
||||||
LOGS_DIR.mkdir(parents=True, exist_ok=True)
|
LOGS_DIR.mkdir(parents=True, exist_ok=True)
|
||||||
EFS_DIR = Path(f"/efs/shared_storage/madewithml/{os.environ.get('GITHUB_USERNAME', '')}")
|
EFS_DIR = Path(f"/efs/shared_storage/madewithml/{os.environ.get('GITHUB_USERNAME', '')}")
|
||||||
|
try:
|
||||||
|
Path(EFS_DIR).mkdir(parents=True, exist_ok=True)
|
||||||
|
except OSError:
|
||||||
|
EFS_DIR = Path(ROOT_DIR, "efs")
|
||||||
|
Path(EFS_DIR).mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
# Config MLflow
|
# Config MLflow
|
||||||
MODEL_REGISTRY = Path(f"{EFS_DIR}/mlflow")
|
MODEL_REGISTRY = Path(f"{EFS_DIR}/mlflow")
|
||||||
|
@ -58,7 +58,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 1,
|
"execution_count": null,
|
||||||
"id": "e2c96931-d511-4c6e-b582-87d24455a11e",
|
"id": "e2c96931-d511-4c6e-b582-87d24455a11e",
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"tags": []
|
"tags": []
|
||||||
@ -79,7 +79,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 2,
|
"execution_count": null,
|
||||||
"id": "953a577e-3cd0-4c6b-81f9-8bc32850214d",
|
"id": "953a577e-3cd0-4c6b-81f9-8bc32850214d",
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"tags": []
|
"tags": []
|
||||||
@ -101,7 +101,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 3,
|
"execution_count": null,
|
||||||
"id": "1790e2f5-6b8b-425c-8842-a2b0ea8f3f07",
|
"id": "1790e2f5-6b8b-425c-8842-a2b0ea8f3f07",
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"tags": []
|
"tags": []
|
||||||
@ -113,7 +113,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 4,
|
"execution_count": null,
|
||||||
"id": "6b9bfadb-ba49-4f5a-b216-4db14c8888ab",
|
"id": "6b9bfadb-ba49-4f5a-b216-4db14c8888ab",
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"tags": []
|
"tags": []
|
||||||
@ -208,7 +208,7 @@
|
|||||||
"4 A PyTorch Implementation of \"Watch Your Step: ... other "
|
"4 A PyTorch Implementation of \"Watch Your Step: ... other "
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"execution_count": 4,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"output_type": "execute_result"
|
"output_type": "execute_result"
|
||||||
}
|
}
|
||||||
@ -222,7 +222,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 5,
|
"execution_count": null,
|
||||||
"id": "aa5b95d5-d61e-48e4-9100-d9d2fc0d53fa",
|
"id": "aa5b95d5-d61e-48e4-9100-d9d2fc0d53fa",
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"tags": []
|
"tags": []
|
||||||
@ -234,7 +234,7 @@
|
|||||||
"['computer-vision', 'other', 'natural-language-processing', 'mlops']"
|
"['computer-vision', 'other', 'natural-language-processing', 'mlops']"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"execution_count": 5,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"output_type": "execute_result"
|
"output_type": "execute_result"
|
||||||
}
|
}
|
||||||
@ -247,7 +247,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 6,
|
"execution_count": null,
|
||||||
"id": "3c828129-8248-4e38-93a4-cabb097e7ba5",
|
"id": "3c828129-8248-4e38-93a4-cabb097e7ba5",
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"tags": []
|
"tags": []
|
||||||
@ -279,7 +279,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 7,
|
"execution_count": null,
|
||||||
"id": "8e3c3f44-2c19-4c32-9bc5-e9a7a917d19d",
|
"id": "8e3c3f44-2c19-4c32-9bc5-e9a7a917d19d",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
@ -295,7 +295,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 8,
|
"execution_count": null,
|
||||||
"id": "4950bdb4",
|
"id": "4950bdb4",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
@ -337,7 +337,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 9,
|
"execution_count": null,
|
||||||
"id": "b2aae14c-9870-4a27-b5ad-90f339686620",
|
"id": "b2aae14c-9870-4a27-b5ad-90f339686620",
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"tags": []
|
"tags": []
|
||||||
@ -364,7 +364,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 10,
|
"execution_count": null,
|
||||||
"id": "03ee23e5",
|
"id": "03ee23e5",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
@ -401,7 +401,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 11,
|
"execution_count": null,
|
||||||
"id": "71c43e8c",
|
"id": "71c43e8c",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
@ -416,7 +416,7 @@
|
|||||||
" 'description': 'A PyTorch implementation of \"Capsule Graph Neural Network\" (ICLR 2019).'}]"
|
" 'description': 'A PyTorch implementation of \"Capsule Graph Neural Network\" (ICLR 2019).'}]"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"execution_count": 11,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"output_type": "execute_result"
|
"output_type": "execute_result"
|
||||||
}
|
}
|
||||||
@ -429,7 +429,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 12,
|
"execution_count": null,
|
||||||
"id": "c9359a91-ac19-48a4-babb-e65d53f39b42",
|
"id": "c9359a91-ac19-48a4-babb-e65d53f39b42",
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"tags": []
|
"tags": []
|
||||||
@ -462,7 +462,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 13,
|
"execution_count": null,
|
||||||
"id": "5fac795e",
|
"id": "5fac795e",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
@ -486,7 +486,7 @@
|
|||||||
"['other', 'computer-vision', 'computer-vision']"
|
"['other', 'computer-vision', 'computer-vision']"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"execution_count": 13,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"output_type": "execute_result"
|
"output_type": "execute_result"
|
||||||
}
|
}
|
||||||
@ -507,7 +507,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 14,
|
"execution_count": null,
|
||||||
"id": "e4cb38a8-44cb-4cea-828c-590f223d4063",
|
"id": "e4cb38a8-44cb-4cea-828c-590f223d4063",
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"tags": []
|
"tags": []
|
||||||
@ -543,7 +543,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 15,
|
"execution_count": null,
|
||||||
"id": "de2d0416",
|
"id": "de2d0416",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
@ -576,7 +576,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 16,
|
"execution_count": null,
|
||||||
"id": "ff3c37fb",
|
"id": "ff3c37fb",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
@ -618,7 +618,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 17,
|
"execution_count": null,
|
||||||
"id": "972fee2f-86e2-445e-92d0-923f5690132a",
|
"id": "972fee2f-86e2-445e-92d0-923f5690132a",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
@ -647,7 +647,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 18,
|
"execution_count": null,
|
||||||
"id": "9ee4e745-ef56-4b76-8230-fcbe56ac46aa",
|
"id": "9ee4e745-ef56-4b76-8230-fcbe56ac46aa",
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"tags": []
|
"tags": []
|
||||||
@ -663,7 +663,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 20,
|
"execution_count": null,
|
||||||
"id": "73780054-afeb-4ce6-8255-51bf91f9f820",
|
"id": "73780054-afeb-4ce6-8255-51bf91f9f820",
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"tags": []
|
"tags": []
|
||||||
@ -709,7 +709,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 21,
|
"execution_count": null,
|
||||||
"id": "24af6d04-d29e-4adb-a289-4c34c2cc7ec8",
|
"id": "24af6d04-d29e-4adb-a289-4c34c2cc7ec8",
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"tags": []
|
"tags": []
|
||||||
@ -780,7 +780,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 22,
|
"execution_count": null,
|
||||||
"id": "e22ed1e1-b34d-43d1-ae8b-32b1fd5be53d",
|
"id": "e22ed1e1-b34d-43d1-ae8b-32b1fd5be53d",
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"tags": []
|
"tags": []
|
||||||
@ -815,7 +815,7 @@
|
|||||||
" 'tag': 'mlops'}]"
|
" 'tag': 'mlops'}]"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"execution_count": 22,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"output_type": "execute_result"
|
"output_type": "execute_result"
|
||||||
}
|
}
|
||||||
@ -833,7 +833,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 23,
|
"execution_count": null,
|
||||||
"id": "294548a5-9edf-4dea-ab8d-dc7464246810",
|
"id": "294548a5-9edf-4dea-ab8d-dc7464246810",
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"tags": []
|
"tags": []
|
||||||
@ -864,7 +864,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 24,
|
"execution_count": null,
|
||||||
"id": "29bca273-3ea8-4ce0-9fa9-fe19062b7c5b",
|
"id": "29bca273-3ea8-4ce0-9fa9-fe19062b7c5b",
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"tags": []
|
"tags": []
|
||||||
@ -917,7 +917,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 26,
|
"execution_count": null,
|
||||||
"id": "3e59a3b9-69d9-4bb5-8b88-0569fcc72f0c",
|
"id": "3e59a3b9-69d9-4bb5-8b88-0569fcc72f0c",
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"tags": []
|
"tags": []
|
||||||
@ -1001,7 +1001,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 27,
|
"execution_count": null,
|
||||||
"id": "15ea136e",
|
"id": "15ea136e",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
@ -1020,7 +1020,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 28,
|
"execution_count": null,
|
||||||
"id": "ec0b498a-97c1-488c-a6b9-dc63a8a9df4d",
|
"id": "ec0b498a-97c1-488c-a6b9-dc63a8a9df4d",
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"tags": []
|
"tags": []
|
||||||
@ -1065,7 +1065,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 29,
|
"execution_count": null,
|
||||||
"id": "4cc80311",
|
"id": "4cc80311",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
@ -1080,7 +1080,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 30,
|
"execution_count": null,
|
||||||
"id": "6771b1d2",
|
"id": "6771b1d2",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
|
23
notebooks/clear_cell_nums.py
Normal file
23
notebooks/clear_cell_nums.py
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import nbformat
|
||||||
|
|
||||||
|
|
||||||
|
def clear_execution_numbers(nb_path):
|
||||||
|
with open(nb_path, "r", encoding="utf-8") as f:
|
||||||
|
nb = nbformat.read(f, as_version=4)
|
||||||
|
for cell in nb["cells"]:
|
||||||
|
if cell["cell_type"] == "code":
|
||||||
|
cell["execution_count"] = None
|
||||||
|
for output in cell["outputs"]:
|
||||||
|
if "execution_count" in output:
|
||||||
|
output["execution_count"] = None
|
||||||
|
with open(nb_path, "w", encoding="utf-8") as f:
|
||||||
|
nbformat.write(nb, f)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
NOTEBOOK_DIR = Path(__file__).parent
|
||||||
|
notebook_fps = list(NOTEBOOK_DIR.glob("**/*.ipynb"))
|
||||||
|
for fp in notebook_fps:
|
||||||
|
clear_execution_numbers(fp)
|
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue
Block a user