Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
140 changes: 140 additions & 0 deletions examples/finetuning/finetuning_client.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Setting up the finetuning job"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"vscode": {
"languageId": "plaintext"
}
},
"outputs": [],
"source": [
"from bespokelabs import curator"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"vscode": {
"languageId": "plaintext"
}
},
"outputs": [],
"source": [
"finetuning_client = curator.Finetune(\n",
" backend = \"bespoke\",\n",
" backend_params = {\n",
" \"base_url\": 'https://api-dev.bespokelabs.ai'\n",
" }\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"vscode": {
"languageId": "plaintext"
}
},
"outputs": [],
"source": [
"dataset_id = \"41da7b9b3e384ae486a9945376d2fd9c\""
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"vscode": {
"languageId": "plaintext"
}
},
"outputs": [],
"source": [
"finetuning_client.create_job(\n",
" model_name=\"Qwen/Qwen2.5-7B-Instruct\",\n",
" job_name=\"bespoke-devrev-demo-live\",\n",
" dataset_id=dataset_id,\n",
" seed=42,\n",
" suffix=\"ft\",\n",
" method = {\n",
" \"type\": \"supervised\",\n",
" \"hyperparameters\": {\n",
" ## Type\n",
" \"finetuning_type\": \"lora\",\n",
" \"lora_rank\": 16,\n",
"\n",
" ## training dynamics\n",
" \"learning_rate\": 0.0001,\n",
" \"num_train_epochs\": 1,\n",
" \"per_device_train_batch_size\": 4,\n",
" \"gradient_accumulation_steps\": 1,\n",
"\n",
" ## infra\n",
" \"preprocessing_num_workers\": 32,\n",
" \"dataloader_num_workers\": 16,\n",
" \"logging_steps\": 1,\n",
" }\n",
" },\n",
" num_gpus=8\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Deploying the finetuned model"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"models_client = curator.Models(\n",
" backend = \"bespoke\",\n",
" backend_params = {\n",
" \"base_url\": 'https://api-dev.bespokelabs.ai'\n",
" }\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"models_client.list_models(job_id='d3201c37255247c2825ff4ce52c110d6')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"vscode": {
"languageId": "plaintext"
}
},
"outputs": [],
"source": [
"models_client.deploy_model(model_id = '10426763-76be-483e-987b-07d2ee5adbd7')"
]
}
],
"metadata": {
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
4 changes: 3 additions & 1 deletion src/bespokelabs/curator/__init__.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
"""BespokeLabs Curator."""

from .code_executor.code_executor import CodeExecutor
from .finetune.finetune import Finetune
from .llm.llm import LLM
from .models import Models
from .types import prompt as types
from .utils import load_dataset, push_to_viewer

__all__ = ["LLM", "CodeExecutor", "types", "push_to_viewer", "load_dataset"]
__all__ = ["LLM", "CodeExecutor", "types", "Finetune", "Models", "push_to_viewer", "load_dataset"]

from .log import _CONSOLE # noqa: F401
4 changes: 2 additions & 2 deletions src/bespokelabs/curator/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
BATCH_REQUEST_ID_TAG = "custom_id"
_CURATOR_DEFAULT_CACHE_DIR = "~/.cache/curator"
_DEFAULT_CACHE_DIR = "~/.cache"
BASE_CLIENT_URL = "https://api.bespokelabs.ai/v0/viewer"
PUBLIC_CURATOR_VIEWER_HOME_URL = "https://curator.bespokelabs.ai"
# NOTE(review): the two assignments below rebind the names above to the
# "-dev" hosts (api-dev / curator-dev) — confirm this is intended before release.
BASE_CLIENT_URL = "https://api-dev.bespokelabs.ai/v0/viewer"
PUBLIC_CURATOR_VIEWER_HOME_URL = "https://curator-dev.bespokelabs.ai"
# Dataset pages live under the viewer home URL.
PUBLIC_CURATOR_VIEWER_DATASET_URL = PUBLIC_CURATOR_VIEWER_HOME_URL + "/datasets"
_INTERNAL_PROMPT_KEY = "__internal_prompt"
_CACHE_MSG = (
Expand Down
5 changes: 5 additions & 0 deletions src/bespokelabs/curator/datasets/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
"""Dataset loading and processing utilities."""

from .base import upload

__all__ = ["upload"]
54 changes: 54 additions & 0 deletions src/bespokelabs/curator/datasets/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# just upload files, a thin wrapper around push_to_viewer

import logging
from pathlib import Path

from datasets import load_dataset

from bespokelabs.curator.utils import push_to_viewer

logger = logging.getLogger(__name__)


def upload(path: str, split: str = "train"):
    """Load a dataset and push it to the Curator viewer.

    ``path`` is first handed to ``load_dataset`` unchanged, so it may be
    anything that function can resolve by itself (e.g. a Hugging Face dataset
    name). If that attempt fails, ``path`` is treated as a local data file and
    loaded with the builder that matches its extension.

    Args:
        path: A dataset identifier understood by ``load_dataset``, or the
            local path to a ``.jsonl``, ``.json``, ``.csv`` or ``.parquet``
            file (the extension is matched case-insensitively).
        split: The name of the split to load and upload (e.g., "train", "test").

    Returns:
        The value returned by ``push_to_viewer`` for the loaded dataset.

    Raises:
        FileNotFoundError: If ``path`` could not be loaded directly and does
            not exist on the local filesystem.
        ValueError: If the local file's extension is not supported.
    """
    # Builder name ``load_dataset`` expects for each supported file extension.
    builders = {
        ".jsonl": "json",
        ".json": "json",
        ".csv": "csv",
        ".parquet": "parquet",
    }

    try:
        # First attempt: let ``load_dataset`` resolve the identifier itself.
        dataset = load_dataset(path, split=split)
    except Exception as direct_error:
        # Fallback: interpret ``path`` as a local data file. The original
        # failure is chained as the cause so its reason is not lost.
        local_file = Path(path)
        if not local_file.exists():
            raise FileNotFoundError(f"File {local_file} does not exist") from direct_error

        builder = builders.get(local_file.suffix.lower())
        if builder is None:
            raise ValueError("Only jsonl, json, csv, and parquet files are supported currently") from direct_error

        try:
            dataset = load_dataset(builder, data_files=str(local_file), split=split)
        except Exception as e:
            logger.error("Error loading dataset: %s", e)
            raise

    return push_to_viewer(dataset)
5 changes: 5 additions & 0 deletions src/bespokelabs/curator/finetune/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
"""Finetuning backend abstraction and implementations."""

from .finetune import Finetune

__all__ = ["Finetune"]
58 changes: 58 additions & 0 deletions src/bespokelabs/curator/finetune/base_backend.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
from abc import ABC, abstractmethod


class BaseFinetuneBackend(ABC):
    """Interface that every finetuning backend has to implement.

    Subclasses supply the job-management operations declared below; this base
    class itself only keeps hold of the backend configuration.
    """

    def __init__(self, backend_params: dict):
        """Remember the backend configuration.

        Args:
            backend_params: Backend-specific settings, stored on the instance
                as ``self.backend_params``.
        """
        self.backend_params = backend_params

    @abstractmethod
    def create_job(self, *args, **kwargs):
        """Start a new finetuning job."""
        ...

    @abstractmethod
    def list_jobs(self, *args, **kwargs):
        """Enumerate finetuning jobs."""
        ...

    @abstractmethod
    def list_job_events(self, job_id: str):
        """Enumerate the events recorded for one finetuning job.

        Args:
            job_id: Identifier of the finetuning job.
        """
        ...

    # list_job_checkpoints(job_id) and list_job_metrics(job_id) are not part
    # of the required interface (their abstract declarations are disabled).
    # TODO: confirm whether they are planned.

    @abstractmethod
    def get_job_details(self, job_id: str):
        """Fetch the details of one finetuning job.

        Args:
            job_id: Identifier of the finetuning job.
        """
        ...

    @abstractmethod
    def cancel_job(self, job_id: str):
        """Cancel one finetuning job.

        Args:
            job_id: Identifier of the finetuning job.
        """
        ...
Loading
Loading