This guide explains how to package, train, and save outputs (both `.pkl` and `.json`) for a custom regression model in Google Vertex AI using the Custom Python Package Training Job feature. The training package uses the following layout:
```
regmodel/
├── setup.py
└── trainer/
    ├── __init__.py
    └── task.py   # <-- main entry script
```

`setup.py` declares the package metadata and its dependencies:
```python
from setuptools import find_packages, setup

setup(
    name="regmodel",
    version="0.1",
    install_requires=[
        "scikit-learn",
        "joblib",
        "pandas",
        "numpy",
        "gcsfs",  # lets pandas read gs:// CSV paths directly
        "google-cloud-storage",
    ],
    packages=find_packages(),
    include_package_data=True,
    description="Custom regression trainer for Vertex AI",
)
```
`trainer/task.py` loads the data, fits the model, and uploads all artifacts to Cloud Storage:

```python
import argparse
import json

import joblib
import pandas as pd
from google.cloud import storage
from sklearn.linear_model import LinearRegression


def split_gcs_uri(uri):
    """Split "gs://bucket/path" into (bucket_name, blob_path)."""
    bucket_name, _, blob_path = uri[len("gs://"):].partition("/")
    return bucket_name, blob_path


def train_and_save(args):
    # Load training data (pandas reads gs:// paths via gcsfs)
    df = pd.read_csv(args.train_data)
    X = df[args.features.split(",")]
    y = df[args.target]

    # Train model
    model = LinearRegression()
    model.fit(X, y)

    # Save model locally
    local_model_path = "model.pkl"
    joblib.dump(model, local_model_path)

    # Upload model.pkl to GCS
    client = storage.Client()
    bucket_name, blob_path = split_gcs_uri(args.model_dir)
    bucket = client.bucket(bucket_name)
    bucket.blob(f"{blob_path}/model.pkl").upload_from_filename(local_model_path)

    # Save model parameters as JSON (human-readable companion to the pickle)
    model_dict = {
        "params": model.get_params(),
        "coef": model.coef_.tolist() if hasattr(model, "coef_") else None,
        "intercept": model.intercept_.tolist() if hasattr(model, "intercept_") else None,
    }
    json_path = "model.json"
    with open(json_path, "w") as f:
        json.dump(model_dict, f)
    bucket.blob(f"{blob_path}/model.json").upload_from_filename(json_path)

    # Save predictions if requested
    if args.prediction_output:
        preds = model.predict(X).tolist()
        pred_path = "predictions.csv"
        pd.DataFrame({"prediction": preds}).to_csv(pred_path, index=False)
        output_bucket, output_blob = split_gcs_uri(args.prediction_output)
        client.bucket(output_bucket).blob(output_blob).upload_from_filename(pred_path)


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("--train-data", type=str, required=True, help="GCS path to training CSV")
    parser.add_argument("--features", type=str, required=True, help="Comma-separated feature columns")
    parser.add_argument("--target", type=str, required=True, help="Target column")
    parser.add_argument("--model-dir", type=str, required=True, help="GCS path to save trained model")
    parser.add_argument("--prediction-output", type=str, help="Optional GCS path to save predictions")
    return parser.parse_args()


if __name__ == "__main__":
    train_and_save(get_args())
```
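Before packaging anything, it is worth sanity-checking the core training logic locally. A minimal sketch on a toy DataFrame (the column names are made up and no GCS access is needed):

```python
import pandas as pd
from sklearn.linear_model import LinearRegression

# Tiny synthetic dataset where label = 2*col1 + 3*col2
df = pd.DataFrame({
    "col1": [1, 2, 3, 4],
    "col2": [0, 1, 0, 1],
    "label": [2, 7, 6, 11],
})

model = LinearRegression()
model.fit(df[["col1", "col2"]], df["label"])
print(model.coef_, model.intercept_)  # expect roughly [2. 3.] and ~0.0
```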
Build a source distribution and upload it to Cloud Storage (the file name follows the `name` and `version` in `setup.py`):

```bash
cd regmodel
python setup.py sdist --formats=gztar   # creates dist/regmodel-0.1.tar.gz
gsutil cp dist/regmodel-0.1.tar.gz gs://YOUR_BUCKET_NAME/packages/
```
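If you prefer to stay in Python, the same upload can be done with the `google-cloud-storage` client (bucket name and paths are the same placeholders as above):

```python
from google.cloud import storage

# Upload the built sdist to the packages/ prefix of the staging bucket
client = storage.Client()
bucket = client.bucket("YOUR_BUCKET_NAME")
bucket.blob("packages/regmodel-0.1.tar.gz").upload_from_filename("dist/regmodel-0.1.tar.gz")
```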
Submit the job with `gcloud`. The executor image and Python module belong inside `--worker-pool-spec`, and because `--args` is parsed as a comma-separated list, an alternate delimiter (`^@^` below, per `gcloud topic escaping`) keeps the comma inside `--features` intact:

```bash
gcloud ai custom-jobs create \
  --region=us-central1 \
  --display-name=custom-regression-job \
  --python-package-uris=gs://YOUR_BUCKET_NAME/packages/regmodel-0.1.tar.gz \
  --worker-pool-spec=machine-type=n1-standard-4,replica-count=1,executor-image-uri=us-docker.pkg.dev/vertex-ai/training/scikit-learn-cpu.1-0:latest,python-module=trainer.task \
  --args=^@^--train-data=gs://YOUR_BUCKET_NAME/data/train.csv@--features=col1,col2,col3@--target=label@--model-dir=gs://YOUR_BUCKET_NAME/models/custom_regression@--prediction-output=gs://YOUR_BUCKET_NAME/output/predictions.csv
```
After the job completes, you should find the following artifacts in Cloud Storage:

- `gs://YOUR_BUCKET_NAME/models/custom_regression/model.pkl` → reloadable in Python / Vertex AI.
- `gs://YOUR_BUCKET_NAME/models/custom_regression/model.json` → human-readable model parameters (coefficients, intercept, hyperparameters).
- `gs://YOUR_BUCKET_NAME/output/predictions.csv` (optional) → predictions on the training set (or whatever dataset you pass).

The sketch below shows one way to pull these artifacts back down and reload them.
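A minimal sketch, assuming your default credentials can read the bucket:

```python
import json

import joblib
from google.cloud import storage

client = storage.Client()
bucket = client.bucket("YOUR_BUCKET_NAME")

# Download and reload the pickled model
bucket.blob("models/custom_regression/model.pkl").download_to_filename("model.pkl")
model = joblib.load("model.pkl")

# Inspect the human-readable parameters
bucket.blob("models/custom_regression/model.json").download_to_filename("model.json")
with open("model.json") as f:
    print(json.load(f))
```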
A few notes:

- **`setup.py` + `trainer/task.py` are required.** Vertex AI runs the entry point you specify (e.g. `trainer.task`).
- **Arguments** (`--train-data`, `--model-dir`, etc.) are passed when you submit the custom training job, from the console or the CLI.
- **Model saving.** Always save your model under `--model-dir`. If you create the job with a base output directory, Vertex AI also exposes that location to your code as the `AIP_MODEL_DIR` environment variable, which you can fall back to (see the sketch after this list).
- **Prediction output.** You can define an extra argument like `--prediction-output=gs://your-bucket/output/predictions.csv` to write predictions to GCS.
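A minimal sketch of that fallback, assuming you relax `--model-dir` to optional in `get_args()`:

```python
import os

# AIP_MODEL_DIR is set by Vertex AI when the job is created with a base
# output directory; otherwise require the explicit --model-dir flag.
model_dir = args.model_dir or os.environ.get("AIP_MODEL_DIR", "")
if not model_dir:
    raise ValueError("Provide --model-dir or set a base output directory.")
```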