diff --git a/ml/kubeflow-pipelines/bikes_weather/components/kubeflow-resources/bikesw_training/bikes_weather.py b/ml/kubeflow-pipelines/bikes_weather/components/kubeflow-resources/bikesw_training/bikes_weather.py new file mode 100644 index 0000000..7d5862d --- /dev/null +++ b/ml/kubeflow-pipelines/bikes_weather/components/kubeflow-resources/bikesw_training/bikes_weather.py @@ -0,0 +1,261 @@ + +# Copyright 2019 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Adapted in part from: +# https://github.com/GoogleCloudPlatform/data-science-on-gcp/blob/master/09_cloudml/flights_model_tf2.ipynb +# by Valliappa Lakshmanan. (See that repo for more info about the accompanying book, +# "Data Science on the Google Cloud Platform", from O'Reilly.) 
+
+
+
+import argparse
+import logging
+import os, json, math, time, shutil
+import numpy as np
+
+import pathlib2
+import tensorflow as tf
+
+DEVELOP_MODE = False
+NBUCKETS = 5 # for embeddings
+NUM_EXAMPLES = 1000*1000 * 20 # assume 20 million examples
+DNN_HIDDEN_UNITS = '128,64,32'
+
+CSV_COLUMNS = ('duration,end_station_id,bike_id,ts,day_of_week,start_station_id' +
+               ',start_latitude,start_longitude,end_latitude,end_longitude' +
+               ',euclidean,loc_cross,prcp,max,min,temp,dewp').split(',')
+LABEL_COLUMN = 'duration'
+DEFAULTS = [[0.0],['na'],['na'],[0.0],['na'],['na'],
+            [0.0],[0.0],[0.0],[0.0],
+            [0.0],['na'],[0.0],[0.0],[0.0],[0.0], [0.0]]
+
+STRATEGY = tf.distribute.MirroredStrategy()
+TRAIN_BATCH_SIZE = 64 * STRATEGY.num_replicas_in_sync
+
+
+def load_dataset(pattern, batch_size=1):
+    """Return a batched CSV dataset for the files matching `pattern`.
+
+    Column names come from CSV_COLUMNS and per-column defaults (which also
+    fix each column's type) from DEFAULTS.
+    """
+    return tf.data.experimental.make_csv_dataset(
+        pattern, batch_size, CSV_COLUMNS, DEFAULTS)
+
+
+def features_and_labels(features):
+    """Split one parsed batch into a `(features, label)` pair.
+
+    Mutates `features` in place: the 'duration' column is removed and
+    returned as the label, and 'bike_id' is discarded.
+    """
+    target = features.pop('duration')  # this is what we will train for
+    features.pop('bike_id')  # dropped; not fed to the model
+    return features, target
+
+
+def read_dataset(pattern, batch_size, mode=tf.estimator.ModeKeys.TRAIN, truncate=None):
+    """Build the input pipeline for training or evaluation.
+
+    Args:
+      pattern: file pattern of the CSV shards to read.
+      batch_size: number of rows per batch.
+      mode: when equal to `tf.estimator.ModeKeys.TRAIN` the dataset is
+        repeated and then shuffled; any other mode is read as-is.
+      truncate: if not None, keep only the first `truncate` elements.
+
+    Returns:
+      A `tf.data.Dataset` yielding `(features, label)` pairs.
+    """
+    autotune = tf.data.experimental.AUTOTUNE
+    batches = load_dataset(pattern, batch_size).map(
+        features_and_labels, num_parallel_calls=autotune)
+    if mode == tf.estimator.ModeKeys.TRAIN:
+        batches = batches.repeat().shuffle(batch_size*10)
+    # NOTE(review): indentation is lost in this patch; prefetch is assumed to
+    # apply in every mode (not only TRAIN) -- confirm against the real file.
+    batches = batches.prefetch(buffer_size=autotune)
+    if truncate is None:
+        return batches
+    return batches.take(truncate)
+
+
+# Build a wide-and-deep model.
+def wide_and_deep_classifier(inputs, linear_feature_columns, dnn_feature_columns,
+                             dnn_hidden_units, learning_rate):
+    """Build and compile the wide-and-deep Keras model.
+
+    Despite the name, the compiled model is a regressor: it ends in a single
+    linear unit ('dur') and is trained with mean-squared-error loss.
+
+    Args:
+      inputs: dict of Keras `Input` layers keyed by column name.
+      linear_feature_columns: feature columns for the wide (linear) side.
+      dnn_feature_columns: feature columns for the deep side.
+      dnn_hidden_units: comma-separated layer widths, e.g. '128,64,32'.
+      learning_rate: learning rate passed to the RMSprop optimizer.
+
+    Returns:
+      The compiled `tf.keras.Model`.
+    """
+    deep = tf.keras.layers.DenseFeatures(dnn_feature_columns, name='deep_inputs')(inputs)
+    layers = [int(x) for x in dnn_hidden_units.split(',')]
+    for layerno, numnodes in enumerate(layers):
+        deep = tf.keras.layers.Dense(numnodes, activation='relu', name='dnn_{}'.format(layerno+1))(deep)
+    wide = tf.keras.layers.DenseFeatures(linear_feature_columns, name='wide_inputs')(inputs)
+    both = tf.keras.layers.concatenate([deep, wide], name='both')
+    output = tf.keras.layers.Dense(1, name='dur')(both)
+    model = tf.keras.Model(inputs, output)
+    optimizer = tf.keras.optimizers.RMSprop(learning_rate)
+    model.compile(loss='mse', optimizer=optimizer,
+                  metrics=['mse', 'mae'])
+    return model
+
+
+def create_model(learning_rate, load_checkpoint):
+    """Define the feature columns and build the model under STRATEGY's scope.
+
+    Args:
+      learning_rate: optimizer learning rate; replaced by 0.0000001 when
+        `load_checkpoint` is set.
+      load_checkpoint: path of previously saved weights to load, or a falsy
+        value to start from freshly initialised weights.
+
+    Returns:
+      The compiled model (weights loaded if a checkpoint was given).
+    """
+
+    # duration,end_station_id,bike_id,ts,day_of_week,start_station_id,start_latitude,start_longitude,end_latitude,end_longitude,
+    # euclidean,loc_cross,prcp,max,min,temp,dewp
+
+    real = {
+        colname : tf.feature_column.numeric_column(colname)
+        for colname in
+        # ('ts,start_latitude,start_longitude,end_latitude,end_longitude,euclidean,prcp,max,min,temp,dewp').split(',')
+        ('ts,euclidean,prcp,max,min,temp,dewp').split(',')
+
+    }
+    sparse = {
+        'day_of_week': tf.feature_column.categorical_column_with_vocabulary_list('day_of_week',
+            vocabulary_list='1,2,3,4,5,6,7'.split(',')),
+        'end_station_id' : tf.feature_column.categorical_column_with_hash_bucket('end_station_id', hash_bucket_size=800),
+        'start_station_id' : tf.feature_column.categorical_column_with_hash_bucket('start_station_id', hash_bucket_size=800),
+        'loc_cross' : tf.feature_column.categorical_column_with_hash_bucket('loc_cross', hash_bucket_size=21000),
+        # 'bike_id' : tf.feature_column.categorical_column_with_hash_bucket('bike_id', hash_bucket_size=14000)
+    }
+
+    # One Keras input per column: float32 for the numeric columns, string
+    # for the categorical ones.
+    inputs = {
+        colname : tf.keras.layers.Input(name=colname, shape=(), dtype='float32')
+        for colname in real.keys()
+    }
+    inputs.update({
+        colname : tf.keras.layers.Input(name=colname, shape=(), dtype='string')
+        for colname in sparse.keys()
+    })
+
+    # embed all the sparse columns
+    embed = {
+        'embed_{}'.format(colname) : tf.feature_column.embedding_column(col, 10)
+        for colname, col in sparse.items()
+    }
+    real.update(embed)
+
+    # one-hot encode the sparse columns
+    sparse = {
+        colname : tf.feature_column.indicator_column(col)
+        for colname, col in sparse.items()
+    }
+
+    if DEVELOP_MODE:
+        print(sparse.keys())
+        print(real.keys())
+
+    model = None
+    print('num replicas...')
+    print(STRATEGY.num_replicas_in_sync)
+
+    # NOTE(review): indentation is lost in this patch. Everything up to and
+    # including the weight loading is assumed to sit inside the strategy
+    # scope, and the learning-rate log is assumed unconditional -- confirm.
+    with STRATEGY.scope():
+        if load_checkpoint:
+            # Resuming from saved weights: continue with a very small step size.
+            learning_rate = 0.0000001
+        logging.info("using learning rate {}".format(learning_rate))
+        model = wide_and_deep_classifier(
+            inputs,
+            linear_feature_columns = sparse.values(),
+            dnn_feature_columns = real.values(),
+            dnn_hidden_units = DNN_HIDDEN_UNITS,
+            learning_rate=learning_rate)
+        if load_checkpoint:
+            logging.info("loading model weights from {}".format(load_checkpoint))
+            model.load_weights(load_checkpoint)
+
+    model.summary()
+    return model
+
+
+def _export_model(model, export_dir, output_dir, train_output_path):
+    """Save `model` to `export_dir` and record the export root, if requested.
+
+    When `train_output_path` is set, the string '<output_dir>/export/bikesw'
+    (the parent of the timestamped `export_dir`) is written to that file,
+    creating its parent directory if needed.
+    """
+    tf.saved_model.save(model, export_dir)
+    if train_output_path:
+        logging.info("train_output_path: %s", train_output_path)
+        output_file = pathlib2.Path(train_output_path)
+        # exist_ok=True: the directory may already exist, in particular when
+        # this runs a second time after a failed first attempt.
+        output_file.parent.mkdir(parents=True, exist_ok=True)
+        export_path = '{}/export/bikesw'.format(output_dir)
+        logging.info('export path: {}'.format(export_path))
+        output_file.write_text(export_path)
+
+
+def main():
+    """Parse arguments, train the model, and export it as a SavedModel."""
+
+    logging.getLogger().setLevel(logging.INFO)
+    parser = argparse.ArgumentParser(description='ML Trainer')
+    parser.add_argument(
+        '--epochs', type=int,
+        default=1)
+    parser.add_argument(
+        '--steps-per-epoch', type=int,
+        default=-1)  # if set to -1, don't override the normal calcs for this
+    parser.add_argument(
+        '--workdir',
+        required=True)
+    parser.add_argument(
+        '--data-dir',
+        default='gs://aju-dev-demos-codelabs/bikes_weather/')
+    # use this arg to load the model weights from a pre-existing checkpoint (this is not the
+    # same as the 'checkpoint_path'). These weights must be from a model of the same architecture.
+    parser.add_argument(
+        '--load-checkpoint',
+        )
+    parser.add_argument(
+        '--train-output-path',
+        )
+
+    args = parser.parse_args()
+    logging.info("Tensorflow version " + tf.__version__)
+
+    TRAIN_DATA_PATTERN = args.data_dir + "train*"
+    EVAL_DATA_PATTERN = args.data_dir + "test*"
+    OUTPUT_DIR='{}/bwmodel/trained_model'.format(args.workdir)
+    logging.info('Writing trained model to {}'.format(OUTPUT_DIR))
+    learning_rate = 0.001
+
+    # NOTE(review): indentation is lost in this patch; the "Checking input
+    # pipeline" prints are assumed to belong to the DEVELOP_MODE branch.
+    if DEVELOP_MODE:
+        dataset = load_dataset(TRAIN_DATA_PATTERN)
+        for n, data in enumerate(dataset):
+            numpy_data = {k: v.numpy() for k, v in data.items()} # .numpy() works only in eager mode
+            print(numpy_data)
+            if n>3: break
+
+        print("Checking input pipeline")
+        one_item = read_dataset(TRAIN_DATA_PATTERN, batch_size=2, truncate=1)
+        print(list(one_item)) # should print one batch of 2 items
+
+    train_batch_size = TRAIN_BATCH_SIZE
+    eval_batch_size = 1000
+    if args.steps_per_epoch == -1:  # calc based on dataset size
+        steps_per_epoch = NUM_EXAMPLES // train_batch_size
+    else:
+        steps_per_epoch = args.steps_per_epoch
+    logging.info('using {} steps per epoch'.format(steps_per_epoch))
+
+    train_dataset = read_dataset(TRAIN_DATA_PATTERN, train_batch_size)
+    eval_dataset = read_dataset(EVAL_DATA_PATTERN, eval_batch_size, tf.estimator.ModeKeys.EVAL,
+        eval_batch_size * 100 * STRATEGY.num_replicas_in_sync
+    )
+
+    model = create_model(learning_rate, args.load_checkpoint)
+
+    checkpoint_path = '{}/checkpoints/bikes_weather.cpt'.format(OUTPUT_DIR)
+    logging.info("checkpoint path: %s", checkpoint_path)
+    cp_callback = tf.keras.callbacks.ModelCheckpoint(checkpoint_path,
+                                                     save_weights_only=True,
+                                                     verbose=1)
+    # Built but currently not passed to fit() below.
+    tb_callback = tf.keras.callbacks.TensorBoard(log_dir='{}/logs'.format(OUTPUT_DIR),
+                                                 update_freq=10000)
+
+    logging.info("training model....")
+    # NOTE(review): validation_steps is given the eval batch size (1000);
+    # confirm that a step count of that value is what is intended.
+    history = model.fit(train_dataset,
+                        validation_data=eval_dataset,
+                        validation_steps=eval_batch_size,
+                        epochs=args.epochs,
+                        steps_per_epoch=steps_per_epoch,
+                        callbacks=[cp_callback  # , tb_callback
+                                  ]
+                       )
+    logging.info(history.history.keys())
+
+    ts = str(int(time.time()))
+    export_dir = '{}/export/bikesw/{}'.format(OUTPUT_DIR, ts)
+    logging.info('Exporting to {}'.format(export_dir))
+
+    try:
+        logging.info("exporting model....")
+        _export_model(model, export_dir, OUTPUT_DIR, args.train_output_path)
+    except Exception as e:  # hmm. retry once if error
+        logging.warning(e)
+        logging.info("retrying...")
+        time.sleep(10)
+        logging.info("again ... exporting model....")
+        # A second failure is deliberately left to propagate.
+        _export_model(model, export_dir, OUTPUT_DIR, args.train_output_path)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/ml/kubeflow-pipelines/bikes_weather/components/kubeflow-resources/containers/bikesw_training/Dockerfile b/ml/kubeflow-pipelines/bikes_weather/components/kubeflow-resources/containers/bikesw_training/Dockerfile new file mode 100644 index 0000000..3b5f9df --- /dev/null +++ b/ml/kubeflow-pipelines/bikes_weather/components/kubeflow-resources/containers/bikesw_training/Dockerfile @@ -0,0 +1,23 @@ +# Copyright 2019 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +FROM tensorflow/tensorflow:2.0.0-gpu-py3 + +RUN pip install --upgrade pip +RUN pip install pathlib2 + + +ADD build /ml + +ENTRYPOINT ["python", "/ml/bikes_weather.py"] diff --git a/ml/kubeflow-pipelines/bikes_weather/components/kubeflow-resources/containers/bikesw_training/build.sh b/ml/kubeflow-pipelines/bikes_weather/components/kubeflow-resources/containers/bikesw_training/build.sh new file mode 100755 index 0000000..27e7cfd --- /dev/null +++ b/ml/kubeflow-pipelines/bikes_weather/components/kubeflow-resources/containers/bikesw_training/build.sh @@ -0,0 +1,31 @@ +#!/bin/bash -e +# Copyright 2019 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +if [ -z "$1" ] + then + PROJECT_ID=$(gcloud config config-helper --format "value(configuration.properties.core.project)") +else + PROJECT_ID=$1 +fi + +mkdir -p ./build +rsync -arvp "../../bikesw_training"/ ./build/ + +docker build -t ml-pipeline-bikes-train . 
+rm -rf ./build + +docker tag ml-pipeline-bikes-train gcr.io/${PROJECT_ID}/ml-pipeline-bikes-train +docker push gcr.io/${PROJECT_ID}/ml-pipeline-bikes-train diff --git a/ml/kubeflow-pipelines/bikes_weather/components/kubeflow-resources/containers/tf-serving/Dockerfile b/ml/kubeflow-pipelines/bikes_weather/components/kubeflow-resources/containers/tf-serving/Dockerfile new file mode 100644 index 0000000..c7d4a4f --- /dev/null +++ b/ml/kubeflow-pipelines/bikes_weather/components/kubeflow-resources/containers/tf-serving/Dockerfile @@ -0,0 +1,49 @@ +# Copyright 2018 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +FROM ubuntu:16.04 + +RUN apt-get update -y + +RUN apt-get install --no-install-recommends -y -q ca-certificates python-dev python-setuptools wget unzip + +RUN easy_install pip + +RUN pip install pyyaml==3.12 six requests==2.18.4 tensorflow==2.0.0 + +RUN wget -nv https://dl.google.com/dl/cloudsdk/release/google-cloud-sdk.zip && \ + unzip -qq google-cloud-sdk.zip -d tools && \ + rm google-cloud-sdk.zip && \ + tools/google-cloud-sdk/install.sh --usage-reporting=false \ + --path-update=false --bash-completion=false \ + --disable-installation-options && \ + tools/google-cloud-sdk/bin/gcloud -q components update \ + gcloud core gsutil && \ + tools/google-cloud-sdk/bin/gcloud -q components install kubectl && \ + tools/google-cloud-sdk/bin/gcloud config set component_manager/disable_update_check true && \ + touch /tools/google-cloud-sdk/lib/third_party/google.py + +# RUN wget -nv https://github.com/ksonnet/ksonnet/releases/download/v0.11.0/ks_0.11.0_linux_amd64.tar.gz && \ +# tar -xvzf ks_0.11.0_linux_amd64.tar.gz && \ +# mkdir -p /tools/ks/bin && \ +# cp ./ks_0.11.0_linux_amd64/ks /tools/ks/bin && \ +# rm ks_0.11.0_linux_amd64.tar.gz && \ +# rm -r ks_0.11.0_linux_amd64 + +ENV PATH $PATH:/tools/google-cloud-sdk/bin:/tools/ks/bin + +ADD build /ml + +ENTRYPOINT ["python", "/ml/deploy-tfserve.py"] + diff --git a/ml/kubeflow-pipelines/bikes_weather/components/kubeflow-resources/containers/tf-serving/build.sh b/ml/kubeflow-pipelines/bikes_weather/components/kubeflow-resources/containers/tf-serving/build.sh new file mode 100755 index 0000000..3b6f848 --- /dev/null +++ b/ml/kubeflow-pipelines/bikes_weather/components/kubeflow-resources/containers/tf-serving/build.sh @@ -0,0 +1,31 @@ +#!/bin/bash -e +# Copyright 2018 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +if [ -z "$1" ] + then + PROJECT_ID=$(gcloud config config-helper --format "value(configuration.properties.core.project)") +else + PROJECT_ID=$1 +fi + +mkdir -p ./build +rsync -arvp "../../tf-serving"/ ./build/ + +docker build -t ml-pipeline-tfserve . +rm -rf ./build + +docker tag ml-pipeline-tfserve gcr.io/${PROJECT_ID}/ml-pipeline-tfserve +docker push gcr.io/${PROJECT_ID}/ml-pipeline-tfserve diff --git a/ml/kubeflow-pipelines/bikes_weather/components/kubeflow-resources/tf-serving/deploy-tfserve.py b/ml/kubeflow-pipelines/bikes_weather/components/kubeflow-resources/tf-serving/deploy-tfserve.py new file mode 100644 index 0000000..281655e --- /dev/null +++ b/ml/kubeflow-pipelines/bikes_weather/components/kubeflow-resources/tf-serving/deploy-tfserve.py @@ -0,0 +1,106 @@ +# Copyright 2018 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+
+import argparse
+import os
+import sys
+import time
+import logging
+import subprocess
+import requests
+
+from tensorflow.python.lib.io import file_io #pylint: disable=no-name-in-module
+
+# Bounds for the wait-for-model loop in main().
+MAX_RETRIES = 20
+MAX_SLEEP_SECONDS = 300
+
+
+def main():
+    """Render the TF-Serving manifest for a trained model and create it with kubectl.
+
+    Waits for `--model_path` to become listable, fills in
+    'tf-serve-template.yaml' (located next to this script), writes the result
+    to 'tf-serve.yaml' in the same directory and runs `kubectl create -f` on
+    it. Exits with a non-zero status if the model directory never appears or
+    if kubectl fails.
+    """
+    parser = argparse.ArgumentParser(description='ML Trainer')
+    parser.add_argument(
+        '--model_name',
+        help='...',
+        required=True)
+
+    parser.add_argument(
+        '--model_path',
+        help='...',
+        required=True)
+
+    parser.add_argument('--cluster', type=str,
+                        help='GKE cluster set up for kubeflow. If set, zone must be provided. ' +
+                             'If not set, assuming this runs in a GKE container and current ' +
+                             'cluster is used.')
+    parser.add_argument('--zone', type=str, help='zone of the kubeflow cluster.')
+    args = parser.parse_args()
+
+    KUBEFLOW_NAMESPACE = 'kubeflow'
+    ts = str(int(time.time()))
+
+    # Make sure model dir exists before proceeding
+    sleeptime = 5
+    for _ in range(MAX_RETRIES):
+        try:
+            model_dir = os.path.join(args.model_path, file_io.list_directory(args.model_path)[-1])
+            print("model subdir: %s" % model_dir)
+            break
+        except Exception as e: #pylint: disable=broad-except
+            print(e)
+            print("Sleeping %s seconds to sync with GCS..." % sleeptime)
+            time.sleep(sleeptime)
+            # Exponential backoff, capped: uncapped doubling over 20 retries
+            # would end up sleeping for weeks.
+            sleeptime = min(sleeptime * 2, MAX_SLEEP_SECONDS)
+    else:
+        # Every attempt failed.
+        print("could not get model subdir from %s, exiting" % args.model_path)
+        sys.exit(1)
+
+    logging.getLogger().setLevel(logging.INFO)
+    args_dict = vars(args)
+    if args.cluster and args.zone:
+        cluster = args_dict.pop('cluster') #pylint: disable=unused-variable
+        zone = args_dict.pop('zone') #pylint: disable=unused-variable
+    else:
+        # Get cluster name and zone from metadata
+        metadata_server = "http://metadata/computeMetadata/v1/instance/"
+        metadata_flavor = {'Metadata-Flavor' : 'Google'}
+        cluster = requests.get(metadata_server + "attributes/cluster-name",
+                               headers=metadata_flavor).text
+        zone = requests.get(metadata_server + "zone",
+                            headers=metadata_flavor).text.split('/')[-1]
+
+    # logging.info('Getting credentials for GKE cluster %s.' % cluster)
+    # subprocess.call(['gcloud', 'container', 'clusters', 'get-credentials', cluster,
+    #                  '--zone', zone])
+
+    logging.info('Generating training template.')
+
+    script_dir = os.path.dirname(os.path.realpath(__file__))
+    template_file = os.path.join(script_dir, 'tf-serve-template.yaml')
+    target_file = os.path.join(script_dir, 'tf-serve.yaml')
+
+    with open(template_file, 'r') as f:
+        data = f.read()
+    # Placeholder substitution; the order of the replacements is unchanged.
+    changed = data.replace('MODEL_NAME', args.model_name)
+    changed1 = changed.replace('KUBEFLOW_NAMESPACE', KUBEFLOW_NAMESPACE)
+    changed2 = changed1.replace('MODEL_PATH', args.model_path)
+    changed3 = changed2.replace('SERVICE_NAME', args.model_name + ts)
+    with open(target_file, "w") as target:
+        target.write(changed3)
+    logging.info("template: %s", changed3)
+
+    logging.info('deploying model serving.')
+    # Apply the file that was just written rather than a hard-coded path, and
+    # propagate a kubectl failure instead of reporting success.
+    returncode = subprocess.call(['kubectl', 'create', '-f', target_file])
+    if returncode != 0:
+        logging.error('kubectl create failed with exit code %s', returncode)
+        sys.exit(returncode)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/ml/kubeflow-pipelines/bikes_weather/components/kubeflow-resources/tf-serving/tf-serve-template.yaml b/ml/kubeflow-pipelines/bikes_weather/components/kubeflow-resources/tf-serving/tf-serve-template.yaml new file mode 100644 index 0000000..05374cf --- /dev/null +++ b/ml/kubeflow-pipelines/bikes_weather/components/kubeflow-resources/tf-serving/tf-serve-template.yaml @@ -0,0 +1,83 @@ +--- +apiVersion: v1 +kind: Service +metadata: + annotations: + getambassador.io/config: |- + --- + apiVersion: ambassador/v0 + kind: Mapping + name: tfserving-predict-mapping-MODEL_NAME + prefix: tfserving/models/MODEL_NAME/ + rewrite: /v1/models/MODEL_NAME:predict + method: POST + service: MODEL_NAME.kubeflow:8500 + labels: + app: MODEL_NAME + name: SERVICE_NAME + namespace: KUBEFLOW_NAMESPACE +spec: + ports: + - name: grpc-tf-serving + port: 9000 + targetPort: 9000 + - name: tf-serving-builtin-http + port: 8500 + targetPort: 8500 + selector: + app: SERVICE_NAME + type: LoadBalancer +--- +apiVersion: extensions/v1beta1 +kind: Deployment +metadata: +
labels: + app: SERVICE_NAME + name: SERVICE_NAME + namespace: KUBEFLOW_NAMESPACE +spec: + replicas: 1 + template: + metadata: + labels: + app: SERVICE_NAME + version: v1 + spec: + volumes: + - name: gcp-credentials-user-gcp-sa + secret: + secretName: user-gcp-sa + containers: + - args: + - --port=9000 + - --rest_api_port=8500 + - --model_name=MODEL_NAME + - --model_base_path=MODEL_PATH + command: + - /usr/bin/tensorflow_model_server + image: tensorflow/serving + env: + - name: GOOGLE_APPLICATION_CREDENTIALS + value: /secret/gcp-credentials/user-gcp-sa.json + - name: CLOUDSDK_AUTH_CREDENTIAL_FILE_OVERRIDE + value: /secret/gcp-credentials/user-gcp-sa.json + volumeMounts: + - mountPath: /secret/gcp-credentials + name: gcp-credentials-user-gcp-sa + imagePullPolicy: IfNotPresent + livenessProbe: + initialDelaySeconds: 30 + periodSeconds: 30 + tcpSocket: + port: 9000 + name: MODEL_NAME + ports: + - containerPort: 9000 + - containerPort: 8500 + resources: + limits: + cpu: "4" + memory: 4Gi + requests: + cpu: "1" + memory: 1Gi diff --git a/ml/kubeflow-pipelines/bikes_weather/components/serve_component.yaml b/ml/kubeflow-pipelines/bikes_weather/components/serve_component.yaml new file mode 100644 index 0000000..5c1c5c0 --- /dev/null +++ b/ml/kubeflow-pipelines/bikes_weather/components/serve_component.yaml @@ -0,0 +1,34 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +name: Serve TF model +description: | + A Kubeflow Pipeline component to deploy a tf-serving service +metadata: + labels: + add-pod-env: 'true' +inputs: + - name: model_name + type: String + - name: model_path + type: GCSPath +implementation: + container: + image: gcr.io/google-samples/ml-pipeline-tfserve:v2 + args: [ + --model_name, {inputValue: model_name}, + --model_path, {inputValue: model_path}, + ] + env: + KFP_POD_NAME: "{{pod.name}}" diff --git a/ml/kubeflow-pipelines/bikes_weather/components/train_component.yaml b/ml/kubeflow-pipelines/bikes_weather/components/train_component.yaml new file mode 100644 index 0000000..efaea8b --- /dev/null +++ b/ml/kubeflow-pipelines/bikes_weather/components/train_component.yaml @@ -0,0 +1,50 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +name: Train bikes_weather model +description: | + A Kubeflow Pipeline component to train the bikes_weather + model +metadata: + labels: + add-pod-env: 'true' +inputs: + - name: epochs + type: Integer + default: 1 + - name: steps_per_epoch + type: Integer + default: -1 + - name: data_dir + type: GCSPath + - name: workdir + type: GCSPath + - name: load_checkpoint + type: GCSPath +outputs: + - name: train_output_path + type: GCSPath +implementation: + container: + image: gcr.io/google-samples/ml-pipeline-bikes-train:v2 + args: [ + --data-dir, {inputValue: data_dir}, + --epochs, {inputValue: epochs}, + --steps-per-epoch, {inputValue: steps_per_epoch}, + --workdir, {inputValue: workdir}, + --load-checkpoint, {inputValue: load_checkpoint}, + --train-output-path, {outputPath: train_output_path} + ] + env: + KFP_POD_NAME: "{{pod.name}}" diff --git a/ml/kubeflow-pipelines/bikes_weather/example_pipelines/bw.py b/ml/kubeflow-pipelines/bikes_weather/example_pipelines/bw.py new file mode 100644 index 0000000..016507b --- /dev/null +++ b/ml/kubeflow-pipelines/bikes_weather/example_pipelines/bw.py @@ -0,0 +1,66 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+
+import kfp.dsl as dsl
+import kfp.gcp as gcp
+import kfp.components as comp
+from kfp.dsl.types import GCSPath, String
+
+
+COPY_ACTION = 'copy_data'
+TRAIN_ACTION = 'train'
+WORKSPACE_NAME = 'ws_gh_summ'
+DATASET = 'dataset'
+MODEL = 'model'
+
+# Component definitions are fetched over HTTP when this module is imported.
+# NOTE(review): both URLs point at 'sbtb/components/', while the component
+# files added alongside this pipeline live under 'bikes_weather/components/'
+# -- confirm these are the intended definitions.
+train_op = comp.load_component_from_url(
+    'https://raw.githubusercontent.com/amygdala/code-snippets/master/ml/kubeflow-pipelines/sbtb/components/train_component.yaml' # pylint: disable=line-too-long
+    )
+serve_op = comp.load_component_from_url(
+    'https://raw.githubusercontent.com/amygdala/code-snippets/master/ml/kubeflow-pipelines/sbtb/components/serve_component.yaml' # pylint: disable=line-too-long
+    )
+
+
+@dsl.pipeline(
+    name='bikes_weather',
+    description='Model bike rental duration given weather'
+)
+def bikes_weather( #pylint: disable=unused-argument
+    working_dir: GCSPath = 'gs://YOUR_GCS_DIR_HERE',
+    data_dir: GCSPath = 'gs://aju-dev-demos-codelabs/bikes_weather/',
+    epochs: 'Integer' = 1,
+    steps_per_epoch: 'Integer' = -1 ,  # if -1, don't override normal calcs based on dataset size
+    load_checkpoint: String = ''
+    ):
+    """Two-step pipeline: train the model, then deploy it for serving.
+
+    The training step works in a per-run subdirectory of `working_dir`; its
+    'train_output_path' output is passed to the serving step as the model
+    path. Both steps use the 'user-gcp-sa' secret and the training step
+    requests one GPU.
+    """
+    # Per-run work directory, so separate runs do not share outputs.
+    run_workdir = '%s/%s' % (working_dir, dsl.RUN_ID_PLACEHOLDER)
+
+    train_step = train_op(
+        data_dir=data_dir,
+        workdir=run_workdir,
+        epochs=epochs, steps_per_epoch=steps_per_epoch,
+        load_checkpoint=load_checkpoint
+        )
+    train_step = train_step.apply(gcp.use_gcp_secret('user-gcp-sa'))
+
+    # The serving step is registered with the pipeline on creation; its
+    # handle is not needed afterwards.
+    serve_op(
+        model_path=train_step.outputs['train_output_path'],
+        model_name='bikesw'
+        ).apply(gcp.use_gcp_secret('user-gcp-sa'))
+
+    train_step.set_gpu_limit(1)
+
+if __name__ == '__main__':
+    import kfp.compiler as compiler
+    compiler.Compiler().compile(bikes_weather, __file__ + '.tar.gz')