diff --git a/Dockerfile b/Dockerfile
index 29eb36ac..ff5f30c9 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,3 +1,17 @@
+# Modifications Copyright 2019 GridCell Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 # This Dockerfile is used to serve the AllenNLP demo.
 FROM allennlp/commit:31af01e0db7ac401b6c4923d5badd7de2691d6a2
 
@@ -5,10 +19,20 @@ LABEL maintainer="allennlp-contact@allenai.org"
 
 WORKDIR /stage/allennlp
 
+# Refresh the package index and apply pending upgrades in a single layer, so a
+# cached, stale `apt-get update` layer is never reused on its own
+# (https://docs.docker.com/develop/develop-images/dockerfile_best-practices/).
+# The downloaded index is removed in the same layer to keep the image small.
+RUN apt-get -y update \
+    && apt-get -y upgrade \
+    && rm -rf /var/lib/apt/lists/*
+
 # Install Java.
 RUN echo "deb http://http.debian.net/debian jessie-backports main" >>/etc/apt/sources.list
-RUN apt-get update
-RUN apt-get install -y -t jessie-backports openjdk-8-jdk
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends -t jessie-backports openjdk-8-jdk \
+    && rm -rf /var/lib/apt/lists/*
+
 
 # Install npm early so layer is cached when mucking with the demo
 RUN curl -sL https://deb.nodesource.com/setup_8.x | bash - && apt-get install -y nodejs
diff --git a/NOTICE b/NOTICE
new file mode 100644
index 00000000..e3d41c5e
--- /dev/null
+++ b/NOTICE
@@ -0,0 +1,5 @@
+Modified files Copyright GridCell Ltd 2019
+
+
+This product is derived from software developed at
+AllenAI (https://allenai.org/) from repository (https://github.com/allenai/allennlp-demo).
\ No newline at end of file diff --git a/README.md b/README.md index 1834d27a..c6e2cc47 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,18 @@ + + # AllenNLP Demo This repository contains the AllenNLP demo. @@ -54,4 +69,237 @@ To run the demo for development, you will need to: ``` Normally, the backend server would manage the frontend assets as well - the JS has a special hack for if it is running on port 3000 (which it does by default if you are running the unoptimized JS using `npm run start`), it will look for the backend service at port 8000. Otherwise, it serves the backend and the frontend from the same port. + + # Section added by [gridcell](https://twitter.com/gridcell_io) + + # Deploying on Kubernetes (Google Kubernetes Engine) + + ## Preparation + + 1. Docker builds + + The script `build-allennlp-image.sh` builds the local `Dockerfile`, tags the image and uploads it to the + [Google Cloud Container Registry](https://cloud.google.com/container-registry/docs/). + + + Set the following environment variables first + + ``` + export PROJECT=some_project # Google Cloud project you are working under + export VERSION=0.8.2 # set the VERSION to whatever you like e.g. latest or 0.8.2 + ``` + Now set `CONTAINER_REGION`, the location of the container [registry](https://cloud.google.com/container-registry/docs/pushing-and-pulling) + + The four options are: + + * `gcr.io` hosts the images in the United States, but the location may change in the future + * `us.gcr.io` hosts the images in the United States, in a separate storage bucket from images hosted by gcr.io + * `eu.gcr.io` hosts the images in the European Union + * `asia.gcr.io` hosts the images in Asia + +Example + +```bash +export CONTAINER_REGION=us.gcr.io +``` + +then run it + +```bash + cd kubernetes +./build-allennlp-image.sh +``` + +look out for something like +```bash +The push refers to repository [us.gcr.io/PROJECT/allennlp] +... 
+DIGEST TAGS TIMESTAMP +XXXXXXX 0.8.2,latest 2019-03-19T15:07:01 +XXXXXXX 2019-03-12T20:25:41 +``` + + +2. Kubernetes preparation + + The `kubernetes` folder has all the kubernetes definition files to launch allennlp on a cluster + + * `allennlp.yml` contains a [StatefulSet](https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/) that uses persistent volumes to store downloaded models on a disk attached to the node. + This deployment is configured with 3 replicas. + + Be sure to change the file in the containers section and replace `CHANGE_ME` with the tag of your Docker image from 1. above: + i.e. `CONTAINER_REGION/PROJECT/allennlp:VERSION` e.g. `eu.gcr.io/some_project/allennlp:0.8.2` + **Be sure to change this to the EXACT tag you got from step 1 or else you will get errors when deploying to Kubernetes** + + ```yml + containers: + - name: allennlp + image: CHANGE_ME #enter path to docker registry e.g on google cloud container registry eu.gcr.io/PROJECT_ID/allennlp:0.8.2 + ``` + + * `al-lb-svc.yml` creates a load balancer with a public IP address (see below) on HTTP port 8000 + * `al-ia-svc.yml` creates an internal (to the cluster) load balancer. For example, if you have a client (Node.js, Python, etc.) running on your Kubernetes cluster you can refer to it by `http://al-ia-svc:8000/predict/machine-comprehension` + * `gce-standard-sc.yml` the storage class of the disks (you can change this to ssd if you like `type: pd-ssd`) + + + ## Google Kubernetes Engine GKE + + The AllenNLP UI and backend can be deployed to a kubernetes cluster on a given cloud platform that supports Kubernetes. Requires + * [Google Cloud SDK](https://cloud.google.com/sdk/install) + * [Kubectl](https://kubernetes.io/docs/tasks/tools/install-kubectl/) + * [Minikube](https://kubernetes.io/docs/setup/minikube/) - optional if you want to test locally first + + 1. Follow the steps [here](https://cloud.google.com/kubernetes-engine/docs/quickstart) to learn more about GKE + + 2. 
Create a GKE cluster; a 50GB disk and `highmem` instances are recommended for loading and serving models. Other [instances](https://cloud.google.com/compute/docs/machine-types) are available too. + You can also add `--preemptible` to save running costs; see [here](https://cloud.google.com/kubernetes-engine/docs/how-to/preemptible-vms) + + ``` + gcloud container clusters create example-cluster \ + --node-locations us-central1-a \ + --additional-zones us-central1-b,us-central1-c \ + --machine-type=n1-highmem-4 \ + --disk-size=50 \ + --region us-central1 + ``` + 3. Get access to the cluster + +```bash +gcloud beta container clusters get-credentials example-cluster --region us-central1 --project PROJECT +``` +4. Apply the changes + +```bash +kubectl apply -f kubernetes/allennlp.yml +kubectl apply -f kubernetes/al-ia-svc.yml +kubectl apply -f kubernetes/al-lb-svc.yml +kubectl apply -f kubernetes/gce-standard-sc.yml +``` + +5. Check deployment. + +Step 4 will create the stateful set with 3 persistent volumes, each with its own persistent volume claim, an internal load balancer service and an external load balancer + +```bash +kubectl get all +``` + +should return something like +```bash +NAME READY STATUS RESTARTS AGE +pod/allennlp-0 1/1 Running 0 6m +pod/allennlp-1 1/1 Running 0 4m +pod/allennlp-2 1/1 Running 0 2m + +NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE +service/al-ia-svc ClusterIP 10.XX.XX.X 8000/TCP 9m +service/al-lb-svc LoadBalancer 10.XX.XX.X YYY.YYY.YYY.YYY 8000:31627/TCP 9m +service/kubernetes ClusterIP 10.XX.XX.X 443/TCP 12m + +NAME DESIRED CURRENT AGE +statefulset.apps/allennlp 3 3 6m + +``` + +6. 
Check the API through the load balancer + +Using the address `YYY.YYY.YYY.YYY` in step 5 you can query the API using Postman or curl + +`http://YYY.YYY.YYY.YYY:8000/predict/machine-comprehension` + +With the `POST` body + +```json +{ + "passage": "Robotics is an interdisciplinary branch of engineering and science that includes mechanical engineering,", + "question": "What do robots that resemble humans attempt to do?" +} +``` + +which should return something like (shortened and truncated): +```json +{ + "best_span": [ + 147, + 154 + ], + "best_span_str": "replicate walking, lifting, speech, cognition", + "passage_question_attention": [ + [ + 0.2749797999858856, + 0.04183763265609741, + 0.17196990549564362, + 0.20194320380687714, + 0.04489961266517639, + 0.02290954254567623, + 0.055871427059173584, + 0.05694901570677757, + 0.06618849188089371, + 0.06245144084095955 + ], + [ + 0.21053647994995117, + 0.11562809348106384, + 0.11758492887020111, + 0.15465371310710907, + 0.1086740493774414, + 0.02625945955514908, + 0.04414062201976776, + 0.061525385826826096, + 0.09386775642633438, + 0.0671294555068016 + ], + [ + 0.11171773821115494, + 0.11988984048366547, + 0.023860322311520576, + 0.030449630692601204, + 0.021468957886099815, + 0.009852085262537003, + 0.020644865930080414, + 0.04953836649656296, + 0.41884300112724304, + 0.19373510777950287 + ] + ], + "passage_tokens": [ + "Robotics", + "is", + "an", + "can", + "do", + "." + ], + "question_tokens": [ + "What", + "do", + "?" 
+  ],
+  "span_end_logits": [
+    -6.517852306365967,
+    -12.051702499389648,
+    -1.8338027000427246
+  ],
+  "span_end_probs": [
+    0.0001122089524869807,
+    4.4330960236038663e-7,
+    0.009583641774952412,
+    0.07637327909469604,
+    0.048559825867414474,
+    0.01214184332638979
+  ],
+  "span_start_logits": [
+    -5.348807334899902,
+    -10.847269058227539,
+    -3.811220169067383,
+    -4.827718734741211
+  ],
+  "span_start_probs": [
+    0.000004799693670065608,
+    1.9645446158733648e-8,
+    0.000022334650566335768,
+    0.00000808201093605021
+  ]
+}
+```
\ No newline at end of file
diff --git a/kubernetes/al-ia-svc.yml b/kubernetes/al-ia-svc.yml
new file mode 100644
index 00000000..00a47d1c
--- /dev/null
+++ b/kubernetes/al-ia-svc.yml
@@ -0,0 +1,27 @@
+# Copyright 2019 GridCell Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+apiVersion: v1
+kind: Service  # no `type` is set, so this is a cluster-internal (ClusterIP) Service
+metadata:
+  name: al-ia-svc
+  labels:
+    app: al-ia-svc
+spec:
+  selector:
+    app: allennlp  # same label the StatefulSet pod template in allennlp.yml carries
+  ports:
+  - name: http
+    port: 8000  # matches the containerPort declared for the allennlp container
+    protocol: TCP
diff --git a/kubernetes/al-lb-svc.yml b/kubernetes/al-lb-svc.yml
new file mode 100644
index 00000000..dc9fff05
--- /dev/null
+++ b/kubernetes/al-lb-svc.yml
@@ -0,0 +1,28 @@
+# Copyright 2019 GridCell Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+apiVersion: v1
+kind: Service
+metadata:
+  name: al-lb-svc
+  labels:
+    app: al-lb-svc
+spec:
+  type: LoadBalancer  # externally reachable Service
+  selector:
+    app: allennlp  # same label the StatefulSet pod template in allennlp.yml carries
+  ports:
+  - name: http
+    port: 8000  # matches the containerPort declared for the allennlp container
+    protocol: TCP
\ No newline at end of file
diff --git a/kubernetes/allennlp.yml b/kubernetes/allennlp.yml
new file mode 100644
index 00000000..fb99e521
--- /dev/null
+++ b/kubernetes/allennlp.yml
@@ -0,0 +1,84 @@
+# Copyright 2019 GridCell Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  name: allennlp
+  labels:
+    app: allennlp
+spec:
+  selector:
+    matchLabels:
+      app: allennlp  # must equal the labels of the pod template
+  # the headless-service that governs this StatefulSet
+  # responsible for the network identity of the set.
+  serviceName: allennlp-discovery-svc  # NOTE(review): no Service with this name is defined alongside this manifest - confirm it exists
+  replicas: 3
+  # Template is the object that describes the pod that will be created
+  template:
+    metadata:
+      labels:
+        app: allennlp
+    spec:
+      securityContext:
+        # allows read/write access for mounted volumes
+        # by users that belong to a group with gid: 1000
+        fsGroup: 1000
+      initContainers:
+      # raises vm.max_map_count; NOTE(review): looks inherited from an Elasticsearch manifest - confirm this privileged container is needed
+      - name: sysctl
+        image: busybox
+        imagePullPolicy: IfNotPresent
+        command: ["sysctl", "-w", "vm.max_map_count=262144"]
+        securityContext:
+          privileged: true
+      #optional uncomment these 2 next lines if you want allennlp to only run on specified nodes
+      #see https://kubernetes.io/docs/concepts/configuration/assign-pod-node/
+#      nodeSelector:
+#        purpose: allennlp
+      containers:
+      - name: allennlp
+        image: CHANGE_ME #enter path to docker registry e.g on google cloud container registry eu.gcr.io/PROJECT_ID/allennlp:0.8.2
+        resources:
+          limits:
+            memory: "13Gi"
+            cpu: "2.5"
+          requests:
+            memory: "12.0Gi"
+            cpu: "2"
+        imagePullPolicy: Always
+        ports:
+        - containerPort: 8000
+          name: transport
+          protocol: TCP
+        volumeMounts:
+        - name: es-data-vc
+          mountPath: /usr/share/allennlp/data
+#  # The StatefulSet guarantees that a given [POD] network identity will
+#  # always map to the same storage identity
+  volumeClaimTemplates:
+  - metadata:
+      name: es-data-vc
+    spec:
+      accessModes: [ "ReadWriteOnce" ]
+      resources:
+        requests:
+          # size of the volume mounted at /usr/share/allennlp/data (to be adjusted based on need)
+          storage: 25Gi
+      storageClassName: gce-standard-sc
+      # no LabelSelector defined
+      # claims can specify a label selector to further filter the set of volumes
+      # currently, a PVC with a non-empty selector can not have a PV dynamically provisioned for it
+      # no volumeName is provided
diff --git a/kubernetes/build-allennlp-image.sh b/kubernetes/build-allennlp-image.sh
new file mode 100755
index 00000000..6cbda053
--- /dev/null
+++ b/kubernetes/build-allennlp-image.sh
@@ -0,0 +1,57 @@
+#!/usr/bin/env bash
+# Copyright 2019 GridCell Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Builds the Docker image from the repository root, tags it for the Google
+# Cloud Container Registry, pushes it and lists the tags now in the registry.
+#
+# Required environment variables:
+#   PROJECT           Google Cloud project that owns the registry
+#   VERSION           image tag to build and push, e.g. 0.8.2
+#   CONTAINER_REGION  registry host, e.g. eu.gcr.io
+
+# Abort on the first failing command, unset variable or broken pipeline so a
+# failed build is never tagged or pushed.
+set -euo pipefail
+
+# Report every missing variable before giving up.
+missing=0
+if [[ -z "${PROJECT:-}" ]]; then
+  echo "Please specify a Google Compute project" >&2
+  missing=1
+fi
+if [[ -z "${VERSION:-}" ]]; then
+  echo "Please specify a version" >&2
+  missing=1
+fi
+if [[ -z "${CONTAINER_REGION:-}" ]]; then
+  echo "Please specify a container region" >&2
+  missing=1
+fi
+if [[ "${missing}" -ne 0 ]]; then
+  exit 1
+fi
+
+# The Dockerfile lives one level above this script; resolve that directory
+# from the script location so the script works from any working directory.
+repo_root="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+local_image="allennlp:${VERSION}"
+remote_image="${CONTAINER_REGION}/${PROJECT}/allennlp:${VERSION}"
+
+gcloud auth configure-docker
+
+docker build "${repo_root}" -t "${local_image}"
+docker tag "${local_image}" "${remote_image}"
+docker push "${remote_image}"
+gcloud container images list-tags "${CONTAINER_REGION}/${PROJECT}/allennlp"
diff --git a/kubernetes/gce-standard-sc.yml b/kubernetes/gce-standard-sc.yml
new file mode 100644
index 00000000..86d4b8ee
--- /dev/null
+++ b/kubernetes/gce-standard-sc.yml
@@ -0,0 +1,21 @@
+# Copyright 2019 GridCell Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+kind: StorageClass
+apiVersion: storage.k8s.io/v1  # GA API version; storage.k8s.io/v1beta1 is no longer served from Kubernetes 1.22
+metadata:
+  name: gce-standard-sc  # referenced by storageClassName in allennlp.yml
+provisioner: kubernetes.io/gce-pd
+parameters:
+  type: pd-standard  # standard persistent disk; set to pd-ssd for SSD-backed volumes