Skip to content

Commit

Permalink
Create a script to update the index and lookup file used to serve pre…
Browse files Browse the repository at this point in the history
…dictions. (kubeflow#352)

* This script will be the last step in a pipeline to continuously update
  the index for serving.

* The script updates the parameters of the search index server to point
  to the supplied index files. It then commits them and creates a PR
  to push those commits.

* Restructure the parameters for the search index server so that we can use
  ks param set to override the indexFile and lookupFile.

* We do this because we want to be able to push a new index by doing
  ks param set in a continuously running pipeline
* Remove default parameters from search-index-server

* Create a dockerfile suitable for running this script.
  • Loading branch information
jlewi authored and k8s-ci-robot committed Nov 26, 2018
1 parent 4f95e85 commit 5d6a4e9
Show file tree
Hide file tree
Showing 8 changed files with 158 additions and 14 deletions.
11 changes: 11 additions & 0 deletions code_search/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,17 @@ build-ui-gcb:
gcloud builds submit --machine-type=n1-highcpu-32 --project=kubeflow-ci --config=./build/build.ui.json \
--timeout=3600 ./build

# Build the index_updater image with Google Cloud Build.
#
# Renders the Cloud Build config from ./docker/index_updater/build.jsonnet into
# ./build/build.index_updater.json, stages ./docker and ./src into ./build
# (dropping the two test_data directories so they are not part of the upload),
# and submits ./build as the build context.
# GIT_VERSION and TAG are not defined in this target; presumably they are set
# elsewhere in this Makefile -- confirm.
build-index-updater-gcb:
mkdir -p build
jsonnet ./docker/index_updater/build.jsonnet --ext-str gitVersion=$(GIT_VERSION) --ext-str tag=$(TAG) \
> ./build/build.index_updater.json
cp -r ./docker ./build/
cp -r ./src ./build/
rm -rf ./build/src/code_search/dataflow/cli/test_data
rm -rf ./build/src/code_search/t2t/test_data
gcloud builds submit --machine-type=n1-highcpu-32 --project=kubeflow-ci --config=./build/build.index_updater.json \
--timeout=3600 ./build

# Build but don't attach the latest tag. This allows manual testing/inspection of the image
# first.
push-cpu: build-cpu
Expand Down
9 changes: 9 additions & 0 deletions code_search/docker/index_updater/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Image providing the command line tools used to open the index-update pull request.
FROM ubuntu:xenial

# git is required by hub (hub is a wrapper around git) and is invoked directly
# by update_index.sh; wget is only needed to download the hub release below.
# The apt lists are removed in the same layer to keep the image small.
RUN apt-get update && apt-get install -y git wget && \
    rm -rf /var/lib/apt/lists/*

# Install the hub GitHub CLI v2.6.0 under /usr/local and expose it on PATH.
# The downloaded archive is deleted in the same layer so it does not end up
# in the final image.
RUN wget -O /tmp/hub-linux-amd64-2.6.0.tgz https://github.com/github/hub/releases/download/v2.6.0/hub-linux-amd64-2.6.0.tgz && \
    cd /usr/local && \
    tar -xvf /tmp/hub-linux-amd64-2.6.0.tgz && \
    rm -f /tmp/hub-linux-amd64-2.6.0.tgz && \
    ln -sf /usr/local/hub-linux-amd64-2.6.0/bin/hub /usr/local/bin/hub
3 changes: 3 additions & 0 deletions code_search/docker/index_updater/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Index Updater

A Docker image and script for updating the nmslib index served by search-index-server.
26 changes: 26 additions & 0 deletions code_search/docker/index_updater/build.jsonnet
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
// TODO(jlewi): We should tag the image latest and then
// use latest as a cache so that rebuilds are fast
// https://cloud.google.com/cloud-build/docs/speeding-up-builds#using_a_cached_docker_image
// Cloud Build configuration for the index_updater image.
//
// External variables:
//   tag        - tag applied to the built image.
//   gitVersion - value stored in the image's "git-versions" label.
//
// The "build" step builds the image from docker/index_updater/Dockerfile and
// the "tag" step additionally tags it as "latest"; both tags are listed in
// "images".
local imageRepo = "gcr.io/kubeflow-examples/code-search/index_updater";
local taggedImage = imageRepo + ":" + std.extVar("tag");
local latestImage = imageRepo + ":latest";
local dockerBuilder = "gcr.io/cloud-builders/docker";

{
  steps: [
    {
      id: "build",
      name: dockerBuilder,
      args: [
        "build",
        "-t",
        taggedImage,
        "--label=git-versions=" + std.extVar("gitVersion"),
        "--file=docker/index_updater/Dockerfile",
        ".",
      ],
    },
    {
      id: "tag",
      name: dockerBuilder,
      args: ["tag", taggedImage, latestImage],
      // Tagging can only happen once the image exists.
      waitFor: ["build"],
    },
  ],
  images: [taggedImage, latestImage],
}
95 changes: 95 additions & 0 deletions code_search/docker/index_updater/update_index.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
#!/bin/bash
#
# This script creates a PR updating the nmslib index used by search-index-server.
# It uses ks CLI to update the parameters.
# After creating and pushing a commit it uses the hub github CLI to create a PR.
#
# The argument --base can be used to change the owner/org of the repo the PR is opened on.
# To use the main kubeflow/examples repo use
# --base=kubeflow:master
#
# To use user alex's fork use
# --base=alex:master
# -e: abort as soon as a command fails; -x: trace every command as it runs.
set -ex

# Absolute path of the directory that contains this script.
# NOTE(review): DIR is not referenced anywhere else in the visible script.
DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" > /dev/null && pwd)"

# Parse command line options of the form --{name}={value} or --{name}.
#
# Every recognised option is stored in a global shell variable called {name};
# a bare --{name} stores the string "true". {name} must be a valid shell
# identifier (letters, digits and underscores, not starting with a digit).
# Arguments that do not match either form are reported and skipped.
#
# Values are assigned with `printf -v` instead of `eval`, so they are taken
# literally: whitespace, quotes and $(...) inside a value are never
# re-interpreted (or executed) by the shell.
parseArgs() {
  # Unusual local names so that a user option cannot collide with them.
  local __parseArgsName __parseArgsValue

  while [[ $# -gt 0 ]]; do
    echo parsing "$1"
    if [[ $1 =~ ^--([A-Za-z_][A-Za-z0-9_]*)=(.*)$ ]]; then
      __parseArgsName=${BASH_REMATCH[1]}
      __parseArgsValue=${BASH_REMATCH[2]}
      printf -v "${__parseArgsName}" '%s' "${__parseArgsValue}"
    elif [[ $1 =~ ^--([A-Za-z_][A-Za-z0-9_]*)$ ]]; then
      __parseArgsName=${BASH_REMATCH[1]}
      printf -v "${__parseArgsName}" '%s' "true"
    else
      echo "Argument $1 did not match the pattern --{name}={value} or --{name}"
    fi
    shift
  done
}

# Print a one-line synopsis of the supported command line flags to stdout.
usage() {
  printf '%s\n' "Usage: update_index.sh --base=OWNER:branch --appDir=<ksonnet app dir> --env=<ksonnet environment> --indexFile=<index file> --lookupFile=<lookup file>"
}

# Main flow: parse the flags, point the search-index-server component at the
# supplied index/lookup files, commit and push the change, then open a PR.

# "$@" keeps every argument intact; $* would re-split values on whitespace.
parseArgs "$@"

# --help only prints the usage text; it must not fall through to the
# required-parameter check below.
if [ -n "${help:-}" ]; then
  usage
  exit 0
fi

# Dry run is off unless --dryrun was supplied.
dryrun="${dryrun:-false}"

# List of required parameters
names=(appDir env lookupFile indexFile base)

missingParam=false
for i in "${names[@]}"; do
  # ${!i} is the value of the variable whose name is stored in i.
  if [ -z "${!i:-}" ]; then
    echo "--${i} not set"
    missingParam=true
  fi
done

if ${missingParam}; then
  usage
  exit 1
fi

cd "${appDir}"
ks param set --env="${env}" search-index-server indexFile "${indexFile}"
ks param set --env="${env}" search-index-server lookupFile "${lookupFile}"
git add .

# Compare the flag as a string instead of executing its value as a command.
# Anything other than the literal "false" is treated as a dry run so that an
# unexpected value can never cause an unintended push.
if [ "${dryrun}" != "false" ]; then
  echo "dryrun; not committing to git."
  exit 0
fi

git commit -m "Update the lookup and index file."
git push

# Keep the PR message outside the git checkout and remove it when the script
# exits, so it is never left behind as an untracked file in the repository.
FILE="$(mktemp "${TMPDIR:-/tmp}/create_pull_request.XXXXXX")"
trap 'rm -f "${FILE}"' EXIT

# hub uses the text up to the first blank line as the PR title and the rest
# as the description, hence the empty line after the first line.
cat <<EOF >"${FILE}"
Update the lookup and index file.

This PR is automatically generated by update_index.sh.
This PR updates the index and lookup file used to serve
predictions.
EOF

# Create a pull request
hub pull-request --base="${base}" -F "${FILE}"
6 changes: 2 additions & 4 deletions code_search/kubeflow/components/params.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -97,11 +97,9 @@
indexFile: $.components["t2t-code-search"].workingDir + "/code_search_index.nmslib",
},
"search-index-server": {
// Most defaults should be defined in experiments.libsonnet.
// Parameters will be used to override those values.
name: "search-index-server",
problem: $.components["t2t-code-search"].problem,
dataDir: $.components["t2t-code-search"].workingDir + "/data",
lookupFile: $.components["t2t-code-search"].workingDir + "/code_search_index.csv",
indexFile: $.components["t2t-code-search"].workingDir + "/code_search_index.nmslib",
servingUrl: "http://t2t-code-search.kubeflow:8500/v1/models/t2t-code-search:predict",
// 1 replica is convenient for debugging but we should bump after debugging.
replicas: 1,
Expand Down
7 changes: 4 additions & 3 deletions code_search/kubeflow/components/search-index-server.jsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,10 @@ local experiments = import "experiments.libsonnet";

local experimentName = baseParams.experiment;
local experimentParams = experiments[experimentName];
local params = baseParams + experimentParams + {
name: "search-index-server",
};

// baseParams override experiment parameters because we want to be able to set a new
// index and csv file by doing ks param set.
local params = experimentParams + baseParams;

local deploymentSpec = {
apiVersion: "extensions/v1beta1",
Expand Down
15 changes: 8 additions & 7 deletions code_search/kubeflow/environments/cs_demo/params.libsonnet
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
local params = std.extVar("__ksonnet/params");
local globals = import "globals.libsonnet";
local params = std.extVar('__ksonnet/params');
local globals = import 'globals.libsonnet';
local envParams = params {
components+: {
"t2t-code-search"+: {
},
"t2t-code-search"+: {},
"t2t-code-search-datagen"+: {
githubTable: "",
githubTable: '',
},
"submit-preprocess-job"+: {
githubTable: "",
githubTable: '',
},
"search-index-server"+: {
},
},
};
Expand All @@ -18,4 +19,4 @@ local envParams = params {
[x]: envParams.components[x] + globals
for x in std.objectFields(envParams.components)
},
}
}

0 comments on commit 5d6a4e9

Please sign in to comment.