Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

upgrade: docker-compose with version 8.x #132

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .env.sample
Original file line number Diff line number Diff line change
@@ -1 +1 @@
ES_VERSION=7.5.1
ES_VERSION=8.3.3
48 changes: 48 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Elasticsearch version used for both the builder and the runtime base image.
# The final stage copies elasticsearch-analysis-vietnamese-$ES_VERSION.zip, so
# the Maven build below has to produce an archive with exactly that name.
ARG ES_VERSION=8.3.3
# Build-time only (ARG, not ENV), so it never reaches the runtime environment.
ARG DEBIAN_FRONTEND=noninteractive

# thanks to https://github.com/cpfriend1721994/docker-es-cococ-tokenizer
FROM docker.elastic.co/elasticsearch/elasticsearch:$ES_VERSION AS builder
# ARGs declared before FROM are only visible in FROM lines; re-declare them here.
ARG ES_VERSION
ARG DEBIAN_FRONTEND
# Maven release to install; override with --build-arg MAVEN_VERSION=...
ARG MAVEN_VERSION=3.8.8
# Installing packages needs root.
USER root

# Toolchain for the C++ tokenizer (cmake/make/gcc) and the Java plugin (JDK 17).
# update + install share one layer so a cached, stale package index is never
# reused; `gcc --version` is kept as a build-log diagnostic.
RUN apt-get update -y && \
    apt-get install -y \
        build-essential \
        cmake \
        git \
        make \
        openjdk-17-jdk \
        pkg-config \
        software-properties-common \
        wget && \
    rm -rf /var/lib/apt/lists/* && \
    gcc --version
# NOTE(review): this path is amd64-specific; it will differ on other architectures.
ENV JAVA_HOME=/usr/lib/jvm/java-17-openjdk-amd64

# Fetch Maven from archive.apache.org: dlcdn.apache.org only keeps the current
# releases, so a pinned version disappears from there once it is superseded.
RUN wget -O /tmp/apache-maven.tar.gz \
        "https://archive.apache.org/dist/maven/maven-3/${MAVEN_VERSION}/binaries/apache-maven-${MAVEN_VERSION}-bin.tar.gz" && \
    mkdir -p /usr/share/maven && \
    tar -xzf /tmp/apache-maven.tar.gz -C /usr/share/maven --strip-components=1 && \
    rm /tmp/apache-maven.tar.gz
ENV PATH=/usr/share/maven/bin:$PATH

WORKDIR /

# Run Maven against pom.xml alone, before the sources are copied, so this layer
# stays cached until pom.xml changes. --fail-never keeps the step from failing
# the build even though no sources are present yet.
COPY pom.xml .
RUN mvn verify clean --fail-never

# Only the current tree is needed for the build, so skip the history.
RUN git clone --depth 1 https://github.com/coccoc/coccoc-tokenizer.git

# WORKDIR creates the build directory if it does not exist.
WORKDIR /coccoc-tokenizer/build
# BUILD_JAVA=1 is passed to CMake; the final stage copies
# libcoccoc_tokenizer_jni.so and the *.dump files out of this directory.
RUN cmake -DBUILD_JAVA=1 .. && \
    make install

# Build the plugin itself; tests are skipped and -e prints full Maven errors.
COPY . /elasticsearch-analysis-vietnamese
WORKDIR /elasticsearch-analysis-vietnamese
RUN mvn package -Dmaven.test.skip -e

# Runtime image: the Elasticsearch base image plus the tokenizer dictionaries,
# the JNI library and the plugin archive produced by the builder stage.
FROM docker.elastic.co/elasticsearch/elasticsearch:$ES_VERSION
# Re-declared so $ES_VERSION can be used inside this stage.
ARG ES_VERSION

# Dictionary sources and the dump files generated by the tokenizer build all go
# into one directory. The trailing slash makes the destination explicitly a directory.
COPY --from=builder /coccoc-tokenizer/dicts/tokenizer /usr/local/share/tokenizer/dicts/
COPY --from=builder /coccoc-tokenizer/dicts/vn_lang_tool /usr/local/share/tokenizer/dicts/
COPY --from=builder \
    /coccoc-tokenizer/build/multiterm_trie.dump \
    /coccoc-tokenizer/build/nontone_pair_freq_map.dump \
    /coccoc-tokenizer/build/syllable_trie.dump \
    /usr/local/share/tokenizer/dicts/
# JNI library built by the tokenizer's CMake project.
COPY --from=builder /coccoc-tokenizer/build/libcoccoc_tokenizer_jni.so /usr/lib/
COPY --from=builder /elasticsearch-analysis-vietnamese/target/releases/elasticsearch-analysis-vietnamese-$ES_VERSION.zip /
# --batch already answers the confirmation prompt, so nothing is piped into the
# command; without a pipe, a failing install also fails the build directly.
RUN /usr/share/elasticsearch/bin/elasticsearch-plugin install --batch "file:///elasticsearch-analysis-vietnamese-$ES_VERSION.zip"
26 changes: 24 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,28 @@ The above example produces the following terms:

```

## Use Docker

Make sure you have installed both Docker and Docker Compose.

### Use Docker Compose

```bash
# build the image
cp .env.sample .env
docker compose build
docker compose up

# test
curl -k http://elastic:changemenow@localhost:9200/_analyze -H 'Content-Type: application/json' -d '
{
"analyzer": "vi_analyzer",
"text": "Cộng hòa Xã hội chủ nghĩa Việt Nam"
}'
# example result
{"tokens":[{"token":"cộng hòa","start_offset":0,"end_offset":8,"type":"<WORD>","position":0},{"token":"xã hội","start_offset":9,"end_offset":15,"type":"<WORD>","position":1},{"token":"chủ nghĩa","start_offset":16,"end_offset":25,"type":"<WORD>","position":2},{"token":"việt nam","start_offset":26,"end_offset":34,"type":"<WORD>","position":3}]}
```

## Build from Source
### Step 1: Build the C++ tokenizer library for Vietnamese
```sh
Expand Down Expand Up @@ -136,7 +158,7 @@ Optionally, edit the `elasticsearch-analysis-vietnamese/pom.xml` to change the v

```xml
...
<version>7.17.1</version>
<version>8.3.3</version>
...
```

Expand All @@ -149,7 +171,7 @@ mvn package
### Step 3: Install the plugin on Elasticsearch

```sh
bin/elasticsearch-plugin install file://target/releases/elasticsearch-analysis-vietnamese-7.17.1.zip
bin/elasticsearch-plugin install file://target/releases/elasticsearch-analysis-vietnamese-8.3.3.zip
```

## Compatible Versions
Expand Down
29 changes: 21 additions & 8 deletions docker-compose.yaml
Original file line number Diff line number Diff line change
@@ -1,15 +1,28 @@
# NOTE(review): this listing is a rendered diff without +/- markers, so lines
# from the previous and the new revision appear interleaved (e.g. the
# install-es-plugin.sh volume/entrypoint lines next to `build: .`). Confirm the
# final layout against the actual docker-compose.yaml.
version: '3.4'

services:
elasticsearch:
image: docker.elastic.co/elasticsearch/elasticsearch:${ES_VERSION}
build: .
restart: on-failure
ports:
- "9200:9200"
volumes:
- ./target/releases/elasticsearch-analysis-vietnamese-${ES_VERSION}.zip:/usr/share/elasticsearch/plugin/elasticsearch-analysis-vietnamese-${ES_VERSION}.zip
- ./install-es-plugin.sh:/apps/install-es-plugin.sh
ulimits:
nofile:
soft: 65536
hard: 65536
memlock:
hard: -1
soft: -1
environment:
- "ES_VERSION=${ES_VERSION}"
- "discovery.type=single-node"
entrypoint:
- /apps/install-es-plugin.sh
ES_JAVA_OPTS: "-Xmx2g -Xms2g"
ELASTIC_USERNAME: elastic
# NOTE(review): sample credential committed in plain text; change it for anything but local testing.
ELASTIC_PASSWORD: changemenow
bootstrap.memory_lock: "true"
discovery.type: single-node
# Quoted like bootstrap.memory_lock above so YAML passes it as the string "true" rather than a boolean.
xpack.security.enabled: "true"
networks:
- elastic

networks:
elastic:
driver: bridge
8 changes: 0 additions & 8 deletions install-es-plugin.sh

This file was deleted.