fix: reduce image size and refactor layers (#405)

- reduce image size by optimizing layers - remove unneeded files - remove loggly plugin as it's way out of date - use specific install instead of git clone Signed-off-by: Brian Davis <dbrian@vmware.com>
vmware · Jun 30, 2023 · b55270e · b55270e
1 parent eb95237
commit b55270e
Show file tree

Hide file tree

Showing 9 changed files with 81 additions and 87 deletions.
diff --git a/README.md b/README.md
@@ -77,12 +77,12 @@ bad tag for <match>: hello-world. Tag must start with **, $thisns or demo
 
 When the configuration is made valid again the `fluentd-status` is set to "".
 
-To see kube-fluentd-operator in action you need a cloud log collector like logz.io, loggly, papertrail or ELK accessible from the K8S cluster. A simple loggly configuration looks like this (replace TOKEN with your customer token):
+To see kube-fluentd-operator in action you need a cloud log collector like logz.io, papertrail or ELK accessible from the K8S cluster. A simple logz.io configuration looks like this (replace `$TOKEN` with your customer token):
 
 ```xml
 <match **>
-   @type loggly
-   loggly_url https://logs-01.loggly.com/inputs/TOKEN/tag/fluentd
+   @type logzio_buffered
+   endpoint_url https://listener.logz.io:8071?token=$TOKEN
 </match>
 ```
 
@@ -264,12 +264,12 @@ A very useful feature is the `<filter>` and the `$labels` macro to define parsin
 </filter>
 
 <match **>
-  @type loggly
+  @type logzio_buffered
   # destination config omitted
 </match>
 ```
 
-The above config will pipe all logs from the pods labelled with `app=log-router` through a [logfmt](https://github.com/vmware/kube-fluentd-operator/blob/master/base-image/plugins/parser_logfmt.rb) parser before sending them to loggly. Again, this configuration is valid in any namespace. If the namespace doesn't contain any `log-router` components then the `<filter>` directive is never activated. The `_container` is sort of a "meta" label and it allows for targeting the log stream of a specific container in a multi-container pod.
+The above config will pipe all logs from the pods labelled with `app=log-router` through a [logfmt](https://github.com/vmware/kube-fluentd-operator/blob/master/base-image/plugins/parser_logfmt.rb) parser before sending them to logz.io. Again, this configuration is valid in any namespace. If the namespace doesn't contain any `log-router` components then the `<filter>` directive is never activated. The `_container` is sort of a "meta" label and it allows for targeting the log stream of a specific container in a multi-container pod.
 
 If you use [Kubernetes recommended labels](https://kubernetes.io/docs/concepts/overview/working-with-objects/common-labels/) for the pods and deployments, then KFO will rewrite `.` characters into `_`.
 
@@ -431,13 +431,13 @@ Also, users don't need to bother with setting the correct `stream` parameter. *k
 
 ### Reusing output plugin definitions (since v1.6.0)
 
-Sometimes you only have a few valid options for log sinks: a dedicated S3 bucket, the ELK stack you manage, etc. The only flexibility you're after is letting namespace owners filter and parse their logs. In such cases you can abstract over an output plugin configuration - basically reducing it to a simple name which can be referenced from any namespace. For example, let's assume you have an S3 bucket for a "test" environment and you use loggly for a "staging" environment. The first thing you do is define these two output in the *admin* namespace:
+Sometimes you only have a few valid options for log sinks: a dedicated S3 bucket, the ELK stack you manage, etc. The only flexibility you're after is letting namespace owners filter and parse their logs. In such cases you can abstract over an output plugin configuration - basically reducing it to a simple name which can be referenced from any namespace. For example, let's assume you have an S3 bucket for a "test" environment and you use logz.io for a "staging" environment. The first thing you do is define these two outputs in the *admin* namespace:
 
 ```xml
 admin-ns.conf:
 <match systemd.** docker kube.kube-system.** k8s.**>
-  @type loggly
-  loggly_url https://logs-01.loggly.com/inputs/TOKEN/tag/fluentd
+  @type logzio_buffered
+  endpoint_url https://listener.logz.io:8071?token=$TOKEN
 </match>
 
 <plugin test>
@@ -449,8 +449,8 @@ admin-ns.conf:
 </plugin>
 
 <plugin staging>
-  @type loggly
-  loggly_url https://logs-01.loggly.com/inputs/TOKEN/tag/fluentd
+  @type logzio_buffered
+  endpoint_url https://listener.logz.io:8071?token=$TOKEN
 </plugin>
 ```
 
@@ -682,7 +682,6 @@ This projects tries to keep up with major releases for [Fluentd docker image](ht
 * fluent-plugin-kubernetes_sumologic (2.4.2)
 * fluent-plugin-kubernetes (0.3.1)
 * fluent-plugin-logentries (0.2.10)
-* fluent-plugin-loggly (1.0.0) - forked to fix for new fluentd api
 * fluent-plugin-logzio (0.0.22)
 * fluent-plugin-mail (0.3.0)
 * fluent-plugin-mongo (1.5.0)

diff --git a/base-image/Dockerfile b/base-image/Dockerfile
@@ -11,11 +11,12 @@ ARG RUBYOPT='-W:no-deprecated -W:no-experimental'
 
 ENV PATH $RUBY_PATH/bin:$PATH
 ENV FLUENTD_DISABLE_BUNDLER_INJECTION 1
-ENV  BUILDDEPS="\
+ENV BUILDDEPS="\
       gmp-devel \
       libffi-devel \
       bzip2 \
       shadow \
+      which \
       wget \
       which \
       vim \
@@ -31,7 +32,8 @@ ENV  BUILDDEPS="\
       libedit-devel \
       gdbm \
       gdbm-devel \
-      openssl-devel"
+      openssl-devel \
+      gpg"
 
 RUN tdnf clean all && \
     tdnf upgrade -y && \
@@ -41,67 +43,57 @@ RUN tdnf clean all && \
          procps-ng \
          util-linux \
          systemd \
-         net-tools \
-         gpg && \
-    tdnf install -y $BUILDDEPS && \
+         net-tools && \
     tdnf clean all
 
 SHELL [ "/bin/bash", "-l", "-c" ]
 
 COPY failsafe.conf entrypoint.sh Gemfile Gemfile.lock /fluentd/
 
-# Install the gems with bundler is better practice:
-RUN curl -sSL https://rvm.io/mpapis.asc | gpg --import \
-    && curl -sSL https://rvm.io/pkuczynski.asc | gpg --import \
-    && curl -sSL https://get.rvm.io | bash -s stable \
-    && source /etc/profile.d/rvm.sh \
-    && rvm autolibs disable \
-    && rvm requirements \
-    && rvm install --disable-binary $RUBY_VERSION --default \
-    && gem update --system --no-document \
-    && gem install bundler -v '>= 2.4.13' --default --no-document
-
-## Install dependencies which are not up to date and we need a fork or local download
-RUN mkdir -p /fluentd/log /fluentd/etc /fluentd/plugins /usr/local/bundle/bin/ \
+# Installing the gems with bundler is better practice.
+# We need to keep this as a single layer because of the build deps:
+# if we split it across multiple steps, we end up with lots of extra files between layers.
+RUN tdnf install -y $BUILDDEPS \
+  && curl -sSL https://rvm.io/mpapis.asc | gpg --import \
+  && curl -sSL https://rvm.io/pkuczynski.asc | gpg --import \
+  && curl -sSL https://get.rvm.io | bash -s stable \
+  && source /etc/profile.d/rvm.sh \
+  && rvm autolibs disable \
+  && rvm requirements \
+  && rvm install --disable-binary $RUBY_VERSION --default \
+  && gem update --system --no-document \
+  && gem install bundler -v '>= 2.4.15' --default --no-document \
+  && mkdir -p /fluentd/log /fluentd/etc /fluentd/plugins /usr/local/bundle/bin/ \
   && echo 'gem: --no-document' >> /etc/gemrc \
   && bundle config silence_root_warning true \
   && cd /fluentd \
   && bundle install \
   && cd /fluentd \
-  && git clone https://github.com/javiercri/fluent-plugin-google-cloud.git fluent-plugin-google-cloud \
-  && cd fluent-plugin-google-cloud \
-  && gem build fluent-plugin-google-cloud.gemspec \
-  && gem install fluent-plugin-google-cloud-*.gem \
-  && rm -rf /fluentd/fluent-plugin-google-cloud \
+  && gem specific_install https://github.com/javiercri/fluent-plugin-google-cloud.git \
   && cd /fluentd \
-  && git clone https://github.com/slimm609/fluent-plugin-loggly.git fluent-plugin-loggly \
-  && cd fluent-plugin-loggly \
-  && gem build fluent-plugin-loggly.gemspec \
-  && gem install fluent-plugin-loggly-*.gem \
-  && rm -rf /fluentd/fluent-plugin-loggly
-
-COPY plugins /fluentd/plugins
-
-## Install jemalloc
-RUN wget https://github.com/jemalloc/jemalloc/releases/download/5.3.0/jemalloc-5.3.0.tar.bz2 -P /tmp \
+  && gem sources --clear-all \
+  && ln -s $(which fluentd) /usr/local/bundle/bin/fluentd \
+  && gem cleanup \
+  && rvm cleanup all \
+  && rm -rf $RVM_PATH/src $RVM_PATH/examples $RVM_PATH/docs $RVM_PATH/archives \
+     $RUBY_PATH/lib/ruby/gems/3.*/cache $RUBY_PATH/lib/ruby/gems/3.*/doc/ \
+     /usr/share/doc /root/.bundle/cache \
+  ## Install jemalloc
+  && curl -sLo /tmp/jemalloc-5.3.0.tar.bz2 https://github.com/jemalloc/jemalloc/releases/download/5.3.0/jemalloc-5.3.0.tar.bz2 \
   && tar -C /tmp/ -xjvf /tmp/jemalloc-5.3.0.tar.bz2 \
   && cd /tmp/jemalloc-5.3.0 \
   && ./configure && make \
   && mv -v lib/libjemalloc.so* /usr/lib \
-  && rm -rf /tmp/jemalloc-5.3.0 /tmp/jemalloc-5.3.0.tar.bz2
+  && rm -rf /tmp/jemalloc-5.3.0 /tmp/jemalloc-5.3.0.tar.bz2 \
+  # cleanup build deps
+  && tdnf remove -y $BUILDDEPS \
+  && tdnf clean all
+
+COPY plugins /fluentd/plugins
+
 # Make sure fluentd picks jemalloc 5.3.0 lib as default
 ENV LD_PRELOAD="/usr/lib/libjemalloc.so"
 
-## Remove all fluentd build deps and non needit configs
-RUN cd /fluentd \
-  && gem sources --clear-all \
-  && ln -s $(which fluentd) /usr/local/bundle/bin/fluentd \
-  && tdnf remove -y $BUILDDEPS \
-  && tdnf clean all \
-  && gem uninstall bundler \
-  && gem cleanup \
-  && rm -rf $RUBY_PATH/lib/ruby/gems/3.1.4/cache $RUBY_PATH/lib/ruby/gems/3.1.4/doc/ /usr/share/doc /root/.bundle/cache
-
 EXPOSE 24444 5140
 
 USER root

diff --git a/base-image/Gemfile b/base-image/Gemfile
@@ -53,3 +53,4 @@ gem 'fluent-plugin-webhdfs', '1.5.0'
 gem 'fluent-plugin-detect-exceptions', '~> 0.0.15'
 # webhdfs requires gssapi plugin to work
 gem 'gssapi', '1.3.1'
+gem 'specific_install', '0.3.8'
diff --git a/base-image/Gemfile.lock b/base-image/Gemfile.lock
@@ -1,9 +1,9 @@
 GEM
   remote: https://rubygems.org/
   specs:
-    activemodel (7.0.5)
-      activesupport (= 7.0.5)
-    activesupport (7.0.5)
+    activemodel (7.0.6)
+      activesupport (= 7.0.6)
+    activesupport (7.0.6)
       concurrent-ruby (~> 1.0, >= 1.0.2)
       i18n (>= 1.6, < 2)
       minitest (>= 5.1)
@@ -14,32 +14,32 @@ GEM
     amq-protocol (2.3.2)
     attr_required (1.0.1)
     aws-eventstream (1.2.0)
-    aws-partitions (1.781.0)
-    aws-sdk-cloudwatchlogs (1.65.0)
-      aws-sdk-core (~> 3, >= 3.174.0)
+    aws-partitions (1.782.0)
+    aws-sdk-cloudwatchlogs (1.66.0)
+      aws-sdk-core (~> 3, >= 3.176.0)
       aws-sigv4 (~> 1.1)
-    aws-sdk-core (3.175.0)
+    aws-sdk-core (3.176.1)
       aws-eventstream (~> 1, >= 1.0.2)
       aws-partitions (~> 1, >= 1.651.0)
       aws-sigv4 (~> 1.5)
       jmespath (~> 1, >= 1.6.1)
-    aws-sdk-firehose (1.53.0)
-      aws-sdk-core (~> 3, >= 3.174.0)
+    aws-sdk-firehose (1.54.0)
+      aws-sdk-core (~> 3, >= 3.176.0)
       aws-sigv4 (~> 1.1)
-    aws-sdk-kinesis (1.47.0)
-      aws-sdk-core (~> 3, >= 3.174.0)
+    aws-sdk-kinesis (1.48.0)
+      aws-sdk-core (~> 3, >= 3.176.0)
       aws-sigv4 (~> 1.1)
-    aws-sdk-kms (1.67.0)
-      aws-sdk-core (~> 3, >= 3.174.0)
+    aws-sdk-kms (1.68.0)
+      aws-sdk-core (~> 3, >= 3.176.0)
       aws-sigv4 (~> 1.1)
-    aws-sdk-s3 (1.126.0)
-      aws-sdk-core (~> 3, >= 3.174.0)
+    aws-sdk-s3 (1.127.0)
+      aws-sdk-core (~> 3, >= 3.176.0)
       aws-sdk-kms (~> 1)
-      aws-sigv4 (~> 1.4)
-    aws-sdk-sqs (1.58.0)
-      aws-sdk-core (~> 3, >= 3.174.0)
+      aws-sigv4 (~> 1.6)
+    aws-sdk-sqs (1.59.0)
+      aws-sdk-core (~> 3, >= 3.176.0)
       aws-sigv4 (~> 1.1)
-    aws-sigv4 (1.5.2)
+    aws-sigv4 (1.6.0)
       aws-eventstream (~> 1, >= 1.0.2)
     azure-loganalytics-datacollector-api (0.5.0)
       rest-client
@@ -68,7 +68,7 @@ GEM
     elasticsearch-api (8.8.0)
       multi_json
     excon (0.100.0)
-    faraday (2.7.7)
+    faraday (2.7.9)
       faraday-net_http (>= 2.0, < 3.1)
       ruby2_keywords (>= 0.0.4)
     faraday-excon (2.1.0)
@@ -351,6 +351,7 @@ GEM
     sorted_set (1.0.3)
       rbtree
       set (~> 1.0)
+    specific_install (0.3.8)
     strptime (0.2.5)
     swd (2.0.2)
       activesupport (>= 3)
@@ -441,6 +442,7 @@ DEPENDENCIES
   kubeclient (~> 4.9.3)
   logfmt (= 0.0.10)
   oj (= 3.13.23)
+  specific_install (= 0.3.8)
 
 BUNDLED WITH
-   2.4.13
+   2.4.15
diff --git a/config-reloader/Makefile b/config-reloader/Makefile
@@ -79,7 +79,7 @@ push-latest:
 	docker push $(IMAGE):latest
 
 create-test-ns:
-	HUMIO_KEY=$(HUMIO_KEY) LOGGLY_TOKEN=$(LOGGLY_TOKEN) envsubst '$$LOGGLY_TOKEN:$$HUMIO_KEY' < examples/manifests/kfo-test.yaml | kubectl apply -f -
+	HUMIO_KEY=$(HUMIO_KEY) LOGZ_TOKEN=$(LOGZ_TOKEN) envsubst '$$LOGZ_TOKEN:$$HUMIO_KEY' < examples/manifests/kfo-test.yaml | kubectl apply -f -
 
 delete-test-ns:
 	kubectl delete -f examples/manifests/kfo-test.yaml

diff --git a/config-reloader/examples/manifests/kfo-test.yaml b/config-reloader/examples/manifests/kfo-test.yaml
@@ -174,13 +174,13 @@ data:
         with_namespace no-such-namespace
       </store>
       <store>
-        @type loggly
-        loggly_url https://logs-01.loggly.com/inputs/$LOGGLY_TOKEN/tag/fluentd
+        @type logzio_buffered
+        endpoint_url https://listener.logz.io:8071?token=$LOGZ_TOKEN
       </store>
     </match>
     <match **>
-      @type loggly
-      loggly_url https://logs-01.loggly.com/inputs/$LOGGLY_TOKEN/tag/fluentd
+      @type logzio_buffered
+      endpoint_url https://listener.logz.io:8071?token=$LOGZ_TOKEN
     </match>
 
 ---

diff --git a/config-reloader/examples/my-favorite-namespace.conf b/config-reloader/examples/my-favorite-namespace.conf
@@ -15,12 +15,12 @@
   buffer_size 7m
 </match>
 
-# all the rest goes to loggly. ** gets expanded to kube.{namespace}.**
+# all the rest goes to logz.io. ** gets expanded to kube.{namespace}.**
 <match **>
    @type copy
    <store>
-    @type loggly
-    loggly_url https://logs-01.loggly.com/inputs/TOKEN/tag/fluentd
+    @type logzio_buffered
+    endpoint_url https://listener.logz.io:8071?token=$LOGZ_TOKEN
     buffer_path /test
    </store>
    <store>

diff --git a/config-reloader/processors/extract_plugins_test.go b/config-reloader/processors/extract_plugins_test.go
@@ -10,7 +10,7 @@ import (
 func TestExtracPluginsFromKubeSystem(t *testing.T) {
 	s := `
 	<match kube.kube-system.**>
-	  @type loggly
+	  @type logzio_buffered
 	</match>
 
 	<plugin p1>

diff --git a/config-reloader/processors/share_test.go b/config-reloader/processors/share_test.go
@@ -69,8 +69,8 @@ func TestProcessShareDirectiveFromReceivingNs(t *testing.T) {
 	//   </store>
 	// </match>
 	// <match **>
-	//   @type loggly
-	//   loggly_url https://logs-01.loggly.com/inputs/$LOGGLY_TOKEN/tag/fluentd
+	//   @type logzio_buffered
+	//   endpoint_url https://listener.logz.io:8071?token=$LOGZ_TOKEN
 	// </match>
 	//`