Kubernetes Logging with Fluentd and Elasticsearch

The EFK (Elasticsearch, Fluentd, Kibana) stack provides comprehensive log aggregation and analysis for Kubernetes clusters. This guide covers deploying the EFK stack, configuring Fluentd as a DaemonSet, parsing logs with ConfigMaps, managing Elasticsearch indices, and visualizing logs with Kibana dashboards on your VPS and baremetal infrastructure.

Table of Contents

EFK Stack Overview

Components

Elasticsearch: Distributed search and analytics engine

  • Stores logs as documents
  • Provides full-text search
  • Scalable and highly available

Fluentd: Unified logging layer

  • Collects logs from containers
  • Parses and enriches log data
  • Routes logs to Elasticsearch

Kibana: Visualization platform

  • Query and analyze logs
  • Create dashboards
  • View log events in real-time

Architecture

Kubernetes Cluster
├── Container Logs (stdout/stderr)
├── Node Logs (/var/log)
└── System Logs
        ↓
   Fluentd DaemonSet
   (runs on every node)
        ↓
   Log Processing
   (parse, enrich, route)
        ↓
   Elasticsearch Cluster
   (distributed storage)
        ↓
   Kibana
   (visualization)

Elasticsearch Installation

Prerequisites

  • Kubernetes v1.16+
  • 3+ nodes recommended (for production)
  • 8GB+ RAM per Elasticsearch node
  • 20GB+ storage per node

Installing Elasticsearch via Helm

# Add Helm repository (hostname is lowercase — "Helm.elastic.co" was a typo)
helm repo add elastic https://helm.elastic.co
helm repo update

# Create namespace
kubectl create namespace logging

# Install Elasticsearch
helm install elasticsearch elastic/elasticsearch \
  -n logging \
  -f elasticsearch-values.yaml

Elasticsearch Values File

# elasticsearch-values.yaml
replicas: 3

nodeGroup: data

volumeClaimTemplate:
  accessModes: ["ReadWriteOnce"]
  resources:
    requests:
      storage: 30Gi
  storageClassName: fast-ssd

resources:
  limits:
    cpu: 1000m
    memory: 2Gi
  requests:
    cpu: 500m
    memory: 1Gi

# NOTE(review): recent elastic/elasticsearch chart versions expose extra
# environment variables via an `extraEnvs` list rather than an `env` map —
# confirm against the chart version in use. Prefer sourcing ELASTIC_PASSWORD
# from a Kubernetes Secret instead of a plain-text value committed to VCS.
env:
  ELASTIC_PASSWORD: "SetStrongPassword123"
  # The invalid `discovery.type: "multi-node"` entry was removed:
  # discovery.type only accepts "single-node"; multi-node discovery is
  # the default when the setting is absent.
  # NOTE(review): chart pod names follow "<clusterName>-<nodeGroup>-<N>",
  # so with nodeGroup "data" they are elasticsearch-data-N — confirm with
  # `kubectl get pods -n logging` (the chart normally derives
  # initial_master_nodes itself, making this override unnecessary).
  cluster.initial_master_nodes:
  - "elasticsearch-data-0"
  - "elasticsearch-data-1"
  - "elasticsearch-data-2"

esConfig:
  elasticsearch.yml: |
    xpack.security.enabled: true
    xpack.security.enrollment.enabled: true
    xpack.security.http.ssl:
      enabled: true
      keystore.path: certs/http.p12
    xpack.security.transport.ssl:
      enabled: true
      verification_mode: certificate
      keystore.path: certs/transport.p12
      truststore.path: certs/transport.p12

Verify Elasticsearch

# Wait for ready
# NOTE(review): the chart names the StatefulSet "<clusterName>-<nodeGroup>"
# (e.g. elasticsearch-data) — confirm with `kubectl get sts -n logging`
kubectl rollout status -n logging statefulset/elasticsearch --timeout=5m

# Check cluster health
kubectl port-forward -n logging svc/elasticsearch 9200:9200 &

# Verify — HTTP TLS is enabled in elasticsearch-values.yaml, so the endpoint
# is https; -k skips verification of the self-signed certificate
curl -k -u elastic:SetStrongPassword123 https://localhost:9200/_cluster/health

Fluentd Configuration

Fluentd DaemonSet

# Identity the Fluentd DaemonSet pods run as.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: fluentd
  namespace: logging
---
# Read-only access to pod and namespace metadata across the cluster;
# required by the kubernetes_metadata filter to enrich log events.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: fluentd
rules:
- apiGroups: [""]
  resources:
  - pods
  - namespaces
  verbs: ["get", "list", "watch"]
---
# Binds the ClusterRole above to the fluentd ServiceAccount.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: fluentd
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: fluentd
subjects:
- kind: ServiceAccount
  name: fluentd
  namespace: logging
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: fluentd-config
  namespace: logging
data:
  fluent.conf: |
    # Tail every container log file on the node; the pos file records the
    # read offset so restarts neither re-ingest nor drop lines.
    <source>
      @type tail
      @id input_tail_container_logs
      path /var/log/containers/*.log
      pos_file /var/log/fluentd-containers.log.pos
      tag kubernetes.*
      read_from_head true
      <parse>
        @type json
        time_format %Y-%m-%dT%H:%M:%S.%NZ
      </parse>
    </source>

    # Enrich each event with pod/namespace/label metadata from the API
    # server (requires the get/list/watch RBAC on pods and namespaces).
    <filter kubernetes.**>
      @type kubernetes_metadata
      @id filter_kube_metadata
      kubernetes_url "#{ENV['FLUENT_FILTER_KUBERNETES_URL'] || 'https://' + ENV.fetch('KUBERNETES_SERVICE_HOST') + ':' + ENV.fetch('KUBERNETES_SERVICE_PORT') + '/api'}"
      verify_ssl "#{ENV['KUBERNETES_VERIFY_SSL'] || true}"
      ca_file "#{ENV['KUBERNETES_CA_FILE']}"
      skip_labels "#{ENV['FLUENT_FILTER_KUBERNETES_SKIP_LABELS'] || 'false'}"
      skip_container_metadata "#{ENV['FLUENT_FILTER_KUBERNETES_SKIP_CONTAINER_METADATA'] || 'false'}"
      skip_master_url "#{ENV['FLUENT_FILTER_KUBERNETES_SKIP_MASTER_URL'] || 'false'}"
    </filter>

    # Ship everything to Elasticsearch in daily logstash-YYYY.MM.DD indices.
    <match **>
      @type elasticsearch
      @id output_elasticsearch
      @log_level info
      include_tag_key true
      host elasticsearch
      port 9200
      path /
      scheme "#{ENV['FLUENT_ELASTICSEARCH_SCHEME'] || 'http'}"
      ssl_verify_mode "#{ENV['ELASTICSEARCH_SSL_VERIFY_MODE'] || 'ssl_verify_none'}"
      ssl_version "#{ENV['ELASTICSEARCH_SSL_VERSION'] || 'TLSv1_2'}"
      # xpack.security is enabled on the cluster, so requests must be
      # authenticated; set these env vars on the DaemonSet container.
      user "#{ENV['FLUENT_ELASTICSEARCH_USER'] || 'elastic'}"
      password "#{ENV['FLUENT_ELASTICSEARCH_PASSWORD']}"
      logstash_format true
      logstash_prefix logstash
      logstash_dateformat %Y.%m.%d
      include_timestamp false
      # NOTE(review): Elasticsearch 8.x rejects explicit document types —
      # drop type_name (or enable suppress_type_name) when targeting 8.x.
      type_name "_doc"
      # The former <elasticsearch_dynamic_config> section is not a valid
      # fluent-plugin-elasticsearch directive; its recognized options are
      # plain plugin parameters (hoisted below) and the unrecognized ones
      # were removed.
      max_retry_get_es_version 1
      max_retry_putting_template 10
      <buffer>
        @type file
        path /var/log/fluentd-buffers/kubernetes.system.buffer
        flush_mode interval
        retry_type exponential_backoff
        flush_interval 5s
        retry_forever false
        retry_max_interval 30
        retry_max_times 17
        chunk_limit_size "#{ENV['FLUENT_ELASTICSEARCH_BUFFER_CHUNK_LIMIT_SIZE'] || '8M'}"
        queue_limit_length 8
        overflow_action block
      </buffer>
    </match>
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: fluentd
  namespace: logging
  labels:
    k8s-app: fluentd-logging
spec:
  selector:
    matchLabels:
      k8s-app: fluentd-logging
  template:
    metadata:
      labels:
        k8s-app: fluentd-logging
    spec:
      serviceAccount: fluentd
      serviceAccountName: fluentd
      tolerations:
      - key: node-role.kubernetes.io/master
        effect: NoSchedule
      - key: node-role.kubernetes.io/control-plane
        effect: NoSchedule
      containers:
      - name: fluentd
        image: fluent/fluentd-kubernetes-daemonset:v1-debian-elasticsearch
        env:
        - name: FLUENT_ELASTICSEARCH_HOST
          value: elasticsearch
        - name: FLUENT_ELASTICSEARCH_PORT
          value: "9200"
        - name: FLUENT_ELASTICSEARCH_SCHEME
          value: http
        - name: FLUENTD_SYSTEMD_CONF
          value: disable
        volumeMounts:
        - name: varlog
          mountPath: /var/log
        - name: varlibdockercontainers
          mountPath: /var/lib/docker/containers
          readOnly: true
        - name: config
          mountPath: /fluentd/etc/fluent.conf
          subPath: fluent.conf
      volumes:
      - name: varlog
        hostPath:
          path: /var/log
      - name: varlibdockercontainers
        hostPath:
          path: /var/lib/docker/containers
      - name: config
        configMap:
          name: fluentd-config

Kibana Setup

Installing Kibana

helm install kibana elastic/kibana \
  -n logging \
  -f kibana-values.yaml

Kibana Values

# kibana-values.yaml
replicaCount: 1

# NOTE(review): Elasticsearch 8.x refuses the "elastic" superuser as
# Kibana's elasticsearch.username — use the built-in kibana_system user or
# a service-account token instead; confirm the deployed stack version.
# Also confirm the chart version supports these keys: some versions expect
# credentials via extraEnvs or elasticsearchCredentialSecret instead.
# The http:// scheme will not work if xpack.security.http.ssl is enabled
# on the cluster (as in elasticsearch-values.yaml) — use https there.
elasticsearchHosts: "http://elasticsearch:9200"
elasticsearchUsername: elastic
elasticsearchPassword: "SetStrongPassword123"

service:
  type: ClusterIP
  port: 5601

resources:
  limits:
    cpu: 1000m
    memory: 1Gi
  requests:
    cpu: 100m
    memory: 256Mi

ingress:
  enabled: true
  ingressClassName: nginx
  annotations:
    cert-manager.io/cluster-issuer: letsencrypt-prod
  hosts:
  - host: kibana.example.com
    paths:
    - path: /
      pathType: Prefix
  tls:
  - secretName: kibana-tls
    hosts:
    - kibana.example.com

Accessing Kibana

# Port forward
# NOTE(review): the elastic/kibana chart typically names the Service
# "kibana-kibana" — verify with `kubectl get svc -n logging`
kubectl port-forward -n logging svc/kibana 5601:5601

# Access: http://localhost:5601

Index Management

Creating Index Patterns

In Kibana:

  1. Management → Stack Management → Index Patterns (renamed "Data Views" in Kibana 8+)
  2. Create Index Pattern
  3. Index Pattern: logstash-*
  4. Time Field: @timestamp
  5. Create Pattern

Index Lifecycle Management (ILM)

Set up automatic index cleanup:

# Create an ILM policy. The request body must wrap "phases" inside a
# top-level "policy" object (the policy name comes from the URL, not the
# body). To take effect, attach the policy to indices via an index template
# that sets index.lifecycle.name — and, for rollover, an alias via
# index.lifecycle.rollover_alias.
curl -u elastic:password -X PUT "localhost:9200/_ilm/policy/logs-policy" \
  -H "Content-Type: application/json" \
  -d '{
    "policy": {
      "phases": {
        "hot": {
          "min_age": "0d",
          "actions": {
            "rollover": {
              "max_primary_shard_size": "50gb",
              "max_age": "1d"
            }
          }
        },
        "warm": {
          "min_age": "7d",
          "actions": {
            "set_priority": {
              "priority": 50
            }
          }
        },
        "cold": {
          "min_age": "30d",
          "actions": {
            "set_priority": {
              "priority": 0
            }
          }
        },
        "delete": {
          "min_age": "90d",
          "actions": {
            "delete": {}
          }
        }
      }
    }
  }'

Monitoring Indices

# List indices (use https and -k if HTTP TLS is enabled on the cluster)
curl -u elastic:password http://localhost:9200/_cat/indices

# Get index sizes, largest first — the URL must be quoted, otherwise the
# shell treats "&" as a background operator and truncates the query string
curl -u elastic:password "http://localhost:9200/_cat/indices?v=true&s=store.size:desc"

Log Parsing

Custom Log Parsing

Update Fluentd ConfigMap with custom parsers:

# NOTE(review): this pattern overlaps /var/log/containers/*.log in the main
# fluent.conf — matching files would be tailed twice and events duplicated;
# exclude them there (exclude_path) or narrow one of the patterns.
<source>
  @type tail
  path /var/log/containers/*_production_*.log
  pos_file /var/log/fluentd-production.pos
  tag kubernetes.production.*
  <parse>
    @type json
    time_format %Y-%m-%dT%H:%M:%S.%NZ
  </parse>
</source>

# Re-parse the container's "log" field into time/level/message fields.
# NOTE(review): the multiline parser is designed for the tail input, where
# consecutive file lines can be joined; inside a parser filter each record
# is already a single event, so format_firstline has nothing to join —
# confirm this behaves as intended (concat filter is the usual alternative).
<filter kubernetes.production.**>
  @type parser
  key_name log
  <parse>
    @type multiline
    format_firstline /\d{4}-\d{2}-\d{2}/
    format1 /^(?<time>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}.\d+) \[(?<level>\w+)\] (?<message>.*)/
    time_format %Y-%m-%d %H:%M:%S.%L
  </parse>
</filter>

Filtering and Enriching

# Stamp every event with static cluster metadata; enable_ruby lets
# Socket.gethostname be evaluated when the config is loaded.
<filter kubernetes.**>
  @type record_transformer
  enable_ruby true
  <record>
    cluster_name "production"
    environment "prod"
    hostname "#{Socket.gethostname}"
  </record>
</filter>

# Drop noisy events whose "log" field matches /health.*200/
# (successful health-check requests).
<filter kubernetes.**>
  @type grep
  <exclude>
    key log
    pattern /health.*200/
  </exclude>
</filter>

Practical Examples

Example: Complete EFK Stack Deployment

#!/bin/bash
# Deploys the full EFK stack into the "logging" namespace.

echo "=== Creating logging namespace ==="
# Idempotent: re-running the script no longer fails if the namespace exists
kubectl create namespace logging --dry-run=client -o yaml | kubectl apply -f -

echo "=== Installing Elasticsearch ==="
# hostname is lowercase: helm.elastic.co
helm repo add elastic https://helm.elastic.co
helm repo update

helm install elasticsearch elastic/elasticsearch \
  -n logging \
  --set replicas=3 \
  --set "env.ELASTIC_PASSWORD=LoggingPassword123" \
  --set "volumeClaimTemplate.resources.requests.storage=30Gi"

echo "=== Installing Kibana ==="
helm install kibana elastic/kibana \
  -n logging \
  --set elasticsearchPassword=LoggingPassword123

echo "=== Installing Fluentd ==="
kubectl apply -f fluentd-daemonset.yaml

echo "=== Waiting for stack ==="
# The elastic charts derive workload names from clusterName/nodeGroup:
# with default values the StatefulSet is "elasticsearch-master" and the
# Kibana Deployment/Service are "kibana-kibana" — confirm with
# `kubectl get sts,deploy,svc -n logging`
kubectl rollout status -n logging statefulset/elasticsearch-master --timeout=5m
kubectl rollout status -n logging deployment/kibana-kibana --timeout=5m
kubectl rollout status -n logging daemonset/fluentd --timeout=5m

echo "=== EFK Stack deployed ==="
kubectl port-forward -n logging svc/kibana-kibana 5601:5601 &
echo "Access Kibana: http://localhost:5601"

Example: Monitoring Application Logs

apiVersion: v1
kind: ConfigMap
metadata:
  name: fluentd-app-config
  namespace: logging
data:
  fluent-app.conf: |
    # Monitor application logs
    <source>
      @type tail
      path /var/log/containers/*_production_app-*.log
      pos_file /var/log/app.pos
      tag app.production
      <parse>
        @type json
      </parse>
    </source>

    # Add app metadata
    <filter app.**>
      @type record_transformer
      <record>
        app_name "myapp"
        service_name "backend"
      </record>
    </filter>

    # Route to Elasticsearch
    # NOTE(review): no user/password or TLS options here — this will be
    # rejected by a cluster with xpack.security enabled (as configured in
    # elasticsearch-values.yaml); add credentials before using.
    <match app.**>
      @type elasticsearch
      host elasticsearch
      port 9200
      logstash_format true
      logstash_prefix "app-logs"
    </match>

Conclusion

The EFK stack provides comprehensive logging and monitoring for Kubernetes clusters on VPS and baremetal infrastructure. By deploying Fluentd as a DaemonSet, parsing logs with ConfigMaps, and visualizing with Kibana, you create a powerful troubleshooting and monitoring platform. Start with basic log collection, gradually add custom parsing rules, implement index lifecycle management for cost optimization, and leverage Kibana dashboards for operational insights. Regular maintenance of indices and Elasticsearch cluster health ensures reliable logging infrastructure supporting your production deployments.