Kubernetes GKE setup (Batteries included)

Photo by Growtika on Unsplash

Kubernetes GKE setup (Batteries included)

From Terraform setup to Cluster autoscaler, wordpress app with HPA configured, ingress controller, monitoring, loadtesting

·

10 min read

Things to look for

This blog post is a continuation of an earlier post.

What to consider before we start with the configuration:

  • Our main goal is to show how to configure a managed Kubernetes cluster running our desired application — WordPress — made highly available with a Horizontal Pod Autoscaler and health checks.

  • It also contains some optimizations for WordPress, but these should not be replicated as-is in your own production environment.

  • It will provision the infrastructure via Terraform / OpenTofu.

  • The cluster is capable of autoscaling.

  • Manifests are provided so you can deploy everything yourself.

  • We will be provisioning GKE cluster (where wordpress, ingress controller will be running), Google SQL for wordpress

  • Make sure you have gcloud configured to that particular project and the specific APIs are enabled!

Infrastructure part

Terraform

Providers that we are going to use:

### providers.tf

# Provider requirements for this module.
terraform {
  required_providers {
    # Writes the Cloud SQL service-account key to a local file (key.json).
    local = {
      source  = "hashicorp/local"
      version = "~> 2.5"
    }
    # Provisions all GCP resources (GKE cluster, node pool, Cloud SQL, IAM).
    google = {
      source  = "hashicorp/google"
      version = "~> 5.23"
    }
    # Used only to run `gcloud services enable` via a local-exec provisioner.
    # NOTE(review): pinned to an exact version unlike the other providers —
    # presumably intentional; consider "~> 3.2" for consistency.
    null = {
      source  = "hashicorp/null"
      version = "= 3.2.2"
    }
    # Generates the Cloud SQL user's password.
    random = {
      source  = "hashicorp/random"
      version = "~> 3.6"
    }
  }
}

# Credentials come from the Application Default Credentials file created by
# `gcloud auth application-default login` (see var.google_credentials_file).
provider "google" {
  credentials = file(var.google_credentials_file)
  project     = var.project_id
  region      = var.region
}

provider "local" {
}

provider "null" {
}

provider "random" {
}

Variables we are going to use — change them according to your needs.

### vars.tf
variable "project_id" {
  description = "Your Google Cloud project ID"
  type        = string
}

variable "google_credentials_file" {
  description = "Path to the Application Default Credentials file; create it with `gcloud auth application-default login`"
  type        = string
  default     = "/Users/dipankardas/.config/gcloud/application_default_credentials.json"
}

variable "label_node_pool" {
  description = "Labels applied to nodes in the custom node pool"
  type        = map(string)
  default = {
    environment = "dev"
    type        = "init-custom"
  }
}

variable "region" {
  description = "Google Cloud region where resources will be deployed"
  default     = "asia-south1"
  type        = string
}

variable "cluster_name" {
  description = "Name of the GKE cluster"
  default     = "wordpress-k8s"
  type        = string
}

variable "node_machine_type" {
  description = "Machine type for GKE nodes"
  default     = "e2-medium"
  type        = string
}

variable "node_disk_size_gb" {
  description = "Disk size for GKE nodes (in GB)"
  default     = 30
  type        = number
}

# NOTE: the four limits below feed the cluster_autoscaling resource_limits in
# main.tf; the original descriptions were copy-pasted and incorrect.
variable "max_cluster_mem_limit" {
  description = "Maximum amount of memory (GB) for the entire cluster"
  default     = 64
  type        = number
}

variable "min_cluster_mem_limit" {
  description = "Minimum amount of memory (GB) for the entire cluster"
  default     = 8
  type        = number
}

variable "max_cluster_cpu_limit" {
  description = "Maximum number of CPU cores for the entire cluster"
  default     = 32
  type        = number
}

variable "min_cluster_cpu_limit" {
  description = "Minimum number of CPU cores for the entire cluster"
  default     = 4
  type        = number
}

variable "sql_instance_name" {
  description = "Name of the Cloud SQL instance"
  default     = "wordpress-sql-instance"
  type        = string
}

variable "sql_tier" {
  description = "Tier for the Cloud SQL instance"
  default     = "db-f1-micro"
  type        = string
}

variable "sql_db" {
  description = "sql database name"
  default     = "wordpress"
  type        = string
}

variable "sql_user" {
  description = "sql username"
  default     = "wordpress"
  type        = string
}

variable "sql_iam_name" {
  description = "Account id / display name of the Cloud SQL proxy service account"
  default     = "cloudsql-proxy"
  type        = string
}

variable "k8s_ns" {
  description = "Kubernetes namespace the workload is deployed into"
  default     = "demo"
  type        = string
}

variable "k8s_certmanager_version" {
  description = "cert-manager Helm chart version installed by post-install.sh"
  default     = "1.14.3"
  type        = string
}

variable "k8s_nginx_ingress_version" {
  description = "ingress-nginx controller version installed by post-install.sh"
  default     = "1.10.0"
  type        = string
}

Main Terraform resources to provision the GKE cluster and the Cloud SQL instance:

####### main.tf
# Separately-managed node pool attached to the GKE cluster (the cluster's
# default pool is removed; see google_container_cluster.my_cluster).
resource "google_container_node_pool" "primary_preemptible_nodes" {
  name     = "custom-pool"
  cluster  = google_container_cluster.my_cluster.id
  location = var.region

  initial_node_count = 1 # NOTE: its no of nodes per zone
  node_config {
    # Preemptible VMs are cheaper but can be reclaimed at any time — fine for
    # a demo, not for production.
    preemptible  = true
    machine_type = var.node_machine_type
    disk_size_gb = var.node_disk_size_gb

    oauth_scopes = [
      "https://www.googleapis.com/auth/cloud-platform",
      "https://www.googleapis.com/auth/logging.write",
      "https://www.googleapis.com/auth/monitoring"
    ]
    labels = var.label_node_pool
  }
  # total_* counts are cluster-wide across all zones (unlike initial_node_count).
  autoscaling {
    total_max_node_count = 6
    total_min_node_count = 3
  }

  management {
    auto_repair  = true
    auto_upgrade = true
  }
  # Fetch kubeconfig once the pool is ready so post-install.sh can reach the
  # cluster.
  # NOTE(review): hard-codes /bin/zsh as the interpreter — assumes a machine
  # (e.g. macOS) with zsh installed; confirm before running elsewhere.
  provisioner "local-exec" {
    command     = "gcloud container clusters get-credentials ${var.cluster_name} --project ${var.project_id} --region ${var.region} && kubectl cluster-info"
    interpreter = ["/bin/zsh", "-c"]
    working_dir = path.module
  }
}

# Enables the GCP APIs this module needs (GKE, Cloud SQL Admin, Service Usage).
# NOTE(review): runs through the local gcloud CLI and /bin/zsh — assumes both
# are installed and gcloud is authenticated against the target project.
resource "null_resource" "enable_api" {
  provisioner "local-exec" {
    command     = "gcloud services enable container.googleapis.com sqladmin.googleapis.com serviceusage.googleapis.com"
    interpreter = ["/bin/zsh", "-c"]
    working_dir = path.module
  }
}

# Regional GKE cluster; all workloads run on the separately-managed node pool.
resource "google_container_cluster" "my_cluster" {
  depends_on = [
    null_resource.enable_api
  ]
  name                = var.cluster_name
  location            = var.region
  deletion_protection = false
  # datapath_provider   = "ADVANCED_DATAPATH"

  # NOTE We can't create a cluster with no node pool defined, but we want to only use
  # separately managed node pools. So we create the smallest possible default
  # node pool and immediately delete it.
  initial_node_count       = 1 # NOTE: its no of nodes per zone
  remove_default_node_pool = true
  # Node auto-provisioning: GKE may create additional node pools within these
  # cluster-wide CPU and memory bounds (values come from vars.tf).
  cluster_autoscaling {
    enabled = true
    resource_limits {
      resource_type = "cpu"
      maximum       = var.max_cluster_cpu_limit
      minimum       = var.min_cluster_cpu_limit
    }
    resource_limits {
      resource_type = "memory"
      maximum       = var.max_cluster_mem_limit
      minimum       = var.min_cluster_mem_limit
    }

    # Defaults applied to auto-provisioned node pools.
    auto_provisioning_defaults {
      disk_size = var.node_disk_size_gb
      oauth_scopes = [
        "https://www.googleapis.com/auth/cloud-platform",
        "https://www.googleapis.com/auth/logging.write",
        "https://www.googleapis.com/auth/monitoring"
      ]
    }
  }
  vertical_pod_autoscaling {
    enabled = true
  }
  # Keep the horizontal-pod-autoscaling addon enabled — the wordpress HPA
  # manifest depends on it.
  addons_config {
    horizontal_pod_autoscaling {
      disabled = false
    }
  }
}

# Wordpress database inside the Cloud SQL instance. ABANDON leaves the
# database in place (not dropped) when the Terraform resource is destroyed.
resource "google_sql_database" "database_deletion_policy" {
  name            = var.sql_db
  instance        = google_sql_database_instance.my_sql_instance.name
  deletion_policy = "ABANDON"
}

# Random password for the wordpress SQL user; passed to post-install.sh,
# which stores it in the cloudsql-db-credentials Kubernetes secret.
resource "random_password" "password" {
  length           = 16
  special          = true
  override_special = "!#$%&*()-_=+[]{}<>:?"
}

# Wordpress SQL user. host = "%" allows connections from any host — traffic
# still goes through the authenticated cloudsql-proxy sidecar in the pod.
resource "google_sql_user" "users" {
  name     = var.sql_user
  instance = google_sql_database_instance.my_sql_instance.name
  host     = "%"
  password = random_password.password.result
}

# MySQL 8 Cloud SQL instance backing wordpress.
resource "google_sql_database_instance" "my_sql_instance" {
  depends_on = [
    null_resource.enable_api
  ]
  name                = var.sql_instance_name
  database_version    = "MYSQL_8_0"
  region              = var.region
  deletion_protection = "false"
  settings {
    tier = var.sql_tier
  }
}

# Service account used by the cloudsql-proxy sidecar to reach Cloud SQL.
resource "google_service_account" "my_service_account" {

  depends_on = [
    google_container_node_pool.primary_preemptible_nodes
  ]

  account_id   = var.sql_iam_name
  display_name = var.sql_iam_name
}

# Grant the service account the Cloud SQL Client role.
# NOTE: google_project_iam_binding is authoritative for a role — applying it
# would REMOVE any other members already holding roles/cloudsql.client in the
# project. google_project_iam_member is non-authoritative and only adds this
# single membership.
resource "google_project_iam_member" "cloudsql_binding" {
  project = var.project_id
  role    = "roles/cloudsql.client"
  member  = "serviceAccount:${google_service_account.my_service_account.email}"
}

# Key for the proxy; written to key.json below and mounted as a k8s secret.
resource "google_service_account_key" "my_key" {
  service_account_id = google_service_account.my_service_account.name
}

# Write the service-account key to key.json so post-install.sh can turn it
# into the cloudsql-instance-credentials Kubernetes secret, then run the
# post-install script.
# local_sensitive_file (instead of local_file) is used because the content is
# a private key: it creates the file with 0600 permissions and marks the
# content sensitive so it is not printed in plans/logs.
resource "local_sensitive_file" "myaccountjson" {
  content  = base64decode(google_service_account_key.my_key.private_key)
  filename = "key.json"
  depends_on = [
    google_sql_database.database_deletion_policy,
    google_sql_user.users
  ]

  provisioner "local-exec" {
    environment = {
      "NAMESPACE"        = "${var.k8s_ns}"
      "SQL_USR"          = "${var.sql_user}"
      "SQL_PASS"         = random_password.password.result
      "CERTMANAGER_VER"  = "${var.k8s_certmanager_version}"
      "NGINXINGRESS_VER" = "${var.k8s_nginx_ingress_version}"
    }
    command     = "./post-install.sh"
    interpreter = ["/bin/bash", "-c"]
    working_dir = path.module
  }
}

# Connection name needed for the cloudsql-proxy -instances flag (used by the
# kustomize overlay patch).
output "sql_connection_name" {
  value = google_sql_database_instance.my_sql_instance.connection_name
}

Create a script named post-install.sh:

#!/bin/bash
# post-install.sh — run by Terraform (local-exec provisioner) after the
# cluster and SQL resources exist. Expects NAMESPACE, SQL_USR, SQL_PASS,
# CERTMANAGER_VER and NGINXINGRESS_VER in the environment, and key.json in
# the working directory.

PS4='+\[\033[0;33m\](\[\033[0;36m\]${BASH_SOURCE##*/}:${LINENO}\[\033[0;33m\])\[\033[0m\] '

set -xe

# create the namespace where our workload is going to be deployed
kubectl create ns "$NAMESPACE" || echo "already created namespace"

# create the secrets for wordpress to work
kubectl delete secret cloudsql-db-credentials --namespace="$NAMESPACE" || echo "cloudsql-db-credentials not there"
# xtrace would echo the database password into the logs; disable it around
# the command that handles the secret.
set +x
kubectl create secret generic cloudsql-db-credentials --from-literal=username="$SQL_USR" --from-literal=password="$SQL_PASS" --namespace="$NAMESPACE"
set -x

kubectl delete secret cloudsql-instance-credentials --namespace="$NAMESPACE" || echo "cloudsql-instance-credentials not there"
kubectl create secret generic cloudsql-instance-credentials --from-file=key.json --namespace="$NAMESPACE"

# install certmanager and nginx controller for HTTPS and ingress
helm repo add cert-manager https://charts.jetstack.io

kubectl create ns cert-manager || echo "already created namespace"

helm install my-cert-manager cert-manager/cert-manager --version "$CERTMANAGER_VER" --set installCRDs=true --set global.leaderElection.namespace=cert-manager || echo "already there"

kubectl apply -f "https://raw.githubusercontent.com/kubernetes/ingress-nginx/controller-v$NGINXINGRESS_VER/deploy/static/provider/cloud/deploy.yaml"
⚠ WARNING: run the commands below BEFORE running Terraform. They create the Application Default Credentials that Terraform needs; they are stored in your local gcloud config directory (for me it was /Users/dipankardas/.config/gcloud/application_default_credentials.json). Change the value of var.google_credentials_file to match your path.
#!/bin/bash
# One-time gcloud bootstrap: creates the Application Default Credentials file
# referenced by var.google_credentials_file. Run this before terraform apply.
gcloud init
gcloud auth application-default login

Once these things are done you can initialize the Terraform module and apply it (`tf` below is an alias for `terraform`):

export TF_VAR_project_id="<project>"

tf init
tf plan
tf apply

tf destroy # use this to remove resources

Kubernetes deployment for wordpress

this is the folder structure

ingress.yaml

# ACME issuer used by cert-manager to obtain Let's Encrypt certificates via
# the HTTP-01 challenge served through the nginx ingress.
apiVersion: cert-manager.io/v1
kind: ClusterIssuer
metadata:
  # ClusterIssuer is cluster-scoped: it takes no namespace (the original
  # `namespace: demo` here was ignored and misleading).
  name: kubeissuer-wp
spec:
  acme:
    server: https://acme-v02.api.letsencrypt.org/directory
    email: <>
    privateKeySecretRef:
      name: kubeissuer-wp
    solvers:
    - http01:
        ingress:
          class: nginx
---
# Certificate stored in the tls-wp secret and served by the ingress below.
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
  namespace: demo
  name: kubecert-wp
spec:
  secretName: tls-wp
  issuerRef:
    name: kubeissuer-wp
    kind: ClusterIssuer
  commonName: <>
  dnsNames:
  - <>
---

apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  annotations:
    cert-manager.io/cluster-issuer: kubeissuer-wp
    # Deprecated annotation kept for older controllers; spec.ingressClassName
    # below is the authoritative field.
    kubernetes.io/ingress.class: nginx
  namespace: demo
  name: kube-certs-ingress
spec:
  ingressClassName: nginx
  tls:
  - hosts:
      - <>
    secretName: tls-wp

  rules:
  - host: <>
    http:
      paths:
      - backend:
          service:
            name: wordpress
            port:
              number: 80
        path: /
        pathType: Prefix

environment/staging/kustomization.yaml

# Only chalenge which is not addressed
# is how to configure the no of replicas if using gitops tool like argocd
# other than that you configure the newTag with your gitops pipeline
# using j2 templating

namespace: demo
replicas:
  - name: wordpress
    count: 1
images:
  - newName: ghcr.io/<>
    name: placeholder-base-image
    newTag: 66d9bf1
resources:
  - ../../base
patches:
- target:
    group: apps
    version: v1
    kind: Deployment
    name: wordpress
  patch: |-
    - op: replace
      path: /spec/template/spec/containers/2/command
      value:
        - /cloud_sql_proxy
        - -instances=<GET YOUR MYSQL INSTANCE CONNECTION NAME FROM OUTPUT OF TERRAFORM APPLY>=tcp:3306
        - -credential_file=/secrets/cloudsql/key.json

Next is the big file

base/site.yaml

💡 Note: I have used a custom WordPress image; feel free to use the official wordpress-fpm image instead — just modify images[0].newName in the kustomization above.

😇 The HPA is used to configure when WordPress should scale; you can also tune the thresholds.

You can configure the resource limits for the php

---
# Autoscaling for the wordpress Deployment: 1-10 replicas driven by average
# CPU utilization and average per-pod memory usage.
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: wordpress-hpa
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: wordpress
  minReplicas: 1
  maxReplicas: 10
  metrics:
  - type: Resource
    resource:
      name: cpu
      target:
        type: Utilization
        # Scale out when average CPU across pods exceeds 70%.
        averageUtilization: 70
  - type: Resource
    resource:
      name: memory
      target:
        type: AverageValue
        # NOTE(review): memory here is per pod (all containers summed); 350Mi
        # exceeds the wordpress container's own 300Mi limit — confirm this
        # threshold is intended before the pod OOMs instead of scaling.
        averageValue: 350Mi
---
# php-fpm pool configuration, mounted over the image default at
# /usr/local/etc/php-fpm.d/www.conf. The /healthz ping endpoint defined here
# backs the nginx container's liveness probe.
# NOTE(review): pm.max_children=205 and memory_limit=2048M far exceed the
# wordpress container's 300Mi memory limit — these are load-test settings the
# post explicitly says not to replicate in production; confirm before reuse.
apiVersion: v1
kind: ConfigMap
metadata:
  name: fpm-conf
data:
  www.conf: |
    [www]
    user = www-data
    group = www-data
    listen = 127.0.0.1:9000
    listen.backlog = 65535
    pm = dynamic
    pm.max_children = 205
    pm.start_servers = 16
    pm.min_spare_servers = 16
    pm.max_spare_servers = 32
    pm.max_requests = 500
    ping.path = /healthz
    ping.response = ok
    request_terminate_timeout = 300
    php_admin_value[memory_limit] = 2048M
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: nginx-conf
data:
  default.conf: |
    server {

            server_name _;

            index index.php index.html index.htm;

            root /var/www/html;

            location / {
                    try_files $uri $uri/ /index.php$is_args$args;
            }

            location ~ ^/(healthz)$ {
                include fastcgi_params;
                fastcgi_pass 127.0.0.1:9000;
                fastcgi_param SCRIPT_FILENAME $fastcgi_script_name;
            }

            location ~ \.php$ {
                    try_files $uri =404;
                    fastcgi_split_path_info ^(.+\.php)(/.+)$;
                    fastcgi_pass 127.0.0.1:9000;
                    fastcgi_index index.php;
                    include fastcgi_params;
                    fastcgi_param SCRIPT_FILENAME $document_root$fastcgi_script_name;
                    fastcgi_param PATH_INFO $fastcgi_path_info;
            }

            location ~ /\.ht {
                    deny all;
            }

            location = /favicon.ico {
                    log_not_found off; access_log off;
            }
            location = /robots.txt {
                    log_not_found off; access_log off; allow all;
            }
            location ~* \.(css|gif|ico|jpeg|jpg|js|png)$ {
                    expires max;
                    log_not_found off;
            }
    }
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: wordpress
spec:
  selector:
    matchLabels:
      app: wordpress
  template:
    metadata:
      labels:
        app: wordpress
    spec:
      volumes:
        - name: share-volumes
          emptyDir: {}

        - configMap:
            name: fpm-conf
          name: fpm-conf
        - configMap:
            name: nginx-conf
          name: config
        - name: cloudsql-instance-credentials
          secret:
            defaultMode: 420
            secretName: cloudsql-instance-credentials
      initContainers:
        - name: volume-format
          image: placeholder-base-image
          command: ["cp", "-rv", "/var/www/html/.", "/mnt"]
          volumeMounts:
            - name: share-volumes
              mountPath: /mnt
      containers:
        - name: wordpress
          image: placeholder-base-image
          resources:
            limits:
              memory: "300Mi"
              cpu: "500m"
          volumeMounts:
            - name: share-volumes
              mountPath: /var/www/html
            - mountPath: /usr/local/etc/php-fpm.d/www.conf
              name: fpm-conf
              subPath: www.conf
          readinessProbe:
            tcpSocket:
              port: 9000
            initialDelaySeconds: 15
            periodSeconds: 10
          livenessProbe:
            tcpSocket:
              port: 9000
            initialDelaySeconds: 15
            periodSeconds: 10
          env:
            - name: WORDPRESS_DB_HOST
              value: 127.0.0.1:3306
            - name: WORDPRESS_DB_USER
              valueFrom:
                secretKeyRef:
                  key: username
                  name: cloudsql-db-credentials
            - name: WORDPRESS_DB_PASSWORD
              valueFrom:
                secretKeyRef:
                  key: password
                  name: cloudsql-db-credentials
          ports:
            - containerPort: 9000
              name: fpm

        - name: nginx
          image: nginx:alpine
          resources:
            limits:
              memory: "100Mi"
              cpu: "50m"
          volumeMounts:
            - mountPath: /etc/nginx/conf.d/default.conf
              name: config
              subPath: default.conf
            - name: share-volumes
              mountPath: /var/www/html
          livenessProbe:
            httpGet:
              path: "/healthz"
              port: 80
              httpHeaders:
                - name: Host
                  value: localhost
            initialDelaySeconds: 3
            periodSeconds: 3
          readinessProbe:
            tcpSocket:
              port: 80
            initialDelaySeconds: 15
            periodSeconds: 10
          ports:
            - containerPort: 80
              name: web

        - name: cloudsql-proxy
          command:
            - /cloud_sql_proxy
            - -instances=INSTANCE_CONNECTION_NAME=tcp:3306
            - -credential_file=/secrets/cloudsql/key.json
          image: gcr.io/cloudsql-docker/gce-proxy:1.33.2
          imagePullPolicy: IfNotPresent
          resources:
            limits:
              memory: "100Mi"
              cpu: "100m"
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop:
                - NET_RAW
            runAsUser: 2
          volumeMounts:
            - mountPath: /secrets/cloudsql
              name: cloudsql-instance-credentials
              readOnly: true

---
# ClusterIP service exposing the wordpress pods; the ingress routes to the
# nginx port (80), while 9000 exposes php-fpm inside the cluster.
apiVersion: v1
kind: Service
metadata:
  name: wordpress
spec:
  selector:
    app: wordpress
  ports:
    - port: 80
      name: web
    - port: 9000
      name: fpm

base/kustomization.yaml

# Base kustomization: raw manifests shared by every environment overlay.
resources:
# - namespace.yaml
- site.yaml

to apply the kustomize apply

kubectl apply -k environment/staging/

# for the ingress thing you first need to grab the ExternalIp from the
# service of type LoadBalancer of the deployed nginx ingress controller,
# and set the DNS A record
# modify the ingress.yaml
kubectl apply -f ingress.yaml

From here you can head over to your Domain to complete the setup for the wordpress

Lets do LoadTesting to check the power of HPA

for that we will be using k6

import http from "k6/http";
import { check, sleep } from "k6";

// Ramp from 20 up to 1500 virtual users and back down over ~23 minutes to
// exercise the wordpress HPA and cluster autoscaler.
export const options = {
  stages: [
    { duration: "30s", target: 20 },
    { duration: "1m", target: 150 },
    { duration: "5m", target: 500 },
    { duration: "10m", target: 1500 },
    { duration: "6m", target: 500 },
    { duration: "30s", target: 0 },
  ],
};

// Each VU: one GET against the site, record whether it returned 200, then
// pause one second. Uses strict equality and no per-request console.log —
// logging every response at up to 1500 VUs floods the console and skews the
// load test (k6 reports failed checks in its summary anyway).
export default function () {
  const res = http.get("https://<your domain>");
  check(res, { "status was 200": (r) => r.status === 200 });
  sleep(1);
}
k6 run script.js

we can also see what all components of the pod had a lot of resource utilization

wordpress-fpm and MySQL became bottlenecks under heavy traffic, so you can adjust the resource limits and also choose a bigger node size — this gives the cluster time to provision a new node and for the new WordPress pod to pass its health checks before it serves traffic (lots of small optimizations add up).

For all the monitoring you can deploy kube-prometheus stack if you are getting started or new to monitoring

It installs Prometheus along with Grafana:

helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
helm install my-kube-prometheus-stack prometheus-community/kube-prometheus-stack --version 57.2.0
