diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..5dfe310 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +.terraform* +terraform.tfstate* diff --git a/README.md b/README.md new file mode 100644 index 0000000..18bfbdf --- /dev/null +++ b/README.md @@ -0,0 +1,86 @@ +# (no name yet) + +This is a Terraform configuration to deploy a Kubernetes cluster on +[Oracle Cloud Infrastructure][oci]. It creates a few virtual machines +and uses [kubeadm] to install a Kubernetes control plane on the first +machine, and join the other machines as worker nodes. + +By default, it deploys a 4-node cluster using ARM machines. Each machine +has 1 OCPU and 6 GB of RAM, which means that the cluster fits within +Oracle's (pretty generous if you ask me) [free tier][freetier]. + +**It is not meant to run production workloads,** +but it's great if you want to learn Kubernetes with a "real" cluster +(i.e. a cluster with multiple nodes) without breaking the bank, *and* +if you want to develop or test applications on ARM. + +## Getting started + +1. Create an Oracle Cloud Infrastructure account. +2. Configure OCI credentials. (FIXME) +3. `terraform apply` + +That's it! + +At the end of the `terraform apply`, a `kubeconfig` file is generated +in this directory. To use your new cluster, you can do: + +```bash +export KUBECONFIG=$PWD/kubeconfig +kubectl get nodes +``` + +The command above should show you 4 nodes, named `node1` to `node4`. + +You can also log into the VMs. At the end of the Terraform output +you should see a command that you can use to SSH into the first VM +(just copy-paste the command). + +## Customization + +Check `variables.tf` to see tweakable parameters. You can change the number +of nodes, the size of the nodes, or switch to Intel/AMD instances if you'd +like. Keep in mind that if you switch to Intel/AMD instances, you won't be +able to take advantage of the free tier. 
+ +## Stopping the cluster + +`terraform destroy` + +## Implementation details + +This Terraform configuration: + +- generates an OpenSSH keypair and a kubeadm token +- deploys 4 VMs using Ubuntu 20.04 +- uses cloud-init to install and configure everything +- installs Docker and Kubernetes packages +- runs `kubeadm init` on the first VM +- runs `kubeadm join` on the other VMs +- installs the Weave CNI plugin +- transfers the `kubeconfig` file generated by `kubeadm` +- patches that file to use the public IP address of the machine + +## Caveats + +There is no cloud controller manager, which means that you cannot +create services with `type: LoadBalancer`; or rather, if you create +such services, their `EXTERNAL-IP` will remain `<pending>`. + +To expose services, use `NodePort`. + +Likewise, there is no ingress controller and no storage class. + +(These might be added in a later iteration of this project.) + +## Remarks + +Oracle Cloud also has a managed Kubernetes service called +[Container Engine for Kubernetes (or OKE)][oke]. That service +doesn't have the caveats mentioned above; however, it's not part +of the free tier. 
+
+[freetier]: https://www.oracle.com/cloud/free/
+[kubeadm]: https://kubernetes.io/docs/reference/setup-tools/kubeadm/
+[oci]: https://www.oracle.com/cloud/compute/
+[oke]: https://www.oracle.com/cloud-native/container-engine-kubernetes/
diff --git a/cloudinit.tf b/cloudinit.tf
new file mode 100644
index 0000000..0b6c861
--- /dev/null
+++ b/cloudinit.tf
@@ -0,0 +1,173 @@
+# Packages installed on every node by cloud-init (see "packages:" below).
+locals {
+  packages = [
+    "apt-transport-https",
+    "build-essential",
+    "ca-certificates",
+    "curl",
+    "docker.io",
+    "jq",
+    "kubeadm",
+    "kubelet",
+    "lsb-release",
+    "make",
+    "prometheus-node-exporter",
+    "python3-pip",
+    "software-properties-common",
+    "tmux",
+    "tree",
+    "unzip",
+  ]
+}
+
+# Renders one multi-part cloud-init payload per entry in local.nodes:
+# a cloud-config document, a firewall script, and a role-specific kubeadm script.
+# The kubeadm token is quoted in the YAML so that it is always read as a string.
+data "cloudinit_config" "_" {
+  for_each = local.nodes
+
+  part {
+    filename     = "cloud-config.cfg"
+    content_type = "text/cloud-config"
+    content      = <<-EOF
+      hostname: ${each.value.node_name}
+      package_update: true
+      package_upgrade: false
+      packages:
+      ${yamlencode(local.packages)}
+      apt:
+        sources:
+          kubernetes.list:
+            source: "deb https://apt.kubernetes.io/ kubernetes-xenial main"
+            key: |
+              ${indent(8, data.http.apt_repo_key.body)}
+      users:
+      - default
+      - name: k8s
+        primary_group: k8s
+        groups: docker
+        home: /home/k8s
+        shell: /bin/bash
+        sudo: ALL=(ALL) NOPASSWD:ALL
+        ssh_authorized_keys:
+        - ${tls_private_key.ssh.public_key_openssh}
+      write_files:
+      - path: /etc/kubeadm_token
+        owner: "root:root"
+        permissions: "0600"
+        content: "${local.kubeadm_token}"
+      - path: /etc/kubeadm_config.yaml
+        owner: "root:root"
+        permissions: "0600"
+        content: |
+          kind: InitConfiguration
+          apiVersion: kubeadm.k8s.io/v1beta2
+          bootstrapTokens:
+          - token: "${local.kubeadm_token}"
+          ---
+          kind: KubeletConfiguration
+          apiVersion: kubelet.config.k8s.io/v1beta1
+          cgroupDriver: cgroupfs
+          ---
+          kind: ClusterConfiguration
+          apiVersion: kubeadm.k8s.io/v1beta2
+          apiServer:
+            certSANs:
+            - @@PUBLIC_IP_ADDRESS@@
+      - path: /home/k8s/.ssh/id_rsa
+        defer: true
+        owner: "k8s:k8s"
+        permissions: "0600"
+        content: |
+          ${indent(4, tls_private_key.ssh.private_key_pem)}
+      - path: /home/k8s/.ssh/id_rsa.pub
+        defer: true
+        owner: "k8s:k8s"
+        permissions: "0600"
+        content: |
+          ${indent(4, tls_private_key.ssh.public_key_openssh)}
+      EOF
+  }
+
+  # By default, all inbound traffic is blocked
+  # (except SSH) so we need to change that.
+  part {
+    filename     = "allow-inbound-traffic.sh"
+    content_type = "text/x-shellscript"
+    content      = <<-EOF
+      #!/bin/sh
+      sed -i "s/-A INPUT -j REJECT --reject-with icmp-host-prohibited//" /etc/iptables/rules.v4
+      netfilter-persistent start
+      EOF
+  }
+
+  # Only rendered for the node whose role is "controlplane": runs kubeadm init and installs Weave.
+  dynamic "part" {
+    for_each = each.value.role == "controlplane" ? ["yes"] : []
+    content {
+      filename     = "kubeadm-init.sh"
+      content_type = "text/x-shellscript"
+      content      = <<-EOF
+        #!/bin/sh
+        PUBLIC_IP_ADDRESS=$(curl https://icanhazip.com/)
+        sed -i s/@@PUBLIC_IP_ADDRESS@@/$PUBLIC_IP_ADDRESS/ /etc/kubeadm_config.yaml
+        kubeadm init --config=/etc/kubeadm_config.yaml --ignore-preflight-errors=NumCPU
+        export KUBECONFIG=/etc/kubernetes/admin.conf
+        kubever=$(kubectl version | base64 | tr -d '\n')
+        kubectl apply -f "https://cloud.weave.works/k8s/net?k8s-version=$kubever"
+        mkdir -p /home/k8s/.kube
+        cp $KUBECONFIG /home/k8s/.kube/config
+        chown -R k8s:k8s /home/k8s/.kube
+        EOF
+    }
+  }
+
+  # Only rendered for nodes whose role is "worker"; they join the API server on node 1.
+  # NOTE(review): the join skips CA verification of the control plane - confirm this is acceptable.
+  dynamic "part" {
+    for_each = each.value.role == "worker" ? ["yes"] : []
+    content {
+      filename     = "kubeadm-join.sh"
+      content_type = "text/x-shellscript"
+      content      = <<-EOF
+        #!/bin/sh
+        kubeadm join --discovery-token-unsafe-skip-ca-verification --token ${local.kubeadm_token} ${local.nodes[1].ip_address}:6443
+        EOF
+    }
+  }
+}
+
+# Fetches the apt repository key that is embedded in the cloud-config document above.
+data "http" "apt_repo_key" {
+  url = "https://packages.cloud.google.com/apt/doc/apt-key.gpg.asc"
+}
+
+# The kubeadm token must follow a specific format:
+# - 6 letters/numbers
+# - a dot
+# - 16 letters/numbers
+
+resource "random_string" "token1" {
+  length  = 6
+  number  = true
+  lower   = true
+  special = false
+  upper   = false
+}
+
+resource "random_string" "token2" {
+  length  = 16
+  number  = true
+  lower   = true
+  special = false
+  upper   = false
+}
+
+# Token used by both "kubeadm init" and "kubeadm join": "<token1>.<token2>".
+locals {
+  kubeadm_token = format(
+    "%s.%s",
+    random_string.token1.result,
+    random_string.token2.result
+  )
+}
diff --git a/kubeconfig.tf b/kubeconfig.tf
new file mode 100644
index 0000000..144d598
--- /dev/null
+++ b/kubeconfig.tf
@@ -0,0 +1,43 @@
+# Blocks until curl gets a response from port 6443 on the first node's public IP.
+resource "null_resource" "wait_for_kube_apiserver" {
+  depends_on = [oci_core_instance._[1]]
+  provisioner "local-exec" {
+    command = <<-EOT
+      while ! curl -k https://${oci_core_instance._[1].public_ip}:6443; do
+        sleep 1
+      done
+    EOT
+  }
+}
+
+# Reads /etc/kubernetes/admin.conf from the first node over SSH and returns it
+# base64-encoded as the JSON object {"base64": "..."}.
+# "cat >/dev/null" drains whatever the external data source sends on stdin.
+# The base64 step runs locally, so the portable "base64 | tr -d '\n'" replaces GNU-only "base64 -w0".
+data "external" "kubeconfig" {
+  depends_on = [null_resource.wait_for_kube_apiserver]
+  program = [
+    "sh",
+    "-c",
+    <<-EOT
+      set -e
+      cat >/dev/null
+      echo '{"base64": "'$(
+        ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
+          -l k8s -i ${local_file.ssh_private_key.filename} \
+          ${oci_core_instance._[1].public_ip} \
+          sudo cat /etc/kubernetes/admin.conf | base64 | tr -d '\n'
+      )'"}'
+    EOT
+  ]
+}
+
+# Writes the kubeconfig locally, then points its "kubernetes" cluster at the node's public IP.
+resource "local_file" "kubeconfig" {
+  content         = base64decode(data.external.kubeconfig.result.base64)
+  filename        = "kubeconfig"
+  file_permission = "0600"
+  provisioner "local-exec" {
+    command = "kubectl --kubeconfig=kubeconfig config set-cluster kubernetes --server=https://${oci_core_instance._[1].public_ip}:6443"
+  }
+}
diff --git a/main.tf b/main.tf
new file mode 100644
index 0000000..aef4775
--- /dev/null
+++ b/main.tf
@@ -0,0 +1,63 @@
+# Dedicated compartment that holds every resource of this cluster.
+resource "oci_identity_compartment" "_" {
+  name          = var.name
+  description   = var.name
+  enable_delete = true
+}
+
+locals {
+  compartment_id = oci_identity_compartment._.id
+}
+
+# Availability domains of the compartment; instances are placed in the first one.
+data "oci_identity_availability_domains" "_" {
+  compartment_id = local.compartment_id
+}
+
+# Images matching the selected shape and OS; instances boot from the first result.
+data "oci_core_images" "_" {
+  compartment_id           = local.compartment_id
+  shape                    = var.shape
+  operating_system         = "Canonical Ubuntu"
+  operating_system_version = "20.04"
+  #operating_system = "Oracle Linux"
+  #operating_system_version = "7.9"
+}
+
+# One VM per entry in local.nodes, booted with the matching cloud-init payload.
+resource "oci_core_instance" "_" {
+  for_each            = local.nodes
+  display_name        = each.value.node_name
+  availability_domain = data.oci_identity_availability_domains._.availability_domains[0].name
+  compartment_id      = local.compartment_id
+  shape               = var.shape
+  shape_config {
+    memory_in_gbs = var.memory_in_gbs_per_node
+    ocpus         = var.ocpus_per_node
+  }
+  source_details {
+    source_id   = data.oci_core_images._.images[0].id
+    source_type = "image"
+  }
+  create_vnic_details {
+    subnet_id  = oci_core_subnet._.id
+    private_ip = each.value.ip_address
+  }
+  metadata = {
+    ssh_authorized_keys = join("\n", local.authorized_keys)
+    user_data           = data.cloudinit_config._[each.key].rendered
+  }
+}
+
+# Node map keyed by index (1..how_many_nodes): node N is named "nodeN", gets the
+# private address 10.0.0.(10+N), and node 1 is the control plane.
+locals {
+  nodes = {
+    for i in range(1, 1 + var.how_many_nodes) :
+    i => {
+      node_name  = format("node%d", i)
+      ip_address = format("10.0.0.%d", 10 + i)
+      role       = i == 1 ? "controlplane" : "worker"
+    }
+  }
+}
diff --git a/network.tf b/network.tf
new file mode 100644
index 0000000..d93f7b9
--- /dev/null
+++ b/network.tf
@@ -0,0 +1,43 @@
+# Virtual cloud network for the cluster.
+resource "oci_core_vcn" "_" {
+  compartment_id = local.compartment_id
+  cidr_block     = "10.0.0.0/16"
+}
+
+# Internet gateway; the default route table below sends 0.0.0.0/0 through it.
+resource "oci_core_internet_gateway" "_" {
+  compartment_id = local.compartment_id
+  vcn_id         = oci_core_vcn._.id
+}
+
+resource "oci_core_default_route_table" "_" {
+  manage_default_resource_id = oci_core_vcn._.default_route_table_id
+  route_rules {
+    destination       = "0.0.0.0/0"
+    destination_type  = "CIDR_BLOCK"
+    network_entity_id = oci_core_internet_gateway._.id
+  }
+}
+
+# NOTE(review): opens every protocol to and from 0.0.0.0/0 - presumably acceptable for a
+# non-production learning cluster; confirm before reusing elsewhere.
+resource "oci_core_default_security_list" "_" {
+  manage_default_resource_id = oci_core_vcn._.default_security_list_id
+  ingress_security_rules {
+    protocol = "all"
+    source   = "0.0.0.0/0"
+  }
+  egress_security_rules {
+    protocol    = "all"
+    destination = "0.0.0.0/0"
+  }
+}
+
+# Subnet holding the node addresses (10.0.0.11 and up, see local.nodes in main.tf).
+resource "oci_core_subnet" "_" {
+  compartment_id    = local.compartment_id
+  cidr_block        = "10.0.0.0/24"
+  vcn_id            = oci_core_vcn._.id
+  route_table_id    = oci_core_default_route_table._.id
+  security_list_ids = [oci_core_default_security_list._.id]
+}
diff --git a/outputs.tf b/outputs.tf
new file mode 100644
index 0000000..cbb356b
--- /dev/null
+++ b/outputs.tf
@@ -0,0 +1,9 @@
+# Ready-to-paste SSH command for the first node, logging in as the k8s user.
+output "ssh" {
+  value = format(
+    "\nssh -i %s -l %s %s\n",
+    local_file.ssh_private_key.filename,
+    "k8s",
+    oci_core_instance._[1].public_ip
+  )
+}
diff --git a/providers.tf b/providers.tf
new file mode 100644
index 0000000..8be26af
--- /dev/null
+++ b/providers.tf
@@ -0,0 +1,9 @@
+# Pins the OCI provider to an exact version.
+terraform {
+  required_providers {
+    oci = {
+      source  = "hashicorp/oci"
+      version = "4.57.0"
+    }
+  }
+}
diff --git a/sshkey.tf b/sshkey.tf
new file mode 100644
index 0000000..d0a87f4
--- /dev/null
+++ b/sshkey.tf
@@ -0,0 +1,23 @@
+# SSH keypair generated by Terraform and authorized on every node (see cloudinit.tf and main.tf).
+resource "tls_private_key" "ssh" {
+  algorithm = "RSA"
+  rsa_bits  = 4096
+}
+
+# Local copies of the keypair, written next to the configuration with mode 0600.
+resource "local_file" "ssh_private_key" {
+  content         = tls_private_key.ssh.private_key_pem
+  filename        = "id_rsa"
+  file_permission = "0600"
+}
+
+resource "local_file" "ssh_public_key" {
+  content         = tls_private_key.ssh.public_key_openssh
+  filename        = "id_rsa.pub"
+  file_permission = "0600"
+}
+
+# chomp() drops the trailing newline of the OpenSSH public key before it is joined in main.tf.
+locals {
+  authorized_keys = [chomp(tls_private_key.ssh.public_key_openssh)]
+}
diff --git a/variables.tf b/variables.tf
new file mode 100644
index 0000000..0b57621
--- /dev/null
+++ b/variables.tf
@@ -0,0 +1,38 @@
+variable "name" {
+  type        = string
+  description = "Name (and description) of the OCI compartment created for the cluster."
+  default     = "kubernetes-on-arm-with-oracle"
+}
+
+/*
+Available flex shapes:
+"VM.Optimized3.Flex"  # Intel Ice Lake
+"VM.Standard3.Flex"   # Intel Ice Lake
+"VM.Standard.A1.Flex" # Ampere Altra
+"VM.Standard.E3.Flex" # AMD Rome
+"VM.Standard.E4.Flex" # AMD Milan
+*/
+
+variable "shape" {
+  type        = string
+  description = "OCI compute shape used for every node and for image selection."
+  default     = "VM.Standard.A1.Flex"
+}
+
+variable "how_many_nodes" {
+  type        = number
+  description = "Total number of nodes; node 1 is the control plane, the others are workers."
+  default     = 4
+}
+
+variable "ocpus_per_node" {
+  type        = number
+  description = "Number of OCPUs allocated to each node."
+  default     = 1
+}
+
+variable "memory_in_gbs_per_node" {
+  type        = number
+  description = "Amount of memory, in GB, allocated to each node."
+  default     = 6
+}