I have this terraform config for creating a single mongodb replica and a service but I can't connect to mongo using the cli and the cluster domain name.
locals {
labels = {
"app" = "mongo"
}
volume_config_name = "mongo-config"
}
module "mongo" {
source = "terraform-iaac/stateful-set/kubernetes"
version = "1.4.2"
# insert the 3 required variables here
image = "mongo:4.4"
name = "mongodb"
namespace = kubernetes_namespace.cmprimg.metadata[0].name
custom_labels = local.labels
volume_host_path = [
{
volume_name = "data"
path_on_node = "/data/db"
},
]
volume_mount = [
{
mount_path = "/data/db"
volume_name = "data"
},
{
mount_path = "/etc/mongod.conf.orig"
volume_name = "mongodb-conf"
sub_path = "configfile" // Key from configmap
}
]
volume_config_map = [{
mode = "0777"
volume_name = "mongodb-conf"
name = "mongodb-confmap"
}]
# volume_claim = [
# {
# name = "mongo"
# namespace = kubernetes_namespace.cmprimg.metadata[0].name
# access_modes = ["ReadWriteOnce"]
# requests_storage = "4Gi"
# persistent_volume_name = "mongo"
# storage_class_name = "linode-block-storage-retain"
# }
# ]
env = {
"MONGO_INITDB_ROOT_USERNAME" = var.username,
"MONGO_INITDB_ROOT_PASSWORD" = var.password,
}
command = [
"mongod",
"--bind_ip",
"0.0.0.0",
]
internal_port = [
{
name = "mongo"
internal_port = 27017
}
]
resources = {
request_cpu = "100m"
request_memory = "800Mi"
limit_cpu = "120m"
limit_memory = "900Mi"
}
replicas = 1
}
module "mongo_service" {
source = "terraform-iaac/service/kubernetes"
version = "1.0.4"
# insert the 3 required variables here
app_name = module.mongo.name
app_namespace = kubernetes_namespace.cmprimg.metadata[0].name
port_mapping = [
{
name = "mongo"
internal_port = 27107
external_port = 27017
}
]
custom_labels = local.labels
}
resource "kubernetes_persistent_volume_claim" "example" {
metadata {
name = "mongo"
namespace = kubernetes_namespace.cmprimg.metadata[0].name
labels = local.labels
}
spec {
access_modes = ["ReadWriteOnce"]
resources {
requests = {
storage = "20Gi"
}
}
storage_class_name = "linode-block-storage-retain"
}
}
resource "kubernetes_config_map" "mongodb_conf" {
metadata {
name = "mongodb-confmap"
namespace = kubernetes_namespace.cmprimg.metadata[0].name
labels = local.labels
}
data = {
"configfile" = yamlencode({
storage : {
dbPath : "/data/db",
},
net : {
port : 27017,
bindIp : "0.0.0.0",
}
})
}
}
I can exec into the mongodb pod and use mongo cli to connect using localhost, but when I'm in the same pod and use mongocli to connect using the domain name mongodb.default.svc.cluster.local:27017 I get connection refused. I can see in the logs that mongodb binds to 0.0.0.0 but can't connect through external ports. Did I misconfigure the service or do something else wrong?
Take a closer look at this section:
port_mapping = [
{
name = "mongo"
internal_port = 27107
external_port = 27017
}
]
You use „internal” and „external” port numbers inconsistently across file (the internal here is 27107)
Are you sure the syntax of this block is correct? In the reaserch I made through the Internet the portMappings section has usually different syntax (e.g. https://github.com/hashicorp/terraform-provider-aws/issues/21861)
.
portMappings = [
{
containerPort = var.container_port
hostPort = var.container_port
protocol = "tcp"
}
]
Related
I'm trying to deploy an EKS self managed with Terraform. While I can deploy the cluster with addons, vpc, subnet and all other resources, it always fails at helm:
Error: Kubernetes cluster unreachable: the server has asked for the client to provide credentials
with module.eks-ssp-kubernetes-addons.module.ingress_nginx[0].helm_release.nginx[0]
on .terraform/modules/eks-ssp-kubernetes-addons/modules/kubernetes-addons/ingress-nginx/main.tf line 19, in resource "helm_release" "nginx":
resource "helm_release" "nginx" {
This error repeats for metrics_server, lb_ingress, argocd, but cluster-autoscaler throws:
Warning: Helm release "cluster-autoscaler" was created but has a failed status.
with module.eks-ssp-kubernetes-addons.module.cluster_autoscaler[0].helm_release.cluster_autoscaler[0]
on .terraform/modules/eks-ssp-kubernetes-addons/modules/kubernetes-addons/cluster-autoscaler/main.tf line 1, in resource "helm_release" "cluster_autoscaler":
resource "helm_release" "cluster_autoscaler" {
My main.tf looks like this:
terraform {
backend "remote" {}
required_providers {
aws = {
source = "hashicorp/aws"
version = ">= 3.66.0"
}
kubernetes = {
source = "hashicorp/kubernetes"
version = ">= 2.7.1"
}
helm = {
source = "hashicorp/helm"
version = ">= 2.4.1"
}
}
}
data "aws_eks_cluster" "cluster" {
name = module.eks-ssp.eks_cluster_id
}
data "aws_eks_cluster_auth" "cluster" {
name = module.eks-ssp.eks_cluster_id
}
provider "aws" {
access_key = "xxx"
secret_key = "xxx"
region = "xxx"
assume_role {
role_arn = "xxx"
}
}
provider "kubernetes" {
host = data.aws_eks_cluster.cluster.endpoint
cluster_ca_certificate = base64decode(data.aws_eks_cluster.cluster.certificate_authority.0.data)
token = data.aws_eks_cluster_auth.cluster.token
}
provider "helm" {
kubernetes {
host = data.aws_eks_cluster.cluster.endpoint
token = data.aws_eks_cluster_auth.cluster.token
cluster_ca_certificate = base64decode(data.aws_eks_cluster.cluster.certificate_authority.0.data)
}
}
My eks.tf looks like this:
module "eks-ssp" {
source = "github.com/aws-samples/aws-eks-accelerator-for-terraform"
# EKS CLUSTER
tenant = "DevOpsLabs2b"
environment = "dev-test"
zone = ""
terraform_version = "Terraform v1.1.4"
# EKS Cluster VPC and Subnet mandatory config
vpc_id = "xxx"
private_subnet_ids = ["xxx","xxx", "xxx", "xxx"]
# EKS CONTROL PLANE VARIABLES
create_eks = true
kubernetes_version = "1.19"
# EKS SELF MANAGED NODE GROUPS
self_managed_node_groups = {
self_mg = {
node_group_name = "DevOpsLabs2b"
subnet_ids = ["xxx","xxx", "xxx", "xxx"]
create_launch_template = true
launch_template_os = "bottlerocket" # amazonlinux2eks or bottlerocket or windows
custom_ami_id = "xxx"
public_ip = true # Enable only for public subnets
pre_userdata = <<-EOT
yum install -y amazon-ssm-agent \
systemctl enable amazon-ssm-agent && systemctl start amazon-ssm-agent \
EOT
disk_size = 10
instance_type = "t2.small"
desired_size = 2
max_size = 10
min_size = 0
capacity_type = "" # Optional Use this only for SPOT capacity as capacity_type = "spot"
k8s_labels = {
Environment = "dev-test"
Zone = ""
WorkerType = "SELF_MANAGED_ON_DEMAND"
}
additional_tags = {
ExtraTag = "t2x-on-demand"
Name = "t2x-on-demand"
subnet_type = "public"
}
create_worker_security_group = false # Creates a dedicated sec group for this Node Group
},
}
}
enable_amazon_eks_vpc_cni = true
amazon_eks_vpc_cni_config = {
addon_name = "vpc-cni"
addon_version = "v1.7.5-eksbuild.2"
service_account = "aws-node"
resolve_conflicts = "OVERWRITE"
namespace = "kube-system"
additional_iam_policies = []
service_account_role_arn = ""
tags = {}
}
enable_amazon_eks_kube_proxy = true
amazon_eks_kube_proxy_config = {
addon_name = "kube-proxy"
addon_version = "v1.19.8-eksbuild.1"
service_account = "kube-proxy"
resolve_conflicts = "OVERWRITE"
namespace = "kube-system"
additional_iam_policies = []
service_account_role_arn = ""
tags = {}
}
#K8s Add-ons
enable_aws_load_balancer_controller = true
enable_metrics_server = true
enable_cluster_autoscaler = true
enable_aws_for_fluentbit = true
enable_argocd = true
enable_ingress_nginx = true
depends_on = [module.eks-ssp.self_managed_node_groups]
}
OP has confirmed in the comment that the problem was resolved:
Of course. I think I found the issue. Doing "kubectl get svc" throws: "An error occurred (AccessDenied) when calling the AssumeRole operation: User: arn:aws:iam::xxx:user/terraform_deploy is not authorized to perform: sts:AssumeRole on resource: arn:aws:iam::xxx:user/terraform_deploy"
Solved it by using my actual role, that's crazy. No idea why it was calling itself.
For similar problem look also this issue.
I solved this error by adding dependencies in the helm installations.
The depends_on will wait for the step to successfully complete and then helm module runs.
module "nginx-ingress" {
depends_on = [module.eks, module.aws-load-balancer-controller]
source = "terraform-module/release/helm"
...}
module "aws-load-balancer-controller" {
depends_on = [module.eks]
source = "terraform-module/release/helm"
...}
module "helm_autoscaler" {
depends_on = [module.eks]
source = "terraform-module/release/helm"
...}
I have this Terraform GKE cluster with 3 nodes. When I deploy this cluster all nodes are localised in the same zones which is europe-west1-b.
gke-cluster.yml
resource "google_container_cluster" "primary" {
name = var.cluster_name
initial_node_count = var.initial_node_count
master_auth {
username = ""
password = ""
client_certificate_config {
issue_client_certificate = false
}
}
node_config {
//machine_type = "e2-medium"
oauth_scopes = [
"https://www.googleapis.com/auth/logging.write",
"https://www.googleapis.com/auth/monitoring",
]
metadata = {
disable-legacy-endpoints = "true"
}
labels = {
app = var.app_name
}
tags = ["app", var.app_name]
}
timeouts {
create = "30m"
update = "40m"
}
}
variables.tf
variable "cluster_name" {
default = "cluster"
}
variable "app_name" {
default = "my-app"
}
variable "initial_node_count" {
default = 3
}
variable "kubernetes_min_ver" {
default = "latest"
}
variable "kubernetes_max_ver" {
default = "latest"
}
variable "remove_default_node_pool" {
default = false
}
variable "project" {
default = "your-project-name"
}
variable "credentials" {
default = "terraform-key.json"
}
variable "region" {
default = "europe-west1"
}
variable "zone" {
type = list(string)
description = "The zones to host the cluster in."
default = ["europe-west1-b", "europe-west1-c", "europe-west1-d"]
}
And would like to know if it's possible to deploy each node in a different zone.
If yes how can I do it using Terraform?
Simply add the following line
resource "google_container_cluster" "primary" {
name = "cluster"
location = "us-central1"
initial_node_count = "3"
in order to create a regional cluster. The above will bring up 9 nodes with each zone (f a b) containing 3 nodes. If you only want 1 node per zone, then just change initial_node_count to 1.
More info here at Argument reference.
I have been working for nearly 5 straight days now on this and can't get this to work. According to AWS documentation I should* be able to mount an EFS Volume to a pod deployed to a fargate node in kubernetes (EKS).
I'm doing everything 100% through terraform. I'm lost at this point and my eyes are practically bleeding from the amount of terrible documentation I have read. Any guidance that anyone can give me on getting this to work would be amazing!
Here is what I have done so far:
Setup an EKS CSI driver, storage class, and role bindings (not really sure why I need these role bindings tbh)
resource "kubernetes_csi_driver" "efs" {
metadata {
name = "efs.csi.aws.com"
}
spec {
attach_required = false
volume_lifecycle_modes = [
"Persistent"
]
}
}
resource "kubernetes_storage_class" "efs" {
metadata {
name = "efs-sc"
}
storage_provisioner = kubernetes_csi_driver.efs.metadata[0].name
reclaim_policy = "Retain"
}
resource "kubernetes_cluster_role_binding" "efs_pre" {
metadata {
name = "efs_role_pre"
}
role_ref {
api_group = "rbac.authorization.k8s.io"
kind = "ClusterRole"
name = "cluster-admin"
}
subject {
kind = "ServiceAccount"
name = "default"
namespace = "pre"
}
}
resource "kubernetes_cluster_role_binding" "efs_live" {
metadata {
name = "efs_role_live"
}
role_ref {
api_group = "rbac.authorization.k8s.io"
kind = "ClusterRole"
name = "cluster-admin"
}
subject {
kind = "ServiceAccount"
name = "default"
namespace = "live"
}
}
Setup The EFS Volume with policies and security groups
module "vpc" {
source = "../../read_only_data/vpc"
stackname = var.vpc_stackname
}
resource "aws_efs_file_system" "efs_data" {
creation_token = "xva-${var.environment}-pv-efsdata-${var.side}"
# encrypted = true
# kms_key_id = ""
performance_mode = "generalPurpose" #maxIO
throughput_mode = "bursting"
lifecycle_policy {
transition_to_ia = "AFTER_30_DAYS"
}
}
data "aws_efs_file_system" "efs_data" {
file_system_id = aws_efs_file_system.efs_data.id
}
resource "aws_efs_access_point" "efs_data" {
file_system_id = aws_efs_file_system.efs_data.id
}
/* Policy that does the following:
- Prevent root access by default
- Enforce read-only access by default
- Enforce in-transit encryption for all clients
*/
resource "aws_efs_file_system_policy" "efs_data" {
file_system_id = aws_efs_file_system.efs_data.id
policy = jsonencode({
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Principal": {
"AWS": "*"
},
"Action": "elasticfilesystem:ClientMount",
"Resource": aws_efs_file_system.efs_data.arn
},
{
"Effect": "Deny",
"Principal": {
"AWS": "*"
},
"Action": "*",
"Resource": aws_efs_file_system.efs_data.arn,
"Condition": {
"Bool": {
"aws:SecureTransport": "false"
}
}
}
]
})
}
# Security Groups for this volume
resource "aws_security_group" "allow_eks_cluster" {
name = "xva-${var.environment}-efsdata-${var.side}"
description = "This will allow the cluster ${data.terraform_remote_state.cluster.outputs.eks_cluster_name} to access this volume and use it."
vpc_id = module.vpc.vpc_id
ingress {
description = "NFS For EKS Cluster ${data.terraform_remote_state.cluster.outputs.eks_cluster_name}"
from_port = 2049
to_port = 2049
protocol = "tcp"
security_groups = [
data.terraform_remote_state.cluster.outputs.eks_cluster_sg_id
]
}
egress {
from_port = 0
to_port = 0
protocol = "-1"
cidr_blocks = ["0.0.0.0/0"]
}
tags = {
Name = "allow_tls"
}
}
# Mount to the subnets that will be using this efs volume
# Also attach sg's to restrict access to this volume
resource "aws_efs_mount_target" "efs_data-app01" {
file_system_id = aws_efs_file_system.efs_data.id
subnet_id = module.vpc.private_app_subnet_01
security_groups = [
aws_security_group.allow_eks_cluster.id
]
}
resource "aws_efs_mount_target" "efs_data-app02" {
file_system_id = aws_efs_file_system.efs_data.id
subnet_id = module.vpc.private_app_subnet_02
security_groups = [
aws_security_group.allow_eks_cluster.id
]
}
Create a Persistant Volume referencing the EFS Volume in kubernetes
data "terraform_remote_state" "csi" {
backend = "s3"
config = {
bucket = "xva-${var.account_type}-terraform-${var.region_code}"
key = "${var.environment}/efs/driver/terraform.tfstate"
region = var.region
profile = var.profile
}
}
resource "kubernetes_persistent_volume" "efs_data" {
metadata {
name = "pv-efsdata"
labels = {
app = "example"
}
}
spec {
access_modes = ["ReadOnlyMany"]
capacity = {
storage = "25Gi"
}
volume_mode = "Filesystem"
persistent_volume_reclaim_policy = "Retain"
storage_class_name = data.terraform_remote_state.csi.outputs.storage_name
persistent_volume_source {
csi {
driver = data.terraform_remote_state.csi.outputs.csi_name
volume_handle = aws_efs_file_system.efs_data.id
read_only = true
}
}
}
}
Then create a deployment to fargate with the pod mounting the EFS volume
data "terraform_remote_state" "efs_data_volume" {
backend = "s3"
config = {
bucket = "xva-${var.account_type}-terraform-${var.region_code}"
key = "${var.environment}/efs/volume/terraform.tfstate"
region = var.region
profile = var.profile
}
}
resource "kubernetes_persistent_volume_claim" "efs_data" {
metadata {
name = "pv-efsdata-claim-${var.side}"
namespace = var.side
}
spec {
access_modes = ["ReadOnlyMany"]
storage_class_name = data.terraform_remote_state.csi.outputs.storage_name
resources {
requests = {
storage = "25Gi"
}
}
volume_name = data.terraform_remote_state.efs_data_volume.outputs.volume_name
}
}
resource "kubernetes_deployment" "example" {
timeouts {
create = "3m"
update = "4m"
delete = "2m"
}
metadata {
name = "deployment-example"
namespace = var.side
labels = {
app = "example"
platform = "fargate"
subnet = "app"
}
}
spec {
replicas = 1
selector {
match_labels = {
app = "example"
}
}
template {
metadata {
labels = {
app = "example"
platform = "fargate"
subnet = "app"
}
}
spec {
volume {
name = "efs-data-volume"
persistent_volume_claim {
claim_name = kubernetes_persistent_volume_claim.efs_data.metadata[0].name
read_only = true
}
}
container {
image = "${var.nexus_docker_endpoint}/example:${var.docker_tag}"
name = "example"
env {
name = "environment"
value = var.environment
}
env {
name = "dockertag"
value = var.docker_tag
}
volume_mount {
name = "efs-data-volume"
read_only = true
mount_path = "/appconf/"
}
# liveness_probe {
# http_get {
# path = "/health"
# port = 443
# }
# initial_delay_seconds = 3
# period_seconds = 3
# }
port {
container_port = 443
}
}
}
}
}
}
It can see the persistant volume in kuberenetes, I can see that it is claimed, heck I can even see that it attempts to mount the volume in the pod logs. However, I inevitably always see the following error when describing the pod:
Volumes:
efs-data-volume:
Type: PersistentVolumeClaim (a reference to a PersistentVolumeClaim in the same namespace)
ClaimName: pv-efsdata-claim-pre
ReadOnly: true
...
...
Events:
Type Reason Age From Message
---- ------ ---- ---- -------
Warning FailedMount 11m (x629 over 23h) kubelet, <redacted-fargate-endpoint> Unable to attach or mount volumes: unmounted volumes=[efs-data-volume], unattached volumes=[efs-data-volume]: timed out waiting for the condition
Warning FailedMount 47s (x714 over 23h) kubelet, <redacted-fargate-endpoint> MountVolume.SetUp failed for volume "pv-efsdata" : kubernetes.io/csi: mounter.SetupAt failed: rpc error: code = InvalidArgument desc = Volume capability not supported
I finally have done it. I have successfully mounted an EFS Volume to a Fargate Pod (nearly 6 days later)! I was able to get the direction I need from this closed github issue: https://github.com/aws/containers-roadmap/issues/826
It ended up being that I am using this module to build my eks cluster: https://registry.terraform.io/modules/cloudposse/eks-cluster/aws/0.29.0?tab=outputs
If you use the output "security_group_id" it outputs the "Additional Security group". Which in my experience is good for absolutely nothing in aws. Not sure why it even exists when you can't do anything with it. The security group I needed to use was the "Cluster security group". So I added the "Cluster security group"'s id on port 2049 ingress rule on the EFS volumes security groups mount point and BAM! I mounted the EFS volume to the deployed pod successfully.
The other important change was I changed the persistant volume type to be ReadWriteMany, since fargate apparently doesn't support ReadOnlyMany.
I have two public Subnets declared in my VPC and now I want to create an EC2 instance in each of the two public subnets, but Terraform doesn't properly resolve the subnet ids
Here is what I have defined:
resource "aws_subnet" "archer-public-1" {
vpc_id = aws_vpc.archer.id
cidr_block = "10.0.1.0/24"
map_public_ip_on_launch = "true"
availability_zone = "${var.AZ1}"
}
resource "aws_subnet" "archer-public-2" {
vpc_id = aws_vpc.archer.id
cidr_block = "10.0.2.0/24"
map_public_ip_on_launch = "true"
availability_zone = "${var.AZ2}"
}
Here is my EC2 resource definition with the subnet expression that I tried unsuccessfully.
resource "aws_instance" "nginx" {
count = 2
ami = var.AMIS[var.AWS_REGION]
instance_type = "t2.micro"
subnet_id = "aws_subnet.archer-public-${count.index+1}.id" <== why doesn't this work?!
}
The variable interpolation does produce the proper values for the two subnets: archer-public-1 and archer-public-2, yet, the terraform produces these errors:
Error: Error launching source instance: InvalidSubnetID.NotFound: The subnet ID 'aws_subnet.archer-public-1.id' does not exist
status code: 400, request id: 26b4f710-e968-484d-a17a-6faa5a9d15d5
Yet when I invoke the terraform console, I can see that it properly resolves these objects as expected:
> aws_subnet.archer-public-1
{
"arn" = "arn:aws:ec2:us-west-2:361879417564:subnet/subnet-0fb47d0d30f501585"
"assign_ipv6_address_on_creation" = false
"availability_zone" = "us-west-2a"
"availability_zone_id" = "usw2-az1"
"cidr_block" = "10.0.1.0/24"
"id" = "subnet-0fb47d0d30f501585"
"ipv6_cidr_block" = ""
"ipv6_cidr_block_association_id" = ""
"map_public_ip_on_launch" = true
"outpost_arn" = ""
"owner_id" = "361879417564"
"tags" = {
"Name" = "archer-public-1"
}
"vpc_id" = "vpc-074637b06747e227b"
}
I am trying to deploy Windows VM on Google Cloud through terraform. The VM is getting deployed and I am able to execute PowerShell scripts by using windows-startup-script-url.
With this approach, I can only use scripts which are already stored in Google Storage. If the script has parameters / variables, then how to pass that parameter, any clue !
provider "google" {
project = "my-project"
region = "my-location"
zone = "my-zone"
}
resource "google_compute_instance" "default" {
name = "my-name"
machine_type = "n1-standard-2"
zone = "my-zone"
boot_disk {
initialize_params {
image = "windows-cloud/windows-2019"
}
}
metadata {
windows-startup-script-url = "gs://<my-storage>/<my-script.ps1>"
}
network_interface {
network = "default"
access_config {
}
}
tags = ["http-server", "windows-server"]
}
resource "google_compute_firewall" "http-server" {
name = "default-allow-http"
network = "default"
allow {
protocol = "tcp"
ports = ["80"]
}
source_ranges = ["0.0.0.0/0"]
target_tags = ["http-server"]
}
resource "google_compute_firewall" "windows-server" {
name = "windows-server"
network = "default"
allow {
protocol = "tcp"
ports = ["3389"]
}
source_ranges = ["0.0.0.0/0"]
target_tags = ["windows-server"]
}
output "ip" {
value = "${google_compute_instance.default.network_interface.0.access_config.0.nat_ip}"
}
Terraform doesn't require startup scripts to be pulled from GCS buckets necessarily.
The example here shows:
}
metadata = {
foo = "bar"
}
metadata_startup_script = "echo hi > /test.txt"
service_account {
scopes = ["userinfo-email", "compute-ro", "storage-ro"]
}
}
More in Official docs for GCE and Powershell scripting here