Bazel fails to deploy a sample Kubernetes deployment (deployment.yaml) to the k8s tenant.
I followed https://github.com/bazelbuild/rules_k8s#aliasing-eg-k8s_deploy and tried a sample deployment.yaml file to deploy the application to a k8s tenant. I already have one k8s tenant configured on the build machine. To deploy the application I executed:
bazel run //main:dev.create
But the bazel command fails with the error below:
[root@localhost t2]# bazel run //main:dev.create
Starting local Bazel server and connecting to it...
INFO: Analyzed target //main:dev.create (68 packages loaded, 6876 targets configured).
INFO: Found 1 target...
INFO: Deleting stale sandbox base /root/.cache/bazel/_bazel_root/5ad59170e5ff426844f68e5dd9f66fb3/sandbox
Target //main:dev.create up-to-date:
  bazel-bin/main/dev.create
INFO: Elapsed time: 33.497s, Critical Path: 2.04s
INFO: 0 processes.
INFO: Build completed successfully, 1 total action
INFO: Build completed successfully, 1 total action
$ /usr/local/bin/kubectl --cluster=kubernetes --context= --user= create -f -
error: error parsing STDIN: error converting YAML to JSON: yaml: line 4: mapping values are not allowed in this context
This is my WORKSPACE file:
load("#bazel_tools//tools/build_defs/repo:git.bzl", "git_repository")
git_repository(
name = "io_bazel_rules_go",
remote = "https://github.com/bazelbuild/rules_go.git",
tag = "0.18.5"
)
git_repository(
name = "bazel_gazelle",
remote = "https://github.com/bazelbuild/bazel-gazelle.git",
tag = "0.17.0",
)
load("#io_bazel_rules_go//go:deps.bzl", "go_download_sdk","go_register_toolchains","go_rules_dependencies")
go_download_sdk(
name = "gosdk",
sdks = {
......
},
urls = [....],
)
go_register_toolchains(
"#//:gosdk",
)
go_rules_dependencies()
load("#bazel_gazelle//:deps.bzl", "gazelle_dependencies", "go_repository")
gazelle_dependencies()
git_repository(
name = "io_bazel_rules_docker",
commit = "e12e276a9a6ded09363a6c1f0de46c573bd6096c",
remote = "https://github.com/xxxxx/rules_docker.git",
)
load(
"#io_bazel_rules_docker//repositories:repositories.bzl",
container_repositories = "repositories",
)
container_repositories()
load("#io_bazel_rules_docker//container:container.bzl", "container_pull")
load(
"#io_bazel_rules_docker//go:image.bzl",
go_image_repos = "repositories",
)
go_image_repos()
load("#bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
http_archive(
name = "io_bazel_rules_k8s",
sha256 = "91fef3e6054096a8947289ba0b6da3cba559ecb11c851d7bdfc9ca395b46d8d8",
strip_prefix = "rules_k8s-0.1",
urls = ["https://github.com/bazelbuild/rules_k8s/releases/download/v0.1/rules_k8s-v0.1.tar.gz"],
)
load("#io_bazel_rules_k8s//k8s:k8s.bzl", "k8s_repositories")
k8s_repositories()
load("#io_bazel_rules_k8s//k8s:k8s.bzl", "k8s_defaults")
k8s_defaults(
name = "k8s_deploy",
kind = "deployment",
cluster = "kubernetes",
)
BUILD.bazel file:
load("#io_bazel_rules_go//go:def.bzl", "go_binary", "go_library")
go_binary(
name = "hello_go",
embed = [":go_default_library"],
visibility = ["//visibility:public"],
)
go_library(
name = "go_default_library",
srcs = ["main.go"],
)
load("#io_bazel_rules_docker//go:image.bzl", "go_image")
go_image(
name = "go-image",
base = ":test",
embed = [":go_default_library"],
)
load("#io_bazel_rules_docker//container:image.bzl", "container_image")
container_image(
name = "test",
base = "#go_image_base//image",
user = "101",
)
load("#io_bazel_rules_k8s//k8s:object.bzl", "k8s_object")
k8s_object(
name = "dev",
kind = "deployment",
template = ":deployment.yaml",
cluster = "kubernetes",
images = {
"xxxxx.net/test/new:v1": ":go-image",
},
)
deployment.yaml file:
apiVersion: apps/v1beta1
kind: Deployment
metadata:
  name: staging
spec:
  replicas: 1
  template:
    metadata:
      labels:
        app: test
    spec:
      containers:
      - name: test
        image: xxxxx.net/test/new:v1
        imagePullPolicy: Always
        ports:
        - containerPort: 50051
On the same server, I have the kubeconfig file at /root/.kube/config.
I read through the Karpenter documentation at https://karpenter.sh/v0.16.1/getting-started/getting-started-with-terraform/#install-karpenter-helm-chart and followed the instructions step by step, but I got errors at the end.
kubectl logs -f -n karpenter -l app.kubernetes.io/name=karpenter -c controller
DEBUG controller.provisioning Relaxing soft constraints for pod since it previously failed to schedule, removing: spec.topologySpreadConstraints = {"maxSkew":1,"topologyKey":"topology.kubernetes.io/zone","whenUnsatisfiable":"ScheduleAnyway","labelSelector":{"matchLabels":{"app.kubernetes.io/instance":"karpenter","app.kubernetes.io/name":"karpenter"}}} {"commit": "b157d45", "pod": "karpenter/karpenter-5755bb5b54-rh65t"}
2022-09-10T00:13:13.122Z ERROR controller.provisioning Could not schedule pod, incompatible with provisioner "default", incompatible requirements, key karpenter.sh/provisioner-name, karpenter.sh/provisioner-name DoesNotExist not in karpenter.sh/provisioner-name In [default] {"commit": "b157d45", "pod": "karpenter/karpenter-5755bb5b54-rh65t"}
Below is the source code:
cat main.tf
terraform {
  required_version = "~> 1.0"

  required_providers {
    aws = {
      source  = "hashicorp/aws"
      version = "~> 4.0"
    }
    helm = {
      source  = "hashicorp/helm"
      version = "~> 2.5"
    }
    kubectl = {
      source  = "gavinbunney/kubectl"
      version = "~> 1.14"
    }
  }
}

provider "aws" {
  region = "us-east-1"
}

locals {
  cluster_name = "karpenter-demo"

  # Used to determine correct partition (i.e. - `aws`, `aws-gov`, `aws-cn`, etc.)
  partition = data.aws_partition.current.partition
}

data "aws_partition" "current" {}

module "vpc" {
  # https://registry.terraform.io/modules/terraform-aws-modules/vpc/aws/latest
  source  = "terraform-aws-modules/vpc/aws"
  version = "3.14.4"

  name = local.cluster_name
  cidr = "10.0.0.0/16"

  azs             = ["us-east-1a", "us-east-1b", "us-east-1c"]
  private_subnets = ["10.0.1.0/24", "10.0.2.0/24", "10.0.3.0/24"]
  public_subnets  = ["10.0.101.0/24", "10.0.102.0/24", "10.0.103.0/24"]

  enable_nat_gateway     = true
  single_nat_gateway     = true
  one_nat_gateway_per_az = false

  public_subnet_tags = {
    "kubernetes.io/cluster/${local.cluster_name}" = "shared"
    "kubernetes.io/role/elb"                      = 1
  }

  private_subnet_tags = {
    "kubernetes.io/cluster/${local.cluster_name}" = "shared"
    "kubernetes.io/role/internal-elb"             = 1
  }
}
module "eks" {
# https://registry.terraform.io/modules/terraform-aws-modules/eks/aws/latest
source = "terraform-aws-modules/eks/aws"
version = "18.29.0"
cluster_name = local.cluster_name
cluster_version = "1.22"
vpc_id = module.vpc.vpc_id
subnet_ids = module.vpc.private_subnets
# Required for Karpenter role below
enable_irsa = true
node_security_group_additional_rules = {
ingress_nodes_karpenter_port = {
description = "Cluster API to Node group for Karpenter webhook"
protocol = "tcp"
from_port = 8443
to_port = 8443
type = "ingress"
source_cluster_security_group = true
}
}
node_security_group_tags = {
# NOTE - if creating multiple security groups with this module, only tag the
# security group that Karpenter should utilize with the following tag
# (i.e. - at most, only one security group should have this tag in your account)
"karpenter.sh/discovery/${local.cluster_name}" = local.cluster_name
}
# Only need one node to get Karpenter up and running.
# This ensures core services such as VPC CNI, CoreDNS, etc. are up and running
# so that Karpenter can be deployed and start managing compute capacity as required
eks_managed_node_groups = {
initial = {
instance_types = ["m5.large"]
# Not required nor used - avoid tagging two security groups with same tag as well
create_security_group = false
min_size = 1
max_size = 1
desired_size = 1
iam_role_additional_policies = [
"arn:${local.partition}:iam::aws:policy/AmazonSSMManagedInstanceCore", # Required by Karpenter
"arn:${local.partition}:iam::aws:policy/AmazonEKSWorkerNodePolicy",
"arn:${local.partition}:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly", #for access to ECR images
"arn:${local.partition}:iam::aws:policy/CloudWatchAgentServerPolicy"
]
tags = {
# This will tag the launch template created for use by Karpenter
"karpenter.sh/discovery/${local.cluster_name}" = local.cluster_name
}
}
}
}
# The EKS module creates an IAM role for the EKS managed node group nodes. We'll use that for Karpenter.
# We need to create an instance profile we can reference.
# Karpenter can use this instance profile to launch new EC2 instances and those instances will be able to connect to your cluster.
resource "aws_iam_instance_profile" "karpenter" {
  name = "KarpenterNodeInstanceProfile-${local.cluster_name}"
  role = module.eks.eks_managed_node_groups["initial"].iam_role_name
}

# Create the KarpenterController IAM Role
# Karpenter requires permissions like launching instances, which means it needs an IAM role that grants it access. The config
# below will create an AWS IAM Role, attach a policy, and authorize the Service Account to assume the role using IRSA. We will
# create the ServiceAccount and connect it to this role during the Helm chart install.
module "karpenter_irsa" {
  source  = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks"
  version = "5.3.3"

  role_name                          = "karpenter-controller-${local.cluster_name}"
  attach_karpenter_controller_policy = true

  karpenter_tag_key               = "karpenter.sh/discovery/${local.cluster_name}"
  karpenter_controller_cluster_id = module.eks.cluster_id
  karpenter_controller_node_iam_role_arns = [
    module.eks.eks_managed_node_groups["initial"].iam_role_arn
  ]

  oidc_providers = {
    ex = {
      provider_arn               = module.eks.oidc_provider_arn
      namespace_service_accounts = ["karpenter:karpenter"]
    }
  }
}
# Install Karpenter Helm Chart
# Use helm to deploy Karpenter to the cluster. We are going to use the helm_release Terraform resource to do the deploy and pass in the
# cluster details and IAM role Karpenter needs to assume.
provider "helm" {
  kubernetes {
    host                   = module.eks.cluster_endpoint
    cluster_ca_certificate = base64decode(module.eks.cluster_certificate_authority_data)
    exec {
      api_version = "client.authentication.k8s.io/v1beta1"
      command     = "aws"
      args        = ["eks", "get-token", "--cluster-name", local.cluster_name]
    }
  }
}

resource "helm_release" "karpenter" {
  namespace        = "karpenter"
  create_namespace = true

  name       = "karpenter"
  repository = "https://charts.karpenter.sh"
  chart      = "karpenter"
  version    = "v0.16.1"

  set {
    name  = "serviceAccount.annotations.eks\\.amazonaws\\.com/role-arn"
    value = module.karpenter_irsa.iam_role_arn
  }

  set {
    name  = "clusterName"
    value = module.eks.cluster_id
  }

  set {
    name  = "clusterEndpoint"
    value = module.eks.cluster_endpoint
  }

  set {
    name  = "aws.defaultInstanceProfile"
    value = aws_iam_instance_profile.karpenter.name
  }
}
# Provisioner
# Create a default provisioner using the command below. This provisioner configures instances to connect to your cluster's endpoint and
# discovers resources like subnets and security groups using the cluster's name.
# This provisioner will create capacity as long as the sum of all created capacity is less than the specified limit.
provider "kubectl" {
  apply_retry_count      = 5
  host                   = module.eks.cluster_endpoint
  cluster_ca_certificate = base64decode(module.eks.cluster_certificate_authority_data)
  load_config_file       = false

  exec {
    api_version = "client.authentication.k8s.io/v1beta1"
    command     = "aws"
    args        = ["eks", "get-token", "--cluster-name", module.eks.cluster_id]
  }
}

resource "kubectl_manifest" "karpenter_provisioner" {
  yaml_body = <<-YAML
    apiVersion: karpenter.sh/v1alpha5
    kind: Provisioner
    metadata:
      name: default
    spec:
      requirements:
        - key: karpenter.sh/capacity-type
          operator: In
          values: ["spot"]
      limits:
        resources:
          cpu: 1000
      provider:
        subnetSelector:
          Name: "*private*"
        securityGroupSelector:
          karpenter.sh/discovery/${module.eks.cluster_id}: ${module.eks.cluster_id}
        tags:
          karpenter.sh/discovery/${module.eks.cluster_id}: ${module.eks.cluster_id}
      ttlSecondsAfterEmpty: 30
  YAML

  depends_on = [
    helm_release.karpenter
  ]
}
cat <<EOF | kubectl apply -f -
apiVersion: apps/v1
kind: Deployment
metadata:
  name: inflate
spec:
  replicas: 0
  selector:
    matchLabels:
      app: inflate
  template:
    metadata:
      labels:
        app: inflate
    spec:
      terminationGracePeriodSeconds: 0
      containers:
        - name: inflate
          image: public.ecr.aws/eks-distro/kubernetes/pause:3.2
          resources:
            requests:
              cpu: 1
EOF
kubectl scale deployment inflate --replicas 5
kubectl logs -f -n karpenter -l app.kubernetes.io/name=karpenter -c controller
DEBUG controller.provisioning Relaxing soft constraints for pod since it previously failed to schedule, removing: spec.topologySpreadConstraints = {"maxSkew":1,"topologyKey":"topology.kubernetes.io/zone","whenUnsatisfiable":"ScheduleAnyway","labelSelector":{"matchLabels":{"app.kubernetes.io/instance":"karpenter","app.kubernetes.io/name":"karpenter"}}} {"commit": "b157d45", "pod": "karpenter/karpenter-5755bb5b54-rh65t"}
2022-09-10T00:13:13.122Z ERROR controller.provisioning Could not schedule pod, incompatible with provisioner "default", incompatible requirements, key karpenter.sh/provisioner-name, karpenter.sh/provisioner-name DoesNotExist not in karpenter.sh/provisioner-name In [default] {"commit": "b157d45", "pod": "karpenter/karpenter-5755bb5b54-rh65t"}
I believe this is due to the pod topology spread constraints defined in the Karpenter deployment here:
https://github.com/aws/karpenter/blob/main/charts/karpenter/values.yaml#L73-L77
You can read further about what pod topologySpreadConstraints do here:
https://kubernetes.io/docs/concepts/scheduling-eviction/topology-spread-constraints/
If you increase desired_size to 2, which matches the default deployment replicas above, that should resolve the error; a minimal sketch of the change is shown below.
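For illustration only, here is a hedged sketch of that change applied to the eks_managed_node_groups block from the question. Only the sizing lines differ from the original; note that max_size must also be at least 2 for a desired_size of 2 to be accepted.

  eks_managed_node_groups = {
    initial = {
      instance_types        = ["m5.large"]
      create_security_group = false

      # Two nodes so the Karpenter deployment's default two replicas can spread
      # across hosts/zones and all become schedulable.
      min_size     = 1
      max_size     = 2
      desired_size = 2

      # ... iam_role_additional_policies and tags unchanged from the question ...
    }
  }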
I am running into an issue where terraform plan wants to recreate resources on every run that don't need to be recreated. This is a problem because some of the steps depend on those resources being available, and since they are recreated on each run, the script fails to complete.
My setup is Github Actions, Linode LKE, Terraform Cloud.
My main.tf file looks like this:
terraform {
  required_providers {
    linode = {
      source  = "linode/linode"
      version = "=1.16.0"
    }
    helm = {
      source  = "hashicorp/helm"
      version = "=2.1.0"
    }
  }
  backend "remote" {
    hostname     = "app.terraform.io"
    organization = "MY-ORG-HERE"
    workspaces {
      name = "MY-WORKSPACE-HERE"
    }
  }
}

provider "linode" {
}

provider "helm" {
  debug = true
  kubernetes {
    config_path = "${local_file.kubeconfig.filename}"
  }
}

resource "linode_lke_cluster" "lke_cluster" {
  label       = "MY-LABEL-HERE"
  k8s_version = "1.21"
  region      = "us-central"
  pool {
    type  = "g6-standard-2"
    count = 3
  }
}
and my outputs.tf file
resource "local_file" "kubeconfig" {
depends_on = [linode_lke_cluster.lke_cluster]
filename = "kube-config"
# filename = "${path.cwd}/kubeconfig"
content = base64decode(linode_lke_cluster.lke_cluster.kubeconfig)
}
resource "helm_release" "ingress-nginx" {
# depends_on = [local_file.kubeconfig]
depends_on = [linode_lke_cluster.lke_cluster, local_file.kubeconfig]
name = "ingress"
repository = "https://kubernetes.github.io/ingress-nginx"
chart = "ingress-nginx"
}
resource "null_resource" "custom" {
depends_on = [helm_release.ingress-nginx]
# change trigger to run every time
triggers = {
build_number = "${timestamp()}"
}
# download kubectl
provisioner "local-exec" {
command = "curl -LO https://storage.googleapis.com/kubernetes-release/release/$(curl -s https://storage.googleapis.com/kubernetes-release/release/stable.txt)/bin/linux/amd64/kubectl && chmod +x kubectl"
}
# apply changes
provisioner "local-exec" {
command = "./kubectl apply -f ./k8s/ --kubeconfig ${local_file.kubeconfig.filename}"
}
}
In Github Actions, I'm running these steps:
jobs:
  init-terraform:
    runs-on: ubuntu-latest
    defaults:
      run:
        working-directory: ./terraform
    steps:
      - name: Checkout code
        uses: actions/checkout@v2
        with:
          ref: 'privatebeta-kubes'
      - name: Setup Terraform
        uses: hashicorp/setup-terraform@v1
        with:
          cli_config_credentials_token: ${{ secrets.TERRAFORM_API_TOKEN }}
      - name: Terraform Init
        run: terraform init
      - name: Terraform Format Check
        run: terraform fmt -check -v
      - name: List terraform state
        run: terraform state list
      - name: Terraform Plan
        run: terraform plan
        id: plan
        env:
          LINODE_TOKEN: ${{ secrets.LINODE_TOKEN }}
When I look at the results of terraform state list I can see my resources:
Run terraform state list
terraform state list
shell: /usr/bin/bash -e {0}
env:
TERRAFORM_CLI_PATH: /home/runner/work/_temp/3f9749b8-515b-4cb4-8053-1a6318496321
/home/runner/work/_temp/3f9749b8-515b-4cb4-8053-1a6318496321/terraform-bin state list
helm_release.ingress-nginx
linode_lke_cluster.lke_cluster
local_file.kubeconfig
null_resource.custom
But my terraform plan fails and the issue seems to stem from the fact that those resources try to get recreated.
Run terraform plan
terraform plan
shell: /usr/bin/bash -e {0}
env:
TERRAFORM_CLI_PATH: /home/runner/work/_temp/3f9749b8-515b-4cb4-8053-1a6318496321
LINODE_TOKEN: ***
/home/runner/work/_temp/3f9749b8-515b-4cb4-8053-1a6318496321/terraform-bin plan
Running plan in the remote backend. Output will stream here. Pressing Ctrl-C
will stop streaming the logs, but will not stop the plan running remotely.
Preparing the remote plan...
Waiting for the plan to start...
Terraform v1.0.2
on linux_amd64
Configuring remote state backend...
Initializing Terraform configuration...
linode_lke_cluster.lke_cluster: Refreshing state... [id=31946]
local_file.kubeconfig: Refreshing state... [id=fbb5520298c7c824a8069397ef179e1bc971adde]
helm_release.ingress-nginx: Refreshing state... [id=ingress]
╷
│ Error: Kubernetes cluster unreachable: stat kube-config: no such file or directory
│
│ with helm_release.ingress-nginx,
│ on outputs.tf line 8, in resource "helm_release" "ingress-nginx":
│ 8: resource "helm_release" "ingress-nginx" {
Is there a way to tell terraform it doesn't need to recreate those resources?
Regarding the actual error shown, Error: Kubernetes cluster unreachable: stat kube-config: no such file or directory, which references your outputs file: I found this issue, which could help with your specific error: https://github.com/hashicorp/terraform-provider-helm/issues/418 (a rough sketch of that kind of workaround follows).
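One workaround of the kind discussed there is to configure the helm provider from the cluster resource's attributes instead of a file on disk, so the credentials do not depend on a local_file existing at plan time. This is a sketch only: it assumes the LKE kubeconfig exposes a token-based user, the attribute paths are illustrative and worth double-checking, and provider configurations that depend on values unknown before apply come with their own caveats.

locals {
  # decode the base64-encoded kubeconfig returned by the LKE cluster resource
  kubeconfig = yamldecode(base64decode(linode_lke_cluster.lke_cluster.kubeconfig))
}

provider "helm" {
  kubernetes {
    host                   = local.kubeconfig.clusters[0].cluster.server
    token                  = local.kubeconfig.users[0].user.token
    cluster_ca_certificate = base64decode(local.kubeconfig.clusters[0].cluster["certificate-authority-data"])
  }
}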
One other thing looks strange to me: why does your outputs.tf contain resources and not outputs? Shouldn't your outputs.tf look something like this?
output "local_file_kubeconfig" {
  value = "reference.to.resource"
}
Also I see your state file / backend config looks like it's properly configured.
I recommend logging into your terraform cloud account to verify that the workspace is indeed there, as expected. It's the state file that tells terraform not to re-create the resources it manages.
If the resources are already there and terraform is trying to re-create them, that could indicate that those resources were created prior to using terraform or possibly within another terraform cloud workspace or plan.
Did you end up renaming your backend workspace at any point with this plan? I'm referring to your main.tf file, the part where it says MY-WORKSPACE-HERE:
terraform {
  required_providers {
    linode = {
      source  = "linode/linode"
      version = "=1.16.0"
    }
    helm = {
      source  = "hashicorp/helm"
      version = "=2.1.0"
    }
  }
  backend "remote" {
    hostname     = "app.terraform.io"
    organization = "MY-ORG-HERE"
    workspaces {
      name = "MY-WORKSPACE-HERE"
    }
  }
}
Unfortunately I am not a Kubernetes expert, so perhaps someone else can help more there.
I have this BUILD file:
package(default_visibility = ["//visibility:public"])

load("@npm_bazel_typescript//:index.bzl", "ts_library")

ts_library(
    name = "lib",
    srcs = glob(
        include = ["**/*.ts"],
        exclude = ["**/*.spec.ts"],
    ),
    deps = [
        "//packages/enums/src:lib",
        "//packages/hello/src:lib",
        "@npm//faker",
        "@npm//@types/faker",
        "@npm//express",
        "@npm//@types/express",
    ],
)

load("@io_bazel_rules_docker//nodejs:image.bzl", "nodejs_image")

nodejs_image(
    name = "server",
    data = [":lib"],
    entry_point = ":index.ts",
)

load("@io_bazel_rules_docker//container:container.bzl", "container_push")

container_push(
    name = "push_server",
    image = ":server",
    format = "Docker",
    registry = "gcr.io",
    repository = "learning-bazel-monorepo/server",
    tag = "dev",
)

load("@io_bazel_rules_k8s//k8s:object.bzl", "k8s_object")

k8s_object(
    name = "k8s_deploy",
    kind = "deployment",
    namespace = "default",
    template = ":server.yaml",
    images = {
        "deploy_server:do_not_delete": ":server",
    },
)
But when running the k8s_deploy rule I get this error:
INFO: Analyzed target //services/server/src:k8s_deploy (1 packages loaded, 7 targets configured).
INFO: Found 1 target...
Target //services/server/src:k8s_deploy up-to-date:
bazel-bin/services/server/src/k8s_deploy.substituted.yaml
bazel-bin/services/server/src/k8s_deploy
INFO: Elapsed time: 0.276s, Critical Path: 0.01s
INFO: 0 processes.
INFO: Build completed successfully, 1 total action
INFO: Build completed successfully, 1 total action
2019/12/22 07:45:14 Unable to publish images: unable to publish image deploy_server:do_not_delete
The lib, server and push_server rules work fine, so I don't know what the issue is, as there is no specific error message.
A snippet out of my server.yaml file:
spec:
  containers:
  - name: server
    image: deploy_server:do_not_delete
You can try it yourself by running bazel run //services/server/src:k8s_deploy on this repo: https://github.com/flolude/minimal-bazel-monorepo/tree/de898eb1bb4edf0e0b1b99c290ff7ab57db81988
Have you pushed images using this syntax before?
I'm used to using the full repository tag for both the server.yaml and the k8s_object images.
So, instead of just "deploy_server:do_not_delete", try "gcr.io/learning-bazel-monorepo/deploy_server:do_not_delete", as sketched below.
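For example, a hedged sketch of both changes; the gcr.io path mirrors the registry and repository already used by the push_server rule, so adjust it to wherever you actually push:

k8s_object(
    name = "k8s_deploy",
    kind = "deployment",
    namespace = "default",
    template = ":server.yaml",
    images = {
        # full repository tag instead of the bare local name
        "gcr.io/learning-bazel-monorepo/deploy_server:do_not_delete": ":server",
    },
)

Then point the container in server.yaml at the same tag (image: gcr.io/learning-bazel-monorepo/deploy_server:do_not_delete) so it matches the key in the images dictionary.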
I have a GitLab cloud instance connected to a k8s cluster running on Google (GKE).
The cluster was created via Gitlab cloud.
I want to customise the config.toml because I want to fix the cache on k8s as suggested in this issue.
I found the config.toml configuration in the runner-gitlab-runner ConfigMap.
I updated the ConfigMap to contain this config.toml setup:
config.toml: |
  concurrent = 4
  check_interval = 3
  log_level = "info"
  listen_address = '[::]:9252'

  [[runners]]
    executor = "kubernetes"
    cache_dir = "/tmp/gitlab/cache"
    [runners.kubernetes]
      memory_limit = "1Gi"
      [runners.kubernetes.node_selector]
        gitlab = "true"
      [[runners.kubernetes.volumes.host_path]]
        name = "gitlab-cache"
        mount_path = "/tmp/gitlab/cache"
        host_path = "/home/core/data/gitlab-runner/data"
To apply the changes I deleted the runner-gitlab-runner-xxxx-xxx pod so a new one gets created with the updated config.toml.
However, when I look into the new pod, the /home/gitlab-runner/.gitlab-runner/config.toml now contains 2 [[runners]] sections:
listen_address = "[::]:9252"
concurrent = 4
check_interval = 3
log_level = "info"

[session_server]
  session_timeout = 1800

[[runners]]
  name = ""
  url = ""
  token = ""
  executor = "kubernetes"
  cache_dir = "/tmp/gitlab/cache"
  [runners.kubernetes]
    host = ""
    bearer_token_overwrite_allowed = false
    image = ""
    namespace = ""
    namespace_overwrite_allowed = ""
    privileged = false
    memory_limit = "1Gi"
    service_account_overwrite_allowed = ""
    pod_annotations_overwrite_allowed = ""
    [runners.kubernetes.node_selector]
      gitlab = "true"
    [runners.kubernetes.volumes]
      [[runners.kubernetes.volumes.host_path]]
        name = "gitlab-cache"
        mount_path = "/tmp/gitlab/cache"
        host_path = "/home/core/data/gitlab-runner/data"

[[runners]]
  name = "runner-gitlab-runner-xxx-xxx"
  url = "https://gitlab.com/"
  token = "<my-token>"
  executor = "kubernetes"
  [runners.cache]
    [runners.cache.s3]
    [runners.cache.gcs]
  [runners.kubernetes]
    host = ""
    bearer_token_overwrite_allowed = false
    image = "ubuntu:16.04"
    namespace = "gitlab-managed-apps"
    namespace_overwrite_allowed = ""
    privileged = true
    service_account_overwrite_allowed = ""
    pod_annotations_overwrite_allowed = ""
    [runners.kubernetes.volumes]
The file /scripts/config.toml is the configuration as I created it in the ConfigMap.
So I suspect the /home/gitlab-runner/.gitlab-runner/config.toml is somehow updated when registering the Gitlab-Runner with the Gitlab cloud.
If changing the config.toml via the ConfigMap does not work, how should I then change the configuration? I cannot find anything about this in GitLab or the GitLab documentation.
Inside the ConfigMap (the mapping) you can try appending the volume and the extra configuration parameters:
# Add docker volumes
cat >> /home/gitlab-runner/.gitlab-runner/config.toml << EOF
[[runners.kubernetes.volumes.host_path]]
name = "var-run-docker-sock"
mount_path = "/var/run/docker.sock"
EOF
I did the runner deployment using a Helm chart; I guess you did the same. In the following link you will find more information about the approach I mention: https://gitlab.com/gitlab-org/gitlab-runner/issues/2578
If the pod is not able to start after appending the config, check the logs (see the example below). I tested the appending approach and had some errors like "Directory not Found"; it was because I was appending to the wrong path, but after fixing those issues the runner works fine.
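For example, something like this, assuming the runner pod lives in the gitlab-managed-apps namespace as in the generated config shown in the question:

kubectl -n gitlab-managed-apps get pods
kubectl -n gitlab-managed-apps logs runner-gitlab-runner-xxxx-xxx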
It seems to me you should be modifying config.template.toml (within your relevant ConfigMap, that is); a sketch follows.
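A hedged sketch of what that could look like in the ConfigMap, reusing the cache settings from the question and assuming the chart exposes a config.template.toml key that it passes to the runner's registration via --template-config (key names can differ between chart versions):

config.template.toml: |
  [[runners]]
    cache_dir = "/tmp/gitlab/cache"
    [runners.kubernetes]
      [[runners.kubernetes.volumes.host_path]]
        name = "gitlab-cache"
        mount_path = "/tmp/gitlab/cache"
        host_path = "/home/core/data/gitlab-runner/data"

Because the template should be merged into the runner entry created at registration, the extra settings no longer end up as a separate, token-less [[runners]] block.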
If you want to modify the existing config.toml in /home/gitlab-runner/.gitlab-runner, you need to set environment variables in the deployment. For example, this is the default set of variables when you have installed gitlab-runner by pressing the install button in GitLab.
Environment:
CI_SERVER_URL: http://git.example.com/
CLONE_URL:
RUNNER_REQUEST_CONCURRENCY: 1
RUNNER_EXECUTOR: kubernetes
REGISTER_LOCKED: true
RUNNER_TAG_LIST:
RUNNER_OUTPUT_LIMIT: 4096
KUBERNETES_IMAGE: ubuntu:16.04
KUBERNETES_PRIVILEGED: true
KUBERNETES_NAMESPACE: gitlab-managed-apps
KUBERNETES_POLL_TIMEOUT: 180
KUBERNETES_CPU_LIMIT:
KUBERNETES_CPU_LIMIT_OVERWRITE_MAX_ALLOWED:
KUBERNETES_MEMORY_LIMIT:
KUBERNETES_MEMORY_LIMIT_OVERWRITE_MAX_ALLOWED:
KUBERNETES_CPU_REQUEST:
KUBERNETES_CPU_REQUEST_OVERWRITE_MAX_ALLOWED:
KUBERNETES_MEMORY_REQUEST:
KUBERNETES_MEMORY_REQUEST_OVERWRITE_MAX_ALLOWED:
KUBERNETES_SERVICE_ACCOUNT:
KUBERNETES_SERVICE_CPU_LIMIT:
KUBERNETES_SERVICE_MEMORY_LIMIT:
KUBERNETES_SERVICE_CPU_REQUEST:
KUBERNETES_SERVICE_MEMORY_REQUEST:
KUBERNETES_HELPER_CPU_LIMIT:
KUBERNETES_HELPER_MEMORY_LIMIT:
KUBERNETES_HELPER_CPU_REQUEST:
KUBERNETES_HELPER_MEMORY_REQUEST:
KUBERNETES_HELPER_IMAGE:
Modify existing values or add new ones, and they will appear in the correct section of config.toml; see the sketch below.
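As an illustration only, a hedged sketch of setting two of the variables listed above in the runner Deployment's container spec (the exact Deployment layout depends on how the runner was installed):

spec:
  template:
    spec:
      containers:
        - name: runner-gitlab-runner
          env:
            - name: KUBERNETES_IMAGE          # default job image
              value: "ubuntu:16.04"
            - name: KUBERNETES_POLL_TIMEOUT   # seconds to wait for job pods
              value: "180"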
Has anyone successfully set up a Kubernetes executor/runner on GitLab for CI jobs? I set up mine, but it gets stuck executing my pipeline indefinitely.
I'm running the runner as a Docker container on top of a Kubernetes cluster and connecting it to my GitLab instance to handle my CI builds.
Any working config file would be appreciated.
My runner configuration looks like this:
[[runners]]
  name = "kube-executor"
  url = "https://gitlab.example.ltd/"
  token = "some-token"
  executor = "kubernetes"
  [runners.cache]
  [runners.kubernetes]
    host = "https://my-kubernetes-api-address:443"
    ca_file = "/etc/ssl/certs/ca.crt"
    cert_file = "/etc/ssl/certs/server.crt"
    key_file = "/etc/ssl/certs/server.key"
    image = "docker:latest"
    namespace = "gitlab"
    namespace_overwrite_allowed = "ci-.*"
    privileged = true
    cpu_limit = "1"
    memory_limit = "1Gi"
    service_cpu_limit = "1"
    service_memory_limit = "1Gi"
    helper_cpu_limit = "500m"
    helper_memory_limit = "100Mi"
    poll_interval = 5
    poll_timeout = 3600
    [runners.kubernetes.volumes]
This throws the following error: ERROR: Job failed (system failure): Post https://my-kubernetes-api-address:443/api/v1/namespaces/gitlab/secrets: x509: certificate signed by unknown authority
You are using HTTPS, so where are the certs? Are they self-signed? If yes, you have to mention the --tls-cert-file and --tls-private-key-file flags in your ConfigMap.
Copied from https://stackoverflow.com/a/43362697/432115