Is there any way to inject environment variables into init-containers when they come pre-loaded with your InferenceService/deployment? - kubernetes

I am creating an InferenceService instance using the given YAML file:
apiVersion: "serving.kubeflow.org/v1alpha2"
kind: "InferenceService"
metadata:
minReplicas: 1
name: "sklearn-iris"
spec:
default:
predictor:
sklearn:
storageUri: "gs://kfserving-samples/models/sklearn/iris"
Now this will create a deployment, and since I am working from behind a proxy I am injecting the proxy environment variables as follows:
kubectl set env deployment/sklearn-iris-predictor-default-dclkq-deployment -n kfserving-test http_proxy={http_proxy value}
kubectl set env deployment/sklearn-iris-predictor-default-dclkq-deployment -n kfserving-test https_proxy={https_proxy value}
kubectl set env deployment/sklearn-iris-predictor-default-dclkq-deployment -n kfserving-test no_proxy={no_proxy value}
Since I have fixed minimum replicas at 1, one pod is kept running even without traffic. When this pod is created it runs 1 init-container and 2 containers; with the kubectl set env approach above, the proxies are set in the containers' environments but not in the init-container, so the init-container fails and the whole thing fails.
So, in crux: is there any way to set proxy/env details in the init-container, without having the whole deployment YAML file available to configure the env?
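(For reference, the split can be checked with jsonpath queries along these lines; the pod name is a placeholder:)
kubectl get pod <pod-name> -n kfserving-test -o jsonpath='{range .spec.initContainers[*]}{.name}: {.env}{"\n"}{end}'
kubectl get pod <pod-name> -n kfserving-test -o jsonpath='{range .spec.containers[*]}{.name}: {.env}{"\n"}{end}'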
Edit:
kubectl edit deploy/deployment_name -o yaml -n namespace gives
apiVersion: apps/v1
kind: Deployment
metadata:
annotations:
autoscaling.knative.dev/class: kpa.autoscaling.knative.dev
autoscaling.knative.dev/minScale: "1"
deployment.kubernetes.io/revision: "1"
internal.serving.kubeflow.org/storage-initializer-sourceuri: gs://kfserving-samples/models/sklearn/iris
serving.knative.dev/creator: system:serviceaccount:kfserving-system:default
creationTimestamp: "2021-02-03T06:51:29Z"
generation: 3
labels:
app: sklearn-iris-predictor-default-6xcgj
component: predictor
service.istio.io/canonical-name: sklearn-iris-predictor-default
service.istio.io/canonical-revision: sklearn-iris-predictor-default-6xcgj
serving.knative.dev/configuration: sklearn-iris-predictor-default
serving.knative.dev/configurationGeneration: "1"
serving.knative.dev/revision: sklearn-iris-predictor-default-6xcgj
serving.knative.dev/revisionUID: 470195f7-db41-4e9c-ac6b-c96c79a1218f
serving.knative.dev/service: sklearn-iris-predictor-default
serving.kubeflow.org/inferenceservice: sklearn-iris
name: sklearn-iris-predictor-default-6xcgj-deployment
namespace: kfserving-test
ownerReferences:
- apiVersion: serving.knative.dev/v1
blockOwnerDeletion: true
controller: true
kind: Revision
name: sklearn-iris-predictor-default-6xcgj
uid: 470195f7-db41-4e9c-ac6b-c96c79a1218f
resourceVersion: "633491"
selfLink: /apis/apps/v1/namespaces/kfserving-test/deployments/sklearn-iris-predictor-default-6xcgj-deployment
uid: 2fc10485-ba59-4eaf-b62a-480ecf4ab078
spec:
progressDeadlineSeconds: 120
replicas: 1
revisionHistoryLimit: 10
selector:
matchLabels:
serving.knative.dev/revisionUID: 470195f7-db41-4e9c-ac6b-c96c79a1218f
strategy:
rollingUpdate:
maxSurge: 25%
maxUnavailable: 25%
type: RollingUpdate
template:
metadata:
annotations:
autoscaling.knative.dev/class: kpa.autoscaling.knative.dev
autoscaling.knative.dev/minScale: "1"
internal.serving.kubeflow.org/storage-initializer-sourceuri: gs://kfserving-samples/models/sklearn/iris
serving.knative.dev/creator: system:serviceaccount:kfserving-system:default
creationTimestamp: null
labels:
app: sklearn-iris-predictor-default-6xcgj
component: predictor
service.istio.io/canonical-name: sklearn-iris-predictor-default
service.istio.io/canonical-revision: sklearn-iris-predictor-default-6xcgj
serving.knative.dev/configuration: sklearn-iris-predictor-default
serving.knative.dev/configurationGeneration: "1"
serving.knative.dev/revision: sklearn-iris-predictor-default-6xcgj
serving.knative.dev/revisionUID: 470195f7-db41-4e9c-ac6b-c96c79a1218f
serving.knative.dev/service: sklearn-iris-predictor-default
serving.kubeflow.org/inferenceservice: sklearn-iris
spec:
containers:
- args:
- --model_name=sklearn-iris
- --model_dir=/mnt/models
- --http_port=8080
- --workers=0
env:
- name: http_proxy
value: {proxy data}
- name: https_proxy
value: {proxy data}
- name: no_proxy
value: {no proxy data}
- name: PORT
value: "8080"
- name: K_REVISION
value: sklearn-iris-predictor-default-6xcgj
- name: K_CONFIGURATION
value: sklearn-iris-predictor-default
- name: K_SERVICE
value: sklearn-iris-predictor-default
- name: K_INTERNAL_POD_NAME
valueFrom:
fieldRef:
apiVersion: v1
fieldPath: metadata.name
- name: K_INTERNAL_POD_NAMESPACE
valueFrom:
fieldRef:
apiVersion: v1
fieldPath: metadata.namespace
image: gcr.io/kfserving/sklearnserver@sha256:fd87e984a6092aae6efd28a2d596aac16d83d207a0269a503a221cb24cfd2f39
imagePullPolicy: IfNotPresent
lifecycle:
preStop:
httpGet:
path: /wait-for-drain
port: 8022
scheme: HTTP
name: kfserving-container
ports:
- containerPort: 8080
name: user-port
protocol: TCP
resources:
limits:
cpu: "1"
memory: 2Gi
requests:
cpu: "1"
memory: 2Gi
terminationMessagePath: /dev/termination-log
terminationMessagePolicy: FallbackToLogsOnError
volumeMounts:
- mountPath: /var/log
name: knative-var-log
subPathExpr: $(K_INTERNAL_POD_NAMESPACE)_$(K_INTERNAL_POD_NAME)_kfserving-container
- env:
- name: SERVING_NAMESPACE
value: kfserving-test
- name: SERVING_SERVICE
value: sklearn-iris-predictor-default
- name: SERVING_CONFIGURATION
value: sklearn-iris-predictor-default
- name: SERVING_REVISION
value: sklearn-iris-predictor-default-6xcgj
- name: QUEUE_SERVING_PORT
value: "8012"
- name: CONTAINER_CONCURRENCY
value: "0"
- name: REVISION_TIMEOUT_SECONDS
value: "300"
- name: SERVING_POD
valueFrom:
fieldRef:
apiVersion: v1
fieldPath: metadata.name
- name: SERVING_POD_IP
valueFrom:
fieldRef:
apiVersion: v1
fieldPath: status.podIP
- name: SERVING_LOGGING_CONFIG
value: |-
{
"level": "info",
"development": false,
"outputPaths": ["stdout"],
"errorOutputPaths": ["stderr"],
"encoding": "json",
"encoderConfig": {
"timeKey": "ts",
"levelKey": "level",
"nameKey": "logger",
"callerKey": "caller",
"messageKey": "msg",
"stacktraceKey": "stacktrace",
"lineEnding": "",
"levelEncoder": "",
"timeEncoder": "iso8601",
"durationEncoder": "",
"callerEncoder": ""
}
}
- name: SERVING_LOGGING_LEVEL
- name: SERVING_REQUEST_LOG_TEMPLATE
value: '{"httpRequest": {"requestMethod": "{{.Request.Method}}", "requestUrl":
"{{js .Request.RequestURI}}", "requestSize": "{{.Request.ContentLength}}",
"status": {{.Response.Code}}, "responseSize": "{{.Response.Size}}", "userAgent":
"{{js .Request.UserAgent}}", "remoteIp": "{{js .Request.RemoteAddr}}",
"serverIp": "{{.Revision.PodIP}}", "referer": "{{js .Request.Referer}}",
"latency": "{{.Response.Latency}}s", "protocol": "{{.Request.Proto}}"},
"traceId": "{{index .Request.Header "X-B3-Traceid"}}"}'
- name: SERVING_ENABLE_REQUEST_LOG
value: "false"
- name: SERVING_REQUEST_METRICS_BACKEND
value: prometheus
- name: TRACING_CONFIG_BACKEND
value: none
- name: TRACING_CONFIG_ZIPKIN_ENDPOINT
- name: TRACING_CONFIG_STACKDRIVER_PROJECT_ID
- name: TRACING_CONFIG_DEBUG
value: "false"
- name: TRACING_CONFIG_SAMPLE_RATE
value: "0.1"
- name: USER_PORT
value: "8080"
- name: SYSTEM_NAMESPACE
value: knative-serving
- name: METRICS_DOMAIN
value: knative.dev/internal/serving
- name: SERVING_READINESS_PROBE
value: '{"tcpSocket":{"port":8080,"host":"127.0.0.1"},"successThreshold":1}'
- name: ENABLE_PROFILING
value: "false"
- name: SERVING_ENABLE_PROBE_REQUEST_LOG
value: "false"
- name: METRICS_COLLECTOR_ADDRESS
image: gcr.io/knative-releases/knative.dev/serving/cmd/queue@sha256:0db974f58b48b219ab8047e11b481c2bbda52b7a2d54db5ed58e8659748ec125
imagePullPolicy: IfNotPresent
name: queue-proxy
ports:
- containerPort: 8022
name: http-queueadm
protocol: TCP
- containerPort: 9090
name: http-autometric
protocol: TCP
- containerPort: 9091
name: http-usermetric
protocol: TCP
- containerPort: 8012
name: queue-port
protocol: TCP
readinessProbe:
exec:
command:
- /ko-app/queue
- -probe-period
- "0"
failureThreshold: 3
periodSeconds: 10
successThreshold: 1
timeoutSeconds: 10
resources:
requests:
cpu: 25m
securityContext:
allowPrivilegeEscalation: false
terminationMessagePath: /dev/termination-log
terminationMessagePolicy: File
dnsPolicy: ClusterFirst
restartPolicy: Always
schedulerName: default-scheduler
securityContext: {}
terminationGracePeriodSeconds: 300
volumes:
- emptyDir: {}
name: knative-var-log
status:
conditions:
- lastTransitionTime: "2021-02-03T06:51:29Z"
lastUpdateTime: "2021-02-03T06:51:29Z"
message: Deployment does not have minimum availability.
reason: MinimumReplicasUnavailable
status: "False"
type: Available
- lastTransitionTime: "2021-02-03T07:38:08Z"
lastUpdateTime: "2021-02-03T07:38:08Z"
message: ReplicaSet "sklearn-iris-predictor-default-6xcgj-deployment-7c97895d96"
has timed out progressing.
reason: ProgressDeadlineExceeded
status: "False"
type: Progressing
observedGeneration: 2
replicas: 1
unavailableReplicas: 1
updatedReplicas: 1
kubectl describe pod podname -n namespace gives
Name: sklearn-iris-predictor-default-6xcgj-deployment-7c97895d96vqbgr
Namespace: kfserving-test
Priority: 0
Node: minikube/192.168.99.109
Start Time: Wed, 03 Feb 2021 13:50:33 +0530
Labels: app=sklearn-iris-predictor-default-6xcgj
component=predictor
pod-template-hash=7c97895d96
service.istio.io/canonical-name=sklearn-iris-predictor-default
service.istio.io/canonical-revision=sklearn-iris-predictor-default-6xcgj
serving.knative.dev/configuration=sklearn-iris-predictor-default
serving.knative.dev/configurationGeneration=1
serving.knative.dev/revision=sklearn-iris-predictor-default-6xcgj
serving.knative.dev/revisionUID=470195f7-db41-4e9c-ac6b-c96c79a1218f
serving.knative.dev/service=sklearn-iris-predictor-default
serving.kubeflow.org/inferenceservice=sklearn-iris
Annotations: autoscaling.knative.dev/class: kpa.autoscaling.knative.dev
autoscaling.knative.dev/minScale: 1
internal.serving.kubeflow.org/storage-initializer-sourceuri: gs://kfserving-samples/models/sklearn/iris
serving.knative.dev/creator: system:serviceaccount:kfserving-system:default
Status: Pending
IP: 172.17.0.22
Controlled By: ReplicaSet/sklearn-iris-predictor-default-6xcgj-deployment-7c97895d96
Init Containers:
storage-initializer:
Container ID: docker://262a195f39fad7dfc62b494d9c9bbda8c7cdeee2f4b903b2948b809c5e00fb0c
Image: gcr.io/kfserving/storage-initializer:v0.5.0-rc2
Image ID: docker-pullable://gcr.io/kfserving/storage-initializer@sha256:9a16e6af385412bb62fd7e09f6d749e107e3ad92c488039acd20361fb5dd68cc
Port: <none>
Host Port: <none>
Args:
gs://kfserving-samples/models/sklearn/iris
/mnt/models
State: Running
Started: Wed, 03 Feb 2021 13:58:00 +0530
Last State: Terminated
Reason: Error
Message: ownload(src_uri, dest_path)
File "/usr/local/lib/python3.7/site-packages/kfserving/storage.py", line 58, in download
Storage._download_gcs(uri, out_dir)
File "/usr/local/lib/python3.7/site-packages/kfserving/storage.py", line 116, in _download_gcs
for blob in blobs:
File "/usr/local/lib/python3.7/site-packages/google/api_core/page_iterator.py", line 212, in _items_iter
for page in self._page_iter(increment=False):
File "/usr/local/lib/python3.7/site-packages/google/api_core/page_iterator.py", line 243, in _page_iter
page = self._next_page()
File "/usr/local/lib/python3.7/site-packages/google/api_core/page_iterator.py", line 369, in _next_page
response = self._get_next_page_response()
File "/usr/local/lib/python3.7/site-packages/google/api_core/page_iterator.py", line 419, in _get_next_page_response
method=self._HTTP_METHOD, path=self.path, query_params=params
File "/usr/local/lib/python3.7/site-packages/google/cloud/storage/_http.py", line 63, in api_request
return call()
File "/usr/local/lib/python3.7/site-packages/google/api_core/retry.py", line 286, in retry_wrapped_func
on_error=on_error,
File "/usr/local/lib/python3.7/site-packages/google/api_core/retry.py", line 206, in retry_target
last_exc,
File "<string>", line 3, in raise_from
google.api_core.exceptions.RetryError: Deadline of 120.0s exceeded while calling functools.partial(functools.partial(<bound method JSONConnection.api_request of <google.cloud.storage._http.Connection object at 0x7fd57c954c50>>, timeout=60, method='GET', path='/b/kfserving-samples/o', query_params={'projection': 'noAcl', 'prefix': 'models/sklearn/iris/'})), last exception: HTTPSConnectionPool(host='storage.googleapis.com', port=443): Max retries exceeded with url: /storage/v1/b/kfserving-samples/o?projection=noAcl&prefix=models%2Fsklearn%2Firis%2F&prettyPrint=false (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7fd57c91bb90>: Failed to establish a new connection: [Errno 113] No route to host'))
Exit Code: 1
Started: Wed, 03 Feb 2021 13:53:53 +0530
Finished: Wed, 03 Feb 2021 13:57:45 +0530
Ready: False
Restart Count: 2
Limits:
cpu: 1
memory: 1Gi
Requests:
cpu: 100m
memory: 100Mi
Environment: <none>
Mounts:
/mnt/models from kfserving-provision-location (rw)
/var/run/secrets/kubernetes.io/serviceaccount from default-token-hw2rw (ro)
Containers:
kfserving-container:
Container ID:
Image: gcr.io/kfserving/sklearnserver@sha256:fd87e984a6092aae6efd28a2d596aac16d83d207a0269a503a221cb24cfd2f39
Image ID:
Port: 8080/TCP
Host Port: 0/TCP
Args:
--model_name=sklearn-iris
--model_dir=/mnt/models
--http_port=8080
--workers=0
State: Waiting
Reason: PodInitializing
Ready: False
Restart Count: 0
Limits:
cpu: 1
memory: 2Gi
Requests:
cpu: 1
memory: 2Gi
Environment:
PORT: 8080
K_REVISION: sklearn-iris-predictor-default-6xcgj
K_CONFIGURATION: sklearn-iris-predictor-default
K_SERVICE: sklearn-iris-predictor-default
K_INTERNAL_POD_NAME: sklearn-iris-predictor-default-6xcgj-deployment-7c97895d96vqbgr (v1:metadata.name)
K_INTERNAL_POD_NAMESPACE: kfserving-test (v1:metadata.namespace)
Mounts:
/mnt/models from kfserving-provision-location (ro)
/var/log from knative-var-log (rw)
/var/run/secrets/kubernetes.io/serviceaccount from default-token-hw2rw (ro)
queue-proxy:
Container ID:
Image: gcr.io/knative-releases/knative.dev/serving/cmd/queue@sha256:0db974f58b48b219ab8047e11b481c2bbda52b7a2d54db5ed58e8659748ec125
Image ID:
Ports: 8022/TCP, 9090/TCP, 9091/TCP, 8012/TCP
Host Ports: 0/TCP, 0/TCP, 0/TCP, 0/TCP
State: Waiting
Reason: PodInitializing
Ready: False
Restart Count: 0
Requests:
cpu: 25m
Readiness: exec [/ko-app/queue -probe-period 0] delay=0s timeout=10s period=10s #success=1 #failure=3
Environment:
SERVING_NAMESPACE: kfserving-test
SERVING_SERVICE: sklearn-iris-predictor-default
SERVING_CONFIGURATION: sklearn-iris-predictor-default
SERVING_REVISION: sklearn-iris-predictor-default-6xcgj
QUEUE_SERVING_PORT: 8012
CONTAINER_CONCURRENCY: 0
REVISION_TIMEOUT_SECONDS: 300
SERVING_POD: sklearn-iris-predictor-default-6xcgj-deployment-7c97895d96vqbgr (v1:metadata.name)
SERVING_POD_IP: (v1:status.podIP)
SERVING_LOGGING_CONFIG: {
"level": "info",
"development": false,
"outputPaths": ["stdout"],
"errorOutputPaths": ["stderr"],
"encoding": "json",
"encoderConfig": {
"timeKey": "ts",
"levelKey": "level",
"nameKey": "logger",
"callerKey": "caller",
"messageKey": "msg",
"stacktraceKey": "stacktrace",
"lineEnding": "",
"levelEncoder": "",
"timeEncoder": "iso8601",
"durationEncoder": "",
"callerEncoder": ""
}
}
SERVING_LOGGING_LEVEL:
SERVING_REQUEST_LOG_TEMPLATE: {"httpRequest": {"requestMethod": "{{.Request.Method}}", "requestUrl": "{{js .Request.RequestURI}}", "requestSize": "{{.Request.ContentLength}}", "status": {{.Response.Code}}, "responseSize": "{{.Response.Size}}", "userAgent": "{{js .Request.UserAgent}}", "remoteIp": "{{js .Request.RemoteAddr}}", "serverIp": "{{.Revision.PodIP}}", "referer": "{{js .Request.Referer}}", "latency": "{{.Response.Latency}}s", "protocol": "{{.Request.Proto}}"}, "traceId": "{{index .Request.Header "X-B3-Traceid"}}"}
SERVING_ENABLE_REQUEST_LOG: false
SERVING_REQUEST_METRICS_BACKEND: prometheus
TRACING_CONFIG_BACKEND: none
TRACING_CONFIG_ZIPKIN_ENDPOINT:
TRACING_CONFIG_STACKDRIVER_PROJECT_ID:
TRACING_CONFIG_DEBUG: false
TRACING_CONFIG_SAMPLE_RATE: 0.1
USER_PORT: 8080
SYSTEM_NAMESPACE: knative-serving
METRICS_DOMAIN: knative.dev/internal/serving
SERVING_READINESS_PROBE: {"tcpSocket":{"port":8080,"host":"127.0.0.1"},"successThreshold":1}
ENABLE_PROFILING: false
SERVING_ENABLE_PROBE_REQUEST_LOG: false
METRICS_COLLECTOR_ADDRESS:
Mounts:
/var/run/secrets/kubernetes.io/serviceaccount from default-token-hw2rw (ro)
Conditions:
Type Status
Initialized False
Ready False
ContainersReady False
PodScheduled True
Volumes:
knative-var-log:
Type: EmptyDir (a temporary directory that shares a pod's lifetime)
Medium:
SizeLimit: <unset>
default-token-hw2rw:
Type: Secret (a volume populated by a Secret)
SecretName: default-token-hw2rw
Optional: false
kfserving-provision-location:
Type: EmptyDir (a temporary directory that shares a pod's lifetime)
Medium:
SizeLimit: <unset>
QoS Class: Burstable
Node-Selectors: <none>
Tolerations: node.kubernetes.io/not-ready:NoExecute for 300s
node.kubernetes.io/unreachable:NoExecute for 300s
Events:
Type Reason Age From Message
---- ------ ---- ---- -------
Normal Scheduled 7m50s default-scheduler Successfully assigned kfserving-test/sklearn-iris-predictor-default-6xcgj-deployment-7c97895d96vqbgr to minikube
Warning BackOff 36s kubelet, minikube Back-off restarting failed container
Normal Pulled 24s (x3 over 7m47s) kubelet, minikube Container image "gcr.io/kfserving/storage-initializer:v0.5.0-rc2" already present on machine
Normal Created 23s (x3 over 7m47s) kubelet, minikube Created container storage-initializer
Normal Started 23s (x3 over 7m46s) kubelet, minikube Started container storage-initializer

I don't think it is possible to do this via the kubectl set env command.
When I tried running the command on my local setup, passing the init-container name, it returned this message:
warning: Deployment/dummy does not have any containers matching "dummy-init"
Command used:
kubectl set env -n dummy-ns deploy/dummy -c "dummy-init" dummy_env="true"
You can, however, use the kubectl edit command, which opens the full YAML in edit mode; add the required environment variable to whichever container you need and save the spec. This will create a new pod with the new spec.
kubectl edit -n dummy-ns deploy/dummy -o yaml
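For example, after the edit the init-container section of the pod template would look roughly like this (proxy values are placeholders, and this assumes the storage-initializer init container from the question):
initContainers:
- name: storage-initializer
  env:
  - name: http_proxy
    value: {http_proxy value}
  - name: https_proxy
    value: {https_proxy value}
  - name: no_proxy
    value: {no_proxy value}
If you prefer a non-interactive route, a JSON patch against the same path should also work; an untested sketch, assuming the init container is first in the list and has no env section yet:
kubectl patch -n dummy-ns deploy/dummy --type=json -p='[{"op": "add", "path": "/spec/template/spec/initContainers/0/env", "value": [{"name": "http_proxy", "value": "{http_proxy value}"}]}]'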

Related

Prometheus & Alert Manager keeps crashing after updating the EKS version to 1.16

prometheus-prometheus-kube-prometheus-prometheus-0 0/2 Terminating 0 4s
alertmanager-prometheus-kube-prometheus-alertmanager-0 0/2 Terminating 0 10s
After updating the EKS cluster from 1.15 to 1.16, everything works fine except these two pods; they keep terminating and are unable to initialise, so Prometheus monitoring does not work. I am getting the below errors while describing the pods.
Error: failed to start container "prometheus": Error response from daemon: OCI runtime create failed: container_linux.go:362: creating new parent process caused: container_linux.go:1941: running lstat on namespace path "/proc/29271/ns/ipc" caused: lstat /proc/29271/ns/ipc: no such file or directory: unknown
Error: failed to start container "config-reloader": Error response from daemon: cannot join network of a non running container: 7e139521980afd13dad0162d6859352b0b2c855773d6d4062ee3e2f7f822a0b3
Error: cannot find volume "config" to mount into container "config-reloader"
Error: cannot find volume "config" to mount into container "prometheus"
Here is the YAML of the pod:
apiVersion: v1
kind: Pod
metadata:
annotations:
kubernetes.io/psp: eks.privileged
creationTimestamp: "2021-04-30T16:39:14Z"
deletionGracePeriodSeconds: 600
deletionTimestamp: "2021-04-30T16:49:14Z"
generateName: prometheus-prometheus-kube-prometheus-prometheus-
labels:
app: prometheus
app.kubernetes.io/instance: prometheus-kube-prometheus-prometheus
app.kubernetes.io/managed-by: prometheus-operator
app.kubernetes.io/name: prometheus
app.kubernetes.io/version: 2.26.0
controller-revision-hash: prometheus-prometheus-kube-prometheus-prometheus-56d9fcf57
operator.prometheus.io/name: prometheus-kube-prometheus-prometheus
operator.prometheus.io/shard: "0"
prometheus: prometheus-kube-prometheus-prometheus
statefulset.kubernetes.io/pod-name: prometheus-prometheus-kube-prometheus-prometheus-0
name: prometheus-prometheus-kube-prometheus-prometheus-0
namespace: mo
ownerReferences:
- apiVersion: apps/v1
blockOwnerDeletion: true
controller: true
kind: StatefulSet
name: prometheus-prometheus-kube-prometheus-prometheus
uid: 326a09f2-319c-449d-904a-1dd0019c6d80
resourceVersion: "9337443"
selfLink: /api/v1/namespaces/monitoring/pods/prometheus-prometheus-kube-prometheus-prometheus-0
uid: e2be062f-749d-488e-a6cc-42ef1396851b
spec:
containers:
- args:
- --web.console.templates=/etc/prometheus/consoles
- --web.console.libraries=/etc/prometheus/console_libraries
- --config.file=/etc/prometheus/config_out/prometheus.env.yaml
- --storage.tsdb.path=/prometheus
- --storage.tsdb.retention.time=10d
- --web.enable-lifecycle
- --storage.tsdb.no-lockfile
- --web.external-url=http://prometheus-kube-prometheus-prometheus.monitoring:9090
- --web.route-prefix=/
image: quay.io/prometheus/prometheus:v2.26.0
imagePullPolicy: IfNotPresent
name: prometheus
ports:
- containerPort: 9090
name: web
protocol: TCP
readinessProbe:
failureThreshold: 120
httpGet:
path: /-/ready
port: web
scheme: HTTP
periodSeconds: 5
successThreshold: 1
timeoutSeconds: 3
resources: {}
terminationMessagePath: /dev/termination-log
terminationMessagePolicy: FallbackToLogsOnError
volumeMounts:
- mountPath: /etc/prometheus/config_out
name: config-out
readOnly: true
- mountPath: /etc/prometheus/certs
name: tls-assets
readOnly: true
- mountPath: /prometheus
name: prometheus-prometheus-kube-prometheus-prometheus-db
- mountPath: /etc/prometheus/rules/prometheus-prometheus-kube-prometheus-prometheus-rulefiles-0
name: prometheus-prometheus-kube-prometheus-prometheus-rulefiles-0
- mountPath: /var/run/secrets/kubernetes.io/serviceaccount
name: prometheus-kube-prometheus-prometheus-token-mh66q
readOnly: true
- args:
- --listen-address=:8080
- --reload-url=http://localhost:9090/-/reload
- --config-file=/etc/prometheus/config/prometheus.yaml.gz
- --config-envsubst-file=/etc/prometheus/config_out/prometheus.env.yaml
- --watched-dir=/etc/prometheus/rules/prometheus-prometheus-kube-prometheus-prometheus-rulefiles-0
command:
- /bin/prometheus-config-reloader
env:
- name: POD_NAME
valueFrom:
fieldRef:
apiVersion: v1
fieldPath: metadata.name
- name: SHARD
value: "0"
image: quay.io/prometheus-operator/prometheus-config-reloader:v0.47.0
imagePullPolicy: IfNotPresent
name: config-reloader
ports:
- containerPort: 8080
name: reloader-web
protocol: TCP
resources:
limits:
cpu: 100m
memory: 50Mi
requests:
cpu: 100m
memory: 50Mi
terminationMessagePath: /dev/termination-log
terminationMessagePolicy: FallbackToLogsOnError
volumeMounts:
- mountPath: /etc/prometheus/config
name: config
- mountPath: /etc/prometheus/config_out
name: config-out
- mountPath: /etc/prometheus/rules/prometheus-prometheus-kube-prometheus-prometheus-rulefiles-0
name: prometheus-prometheus-kube-prometheus-prometheus-rulefiles-0
- mountPath: /var/run/secrets/kubernetes.io/serviceaccount
name: prometheus-kube-prometheus-prometheus-token-mh66q
readOnly: true
dnsPolicy: ClusterFirst
enableServiceLinks: true
hostname: prometheus-prometheus-kube-prometheus-prometheus-0
nodeName: ip-10-1-49-45.ec2.internal
priority: 0
restartPolicy: Always
schedulerName: default-scheduler
securityContext:
fsGroup: 2000
runAsGroup: 2000
runAsNonRoot: true
runAsUser: 1000
serviceAccount: prometheus-kube-prometheus-prometheus
serviceAccountName: prometheus-kube-prometheus-prometheus
subdomain: prometheus-operated
terminationGracePeriodSeconds: 600
tolerations:
- effect: NoExecute
key: node.kubernetes.io/not-ready
operator: Exists
tolerationSeconds: 300
- effect: NoExecute
key: node.kubernetes.io/unreachable
operator: Exists
tolerationSeconds: 300
volumes:
- name: config
secret:
defaultMode: 420
secretName: prometheus-prometheus-kube-prometheus-prometheus
- name: tls-assets
secret:
defaultMode: 420
secretName: prometheus-prometheus-kube-prometheus-prometheus-tls-assets
- emptyDir: {}
name: config-out
- configMap:
defaultMode: 420
name: prometheus-prometheus-kube-prometheus-prometheus-rulefiles-0
name: prometheus-prometheus-kube-prometheus-prometheus-rulefiles-0
- emptyDir: {}
name: prometheus-prometheus-kube-prometheus-prometheus-db
- name: prometheus-kube-prometheus-prometheus-token-mh66q
secret:
defaultMode: 420
secretName: prometheus-kube-prometheus-prometheus-token-mh66q
status:
conditions:
- lastProbeTime: null
lastTransitionTime: "2021-04-30T16:39:14Z"
status: "True"
type: PodScheduled
phase: Pending
qosClass: Burstable
If someone needs to know the answer: in my case (the above situation) there were 2 Prometheus operators running in different namespaces, 1 in the default namespace and another in the monitoring namespace. I removed the one from the default namespace and it resolved my pod-crashing issue.
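A quick way to spot such a duplicate is to list operator deployments across all namespaces (a rough check; the exact deployment name depends on how the chart was installed):
kubectl get deployments --all-namespaces | grep -i prometheus-operator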

Unable to deploy mongodb community operator in openshift

I'm trying to deploy the mongodb community operator in openshift 3.11, using the following commands:
git clone https://github.com/mongodb/mongodb-kubernetes-operator.git
cd mongodb-kubernetes-operator
oc new-project mongodb
oc create -f deploy/crds/mongodb.com_mongodb_crd.yaml -n mongodb
oc create -f deploy/operator/role.yaml -n mongodb
oc create -f deploy/operator/role_binding.yaml -n mongodb
oc create -f deploy/operator/service_account.yaml -n mongodb
oc apply -f deploy/openshift/operator_openshift.yaml -n mongodb
oc apply -f deploy/crds/mongodb.com_v1_mongodb_openshift_cr.yaml -n mongodb
The operator pod is running successfully, but the MongoDB replica set pods do not spin up. The error is as follows:
[kubenode@master mongodb-kubernetes-operator]$ oc get pods
NAME READY STATUS RESTARTS AGE
example-openshift-mongodb-0 1/2 CrashLoopBackOff 4 2m
mongodb-kubernetes-operator-66bfcbcf44-9xvj7 1/1 Running 0 2m
[kubenode@master mongodb-kubernetes-operator]$ oc logs -f example-openshift-mongodb-0 -c mongodb-agent
panic: Failed to get current user: user: unknown userid 1000510000
goroutine 1 [running]:
com.tengen/cm/util.init.3()
/data/mci/2f46ec94982c5440960d2b2bf2b6ae15/mms-automation/build/go-dependencies/src/com.tengen/cm/util/user.go:14 +0xe5
I have gone through all the issues on the mongodb-kubernetes-operator repository related to this one (reference) and found a suggestion to set the MANAGED_SECURITY_CONTEXT environment variable to true in the operator, mongodb and mongodb-agent containers.
I have done so for all of these containers, but am still facing the same issue.
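(For completeness, this is roughly how I set them with oc set env; resource names match the listings below:)
oc set env deployment/mongodb-kubernetes-operator MANAGED_SECURITY_CONTEXT=true -n mongodb
oc set env statefulset/example-openshift-mongodb MANAGED_SECURITY_CONTEXT=true -n mongodb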
Here is the confirmation that the environment variables are correctly set:
[kubenode@master mongodb-kubernetes-operator]$ oc set env statefulset.apps/example-openshift-mongodb --list
# statefulsets/example-openshift-mongodb, container mongodb-agent
AGENT_STATUS_FILEPATH=/var/log/mongodb-mms-automation/healthstatus/agent-health-status.json
AUTOMATION_CONFIG_MAP=example-openshift-mongodb-config
HEADLESS_AGENT=true
MANAGED_SECURITY_CONTEXT=true
# POD_NAMESPACE from field path metadata.namespace
# statefulsets/example-openshift-mongodb, container mongod
AGENT_STATUS_FILEPATH=/healthstatus/agent-health-status.json
MANAGED_SECURITY_CONTEXT=true
[kubenode@master mongodb-kubernetes-operator]$ oc set env deployment.apps/mongodb-kubernetes-operator --list
# deployments/mongodb-kubernetes-operator, container mongodb-kubernetes-operator
# WATCH_NAMESPACE from field path metadata.namespace
# POD_NAME from field path metadata.name
MANAGED_SECURITY_CONTEXT=true
OPERATOR_NAME=mongodb-kubernetes-operator
AGENT_IMAGE=quay.io/mongodb/mongodb-agent:10.19.0.6562-1
VERSION_UPGRADE_HOOK_IMAGE=quay.io/mongodb/mongodb-kubernetes-operator-version-upgrade-post-start-hook:1.0.2
Operator Information
Operator Version: 0.3.0
MongoDB Image used: 4.2.6
Cluster Information
[kubenode@master mongodb-kubernetes-operator]$ openshift version
openshift v3.11.0+62803d0-1
[kubenode@master mongodb-kubernetes-operator]$ kubectl version
Client Version: version.Info{Major:"1", Minor:"11+", GitVersion:"v1.11.0+d4cacc0", GitCommit:"d4cacc0", GitTreeState:"clean", BuildDate:"2018-10-15T09:45:30Z", GoVersion:"go1.10.2", Compiler:"gc", Platform:"linux/amd64"}
Server Version: version.Info{Major:"1", Minor:"11+", GitVersion:"v1.11.0+d4cacc0", GitCommit:"d4cacc0", GitTreeState:"clean", BuildDate:"2020-12-07T17:59:40Z", GoVersion:"go1.10.8", Compiler:"gc", Platform:"linux/amd64"}
Update
When I check the replica pod YAML (see below), I see three occurrences of the runAsUser security context set to 1000510000. I'm not sure how, but this is being set even though I'm not setting it manually.
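(If the pods needed to run as a fixed UID instead, my understanding is that the pod's service account would need a more permissive SCC, e.g. something like the line below, though I have not verified this on 3.11:)
oc adm policy add-scc-to-user anyuid -z mongodb-kubernetes-operator -n mongodb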
[kubenode@master mongodb-kubernetes-operator]$ oc get -o yaml pod example-openshift-mongodb-0
apiVersion: v1
kind: Pod
metadata:
annotations:
openshift.io/scc: restricted
creationTimestamp: 2021-01-19T07:45:05Z
generateName: example-openshift-mongodb-
labels:
app: example-openshift-mongodb-svc
controller-revision-hash: example-openshift-mongodb-6549495b
statefulset.kubernetes.io/pod-name: example-openshift-mongodb-0
name: example-openshift-mongodb-0
namespace: mongodb
ownerReferences:
- apiVersion: apps/v1
blockOwnerDeletion: true
controller: true
kind: StatefulSet
name: example-openshift-mongodb
uid: 3e91eb40-5a2a-11eb-a5e0-0050569b1f59
resourceVersion: "15616863"
selfLink: /api/v1/namespaces/mongodb/pods/example-openshift-mongodb-0
uid: 3ea17a28-5a2a-11eb-a5e0-0050569b1f59
spec:
containers:
- command:
- agent/mongodb-agent
- -cluster=/var/lib/automation/config/cluster-config.json
- -skipMongoStart
- -noDaemonize
- -healthCheckFilePath=/var/log/mongodb-mms-automation/healthstatus/agent-health-status.json
- -serveStatusPort=5000
- -useLocalMongoDbTools
env:
- name: AGENT_STATUS_FILEPATH
value: /var/log/mongodb-mms-automation/healthstatus/agent-health-status.json
- name: AUTOMATION_CONFIG_MAP
value: example-openshift-mongodb-config
- name: HEADLESS_AGENT
value: "true"
- name: MANAGED_SECURITY_CONTEXT
value: "true"
- name: POD_NAMESPACE
valueFrom:
fieldRef:
apiVersion: v1
fieldPath: metadata.namespace
image: quay.io/mongodb/mongodb-agent:10.19.0.6562-1
imagePullPolicy: Always
name: mongodb-agent
readinessProbe:
exec:
command:
- /var/lib/mongodb-mms-automation/probes/readinessprobe
failureThreshold: 60
initialDelaySeconds: 5
periodSeconds: 10
successThreshold: 1
timeoutSeconds: 1
resources: {}
securityContext:
capabilities:
drop:
- KILL
- MKNOD
- SETGID
- SETUID
runAsUser: 1000510000
terminationMessagePath: /dev/termination-log
terminationMessagePolicy: File
volumeMounts:
- mountPath: /var/lib/automation/config
name: automation-config
readOnly: true
- mountPath: /data
name: data-volume
- mountPath: /var/lib/mongodb-mms-automation/authentication
name: example-openshift-mongodb-agent-scram-credentials
- mountPath: /var/log/mongodb-mms-automation/healthstatus
name: healthstatus
- mountPath: /var/run/secrets/kubernetes.io/serviceaccount
name: mongodb-kubernetes-operator-token-lr9l4
readOnly: true
- command:
- /bin/sh
- -c
- |2
# run post-start hook to handle version changes
/hooks/version-upgrade
# wait for config to be created by the agent
while [ ! -f /data/automation-mongod.conf ]; do sleep 3 ; done ; sleep 2 ;
# start mongod with this configuration
exec mongod -f /data/automation-mongod.conf ;
env:
- name: AGENT_STATUS_FILEPATH
value: /healthstatus/agent-health-status.json
- name: MANAGED_SECURITY_CONTEXT
value: "true"
image: mongo:4.2.6
imagePullPolicy: IfNotPresent
name: mongod
resources: {}
securityContext:
capabilities:
drop:
- KILL
- MKNOD
- SETGID
- SETUID
runAsUser: 1000510000
terminationMessagePath: /dev/termination-log
terminationMessagePolicy: File
volumeMounts:
- mountPath: /data
name: data-volume
- mountPath: /var/lib/mongodb-mms-automation/authentication
name: example-openshift-mongodb-agent-scram-credentials
- mountPath: /healthstatus
name: healthstatus
- mountPath: /hooks
name: hooks
- mountPath: /var/run/secrets/kubernetes.io/serviceaccount
name: mongodb-kubernetes-operator-token-lr9l4
readOnly: true
dnsPolicy: ClusterFirst
hostname: example-openshift-mongodb-0
imagePullSecrets:
- name: mongodb-kubernetes-operator-dockercfg-jhplw
initContainers:
- command:
- cp
- version-upgrade-hook
- /hooks/version-upgrade
image: quay.io/mongodb/mongodb-kubernetes-operator-version-upgrade-post-start-hook:1.0.2
imagePullPolicy: Always
name: mongod-posthook
resources: {}
securityContext:
capabilities:
drop:
- KILL
- MKNOD
- SETGID
- SETUID
runAsUser: 1000510000
terminationMessagePath: /dev/termination-log
terminationMessagePolicy: File
volumeMounts:
- mountPath: /hooks
name: hooks
- mountPath: /var/run/secrets/kubernetes.io/serviceaccount
name: mongodb-kubernetes-operator-token-lr9l4
readOnly: true
nodeName: node1.192.168.27.116.nip.io
nodeSelector:
node-role.kubernetes.io/compute: "true"
priority: 0
restartPolicy: Always
schedulerName: default-scheduler
securityContext:
fsGroup: 1000510000
seLinuxOptions:
level: s0:c23,c2
serviceAccount: mongodb-kubernetes-operator
serviceAccountName: mongodb-kubernetes-operator
subdomain: example-openshift-mongodb-svc
terminationGracePeriodSeconds: 30
volumes:
- name: data-volume
persistentVolumeClaim:
claimName: data-volume-example-openshift-mongodb-0
- name: automation-config
secret:
defaultMode: 416
secretName: example-openshift-mongodb-config
- name: example-openshift-mongodb-agent-scram-credentials
secret:
defaultMode: 384
secretName: example-openshift-mongodb-agent-scram-credentials
- emptyDir: {}
name: healthstatus
- emptyDir: {}
name: hooks
- name: mongodb-kubernetes-operator-token-lr9l4
secret:
defaultMode: 420
secretName: mongodb-kubernetes-operator-token-lr9l4
status:
conditions:
- lastProbeTime: null
lastTransitionTime: 2021-01-19T07:46:45Z
status: "True"
type: Initialized
- lastProbeTime: null
lastTransitionTime: 2021-01-19T07:46:39Z
message: 'containers with unready status: [mongodb-agent]'
reason: ContainersNotReady
status: "False"
type: Ready
- lastProbeTime: null
lastTransitionTime: null
message: 'containers with unready status: [mongodb-agent]'
reason: ContainersNotReady
status: "False"
type: ContainersReady
- lastProbeTime: null
lastTransitionTime: 2021-01-19T07:45:05Z
status: "True"
type: PodScheduled
containerStatuses:
- containerID: docker://bd3ede9178bb78267bc19d1b5da0915d3bcd1d4dcee3e142c7583424bd2aa777
image: docker.io/mongo:4.2.6
imageID: docker-pullable://docker.io/mongo@sha256:c880f6b56f443bb4d01baa759883228cd84fa8d78fa1a36001d1c0a0712b5a07
lastState: {}
name: mongod
ready: true
restartCount: 0
state:
running:
startedAt: 2021-01-19T07:46:55Z
- containerID: docker://5e39c0b6269b8231bbf9cabb4ff3457d9f91e878eff23953e318a9475fb8a90e
image: quay.io/mongodb/mongodb-agent:10.19.0.6562-1
imageID: docker-pullable://quay.io/mongodb/mongodb-agent@sha256:790c2670ef7cefd61cfaabaf739de16dbd2e07dc3b539add0da21ab7d5ac7626
lastState:
terminated:
containerID: docker://5e39c0b6269b8231bbf9cabb4ff3457d9f91e878eff23953e318a9475fb8a90e
exitCode: 2
finishedAt: 2021-01-19T19:39:58Z
reason: Error
startedAt: 2021-01-19T19:39:58Z
name: mongodb-agent
ready: false
restartCount: 144
state:
waiting:
message: Back-off 5m0s restarting failed container=mongodb-agent pod=example-openshift-mongodb-0_mongodb(3ea17a28-5a2a-11eb-a5e0-0050569b1f59)
reason: CrashLoopBackOff
hostIP: 192.168.27.116
initContainerStatuses:
- containerID: docker://7c31cef2a68e3e6100c2cc9c83e3780313f1e8ab43bebca79ad4d48613f124bd
image: quay.io/mongodb/mongodb-kubernetes-operator-version-upgrade-post-start-hook:1.0.2
imageID: docker-pullable://quay.io/mongodb/mongodb-kubernetes-operator-version-upgrade-post-start-hook@sha256:e99105b1c54e12913ddaf470af8025111a6e6e4c8917fc61be71d1bc0328e7d7
lastState: {}
name: mongod-posthook
ready: true
restartCount: 0
state:
terminated:
containerID: docker://7c31cef2a68e3e6100c2cc9c83e3780313f1e8ab43bebca79ad4d48613f124bd
exitCode: 0
finishedAt: 2021-01-19T07:46:45Z
reason: Completed
startedAt: 2021-01-19T07:46:44Z
phase: Running
podIP: 10.129.0.119
qosClass: BestEffort
startTime: 2021-01-19T07:46:39Z

Why isn't the Istio "Authentication Policy" example page working as expected?

The article here: https://istio.io/docs/tasks/security/authn-policy/
Specifically, when I follow the instructions in the Setup section, I can't connect to any of the httpbin instances residing in the foo and bar namespaces, but the legacy one is okay. I suspect there is something wrong with the sidecar proxy being installed.
Here is the YAML of the httpbin pod (after being injected with the istioctl kube-inject --includeIPRanges "10.32.0.0/16" command). I use --includeIPRanges so that the pod can communicate with external IPs (for my debugging purposes, to install dnsutils and other packages):
apiVersion: v1
kind: Pod
metadata:
annotations:
sidecar.istio.io/inject: "true"
sidecar.istio.io/status: '{"version":"4120ea817406fd7ed43b7ecf3f2e22abe453c44d3919389dcaff79b210c4cd86","initContainers":["istio-init"],"containers":["istio-proxy"],"volumes":["istio-envoy","istio-certs"],"imagePullSecrets":null}'
creationTimestamp: 2018-08-15T11:40:59Z
generateName: httpbin-8b9cf99f5-
labels:
app: httpbin
pod-template-hash: "465795591"
version: v1
name: httpbin-8b9cf99f5-9c47z
namespace: foo
ownerReferences:
- apiVersion: extensions/v1beta1
blockOwnerDeletion: true
controller: true
kind: ReplicaSet
name: httpbin-8b9cf99f5
uid: 1450d75d-a080-11e8-aece-42010a940168
resourceVersion: "65722138"
selfLink: /api/v1/namespaces/foo/pods/httpbin-8b9cf99f5-9c47z
uid: 1454b68d-a080-11e8-aece-42010a940168
spec:
containers:
- image: docker.io/citizenstig/httpbin
imagePullPolicy: IfNotPresent
name: httpbin
ports:
- containerPort: 8000
protocol: TCP
resources: {}
terminationMessagePath: /dev/termination-log
terminationMessagePolicy: File
volumeMounts:
- mountPath: /var/run/secrets/kubernetes.io/serviceaccount
name: default-token-pkpvf
readOnly: true
- args:
- proxy
- sidecar
- --configPath
- /etc/istio/proxy
- --binaryPath
- /usr/local/bin/envoy
- --serviceCluster
- httpbin
- --drainDuration
- 45s
- --parentShutdownDuration
- 1m0s
- --discoveryAddress
- istio-pilot.istio-system:15007
- --discoveryRefreshDelay
- 1s
- --zipkinAddress
- zipkin.istio-system:9411
- --connectTimeout
- 10s
- --statsdUdpAddress
- istio-statsd-prom-bridge.istio-system.istio-system:9125
- --proxyAdminPort
- "15000"
- --controlPlaneAuthPolicy
- NONE
env:
- name: POD_NAME
valueFrom:
fieldRef:
apiVersion: v1
fieldPath: metadata.name
- name: POD_NAMESPACE
valueFrom:
fieldRef:
apiVersion: v1
fieldPath: metadata.namespace
- name: INSTANCE_IP
valueFrom:
fieldRef:
apiVersion: v1
fieldPath: status.podIP
- name: ISTIO_META_POD_NAME
valueFrom:
fieldRef:
apiVersion: v1
fieldPath: metadata.name
- name: ISTIO_META_INTERCEPTION_MODE
value: REDIRECT
image: docker.io/istio/proxyv2:1.0.0
imagePullPolicy: IfNotPresent
name: istio-proxy
resources:
requests:
cpu: 10m
securityContext:
privileged: false
readOnlyRootFilesystem: true
runAsUser: 1337
terminationMessagePath: /dev/termination-log
terminationMessagePolicy: File
volumeMounts:
- mountPath: /etc/istio/proxy
name: istio-envoy
- mountPath: /etc/certs/
name: istio-certs
readOnly: true
dnsPolicy: ClusterFirst
initContainers:
- args:
- -p
- "15001"
- -u
- "1337"
- -m
- REDIRECT
- -i
- 10.32.0.0/16
- -x
- ""
- -b
- 8000,
- -d
- ""
image: docker.io/istio/proxy_init:1.0.0
imagePullPolicy: IfNotPresent
name: istio-init
resources: {}
securityContext:
capabilities:
add:
- NET_ADMIN
privileged: true
terminationMessagePath: /dev/termination-log
terminationMessagePolicy: File
nodeName: gke-tvlk-data-dev-default-medium-pool-46397778-q2sb
restartPolicy: Always
schedulerName: default-scheduler
securityContext: {}
serviceAccount: default
serviceAccountName: default
terminationGracePeriodSeconds: 30
tolerations:
- effect: NoExecute
key: node.kubernetes.io/not-ready
operator: Exists
tolerationSeconds: 300
- effect: NoExecute
key: node.kubernetes.io/unreachable
operator: Exists
tolerationSeconds: 300
volumes:
- name: default-token-pkpvf
secret:
defaultMode: 420
secretName: default-token-pkpvf
- emptyDir:
medium: Memory
name: istio-envoy
- name: istio-certs
secret:
defaultMode: 420
optional: true
secretName: istio.default
status:
conditions:
- lastProbeTime: null
lastTransitionTime: 2018-08-15T11:41:01Z
status: "True"
type: Initialized
- lastProbeTime: null
lastTransitionTime: 2018-08-15T11:44:28Z
status: "True"
type: Ready
- lastProbeTime: null
lastTransitionTime: 2018-08-15T11:40:59Z
status: "True"
type: PodScheduled
containerStatuses:
- containerID: docker://758e130a4c31a15c1b8bc1e1f72bd7739d5fa1103132861eea9ae1a6ae1f080e
image: citizenstig/httpbin:latest
imageID: docker-pullable://citizenstig/httpbin@sha256:b81c818ccb8668575eb3771de2f72f8a5530b515365842ad374db76ad8bcf875
lastState: {}
name: httpbin
ready: true
restartCount: 0
state:
running:
startedAt: 2018-08-15T11:41:01Z
- containerID: docker://9c78eac46a99457f628493975f5b0c5bbffa1dac96dab5521d2efe4143219575
image: istio/proxyv2:1.0.0
imageID: docker-pullable://istio/proxyv2@sha256:77915a0b8c88cce11f04caf88c9ee30300d5ba1fe13146ad5ece9abf8826204c
lastState:
terminated:
containerID: docker://52299a80a0fa8949578397357861a9066ab0148ac8771058b83e4c59e422a029
exitCode: 255
finishedAt: 2018-08-15T11:44:27Z
reason: Error
startedAt: 2018-08-15T11:41:02Z
name: istio-proxy
ready: true
restartCount: 1
state:
running:
startedAt: 2018-08-15T11:44:28Z
hostIP: 10.32.96.27
initContainerStatuses:
- containerID: docker://f267bb44b70d2d383ce3f9943ab4e917bb0a42ecfe17fe0ed294bde4d8284c58
image: istio/proxy_init:1.0.0
imageID: docker-pullable://istio/proxy_init@sha256:345c40053b53b7cc70d12fb94379e5aa0befd979a99db80833cde671bd1f9fad
lastState: {}
name: istio-init
ready: true
restartCount: 0
state:
terminated:
containerID: docker://f267bb44b70d2d383ce3f9943ab4e917bb0a42ecfe17fe0ed294bde4d8284c58
exitCode: 0
finishedAt: 2018-08-15T11:41:00Z
reason: Completed
startedAt: 2018-08-15T11:41:00Z
phase: Running
podIP: 10.32.19.61
qosClass: Burstable
startTime: 2018-08-15T11:40:59Z
Here is the example command where I got the error (sleep.legacy -> httpbin.foo):
> kubectl exec $(kubectl get pod -l app=sleep -n legacy -o jsonpath={.items..metadata.name}) -c sleep -n legacy -- curl http://httpbin.foo:8000/ip -s -o /dev/null -w "%{http_code}\n"
000
command terminated with exit code 7
Here is the example command where I get a success status (sleep.legacy -> httpbin.legacy):
> kubectl exec $(kubectl get pod -l app=sleep -n legacy -o jsonpath={.items..metadata.name}) -c sleep -n legacy -- curl http://httpbin.legacy:8000/ip -s -o /dev/null -w "%{http_code}\n"
200
I have followed the instructions to ensure there is no mTLS policy defined, etc.
> kubectl get policies.authentication.istio.io --all-namespaces
No resources found.
> kubectl get meshpolicies.authentication.istio.io
No resources found.
> kubectl get destinationrules.networking.istio.io --all-namespaces -o yaml | grep "host:"
host: istio-policy.istio-system.svc.cluster.local
host: istio-telemetry.istio-system.svc.cluster.local
NVM, I think I found out why: some configuration was messed up on my part.
If you take a look at the statsd address, it is defined with an unrecognized hostname, istio-statsd-prom-bridge.istio-system.istio-system:9125. I noticed that after seeing the proxy container restart/crash multiple times.
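To double-check an address like that, the target Service can be looked up directly (assuming Istio 1.0's bundled statsd bridge lives in istio-system):
kubectl get svc istio-statsd-prom-bridge -n istio-system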

RabbitMQ nodes not able to discover each other and join cluster

I'm new to RabbitMQ and trying to set up a highly available queue using StatefulSets. The tutorial I followed is here.
After deploying the StatefulSet and Service to Kubernetes, the nodes are not able to discover each other in the cluster and the pod goes to Status: CrashLoopBackOff. It seems peer discovery is not working as expected and the nodes are not able to join the cluster.
My cluster nodes are
rabbit@rabbitmq-0, rabbit@rabbitmq-1 and rabbit@rabbitmq-2
$ kubectl exec -it rabbitmq-0 /bin/sh
/ # rabbitmqctl status
Status of node 'rabbit@rabbitmq-0'
Error: unable to connect to node 'rabbit@rabbitmq-0': nodedown
DIAGNOSTICS
===========
attempted to contact: ['rabbit@rabbitmq-0']
rabbit@rabbitmq-0:
* connected to epmd (port 4369) on rabbitmq-0
* epmd reports: node 'rabbit' not running at all
no other nodes on rabbitmq-0
* suggestion: start the node
current node details:
- node name: 'rabbitmq-cli-22@rabbitmq-0'
- home dir: /var/lib/rabbitmq
- cookie hash: 5X3n5Gy+r4FL+M53FHwv3w==
rabbitmq.conf
{ rabbit, [
    { loopback_users, [ ] },
    { tcp_listeners, [ 5672 ] },
    { ssl_listeners, [ ] },
    { hipe_compile, false },
    { cluster_nodes, { [ rabbit@rabbitmq-0, rabbit@rabbitmq-1, rabbit@rabbitmq-2 ], disc } },
    { ssl_listeners, [5671] },
    { ssl_options, [ {cacertfile,"/etc/rabbitmq/ca_certificate.pem"},
                     {certfile,"/etc/rabbitmq/server_certificate.pem"},
                     {keyfile,"/etc/rabbitmq/server_key.pem"},
                     {verify,verify_peer},
                     {versions, ['tlsv1.2', 'tlsv1.1']}
                     {fail_if_no_peer_cert,false} ] }
] },
{ rabbitmq_management, [ { listener, [
    { port, 15672 },
    { ssl, false }
] } ] }
].
$ kubectl get statefulset rabbitmq -o yaml
apiVersion: apps/v1
kind: StatefulSet
metadata:
labels:
app: rabbitmq
name: rabbitmq
namespace: development
resourceVersion: "119265565"
selfLink: /apis/apps/v1/namespaces/development/statefulsets/rabbitmq
uid: 10c2fabc-cbb3-11e7-8821-00505695519e
spec:
podManagementPolicy: OrderedReady
replicas: 3
revisionHistoryLimit: 10
selector:
matchLabels:
app: rabbitmq
serviceName: rabbitmq
template:
metadata:
creationTimestamp: null
labels:
app: rabbitmq
spec:
containers:
- env:
- name: RABBITMQ_ERLANG_COOKIE
valueFrom:
secretKeyRef:
key: rabbitmq-erlang-cookie
name: rabbitmq-erlang-cookie
image: rabbitmq:1.0
imagePullPolicy: IfNotPresent
lifecycle:
postStart:
exec:
command:
- /bin/sh
- -c
- |
if [ -z "$(grep rabbitmq /etc/resolv.conf)" ]; then
sed "s/^search \([^ ]\+\)/search rabbitmq.\1 \1/" /etc/resolv.conf > /etc/resolv.conf.new;
cat /etc/resolv.conf.new > /etc/resolv.conf;
rm /etc/resolv.conf.new;
fi; until rabbitmqctl node_health_check; do sleep 1; done; if [[ "$HOSTNAME" != "rabbitmq-0" && -z "$(rabbitmqctl cluster_status | grep rabbitmq-0)" ]]; then
rabbitmqctl stop_app;
rabbitmqctl join_cluster rabbit@rabbitmq-0;
rabbitmqctl start_app;
fi; rabbitmqctl set_policy ha-all "." '{"ha-mode":"exactly","ha-params":3,"ha-sync-mode":"automatic"}'
name: rabbitmq
ports:
- containerPort: 5672
protocol: TCP
- containerPort: 5671
protocol: TCP
- containerPort: 15672
protocol: TCP
- containerPort: 25672
protocol: TCP
- containerPort: 4369
protocol: TCP
resources:
limits:
cpu: 400m
memory: 2Gi
requests:
cpu: 200m
memory: 1Gi
terminationMessagePath: /dev/termination-log
terminationMessagePolicy: File
volumeMounts:
- mountPath: /var/lib/rabbitmq
name: rabbitmq-persistent-data-storage
- mountPath: /etc/rabbitmq
name: rabbitmq-config
dnsPolicy: ClusterFirst
restartPolicy: Always
schedulerName: default-scheduler
securityContext: {}
terminationGracePeriodSeconds: 10
volumes:
- name: rabbitmq-config
secret:
defaultMode: 420
secretName: rabbitmq-config
updateStrategy:
type: OnDelete
volumeClaimTemplates:
- metadata:
creationTimestamp: null
name: rabbitmq-persistent-data-storage
spec:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 100Gi
status:
phase: Pending
status:
currentReplicas: 1
currentRevision: rabbitmq-4234207235
observedGeneration: 1
replicas: 1
updateRevision: rabbitmq-4234207235
$ kubectl get service rabbitmq -o yaml
apiVersion: v1
kind: Service
metadata:
  labels:
    app: rabbitmq
  name: rabbitmq
  namespace: develop
  resourceVersion: "59968950"
  selfLink: /api/v1/namespaces/develop/services/rabbitmq
  uid: ced85a60-cbae-11e7-8821-00505695519e
spec:
  clusterIP: None
  ports:
  - name: tls-amqp
    port: 5671
    protocol: TCP
    targetPort: 5671
  - name: management
    port: 15672
    protocol: TCP
    targetPort: 15672
  selector:
    app: rabbitmq
  sessionAffinity: None
  type: ClusterIP
status:
  loadBalancer: {}
$ kubectl describe pod rabbitmq-0
Name: rabbitmq-0
Namespace: development
Node: node9/170.XX.X.Xx
Labels: app=rabbitmq
controller-revision-hash=rabbitmq-4234207235
Status: Running
IP: 10.25.128.XX
Controlled By: StatefulSet/rabbitmq
Containers:
rabbitmq:
Container ID: docker://f60b06283d3974382a068ded54782b24de4b6da3203c05772a77c65d76aa2e2f
Image: rabbitmq:1.0
Image ID: rabbitmq@sha256:6245a81a1fc0fb
Ports: 5672/TCP, 5671/TCP, 15672/TCP, 25672/TCP, 4369/TCP
State: Waiting
Reason: CrashLoopBackOff
Last State: Terminated
Reason: Completed
Exit Code: 0
Ready: False
Restart Count: 104
Limits:
cpu: 400m
memory: 2Gi
Requests:
cpu: 200m
memory: 1Gi
Environment:
RABBITMQ_ERLANG_COOKIE: <set to the key 'rabbitmq-erlang-cookie' in secret 'rabbitmq-erlang-cookie'> Optional: false
Mounts:
/etc/rabbitmq from rabbitmq-config (rw)
/var/lib/rabbitmq from rabbitmq-persistent-data-storage (rw)
/var/run/secrets/kubernetes.io/serviceaccount from default-token-lqbp6 (ro)
Conditions:
Type Status
Initialized True
Ready False
PodScheduled True
Volumes:
rabbitmq-persistent-data-storage:
Type: PersistentVolumeClaim (a reference to a PersistentVolumeClaim in the same namespace)
ClaimName: rabbitmq-persistent-data-storage-rabbitmq-0
ReadOnly: false
rabbitmq-config:
Type: Secret (a volume populated by a Secret)
SecretName: rabbitmq-config
Optional: false
default-token-lqbp6:
Type: Secret (a volume populated by a Secret)
SecretName: default-token-lqbp6
Optional: false
QoS Class: Burstable
Node-Selectors: <none>
Tolerations: <none>
Events: <none>
This problem is due to failed DNS resolution inside the pod. The pods are not able to contact each other because there are no valid DNS records.
To solve this, try creating an additional Service, or editing an existing one, to handle DNS resolution.
Creating an additional Service for the DNS probe can be done as follows:
kind: Service
apiVersion: v1
metadata:
namespace: default
name: rabbitmq
labels:
app: rabbitmq
type: Service
spec:
ports:
- name: http
protocol: TCP
port: 15672
targetPort: 15672
- name: amqp
protocol: TCP
port: 5672
targetPort: 5672
selector:
app: rabbitmq
type: ClusterIP
clusterIP: None
Here you specify in the Service spec that it is of type ClusterIP with clusterIP set to None (a headless Service). This should help the pods resolve each other via DNS.
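Once the headless Service is in place, you can verify resolution from inside one of the pods, for example (assuming nslookup is available in the image):
kubectl exec -it rabbitmq-0 -- nslookup rabbitmq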
Cheers!!
Rishabh

Openshift/Kubernetes volumes_from

I'm trying to mimic volumes_from using OpenShift/Kubernetes. I have container A packaging the app and container B serving the packaged app.
I cannot use init containers since I'm stuck on Kubernetes 1.2.
I've tried the postStart lifecycle hook, detailed here:
How to mimic '--volumes-from' in Kubernetes
But OpenShift/Kubernetes always complains that container A is constantly crashing, because once it's done packaging, it exits.
How do I get OpenShift/Kubernetes to stop complaining about container A crashing and just accept that it finished its job?
Or is there another way of having one container build a package for another container to run?
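For what it's worth, the rough shape I'm after (a hypothetical sketch with made-up image names and scripts) is to share an emptyDir between the two containers and keep the builder alive after it finishes, so the pod never reports a crash:
apiVersion: v1
kind: Pod
metadata:
  name: build-and-serve
spec:
  volumes:
  - name: shared
    emptyDir: {}
  containers:
  - name: builder
    image: my-builder-image   # hypothetical
    command: ["/bin/sh", "-c", "scripts/build.sh /shared && tail -f /dev/null"]
    volumeMounts:
    - mountPath: /shared
      name: shared
  - name: server
    image: my-server-image    # hypothetical
    command: ["/bin/sh", "-c", "until [ -e /shared/.build-done ]; do sleep 1; done; exec /serve.sh"]
    volumeMounts:
    - mountPath: /shared
      name: shared
But that feels like a hack, hence the question.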
Thanks in advance for your time.
Update 1:
I don't have kubectl, but here is the output of oc describe pod myapp-2-prehook:
me:~/Projects/myapp (master) $ oc describe pod myapp-2-prehook
Name: myapp-2-prehook
Namespace: myproject
Node: my.host/my.ip
Start Time: Tue, 01 Nov 2016 15:30:55 -1000
Labels: openshift.io/deployer-pod-for.name=myapp-2
Status: Failed
IP:
Controllers: <none>
Containers:
lifecycle:
Container ID: docker://97a5272ebfa56f0c40fdc95094f13da06dba889049f2cc964fe3e89f61bd7792
Image: my.ip:5000/myproject/myapp@sha256:cde5739c5f2bdc8c25b1dd514f698c543cfb6c8b68c3f1afbc7760e11597fde9
Image ID: docker://3be476fec505e5b979bac69d327d4ffb53b3f568e85547c5b66c229948435f44
Port:
Command:
scripts/build.sh
QoS Tier:
cpu: BestEffort
memory: BestEffort
State: Terminated
Reason: Error
Exit Code: 1
Started: Tue, 01 Nov 2016 15:31:21 -1000
Finished: Tue, 01 Nov 2016 15:31:42 -1000
Ready: False
Restart Count: 0
Environment Variables:
CUSTOM_VAR1: custom_value1
OPENSHIFT_DEPLOYMENT_NAME: myapp
OPENSHIFT_DEPLOYMENT_NAMESPACE: myproject
Conditions:
Type Status
Ready False
Volumes:
default-token-goe98:
Type: Secret (a volume populated by a Secret)
SecretName: default-token-goe98
No events.
Output of oc get pod myapp-2-prehook -o yaml:
apiVersion: v1
kind: Pod
metadata:
annotations:
openshift.io/deployment.name: myapp-2
openshift.io/scc: restricted
creationTimestamp: 2016-11-02T01:30:55Z
labels:
openshift.io/deployer-pod-for.name: myapp-2
name: myapp-2-prehook
namespace: myproject
resourceVersion: "21512896"
selfLink: /api/v1/namespaces/myproject/pods/myapp-2-prehook
uid: ffcb7766-a09b-11e6-9053-005056a65cf8
spec:
activeDeadlineSeconds: 21600
containers:
- command:
- scripts/build.sh
env:
- name: CUSTOM_VAR1
value: custom_value1
- name: OPENSHIFT_DEPLOYMENT_NAME
value: myapp-2
- name: OPENSHIFT_DEPLOYMENT_NAMESPACE
value: myproject
image: my.ip:5000/myproject/myapp@sha256:cde5739c5f2bdc8c25b1dd514f698c543cfb6c8b68c3f1afbc7760e11597fde9
imagePullPolicy: IfNotPresent
name: lifecycle
resources: {}
securityContext:
privileged: false
seLinuxOptions:
level: s0:c21,c0
terminationMessagePath: /dev/termination-log
volumeMounts:
- mountPath: /var/run/secrets/kubernetes.io/serviceaccount
name: default-token-goe98
readOnly: true
dnsPolicy: ClusterFirst
host: my.host
imagePullSecrets:
- name: default-dockercfg-srrog
nodeName: my.host
restartPolicy: Never
securityContext:
seLinuxOptions:
level: s0:c21,c0
serviceAccount: default
serviceAccountName: default
terminationGracePeriodSeconds: 30
volumes:
- name: default-token-goe98
secret:
secretName: default-token-goe98
status:
conditions:
- lastProbeTime: null
lastTransitionTime: 2016-11-02T01:31:49Z
message: 'containers with unready status: [lifecycle]'
reason: ContainersNotReady
status: "False"
type: Ready
containerStatuses:
- containerID: docker://97a5272ebfa56f0c40fdc95094f13da06dba889049f2cc964fe3e89f61bd7792
image: my.ip:5000/myproject/myapp@sha256:cde5739c5f2bdc8c25b1dd514f698c543cfb6c8b68c3f1afbc7760e11597fde9
imageID: docker://3be476fec505e5b979bac69d327d4ffb53b3f568e85547c5b66c229948435f44
lastState: {}
name: lifecycle
ready: false
restartCount: 0
state:
terminated:
containerID: docker://97a5272ebfa56f0c40fdc95094f13da06dba889049f2cc964fe3e89f61bd7792
exitCode: 1
finishedAt: 2016-11-02T01:31:42Z
reason: Error
startedAt: 2016-11-02T01:31:21Z
hostIP: 128.49.90.62
phase: Failed
startTime: 2016-11-02T01:30:55Z