I'm trying to use the master API to update resources.
In 1.2, to update a deployment resource I do kubectl apply -f new updateddeployment.yaml.
How can I do the same with the API?
I checked the code in pkg/kubectl/cmd/apply.go and I think the following lines show what happens behind the scenes when you run kubectl apply -f:
// Compute a three way strategic merge patch to send to server.
patch, err := strategicpatch.CreateThreeWayMergePatch(original, modified, current,
versionedObject, true)
helper := resource.NewHelper(info.Client, info.Mapping)
_, err = helper.Patch(info.Namespace, info.Name, api.StrategicMergePatchType, patch)
And here is the code of helper.Patch:
func (m *Helper) Patch(namespace, name string, pt api.PatchType, data []byte) (runtime.Object, error) {
return m.RESTClient.Patch(pt).
NamespaceIfScoped(namespace, m.NamespaceScoped).
Resource(m.Resource).
Name(name).
Body(data).
Do().
Get()
}
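For reference, the equivalent strategic-merge patch can also be issued from the official Python client. A minimal sketch, where the deployment name, namespace and patch body are placeholder assumptions:

from kubernetes import client, config

config.load_kube_config()
apps = client.AppsV1Api()

# Strategic-merge patch: only the fields listed here are changed on the server.
patch_body = {'spec': {'template': {'spec': {'containers': [
    {'name': 'my-container', 'image': 'nginx:1.15'}]}}}}

apps.patch_namespaced_deployment(name='my-deployment', namespace='default', body=patch_body)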
This API is not really convincingly designed, since it forces us to reimplement such basic stuff on the client side...
Anyway, here is my attempt to reinvent the hexagonal wheel in Python...
Python module kube_apply
Usage is like kube_apply.fromYaml(myStuff)
can read strings or opened file streams (via the PyYAML library)
handles yaml files with several concatenated objects
implementation is rather braindead and first attempts
to insert the resource. If this fails, it tries a patch,
and if this also fails, it deletes the resource and
inserts it anew.
File: kube_apply.py
#!/usr/bin/python3
# coding: utf-8
# __________ ________________________________________________ #
# kube_apply - apply Yaml similar to kubectl apply -f file.yaml #
# #
# (C) 2019 Hermann Vosseler <Ichthyostega#web.de> #
# This is OpenSource software; licensed under Apache License v2+ #
# ############################################################### #
'''
Utility for the official Kubernetes python client: apply Yaml data.
While still limited to some degree, this utility attempts to provide
functionality similar to `kubectl apply -f`
- load and parse Yaml
- try to figure out the object type and API to use
- figure out if the resource already exists, in which case
it needs to be patched or replaced altogether.
- otherwise just create a new resource.
Based on inspiration from `kubernetes/utils/create_from_yaml.py`
#since: 2/2019
#author: Ichthyostega
'''
import re
import yaml
import logging
import kubernetes.client
def runUsageExample():
''' demonstrate usage by creating a simple Pod through default client
'''
logging.basicConfig(level=logging.DEBUG)
#
# KUBECONFIG = '/path/to/special/kubecfg.yaml'
# import kubernetes.config
# client = kubernetes.config.new_client_from_config(config_file=KUBECONFIG)
# # --or alternatively--
# kubernetes.config.load_kube_config(config_file=KUBECONFIG)
fromYaml('''
kind: Pod
apiVersion: v1
metadata:
name: dummy-pod
labels:
blow: job
spec:
containers:
- name: sleepr
image: busybox
command:
- /bin/sh
- -c
- sleep 24000
''')
def fromYaml(rawData, client=None, **kwargs):
''' invoke the K8s API to create or replace an object given as YAML spec.
#param rawData: either a string or an opened input stream with a
YAML formatted spec, as you'd use for `kubectl apply -f`
#param client: (optional) preconfigured client environment to use for invocation
#param kwargs: (optional) further arguments to pass to the create/replace call
#return: response object from Kubernetes API call
'''
for obj in yaml.safe_load_all(rawData):
createOrUpdateOrReplace(obj, client, **kwargs)
def createOrUpdateOrReplace(obj, client=None, **kwargs):
''' invoke the K8s API to create or replace a kubernetes object.
The first attempt is to create (insert) this object; when this is rejected because
of an existing object with the same name, we attempt to patch this existing object.
As a last resort, if even the patch is rejected, we *delete* the existing object
and recreate from scratch.
#param obj: complete object specification, including API version and metadata.
#param client: (optional) preconfigured client environment to use for invocation
#param kwargs: (optional) further arguments to pass to the create/replace call
#return: response object from Kubernetes API call
'''
k8sApi = findK8sApi(obj, client)
try:
res = invokeApi(k8sApi, 'create', obj, **kwargs)
logging.debug('K8s: %s created -> uid=%s', describe(obj), res.metadata.uid)
except kubernetes.client.rest.ApiException as apiEx:
if apiEx.reason != 'Conflict': raise
try:
# asking for forgiveness...
res = invokeApi(k8sApi, 'patch', obj, **kwargs)
logging.debug('K8s: %s PATCHED -> uid=%s', describe(obj), res.metadata.uid)
except kubernetes.client.rest.ApiException as apiEx:
if apiEx.reason != 'Unprocessable Entity': raise
try:
# second attempt... delete the existing object and re-insert
logging.debug('K8s: replacing %s FAILED. Attempting deletion and recreation...', describe(obj))
res = invokeApi(k8sApi, 'delete', obj, **kwargs)
logging.debug('K8s: %s DELETED...', describe(obj))
res = invokeApi(k8sApi, 'create', obj, **kwargs)
logging.debug('K8s: %s CREATED -> uid=%s', describe(obj), res.metadata.uid)
except Exception as ex:
message = 'K8s: FAILURE updating %s. Exception: %s' % (describe(obj), ex)
logging.error(message)
raise RuntimeError(message)
return res
def patchObject(obj, client=None, **kwargs):
k8sApi = findK8sApi(obj, client)
try:
res = invokeApi(k8sApi, 'patch', obj, **kwargs)
logging.debug('K8s: %s PATCHED -> uid=%s', describe(obj), res.metadata.uid)
return res
except kubernetes.client.rest.ApiException as apiEx:
if apiEx.reason == 'Unprocessable Entity':
message = 'K8s: patch for %s rejected. Exception: %s' % (describe(obj), apiEx)
logging.error(message)
raise RuntimeError(message)
else:
raise
def deleteObject(obj, client=None, **kwargs):
k8sApi = findK8sApi(obj, client)
try:
res = invokeApi(k8sApi, 'delete', obj, **kwargs)
logging.debug('K8s: %s DELETED. uid was: %s', describe(obj), res.details and res.details.uid or '?')
return True
except kubernetes.client.rest.ApiException as apiEx:
if apiEx.reason == 'Not Found':
logging.warning('K8s: %s does not exist (anymore).', describe(obj))
return False
else:
message = 'K8s: deleting %s FAILED. Exception: %s' % (describe(obj), apiEx)
logging.error(message)
raise RuntimeError(message)
def findK8sApi(obj, client=None):
''' Investigate the object spec and lookup the corresponding API object
#param client: (optional) preconfigured client environment to use for invocation
#return: a client instance wired to the appropriate API
'''
grp, _, ver = obj['apiVersion'].partition('/')
if ver == '':
ver = grp
grp = 'core'
# Strip '.k8s.io' and camel-case-join the dot-separated parts: rbac.authorization.k8s.io -> RbacAuthorization
grp = ''.join(part.capitalize() for part in grp.rsplit('.k8s.io', 1)[0].split('.'))
ver = ver.capitalize()
k8sApi = '%s%sApi' % (grp, ver)
return getattr(kubernetes.client, k8sApi)(client)
def invokeApi(k8sApi, action, obj, **args):
''' find a suitable function and perform the actual API invocation.
#param k8sApi: client object for the invocation, wired to correct API version
#param action: either 'create' (to inject a new object) or 'replace', 'patch', 'delete'
#param obj: the full object spec to be passed into the API invocation
#param args: (optional) additional arguments to pass through to the API call
#return: response object from Kubernetes API call
'''
# transform the CamelCase Yaml kind into snake_case for the swagger API
kind = camel2snake(obj['kind'])
# determine namespace to place the object in, supply default
try: namespace = obj['metadata']['namespace']
except KeyError: namespace = 'default'
functionName = '%s_%s' %(action,kind)
if hasattr(k8sApi, functionName):
# namespace agnostic API
function = getattr(k8sApi, functionName)
else:
functionName = '%s_namespaced_%s' %(action,kind)
function = getattr(k8sApi, functionName)
args['namespace'] = namespace
if 'create' not in functionName:
args['name'] = obj['metadata']['name']
if 'delete' in functionName:
from kubernetes.client.models.v1_delete_options import V1DeleteOptions
obj = V1DeleteOptions()
return function(body=obj, **args)
def describe(obj):
return "%s '%s'" % (obj['kind'], obj['metadata']['name'])
def camel2snake(string):
string = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', string)
string = re.sub('([a-z0-9])([A-Z])', r'\1_\2', string).lower()
return string
if __name__=='__main__':
runUsageExample()
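For example, the module can also be fed an opened manifest file together with a preconfigured client (both paths below are placeholders):

import kubernetes.config
import kube_apply

client = kubernetes.config.new_client_from_config(config_file='/path/to/kubecfg.yaml')
with open('updateddeployment.yaml') as f:
    kube_apply.fromYaml(f, client=client)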
You could install the kubectl binary and invoke it from within your Python program, e.g. via subprocess (optionally adding --prune together with a label selector or --all):
import subprocess
subprocess.run(['kubectl', 'apply', '-f', '-'], input=yaml_manifests, text=True, check=True)
Once server-side apply is ready this problem should get a bit easier, as there will effectively be a k8s API endpoint you can hit (though it still doesn't sound like it will be resource-agnostic, i.e. you will still have to PATCH /api/v1/some-k8s-resource specifically, whereas with kubectl apply you can input a heterogeneous list of resources).
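For what it's worth, server-side apply is exposed as a PATCH with the application/apply-patch+yaml content type plus a fieldManager parameter. A rough sketch against the raw REST API, where the proxy address, namespace, deployment name and field manager are assumptions:

import requests

# Assumes `kubectl proxy` is serving the API on localhost:8001 and
# `deployment_yaml` holds the full manifest as a string.
resp = requests.patch(
    'http://localhost:8001/apis/apps/v1/namespaces/default/deployments/my-deployment',
    params={'fieldManager': 'my-python-client'},
    headers={'Content-Type': 'application/apply-patch+yaml'},
    data=deployment_yaml,
)
resp.raise_for_status()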
version info:
python3.7
kubernetes==8.0.0
doc: https://github.com/kubernetes-client/python/tree/release-8.0/kubernetes
I only found the update API, not a redeploy API.
Thanks.
If you want to partially update an existing deployment, use the PATCH method. An example below:
from pprint import pprint
import kubernetes.client
from kubernetes.client.rest import ApiException

# create an instance of the API class (assumes `configuration` has been set up, e.g. via kubernetes.config)
api_instance = kubernetes.client.AppsV1Api(kubernetes.client.ApiClient(configuration))
name = 'name_example' # str | name of the Deployment
namespace = 'namespace_example' # str | object name and auth scope, such as for teams and projects
body = {'spec': {'replicas': 3}} # object | e.g. a strategic merge patch touching only the replica count
pretty = 'pretty_example' # str | If 'true', then the output is pretty printed. (optional)
dry_run = 'dry_run_example' # str | When present, indicates that modifications should not be persisted. An invalid or unrecognized dryRun directive will result in an error response and no further processing of the request. Valid values are: - All: all dry run stages will be processed (optional)
try:
api_response = api_instance.patch_namespaced_deployment(name, namespace, body, pretty=pretty, dry_run=dry_run)
pprint(api_response)
except ApiException as e:
print("Exception when calling AppsV1Api->patch_namespaced_deployment: %s\n" % e)
If you want to replace the existing deployment with a new deployment, use the PUT method. An example below:
# create an instance of the API class
api_instance = kubernetes.client.AppsV1Api(kubernetes.client.ApiClient(configuration))
name = 'name_example' # str | name of the Deployment
namespace = 'namespace_example' # str | object name and auth scope, such as for teams and projects
body = kubernetes.client.V1Deployment() # V1Deployment |
pretty = 'pretty_example' # str | If 'true', then the output is pretty printed. (optional)
dry_run = 'dry_run_example' # str | When present, indicates that modifications should not be persisted. An invalid or unrecognized dryRun directive will result in an error response and no further processing of the request. Valid values are: - All: all dry run stages will be processed (optional)
try:
api_response = api_instance.replace_namespaced_deployment(name, namespace, body, pretty=pretty, dry_run=dry_run)
pprint(api_response)
except ApiException as e:
print("Exception when calling AppsV1Api->replace_namespaced_deployment: %s\n" % e)
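A common pattern with the replace call is read-modify-write: fetch the live Deployment, change what you need, and send the whole object back. A brief sketch (the replica change is only an illustration):

# read-modify-write: fetch the current Deployment, tweak it, then replace it
deployment = api_instance.read_namespaced_deployment(name, namespace)
deployment.spec.replicas = 3
api_instance.replace_namespaced_deployment(name, namespace, deployment)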
I want to use a function to read input file paths from a dataframe and send them to my Snakemake rule. I also have a helper function to select the remote from which to pull the files.
from snakemake.remote.GS import RemoteProvider as GSRemoteProvider
from snakemake.remote.SFTP import RemoteProvider as SFTPRemoteProvider
from os.path import join
import pandas as pd
configfile: "config.yaml"
units = pd.read_csv(config["units"]).set_index(["library", "unit"], drop=False)
TMP= join('data', 'tmp')
def access_remote(local_path):
""" Connects to the remote as defined in the config file"""
provider = config['provider']
if provider == 'GS':
GS = GSRemoteProvider()
remote_path = GS.remote(join("gs://" + config['bucket'], local_path))
elif provider == 'SFTP':
SFTP = SFTPRemoteProvider(
username=config['user'],
private_key=config['ssh_key']
)
remote_path = SFTP.remote(
config['host'] + ":22" + join(base_path, local_path)
)
else:
remote_path = local_path
return remote_path
def get_fastqs(wc):
"""
Get fastq files (units) of a particular library - sample
combination from the unit sheet.
"""
fqs = units.loc[
(units.library == wc.library) &
(units.libtype == wc.libtype),
"fq1"
]
return {
"r1": list(map(access_remote, fqs.fq1.values)),
}
# Combine all fastq files from the same sample / library type combination
rule combine_units:
input: unpack(get_fastqs)
output:
r1 = join(TMP, "reads", "{library}_{libtype}.end1.fq.gz")
threads: 12
run:
shell("cat {i1} > {o1}".format(i1=input['r1'], o1=output['r1']))
My config file contains the bucket name and provider, which are passed to the function. This works as expected when simply running snakemake.
However, I would like to use the kubernetes integration, which requires passing the provider and bucket name in the command line. But when I run:
snakemake -n --kubernetes --default-remote-provider GS --default-remote-prefix bucket-name
I get this error:
ERROR :: MissingInputException in line 19 of Snakefile:
Missing input files for rule combine_units:
bucket-name/['bucket-name/lib1-unit1.end1.fastq.gz', 'bucket-name/lib1-unit2.end1.fastq.gz', 'bucket-name/lib1-unit3.end1.fastq.gz']
The bucket prefix is applied twice: once mapped correctly to each element, and once to the whole list, which gets converted to a string. Did I miss something? Is there a good way to work around this?
I'm attempting to deploy an Oracle Service Bus project to my locally hosted WebLogic 12c server in an internet-restricted VM, but the tools are indicating my JAR file isn't a valid application file. What is the proper way of building and deploying OSB projects to the WebLogic host?
I've attempted to build using both the configjar utility to create JAR files and exporting directly from JDeveloper to a JAR file. I've also attempted to use the Ant task jwsc to build the OSB project, but haven't been successful.
I've attempted to deploy via the deploy() WLST command, the wldeploy Ant task, and the wldeploy utility tool, but they run into an error and quit. I am able to take the .jar file and manually upload it through the Service Bus console without issue, though.
connect(username, password, adminUrl)
deploy(deploymentName,deploymentFile,targets=deploymentTarget)
startApplication(deploymentName)
I was expecting that the above sample code would deploy the application successfully, but instead the following error code is returned every time:
Deployment Message : weblogic.management.DeploymentException:
[J2EE:160177]The application at
"C:\jdeveloper\mywork\CommonServicesOSB\CrmConnections\test3.jar" was
not recognized as a valid application type. If this is an EAR file,
please ensure the META-INF/application.xml exists. EJB-JARs should
have a META-INF/ejb-jar.xml or corresponding annotations exist. If
this is an exploded WAR, the name of directory must be end with
".war". RARs require a META-INF/ra.xml. A JMS deployment should be an
XML file whose name ends with "-jms.xml". A JDBC deployment should be
an XML file whose name ends with "-jdbc.xml". For other application
types, consult the WebLogic Server documentation.
I'm guessing that I'm missing a crucial file or step, but the documentation I can find hasn't made this any clearer. Does anyone know how this is supposed to work?
In WebLogic 11g I used the following script.
Command to run the script:
./oracle_common/common/bin/wlst.sh script.py import.properties path_jar.jar
from java.util import HashMap
from java.util import HashSet
from java.util import ArrayList
from java.util import Properties
from java.io import FileInputStream
from java.lang import String
from java.lang import Long
from java.lang import System
from com.bea.wli.sb.util import Refs
from com.bea.wli.config.customization import Customization
from com.bea.wli.sb.management.importexport import ALSBImportOperation
import sys
#=======================================================================================
# Entry function to deploy project configuration and resources
# into a ALSB domain
#=======================================================================================
def importToALSBDomain(importConfigFile, importJarPath):
try:
SessionMBean = None
print 'Loading Deployment config from :', importConfigFile
exportConfigProp = loadProps(importConfigFile)
adminUrl = exportConfigProp.get("adminUrl")
importUser = exportConfigProp.get("importUser")
importPassword = exportConfigProp.get("importPassword")
#importJar = exportConfigProp.get("importJar")
customFile = exportConfigProp.get("customizationFile")
passphrase = exportConfigProp.get("passphrase")
project = exportConfigProp.get("project")
connectToServer(importUser, importPassword, adminUrl)
print 'Attempting to import :', importJarPath, "on ALSB Admin Server listening on :", adminUrl
theBytes = readBinaryFile(importJarPath)
print 'Read file', importJarPath
sessionName = createSessionName()
print 'Created session', sessionName
SessionMBean = getSessionManagementMBean(sessionName)
print 'SessionMBean started session'
ALSBConfigurationMBean = findService(String("ALSBConfiguration.").concat(sessionName), "com.bea.wli.sb.management.configuration.ALSBConfigurationMBean")
print "ALSBConfiguration MBean found", ALSBConfigurationMBean
ALSBConfigurationMBean.uploadJarFile(theBytes)
print 'Jar Uploaded'
if project == None:
print 'No project specified, additive deployment performed'
alsbJarInfo = ALSBConfigurationMBean.getImportJarInfo()
alsbImportPlan = alsbJarInfo.getDefaultImportPlan()
alsbImportPlan.setPassphrase(passphrase)
alsbImportPlan.setPreserveExistingEnvValues(true)
importResult = ALSBConfigurationMBean.importUploaded(alsbImportPlan)
SessionMBean.activateSession(sessionName, "Complete test import with customization using wlst")
else:
print 'ALSB project', project, 'will get overlaid'
alsbJarInfo = ALSBConfigurationMBean.getImportJarInfo()
alsbImportPlan = alsbJarInfo.getDefaultImportPlan()
alsbImportPlan.setPassphrase(passphrase)
operationMap=HashMap()
operationMap = alsbImportPlan.getOperations()
print
print 'Default importPlan'
printOpMap(operationMap)
set = operationMap.entrySet()
alsbImportPlan.setPreserveExistingEnvValues(true)
#boolean
abort = false
#list of created ref
createdRef = ArrayList()
for entry in set:
ref = entry.getKey()
op = entry.getValue()
#set different logic based on the resource type
type = ref.getTypeId()
if type == Refs.SERVICE_ACCOUNT_TYPE or type == Refs.SERVICE_PROVIDER_TYPE:
if op.getOperation() == ALSBImportOperation.Operation.Create:
print 'Unable to import a service account or a service provider on a target system', ref
abort = true
elif op.getOperation() == ALSBImportOperation.Operation.Create:
#keep the list of created resources
createdRef.add(ref)
if abort == true :
print 'This jar must be imported manually to resolve the service account and service provider dependencies'
SessionMBean.discardSession(sessionName)
raise
print
print 'Modified importPlan'
printOpMap(operationMap)
importResult = ALSBConfigurationMBean.importUploaded(alsbImportPlan)
printDiagMap(importResult.getImportDiagnostics())
if importResult.getFailed().isEmpty() == false:
print 'One or more resources could not be imported properly'
raise
#customize if a customization file is specified
#affects only the created resources
if customFile != None :
print 'Loading customization File', customFile
print 'Customization applied to the created resources only', createdRef
iStream = FileInputStream(customFile)
customizationList = Customization.fromXML(iStream)
filteredCustomizationList = ArrayList()
setRef = HashSet(createdRef)
# apply a filter to all the customizations to narrow the target to the created resources
for customization in customizationList:
print customization
newcustomization = customization.clone(setRef)
filteredCustomizationList.add(newcustomization)
ALSBConfigurationMBean.customize(filteredCustomizationList)
SessionMBean.activateSession(sessionName, "Complete test import with customization using wlst")
print "Deployment of : " + importJarPath + " successful"
except:
print "Unexpected error:", sys.exc_info()[0]
if SessionMBean != None:
SessionMBean.discardSession(sessionName)
raise
#=======================================================================================
# Utility function to print the list of operations
#=======================================================================================
def printOpMap(map):
set = map.entrySet()
for entry in set:
op = entry.getValue()
print op.getOperation(),
ref = entry.getKey()
print ref
print
#=======================================================================================
# Utility function to print the diagnostics
#=======================================================================================
def printDiagMap(map):
set = map.entrySet()
for entry in set:
diag = entry.getValue().toString()
print diag
print
#=======================================================================================
# Utility function to load properties from a config file
#=======================================================================================
def loadProps(configPropFile):
propInputStream = FileInputStream(configPropFile)
configProps = Properties()
configProps.load(propInputStream)
return configProps
#=======================================================================================
# Connect to the Admin Server
#=======================================================================================
def connectToServer(username, password, url):
connect(username, password, url)
domainRuntime()
#=======================================================================================
# Utility function to read a binary file
#=======================================================================================
def readBinaryFile(fileName):
file = open(fileName, 'rb')
bytes = file.read()
return bytes
#=======================================================================================
# Utility function to create an arbitrary session name
#=======================================================================================
def createSessionName():
sessionName = String("SessionScript"+Long(System.currentTimeMillis()).toString())
return sessionName
#=======================================================================================
# Utility function to load a session MBeans
#=======================================================================================
def getSessionManagementMBean(sessionName):
SessionMBean = findService("SessionManagement", "com.bea.wli.sb.management.configuration.SessionManagementMBean")
SessionMBean.createSession(sessionName)
return SessionMBean
# IMPORT script init
try:
# import the service bus configuration
# argv[1] is the export config properties file
importToALSBDomain(sys.argv[1], sys.argv[2])
except:
print "Unexpected error: ", sys.exc_info()[0]
dumpStack()
raise
and with the following import.properties file:
##################################################################
# OSB Admin Configuration #
##################################################################
adminUrl=t3://localhost:7001
importUser=weblogic
importPassword=weblogic89
I have a Docker image containing Python files which should download satellite imagery from the SciHub website. The Docker image is working fine. Now, when I want to create the deployment through kubectl so that I can expose it as a service, its container keeps on crashing. That's what the pod description says when seen through kubectl describe pod.
This is how I am trying to deploy it: sudo kubectl run back --image=back:latest --port=8080 --image-pull-policy Never. I also tried changing the port but it did not work. Here are the files within the Docker image.
Dockerfile
FROM python:3.7-stretch
COPY . /code
WORKDIR /code
RUN pip install -r requirements.txt
ENTRYPOINT ["python", "ingestion.py"]
ingestion.py
import os
import shutil
import logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(name)s - %(message)s')
logger = logging.getLogger("ingestion")
import requests
import datahub
scihub_username = os.environ["scihub_username"]
scihub_password = os.environ["scihub_password"]
result_url = "http://" + os.environ["CDINRW_BASE_URL"] + "/jobs/" + os.environ["CDINRW_JOB_ID"] + "/results"
logger.info("Searching the Copernicus Open Access Hub")
scenes = datahub.search(username=scihub_username,
password=scihub_password,
producttype=os.getenv("producttype"),
platformname=os.getenv("platformname"),
days_back=os.getenv("days_back", 2),
footprint=os.getenv("footprint"),
max_cloud_cover_percentage=os.getenv("max_cloud_cover_percentage"),
start_date = os.getenv("start_date"),
end_date = os.getenv("end_date"))
logger.info("Found {} relevant scenes".format(len(scenes)))
job_results = []
for scene in scenes:
# do not download a scene that has already been ingested
if os.path.exists(os.path.join("/out_data", scene["title"]+".SAFE")):
logger.info("The scene {} already exists in /out_data and will not be downloaded again.".format(scene["title"]))
filename = scene["title"]+".SAFE"
else:
logger.info("Starting the download of scene {}".format(scene["title"]))
filename = datahub.download(scene, "/tmp", scihub_username, scihub_password, unpack=True)
logger.info("The download was successful.")
shutil.move(filename, "/out_data")
result_message = {"description": "test",
"type": "Raster",
"format": "SAFE",
"filename": os.path.basename(filename)}
job_results.append(result_message)
res = requests.put(result_url, json=job_results, timeout=60)
res.raise_for_status()
datahub.py
import logging
import os
import urllib.parse
import zipfile
import requests
# constructing URLs for querying the data hub
_BASE_URL = "https://scihub.copernicus.eu/dhus/"
SITE = {}
SITE["SEARCH"] = _BASE_URL + "search?format=xml&sortedby=beginposition&order=desc&rows=100&start={offset}&q="
_PRODUCT_URL = _BASE_URL + "odata/v1/Products('{uuid}')/"
SITE["CHECKSUM"] = _PRODUCT_URL + "Checksum/Value/$value"
SITE["SAFEZIP"] = _PRODUCT_URL + "$value"
logger = logging.getLogger(__name__)
def _build_search_url(producttype=None, platformname=None, days_back=2, footprint=None, max_cloud_cover_percentage=None, start_date=None, end_date=None):
search_terms = []
if producttype:
search_terms.append("producttype:{}".format(producttype))
if platformname:
search_terms.append("platformname:{}".format(platformname))
if start_date and end_date:
search_terms.append(
"beginPosition:[{}+TO+{}]".format(start_date, end_date))
elif days_back:
search_terms.append(
"beginPosition:[NOW-{}DAYS+TO+NOW]".format(days_back))
if footprint:
search_terms.append("footprint:%22Intersects({})%22".format(
footprint.replace(" ", "+")))
if max_cloud_cover_percentage:
search_terms.append("cloudcoverpercentage:[0+TO+{}]".format(max_cloud_cover_percentage))
url = SITE["SEARCH"] + "+AND+".join(search_terms)
return url
def _unpack(zip_file, directory, remove_after=False):
with zipfile.ZipFile(zip_file) as zf:
# This assumes that the zipfile only contains the .SAFE directory at root level
safe_path = zf.namelist()[0]
zf.extractall(path=directory)
if remove_after:
os.remove(zip_file)
return os.path.normpath(os.path.join(directory, safe_path))
def search(username, password, producttype=None, platformname=None, days_back=2, footprint=None, max_cloud_cover_percentage=None, start_date=None, end_date=None):
""" Search the Copernicus SciHub
Parameters
----------
username : str
user name for the Copernicus SciHub
password : str
password for the Copernicus SciHub
producttype : str, optional
product type to filter for in the query (see https://scihub.copernicus.eu/userguide/FullTextSearch#Search_Keywords for allowed values)
platformname : str, optional
platform name to filter for in the query (see https://scihub.copernicus.eu/userguide/FullTextSearch#Search_Keywords for allowed values)
days_back : int, optional
number of days before today that will be searched. Default is the last 2 days. If start and end date are set, the days_back parameter is ignored.
footprint : str, optional
well-known-text representation of the footprint
max_cloud_cover_percentage: str, optional
percentage of cloud cover per scene. Can only be used in combination with Sentinel-2 imagery.
(see https://scihub.copernicus.eu/userguide/FullTextSearch#Search_Keywords for allowed values)
start_date: str, optional
start point of the search extent has to be used in combination with end_date
end_date: str, optional
end_point of the search extent has to be used in combination with start_date
Returns
-------
list
a list of scenes that match the search parameters
"""
import xml.etree.cElementTree as ET
scenes = []
search_url = _build_search_url(producttype, platformname, days_back, footprint, max_cloud_cover_percentage, start_date, end_date)
logger.info("Search URL: {}".format(search_url))
offset = 0
rowsBreak = 5000
name_space = {"atom": "http://www.w3.org/2005/Atom",
"opensearch": "http://a9.com/-/spec/opensearch/1.1/"}
while offset < rowsBreak: # Next pagination page:
response = requests.get(search_url.format(offset=offset), auth=(username, password))
root = ET.fromstring(response.content)
if offset == 0:
rowsBreak = int(
root.find("opensearch:totalResults", name_space).text)
for e in root.iterfind("atom:entry", name_space):
uuid = e.find("atom:id", name_space).text
title = e.find("atom:title", name_space).text
begin_position = e.find(
"atom:date[@name='beginposition']", name_space).text
end_position = e.find(
"atom:date[@name='endposition']", name_space).text
footprint = e.find("atom:str[@name='footprint']", name_space).text
scenes.append({
"id": uuid,
"title": title,
"begin_position": begin_position,
"end_position": end_position,
"footprint": footprint})
# Ultimate DHuS pagination page size limit (rows per page).
offset += 100
return scenes
def download(scene, directory, username, password, unpack=True):
""" Download a Sentinel scene based on its uuid
Parameters
----------
scene : dict
the scene to be downloaded
path : str
the path where the file will be downloaded to
username : str
username for the Copernicus SciHub
password : str
password for the Copernicus SciHub
unpack: boolean, optional
flag that defines whether the downloaded product should be unpacked after download. defaults to true
Raises
------
ValueError
if the size of the downloaded file does not match the Content-Length header
ValueError
if the checksum of the downloaded file does not match the checksum provided by the Copernicus SciHub
Returns
-------
str
path to the downloaded file
"""
import hashlib
md5hash = hashlib.md5()
md5sum = requests.get(SITE["CHECKSUM"].format(
uuid=scene["id"]), auth=(username, password)).text
download_path = os.path.join(directory, scene["title"] + ".zip")
# overwrite if path already exists
if os.path.exists(download_path):
os.remove(download_path)
url = SITE["SAFEZIP"].format(uuid=scene["id"])
rsp = requests.get(url, auth=(username, password), stream=True)
cl = rsp.headers.get("Content-Length")
size = int(cl) if cl else -1
# Actually fetch now:
with open(download_path, "wb") as f: # Do not read as a whole into memory:
written = 0
for block in rsp.iter_content(8192):
f.write(block)
written += len(block)
md5hash.update(block)
written = os.path.getsize(download_path)
if size > -1 and written != size:
raise ValueError("{}: size mismatch, {} bytes written but expected {} bytes to write!".format(
download_path, written, size))
elif md5sum:
calculated = md5hash.hexdigest()
expected = md5sum.lower()
if calculated != expected:
raise ValueError("{}: checksum mismatch, calculated {} but expected {}!".format(download_path, calculated, expected))
if unpack:
# unpack the downloaded archive and return the path of the .SAFE directory
return _unpack(download_path, directory)
return download_path
POD events
Events:
Type Reason Age From Message
---- ------ ---- ---- -------
Warning BackOff 2m39s (x18636 over 2d19h) kubelet, minikube Back-off restarting failed container
The system which wants to use this service already has another main front-end service running (which just runs the application) on 8081, so maybe I need to expose this on the same port. How can I make the deployment run?
Our Airflow implementation sends out HTTP requests to get services to do tasks. We want those services to let Airflow know when they complete their task, so we are sending a callback URL to the service, which they will call when their task is complete. I can't seem to find a callback sensor, however. How do people handle this normally?
There is no such thing as a callback or webhook sensor in Airflow. The sensor definition, taken from the documentation, follows:
Sensors are a certain type of operator that will keep running until a certain criterion is met. Examples include a specific file landing in HDFS or S3, a partition appearing in Hive, or a specific time of the day. Sensors are derived from BaseSensorOperator and run a poke method at a specified poke_interval until it returns True.
This means that a sensor is an operator that performs polling behavior on external systems. In that sense, your external services should have a way of keeping state for each executed task - either internally or externally - so that a polling sensor can check on that state.
This way you can use, for example, the airflow.operators.HttpSensor, which polls an HTTP endpoint until a condition is met (a short sketch follows below). Or even better, write your own custom sensor that gives you the opportunity to do more complex processing and keep state.
Otherwise, if the service outputs data in a storage system you can use a sensor that polls a database for example. I believe you get the idea.
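As an illustration only: a minimal HttpSensor that polls an external service until it reports completion. The connection id, status endpoint and response format are assumptions, the import path depends on your Airflow version, and an existing dag object is assumed.

from airflow.sensors.http_sensor import HttpSensor

wait_for_completion = HttpSensor(
    task_id='wait_for_external_task',
    http_conn_id='my_service',          # assumed Airflow HTTP connection
    endpoint='tasks/1234/status',       # hypothetical status endpoint of the external service
    response_check=lambda response: response.json().get('state') == 'done',
    poke_interval=30,                   # seconds between pokes
    timeout=60 * 60,                    # give up after one hour
    dag=dag,
)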
I'm attaching a custom operator example that I've written for integrating with the Apache Livy API. The sensor does two things: a) submits a Spark job through the REST API and b) waits for the job to be completed.
The operator extends the SimpleHttpOperator and at the same time implements the HttpSensor thus combining both functionalities.
import json
from datetime import datetime
from time import sleep

from airflow.exceptions import AirflowException, AirflowSensorTimeout
from airflow.hooks.http_hook import HttpHook
from airflow.operators.http_operator import SimpleHttpOperator
from airflow.utils.decorators import apply_defaults

class LivyBatchOperator(SimpleHttpOperator):
"""
Submits a new Spark batch job through
the Apache Livy REST API.
"""
template_fields = ('args',)
ui_color = '#f4a460'
@apply_defaults
def __init__(self,
name,
className,
file,
executorMemory='1g',
driverMemory='512m',
driverCores=1,
executorCores=1,
numExecutors=1,
args=[],
conf={},
timeout=120,
http_conn_id='apache_livy',
*arguments, **kwargs):
"""
If xcom_push is True, response of an HTTP request will also
be pushed to an XCom.
"""
super(LivyBatchOperator, self).__init__(
endpoint='batches', *arguments, **kwargs)
self.http_conn_id = http_conn_id
self.method = 'POST'
self.endpoint = 'batches'
self.name = name
self.className = className
self.file = file
self.executorMemory = executorMemory
self.driverMemory = driverMemory
self.driverCores = driverCores
self.executorCores = executorCores
self.numExecutors = numExecutors
self.args = args
self.conf = conf
self.timeout = timeout
self.poke_interval = 10
def execute(self, context):
"""
Executes the task
"""
payload = {
"name": self.name,
"className": self.className,
"executorMemory": self.executorMemory,
"driverMemory": self.driverMemory,
"driverCores": self.driverCores,
"executorCores": self.executorCores,
"numExecutors": self.numExecutors,
"file": self.file,
"args": self.args,
"conf": self.conf
}
print(payload)
headers = {
'X-Requested-By': 'airflow',
'Content-Type': 'application/json'
}
http = HttpHook(self.method, http_conn_id=self.http_conn_id)
self.log.info("Submitting batch through Apache Livy API")
response = http.run(self.endpoint,
json.dumps(payload),
headers,
self.extra_options)
# parse the JSON response
obj = json.loads(response.content)
# get the new batch Id
self.batch_id = obj['id']
self.log.info('Batch successfully submitted with Id %s', self.batch_id)
# start polling the batch status
started_at = datetime.utcnow()
while not self.poke(context):
if (datetime.utcnow() - started_at).total_seconds() > self.timeout:
raise AirflowSensorTimeout('Snap. Time is OUT.')
sleep(self.poke_interval)
self.log.info("Batch %s has finished", self.batch_id)
def poke(self, context):
'''
Function that the sensors defined while deriving this class should
override.
'''
http = HttpHook(method='GET', http_conn_id=self.http_conn_id)
self.log.info("Calling Apache Livy API to get batch status")
# call the API endpoint
endpoint = 'batches/' + str(self.batch_id)
response = http.run(endpoint)
# parse the JSON response
obj = json.loads(response.content)
# get the current state of the batch
state = obj['state']
# check the batch state
if (state == 'starting') or (state == 'running'):
# if state is 'starting' or 'running'
# signal a new polling cycle
self.log.info('Batch %s has not finished yet (%s)',
self.batch_id, state)
return False
elif state == 'success':
# if state is 'success' exit
return True
else:
# for all other states
# raise an exception and
# terminate the task
raise AirflowException(
'Batch ' + str(self.batch_id) + ' failed (' + state + ')')
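A hypothetical usage of this operator inside a DAG might look like the following (DAG id, class name and jar path are placeholders):

from datetime import datetime
from airflow import DAG

with DAG('livy_example', start_date=datetime(2019, 1, 1), schedule_interval=None) as dag:
    spark_job = LivyBatchOperator(
        task_id='submit_spark_job',
        name='my_batch',
        className='com.example.MyJob',
        file='hdfs:///jobs/my-job.jar',
        numExecutors=2,
        http_conn_id='apache_livy',     # assumed Livy connection configured in Airflow
    )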
Hope this will help you a bit.