Converting script to execute external program using multiple cores - subprocess

I'm a real beginner at Python, but I have the following script working successfully. It first creates a list of .xml files and then executes the program for each .xml.
Each .xml takes 2-4 minutes to complete and I need to run thousands, so I've been trying to speed up my script by using multiprocessing, but it appears to be beyond my skills.
Any suggestions on how to modify it would be greatly appreciated.
# import modules
import os, sys, shutil, subprocess, fnmatch
from datetime import datetime, timedelta
from time import strptime
# Set variables
project_folder = r"T:\erin\indivs_sample"
phoenix_exe_file = r'C:\Phoenix\Phoenix.exe'
# Create definitions
def runPhoenix(project_file):
    print "Running Phoenix #: " + str(datetime.now().strftime("%a, %d %b %Y %H:%M:%S GMT")) + " - " + project_file
    process = subprocess.Popen([phoenix_exe_file, project_file])
    process.wait()
    print "Phoenix Complete #: " + str(datetime.now().strftime("%a, %d %b %Y %H:%M:%S GMT"))
# Create list of XMLs
project_files = []
for file_name in os.listdir(project_folder):
    if fnmatch.fnmatch(file_name, '*.xml'):
        file_path = os.path.join(project_folder, file_name)
        project_files.append(file_path)
# run project files
for project_file in project_files:
    runPhoenix(project_file)
print "completed"
EDIT 1: I have managed to re-write my code a bit more in the format I think multiprocessing needs...
# import modules
import os, sys, shutil, subprocess, fnmatch, time
from datetime import datetime, timedelta
from time import strptime
# Set variables
project_folder = r"C:\TheHillsPilot\Phoenix\GeneralRuns\ProjectXMLs\indivs_sample"
phoenix_exe_file = r'C:\Phoenix\Phoenix.exe'
# Create definitions
# Definition: print messages when using IDLE
def log(msg):
    print msg
# Definition: Create list of XMLs
def createlist():
    for file_name in os.listdir(project_folder):
        if fnmatch.fnmatch(file_name, '*.xml'):
            file_path = os.path.join(project_folder, file_name)
            project_files.append(file_path)
# Definition: Run Phoenix
def runPhoenix(project_file):
    log("Running Phoenix #: " + str(datetime.now().strftime("%a, %d %b %Y %H:%M:%S GMT")) + " - " + project_file)
    process = subprocess.Popen([phoenix_exe_file, project_file])
    process.wait()
    log("Phoenix Complete #: " + str(datetime.now().strftime("%a, %d %b %Y %H:%M:%S GMT")))
# Definition: main
def main():
    log("creating list of XMLs")
    createlist()
    for project_file in project_files:
        runPhoenix(project_file)
# Process: Create empty list
project_files = []
# Process: Run main
if __name__ == '__main__':
    log("True")
    time_start = time.clock()
    main()
    time_end = time.clock()
    log("Time taken in main in seconds(s) is : {}".format(str(time_end - time_start)))
log("completed")

--- Part 1 Understanding the problem ---
Your question looks a bit complicated. Let me see if I understand your Python program correctly. Your program does two main things:
Look into a project folder, find the xml files that match some criteria, and create a list of the file names of the matched xml files.
Use the Phoenix.exe program to process, or possibly convert, each of the xml files in that list into a new set of "phoenix" output files.
I know very little about HTML and XML, and I have never heard of the Phoenix program for xml files.
But I think your problem, in general, is speeding up a list of time-consuming jobs by executing them in parallel.
Let me give a specific example of your general problem. You have, say, 1,000 text files in English, and you want to translate the English text files into Spanish. For now, you have only one translator doing the job sequentially, and it takes a very long time.
So you would like to get, say, 4 translators to do the jobs in parallel.
One possible solution is to use the Python multiprocessing package, which can create a pool of, say, 4 translator worker processes doing the jobs at the same time. This way, you can be up to 4 times faster.
If you think I understand your problem correctly, I can suggest a rough Python multiprocessing program example to do some simple text processing for your reference.
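To make this concrete for your case before the toy examples below, here is a rough, untested sketch (written for Python 3, like the examples that follow) of how your own runPhoenix() could be handed to a multiprocessing pool. The folder and exe paths are copied from your question, and the pool size of 4 is just an assumption to tune for your machine.
import os, fnmatch, subprocess
from multiprocessing import Pool

project_folder = r"T:\erin\indivs_sample"      # taken from the question
phoenix_exe_file = r"C:\Phoenix\Phoenix.exe"   # taken from the question

def runPhoenix(project_file):
    # Each worker process launches one Phoenix run and waits for it to finish.
    process = subprocess.Popen([phoenix_exe_file, project_file])
    process.wait()
    return project_file

def createlist():
    # Same directory scan as in the question, but returning the list
    # so it can be passed straight to the pool.
    return [os.path.join(project_folder, f)
            for f in os.listdir(project_folder)
            if fnmatch.fnmatch(f, '*.xml')]

if __name__ == '__main__':
    project_files = createlist()
    with Pool(4) as pool:                      # pool size is an assumption
        for done in pool.imap_unordered(runPhoenix, project_files):
            print("Phoenix complete:", done)
imap_unordered() is used so completed runs are reported as soon as they finish; pool.map() would work just as well if the order of results matters.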
--- Part 2 Create / Print text files ---
# *** Python 3.6.5 Text file create/print functions - tlfong01 2018apr18hkt1521 ***
textFileNameLinelistDict = \
    {
        '0' : {'TextFileName': 'textFile1.txt', 'Linelist' : ['line11\n', 'line12\n']},
        '1' : {'TextFileName': 'textFile2.txt', 'Linelist' : ['line21\n', 'line22\n']},
    }

def createManyTextFiles(textFileNameLinelistDict):
    for textFileNum in range(len(textFileNameLinelistDict)):
        textFileName = textFileNameLinelistDict[str(textFileNum)]['TextFileName']
        lineList = textFileNameLinelistDict[str(textFileNum)]['Linelist']
        with open(textFileName, mode = 'w', encoding = 'utf8') as textFile:
            for line in lineList:
                textFile.write(line)
    return

def printManyTextFiles(textFileNameLinelistDict):
    for textFileNum in range(len(textFileNameLinelistDict)):
        textFileName = textFileNameLinelistDict[str(textFileNum)]['TextFileName']
        print(textFileName)
        with open(textFileName, mode = 'r', encoding = 'utf8') as textFile:
            print(textFile.read())
    return

def createTwoTextFiles():
    createManyTextFiles(textFileNameLinelistDict)
    return

def printTwoTextFiles():
    printManyTextFiles(textFileNameLinelistDict)
    return

# *** Main ***
def main():
    createTwoTextFiles()
    printTwoTextFiles()
    return

if __name__ == '__main__':
    main()
# *** Sample output ***
textFile1.txt
line11
line12
textFile2.txt
line21
line22
--- Part 3 Translating text files ---
Now I have written a 'translation' function which takes a text file as input and outputs a new text file with all the lower-case letters shifted to upper case.
# *** Python 3.6.5 Text file translate - tlfong01 2018apr18hk1940 ***
textFileDict = \
    {
        '0' : {'InputTextFileName' : 'inputTextFile1.txt',
               'Linelist' : ['line11z\n', 'line12z\n'],
               'OutputTextFileName' : 'outputTextFile1.txt',
              },
        '1' : {'InputTextFileName' : 'inputTextFile2.txt',
               'Linelist' : ['line21z\n', 'line22z\n'],
               'OutputTextFileName' : 'outputTextFile2.txt',
              }
    }

def createManyTextFiles(textFileDict):
    for textFileNum in range(len(textFileDict)):
        textFileName = textFileDict[str(textFileNum)]['InputTextFileName']
        lineList = textFileDict[str(textFileNum)]['Linelist']
        with open(textFileName, mode = 'w', encoding = 'utf8') as textFile:
            for line in lineList:
                textFile.write(line)
    return

def printManyTextFiles(textFileDict, fileNameType):
    for textFileNum in range(len(textFileDict)):
        textFileName = textFileDict[str(textFileNum)][fileNameType]
        print(textFileName)
        with open(textFileName, mode = 'r', encoding = 'utf8') as textFile:
            print(textFile.read())
    return

def translateManyTextFiles(textFileDict, translateFunction):
    for textFileNum in range(len(textFileDict)):
        inputTextFileName = textFileDict[str(textFileNum)]['InputTextFileName']
        outputTextFileName = textFileDict[str(textFileNum)]['OutputTextFileName']
        inputTextFile = open(inputTextFileName, mode = 'r', encoding = 'utf8')
        outputTextFile = open(outputTextFileName, mode = 'w', encoding = 'utf8')
        for line in inputTextFile:
            outputTextFile.write(translateFunction(line))
        inputTextFile.close()
        outputTextFile.close()
    return

def shiftUpperCase(string):
    return string.upper()

def createTwoTextFiles():
    createManyTextFiles(textFileDict)
    return

def printTwoInputTextFiles():
    printManyTextFiles(textFileDict, 'InputTextFileName')
    return

def translateTwoTextFiles():
    translateManyTextFiles(textFileDict, shiftUpperCase)
    return

def printTwoOutputTextFiles():
    printManyTextFiles(textFileDict, 'OutputTextFileName')
    return

# *** Main ***
def main():
    createTwoTextFiles()
    printTwoInputTextFiles()
    translateTwoTextFiles()
    printTwoOutputTextFiles()
    return

if __name__ == '__main__':
    main()
# *** Sample output ***
'''
inputTextFile1.txt
line11z
line12z
inputTextFile2.txt
line21z
line22z
outputTextFile1.txt
LINE11Z
LINE12Z
outputTextFile2.txt
LINE21Z
LINE22Z
'''
--- Part 4 Multiprocessing using Pool ---
I have written more tests for multiprocessing. So far so good.
# *** Python 3.6.5 multiprocessing translate - tlfong01 2018apr18hk2153 ***
import multiprocessing as mp    # missing in the original listing; mp.Pool is used below

# *** Text file dictionary ***
textFileDict = \
    {
        '0' : {'InputTextFileName' : 'inputTextFile1.txt',
               'Linelist' : ['line11x\n', 'line12x\n'],
               'OutputTextFileName' : 'outputTextFile1.txt',
              },
        '1' : {'InputTextFileName' : 'inputTextFile2.txt',
               'Linelist' : ['line21y\n', 'line22y\n'],
               'OutputTextFileName' : 'outputTextFile2.txt',
              }
    }

# *** Create text file ***
def createManyTextFiles(textFileDict):
    for textFileNum in range(len(textFileDict)):
        textFileName = textFileDict[str(textFileNum)]['InputTextFileName']
        lineList = textFileDict[str(textFileNum)]['Linelist']
        with open(textFileName, mode = 'w', encoding = 'utf8') as textFile:
            for line in lineList:
                textFile.write(line)
    return

# *** Print text files ***
def printOneTextFile(textFileName):
    print(textFileName)
    with open(textFileName, mode = 'r', encoding = 'utf8') as textFile:
        print(textFile.read())
    return

def printManyTextFiles(textFileDict, fileNameType):
    for textFileNum in range(len(textFileDict)):
        textFileName = textFileDict[str(textFileNum)][fileNameType]
        printOneTextFile(textFileName)
    return

# *** Sequential translate text files ***
def shiftUpperCase(string):
    return string.upper()

def translateOneTextFile(inputTextFileName, outputTextFileName, translateFunction):
    inputTextFile = open(inputTextFileName, mode = 'r', encoding = 'utf8')
    outputTextFile = open(outputTextFileName, mode = 'w', encoding = 'utf8')
    for line in inputTextFile:
        outputTextFile.write(translateFunction(line))
    inputTextFile.close()
    outputTextFile.close()
    return

def sequentialTranslateManyTextFiles(textFileDict, translateFunction):
    for textFileNum in range(len(textFileDict)):
        inputTextFileName = textFileDict[str(textFileNum)]['InputTextFileName']
        outputTextFileName = textFileDict[str(textFileNum)]['OutputTextFileName']
        translateOneTextFile(inputTextFileName, outputTextFileName, translateFunction)
    return

def shiftUpperCaseFileNameList(fileNameList):
    translateOneTextFile(fileNameList[0], fileNameList[1], shiftUpperCase)
    return

# *** Test functions ***
def createTwoTextFiles():
    createManyTextFiles(textFileDict)
    return

def printTwoInputTextFiles():
    printManyTextFiles(textFileDict, 'InputTextFileName')
    return

def sequentialTranslateTwoTextFiles():
    sequentialTranslateManyTextFiles(textFileDict, shiftUpperCase)
    return

def printTwoOutputTextFiles():
    printManyTextFiles(textFileDict, 'OutputTextFileName')
    return

def sequentialShiftUpperCaseManyTextFiles():
    inputTextFile1 = 'inputTextFile1.txt'
    inputTextFile2 = 'inputTextFile2.txt'
    outputTextFile1 = 'outputTetFile1.txt'
    outputTextFile2 = 'outputTetFile2.txt'
    fileNameFunctionList0 = [inputTextFile1, outputTextFile1, shiftUpperCase]
    fileNameFunctionList1 = [inputTextFile2, outputTextFile2, shiftUpperCase]
    shiftUpperCaseFileNameList(fileNameFunctionList0)
    shiftUpperCaseFileNameList(fileNameFunctionList1)
    printOneTextFile(inputTextFile1)
    printOneTextFile(outputTextFile1)
    printOneTextFile(inputTextFile2)
    printOneTextFile(outputTextFile2)
    return

def parallelShiftUpperCaseManyTextFiles():
    # *** input output file name configuration ***
    inputTextFileName1 = 'inputTextFile1.txt'
    outputTextFileName1 = 'outputTextFile1.txt'
    inputTextFileName2 = 'inputTextFile2.txt'
    outputTextFileName2 = 'outputTextFile3.txt'
    # *** parallel translating the input files ***
    pool = mp.Pool(4)
    pool.map(shiftUpperCaseFileNameList, \
        [
            [inputTextFileName1, outputTextFileName1],
            [inputTextFileName2, outputTextFileName2],
        ]
        )
    # *** print input out files ***
    printOneTextFile(inputTextFileName1)
    printOneTextFile(outputTextFileName1)
    printOneTextFile(inputTextFileName2)
    printOneTextFile(outputTextFileName2)
    return

# *** Main ***
def main():
    # *** Sequential translation using text file dictionary ***
    createTwoTextFiles()
    printTwoInputTextFiles()
    sequentialTranslateTwoTextFiles()
    printTwoOutputTextFiles()
    # *** Sequential shift upper case using text file name lists ***
    sequentialShiftUpperCaseManyTextFiles()
    # *** Parallel shift upper case using text file name lists ***
    parallelShiftUpperCaseManyTextFiles()
    return

if __name__ == '__main__':
    main()
# *** Sample output ***
inputTextFile1.txt
line11x
line12x
inputTextFile2.txt
line21y
line22y
outputTextFile1.txt
LINE11X
LINE12X
outputTextFile2.txt
LINE21Y
LINE22Y
inputTextFile1.txt
line11x
line12x
outputTetFile1.txt
LINE11X
LINE12X
inputTextFile2.txt
line21y
line22y
outputTetFile2.txt
LINE21Y
LINE22Y
inputTextFile1.txt
line11x
line12x
outputTextFile1.txt
LINE11X
LINE12X
inputTextFile2.txt
line21y
line22y
outputTextFile3.txt
LINE21Y
LINE22Y
--- Part 5 Multiprocessing using Apply Async ---
Last time I used the Pool.map method to do multiprocessing. This time I am using the apply_async method. I have added the apply_async segment to the original function, to make it easier to compare and contrast.
# *** Python 3.6.5 multiprocessing translate - tlfong01 2018apr20hk1549 ***
def parallelPoolAndApplyAsyncShiftUpperCaseManyTextFiles():
    # *** input output file name configuration ***
    inputTextFileName1 = 'inputTextFile1.txt'
    inputTextFileName2 = 'inputTextFile2.txt'
    outputTextFileName1 = 'outputTextFile8.txt'
    outputTextFileName2 = 'outputTextFile9.txt'
    # *** Using pool to translate the text files ***
    pool = mp.Pool(4)
    pool.map(shiftUpperCaseFileNameList, \
        [
            [inputTextFileName1, outputTextFileName1],
            [inputTextFileName2, outputTextFileName2],
        ]
        )
    print('\n*** Using pool to translate the text files ***\n')
    printOneTextFile(inputTextFileName1)
    printOneTextFile(outputTextFileName1)
    printOneTextFile(inputTextFileName2)
    printOneTextFile(outputTextFileName2)
    # *** Using Apply Async translate the text files ***
    pool = mp.Pool(2)
    pool.apply_async(shiftUpperCaseFileNameList, ([inputTextFileName1, outputTextFileName1],))
    pool.apply_async(shiftUpperCaseFileNameList, ([inputTextFileName2, outputTextFileName2],))
    pool.close()
    pool.join()
    # *** print input out files ***
    print('\n*** Using Apply Async translate the text files ***\n')
    printOneTextFile(inputTextFileName1)
    printOneTextFile(outputTextFileName1)
    printOneTextFile(inputTextFileName2)
    printOneTextFile(outputTextFileName2)
    return

# *** Main ***
def test0():
    createTwoTextFiles()
    #printTwoInputTextFiles()
    #sequentialTranslateTwoTextFiles()
    #printTwoOutputTextFiles()
    #sequentialShiftUpperCaseManyTextFiles()
    parallelPoolAndApplyAsyncShiftUpperCaseManyTextFiles()
    return

def main():
    test0()
    return

if __name__ == '__main__':
    main()
# *** End ***
# *** Sample output ***
'''
>>>
RESTART: D:\work\rpi3b\programs\parallel_python\programs\mtp01_2018apr1905.py
*** Using pool to translate the text files ***
inputTextFile1.txt
line11x
line12x
outputTextFile8.txt
LINE11X
LINE12X
inputTextFile2.txt
line21y
line22y
outputTextFile9.txt
LINE21Y
LINE22Y
*** Using Apply Async translate the text files ***
inputTextFile1.txt
line11x
line12x
outputTextFile8.txt
LINE11X
LINE12X
inputTextFile2.txt
line21y
line22y
outputTextFile9.txt
LINE21Y
LINE22Y
>>>
'''
--- Part 6 ---
To be continued ...

Related

AttributeError: 'module' object has no attribute 'check_output'

import subprocess

def check_output(cmd):
    """ https://docs.python.org/2/library/subprocess.html#subprocess.Popen
    Implementation subprocess.check_output() for Python 2.6
    """
    process_list = []
    cmd_list = cmd.strip().split("|")
    for i, sub_cmd in enumerate(cmd_list):
        STDIN = None
        if i > 0:
            STDIN = process_list[i - 1].stdout
        process_list.append(subprocess.check_output(sub_cmd, stdin=STDIN, stdout=subprocess.PIPE, shell=True))
    if len(process_list) == 0:
        return ''
    output = process_list[i].communicate()[0]
    return output

print(check_output('ls -la /var | grep log'))
I am facing the issue AttributeError: 'module' object has no attribute 'check_output' in Thonny Python every time I run my program. I tried call() as well, but it shows the same error.
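For reference, the docstring suggests this helper is meant to be a back-port of check_output() for Python 2.6, and the AttributeError usually means the name subprocess.check_output cannot be found in the module that actually gets imported (for example on Python 2.6, where it does not exist, or when a local file shadows the standard subprocess module). A sketch of the Popen-based pipeline the docstring seems to describe, shown only to illustrate the idea:
import subprocess

def check_output(cmd):
    # Build the shell pipeline stage by stage with Popen instead of
    # relying on subprocess.check_output(), which Python 2.6 lacks.
    process_list = []
    cmd_list = cmd.strip().split("|")
    for i, sub_cmd in enumerate(cmd_list):
        stdin = None
        if i > 0:
            stdin = process_list[i - 1].stdout
        process_list.append(
            subprocess.Popen(sub_cmd, stdin=stdin, stdout=subprocess.PIPE, shell=True))
    if len(process_list) == 0:
        return ''
    # Read the output of the last stage in the pipeline.
    return process_list[-1].communicate()[0]

print(check_output('ls -la /var | grep log'))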

py.test capture unhandled exception

We are using py.test 2.8.7 and I have the method below, which creates a separate log file for every test-case. However, this does not handle unhandled exceptions. So if a code snippet throws an exception instead of failing with an assert, the stack-trace of the exception is not logged into the separate file. Can someone please help me with how I could capture these exceptions?
def remove_special_chars(input):
    """
    Replaces all special characters which ideally should not be included in the name of a file
    Such characters will be replaced with a dot so we know there was something useful there
    """
    for special_ch in ["/", "\\", "<", ">", "|", "&", ":", "*", "?", "\"", "'"]:
        input = input.replace(special_ch, ".")
    return input

def assemble_test_fqn(node):
    """
    Assembles a fully-qualified name for our test-case which will be used as its test log file name
    """
    current_node = node
    result = ""
    while current_node is not None:
        if current_node.name == "()":
            current_node = current_node.parent
            continue
        if result != "":
            result = "." + result
        result = current_node.name + result
        current_node = current_node.parent
    return remove_special_chars(result)

# This fixture creates a logger per test-case
@pytest.yield_fixture(scope="function", autouse=True)
def set_log_file_per_method(request):
    """
    Creates a separate file logging handler for each test method
    """
    # Assembling the location of the log folder
    test_log_dir = "%s/all_test_logs" % (request.config.getoption("--output-dir"))
    # Creating the log folder if it does not exist
    if not os.path.exists(test_log_dir):
        os.makedirs(test_log_dir)
    # Adding a file handler
    test_log_file = "%s/%s.log" % (test_log_dir, assemble_test_fqn(request.node))
    file_handler = logging.FileHandler(filename=test_log_file, mode="w")
    file_handler.setLevel("INFO")
    log_format = request.config.getoption("--log-format")
    log_formatter = logging.Formatter(log_format)
    file_handler.setFormatter(log_formatter)
    logging.getLogger('').addHandler(file_handler)
    yield
    # After the test finished, we remove the file handler
    file_handler.close()
    logging.getLogger('').removeHandler(file_handler)
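(For completeness: the fixture above reads --output-dir and --log-format via request.config.getoption(), so those options have to be registered in conftest.py via pytest_addoption; a minimal sketch, where the default values are only illustrative:)
def pytest_addoption(parser):
    # Illustrative defaults; the real values come from how py.test is invoked.
    parser.addoption("--output-dir", action="store", default="result",
                     help="base directory for the per-test log files")
    parser.addoption("--log-format", action="store",
                     default="%(asctime)s %(levelname)s %(name)s: %(message)s",
                     help="format string for the per-test file handler")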
I have ended up with a custom plugin:
import io
import os
import pytest

def remove_special_chars(text):
    """
    Replaces all special characters which ideally should not be included in the name of a file
    Such characters will be replaced with a dot so we know there was something useful there
    """
    for special_ch in ["/", "\\", "<", ">", "|", "&", ":", "*", "?", "\"", "'"]:
        text = text.replace(special_ch, ".")
    return text

def assemble_test_fqn(node):
    """
    Assembles a fully-qualified name for our test-case which will be used as its test log file name
    The result will also include the potential path of the log file as the parents are appended to the fqn with a /
    """
    current_node = node
    result = ""
    while current_node is not None:
        if current_node.name == "()":
            current_node = current_node.parent
            continue
        if result != "":
            result = "/" + result
        result = remove_special_chars(current_node.name) + result
        current_node = current_node.parent
    return result

def as_unicode(text):
    """
    Encodes a text into unicode
    If it's already unicode, we do not touch it
    """
    if isinstance(text, unicode):
        return text
    else:
        return unicode(str(text))

class TestReport:
    """
    Holds a test-report
    """
    def __init__(self, fqn):
        self._fqn = fqn
        self._errors = []
        self._sections = []

    def add_error(self, error):
        """
        Adds an error (either an Exception or an assertion error) to the list of errors
        """
        self._errors.append(error)

    def add_sections(self, sections):
        """
        Adds captured sections to our internal list of sections
        Since tests can have multiple phases (setup, call, teardown) this will be invoked for all phases
        If for a newer phase we already captured a section, we override it in our already existing internal list
        """
        interim = []
        for current_section in self._sections:
            section_to_add = current_section
            # If the current section we already have is also present in the input parameter,
            # we override our existing section with the one from the input as that's newer
            for index, input_section in enumerate(sections):
                if current_section[0] == input_section[0]:
                    section_to_add = input_section
                    sections.pop(index)
                    break
            interim.append(section_to_add)
        # Adding the new sections from the input parameter to our internal list
        for input_section in sections:
            interim.append(input_section)
        # And finally overriding our internal list of sections
        self._sections = interim

    def save_to_file(self, log_folder):
        """
        Saves the current report to a log file
        """
        # Adding a file handler
        test_log_file = "%s/%s.log" % (log_folder, self._fqn)
        # Creating the log folder if it does not exist
        if not os.path.exists(os.path.dirname(test_log_file)):
            os.makedirs(os.path.dirname(test_log_file))
        # Saving the report to the given log file
        with io.open(test_log_file, 'w', encoding='UTF-8') as f:
            for error in self._errors:
                f.write(as_unicode(error))
                f.write(u"\n\n")
            for index, section in enumerate(self._sections):
                f.write(as_unicode(section[0]))
                f.write(u":\n")
                f.write((u"=" * (len(section[0]) + 1)) + u"\n")
                f.write(as_unicode(section[1]))
                if index < len(self._sections) - 1:
                    f.write(u"\n")

class ReportGenerator:
    """
    A py.test plugin which collects the test-reports and saves them to a separate file per test
    """
    def __init__(self, output_dir):
        self._reports = {}
        self._output_dir = output_dir

    @pytest.hookimpl(tryfirst=True, hookwrapper=True)
    def pytest_runtest_makereport(self, item, call):
        outcome = yield
        # Generating the fully-qualified name of the underlying test
        fqn = assemble_test_fqn(item)
        # Getting the already existing report for the given test from our internal dict or creating a new one if it's not already present
        # We need to do this as this method will be invoked for each phase (setup, call, teardown)
        if fqn not in self._reports:
            report = TestReport(fqn)
            self._reports.update({fqn: report})
        else:
            report = self._reports[fqn]
        result = outcome.result
        # Appending the sections for the current phase to the test-report
        report.add_sections(result.sections)
        # If we have an error, we add that as well to the test-report
        if hasattr(result, "longrepr") and result.longrepr is not None:
            error = result.longrepr
            error_text = ""
            if isinstance(error, str) or isinstance(error, unicode):
                error_text = as_unicode(error)
            elif isinstance(error, tuple):
                error_text = u"\n".join([as_unicode(e) for e in error])
            elif hasattr(error, "reprcrash") and hasattr(error, "reprtraceback"):
                if error.reprcrash is not None:
                    error_text += str(error.reprcrash)
                if error.reprtraceback is not None:
                    if error_text != "":
                        error_text += "\n\n"
                    error_text += str(error.reprtraceback)
            else:
                error_text = as_unicode(error)
            report.add_error(error_text)
        # Finally saving the report
        # We need to do this for all phases as we don't know if and when a test would fail
        # This will essentially override the previous log file for a test if we are in a newer phase
        report.save_to_file("%s/all_test_logs" % self._output_dir)

def pytest_configure(config):
    config._report_generator = ReportGenerator("result")
    config.pluginmanager.register(config._report_generator)

Container keeps on crashing while creating a deployment from a docker image in minikube

I have a docker image containing Python files which should download satellite imagery from the scihub website. The docker image is working fine. Now when I want to create the deployment through kubectl so that I can expose it as a service, its container keeps on crashing. That's what the pod description says when seen through kubectl describe pod.
This is how I am trying to deploy it: sudo kubectl run back --image=back:latest --port=8080 --image-pull-policy Never. I also tried changing the port but it did not work. Here are the files within the docker image.
Dockerfile
FROM python:3.7-stretch
COPY . /code
WORKDIR /code
RUN pip install -r requirements.txt
ENTRYPOINT ["python", "ingestion.py"]
ingestion.py
import os
import shutil
import logging

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(name)s - %(message)s')
logger = logging.getLogger("ingestion")

import requests
import datahub

scihub_username = os.environ["scihub_username"]
scihub_password = os.environ["scihub_password"]
result_url = "http://" + os.environ["CDINRW_BASE_URL"] + "/jobs/" + os.environ["CDINRW_JOB_ID"] + "/results"

logger.info("Searching the Copernicus Open Access Hub")
scenes = datahub.search(username=scihub_username,
                        password=scihub_password,
                        producttype=os.getenv("producttype"),
                        platformname=os.getenv("platformname"),
                        days_back=os.getenv("days_back", 2),
                        footprint=os.getenv("footprint"),
                        max_cloud_cover_percentage=os.getenv("max_cloud_cover_percentage"),
                        start_date = os.getenv("start_date"),
                        end_date = os.getenv("end_date"))
logger.info("Found {} relevant scenes".format(len(scenes)))

job_results = []
for scene in scenes:
    # do not download a scene that has already been ingested
    if os.path.exists(os.path.join("/out_data", scene["title"]+".SAFE")):
        logger.info("The scene {} already exists in /out_data and will not be downloaded again.".format(scene["title"]))
        filename = scene["title"]+".SAFE"
    else:
        logger.info("Starting the download of scene {}".format(scene["title"]))
        filename = datahub.download(scene, "/tmp", scihub_username, scihub_password, unpack=True)
        logger.info("The download was successful.")
        shutil.move(filename, "/out_data")
    result_message = {"description": "test",
                      "type": "Raster",
                      "format": "SAFE",
                      "filename": os.path.basename(filename)}
    job_results.append(result_message)

res = requests.put(result_url, json=job_results, timeout=60)
res.raise_for_status()
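Note that ingestion.py reads its configuration from environment variables at module level; if any of scihub_username, scihub_password, CDINRW_BASE_URL or CDINRW_JOB_ID is missing, os.environ[...] raises a KeyError and the process exits immediately, which is one common way to end up in a restart back-off loop like the one shown further down. A purely illustrative, more defensive variant of that block (not part of the original code):
import os
import sys

REQUIRED_VARS = ["scihub_username", "scihub_password", "CDINRW_BASE_URL", "CDINRW_JOB_ID"]

# Fail with a readable message instead of a bare KeyError traceback.
missing = [name for name in REQUIRED_VARS if name not in os.environ]
if missing:
    sys.exit("Missing required environment variables: {}".format(", ".join(missing)))

scihub_username = os.environ["scihub_username"]
scihub_password = os.environ["scihub_password"]
result_url = "http://" + os.environ["CDINRW_BASE_URL"] + "/jobs/" + os.environ["CDINRW_JOB_ID"] + "/results"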
datahub.py
import logging
import os
import urllib.parse
import zipfile

import requests

# constructing URLs for querying the data hub
_BASE_URL = "https://scihub.copernicus.eu/dhus/"
SITE = {}
SITE["SEARCH"] = _BASE_URL + "search?format=xml&sortedby=beginposition&order=desc&rows=100&start={offset}&q="
_PRODUCT_URL = _BASE_URL + "odata/v1/Products('{uuid}')/"
SITE["CHECKSUM"] = _PRODUCT_URL + "Checksum/Value/$value"
SITE["SAFEZIP"] = _PRODUCT_URL + "$value"

logger = logging.getLogger(__name__)

def _build_search_url(producttype=None, platformname=None, days_back=2, footprint=None, max_cloud_cover_percentage=None, start_date=None, end_date=None):
    search_terms = []
    if producttype:
        search_terms.append("producttype:{}".format(producttype))
    if platformname:
        search_terms.append("platformname:{}".format(platformname))
    if start_date and end_date:
        search_terms.append(
            "beginPosition:[{}+TO+{}]".format(start_date, end_date))
    elif days_back:
        search_terms.append(
            "beginPosition:[NOW-{}DAYS+TO+NOW]".format(days_back))
    if footprint:
        search_terms.append("footprint:%22Intersects({})%22".format(
            footprint.replace(" ", "+")))
    if max_cloud_cover_percentage:
        search_terms.append("cloudcoverpercentage:[0+TO+{}]".format(max_cloud_cover_percentage))
    url = SITE["SEARCH"] + "+AND+".join(search_terms)
    return url

def _unpack(zip_file, directory, remove_after=False):
    with zipfile.ZipFile(zip_file) as zf:
        # This assumes that the zipfile only contains the .SAFE directory at root level
        safe_path = zf.namelist()[0]
        zf.extractall(path=directory)
    if remove_after:
        os.remove(zip_file)
    return os.path.normpath(os.path.join(directory, safe_path))

def search(username, password, producttype=None, platformname=None, days_back=2, footprint=None, max_cloud_cover_percentage=None, start_date=None, end_date=None):
    """ Search the Copernicus SciHub

    Parameters
    ----------
    username : str
        user name for the Copernicus SciHub
    password : str
        password for the Copernicus SciHub
    producttype : str, optional
        product type to filter for in the query (see https://scihub.copernicus.eu/userguide/FullTextSearch#Search_Keywords for allowed values)
    platformname : str, optional
        plattform name to filter for in the query (see https://scihub.copernicus.eu/userguide/FullTextSearch#Search_Keywords for allowed values)
    days_back : int, optional
        number of days before today that will be searched. Default are the last 2 days. If start and end date are set the days_back parameter is ignored
    footprint : str, optional
        well-known-text representation of the footprint
    max_cloud_cover_percentage: str, optional
        percentage of cloud cover per scene. Can only be used in combination with Sentinel-2 imagery.
        (see https://scihub.copernicus.eu/userguide/FullTextSearch#Search_Keywords for allowed values)
    start_date: str, optional
        start point of the search extent has to be used in combination with end_date
    end_date: str, optional
        end_point of the search extent has to be used in combination with start_date

    Returns
    -------
    list
        a list of scenes that match the search parameters
    """
    import xml.etree.cElementTree as ET
    scenes = []
    search_url = _build_search_url(producttype, platformname, days_back, footprint, max_cloud_cover_percentage, start_date, end_date)
    logger.info("Search URL: {}".format(search_url))
    offset = 0
    rowsBreak = 5000
    name_space = {"atom": "http://www.w3.org/2005/Atom",
                  "opensearch": "http://a9.com/-/spec/opensearch/1.1/"}
    while offset < rowsBreak:  # Next pagination page:
        response = requests.get(search_url.format(offset=offset), auth=(username, password))
        root = ET.fromstring(response.content)
        if offset == 0:
            rowsBreak = int(
                root.find("opensearch:totalResults", name_space).text)
        for e in root.iterfind("atom:entry", name_space):
            uuid = e.find("atom:id", name_space).text
            title = e.find("atom:title", name_space).text
            begin_position = e.find(
                "atom:date[@name='beginposition']", name_space).text
            end_position = e.find(
                "atom:date[@name='endposition']", name_space).text
            footprint = e.find("atom:str[@name='footprint']", name_space).text
            scenes.append({
                "id": uuid,
                "title": title,
                "begin_position": begin_position,
                "end_position": end_position,
                "footprint": footprint})
        # Ultimate DHuS pagination page size limit (rows per page).
        offset += 100
    return scenes

def download(scene, directory, username, password, unpack=True):
    """ Download a Sentinel scene based on its uuid

    Parameters
    ----------
    scene : dict
        the scene to be downloaded
    path : str
        the path where the file will be downloaded to
    username : str
        username for the Copernicus SciHub
    password : str
        password for the Copernicus SciHub
    unpack: boolean, optional
        flag that defines whether the downloaded product should be unpacked after download. defaults to true

    Raises
    ------
    ValueError
        if the size of the downloaded file does not match the Content-Length header
    ValueError
        if the checksum of the downloaded file does not match the checksum provided by the Copernicus SciHub

    Returns
    -------
    str
        path to the downloaded file
    """
    import hashlib
    md5hash = hashlib.md5()
    md5sum = requests.get(SITE["CHECKSUM"].format(
        uuid=scene["id"]), auth=(username, password)).text
    download_path = os.path.join(directory, scene["title"] + ".zip")
    # overwrite if path already exists
    if os.path.exists(download_path):
        os.remove(download_path)
    url = SITE["SAFEZIP"].format(uuid=scene["id"])
    rsp = requests.get(url, auth=(username, password), stream=True)
    cl = rsp.headers.get("Content-Length")
    size = int(cl) if cl else -1
    # Actually fetch now:
    with open(download_path, "wb") as f:  # Do not read as a whole into memory:
        written = 0
        for block in rsp.iter_content(8192):
            f.write(block)
            written += len(block)
            md5hash.update(block)
    written = os.path.getsize(download_path)
    if size > -1 and written != size:
        raise ValueError("{}: size mismatch, {} bytes written but expected {} bytes to write!".format(
            download_path, written, size))
    elif md5sum:
        calculated = md5hash.hexdigest()
        expected = md5sum.lower()
POD events
Events:
Type Reason Age From Message
---- ------ ---- ---- -------
Warning BackOff 2m39s (x18636 over 2d19h) kubelet, minikube Back-off restarting failed container
The system that wants to use this service already has another main front-end service running (which just runs the application) on 8081, so maybe I need to expose this on the same port. How can I get the deployment running?

Is there a way to convert juniper "json" or "xml" config to "set" or "show" config?

We use juniper hardware with junos version 15. In this version we can export our config as "json" or "xml" which we want to use to edit it with our automation tooling.
Importing however is only possible in "set" or "show" format.
Is there a tool to convert "json" or "xml" format to "set" or "show" format?
I can only find converters between "show" and "set".
We can't upgrade to version 16 where the import of "json" would be possible.
Here's a script I made at work; throw it in your bin and you can call it by providing a filename or by piping output into it. It assumes Linux or macOS so the os.isatty check works, but the logic can work anywhere:
usage demo:
person#laptop ~ > head router.cfg
## Last commit: 2021-04-20 21:21:39 UTC by vit
version 15.1X12.2;
groups {
    BACKBONE-PORT {
        interfaces {
            <*> {
                mtu 9216;
                unit <*> {
                    family inet {
                        mtu 9150;
person#laptop ~ > convert.py router.cfg | head
set groups BACKBONE-PORT interfaces <*> mtu 9216
set groups BACKBONE-PORT interfaces <*> unit <*> family inet mtu 9150
set groups BACKBONE-PORT interfaces <*> unit <*> family inet6 mtu 9150
set groups BACKBONE-PORT interfaces <*> unit <*> family mpls maximum-labels 5
<... output removed... >
convert.py:
#!/usr/bin/env python3
# Class that attempts to parse out Juniper JSON into set format
# I think it works? still testing
#
# TODO:
#   accumulate annotations and provide them as commands at the end. Will be weird as annotations have to be done after an edit command
from argparse import ArgumentParser, RawTextHelpFormatter
import sys, os, re

class TokenStack():
    def __init__(self):
        self._tokens = []

    def push(self, token):
        self._tokens.append(token)

    def pop(self):
        if not self._tokens:
            return None
        item = self._tokens[-1]
        self._tokens = self._tokens[:-1]
        return item

    def peek(self):
        if not self._tokens:
            return None
        return self._tokens[-1]

    def __str__(self):
        return " ".join(self._tokens)

    def __repr__(self):
        return " ".join(self._tokens)

def main():
    # get file
    a = ArgumentParser(prog="convert_jpr_json",
                       description="This program takes in Juniper style JSON (blah { format) and prints it in a copy pastable display set format",
                       epilog=f"Either supply with a filename or pipe config contents into this program and it'll print out the display set view.\nEx:\n{B}convert_jpr_json <FILENAME>\ncat <FILENAME> | convert_jpr_json{WHITE}",
                       formatter_class=RawTextHelpFormatter)
    a.add_argument('file', help="juniper config in JSON format", nargs="?")
    args = a.parse_args()
    if not args.file and os.isatty(0):
        a.print_help()
        die("Please supply filename or provide piped input")
    file_contents = None
    if args.file:
        try:
            file_contents = open(args.file, "r").readlines()
        except IOError as e:
            die(f"Issue opening file {args.file}: {e}")
    else:
        file_contents = sys.stdin.readlines()
    tokens = TokenStack()
    in_comment = False
    new_config = []
    for line_num, line in enumerate(file_contents):
        if line.startswith("version ") or len(line) == 0:
            continue
        token = re.sub(r"^(.+?)#+[^\"]*$", r"\1", line.strip())
        token = token.strip()
        if (any(token.startswith(_) for _ in ["!", "#"])):
            # annotations currently not supported
            continue
        if token.startswith("/*"):
            # we're in a comment now until the next token (this will break if a multiline comment with # style { happens, but hopefully no-one is that dumb
            in_comment = True
            continue
        if "inactive: " in token:
            token = token.split("inactive: ")[1]
            new_config.append(f"deactivate {tokens} {token}")
        if token[-1] == "{":
            in_comment = False
            tokens.push(token.strip("{ "))
        elif token[-1] == "}":
            if not tokens.pop():
                die("Invalid json supplied: unmatched closing } encountered on line " + f"{line_num}")
        elif token[-1] == ";":
            new_config.append(f"set {tokens} {token[:-1]}")
    if tokens.peek():
        print(tokens)
        die("Unbalanced JSON: expected closing }, but encountered EOF")
    print("\n".join(new_config))

def die(msg): print(f"\n{B}{RED}FATAL ERROR{WHITE}: {msg}"); exit(1)

RED = "\033[31m"; GREEN = "\033[32m"; YELLOW = "\033[33m"; B = "\033[1m"; WHITE = "\033[0m"

if __name__ == "__main__": main()
You can load XML configuration using edit-config RPC or load-configuration RPC. For more details:
https://www.juniper.net/documentation/en_US/junos/topics/reference/tag-summary/netconf-edit-config.html
https://www.juniper.net/documentation/en_US/junos/topics/reference/tag-summary/junos-xml-protocol-load-configuration.html
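If you are already scripting in Python, the same load-configuration RPC can also be driven through the junos-eznc (PyEZ) library; a rough sketch, where the host name, credentials and file name are placeholders:
from jnpr.junos import Device
from jnpr.junos.utils.config import Config

# Placeholder connection details; requires the junos-eznc package installed.
with Device(host="router1.example.net", user="automation", password="secret") as dev:
    cu = Config(dev)
    # format="xml" accepts the XML export of the configuration
    cu.load(path="router-config.xml", format="xml", merge=True)
    cu.pdiff()      # show the candidate diff before committing
    cu.commit()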
XML content can be loaded via an "op" script by placing the content inside a call to the jcs:load-configuration() template defined in "junos.xsl". Something like the following:
version 1.1;
ns jcs = "http://xml.juniper.net/junos/commit-scripts/1.0";
import "../import/junos.xsl";

var $arguments = {
    <argument> {
        <name> "file";
        <description> "Filename of XML content to load";
    }
    <argument> {
        <name> "action";
        <description> "Mode for the load (override, replace, merge)";
    }
}

param $file;
param $action = "replace";

match / {
    <op-script-results> {
        var $configuration = slax:document($file);
        var $connection = jcs:open();
        call jcs:load-configuration($connection, $configuration, $action);
    }
}
Thanks,
Phil

Pycuda test_driver.py raises Attribute Error

I'm trying to install pycuda on Linux Mint with a GeForce 960M and Cuda 8.0 installed. When I run the test_driver.py script it outputs the following error:
============================= test session starts ==============================
platform linux2 -- Python 2.7.12, pytest-3.0.3, py-1.4.31, pluggy-0.4.0
rootdir: /home/milton/Downloads/pycuda-2016.1.2, inifile:
collected 28 items
test_driver.py ...................x.....F..
=================================== FAILURES ===================================
________________________ TestDriver.test_multi_context _________________________
args = (), kwargs = {}
pycuda = <module 'pycuda' from '/home/milton/miniconda2/lib/python2.7/site-packages/pycuda-2016.1.2-py2.7-linux-x86_64.egg/pycuda/init.pyc'>
ctx = <pycuda._driver.Context object at 0x7f540e39d758>
clear_context_caches = <function clear_context_caches at 0x7f540ee26758>
collect =<built-in function collect>
    def f(*args, **kwargs):
        import pycuda.driver
        # appears to be idempotent, i.e. no harm in calling it more than once
        pycuda.driver.init()
        ctx = make_default_context()
        try:
            assert isinstance(ctx.get_device().name(), str)
            assert isinstance(ctx.get_device().compute_capability(), tuple)
            assert isinstance(ctx.get_device().get_attributes(), dict)
            inner_f(*args, **kwargs)
../../../miniconda2/lib/python2.7/site-packages/pycuda-2016.1.2-py2.7-linux-x86_64.egg/pycuda/tools.py:460:
self = <test_driver.TestDriver instance at 0x7f540c21fc20>
    @mark_cuda_test
    def test_multi_context(self):
        if drv.get_version() < (2,0,0):
            return
        if drv.get_version() >= (2,2,0):
            if drv.Context.get_device().compute_mode == drv.compute_mode.EXCLUSIVE:
E               AttributeError: type object 'compute_mode' has no attribute 'EXCLUSIVE'
test_driver.py:638: AttributeError
================ 1 failed, 26 passed, 1 xfailed in 6.92 seconds ================
The Python driver's compute_mode only supports the following modes:
DEFAULT,
PROHIBITED,
EXCLUSIVE_PROCESS
so please change this:
if drv.Context.get_device().compute_mode == drv.compute_mode.EXCLUSIVE:
to
if drv.Context.get_device().compute_mode == drv.compute_mode.EXCLUSIVE_PROCESS:
in your test_driver.py file
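If in doubt, you can check which compute modes your installed pycuda build actually exposes before editing the test; a quick check from an interactive session:
import pycuda.driver as drv

# Lists the enum members of compute_mode for this pycuda build,
# e.g. ['DEFAULT', 'EXCLUSIVE_PROCESS', 'PROHIBITED'] on recent versions.
print([name for name in dir(drv.compute_mode) if not name.startswith("_")])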