script runs in jupyter notebook but not vscode - visual-studio-code

When I run this code in a Jupyter notebook it runs without any error, but when I run it in VS Code I get an error. The script works with a URL and a token; I take the JSON response and export it to a file I can open in Excel.
This is the code:
import json
import requests
import pandas

url = "url"
headers = {"Cookie": "token"}
response = requests.get(url, headers=headers)
data = response.json()
print(data)

a = data['tickers'][0]['items'][0]['days'][0]['items']

from copy import deepcopy

def cross_join(left, right):
    new_rows = []
    for left_row in left:
        for right_row in right:
            temp_row = deepcopy(left_row)
            for key, value in right_row.items():
                temp_row[key] = value
            new_rows.append(deepcopy(temp_row))
    return new_rows

def flatten_list(data):
    for elem in data:
        if isinstance(elem, list):
            yield from flatten_list(elem)
        else:
            yield elem

def json_to_dataframe(data_in):
    def flatten_json(data, prev_heading=''):
        if isinstance(data, dict):
            rows = [{}]
            for key, value in data.items():
                rows = cross_join(rows, flatten_json(value, prev_heading + '.' + key))
        elif isinstance(data, list):
            rows = []
            for i in range(len(data)):
                rows.extend(flatten_list(flatten_json(data[i], prev_heading)))
        else:
            rows = [{prev_heading[1:]: data}]
        return rows
    return pandas.DataFrame(flatten_json(data_in))

if __name__ == '__main__':
    json_data = a
    df = json_to_dataframe(json_data)
    print(df)
    df.to_excel("output.xlsx")
This is the error:
ModuleNotFoundError Traceback (most recent call last)
---> 59 df.to_excel("output.xlsx")
-> 2026 formatter.write(
--> 730 writer = ExcelWriter(stringify_path(writer), engine=engine)
---> 18 from openpyxl.workbook import Workbook
ModuleNotFoundError: No module named 'openpyxl'
What am I missing?

Run this command from a terminal window in VS Code:
pip install openpyxl
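If VS Code is pointed at a different Python interpreter than the one your Jupyter kernel uses, a bare pip may install openpyxl into the wrong environment. A minimal sketch of a safer variant, assuming the python on your PATH is the interpreter selected in VS Code:
# install into exactly the interpreter that runs the script
python -m pip install openpyxl
# verify which interpreter is actually in use
python -c "import sys; print(sys.executable)"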

Related

Get list of all notebooks in my databricks workspace

How do I get a list of all notebooks in my workspace and store their names along with the full path in a CSV file? I have tried the Databricks CLI option, but it doesn't seem to have a recursive operation.
databricks workspace list
As we can see in the code, there is no recursive option:
https://github.com/databricks/databricks-cli/blob/master/databricks_cli/workspace/cli.py (def ls_cli)
An example solution is to import the CLI in Python and extend it:
from databricks_cli.sdk import ApiClient
from databricks_cli.sdk import service

host = "your_host"
token = "your_token"
client = ApiClient(host=host, token=token)
objects = []
workspace = service.WorkspaceService(client)

def list_workspace_objects(path):
    elements = workspace.list(path).get('objects')
    if elements is not None:
        for obj in elements:
            objects.append(obj)
            if obj['object_type'] == 'DIRECTORY':
                list_workspace_objects(obj['path'])

list_workspace_objects("/")
print(objects)
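To get the CSV the question asks for, you could then dump the collected entries; a short sketch, assuming each object returned by workspace.list has 'path' and 'object_type' keys and that "notebooks.csv" is just an example filename:
import csv
import os

# write notebook names and full paths to a CSV file
with open("notebooks.csv", "w", newline="") as f:
    writer = csv.writer(f)
    writer.writerow(["name", "path"])
    for obj in objects:
        if obj.get('object_type') == 'NOTEBOOK':
            writer.writerow([os.path.basename(obj['path']), obj['path']])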
You can use the code below directly. Note: tested code.
from pyspark.sql.types import IntegerType
from pyspark.sql.types import *
from pyspark.sql import Row
import base64
import requests
import json

databricks_instance = "databricks Instance"
url_list = f"{databricks_instance}/api/2.0/workspace/list"
url_export = f"{databricks_instance}/api/2.0/workspace/export"
payload = json.dumps({
    "path": "/"
})
headers = {
    'Authorization': 'Bearer token',
    'Content-Type': 'application/json'
}
response = requests.request("GET", url_list, headers=headers, data=payload).json()
notebooks = []

# Collect all notebooks under the given path, recursing into directories.
def list_notebooks(mylist):
    for element in mylist['objects']:
        if element['object_type'] == 'NOTEBOOK':
            notebooks.append(element)
        if element['object_type'] == 'DIRECTORY':
            payload_inner = json.dumps({
                "path": element['path']
            })
            response_inner = requests.request("GET", url_list, headers=headers, data=payload_inner).json()
            if len(response_inner) != 0:
                list_notebooks(response_inner)
    return notebooks

result = list_notebooks(response)
print(result[0])
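Side note: sending a JSON body with a GET request works here, but the workspace list endpoint also accepts the path as a query parameter, which reads a little more naturally with requests. A sketch, reusing the same url_list and headers (this is an alternative call, not part of the tested code above):
# equivalent call using a query parameter instead of a JSON body
response = requests.get(url_list, headers=headers, params={"path": "/"}).json()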

Parameterize the find method in python using mongo

Files to upload will be named like WFSIV0101202001.318.tar.gz, WFSIV0101202001.2624.tar.gz, etc.
INPUT_FILE_PATH = r'C:\Files to upload'

try:
    import os
    from google.cloud import storage
    import sys
    import pymongo
    import pymongo.errors
    from pymongo import MongoClient
    from pymongo.errors import ConnectionFailure
except ImportError:
    print("missing modules")

try:
    mongo_client = MongoClient(host="xyz.com", port=27017)
    Db = mongo_client['abcd']
    coll = Db['shopper_journey_sitedata']
except ConnectionFailure:
    print("Connection failed")

date = []
# Thirdpartyid=[]
input_files = os.listdir(INPUT_FILE_PATH)
# looping through input files
for input_file in input_files:
    x = input_file.split(".")
    date.append(x[0][5:13])
    tp_site_id = x[1]
    # print(tp_site_id)
    cur = coll.find({"third_party_site_id": tp_site_id})
    for doc in cur:
        print(doc)
Now I want to parameterize the find() method for every ID, so that on each iteration I query for the corresponding site ID.
I tried the code above, but it gives an error like "Datas: name error".
You can do one thing: query all the IDs at once with $in (in PyMongo the operator must be a quoted string, and the IDs are strings because they come from the filename split):
coll.find({"third_party_site_id": {"$in": ["318", "2624", "2621", "2622", "102", "078"]}})
If Tid is an array, then you could replace a literal value in your query with Tid[i].
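A minimal sketch of how this fits the per-file loop from the question, assuming coll and INPUT_FILE_PATH as defined above (and that the IDs are stored as strings in Mongo):
import os

# collect the site IDs from the filenames, e.g. "WFSIV0101202001.318.tar.gz" -> "318"
site_ids = [name.split(".")[1] for name in os.listdir(INPUT_FILE_PATH)]

# one query for all IDs instead of one query per file
for doc in coll.find({"third_party_site_id": {"$in": site_ids}}):
    print(doc)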

Jupyter Importing Ipynb files Error: no module named 'mynotebook'

I need to import different ipynb files, so I tried this:
https://jupyter-notebook.readthedocs.io/en/stable/examples/Notebook/Importing%20Notebooks.html
But I get "no module named 'mynotebook'". (I even tried it with other notebook names, which definitely exist, but it still doesn't work.)
Do you have any ideas about what I could do?
import io, os, sys, types
from IPython import get_ipython
from nbformat import read
from IPython.core.interactiveshell import InteractiveShell

def find_notebook(fullname, path=None):
    name = fullname.rsplit('.', 1)[-1]
    if not path:
        path = ['']
    for d in path:
        nb_path = os.path.join(d, name + ".ipynb")
        if os.path.isfile(nb_path):
            return nb_path
        # let import Notebook_Name find "Notebook Name.ipynb"
        nb_path = nb_path.replace("_", " ")
        if os.path.isfile(nb_path):
            return nb_path

class NotebookLoader(object):
    def __init__(self, path=None):
        self.shell = InteractiveShell.instance()
        self.path = path

    def load_module(self, fullname):
        """import a notebook as a module"""
        path = find_notebook(fullname, self.path)
        print("importing Jupyter notebook from %s" % path)

        # load the notebook object
        with io.open(path, 'r', encoding='utf-8') as f:
            nb = read(f, 4)

        # create the module and add it to sys.modules
        # if name in sys.modules:
        #     return sys.modules[name]
        mod = types.ModuleType(fullname)
        mod.__file__ = path
        mod.__loader__ = self
        mod.__dict__['get_ipython'] = get_ipython
        sys.modules[fullname] = mod

        # extra work to ensure that magics that would affect the user_ns
        # actually affect the notebook module's ns
        save_user_ns = self.shell.user_ns
        self.shell.user_ns = mod.__dict__
        try:
            for cell in nb.cells:
                if cell.cell_type == 'code':
                    # transform the input to executable Python
                    code = self.shell.input_transformer_manager.transform_cell(cell.source)
                    # run the code in the module
                    exec(code, mod.__dict__)
        finally:
            self.shell.user_ns = save_user_ns
        return mod

class NotebookFinder(object):
    def __init__(self):
        self.loaders = {}

    def find_module(self, fullname, path=None):
        nb_path = find_notebook(fullname, path)
        if not nb_path:
            return
        key = path
        if path:
            # lists aren't hashable
            key = os.path.sep.join(path)
        if key not in self.loaders:
            self.loaders[key] = NotebookLoader(path)
        return self.loaders[key]

sys.meta_path.append(NotebookFinder())
import mynotebook
I just want to import the code from another Jupyter file.
Wow, I also faced this problem. I created a new env, and after opening Jupyter it couldn't find nbformat in my newly installed env, so just:
pip install nbformat
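If installing nbformat doesn't help, it's worth checking which interpreter the notebook kernel actually runs, since the import hook above needs nbformat available in that same environment. A quick check (not specific to this answer):
# run inside the notebook: shows the interpreter backing the kernel
import sys
print(sys.executable)
# then install into exactly that interpreter from a terminal, e.g.
#   /path/to/that/python -m pip install nbformat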

McAfee Update download script

I'm setting up PCs with McAfee installed on them and have been told that I need to stop the program from going online to download updates (DAT files). I need to create a script to download the DAT file from the McAfee web site and put it on a server where McAfee can access and install it.
Has anyone done this in the past?
I have actually done this. I haven't tested this script in a year or two, but here is what I was using. It isn't written in PowerShell, but if you change the directories I think it can run on Windows.
#!/usr/bin/python
import ftplib
import tarfile
import shutil
import os
import re
import time

scannerDir = "/usr/local/uvscan/"
tmp = "/tmp/avscanner/"

def downloadDat():
    datFile = ""
    r = re.compile("^avvdat")
    ftp = ftplib.FTP("ftp.nai.com", "anonymous", "email@yourdomain.com")
    ftp.cwd("/pub/datfiles/english")
    names = ftp.nlst()
    for x in names:
        if r.search(x):
            datFile = x
    f = open(tmp + "datfile", 'wb')
    ftp.retrbinary("RETR " + datFile, f.write)
    f.close()
    ftp.quit()

def unpackDat():
    tFile = tarfile.open(tmp + "datfile", 'r')
    for f in tFile.getnames():
        tFile.extract(f, tmp)

def createDirs():
    if not os.path.isdir(tmp):
        os.mkdir(tmp, 0700)
        os.chown(tmp, 0, 95)
        os.chmod(tmp, 0755)

def doCleanup():
    shutil.rmtree(tmp)

def installFiles():
    shutil.copyfile(tmp + "/avvclean.dat", scannerDir + "/avvclean.dat")
    shutil.copyfile(tmp + "/avvnames.dat", scannerDir + "/avvnames.dat")
    shutil.copyfile(tmp + "/avvscan.dat", scannerDir + "/avvscan.dat")

def isOld():
    # the current DAT counts as old once it is more than ~22 hours (80000 s) old
    if os.path.isfile(scannerDir + "/avvclean.dat"):
        if time.time() - os.path.getctime(scannerDir + "/avvclean.dat") > 80000:
            return True
        else:
            return False
    else:
        return True

def main():
    if isOld():
        createDirs()
        downloadDat()
        unpackDat()
        installFiles()
        doCleanup()

if __name__ == "__main__":
    main()

web automation - auto check link

I'm new to web apps, and I want to check when there's a new version of the DotA map by checking the links on getdota.com.
How can I do this, and in which language? I want it to check every time you start Warcraft and automatically download the new map to a specific folder.
My question is: can you give me a link to a specific article about web automation or something like that?
Thanks first :)
Below is an example in Python.
It parses the getdota.com page, reads the parameters for the POST request that downloads a map, fetches the file, and saves it in the configured directory (by default the current directory).
#!/usr/bin/env python
import urllib
import urllib2
import sgmllib
from pprint import pprint
import os.path
import sys

url = 'http://www.getdota.com/'
download_url = 'http://www.getdota.com/app/getmap/'
chunk = 10000
directory = ''  # directory where the file should be saved; if empty, uses the current dir

class DotaParser(sgmllib.SGMLParser):
    def parse(self, s):
        self.feed(s)
        self.close()

    def __init__(self, verbose=0):
        sgmllib.SGMLParser.__init__(self, verbose)
        self.URL = ''
        self.post_args = {}

    def getArgs(self):
        return self.post_args

    def start_input(self, attributes):
        d = dict(attributes)
        if d.get('id', None) is None:
            return
        if d['id'] in ["input_mirror2", "input_file_name2", "input_map_id2", "input_language2", "input_language_id2"]:
            self.post_args[d['name']] = d['value']

if __name__ == '__main__':
    dotap = DotaParser()
    data = urllib2.urlopen(urllib2.Request('http://www.getdota.com/')).read()
    dotap.parse(data)
    data = urllib.urlencode(dotap.getArgs())
    request = urllib2.Request(download_url, data)
    response = urllib2.urlopen(request)
    page = response.read()

    # download file
    fname = directory + page.split('/')[-1]
    if os.path.isfile(fname):
        print "No newer file available"
        sys.exit(0)
    f = open(fname, 'w')
    print "New file available. Saving in: %s" % fname
    webFile = urllib.urlopen(page)
    c = webFile.read(chunk)
    while c:
        f.write(c)
        c = webFile.read(chunk)
    f.close()
    webFile.close()
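Note that the script above targets Python 2 (urllib2 and sgmllib no longer exist in Python 3). The same "skip if already present, otherwise download in chunks" idea in modern Python would look roughly like the sketch below, where file_url is a placeholder for whatever map link you extract from the page:
import os.path
import urllib.request

file_url = "http://example.com/path/to/map.w3x"  # placeholder: the link scraped from the site
fname = file_url.split('/')[-1]

if os.path.isfile(fname):
    print("No newer file available")
else:
    print("New file available. Saving in: %s" % fname)
    # stream the download in chunks so large maps don't sit in memory
    with urllib.request.urlopen(file_url) as resp, open(fname, 'wb') as f:
        while True:
            chunk = resp.read(10000)
            if not chunk:
                break
            f.write(chunk)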