I have a class that I want to share between different files or computers. I can pickle and load it from the same Jupyter notebook. However, I cannot load it from a different machine or a different notebook. I have tried the following:
Initialize and save from a Jupyter notebook
# Simplified class definition
class MyClass:
    """Named dataset that gives every added item a running integer index."""

    def __init__(self, name):
        """Create an empty dataset.

        Args:
            name: Label stored on the instance as ``self.name``.
        """
        self.name = name
        # Maps each added item to the index it received.
        self.dataToIndex = {}
        # Index handed to the next item; bumped on every addData call.
        self.index = 0

    def addData(self, dataReceived):
        """Store ``dataReceived`` under the current index, then advance it.

        Args:
            dataReceived: Hashable item used as the dictionary key.
        """
        self.dataToIndex[dataReceived] = self.index
        self.index += 1
# initialize
my_dataset = MyClass("My_test_dataset")
# add some data
my_dataset.addData("One")
my_dataset.addData("Two")
# check data
my_dataset.dataToIndex
# save from one Jupyter notebook
import pickle
with open("path.obj", "wb") as out:
    pickle.dump(my_dataset, out, pickle.HIGHEST_PROTOCOL)
# Read from the same Jupyter notebook
with open("path.obj", 'rb') as inp:
    transferred = pickle.load(inp)
# Output looks good
transferred.dataToIndex
{'One': 0, 'Two': 1}
Read from the new jupyter notebook
import pickle
# pickle stores only a reference to the class (module name + class name), not
# its code, so `MyClass` has to be defined or imported in this notebook before
# the file can be loaded.
with open("path.obj", 'rb') as inp:
    transferred = pickle.load(inp)
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-1-44a1a18ebb1b> in <module>
2 # Read from the same jupyter notebook
3 with open("path.obj", 'rb') as inp:
----> 4 transferred = pickle.load(inp)
AttributeError: Can't get attribute 'MyClass' on <module '__main__'>
Now, I want to be able to load it into a different Python script or a different Jupyter notebook.
I have checked this, Saving an Object (Data persistence)
and
https://www.stefaanlippens.net/python-pickling-and-dealing-with-attributeerror-module-object-has-no-attribute-thing.html
But could not figure out a solution. Any help is appreciated.
There are tons of helpful posts on this. However, most of them cover how to save from within the class, etc. For a future beginner like me, I just want to provide a simple solution that worked for me.
Take the class out of the notebook and put in a script like, my_class_def.py
class MyClass:
    """Index-assigning dataset, kept in its own module so other scripts can import it."""

    def __init__(self, name):
        """Start with no data and the counter at zero.

        Args:
            name: Label stored on the instance as ``self.name``.
        """
        self.name = name
        # item -> index assigned when the item was added
        self.dataToIndex = {}
        # next index to assign
        self.index = 0

    def addData(self, dataReceived):
        """Assign the current index to ``dataReceived`` and increment the counter.

        Args:
            dataReceived: Hashable item used as the dictionary key.
        """
        self.dataToIndex[dataReceived] = self.index
        self.index += 1
Then use the pickle to save as it is.
In the different(new) script, import the class first using,
from my_class_def import MyClass
and then load the pickled file.
Related
Trying to receive input from WCT_Control into WCT_DataPull
I can't figure out how to get the data into WCT_DataPull to perform an action with it. I think I am going about this backwards, but I also think I have been staring at it too long.
Essentially, the user enters the information necessary into a GUI to connect to a specific SQL table (predetermined) and then saves the data in the table and outputs it as a csv file backup.
I want the user to click the submit button and that creates the backup. However, at this point when I click the button, it will store all the data in the variables (If I put a print statement in I see the correct values), but I cant seem to figure out how to get the variables to WCT_DataPull, where the backup creation action is performed.
WCT_Control
from PyQt5.QtWidgets import *
from WCT_View import Ui_MainWindow
class Controller(QMainWindow, Ui_MainWindow):
    """Main window that reads the connection form when the run button is clicked."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.setupUi(self)
        # The lambda discards the `checked` bool that `clicked` emits.
        self.run.clicked.connect(lambda : self.submit())

    def submit(self):
        """Read every entry field, clear the form, and return what was entered.

        Returns:
            Tuple ``(server, database, station, app, backup_name)`` holding the
            text of each field as it was before the form was cleared.
        """
        # No signal wiring here: the button is connected once, in __init__.
        server = self.server_entry.text()
        database = self.data_entry.text()
        station = self.station_entry.text()
        app = self.app_entry.text()
        backup_name = self.filename_entry.text()

        # Reset the form for the next submission.
        self.server_entry.setText('')
        self.data_entry.setText('')
        self.station_entry.setText('')
        self.app_entry.setText('')
        self.filename_entry.setText('')
        return server, database, station, app, backup_name
WCT_DataPull
from WCT_Control import *
import pyodbc
import csv
# NOTE(review): this is a new Controller created when the module is imported;
# presumably it is not the window the user typed into — confirm.
pull_data = Controller()


def write_bak():
    """Query WorkstationApplicationSettings and write the rows to a CSV file.

    The connection details, filter values and output file name all come from
    ``pull_data.submit()``.
    """
    driver = 'ODBC Driver 17 for SQL Server'
    serv, data, stat, app, bak_name = pull_data.submit()
    conn = pyodbc.connect('DRIVER={0};SERVER={1};DATABASE={2};Trusted_Connection=yes'.format(driver, serv, data))
    try:
        # fetchall() materialises every row, so the connection can be closed
        # before the rows are printed and written out.
        rows = conn.cursor().execute("""
select DnsName, PackageName, Code, Value from WorkstationApplicationSettings
where DnsName=? and PackageName=?
""", stat, app).fetchall()
    finally:
        conn.close()
    for row in rows:
        print(row.PackageName,':', row.Code, ':', row.Value)
    with open(bak_name, 'w', newline='') as f:
        csv.writer(f).writerows(rows)
So you just have to do things the opposite way: instead of using the PyQt5 controller inside the WCT_DataPull script, call the WCT_DataPull function from the PyQt5 script.
WCT_Control
from PyQt5.QtWidgets import *
from WCT_View import Ui_MainWindow
from WCT_DataPull import write_bak


class Controller(QMainWindow, Ui_MainWindow):
    """Main window that creates the CSV backup when the run button is clicked."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.setupUi(self)
        # The lambda discards the `checked` bool that `clicked` emits.
        self.run.clicked.connect(lambda : self.submit())

    def submit(self):
        """Read the form, clear it, and pass the entered values to write_bak."""
        # No signal wiring here: the button is connected once, in __init__.
        server = self.server_entry.text()
        database = self.data_entry.text()
        station = self.station_entry.text()
        app = self.app_entry.text()
        backup_name = self.filename_entry.text()

        # Reset the form for the next submission.
        self.server_entry.setText('')
        self.data_entry.setText('')
        self.station_entry.setText('')
        self.app_entry.setText('')
        self.filename_entry.setText('')

        # Argument order matches write_bak(serv, data, stat, app, bak_name).
        write_bak(server, database, station, app, backup_name)
WCT_DataPull
import pyodbc
import csv
def write_bak(serv, data, stat, app, bak_name):
    """Dump the matching WorkstationApplicationSettings rows to a CSV file.

    Args:
        serv: SQL Server host placed in the connection string.
        data: Database name placed in the connection string.
        stat: Value matched against the ``DnsName`` column.
        app: Value matched against the ``PackageName`` column.
        bak_name: Path of the CSV file to write.
    """
    driver = 'ODBC Driver 17 for SQL Server'
    conn = pyodbc.connect('DRIVER={0};SERVER={1};DATABASE={2};Trusted_Connection=yes'.format(driver, serv, data))
    try:
        # fetchall() materialises every row, so the connection can be closed
        # before the rows are printed and written out.
        rows = conn.cursor().execute("""
select DnsName, PackageName, Code, Value from WorkstationApplicationSettings
where DnsName=? and PackageName=?
""", stat, app).fetchall()
    finally:
        conn.close()
    for row in rows:
        print(row.PackageName,':', row.Code, ':', row.Value)
    with open(bak_name, 'w', newline='') as f:
        csv.writer(f).writerows(rows)
Also make sure to write the code that runs WCT_Control using QApplication.
enter code here
from locust import HttpLocust, TaskSet, task
class ExampleTask(TaskSet):
    """Task set that hands out rows of ``failed.csv`` one at a time."""

    # Read once, when the class body executes; `data` is a class attribute and
    # is therefore shared by every instance.
    with open('failed.csv', 'r') as csvfile:
        data = csvfile.readlines()
    # Untouched copy used to refill `data` once every row has been popped.
    bakdata = list(data)

    @task
    def fun(self):
        """Pop one row and print its first comma-separated field."""
        try:
            value = self.data.pop().split(',')
            print('------This is the value {}'.format(value[0]))
        except IndexError:
            # NOTE(review): assigning through `self` creates an instance
            # attribute that shadows the shared class-level list, so after the
            # first refill each instance pops from its own private copy.
            self.data = list(self.bakdata)
class ExampleUser(HttpLocust):
    # Locust user class: `host` is the base URL and `task_set` names the
    # TaskSet whose tasks this user runs.
    host = 'https://www.google.com'
    task_set = ExampleTask
Following my csv file:
516,True,success
517,True,success
518,True,success
519,True,success
520,True,success
521,True,success
522,True,success
523,True,success
524,True,success
525,True,success
526,True,success
527,True,success
528,True,success
529,True,success
530,True,success
531,True,success
532,True,success
533,True,success
534,True,success
535,True,success
536,True,success
537,True,success
538,True,success
539,True,success
540,True,success
541,True,success
542,True,success
543,True,success
544,True,success
545,True,success
546,True,success
547,True,success
548,True,success
549,True,success
550,True,success
551,True,success
552,True,success
553,True,success
554,True,success
555,True,success
556,True,success
557,True,success
558,True,success
559,True,success
Here, after the end of the csv file is reached, locust does not take a unique value; it takes the same value for all the simulated users.
I'm not 100% sure, but I think your problem is this line:
self.data = list(self.bakdata)
This will give each User instance a different copy of the list.
It should work if you change it to:
ExampleTask.data = list(self.bakdata)
Or you can use locust-plugins's CSVReader, see the example here:
https://github.com/SvenskaSpel/locust-plugins/blob/master/examples/csvreader_ex.py
It is puzzling to me that there is a tfdv.load_statistics() function, but no corresponding tfdv.write_statistics() function. How do I go about saving the statistics, and then loading them again?
e.g.
import tensorflow_data_validation as tfdv
# `df` is assumed to be an existing pandas DataFrame (not defined in this snippet).
stats = tfdv.generate_statistics_from_dataframe(df)
# how do I save?
# load back for later use
saved_stats = tfdv.load_statistics('saved_stats.stats')
I can save the string representation to a file, but this is not the format that load_statistics expects.
# Writes the str() form of the statistics; this is the attempt that
# load_statistics cannot read back.
with open('saved_stats.stats', 'w') as o:
    o.write(str(stats))
Pointers anyone?
have you tried this : tfdv.utils.stats_util.write_stats_text ?
In the current tfdv version 1.3.0 there are the following methods that can be used:
load_stats_text
write_stats_text
Example:
import tensorflow_data_validation as tfdv
# `df` is assumed to be an existing pandas DataFrame (not defined in this snippet).
stats = tfdv.generate_statistics_from_dataframe(df)
stats_path = "my-stats-file.stats"
# saving: write the statistics to `stats_path`
tfdv.write_stats_text(stats, stats_path)
# loading: read them back from the same path
stats = tfdv.load_stats_text(stats_path)
Okay, I figured out this hacky way to do it.
df = ... # create pandas df
from tensorflow_metadata.proto.v0 import statistics_pb2
import tensorflow_data_validation as tfdv
stats = tfdv.generate_statistics_from_dataframe(df)
# save it as the serialized (binary) protobuf
with open('saved_stats.stats', 'wb') as o:
    o.write(stats.SerializeToString())
# load back for later use
# FromString is a method of the message class, not of the statistics_pb2
# module, so parse with the same message type that was serialized.
with open('saved_stats.stats', 'rb') as i:
    loaded_stats = statistics_pb2.DatasetFeatureStatisticsList.FromString(i.read())
There's a function called tfdv.load_stats_binary that you can use to solve this problem.
I am trying to follow the AI Platform tutorial to upload a model and a prediction routine, but one part fails and I don't understand why.
My prediction class is the same as in their tutorial:
%%writefile predictor.py
import os
import pickle
import numpy as np
from sklearn.datasets import load_iris
from sklearn.externals import joblib
class MyPredictor(object):
    """Prediction routine that preprocesses instances and runs them through a model."""

    def __init__(self, model, preprocessor):
        """Keep the model and preprocessor and cache the iris class names.

        Args:
            model: Object providing ``predict`` and ``predict_proba``.
            preprocessor: Object providing ``preprocess``.
        """
        self._model = model
        self._preprocessor = preprocessor
        self._class_names = load_iris().target_names

    def predict(self, instances, **kwargs):
        """Return predictions for ``instances``.

        Args:
            instances: Input rows; converted with ``np.asarray`` before use.
            **kwargs: If ``probabilities`` is truthy, class probabilities are
                returned instead of class names.

        Returns:
            ``predict_proba(...).tolist()`` when probabilities are requested,
            otherwise a list with one class name per model output.
        """
        inputs = np.asarray(instances)
        preprocessed_inputs = self._preprocessor.preprocess(inputs)
        if kwargs.get('probabilities'):
            probabilities = self._model.predict_proba(preprocessed_inputs)
            return probabilities.tolist()
        outputs = self._model.predict(preprocessed_inputs)
        return [self._class_names[class_num] for class_num in outputs]

    @classmethod
    def from_path(cls, model_dir):
        """Build a predictor from the artifacts stored in ``model_dir``.

        Args:
            model_dir: Directory containing ``model.joblib`` and
                ``preprocessor.pkl``.
        """
        model_path = os.path.join(model_dir, 'model.joblib')
        model = joblib.load(model_path)
        # NOTE(review): the preprocessor is read with pickle while the model is
        # read with joblib; both files must have been written accordingly.
        preprocessor_path = os.path.join(model_dir, 'preprocessor.pkl')
        with open(preprocessor_path, 'rb') as f:
            preprocessor = pickle.load(f)
        return cls(model, preprocessor)
the code I use to create my model in cloud is:
! gcloud beta ai-platform versions create {VERSION_NAME} \
--model {MODEL_NAME} \
--runtime-version 1.13 \
--python-version 3.5 \
--origin gs://{BUCKET_NAME}/custom_prediction_routine_tutorial/model/ \
--package-uris gs://{BUCKET_NAME}/custom_prediction_routine_tutorial/my_custom_code-0.1.tar.gz \
--prediction-class predictor.MyPredictor
But I end up with such an odd error:
ERROR: (gcloud.beta.ai-platform.versions.create) Bad model detected with error: "Failed to load model: Unexpected error when loading the model: 'ascii' codec can't decode byte 0xf9 in position 2: ordinal not in range(128) (Error code: 0)"
The thing is that when I run the same command without the:
--prediction-class predictor.MyPredictor
it works fine.
Does someone know the reason of this ? I think model.joblib might have an encoding problem but when I load it myself there is nothing wrong
I've found the solution.
In the tutorial they use pickle to save the preprocessor object created, and Joblib to save the model.
You need to use Joblib to save both and then send it to google storage.
I'm kind of a newbie to Python, and I'm writing some code to take data via a user input and put it into a .csv file. To do that, the program needs to pass data from class to class.
To teach myself how to pass data, I took code from here. I did have to alter the code a bit to get it to start up, making sure that the make_widget and print_it functions can pull the "name" variable stored in self.app_data data structure properly.
from tkinter import *
from tkinter import ttk
class MyApp(Tk):
    """Root window that builds every page in one container and raises one at a time."""

    def __init__(self):
        Tk.__init__(self)
        # Data shared between pages; each page reaches it through `controller`.
        # NOTE(review): this stores the StringVar class itself, not an instance
        # (there are no call parentheses) — likely the source of the reported
        # "get() missing 1 required positional argument: 'self'"; confirm.
        self.app_data={'name': StringVar}
        container = ttk.Frame(self)
        container.pack(side="top", fill="both", expand = True)
        # Maps each page class to its single instance.
        self.frames = {}
        for F in (PageOne, PageTwo):
            frame = F(container, self)
            self.frames[F] = frame
            # All pages share grid cell (0, 0); tkraise() picks the visible one.
            frame.grid(row=0, column=0, sticky = NSEW)
        self.show_frame(PageOne)

    def show_frame(self, cont):
        """Raise the page whose class is ``cont``."""
        frame = self.frames[cont]
        frame.tkraise()
class PageOne(ttk.Frame):
    """First page: an entry bound to the shared 'name' value and a next-page button."""

    def __init__(self, parent, controller):
        ttk.Frame.__init__(self, parent)
        # `controller` is the object that owns `app_data` and `show_frame`.
        self.controller=controller
        ttk.Label(self, text='PageOne').grid(padx=(20,20), pady=(20,20))
        self.make_widget(controller)

    def make_widget(self, controller):
        """Create the entry and the button that switches to PageTwo."""
        self.controller=controller
        # The entry's text variable is whatever is stored under app_data['name'].
        self.some_entry = ttk.Entry(self, textvariable=self.controller.app_data['name'], width=8)
        self.some_entry.grid()
        button1 = ttk.Button(self, text='Next Page',command=lambda: controller.show_frame(PageTwo))
        button1.grid()
class PageTwo(ttk.Frame):
    """Second page: a previous-page button and a button meant to print the shared value."""

    def __init__(self, parent, controller):
        ttk.Frame.__init__(self, parent)
        self.controller=controller
        ttk.Label(self, text='PageTwo').grid(padx=(20,20), pady=(20,20))
        button1 = ttk.Button(self, text='Previous Page',command=lambda: controller.show_frame(PageOne))
        button1.grid()
        # NOTE(review): `self.print_it()` is called right here, while the button
        # is being built, and its return value (None) becomes `command`;
        # passing `self.print_it` without parentheses is presumably intended.
        button2 = ttk.Button(self, text='press to print', command= self.print_it())
        button2.grid()

    def print_it(self):
        """Print the value obtained from the shared app_data['name'] entry."""
        value=self.controller.app_data['name'].get()
        print ('The value stored in StartPage some_entry = ', value)#What do I put here
        #to print the value of some_input from PageOne
When I run this program, it does start up, and I can move from frame to frame, but it does not print the "name" variable.
When I close the window, I get the error:
TypeError: get() missing 1 required positional argument: 'self'
Which the traceback blames on the line:
value=self.controller.app_data['name'].get()
What am I doing wrong? For what it's worth, I'm writing the code in Python 3.5.
I really appreciate any help that you guys could give me.