AttributeError: 'Index' object has no attribute 'index' - visualization

I have saved & loaded a simple XGBClassifier(random_sate = 100) model, trained on Heart Disease prediction dataset(target variable mapped to 0s & 1s). I am trying to create a dtreeviz plot for the same:
from dtreeviz.trees import *
viz = dtreeviz(loaded_model, X_train, y_train, tree_index = 10,feature_names = X_train.columns,
class_names = ['Absence', 'Presence'], target_name = 'Heart Disease')
viz.view()
However, I am getting the following error:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-18-2a3024516ed1> in <module>
1 from dtreeviz.trees import *
----> 2 viz = dtreeviz(loaded_model, X_train, y_train, tree_index = 10, feature_names = X_train.columns,
3 class_names = ['Absence', 'Presence'], target_name = 'Heart Disease')
4 viz.view()
~\anaconda3\lib\site-packages\dtreeviz\trees.py in dtreeviz(tree_model, x_data, y_data, feature_names, target_name, class_names, tree_index, precision, orientation, instance_orientation, show_root_edge_labels, show_node_labels, show_just_path, fancy, histtype, highlight_path, X, max_X_features_LR, max_X_features_TD, depth_range_to_display, label_fontsize, ticks_fontsize, fontname, title, title_fontsize, colors, cmap, scale)
816 if shadow_tree.is_classifier():
817 nbins = get_num_bins(histtype, n_classes)
--> 818 node_heights = shadow_tree.get_split_node_heights(X_data, y_data, nbins=nbins)
819
820 internal = []
~\anaconda3\lib\site-packages\dtreeviz\models\shadow_decision_tree.py in get_split_node_heights(self, X_train, y_train, nbins)
273 for node in self.internal:
274 # print(node.feature_name(), node.id)
--> 275 X_feature = X_train[:, node.feature()]
276 overall_feature_range = (np.min(X_feature), np.max(X_feature))
277 # print(f"range {overall_feature_range}")
~\anaconda3\lib\site-packages\dtreeviz\models\shadow_decision_tree.py in feature(self)
506 """Returns feature index used at this node"""
507
--> 508 return self.shadow_tree.get_node_feature(self.id)
509
510 def feature_name(self) -> (str, None):
~\anaconda3\lib\site-packages\dtreeviz\models\xgb_decision_tree.py in get_node_feature(self, id)
76 feature_name = self._get_nodes_values("Feature")[id]
77 try:
---> 78 return self.feature_names.index(feature_name)
79 except ValueError as error:
80 return self.__class__.NO_FEATURE
AttributeError: 'Index' object has no attribute 'index'
I have been trying resolve this since yesterday, however, I can not find any solution.
Kindly help!
Thanks,
Neel

Related

Error Expected object of device type cuda but got device type cpu for argument #1 'self' in call to _th_index_select

I have the following code taken directly from here with some pretty little modifications:
import pandas as pd
import torch
import json
from transformers import T5Tokenizer, T5ForConditionalGeneration, T5Config
from torch import cuda
df = pd.read_pickle('df_final.pkl')
model = T5ForConditionalGeneration.from_pretrained('t5-base')
tokenizer = T5Tokenizer.from_pretrained('t5-base')
device = 'cuda' if cuda.is_available() else 'cpu'
text = ''.join(df[(df['col1'] == 'type') & (df['col2'] == 2)].col3.to_list())
preprocess_text = text.strip().replace("\n","")
t5_prepared_Text = "summarize: "+preprocess_text
#print ("original text preprocessed: \n", preprocess_text)
tokenized_text = tokenizer.encode(t5_prepared_Text, return_tensors="pt", max_length = 500000).to(device)
# summmarize
summary_ids = model.generate(tokenized_text,
num_beams=4,
no_repeat_ngram_size=2,
min_length=30,
max_length=100,
early_stopping=True)
output = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
print ("\n\nSummarized text: \n",output)
When executing the model_generate() part i get an error like this:
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-12-e8e9819a85dc> in <module>
12 min_length=30,
13 max_length=100,
---> 14 early_stopping=True).to(device)
15
16 output = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
~\Anaconda3\lib\site-packages\torch\autograd\grad_mode.py in decorate_no_grad(*args, **kwargs)
47 def decorate_no_grad(*args, **kwargs):
48 with self:
---> 49 return func(*args, **kwargs)
50 return decorate_no_grad
51
~\Anaconda3\lib\site-packages\transformers\generation_utils.py in generate(self, input_ids, max_length, min_length, do_sample, early_stopping, num_beams, temperature, top_k, top_p, repetition_penalty, bad_words_ids, bos_token_id, pad_token_id, eos_token_id, length_penalty, no_repeat_ngram_size, num_return_sequences, attention_mask, decoder_start_token_id, use_cache, **model_specific_kwargs)
383 encoder = self.get_encoder()
384
--> 385 encoder_outputs: tuple = encoder(input_ids, attention_mask=attention_mask)
386
387 # Expand input ids if num_beams > 1 or num_return_sequences > 1
~\Anaconda3\lib\site-packages\torch\nn\modules\module.py in __call__(self, *input, **kwargs)
539 result = self._slow_forward(*input, **kwargs)
540 else:
--> 541 result = self.forward(*input, **kwargs)
542 for hook in self._forward_hooks.values():
543 hook_result = hook(self, input, result)
~\Anaconda3\lib\site-packages\transformers\modeling_t5.py in forward(self, input_ids, attention_mask, encoder_hidden_states, encoder_attention_mask, inputs_embeds, head_mask, past_key_value_states, use_cache, output_attentions, output_hidden_states, return_dict)
701 if inputs_embeds is None:
702 assert self.embed_tokens is not None, "You have to intialize the model with valid token embeddings"
--> 703 inputs_embeds = self.embed_tokens(input_ids)
704
705 batch_size, seq_length = input_shape
~\Anaconda3\lib\site-packages\torch\nn\modules\module.py in __call__(self, *input, **kwargs)
539 result = self._slow_forward(*input, **kwargs)
540 else:
--> 541 result = self.forward(*input, **kwargs)
542 for hook in self._forward_hooks.values():
543 hook_result = hook(self, input, result)
~\Anaconda3\lib\site-packages\torch\nn\modules\sparse.py in forward(self, input)
112 return F.embedding(
113 input, self.weight, self.padding_idx, self.max_norm,
--> 114 self.norm_type, self.scale_grad_by_freq, self.sparse)
115
116 def extra_repr(self):
~\Anaconda3\lib\site-packages\torch\nn\functional.py in embedding(input, weight, padding_idx, max_norm, norm_type, scale_grad_by_freq, sparse)
1482 # remove once script supports set_grad_enabled
1483 _no_grad_embedding_renorm_(weight, input, max_norm, norm_type)
-> 1484 return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse)
1485
1486
RuntimeError: Expected object of device type cuda but got device type cpu for argument #1 'self' in call to _th_index_select
​
I've searched this error and fouund some other threads like this one and this one but they didn't help me much since their case seems to be completely different. In my case there are no custom instances or classes created, so i don't know how to fix this or where the error come from.
Could you please tell me where is the error coming from and how could i fix it?
Thank you very much in advance.
Try explicitly moving your model to the GPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = T5ForConditionalGeneration.from_pretrained('t5-base').to(device)

How to fix "float() argument must be a string or a number, not 'PngImageFile" error when keras calling numpy.asarray with dtype of float32

I am learning neuron network by building multilayer perceptron on a binary classification problem using keras with tensorflow as backend.
Here is the source of image data.
I have followed this and this.
From those issues I found, i think the error is related to corrupted image, but I tried those suggestion inside those links by verifying the image, the image have no problem for me but the error still persists.
The stacktrace shows that the error was occured when keras is trying to convert the image data to numpy array with data type of float32, so I tried converting the image to numpy array myself, and converting it like numpy.asarray(image) works but not numpy.asarray(image, dtype='float32') which was what keras was doing.
Assuming all import are in place.
So the code to data preparation
image_data_path = '../data/breast_histopathology'
image_width = 50
image_height = 50
train_size_as_percentage = 0.8
validate_size_percentage_of_train_data = 0.1
data_extract_path = image_data_path + '_prep'
train_data_path = data_extract_path + '/training'
test_data_path = data_extract_path + '/testing'
validation_data_path = data_extract_path + '/validation'
if os.path.isdir(data_extract_path):
shutil.rmtree(data_extract_path)
os.makedirs(train_data_path)
os.makedirs(train_data_path + '/0')
os.makedirs(train_data_path + '/1')
os.makedirs(test_data_path)
os.makedirs(test_data_path + '/0')
os.makedirs(test_data_path + '/1')
os.makedirs(validation_data_path)
os.makedirs(validation_data_path + '/0')
os.makedirs(validation_data_path + '/1')
image_paths = [image_path for image_path in glob.glob(image_data_path + '/**/*', recursive=True)]
random.seed(128)
random.shuffle(image_paths)
training_size = int(len(image_paths) * train_size_as_percentage)
training_image_paths = image_paths[:training_size]
testing_image_paths = image_paths[training_size:]
validation_size = int(len(training_image_paths) * validate_size_percentage_of_train_data)
validation_image_paths = training_image_paths[:validation_size]
training_image_paths = training_image_paths[validation_size:]
datasets = [
(train_data_path, training_image_paths),
(test_data_path, testing_image_paths),
(validation_data_path, validation_image_paths)
]
for data_path, image_paths in datasets:
for image_path in image_paths:
filename = image_path.split(os.path.sep)[-1]
# filename would be, 10253_idx5_x1001_y1001_class0.png,
# the character before . and word after class are the
# labeling for the image
class_label = filename[-5:-4]
copy_destination = '{}/{}/{}'.format(data_path, class_label, filename)
if os.path.isfile(image_path):
try:
image = PIL.Image.open(image_path)
image.verify()
# print('=============')
# print(filename)
# print(image_path)
# print(image)
# print(image.size)
# print(image.format)
# print(image.mode)
# print(image.verify())
# print(numpy.asarray(image, dtype='float32'))
# print('XXXXXXXXXXXXX')
width, height = image.size
if width == height == image_width and image.format == 'PNG':
shutil.copy2(image_path, copy_destination)
except Exception as e:
print(str(e))
pass
Code to build and train the model
image_generator = keras_preprocessing.image.ImageDataGenerator()
train_data_generator = image_generator.flow_from_directory(
directory=train_data_path,
target_size=(image_width, image_height),
color_mode='rgb',
batch_size=32,
class_mode='categorical',
shuffle=True)
validation_data_generator = image_generator.flow_from_directory(
directory=validation_data_path,
target_size=(image_width, image_height),
color_mode='rgb',
batch_size=32,
class_mode='categorical',
shuffle=True)
test_data_generator = image_generator.flow_from_directory(
directory=test_data_path,
target_size=(image_width, image_height),
color_mode='rgb',
batch_size=1,
class_mode='categorical',
shuffle=False)
input_layer = keras_layers.Input(shape=(image_width, image_height))
hidden_layer_output_neuron = int((image_width + 1) / 2)
hidden_layer_0 = keras_layers.Dense(
units=hidden_layer_output_neuron,
activation=keras.activations.relu,
use_bias=True)(input_layer)
hidden_layer_1_output_unit = 16
hidden_layer_1 = keras_layers.Dense(
units=hidden_layer_1_output_unit,
activation=keras.activations.relu,
use_bias=True)(hidden_layer_0)
hidden_layer_2_output_unit = 8
hidden_layer_2 = keras_layers.Dense(units=hidden_layer_2_output_unit, activation=keras.activations.relu, use_bias=True)(hidden_layer_1)
output_layer = keras_layers.Dense(
units=1,
activation=keras.activations.relu,
use_bias=True)(hidden_layer_0)
learning_rate = 0.001
model = keras_models.Model(inputs=input_layer, outputs=output_layer)
model.compile(
optimizer=keras_optimizers.SGD(lr=learning_rate),
loss=keras_losses.binary_crossentropy,
metrics=[keras_metrics.Recall()])
model.fit_generator(
generator=train_data_generator,
steps_per_epoch=train_data_generator.n // train_data_generator.batch_size,
validation_data=validation_data_generator,
validation_steps=validation_data_generator.n // validation_data_generator.batch_size,
epochs=100)
Expected result: No error
Actual result:
Epoch 1/100
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-85-e2ffe31934fb> in <module>
4 validation_data=validation_data_generator,
5 validation_steps=validation_data_generator.n // validation_data_generator.batch_size,
----> 6 epochs=10)
/usr/local/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py in fit_generator(self, generator, steps_per_epoch, epochs, verbose, callbacks, validation_data, validation_steps, validation_freq, class_weight, max_queue_size, workers, use_multiprocessing, shuffle, initial_epoch)
1513 shuffle=shuffle,
1514 initial_epoch=initial_epoch,
-> 1515 steps_name='steps_per_epoch')
1516
1517 def evaluate_generator(self,
/usr/local/lib/python3.7/site-packages/tensorflow/python/keras/engine/training_generator.py in model_iteration(model, data, steps_per_epoch, epochs, verbose, callbacks, validation_data, validation_steps, validation_freq, class_weight, max_queue_size, workers, use_multiprocessing, shuffle, initial_epoch, mode, batch_size, steps_name, **kwargs)
211 step = 0
212 while step < target_steps:
--> 213 batch_data = _get_next_batch(generator, mode)
214 if batch_data is None:
215 if is_dataset:
/usr/local/lib/python3.7/site-packages/tensorflow/python/keras/engine/training_generator.py in _get_next_batch(generator, mode)
353 """Retrieves the next batch of input data."""
354 try:
--> 355 generator_output = next(generator)
356 except (StopIteration, errors.OutOfRangeError):
357 return None
/usr/local/lib/python3.7/site-packages/tensorflow/python/keras/utils/data_utils.py in get(self)
653 except Exception: # pylint: disable=broad-except
654 self.stop()
--> 655 six.reraise(*sys.exc_info())
656
657
/usr/local/lib/python3.7/site-packages/six.py in reraise(tp, value, tb)
691 if value.__traceback__ is not tb:
692 raise value.with_traceback(tb)
--> 693 raise value
694 finally:
695 value = None
/usr/local/lib/python3.7/site-packages/tensorflow/python/keras/utils/data_utils.py in get(self)
647 try:
648 while self.is_running():
--> 649 inputs = self.queue.get(block=True).get()
650 self.queue.task_done()
651 if inputs is not None:
/usr/local/Cellar/python/3.7.3/Frameworks/Python.framework/Versions/3.7/lib/python3.7/multiprocessing/pool.py in get(self, timeout)
655 return self._value
656 else:
--> 657 raise self._value
658
659 def _set(self, i, obj):
/usr/local/Cellar/python/3.7.3/Frameworks/Python.framework/Versions/3.7/lib/python3.7/multiprocessing/pool.py in worker(inqueue, outqueue, initializer, initargs, maxtasks, wrap_exception)
119 job, i, func, args, kwds = task
120 try:
--> 121 result = (True, func(*args, **kwds))
122 except Exception as e:
123 if wrap_exception and func is not _helper_reraises_exception:
/usr/local/lib/python3.7/site-packages/tensorflow/python/keras/utils/data_utils.py in get_index(uid, i)
443 The value at index `i`.
444 """
--> 445 return _SHARED_SEQUENCES[uid][i]
446
447
/usr/local/lib/python3.7/site-packages/keras_preprocessing/image/iterator.py in __getitem__(self, idx)
63 index_array = self.index_array[self.batch_size * idx:
64 self.batch_size * (idx + 1)]
---> 65 return self._get_batches_of_transformed_samples(index_array)
66
67 def __len__(self):
/usr/local/lib/python3.7/site-packages/keras_preprocessing/image/iterator.py in _get_batches_of_transformed_samples(self, index_array)
225 target_size=self.target_size,
226 interpolation=self.interpolation)
--> 227 x = img_to_array(img, data_format=self.data_format)
228 # Pillow images should be closed after `load_img`,
229 # but not PIL images.
/usr/local/lib/python3.7/site-packages/keras_preprocessing/image/utils.py in img_to_array(img, data_format, dtype)
280 # or (channel, height, width)
281 # but original PIL image has format (width, height, channel)
--> 282 x = np.asarray(img, dtype=dtype)
283 if len(x.shape) == 3:
284 if data_format == 'channels_first':
/usr/local/lib/python3.7/site-packages/numpy/core/numeric.py in asarray(a, dtype, order)
536
537 """
--> 538 return array(a, dtype, copy=False, order=order)
539
540
TypeError: float() argument must be a string or a number, not 'PngImageFile'

Autoencoder in fastai

I'm trying to build an autoencoder with fast.ai version 1.0.52 and struggling with how to set labels to be equal to original images. I was
following this blog post: https://alanbertl.com/autoencoder-with-fast-ai/
I replaced ImageItemList in the original code with ImageList since it was changed in the latest fastai versions.
%reload_ext autoreload
%autoreload 2
%matplotlib inline
from fastai.imports import *
from fastai.vision import *
from fastai.data_block import *
from fastai.basic_train import *
import pandas as pd
x = np.random.randint(256, size=(1000, 16384))
x = x/255
x = x.reshape(-1,128,128)
x = np.stack([x,x,x],1)
x.shape
class ArraysImageList(ImageList,FloatList):
def __init__(self, items:Iterator, log:bool=False, **kwargs):
if isinstance(items, ItemList):
items = items.items
super(FloatList,self).__init__(items,**kwargs)
def get(self,i):
return Tensor(super(FloatList,self).get(i).astype('float32'))
x_il = ArraysImageList(x)
x_ils = x_il.split_by_rand_pct()
lls = x_ils.label_from_lists(x_ils.train, x_ils.valid)
Here's the error message I get.
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-33-cbada9e18af9> in <module>
----> 1 lls = x_ils.label_from_lists(x_ils.train, x_ils.valid)
~/.local/lib/python3.6/site-packages/fastai/data_block.py in label_from_lists(self, train_labels, valid_labels, label_cls, **kwargs)
484 self.valid = self.valid._label_list(x=self.valid, y=self.train.y.new(valid_labels, **kwargs))
485 self.__class__ = LabelLists
--> 486 self.process()
487 return self
488
~/.local/lib/python3.6/site-packages/fastai/data_block.py in process(self)
520 "Process the inner datasets."
521 xp,yp = self.get_processors()
--> 522 for ds,n in zip(self.lists, ['train','valid','test']): ds.process(xp, yp, name=n)
523 #progress_bar clear the outputs so in some case warnings issued during processing disappear.
524 for ds in self.lists:
~/.local/lib/python3.6/site-packages/fastai/data_block.py in process(self, xp, yp, name)
683 def process(self, xp:PreProcessor=None, yp:PreProcessor=None, name:str=None):
684 "Launch the processing on `self.x` and `self.y` with `xp` and `yp`."
--> 685 self.y.process(yp)
686 if getattr(self.y, 'filter_missing_y', False):
687 filt = array([o is None for o in self.y.items])
~/.local/lib/python3.6/site-packages/fastai/data_block.py in process(self, processor)
73 if processor is not None: self.processor = processor
74 self.processor = listify(self.processor)
---> 75 for p in self.processor: p.process(self)
76 return self
77
~/.local/lib/python3.6/site-packages/fastai/data_block.py in process(self, ds)
334
335 def process(self, ds):
--> 336 if self.classes is None: self.create_classes(self.generate_classes(ds.items))
337 ds.classes = self.classes
338 ds.c2i = self.c2i
~/.local/lib/python3.6/site-packages/fastai/data_block.py in generate_classes(self, items)
391 for c in items: classes = classes.union(set(c))
392 classes = list(classes)
--> 393 classes.sort()
394 return classes
395
RuntimeError: bool value of Tensor with more than one value is ambiguous
Ultimately, I want to read images using a dataframe with image paths. So I also tried the following:
import sklearn
cv = sklearn.model_selection.GroupKFold(n_splits=5)
train_inds, valid_inds = next(cv.split(iso_image_df.group, groups=iso_image_df.group))
img_lists = (ImageList.from_df(iso_image_df, resized_img_path, cols=0).split_by_idxs(train_inds, valid_inds))
src = img_lists.label_from_lists(img_lists.train, img_lists.valid)
data = (src.databunch(bs = 32).normalize(imagenet_stats))
data.show_batch(rows=3, figsize=(10, 10))
Here I get the following error message:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-146-2514de511e64> in <module>
----> 1 data.show_batch(rows=3, figsize=(10, 10))
~/.local/lib/python3.6/site-packages/fastai/basic_data.py in show_batch(self, rows, ds_type, reverse, **kwargs)
190 #TODO: get rid of has_arg if possible
191 if has_arg(self.train_ds.y.reconstruct, 'x'):
--> 192 ys = [self.train_ds.y.reconstruct(grab_idx(y, i), x=x) for i,x in enumerate(xs)]
193 else : ys = [self.train_ds.y.reconstruct(grab_idx(y, i)) for i in range(n_items)]
194 self.train_ds.x.show_xys(xs, ys, **kwargs)
~/.local/lib/python3.6/site-packages/fastai/basic_data.py in <listcomp>(.0)
190 #TODO: get rid of has_arg if possible
191 if has_arg(self.train_ds.y.reconstruct, 'x'):
--> 192 ys = [self.train_ds.y.reconstruct(grab_idx(y, i), x=x) for i,x in enumerate(xs)]
193 else : ys = [self.train_ds.y.reconstruct(grab_idx(y, i)) for i in range(n_items)]
194 self.train_ds.x.show_xys(xs, ys, **kwargs)
~/.local/lib/python3.6/site-packages/fastai/data_block.py in reconstruct(self, t, x)
89 def reconstruct(self, t:Tensor, x:Tensor=None):
90 "Reconstruct one of the underlying item for its data `t`."
---> 91 return self[0].reconstruct(t,x) if has_arg(self[0].reconstruct, 'x') else self[0].reconstruct(t)
92
93 def new(self, items:Iterator, processor:PreProcessors=None, **kwargs)->'ItemList':
AttributeError: 'Image' object has no attribute 'reconstruct'
Any help is highly appreciated!
The lls are being used to create the databunch.
I've looked at it and given the API change in fastai libs I created the databunch without using the lls that were causing the error:
bs = 64
db = (ImageImageList.from_folder(mnist)
.split_by_folder()
.label_from_func(get_y_fn)
.databunch(bs=bs,num_workers=4))
EDIT: you'll need the get_y_fn; it is very simply defined
def get_y_fn(x): return x
the lls aren't used for anything else anyway
This should fix your problem, let me know if this worked for you.

AttributeError: 'NoneType' object has no attribute 'setCallSite' pyspark after indexedRowMatrix columnSimilarities()

I'm working on a code that was correctly executed with the dataframe before, but this time when I execute it, I get an error. (The only difference is that I used persist() on the dataframe this time.)
simMat = IndexedRMat.columnSimilarities()
executes correctly, but then this part:
columns = ['product1', 'product2', 'sim']
vals = simMat.entries.map(lambda e: (e.i, e.j, e.value)).collect()
dfsim = spark.createDataFrame(vals, columns)
generates this error:
AttributeErrorTraceback (most recent call last)
<ipython-input-100-11502084c71b> in <module>()
1 columns = ['product1', 'product2', 'sim']
----> 2 vals = simMat.entries.map(lambda e: (e.i, e.j, e.value)).collect()
3 dfsim = spark.createDataFrame(vals, columns)
/opt/spark-2.3.0-SNAPSHOT-bin-spark-master/python/pyspark/rdd.pyc in collect(self)
806 to be small, as all the data is loaded into the driver's memory.
807 """
--> 808 with SCCallSiteSync(self.context) as css:
809 port = self.ctx._jvm.PythonRDD.collectAndServe(self._jrdd.rdd())
810 return list(_load_from_socket(port, self._jrdd_deserializer))
/opt/spark-2.3.0-SNAPSHOT-bin-spark-master/python/pyspark/traceback_utils.pyc in __enter__(self)
70 def __enter__(self):
71 if SCCallSiteSync._spark_stack_depth == 0:
---> 72 self._context._jsc.setCallSite(self._call_site)
73 SCCallSiteSync._spark_stack_depth += 1
74
AttributeError: 'NoneType' object has no attribute 'setCallSite'
What does it mean? I'm new to spark and didn't find an explanation for this type of error..

scipy curve_fit error: Result from function call is not a proper array of floats

I have an [x,y] dataset and I would like to fit a function to it.
This are x and y
parang = np.array([ 61.1725 , 62.140625, 62.93275 , 63.701625, 65.89225 ,
66.476875, 68.33525 , 68.902375, 72.03975 , 72.590375,
73.144125, 73.670625, 80.36525 , 80.80275 , 87.505375,
87.90375 , 100.557875, 100.8915 ])
q = np.array([-0.03699417, -0.03451252, -0.03851238, -0.0393034 , -0.04059193,
-0.03941371, -0.04206476, -0.04153004, -0.04721763, -0.04667099,
-0.03996427, -0.03872865, -0.05054322, -0.0466561 , -0.05476921,
-0.05274144, -0.0474299 , -0.04974607])
and then I want to fit a function to the data that goes as follows:
def fq(x,bq,cuq):
qval = bq*stndqu[0]*np.cos(np.radians(2*x))+cuq*stndqu[1]*np.sin(np.radians(2*x))
print qval
print qval.dtype
return qval
where 'bq,cuq' are the parameter I need to fit and stndqu are global parameters I obtain as:
stnd = input(r'P ($\%$) and $\theta$ of pol. standard? (as tuple)')
p = stnd[0]/100.
ang = np.radians(stnd[1])
x,y = sympy.symbols('x y')
stndqu = sympy.solve([sympy.sqrt(x**2+y**2)-p,(0.5*sympy.atan(y/x))-ang],[x,y])[1]
and P and theta are 2.73 and 95. The stndqu[0] and stndqu[1] I get out from that block are
0.0272334985720932 and 0.00190435173321445
To find the parameters 'bq' and 'cuq' of the function that fit my data I do:
qpopt,pconv = scio.curve_fit(fq, parang, q)
and here's the result:
[-0.0129614827538107 -0.0137658898997091 -0.0144124082012406
-0.0150294169782742 -0.0167265263727253 -0.0171633151430064
-0.0185034265676582 -0.0188971421096823 -0.0209373417940197
-0.0212701779430718 -0.0215969783128203 -0.0219002154908251
-0.0250793309165333 -0.0252411052388773 -0.0269646924974054
-0.0270214005655701 -0.0260909416985902 -0.0259956074319825]
object
[-0.0129614827538107 -0.0137658898997091 -0.0144124082012406
-0.0150294169782742 -0.0167265263727253 -0.0171633151430064
-0.0185034265676582 -0.0188971421096823 -0.0209373417940197
-0.0212701779430718 -0.0215969783128203 -0.0219002154908251
-0.0250793309165333 -0.0252411052388773 -0.0269646924974054
-0.0270214005655701 -0.0260909416985902 -0.0259956074319825]
object
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
TypeError: array cannot be safely cast to required type
---------------------------------------------------------------------------
error Traceback (most recent call last)
/Users/mj/Documents/NACO/VLT/DataReduction/<ipython-input-57-cac353117232> in <module>()
----> 1 qpopt,pconv = scio.curve_fit(fq, parang, q)
/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/site-packages/scipy/optimize/minpack.pyc in curve_fit(f, xdata, ydata, p0, sigma, **kw)
408 # Remove full_output from kw, otherwise we're passing it in twice.
409 return_full = kw.pop('full_output', False)
--> 410 res = leastsq(func, p0, args=args, full_output=1, **kw)
411 (popt, pcov, infodict, errmsg, ier) = res
412
/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/site-packages/scipy/optimize/minpack.pyc in leastsq(func, x0, args, Dfun, full_output, col_deriv, ftol, xtol, gtol, maxfev, epsfcn, factor, diag, warning)
268 if (maxfev == 0):
269 maxfev = 200*(n+1)
--> 270 retval = _minpack._lmdif(func,x0,args,full_output,ftol,xtol,gtol,maxfev,epsfcn,factor,diag)
271 else:
272 if col_deriv:
error: Result from function call is not a proper array of floats.
I tried specifying the type of the qval element making it
def fq(x,bq,cuq):
qval = np.array(
bq*stndqu[0]*np.cos(np.radians(2*x))+cuq*stndqu[1]*np.sin(np.radians(2*x)),
dtype=float)
and then the result changes to:
qpopt = scio.curve_fit(fq, parang, q)
[-0.01296148 -0.01376589 -0.01441241 -0.01502942 -0.01672653 -0.01716332
-0.01850343 -0.01889714 -0.02093734 -0.02127018 -0.02159698 -0.02190022
-0.02507933 -0.02524111 -0.02696469 -0.0270214 -0.02609094 -0.02599561]
float64
[-0.01296148 -0.01376589 -0.01441241 -0.01502942 -0.01672653 -0.01716332
-0.01850343 -0.01889714 -0.02093734 -0.02127018 -0.02159698 -0.02190022
-0.02507933 -0.02524111 -0.02696469 -0.0270214 -0.02609094 -0.02599561]
float64
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
TypeError: array cannot be safely cast to required type
---------------------------------------------------------------------------
error Traceback (most recent call last)
/Users/mj/Documents/NACO/VLT/DataReduction/<ipython-input-50-1f4d3764f7ae> in <module>()
----> 1 qpopt = scio.curve_fit(fq, parang, q)
/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/site-packages/scipy/optimize/minpack.pyc in curve_fit(f, xdata, ydata, p0, sigma, **kw)
408 # Remove full_output from kw, otherwise we're passing it in twice.
409 return_full = kw.pop('full_output', False)
--> 410 res = leastsq(func, p0, args=args, full_output=1, **kw)
411 (popt, pcov, infodict, errmsg, ier) = res
412
/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/site-packages/scipy/optimize/minpack.pyc in leastsq(func, x0, args, Dfun, full_output, col_deriv, ftol, xtol, gtol, maxfev, epsfcn, factor, diag, warning)
268 if (maxfev == 0):
269 maxfev = 200*(n+1)
--> 270 retval = _minpack._lmdif(func,x0,args,full_output,ftol,xtol,gtol,maxfev,epsfcn,factor,diag)
271 else:
272 if col_deriv:
error: Result from function call is not a proper array of floats.
So no progress...
Can someone tell me where is this going wrong?
Thank you very much in advance!
M.
Since stndqu is the result of a call to sympy.solve, it is a symbolic object still. The numbers that you see when printing qval from within your function are probably sympy floats (and thus generic objects to numpy). You should convert stndqu into a numpy array before using it with scipy.curve_fit:
stndqun = numpy.array([sympy.N(i) for i in stndqu])