Fastai: Error message in learn.predict when using a custom DataBlock/DataLoader (image segmentation)
I need some help with my fastai pipeline.
I want to do semantic segmentation on 2-channel input images with augmentation. I adapted my procedure from a good introduction on Medium.
The images are saved as NumPy arrays (.npy) of size 2 x 426 x 476.
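(For anyone who wants to reproduce this, here is a minimal sketch that writes dummy arrays in that layout; the sample count and file names are placeholders, not my real data:)

import numpy as np
from pathlib import Path

for folder in ('data', 'label1'):
    Path(folder).mkdir(exist_ok=True)
for i in range(8):  # arbitrary number of samples
    # 2-channel float image and a matching binary mask stored as {0, 255}
    np.save(f'data/img_{i:03d}.npy', np.random.rand(2, 426, 476).astype('float32'))
    np.save(f'label1/img_{i:03d}.npy',
            (np.random.randint(0, 2, (426, 476)) * 255).astype('uint8'))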
See my code below:
%matplotlib inline
import torch
print(torch.__version__)
print(torch.cuda.is_available())
import fastai
print(fastai.__version__)
# other imports
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
from PIL import Image
from fastai.vision.all import *
from sklearn.model_selection import StratifiedKFold
imgs_path = Path('./data')
lbls_path = Path('./label1')
def open_im(fn, chnls=None, cls=torch.Tensor):
    "Open a .npy file and return it as a tensor of type `cls`."
    im = np.load(fn).astype('float32')
    return cls(im)
# The map_filename function makes it easier to map from one folder to another by replacing strings
def map_filename(base_fn, str1, str2):
    return Path(str(base_fn).replace(str1, str2))
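For example (the file name is just a placeholder), it maps an image path to the matching mask path:

map_filename(Path('data/img_000.npy'), str1='data', str2='label1')
# -> Path('label1/img_000.npy')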
# get items from both datasets
items = get_files('./data', extensions='.npy')
masks = get_files('./label1', extensions='.npy')
items_mask = masks
items
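As a quick check, opening the first item directly should give a float tensor in the layout described above:

t = open_im(items[0])
print(t.shape, t.dtype)  # expected: torch.Size([2, 426, 476]) torch.float32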
idx=2
img_pipe = Pipeline(open_im)
img = img_pipe(items[idx])
mask_pipe = Pipeline([partial(map_filename, str1='data', str2='label1'),
                      partial(open_im, cls=TensorMask)])
mask = mask_pipe(items_mask[idx])
print(img.shape, mask.shape)
_, ax = plt.subplots(1, 2, figsize=(12,5))
ax[0].imshow(img.permute(1, 2, 0)[..., :1]/20000)
mask.show(ctx=ax[1])
plt.show()
def show_img(tensor_img, ctx=None):
    ctx = plt.subplot() if ctx is None else ctx
    # normalize to fit between 0 and 1
    if tensor_img.max() > 0:
        tensor_img = tensor_img / tensor_img.max()
    ctx.imshow(tensor_img.permute(1, 2, 0)[..., :1])
# To create this DataBlock we don't need to specify a get_items function
# because we will pass the list of files as the source
db = DataBlock(blocks=(TransformBlock([open_im, lambda x: x/10000]),
                       TransformBlock([partial(map_filename, str1='data', str2='label1'),
                                       partial(open_im, cls=TensorMask)])),
               splitter=RandomSplitter(valid_pct=0.2, seed=0))
db.summary(source=items)
ds = db.datasets(source=items)
dl = db.dataloaders(source=items, bs=1)
batch = dl.one_batch()
print(batch[0].shape, batch[1].shape)
import albumentations as A
import pdb
class SegmentationAlbumentationsTransform(ItemTransform):
    # split_idx=0
    def __init__(self, aug, **kwargs):
        super().__init__(**kwargs)
        self.aug = aug
    def encodes(self, x):
        img, mask = x
        img = img / img.max()
        aug = self.aug(image=np.array(img.permute(1, 2, 0)), mask=np.array(mask))
        return TensorImage(aug['image'].transpose(2, 0, 1)), TensorMask(aug['mask'])
aug_pipe = A.Compose([
    A.ShiftScaleRotate(p=.9),
    A.HorizontalFlip(),
    A.RandomCrop(384, 384),
    A.Rotate(limit=(-90, 90)),
    # A.RandomBrightnessContrast(contrast_limit=0.0, p=1., brightness_by_max=False)
])
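Since albumentations works on channels-last arrays, the pipeline can be sanity-checked on its own with random placeholder inputs of my image size:

dummy_img = np.random.rand(426, 476, 2).astype('float32')                 # H x W x C
dummy_mask = (np.random.randint(0, 2, (426, 476)) * 255).astype('uint8')
out = aug_pipe(image=dummy_img, mask=dummy_mask)
print(out['image'].shape, out['mask'].shape)  # expected: (384, 384, 2) (384, 384)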
# Create our class with this aug_pipe
aug = SegmentationAlbumentationsTransform(aug_pipe)
# And check the results
idx = 5
aug_number = 4
# Display original and some augmented samples
_, ax = plt.subplots(aug_number+1, 2, figsize=(8,aug_number*4))
show_img(ds[idx][0], ctx=ax[0,0])
ds[idx][1].show(ctx=ax[0,1])
# print(ds[idx][0])
for i in range(1, aug_number+1):
    img, mask = aug.encodes(ds[idx])
    show_img(img, ctx=ax[i, 0])
    mask.show(ctx=ax[i, 1])
db = DataBlock(blocks=(TransformBlock([open_im]),
                       TransformBlock([partial(map_filename, str1='data', str2='label1'),
                                       partial(open_im, cls=TensorMask)])),
               splitter=RandomSplitter(valid_pct=0.2),
               item_tfms=aug)
dl = db.dataloaders(items, bs=1)
idx = 3
img, mask = dl.do_item(idx)
fig, (ax1, ax2) = plt.subplots(1, 2)
ax1.imshow(img.permute(1, 2, 0)[..., :1])
ax2.imshow(mask)
plt.show()
print(np.shape(img))
print(np.shape(mask))
def acc_metric(input, target):
    target = target.squeeze(1)
    return (input.argmax(dim=1) == target).float().mean()

def loss_fn(pred, targ):
    targ[targ == 255] = 1  # remap label value 255 to class index 1
    return torch.nn.functional.cross_entropy(pred, targ.squeeze(1).type(torch.long))
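A quick shape check of the loss and metric with random tensors (the shapes are my own assumptions based on the crop size):

pred = torch.randn(1, 2, 384, 384)                  # logits: (batch, n_classes, H, W)
targ = torch.randint(0, 2, (1, 1, 384, 384)) * 255  # mask stored as {0, 255}
print(loss_fn(pred, targ.clone()))                  # a scalar loss
print(acc_metric(pred, targ.clone() // 255))        # fraction of correctly classified pixels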
# Rebuild the DataBlock and DataLoaders for training (same definition as above)
db = DataBlock(blocks=(TransformBlock([open_im]),
                       TransformBlock([partial(map_filename, str1='data', str2='label1'),
                                       partial(open_im, cls=TensorMask)])),
               splitter=RandomSplitter(valid_pct=0.2),
               item_tfms=aug)
dl = db.dataloaders(items, bs=1)
learn = unet_learner(dls=dl, arch=resnet18, pretrained=True, normalize=False,
                     n_in=2, n_out=2, loss_func=loss_fn, metrics=acc_metric)
learn.lr_find()
learn.fit_one_cycle(20, lr_max=6e-5, wd=0.8)
learn.fine_tune(8)
learn.export()
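As a sanity check (my own addition, with an assumed input size), the network should accept a 2-channel batch and return 2 output channels per pixel:

device = next(learn.model.parameters()).device
with torch.no_grad():
    out = learn.model(torch.randn(1, 2, 384, 384).to(device))
print(out.shape)  # expected: torch.Size([1, 2, 384, 384])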
img, mask = dl.do_item(3)
cat, tensor, probs = learn.predict(items[1], masks[1])
cat, tensor, probs = learn.predict(img)
I tried to predict images in three different ways, and also with learn.get_preds() and the dataloader, but none of them were successful. The problem seems to be the encodes function that handles the images and masks for the augmentation.
When I run cat, tensor, probs = learn.predict(img), the following error appears, and I don't know how to fix it:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
/tmp/ipykernel_14397/663310027.py in <module>
----> 1 cat, tensor, probs=learn.predict(img)
~/miniconda3/envs/fastai/lib/python3.9/site-packages/fastai/learner.py in predict(self, item, rm_type_tfms, with_input)
264 def predict(self, item, rm_type_tfms=None, with_input=False):
265 dl = self.dls.test_dl([item], rm_type_tfms=rm_type_tfms, num_workers=0)
--> 266 inp,preds,_,dec_preds = self.get_preds(dl=dl, with_input=True, with_decoded=True)
267 i = getattr(self.dls, 'n_inp', -1)
268 inp = (inp,) if i==1 else tuplify(inp)
~/miniconda3/envs/fastai/lib/python3.9/site-packages/fastai/learner.py in get_preds(self, ds_idx, dl, with_input, with_decoded, with_loss, act, inner, reorder, cbs, **kwargs)
251 if with_loss: ctx_mgrs.append(self.loss_not_reduced())
252 with ContextManagers(ctx_mgrs):
--> 253 self._do_epoch_validate(dl=dl)
254 if act is None: act = getattr(self.loss_func, 'activation', noop)
255 res = cb.all_tensors()
~/miniconda3/envs/fastai/lib/python3.9/site-packages/fastai/learner.py in _do_epoch_validate(self, ds_idx, dl)
201 if dl is None: dl = self.dls[ds_idx]
202 self.dl = dl
--> 203 with torch.no_grad(): self._with_events(self.all_batches, 'validate', CancelValidException)
204
205 def _do_epoch(self):
~/miniconda3/envs/fastai/lib/python3.9/site-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
161
162 def _with_events(self, f, event_type, ex, final=noop):
--> 163 try: self(f'before_{event_type}'); f()
164 except ex: self(f'after_cancel_{event_type}')
165 self(f'after_{event_type}'); final()
~/miniconda3/envs/fastai/lib/python3.9/site-packages/fastai/learner.py in all_batches(self)
167 def all_batches(self):
168 self.n_iter = len(self.dl)
--> 169 for o in enumerate(self.dl): self.one_batch(*o)
170
171 def _do_one_batch(self):
~/miniconda3/envs/fastai/lib/python3.9/site-packages/fastai/data/load.py in __iter__(self)
107 self.before_iter()
108 self.__idxs=self.get_idxs() # called in context of main process (not workers/subprocesses)
--> 109 for b in _loaders[self.fake_l.num_workers==0](self.fake_l):
110 if self.device is not None: b = to_device(b, self.device)
111 yield self.after_batch(b)
~/miniconda3/envs/fastai/lib/python3.9/site-packages/torch/utils/data/dataloader.py in __next__(self)
519 if self._sampler_iter is None:
520 self._reset()
--> 521 data = self._next_data()
522 self._num_yielded += 1
523 if self._dataset_kind == _DatasetKind.Iterable and \
~/miniconda3/envs/fastai/lib/python3.9/site-packages/torch/utils/data/dataloader.py in _next_data(self)
559 def _next_data(self):
560 index = self._next_index() # may raise StopIteration
--> 561 data = self._dataset_fetcher.fetch(index) # may raise StopIteration
562 if self._pin_memory:
563 data = _utils.pin_memory.pin_memory(data)
~/miniconda3/envs/fastai/lib/python3.9/site-packages/torch/utils/data/_utils/fetch.py in fetch(self, possibly_batched_index)
32 raise StopIteration
33 else:
---> 34 data = next(self.dataset_iter)
35 return self.collate_fn(data)
36
~/miniconda3/envs/fastai/lib/python3.9/site-packages/fastai/data/load.py in create_batches(self, samps)
116 if self.dataset is not None: self.it = iter(self.dataset)
117 res = filter(lambda o:o is not None, map(self.do_item, samps))
--> 118 yield from map(self.do_batch, self.chunkify(res))
119
120 def new(self, dataset=None, cls=None, **kwargs):
~/miniconda3/envs/fastai/lib/python3.9/site-packages/fastcore/basics.py in chunked(it, chunk_sz, drop_last, n_chunks)
214 if not isinstance(it, Iterator): it = iter(it)
215 while True:
--> 216 res = list(itertools.islice(it, chunk_sz))
217 if res and (len(res)==chunk_sz or not drop_last): yield res
218 if len(res)<chunk_sz: return
~/miniconda3/envs/fastai/lib/python3.9/site-packages/fastai/data/load.py in do_item(self, s)
131 def prebatched(self): return self.bs is None
132 def do_item(self, s):
--> 133 try: return self.after_item(self.create_item(s))
134 except SkipItemException: return None
135 def chunkify(self, b): return b if self.prebatched else chunked(b, self.bs, self.drop_last)
~/miniconda3/envs/fastai/lib/python3.9/site-packages/fastcore/transform.py in __call__(self, o)
198 self.fs = self.fs.sorted(key='order')
199
--> 200 def __call__(self, o): return compose_tfms(o, tfms=self.fs, split_idx=self.split_idx)
201 def __repr__(self): return f"Pipeline: {' -> '.join([f.name for f in self.fs if f.name != 'noop'])}"
202 def __getitem__(self,i): return self.fs[i]
~/miniconda3/envs/fastai/lib/python3.9/site-packages/fastcore/transform.py in compose_tfms(x, tfms, is_enc, reverse, **kwargs)
148 for f in tfms:
149 if not is_enc: f = f.decode
--> 150 x = f(x, **kwargs)
151 return x
152
~/miniconda3/envs/fastai/lib/python3.9/site-packages/fastcore/transform.py in __call__(self, x, **kwargs)
111 "A transform that always take tuples as items"
112 _retain = True
--> 113 def __call__(self, x, **kwargs): return self._call1(x, '__call__', **kwargs)
114 def decode(self, x, **kwargs): return self._call1(x, 'decode', **kwargs)
115 def _call1(self, x, name, **kwargs):
~/miniconda3/envs/fastai/lib/python3.9/site-packages/fastcore/transform.py in _call1(self, x, name, **kwargs)
115 def _call1(self, x, name, **kwargs):
116 if not _is_tuple(x): return getattr(super(), name)(x, **kwargs)
--> 117 y = getattr(super(), name)(list(x), **kwargs)
118 if not self._retain: return y
119 if is_listy(y) and not isinstance(y, tuple): y = tuple(y)
~/miniconda3/envs/fastai/lib/python3.9/site-packages/fastcore/transform.py in __call__(self, x, **kwargs)
71 @property
72 def name(self): return getattr(self, '_name', _get_name(self))
---> 73 def __call__(self, x, **kwargs): return self._call('encodes', x, **kwargs)
74 def decode (self, x, **kwargs): return self._call('decodes', x, **kwargs)
75 def __repr__(self): return f'{self.name}:\nencodes: {self.encodes}decodes: {self.decodes}'
~/miniconda3/envs/fastai/lib/python3.9/site-packages/fastcore/transform.py in _call(self, fn, x, split_idx, **kwargs)
81 def _call(self, fn, x, split_idx=None, **kwargs):
82 if split_idx!=self.split_idx and self.split_idx is not None: return x
---> 83 return self._do_call(getattr(self, fn), x, **kwargs)
84
85 def _do_call(self, f, x, **kwargs):
~/miniconda3/envs/fastai/lib/python3.9/site-packages/fastcore/transform.py in _do_call(self, f, x, **kwargs)
87 if f is None: return x
88 ret = f.returns(x) if hasattr(f,'returns') else None
---> 89 return retain_type(f(x, **kwargs), x, ret)
90 res = tuple(self._do_call(f, x_, **kwargs) for x_ in x)
91 return retain_type(res, x)
~/miniconda3/envs/fastai/lib/python3.9/site-packages/fastcore/dispatch.py in __call__(self, *args, **kwargs)
116 elif self.inst is not None: f = MethodType(f, self.inst)
117 elif self.owner is not None: f = MethodType(f, self.owner)
--> 118 return f(*args, **kwargs)
119
120 def __get__(self, inst, owner):
/tmp/ipykernel_14397/3758110305.py in encodes(self, x)
7
8 def encodes(self, x):
----> 9 img,mask = x
10 img = img/img.max()
11 aug = self.aug(image=np.array(img.permute(1,2,0)), mask=np.array(mask))
ValueError: not enough values to unpack (expected 2, got 1)
Also, cat, tensor, probs = learn.predict(items[1], masks[1]) gives the error:
TypeError: slice indices must be integers or None or have an __index__ method
but I don't know why.
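From reading the traceback, my current understanding is that learn.predict wraps the single image in a test DataLoader, so after_item hands my ItemTransform a one-element tuple and img, mask = x fails with "expected 2, got 1". A sketch of the workaround I am experimenting with is to uncomment the split_idx line in my class, so the augmentation only runs on the training split and is skipped at validation/inference time (untested beyond my own setup):

class SegmentationAlbumentationsTransform(ItemTransform):
    split_idx = 0  # 0 = training set only; validation and test/predict pipelines skip it
    def __init__(self, aug, **kwargs):
        super().__init__(**kwargs)
        self.aug = aug
    def encodes(self, x):
        img, mask = x
        img = img / img.max()
        aug = self.aug(image=np.array(img.permute(1, 2, 0)), mask=np.array(mask))
        return TensorImage(aug['image'].transpose(2, 0, 1)), TensorMask(aug['mask'])

For the second error, the signature shown in the traceback (predict(self, item, rm_type_tfms=None, with_input=False)) suggests that masks[1] is being passed as rm_type_tfms, since predict only takes a single item; that might explain the TypeError, but I am not sure.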