How to use pillow library to access image files in sub-directories recursively? - python-imaging-library

I want to crop and resize multiple images in many sub-directories. The code works if the images are in the same directory, but fails to read from other directories.
I have tried using the os.walk() function. It successfully iterates over the files in all sub-directories, but Pillow's Image.open() function fails to open the images and raises a "file not found" error.
import os
from PIL import Image
# Walk the tree rooted at the current directory and crop/resize every .jpg file.
for dirpath, dirnames, files in os.walk('.'):
    for filename in files:
        # Use the text after the last dot as the file extension.
        t = filename.split(".")
        ext = t[-1]
        if ext in ["jpg"]:
            print(filename)
            # Crop box handed to Image.crop(): (left, upper, right, lower).
            coords = (500, 250, 810,720)
            # `filename` is the bare name taken from `files`; `dirpath` is not part of it.
            image_obj = Image.open(filename)
            cropped_image = image_obj.crop(coords)
            resized_image =cropped_image.resize([227,227])
            # name = "./data2" + str(i) +".jpg"
            # The output name is also built without `dirpath`.
            resized_image.save("new" + filename)
I expect the code to recursively crop and resize the images in all the sub-directories. The following error occurred.
frame0.jpg
Traceback (most recent call last):
File "........./data2/cropitall.py", line 18, in <module>
image_obj = Image.open(filename) #path of image to be cropped
File "C:\Python36\lib\site-packages\PIL\Image.py", line 2652, in open
fp = builtins.open(filename, "rb")
FileNotFoundError: [Errno 2] No such file or directory: 'frame0.jpg'
Process finished with exit code 1

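To open the image you need the full path to the file, not just the file name. os.walk() yields bare file names in `files`; each one lives inside the corresponding `dirpath`, so Image.open(filename) only finds files that happen to be in the current working directory.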
Instead of
image_obj = Image.open(filename)
do
# Prepend the directory currently being walked so the file is found wherever it lives.
path = os.path.join(dirpath, filename)
image_obj = Image.open(path)

Related

Getting 'OSError: -2' while converting a tif image into jpg image using python

I'm trying to convert TIFF images to JPEG format so that I can use them later in OpenCV. It works fine on my local system, but when I run it on a Linux server that is not connected to the internet, it fails while saving the Image object in JPEG format.
I'm using Python 3.8 and installed all the libraries and their dependencies on the server from wheel files using pip.
Here is the piece of code:
import PIL
import cv2
def face_detect(sourceImagepath1, processedFileName, imagename, pdfname):
    """Detect faces in an image, save a crop of the first face and a PDF copy.

    The source image is opened with Pillow and re-saved as a JPEG under
    ``TEMP_PATH`` so that OpenCV can read it. The JPEG is converted to
    grayscale and scanned with the Haar cascade stored under ``MISC_PATH``.
    When at least one face is found, a padded crop around the first detection
    is written under ``PROCESSED_PATH``.

    Args:
        sourceImagepath1: Path of the image to open with Pillow.
        processedFileName: Base name (without extension) of the temporary JPEG.
        imagename: File name of the cropped face, written under PROCESSED_PATH.
        pdfname: File name of the PDF copy, written under PROCESSED_PATH.

    Returns:
        The number of faces reported by the cascade classifier.

    Raises:
        OSError: If OpenCV cannot read back the temporary JPEG; Pillow may
            also raise it while loading or saving the image.
    """
    # Imported locally: a bare ``import PIL`` does not bind the name ``Image``.
    from PIL import Image

    temp_path = TEMP_PATH
    processed_path = PROCESSED_PATH
    misc_path = MISC_PATH
    # cascade file path1
    cascpath1 = misc_path + 'frontalface_cascade.xml'
    # Create haar cascade
    faceCascade = cv2.CascadeClassifier(cascpath1)
    # Read image with PIL; the context manager closes it even if a later step raises.
    with Image.open(sourceImagepath1) as image_pil:
        # Save image in jpg format
        image_pil.save(temp_path + processedFileName + '.jpg')
        # Read image with opencv
        image_cv = cv2.imread(temp_path + processedFileName + '.jpg')
        if image_cv is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise OSError(
                'OpenCV could not read ' + temp_path + processedFileName + '.jpg'
            )
        # Convert image into grayscale
        image_gray = cv2.cvtColor(image_cv, cv2.COLOR_BGR2GRAY)
        # Detect faces in the image
        face = faceCascade.detectMultiScale(
            image_gray,
            scaleFactor=1.3,
            minNeighbors=5,
            minSize=(30, 30)
            # flags = cv2.CASCADE_SCALE_IMAGE
        )
        if len(face) > 0:
            # Coordinates based on auto-face detection (first detection only)
            x, y, w, h = face[0][0], face[0][1], face[0][2], face[0][3]
            # Pad the detected rectangle so the crop keeps some margin around the face.
            crop_image = image_pil.crop((x - 20, y - 30, x + w + 40, y + h + 60))
            crop_image.save(processed_path + imagename)
        # Save tif file as pdf
        # NOTE(review): the original indentation was lost; the PDF copy is assumed
        # to be written whether or not a face was found -- confirm.
        image_pil.save(processed_path + pdfname, save_all=True)
    return len(face)
Here TEMP_PATH, PROCESSED_PATH and MISC_PATH are global variables holding directory paths of the form '/Users/user/Documents/Temp/'. I'm getting the error on this line:
image_pil.save(temp_path + processedFileName + '.jpg')
Below is the error i'm getting when executing the file
Traceback (most recent call last):
File "*path_from_root_directory*/PYTHON_SCRIPTS/Script/staging.py", line 363, in <module>
auto_face_count = face_detect(sourceImagepath1, processedFileName, imagename, pdfname)
File "*path_from_root_directory*/PYTHON_SCRIPTS/Script/staging.py", line 71, in greyScaleCheck
image_pil.save(temp_path + processedFileName + '.jpg')
File "*path_from_root_directory*/python3.8/site-packages/PIL/Image.py", line 2201, in save
self._ensure_mutable()
File "*path_from_root_directory*/python3.8/site-packages/PIL/Image.py", line 624, in _ensure_mutable
self._copy()
File "*path_from_root_directory*/python3.8/site-packages/PIL/Image.py", line 617, in _copy
self.load()
File "*path_from_root_directory*/python3.8/site-packages/PIL/TiffImagePlugin.py", line 1122, in load
return self._load_libtiff()
File "*path_from_root_directory*/python3.8/site-packages/PIL/TiffImagePlugin.py", line 1226, in _load_libtiff
raise OSError(err)
OSError: -2
I have provided full privileges to python directory and its sub-directories/files. Anyone have any idea why I'm getting this error ?

python AttributeError when attempting to save excel chart using PIL

I am trying to save a chart from Excel to a file, which I want to use later in a PowerPoint presentation, but the code I am running keeps failing with
"AttributeError: 'NoneType' object has no attribute 'save'" .
I have been looking around Google and Stack Overflow, but none of the suggestions I can find actually help; I keep getting the error.
The code I am trying is below,
import win32com.client
import PIL
# Path of the workbook that contains the chart (despite the variable name, this is a file).
folder_path = r'C:/temp/Monthly_Graphs.xlsm'
# Start Excel through COM and open the workbook.
xlApp = win32com.client.DispatchEx('Excel.Application')
wb = xlApp.Workbooks.Open(folder_path)
# NOTE(review): the next two lines repeat the two above, starting Excel and opening the workbook again.
xlApp = win32com.client.DispatchEx('Excel.Application')
wb = xlApp.Workbooks.Open(folder_path)
# Copy the chart shape to the clipboard as a picture, using Excel's default format.
wb.Sheets('Sheet1').Shapes('Sheet1_Pie_Chart').CopyPicture()
# Fetch the clipboard contents through Pillow.
pie_image = PIL.ImageGrab.grabclipboard()
# This is the call that fails in the traceback: pie_image is None at this point.
pie_image.savefig(r'C:/temp/pie_test.bmp','BMP')
the traceback is below
Traceback (most recent call last):
File "<ipython-input-12-b8e52c17e4d1>", line 1, in <module>
runfile('C:/python/stackoverflow_1.py', wdir='C:/python')
File "C:\Users\xxxxxxx\AppData\Local\conda\conda\envs\py64bit\lib\site-packages\spyder\utils\site\sitecustomize.py", line 710, in runfile
execfile(filename, namespace)
File "C:\Users\xxxxxxx\AppData\Local\conda\conda\envs\py64bit\lib\site-packages\spyder\utils\site\sitecustomize.py", line 101, in execfile
exec(compile(f.read(), filename, 'exec'), namespace)
File "C:/python/stackoverflow_1.py", line 26, in <module>
pie_image.savefig(r'C:/temp/pie_test.bmp','BMP')
AttributeError: 'NoneType' object has no attribute 'savefig'
I managed to get it to work by referring to the question below and setting the format in the CopyPicture call. The issue seems to be that Excel's default copy of the image is not in a format that PIL understands.
Python Export Excel Sheet Range as Image
import win32com.client
from PIL import ImageGrab
import win32clipboard as clip

# Export a chart shape from an Excel workbook to a BMP file via the clipboard.
folder_path = r'C:/temp/Monthly_Graphs.xlsm'
# Start Excel and open the workbook once; repeating these two calls would
# launch a second Excel instance and open the same workbook again.
xlApp = win32com.client.DispatchEx('Excel.Application')
wb = xlApp.Workbooks.Open(folder_path)
# Request a bitmap copy explicitly: Excel's default picture format is not one
# that Pillow can read back from the clipboard.
wb.Sheets('Sheet1').Shapes('Sheet1_Pie_Chart').CopyPicture(Format=clip.CF_BITMAP)
pie_image = ImageGrab.grabclipboard()
if pie_image is None:
    # grabclipboard() returns None when the clipboard holds no image data;
    # fail with a clear message instead of an AttributeError on None.
    raise RuntimeError('The clipboard does not contain an image after CopyPicture')
pie_image.save(r'C:/temp/pie_test.bmp','BMP')

.extractText() returns "invalid literal for decimal"

I'm coding something which will read PDFs online and return a set of keywords that are found in the document. However I keep running into a problem with the extractText() function from the PyPDF2 package.
Here's my code to open the PDFs and read it:
# NOTE(review): `myurl.pdf` reads as a placeholder for the PDF's URL string, not runnable code -- confirm.
x = myurl.pdf
if ".pdf" in x:
    # Download the whole document, sending a custom User-Agent header.
    remoteFile = urlopen(Request(x, headers={"User-Agent": "Magic-Browser"})).read()
    # Wrap the downloaded content in an in-memory file object for PyPDF2.
    memoryFile = StringIO(remoteFile)
    # strict=False is passed so the reader is lenient about malformed input.
    pdfFile = PyPDF2.PdfFileReader(memoryFile, strict=False)
    num_pages = pdfFile.numPages
    count = 0
    text = ""
    # Append the extracted text of every page, in page order.
    while count < num_pages:
        pageObj = pdfFile.getPage(count)
        count += 1
        text += pageObj.extractText()
The error that I keep running into on the extractText() line goes like this:
Traceback (most recent call last):
File "errortest.py", line 30, in <module>
text += pageObj.extractText()
File "/anaconda2/lib/python2.7/site-packages/PyPDF2/pdf.py", line 2595, in extractText
content = ContentStream(content, self.pdf)
File "/anaconda2/lib/python2.7/site-packages/PyPDF2/pdf.py", line 2674, in __init__
self.__parseContentStream(stream)
File "/anaconda2/lib/python2.7/site-packages/PyPDF2/pdf.py", line 2706, in __parseContentStream
operands.append(readObject(stream, None))
File "/anaconda2/lib/python2.7/site-packages/PyPDF2/generic.py", line 98, in readObject
return NumberObject.readFromStream(stream)
File "/anaconda2/lib/python2.7/site-packages/PyPDF2/generic.py", line 271, in readFromStream
return FloatObject(num)
File "/anaconda2/lib/python2.7/site-packages/PyPDF2/generic.py", line 231, in __new__
return decimal.Decimal.__new__(cls, str(value))
File "/anaconda2/lib/python2.7/decimal.py", line 547, in __new__
"Invalid literal for Decimal: %r" % value)
File "/anaconda2/lib/python2.7/decimal.py", line 3872, in _raise_error
raise error(explanation)
decimal.InvalidOperation: Invalid literal for Decimal: '99.-72'
Would be great if someone could help me out! Thanks!
There is too little information to be certain, but PyPDF2 (and now pypdf) improved a lot in 2022. You will probably just need to upgrade to the latest version of pypdf.
If you encounter a bug in pypdf again, please open an issue: https://github.com/py-pdf/pypdf
A good bug ticket contains (1) your pypdf version (2) the code + PDF document that caused the issue.

PyPDF2.PdfFileReader hangs indefinitely

I'm trying to read this pdf file (https://www.accessdata.fda.gov/cdrh_docs/pdf14/K141693.pdf) and am following these suggestions from SO
Opening pdf urls with pyPdf
I have actually downloaded the file locally and am running the following code
import PyPDF2
# No mode is given, so the file is opened in the default text mode.
pdf_file = open("K141693.pdf")
# The traceback shows this constructor is where the script was interrupted.
pdf_read = PyPDF2.PdfFileReader(pdf_file)
but my code hangs indefinitely. I'm running Python 2.7 and here is the stacktrace.
Traceback (most recent call last):
File "", line 1, in
runfile('C:/PoC/pdf_reader.py', wdir='C:/PoC')
File
"C:\ProgramData\Anaconda2\lib\site-packages\spyder\utils\site\sitecustomize.py",
line 880, in runfile
execfile(filename, namespace)
File
"C:\ProgramData\Anaconda2\lib\site-packages\spyder\utils\site\sitecustomize.py",
line 87, in execfile
exec(compile(scripttext, filename, 'exec'), glob, loc)
File "C:/PoC/pdf_reader.py", line 13, in
pdf_read = PyPDF2.PdfFileReader(pdf_file)
File "C:\ProgramData\Anaconda2\lib\site-packages\PyPDF2\pdf.py",
line 1084, in init
self.read(stream)
File "C:\ProgramData\Anaconda2\lib\site-packages\PyPDF2\pdf.py",
line 1697, in read
line = self.readNextEndLine(stream)
File "C:\ProgramData\Anaconda2\lib\site-packages\PyPDF2\pdf.py",
line 1938, in readNextEndLine
x = stream.read(1)
KeyboardInterrupt
I came across another post here PyPDF2 hangs on processing but that too doesn't have a response.
You need to parse the file in binary ('rb') mode. (This works in Python 3:)
import PyPDF2
# Open in binary mode so the reader receives the PDF's raw bytes.
pdf_file = open("K141693.pdf", "rb")
# NOTE(review): pdf_file is never closed here; presumably the reader still needs the open stream -- confirm before adding a close.
read_pdf = PyPDF2.PdfFileReader(pdf_file)

TensorFlow: gfile.FastGFile() cannot read a file whose path includes Chinese characters

I want to use gfile.FastGFile(image_path, 'rb').read() to read a picture and use it as the input of my project, and I use the name of the directory that contains the pictures as their label. When the directory name is in English, my code works fine, but when the directory name is in Chinese, it throws this error:
Traceback (most recent call last):
File "F:/pythonWS/imageFilter/jpegFileJudge.py", line 27, in <module>
image_data = gfile.FastGFile(image_path, 'rb').read()
File "C:\Program Files\Python35\lib\site-
packages\tensorflow\python\lib\io\file_io.py", line 106, in read
self._preread_check()
File "C:\Program Files\Python35\lib\site-
packages\tensorflow\python\lib\io\file_io.py", line 73, in _preread_check
compat.as_bytes(self.__name), 1024 * 512, status)
File "C:\Program Files\Python35\lib\contextlib.py", line 66, in __exit__
next(self.gen)
File "C:\Program Files\Python35\lib\site-
packages\tensorflow\python\framework\errors_impl.py", line 466, in
raise_exception_on_not_ok_status
pywrap_tensorflow.TF_GetCode(status))
tensorflow.python.framework.errors_impl.NotFoundError: NewRandomAccessFile
failed to Create/Open: F:\vsWorkspace\pics\test\三宝鸟
\0ff41bd5ad6eddc403fa02d13bdbb6fd526633fe.jpg :
ϵͳ\udcd5Ҳ\udcbb\udcb5\udcbdָ\udcb6\udca8\udcb5\udcc4\udcceļ\udcfe\udca1\udca3
my test code is :
# -*- coding: utf-8 -*-
import glob
import os
import random
import numpy as np
import tensorflow as tf
from tensorflow.python.platform import gfile
# Root folder; each entry directly below it is treated as a directory of pictures.
image_folder='F:/vsWorkspace/pics/test'
# os.path.abspath() resolves names against the current working directory,
# so the script changes directory before each abspath() call.
os.chdir(image_folder)
count=0
for each in os.listdir(image_folder):
    each=os.path.abspath(each)
    os.chdir(each)
    for image_path in os.listdir(each):
        image_path = os.path.abspath(image_path)
        print(image_path)
        # Read the raw bytes of the picture through TensorFlow's file API.
        image_data = gfile.FastGFile(image_path, 'rb').read()
        count += 1
    # Return to the root so the next directory name resolves correctly.
    os.chdir(image_folder)
My environment is Windows 7 x64, Python 3.5.3 and TensorFlow 1.0. How can I solve this problem?
By the way, I have to use the Chinese directory names as the labels of my pictures.