PyPI packaging, namespace packages and subpackaging problems - setuptools

I've uploaded my personal generic functions, called ofunctions, to GitHub in order to share them between my projects and to have separate CI and coverage tests. Link to the GitHub project here.
So far so good: I have a package called ofunctions which has several subpackages like ofunctions.network.
I want to be able to install subpackages without having to install the whole package, i.e. pip install ofunctions.network.
So I've created a single setup.py file that creates the necessary dist files to upload on PyPI.
My problem:
Whenever I run python setup.py sdist bdist_wheel, it generates the full ofunctions package and a package for each subpackage, but:
source packages like ofunctions.network-0.5.0.tar.gz only contain the subpackage (expected behavior)
wheel packages like ofunctions.network-0.5.0-py3-none-any.whl contain the whole package (unexpected behavior)
The wheel packages contain the whole ofunctions library including all subpackages, whereas they should only contain the same subpackage as the source dist files.
Can anybody have a look at my setup.py file and tell me why the sdist and wheel files don't contain strictly the same subpackages?
#! /usr/bin/env python
# -*- coding: utf-8 -*-
#
# This file is part of ofunctions package
"""
Namespace packaging here

# Make sure we declare an __init__.py file as namespace holder in the package root containing the following
try:
    __import__('pkg_resources').declare_namespace(__name__)
except ImportError:
    from pkgutil import extend_path
    __path__ = extend_path(__path__, __name__)
"""

import codecs
import os

import pkg_resources
import setuptools


def get_metadata(package_file):
    """
    Read metadata from package file
    """

    def _read(_package_file):
        here = os.path.abspath(os.path.dirname(__file__))
        with codecs.open(os.path.join(here, _package_file), 'r') as fp:
            return fp.read()

    _metadata = {}
    for line in _read(package_file).splitlines():
        if line.startswith('__version__'):
            delim = '"' if '"' in line else "'"
            _metadata['version'] = line.split(delim)[1]
        if line.startswith('__description__'):
            delim = '"' if '"' in line else "'"
            _metadata['description'] = line.split(delim)[1]
    return _metadata


def parse_requirements(filename):
    """
    There is a parse_requirements function in pip but it keeps changing import path
    Let's build a simple one
    """
    try:
        with open(filename, 'r') as requirements_txt:
            install_requires = [
                str(requirement)
                for requirement
                in pkg_resources.parse_requirements(requirements_txt)
            ]
        return install_requires
    except OSError:
        print('WARNING: No requirements.txt file found as "{}". Please check path or create an empty one'
              .format(filename))


def get_long_description(filename):
    with open(filename, 'r', encoding='utf-8') as readme_file:
        _long_description = readme_file.read()
    return _long_description


# ######### ACTUAL SCRIPT ENTRY POINT

NAMESPACE_PACKAGE_NAME = 'ofunctions'
namespace_package_path = os.path.abspath(NAMESPACE_PACKAGE_NAME)
namespace_package_file = os.path.join(namespace_package_path, '__init__.py')
metadata = get_metadata(namespace_package_file)
requirements = parse_requirements(os.path.join(namespace_package_path, 'requirements.txt'))

# Generic namespace package
setuptools.setup(
    name=NAMESPACE_PACKAGE_NAME,
    namespace_packages=[NAMESPACE_PACKAGE_NAME],
    packages=setuptools.find_namespace_packages(include=['ofunctions.*']),
    version=metadata['version'],
    install_requires=requirements,
    classifiers=[
        "Development Status :: 5 - Production/Stable",
        "Intended Audience :: Developers",
        "Topic :: Software Development",
        "Topic :: System",
        "Topic :: System :: Operating System",
        "Topic :: System :: Shells",
        "Programming Language :: Python",
        "Programming Language :: Python :: 3",
        "Programming Language :: Python :: Implementation :: CPython",
        "Programming Language :: Python :: Implementation :: PyPy",
        "Operating System :: POSIX :: Linux",
        "Operating System :: POSIX :: BSD :: FreeBSD",
        "Operating System :: POSIX :: BSD :: NetBSD",
        "Operating System :: POSIX :: BSD :: OpenBSD",
        "Operating System :: Microsoft",
        "Operating System :: Microsoft :: Windows",
        "License :: OSI Approved :: BSD License",
    ],
    description=metadata['description'],
    author='NetInvent - Orsiris de Jong',
    author_email='contact@netinvent.fr',
    url='https://github.com/netinvent/ofunctions',
    keywords=['network', 'bisection', 'logging'],
    long_description=get_long_description('README.md'),
    long_description_content_type="text/markdown",
    python_requires='>=3.5',
    # namespace packages don't work well with zipped eggs
    # ref https://packaging.python.org/guides/packaging-namespace-packages/
    zip_safe=False
)

for package in setuptools.find_namespace_packages(include=['ofunctions.*']):
    package_path = os.path.abspath(package.replace('.', os.sep))
    package_file = os.path.join(package_path, '__init__.py')
    metadata = get_metadata(package_file)
    requirements = parse_requirements(os.path.join(package_path, 'requirements.txt'))

    print(package_path)
    print(package_file)
    print(metadata)
    print(requirements)

    setuptools.setup(
        name=package,
        namespace_packages=[NAMESPACE_PACKAGE_NAME],
        packages=[package],
        package_data={package: ['__init__.py']},
        version=metadata['version'],
        install_requires=requirements,
        classifiers=[
            "Development Status :: 5 - Production/Stable",
            "Intended Audience :: Developers",
            "Topic :: Software Development",
            "Topic :: System",
            "Topic :: System :: Operating System",
            "Topic :: System :: Shells",
            "Programming Language :: Python",
            "Programming Language :: Python :: 3",
            "Programming Language :: Python :: Implementation :: CPython",
            "Programming Language :: Python :: Implementation :: PyPy",
            "Operating System :: POSIX :: Linux",
            "Operating System :: POSIX :: BSD :: FreeBSD",
            "Operating System :: POSIX :: BSD :: NetBSD",
            "Operating System :: POSIX :: BSD :: OpenBSD",
            "Operating System :: Microsoft",
            "Operating System :: Microsoft :: Windows",
            "License :: OSI Approved :: BSD License",
        ],
        description=metadata['description'],
        author='NetInvent - Orsiris de Jong',
        author_email='contact@netinvent.fr',
        url='https://github.com/netinvent/ofunctions',
        keywords=['network', 'bisection', 'logging'],
        long_description=get_long_description('README.md'),
        long_description_content_type="text/markdown",
        python_requires='>=3.5',
        # namespace packages don't work well with zipped eggs
        # ref https://packaging.python.org/guides/packaging-namespace-packages/
        zip_safe=False
    )
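For reference, the directory layout this setup.py assumes is roughly the following (network is just the example subpackage mentioned above; each __init__.py carries the __version__ and __description__ strings that get_metadata() reads):

setup.py
README.md
ofunctions/
    __init__.py          # namespace holder, plus __version__ / __description__
    requirements.txt
    network/
        __init__.py      # per-subpackage __version__ / __description__
        requirements.txt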
Thanks 8-|

Okay, so I think I found the problem.
The build directory isn't cleaned between setuptools runs.
Worse, the build directory is never cleaned unless you remove it manually, so old build files may end up in newer wheel packages, even on single-package builds I think.
I added a function clear_package_build_path() that just cleans the build/lib/package dir before each setuptools.setup() run.
Now my wheel files are built with only the necessary files, no bloat anymore.
Here's the full working code:
#! /usr/bin/env python
# -*- coding: utf-8 -*-
#
# This file is part of ofunctions package
"""
Namespace packaging here

# Make sure we declare an __init__.py file as namespace holder in the package root containing the following
try:
    __import__('pkg_resources').declare_namespace(__name__)
except ImportError:
    from pkgutil import extend_path
    __path__ = extend_path(__path__, __name__)
"""

import codecs
import os
import shutil

import pkg_resources
import setuptools


def get_metadata(package_file):
    """
    Read metadata from package file
    """

    def _read(_package_file):
        here = os.path.abspath(os.path.dirname(__file__))
        with codecs.open(os.path.join(here, _package_file), 'r') as fp:
            return fp.read()

    _metadata = {}
    for line in _read(package_file).splitlines():
        if line.startswith('__version__'):
            delim = '"' if '"' in line else "'"
            _metadata['version'] = line.split(delim)[1]
        if line.startswith('__description__'):
            delim = '"' if '"' in line else "'"
            _metadata['description'] = line.split(delim)[1]
    return _metadata


def parse_requirements(filename):
    """
    There is a parse_requirements function in pip but it keeps changing import path
    Let's build a simple one
    """
    try:
        with open(filename, 'r') as requirements_txt:
            install_requires = [
                str(requirement)
                for requirement
                in pkg_resources.parse_requirements(requirements_txt)
            ]
        return install_requires
    except OSError:
        print('WARNING: No requirements.txt file found as "{}". Please check path or create an empty one'
              .format(filename))


def get_long_description(filename):
    with open(filename, 'r', encoding='utf-8') as readme_file:
        _long_description = readme_file.read()
    return _long_description


def clear_package_build_path(package_rel_path):
    """
    We need to clean the build path, but setuptools expects build/lib/package_name to exist,
    so we recreate it afterwards
    """
    build_path = os.path.abspath(os.path.join('build', 'lib', package_rel_path))
    try:
        # We need to use shutil.rmtree() instead of os.remove() since the latter implementation
        # produces "WindowsError: [Error 5] Access is denied"
        shutil.rmtree('build')
    except FileNotFoundError:
        print('build path: {} does not exist'.format(build_path))
    # Now we need to create the 'build/lib/package/subpackage' path so setuptools won't fail
    os.makedirs(build_path)


# ######### ACTUAL SCRIPT ENTRY POINT

NAMESPACE_PACKAGE_NAME = 'ofunctions'
namespace_package_path = os.path.abspath(NAMESPACE_PACKAGE_NAME)
namespace_package_file = os.path.join(namespace_package_path, '__init__.py')
metadata = get_metadata(namespace_package_file)
requirements = parse_requirements(os.path.join(namespace_package_path, 'requirements.txt'))

# First let's make sure the build path is clean (avoiding namespace package pollution in subpackages)
# Clean the build dir before every run so we don't make cumulative wheel files
clear_package_build_path(NAMESPACE_PACKAGE_NAME)

# Generic namespace package
setuptools.setup(
    name=NAMESPACE_PACKAGE_NAME,
    namespace_packages=[NAMESPACE_PACKAGE_NAME],
    packages=setuptools.find_namespace_packages(include=['ofunctions.*']),
    version=metadata['version'],
    install_requires=requirements,
    classifiers=[
        "Development Status :: 5 - Production/Stable",
        "Intended Audience :: Developers",
        "Topic :: Software Development",
        "Topic :: System",
        "Topic :: System :: Operating System",
        "Topic :: System :: Shells",
        "Programming Language :: Python",
        "Programming Language :: Python :: 3",
        "Programming Language :: Python :: Implementation :: CPython",
        "Programming Language :: Python :: Implementation :: PyPy",
        "Operating System :: POSIX :: Linux",
        "Operating System :: POSIX :: BSD :: FreeBSD",
        "Operating System :: POSIX :: BSD :: NetBSD",
        "Operating System :: POSIX :: BSD :: OpenBSD",
        "Operating System :: Microsoft",
        "Operating System :: Microsoft :: Windows",
        "License :: OSI Approved :: BSD License",
    ],
    description=metadata['description'],
    author='NetInvent - Orsiris de Jong',
    author_email='contact@netinvent.fr',
    url='https://github.com/netinvent/ofunctions',
    keywords=['network', 'bisection', 'logging'],
    long_description=get_long_description('README.md'),
    long_description_content_type="text/markdown",
    python_requires='>=3.5',
    # namespace packages don't work well with zipped eggs
    # ref https://packaging.python.org/guides/packaging-namespace-packages/
    zip_safe=False
)

for package in setuptools.find_namespace_packages(include=['ofunctions.*']):
    rel_package_path = package.replace('.', os.sep)
    package_path = os.path.abspath(rel_package_path)
    package_file = os.path.join(package_path, '__init__.py')
    metadata = get_metadata(package_file)
    requirements = parse_requirements(os.path.join(package_path, 'requirements.txt'))

    print(package_path)
    print(package_file)
    print(metadata)
    print(requirements)

    # Again, we need to clean build paths between runs
    clear_package_build_path(rel_package_path)

    setuptools.setup(
        name=package,
        namespace_packages=[NAMESPACE_PACKAGE_NAME],
        packages=[package],
        package_data={package: ['__init__.py']},
        version=metadata['version'],
        install_requires=requirements,
        classifiers=[
            "Development Status :: 5 - Production/Stable",
            "Intended Audience :: Developers",
            "Topic :: Software Development",
            "Topic :: System",
            "Topic :: System :: Operating System",
            "Topic :: System :: Shells",
            "Programming Language :: Python",
            "Programming Language :: Python :: 3",
            "Programming Language :: Python :: Implementation :: CPython",
            "Programming Language :: Python :: Implementation :: PyPy",
            "Operating System :: POSIX :: Linux",
            "Operating System :: POSIX :: BSD :: FreeBSD",
            "Operating System :: POSIX :: BSD :: NetBSD",
            "Operating System :: POSIX :: BSD :: OpenBSD",
            "Operating System :: Microsoft",
            "Operating System :: Microsoft :: Windows",
            "License :: OSI Approved :: BSD License",
        ],
        description=metadata['description'],
        author='NetInvent - Orsiris de Jong',
        author_email='contact@netinvent.fr',
        url='https://github.com/netinvent/ofunctions',
        keywords=['network', 'bisection', 'logging'],
        long_description=get_long_description('README.md'),
        long_description_content_type="text/markdown",
        python_requires='>=3.5',
        # namespace packages don't work well with zipped eggs
        # ref https://packaging.python.org/guides/packaging-namespace-packages/
        zip_safe=False
    )
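To double-check that each wheel now only ships its own subpackage, its contents can be listed with the standard library. A minimal sketch, assuming the example wheel name from the question sits in dist/:

import zipfile

# List every file shipped in the wheel; adjust the filename to whatever ends up in dist/
wheel_path = 'dist/ofunctions.network-0.5.0-py3-none-any.whl'
with zipfile.ZipFile(wheel_path) as wheel:
    for name in wheel.namelist():
        print(name)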
As a side note, I noticed that os.remove() will fail with WindowsError: [Error 5] Access is denied from time to time, because os.remove() needs all handles to be closed first, which can take time because of the garbage collector (AFAIK). Using shutil.rmtree() has worked in every case.
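If shutil.rmtree() itself ever hits a transient "Access is denied" on Windows (for instance when an antivirus or indexer still holds a handle), a small retry wrapper is one possible workaround. This is only a sketch, not part of the setup.py above:

import shutil
import time


def rmtree_with_retry(path, retries=3, delay=1.0):
    """Remove a directory tree, retrying a few times if Windows still holds a handle."""
    for attempt in range(retries):
        try:
            shutil.rmtree(path)
            return
        except FileNotFoundError:
            # Nothing to remove
            return
        except PermissionError:
            if attempt == retries - 1:
                raise
            time.sleep(delay)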

Related

pytest - mockup a complex module import

I have found several posts on how to "hide" a package and simulate an ImportError with pytest; however, I haven't succeeded in my case and I am looking for some help:
Test for import of optional dependencies in __init__.py with pytest: Python 3.5 /3.6 differs in behaviour
Test behavior of code if optional module is not installed
and related
Here is the content of an __about__.py file that I want to test with pytest.
"""Get the metadata from the package or from setup.py."""
try:
    import importlib
    metadata = importlib.metadata
except ImportError:
    import importlib_metadata as metadata

try:
    data = metadata.metadata("mypackage")
    __version__ = data["Version"]
    __author__ = data["Author"]
    __name__ = data["Name"]
except metadata.PackageNotFoundError:
    # The repo of the package is accessible to python to get at least the version
    import re
    from pathlib import Path

    try:
        from nested_grid_plotter import __file__ as loc
        with open(Path(loc).parent.joinpath("../setup.py"), "r") as f:
            data = f.read()
    except FileNotFoundError:
        data = ""

    def version_parser(v):
        """Parse the version from the setup file."""
        version_pattern = (
            r"""(version\s*=\s*)["|'](\d+(=?\.(\d+(=?\.(\d+)*)*)*)*)["|']"""
        )
        regex_matcher = re.compile(version_pattern).search(v)
        if regex_matcher is None:
            return "unknwon"
        return regex_matcher.group(2)

    try:
        __version__ = version_parser(data)
    except Exception:
        __version__ = "unknown"

    __author__ = "unknown"
    __name__ = "unknown"
Here is the __init__.py at the root of the package:
from .__about__ import __version__, __name__, __author__
And here are the tests that I have come up with so far. However, I am not able to hide importlib.
"""Test the file __about__.py."""
import pytest
import sys


class PackageDiscarder:
    def __init__(self):
        self.pkgnames = []

    def find_spec(self, fullname, path, target=None):
        if fullname in self.pkgnames:
            raise ImportError()


@pytest.fixture
def no_requests():
    sys.modules.pop("importlib", None)
    d = PackageDiscarder()
    d.pkgnames.append("importlib")
    sys.meta_path.insert(0, d)
    yield
    sys.meta_path.remove(d)


@pytest.fixture(autouse=True)
def cleanup_imports():
    yield
    sys.modules.pop("mypackage", None)


def test_requests_available():
    import mypackage
    assert mypackage.__version__ != "unknwon"


@pytest.mark.usefixtures("no_requests")
def test_requests_missing():
    import mypackage
    assert mypackage.__version__ != "unknwon"
Here is the coverage report:
Name Stmts Miss Cover Missing
----------------------------------------------------------------
mypackage/__about__.py 31 10 68% 5-6, 10-12, 23-24, 33, 38-39
----------------------------------------------------------------
TOTAL 31 10 68%
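For what it's worth, the meta-path approach above can be sketched in a generic, self-contained form; here json merely stands in for the optional dependency, and the fixture and test names are made up. importlib itself is harder to hide because it belongs to the import machinery and is typically already cached in sys.modules when the tests start, so every cached importlib* entry would have to be dropped as well:

import sys

import pytest


class BlockModule:
    """Meta path finder that refuses to find the given top-level modules."""

    def __init__(self, *names):
        self.names = set(names)

    def find_spec(self, fullname, path, target=None):
        if fullname.split(".")[0] in self.names:
            raise ImportError("{} is blocked for this test".format(fullname))


@pytest.fixture
def no_json(monkeypatch):
    # Block fresh imports and drop the cached module so the finder is actually consulted
    monkeypatch.setattr(sys, "meta_path", [BlockModule("json")] + sys.meta_path)
    monkeypatch.delitem(sys.modules, "json", raising=False)


def test_json_missing(no_json):
    with pytest.raises(ImportError):
        import json  # noqa: F401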

Autofix directory structure based on package in scala

I have a file src/main/scala/foo.scala which needs to be inside package bar. Ideally the file should be inside src/main/scala/bar/foo.scala.
// src/main/scala/foo.scala
package bar
// ...
How can I auto-fix this issue throughout my project such that the folder structure matches the package structure?
Is there any SBT plugin etc that can help me fix this issue?
As far as I am aware there are no such tools, though AFAIR IntelliJ can warn about package-directory mismatches.
The best I can think of is a custom scalafix (https://scalacenter.github.io/scalafix/) rule - scalafix/scalameta would be used to check a file's actual package, translate it into the expected directory, and move the file if the two differ.
I suggest scalafix/scalameta because there are corner cases like:
you are allowed to write your packages like:
package a
package b
package c
and it is almost like package a.b.c, except that it automatically imports everything from a and b
you can have a package object in your file, and then if you have
package a.b
package object c
this file should be in the a/b/c directory
so I would prefer to check that a file doesn't fall under any of those cases using some existing tooling.
If you are certain that you don't have such cases (I wouldn't be without checking), you could:
match the first line with a regexp (^package (.*))
translate a.b.c into a/b/c (matched.split('.').map(_.trim).mkString(File.separator))
compare the generated location to the actual location (I suggest resolving absolute file locations)
move the file if necessary
If there is a possibility of a more complex case than that, I would replace the first step with a query against the scalafix/scalameta utilities.
Here is an sbt plugin providing a packageStructureToDirectoryStructure task that reads package statements from source files, creates the corresponding directories, and then moves files to them:
import sbt._
import sbt.Keys._
import better.files._

object PackagesToDirectories extends AutoPlugin {
  object autoImport {
    val packageStructureToDirectoryStructure = taskKey[Unit]("Make directory structure match package structure")
  }
  import autoImport._

  override def trigger = allRequirements

  override lazy val projectSettings = Seq(
    packageStructureToDirectoryStructure := {
      val log = streams.value.log
      log.info(s"Refactoring directory structure to match package structure...")

      val sourceFiles = (Compile / sources).value
      val sourceBase = (Compile / scalaSource).value

      def packageStructure(lines: Traversable[String]): String = {
        val packageObjectRegex = """package object\s(.+)\s\{""".r
        val packageNestingRegex = """package\s(.+)\s\{""".r
        val packageRegex = """package\s(.+)""".r

        lines
          .collect {
            case packageObjectRegex(name) => name
            case packageNestingRegex(name) => name
            case packageRegex(name) => name
          }
          .flatMap(_.split('.'))
          .mkString("/")
      }

      sourceFiles.foreach { sourceFile =>
        val packagePath = packageStructure(sourceFile.toScala.lines)
        val destination = file"$sourceBase/$packagePath"
        destination.createDirectoryIfNotExists(createParents = true)
        val result = sourceFile.toScala.moveToDirectory(destination)
        log.info(s"$sourceFile moved to $result")
      }
    }
  )
}
WARNING: Make sure to back up the project before running it.

scala fastparse typechecking

I am puzzled by why the following code using scala fastparse 0.4.3 fails typechecking.
val White = WhitespaceApi.Wrapper{
  import fastparse.all._
  NoTrace(CharIn(" \t\n").rep)
}
import fastparse.noApi._
import White._
case class Term(tokens: Seq[String])
case class Terms(terms: Seq[Term])
val token = P[String] ( CharIn('a' to 'z', 'A' to 'Z', '0' to '9').rep(min=1).!)
val term: P[Term] = P("[" ~ token.!.rep(sep=" ", min=1) ~ "]").map(x => Term(x))
val terms = P("(" ~ term.!.rep(sep=" ", min=1) ~ ")").map{x => Terms(x)}
val parse = terms.parse("([ab bd ef] [xy wa dd] [jk mn op])")
The error messages:
[error] .../MyParser.scala: type mismatch;
[error] found : Seq[String]
[error] required: Seq[Term]
[error] val terms = P("(" ~ term.!.rep(sep=" ", min=1) ~")").map{x => Terms(x)}
[error] ^
I would imagine that since term is of type P[Term] and the terms pattern uses term.!.rep(...), it should get a Seq[Term].
I figured it out. My mistake was capturing (with !) redundantly in terms. That line should instead be written:
val terms = P("(" ~ term.rep(sep=" ", min=1) ~ ")").map{x => Terms(x)}
Notice that term.!.rep( has been rewritten to term.rep(.
Apparently, capturing (with !) in any rule returns the text that the captured subrule matched, overriding what the subrule actually returns. I guess this is a feature when used correctly. :)

How can I upload file from string in playframework?

The Play Framework docs show that it is easy to upload a file:
https://www.playframework.com/documentation/2.5.x/ScalaWS
ws.url(url).post(Source(FilePart("hello", "hello.txt", Option("text/plain"), FileIO.fromFile(tmpFile)) :: DataPart("key", "value") :: List()))
But what if the file content is already in memory? Is there an alternative to FileIO.fromFile, such as a FileIO.fromString(jsonStr)?
val jsonStr = """{ foo: "Bar"} """
ws.url(url).post(Source(FilePart("hello", "hello.json", Option("application/json"), FileIO.fromString(jsonStr)) :: DataPart("key", "value") :: List()))
All you need is a FilePart that has a Source[ByteString] as ref.
Just use
Source.single(ByteString(jsonStr))
as the ref part.

Why do I get a java.nio.BufferUnderflowException in this Scala

I was trying to do some scripting in Scala, to process some log files:
scala> import io.Source
import io.Source
scala> import java.io.File
import java.io.File
scala> val f = new File(".")
f: java.io.File = .
scala> for (l <- f.listFiles) {
| val src = Source.fromFile(l).getLines
| println( (0 /: src) { (i, line) => i + 1 } )
| }
3658
java.nio.BufferUnderflowException
at java.nio.Buffer.nextGetIndex(Unknown Source)
at java.nio.HeapCharBuffer.get(Unknown Source)
at scala.io.BufferedSource$$anon$2.next(BufferedSource.scala:86)
at scala.io.BufferedSource$$anon$2.next(BufferedSource.scala:74)
at scala.io.Source$$anon$6.next(Source.scala:307)
at scala.io.Source$$anon$6.next(Source.scala:301)
at scala.Iterator$cla...
Why do I get this java.nio.BufferUnderflowException?
NOTE - I'm processing 10 log files, each about 1MB in size
I got a BufferUnderflowException when I opened a file with the wrong encoding. It contained illegal characters (according to that encoding) and this misleading exception was thrown.
I'd also be interested in exactly why this is happening, but I'd guess it's to do with the fact that Source is an object (i.e. a singleton) and how it gets transparently reset. You can fix the problem as follows:
for (l <- g.listFiles if !l.isDirectory) {
  val src = Source.fromFile(l)
  println( (0 /: src.getLines) { (i, line) => i + 1 } )
  src.reset
}
The important bit is the reset - which should probably be in a try-finally block (although the isDirectory test is probably useful too)
This is essentially a restatement of Elazar's answer, but you will also get this exception if you try to read a binary file using scala.io.Source.fromFile.
I just ran into this (accidentally trying to read a .jpg with fromFile) due to a very stupid bug in something I wrote...