Package built by Poetry is missing runtime dependencies - python-packaging
I've been working on a project which so far has just involved building some cloud infrastructure, and now I'm trying to add a CLI to simplify running some AWS Lambdas. Unfortunately both the sdist and wheel packages built using poetry build don't seem to include the dependencies in their metadata, so I have to pip install all of them manually to run the command. Basically I:
1. run poetry build in the project,
2. cd "$(mktemp --directory)",
3. python -m venv .venv,
4. . .venv/bin/activate,
5. pip install /path/to/result/of/poetry/build/above, and then
6. run the new .venv/bin/ executable (see the condensed sketch below).
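Condensed into a shell session, the reproduction looks like this (a sketch only: /path/to/project stands in for the project directory, the dist/ location and wheel name are Poetry's defaults, and the geostore entry point is taken from the pyproject.toml below):
poetry build                  # writes the sdist and wheel to ./dist by default
cd "$(mktemp --directory)"
python -m venv .venv
. .venv/bin/activate
pip install /path/to/project/dist/geostore-0.1.0-py3-none-any.whl
geostore --help               # fails, because boto3 and typer were never installed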
At this point the executable fails, because pip did not install any of the package's dependencies. If I run pip show PACKAGE, the Requires line is empty.
The Poetry manual doesn't seem to specify how to link dependencies to the built package, so what do I have to do instead?
I am using some optional dependencies; could that be interfering with the build process? To be clear, even non-optional dependencies do not show up in the package dependencies.
pyproject.toml:
[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
[tool.black]
line-length = 100
[tool.coverage.report]
exclude_lines = [
'if TYPE_CHECKING:',
'if __name__ == "__main__":',
'pragma: no cover',
]
fail_under = 100
[tool.coverage.run]
branch = true
omit = [
".venv/*",
]
[tool.isort]
case_sensitive = true
line_length = 100
profile = "black"
[tool.mypy]
show_error_codes = true
strict = true
[[tool.mypy.overrides]]
module = [
"jsonschema",
"jsonschema._utils",
"jsonschema.validators",
"multihash",
"pystac",
"pystac.layout",
"pytest_subtests",
"smart_open",
"linz_logger"
]
ignore_missing_imports = true
[tool.poetry]
name = "geostore"
version = "0.1.0"
description = "Central storage, management and access for important geospatial datasets developed by LINZ"
authors = [
"Bill M. Nelson <bmnelson#linz.govt.nz>",
"Daniel Silk <dsilk#linz.govt.nz>",
"Ivan Mincik <ivan.mincik#gmail.com>",
"Mitchell Paff <mpaff#linz.govt.nz>",
"Sandro Santilli <strk#kbt.io>",
"Simon Planzer <splanzer#linz.govt.nz>",
"Victor Engmark <vengmark#linz.govt.nz>",
]
license = "MIT"
readme = "README.md"
homepage = "https://github.com/linz/geostore"
repository = "https://github.com/linz/geostore"
keywords = [
"SpatioTemporal Asset Catalog (STAC)",
"Toitū Te Whenua Land Information New Zealand",
]
classifiers = [
"Development Status :: 4 - Beta",
"Environment :: Console",
"Framework :: AWS CDK",
"Framework :: Pytest",
"Intended Audience :: End Users/Desktop",
"Intended Audience :: Information Technology",
"License :: OSI Approved :: MIT License",
"Natural Language :: English",
"Operating System :: POSIX",
"Programming Language :: Python :: 3.8",
"Topic :: Communications :: File Sharing",
"Topic :: Scientific/Engineering :: GIS",
"Topic :: Utilities",
"Typing :: Typed",
]
[tool.poetry.dependencies]
python = "^3.8"
"aws-cdk.aws-dynamodb" = {version = "*", optional = true}
"aws-cdk.aws-ec2" = {version = "*", optional = true}
"aws-cdk.aws-ecr" = {version = "*", optional = true}
"aws-cdk.aws-ecr_assets" = {version = "*", optional = true}
"aws-cdk.aws-ecs" = {version = "*", optional = true}
"aws-cdk.aws-events" = {version = "*", optional = true}
"aws-cdk.aws-events-targets" = {version = "*", optional = true}
"aws-cdk.aws-iam" = {version = "*", optional = true}
"aws-cdk.aws-lambda" = {version = "*", optional = true}
"aws-cdk.aws-lambda-event-sources" = {version = "*", optional = true}
"aws-cdk.aws-lambda-python" = {version = "*", optional = true}
"aws-cdk.aws-s3" = {version = "*", optional = true}
"aws-cdk.aws-sns" = {version = "*", optional = true}
"aws-cdk.aws-stepfunctions" = {version = "*", optional = true}
"aws-cdk.aws-stepfunctions_tasks" = {version = "*", optional = true}
awscli = {version = "*", optional = true}
boto3 = "*"
cattrs = {version = "*", optional = true}
jsonschema = {version = "*", extras = ["format"], optional = true}
multihash = {version = "*", optional = true}
pynamodb = {version = "*", optional = true}
pystac = {version = "*", optional = true}
slack-sdk = {version = "*", extras = ["models", "webhook"], optional = true}
smart-open = {version = "*", extras = ["s3"], optional = true}
strict-rfc3339 = {optional = true, version = "*"}
typer = "*"
ulid-py = {version = "*", optional = true}
linz-logger = {version = "*", optional = true}
[tool.poetry.dev-dependencies]
black = "*"
boto3-stubs = {version = "*", extras = ["batch", "dynamodb", "events", "lambda", "lambda-python", "s3", "s3control", "sns", "sqs", "ssm", "stepfunctions", "sts"]}
gitlint = "*"
ipdb = "*"
isort = "*"
language-formatters-pre-commit-hooks = "*"
mutmut = "*"
mypy = "*"
pre-commit = "*"
pylint = "*"
pytest = "*"
pytest-randomly = "*"
pytest-socket = "*"
pytest-subtests = "*"
pytest-timeout = "*"
types-pkg-resources = "*"
types-python-dateutil = "*"
types-requests = "*"
types-six = "*"
types-toml = "*"
[tool.poetry.dev-dependencies.coverage]
version = "*"
extras = ["toml"]
[tool.poetry.extras]
cdk = [
"aws-cdk.aws-dynamodb",
"aws-cdk.aws-ec2",
"aws-cdk.aws-ecr",
"aws-cdk.aws-ecr_assets",
"aws-cdk.aws-ecs",
"aws-cdk.aws-events",
"aws-cdk.aws-events-targets",
"aws-cdk.aws-iam",
"aws-cdk.aws-lambda",
"aws-cdk.aws-lambda-event-sources",
"aws-cdk.aws-lambda-python",
"aws-cdk.aws-s3",
"aws-cdk.aws-sns",
"aws-cdk.aws-stepfunctions",
"aws-cdk.aws-stepfunctions_tasks",
"awscli",
"cattrs",
]
check_files_checksums = [
"boto3",
"linz-logger",
"multihash",
"pynamodb",
]
check_stac_metadata = [
"boto3",
"jsonschema",
"linz-logger",
"pynamodb",
"strict-rfc3339",
]
cli = [
"boto3",
"typer",
]
content_iterator = [
"jsonschema",
"linz-logger",
"pynamodb",
]
datasets = [
"boto3",
"jsonschema",
"linz-logger",
"pynamodb",
"pystac",
"ulid-py",
]
dataset_versions = [
"jsonschema",
"linz-logger",
"pynamodb",
"ulid-py",
]
import_asset_file = [
"boto3",
"linz-logger",
"smart-open",
]
import_dataset = [
"boto3",
"jsonschema",
"linz-logger",
"pynamodb",
"smart-open",
"ulid-py",
]
import_metadata_file = [
"boto3",
"linz-logger",
]
import_status = [
"boto3",
"jsonschema",
"linz-logger",
"pynamodb",
]
notify_status_update = [
"boto3",
"jsonschema",
"linz-logger",
"pynamodb",
"slack-sdk"
]
populate_catalog = [
"boto3",
"jsonschema",
"linz-logger",
"pystac",
]
update_dataset_catalog = [
"boto3",
"jsonschema",
"linz-logger",
"pynamodb",
"ulid-py"
]
upload_status = [
"boto3",
"jsonschema",
"linz-logger",
"pynamodb",
]
validation_summary = [
"jsonschema",
"linz-logger",
"pynamodb",
]
[tool.poetry.scripts]
geostore = "geostore.cli:app"
[tool.pylint.MASTER]
disable = [
"duplicate-code",
"missing-class-docstring",
"missing-function-docstring",
"missing-module-docstring",
]
load-plugins = [
"pylint.extensions.mccabe",
]
max-complexity = 6
[tool.pytest.ini_options]
addopts = "--randomly-dont-reset-seed"
markers = [
"infrastructure: requires a deployed infrastructure",
]
python_functions = "should_*"
testpaths = [
"tests"
]
As you can see, the boto3 and typer runtime dependencies are not optional, so I'd expect to see them in pip show geostore.
This appears to be a bug in Poetry, or at least it's not clear from the documentation what the expected behavior would be in a case such as yours.
In your pyproject.toml, you specify two dependencies as required in this section:
[tool.poetry.dependencies]
…
awscli = {version = "*", optional = true}
boto3 = "*"
…
typer = "*"
…
So, unlike awscli and many others, boto3 and typer should be required, because their optional attribute is not set and defaults to false. But you also list these two required dependencies as "extras" in this section:
[tool.poetry.extras]
…
cli = [
"boto3",
"typer",
]
…
Poetry takes that to mean that they are in fact optional, not required. That makes sense, in a way, because extras are effectively optional. If you inspect the .whl wheel file built by Poetry (it's just a zip archive), specifically the METADATA file in it (which is what pip refers to when installing the package), you'll find this line:
Requires-Dist: typer; extra == "cli"
So that dependency is in fact optional: It will only get installed if users ask for it explicitly with pip install geostore[cli].
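You can verify this straight from the archive; for instance (assuming Poetry's default dist/ output directory and wheel name):
unzip -p dist/geostore-0.1.0-py3-none-any.whl geostore-0.1.0.dist-info/METADATA | grep Requires-Dist
Every boto3 and typer entry in the output carries such an extra == "..." marker instead of being listed unconditionally.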
The solution, then, is simple: remove all references to the required dependencies from the extras section. They are not needed there anyway.
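With that change, the affected entries would look something like the following sketch (only a few extras shown; the now-empty cli extra is dropped entirely):
[tool.poetry.extras]
# boto3 and typer no longer appear in any extras list, because they are
# unconditionally required anyway; the cli extra contained nothing else,
# so it is removed.
check_files_checksums = [
    "linz-logger",
    "multihash",
    "pynamodb",
]
check_stac_metadata = [
    "jsonschema",
    "linz-logger",
    "pynamodb",
    "strict-rfc3339",
]
# …and so on for the remaining extras.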
The Poetry documentation is in fact not very clear on what optional really signifies. That attribute is (currently) only briefly mentioned in the section on the pyproject.toml file. One could also argue that if optional is false, then the extras section should not override that value.
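Once the extras section is cleaned up and the package rebuilt, the required dependencies should be picked up unconditionally (again a sketch, assuming the default wheel name):
poetry build
pip install dist/geostore-0.1.0-py3-none-any.whl
pip show geostore    # the Requires line should now list boto3 and typer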