Package built by Poetry is missing runtime dependencies

I've been working on a project which so far has just involved building some cloud infrastructure, and now I'm trying to add a CLI to simplify running some AWS Lambdas. Unfortunately, both the sdist and wheel packages built using poetry build seem to be missing the dependencies, so I have to pip install all of them manually to run the command. Basically, I
run poetry build in the project,
cd "$(mktemp --directory)",
python -m venv .venv,
. .venv/bin/activate,
pip install /path/to/result/of/poetry/build/above, and then
run the new .venv/bin/ executable.
At this point the executable fails, because pip did not install any of the package dependencies. If I run pip show PACKAGE, the Requires line is empty.
The Poetry manual doesn't seem to specify how to link dependencies to the built package, so what do I have to do instead?
I am using some optional dependencies; could that be interfering with the build process? To be clear, even non-optional dependencies do not show up in the package dependencies.
pyproject.toml:
[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
[tool.black]
line-length = 100
[tool.coverage.report]
exclude_lines = [
'if TYPE_CHECKING:',
'if __name__ == "__main__":',
'pragma: no cover',
]
fail_under = 100
[tool.coverage.run]
branch = true
omit = [
".venv/*",
]
[tool.isort]
case_sensitive = true
line_length = 100
profile = "black"
[tool.mypy]
show_error_codes = true
strict = true
[[tool.mypy.overrides]]
module = [
"jsonschema",
"jsonschema._utils",
"jsonschema.validators",
"multihash",
"pystac",
"pystac.layout",
"pytest_subtests",
"smart_open",
"linz_logger"
]
ignore_missing_imports = true
[tool.poetry]
name = "geostore"
version = "0.1.0"
description = "Central storage, management and access for important geospatial datasets developed by LINZ"
authors = [
"Bill M. Nelson <bmnelson#linz.govt.nz>",
"Daniel Silk <dsilk#linz.govt.nz>",
"Ivan Mincik <ivan.mincik#gmail.com>",
"Mitchell Paff <mpaff#linz.govt.nz>",
"Sandro Santilli <strk#kbt.io>",
"Simon Planzer <splanzer#linz.govt.nz>",
"Victor Engmark <vengmark#linz.govt.nz>",
]
license = "MIT"
readme = "README.md"
homepage = "https://github.com/linz/geostore"
repository = "https://github.com/linz/geostore"
keywords = [
"SpatioTemporal Asset Catalog (STAC)",
"Toitū Te Whenua Land Information New Zealand",
]
classifiers = [
"Development Status :: 4 - Beta",
"Environment :: Console",
"Framework :: AWS CDK",
"Framework :: Pytest",
"Intended Audience :: End Users/Desktop",
"Intended Audience :: Information Technology",
"License :: OSI Approved :: MIT License",
"Natural Language :: English",
"Operating System :: POSIX",
"Programming Language :: Python :: 3.8",
"Topic :: Communications :: File Sharing",
"Topic :: Scientific/Engineering :: GIS",
"Topic :: Utilities",
"Typing :: Typed",
]
[tool.poetry.dependencies]
python = "^3.8"
"aws-cdk.aws-dynamodb" = {version = "*", optional = true}
"aws-cdk.aws-ec2" = {version = "*", optional = true}
"aws-cdk.aws-ecr" = {version = "*", optional = true}
"aws-cdk.aws-ecr_assets" = {version = "*", optional = true}
"aws-cdk.aws-ecs" = {version = "*", optional = true}
"aws-cdk.aws-events" = {version = "*", optional = true}
"aws-cdk.aws-events-targets" = {version = "*", optional = true}
"aws-cdk.aws-iam" = {version = "*", optional = true}
"aws-cdk.aws-lambda" = {version = "*", optional = true}
"aws-cdk.aws-lambda-event-sources" = {version = "*", optional = true}
"aws-cdk.aws-lambda-python" = {version = "*", optional = true}
"aws-cdk.aws-s3" = {version = "*", optional = true}
"aws-cdk.aws-sns" = {version = "*", optional = true}
"aws-cdk.aws-stepfunctions" = {version = "*", optional = true}
"aws-cdk.aws-stepfunctions_tasks" = {version = "*", optional = true}
awscli = {version = "*", optional = true}
boto3 = "*"
cattrs = {version = "*", optional = true}
jsonschema = {version = "*", extras = ["format"], optional = true}
multihash = {version = "*", optional = true}
pynamodb = {version = "*", optional = true}
pystac = {version = "*", optional = true}
slack-sdk = {version = "*", extras = ["models", "webhook"], optional = true}
smart-open = {version = "*", extras = ["s3"], optional = true}
strict-rfc3339 = {optional = true, version = "*"}
typer = "*"
ulid-py = {version = "*", optional = true}
linz-logger = {version = "*", optional = true}
[tool.poetry.dev-dependencies]
black = "*"
boto3-stubs = {version = "*", extras = ["batch", "dynamodb", "events", "lambda", "lambda-python", "s3", "s3control", "sns", "sqs", "ssm", "stepfunctions", "sts"]}
gitlint = "*"
ipdb = "*"
isort = "*"
language-formatters-pre-commit-hooks = "*"
mutmut = "*"
mypy = "*"
pre-commit = "*"
pylint = "*"
pytest = "*"
pytest-randomly = "*"
pytest-socket = "*"
pytest-subtests = "*"
pytest-timeout = "*"
types-pkg-resources = "*"
types-python-dateutil = "*"
types-requests = "*"
types-six = "*"
types-toml = "*"
[tool.poetry.dev-dependencies.coverage]
version = "*"
extras = ["toml"]
[tool.poetry.extras]
cdk = [
"aws-cdk.aws-dynamodb",
"aws-cdk.aws-ec2",
"aws-cdk.aws-ecr",
"aws-cdk.aws-ecr_assets",
"aws-cdk.aws-ecs",
"aws-cdk.aws-events",
"aws-cdk.aws-events-targets",
"aws-cdk.aws-iam",
"aws-cdk.aws-lambda",
"aws-cdk.aws-lambda-event-sources",
"aws-cdk.aws-lambda-python",
"aws-cdk.aws-s3",
"aws-cdk.aws-sns",
"aws-cdk.aws-stepfunctions",
"aws-cdk.aws-stepfunctions_tasks",
"awscli",
"cattrs",
]
check_files_checksums = [
"boto3",
"linz-logger",
"multihash",
"pynamodb",
]
check_stac_metadata = [
"boto3",
"jsonschema",
"linz-logger",
"pynamodb",
"strict-rfc3339",
]
cli = [
"boto3",
"typer",
]
content_iterator = [
"jsonschema",
"linz-logger",
"pynamodb",
]
datasets = [
"boto3",
"jsonschema",
"linz-logger",
"pynamodb",
"pystac",
"ulid-py",
]
dataset_versions = [
"jsonschema",
"linz-logger",
"pynamodb",
"ulid-py",
]
import_asset_file = [
"boto3",
"linz-logger",
"smart-open",
]
import_dataset = [
"boto3",
"jsonschema",
"linz-logger",
"pynamodb",
"smart-open",
"ulid-py",
]
import_metadata_file = [
"boto3",
"linz-logger",
]
import_status = [
"boto3",
"jsonschema",
"linz-logger",
"pynamodb",
]
notify_status_update = [
"boto3",
"jsonschema",
"linz-logger",
"pynamodb",
"slack-sdk"
]
populate_catalog = [
"boto3",
"jsonschema",
"linz-logger",
"pystac",
]
update_dataset_catalog = [
"boto3",
"jsonschema",
"linz-logger",
"pynamodb",
"ulid-py"
]
upload_status = [
"boto3",
"jsonschema",
"linz-logger",
"pynamodb",
]
validation_summary = [
"jsonschema",
"linz-logger",
"pynamodb",
]
[tool.poetry.scripts]
geostore = "geostore.cli:app"
[tool.pylint.MASTER]
disable = [
"duplicate-code",
"missing-class-docstring",
"missing-function-docstring",
"missing-module-docstring",
]
load-plugins = [
"pylint.extensions.mccabe",
]
max-complexity = 6
[tool.pytest.ini_options]
addopts = "--randomly-dont-reset-seed"
markers = [
"infrastructure: requires a deployed infrastructure",
]
python_functions = "should_*"
testpaths = [
"tests"
]
As you can see, the boto3 and typer runtime dependencies are not optional, so I'd expect pip show geostore to list them under Requires.

This appears to be a bug in Poetry. Or at least it's not clear from the documentation what the expected behavior would be in a case such as yours.
In your pyproject.toml, you specify two dependencies as required in this section:
[tool.poetry.dependencies]
…
awscli = {version = "*", optional = true}
boto3 = "*"
…
typer = "*"
…
So, unlike awscli and many others, boto3 and typer should be required, because their optional attribute is not set and defaults to false. But you also list the two required dependencies as "extras" in this section:
[tool.poetry.extras]
…
cli = [
"boto3",
"typer",
]
…
Poetry takes that to mean that they are in fact optional, not required. Which makes sense, in a way, because extras are effectively optional. If you inspect the .whl wheel file built by Poetry (it's just a ZIP archive), specifically the METADATA file inside it (which is what pip consults when installing the package), you'll find this line:
Requires-Dist: typer; extra == "cli"
So that dependency is in fact optional: It will only get installed if users ask for it explicitly with pip install geostore[cli].
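You can verify this without installing anything; a minimal sketch, assuming the default dist/ output directory and the 0.1.0 version from the pyproject.toml above:
# Print the built wheel's core metadata and filter the dependency lines
unzip -p dist/geostore-0.1.0-py3-none-any.whl 'geostore-0.1.0.dist-info/METADATA' | grep Requires-Dist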
The solution then is simple: Remove all references to the required dependencies from the extras section. They are not needed there anyway.
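A minimal sketch of the relevant pieces after the fix (only genuinely optional dependencies remain associated with extras):
[tool.poetry.dependencies]
boto3 = "*"  # required: no optional attribute, installed unconditionally
typer = "*"  # required: no optional attribute, installed unconditionally

[tool.poetry.extras]
# The "cli" extra is gone: it listed only required dependencies.
# Extras should only reference packages marked `optional = true` above.
After rebuilding, the wheel's METADATA should contain plain Requires-Dist: boto3 and Requires-Dist: typer lines with no extra marker, and pip show geostore should list both under Requires.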
The Poetry documentation is in fact not very clear on what optional really signifies. That attribute is (currently) only briefly mentioned in the section on the pyproject.toml file. One could also argue that if optional is false, then the extras section should not override that value.

Related

Best way to create several complex resources of the same type with terraform variables

I am converting existing Kubernetes infrastructure to Terraform. I ran terraform import on the Kubernetes cluster that I wanted to convert. Now that I have the infrastructure as Terraform code, I'm trying to make it reusable. My organization has several clusters, and they all have different node pools. I'm working on creating the variables.tf file and I am unsure of the best method. I want to make it so any number of node_pools with specific variables can be created. Ideally I don't want to have to use different files/variables for each node pool I create. Is there a way to define 6 different node_pools in variables without creating individual variables and resources for each node pool?
For simpler objects I could see count being a viable solution, but these might be too complicated. Below are 2 of the 6 node pools I am working with; a sketch of one possible approach follows them.
node_pool {
initial_node_count = 12
# instance_group_urls = []
max_pods_per_node = 16
name = "test-pool"
node_count = 12
node_locations = [
"us-central1-b",
"us-central1-c",
"us-central1-f",
]
version = "1.21.14-gke.700"
management {
auto_repair = true
auto_upgrade = true
}
node_config {
disk_size_gb = 50
disk_type = "pd-standard"
guest_accelerator = []
image_type = "COS_CONTAINERD"
labels = {
"integrationtestnode" = "true"
}
local_ssd_count = 0
machine_type = "n1-standard-2"
metadata = {
"disable-legacy-endpoints" = "true"
}
oauth_scopes = [
"https://www.googleapis.com/auth/cloud-platform",
]
preemptible = false
service_account = "svcs-dev@megacorp-dev-project.iam.gserviceaccount.com"
spot = false
tags = []
taint = [
{
effect = "NO_SCHEDULE"
key = "integrationtest"
value = "true"
},
]
shielded_instance_config {
enable_integrity_monitoring = true
enable_secure_boot = true
}
}
upgrade_settings {
max_surge = 1
max_unavailable = 0
}
}
node_pool {
initial_node_count = 1
max_pods_per_node = 110
name = "promop-n2s8"
node_count = 1
node_locations = [
"us-central1-b",
"us-central1-c",
"us-central1-f",
]
version = "1.21.13-gke.900"
management {
auto_repair = true
auto_upgrade = true
}
node_config {
disk_size_gb = 100
disk_type = "pd-standard"
guest_accelerator = []
image_type = "COS_CONTAINERD"
labels = {
"megacorp.reserved" = "promop-dev"
}
local_ssd_count = 0
machine_type = "n2-standard-8"
metadata = {
"disable-legacy-endpoints" = "true"
}
oauth_scopes = [
"https://www.googleapis.com/auth/cloud-platform",
]
preemptible = false
service_account = "svcs-dev@megacorp-dev-project.iam.gserviceaccount.com"
spot = false
tags = []
taint = [
{
effect = "NO_SCHEDULE"
key = "app"
value = "prometheus-operator-dev"
},
]
shielded_instance_config {
enable_integrity_monitoring = true
enable_secure_boot = false
}
workload_metadata_config {
mode = "GKE_METADATA"
}
}
upgrade_settings {
max_surge = 2
max_unavailable = 0
}
}
...
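There was no answer in the thread, but one common approach is to describe the pools as plain data and generate the blocks with a dynamic block. The following is a minimal sketch under that assumption, with most attributes trimmed and hypothetical names (node_pools, cluster); extend the object type with whatever else varies between pools:
variable "node_pools" {
  description = "One object per node pool."
  type = list(object({
    name           = string
    node_count     = number
    machine_type   = string
    disk_size_gb   = number
    node_locations = list(string)
  }))
}

resource "google_container_cluster" "cluster" {
  name     = "example-cluster"
  location = "us-central1"

  # One node_pool block is generated per element of var.node_pools.
  dynamic "node_pool" {
    for_each = var.node_pools
    content {
      name           = node_pool.value.name
      node_count     = node_pool.value.node_count
      node_locations = node_pool.value.node_locations

      node_config {
        machine_type = node_pool.value.machine_type
        disk_size_gb = node_pool.value.disk_size_gb
      }
    }
  }
}
An alternative with the same variable shape is to keep the cluster resource minimal and create one google_container_node_pool resource per element with for_each, which tends to be easier to manage than in-cluster node_pool blocks.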

Dynamic creation of kubernetes manifest in Terraform

I'm trying to create multiple K8s manifests based on VPC subnets as the following code suggests:
resource "aws_subnet" "pod_subnets" {
for_each = module.pods_subnet_addrs.network_cidr_blocks
depends_on = [
aws_vpc_ipv4_cidr_block_association.pod_cidr
]
vpc_id = data.terraform_remote_state.vpc.outputs.vpc_id
availability_zone = each.key
cidr_block = each.value
tags = merge(
local.common_tags,
{
"Name" = format(
"${var.environment_name}-pods-network-%s",
each.key)
} )
}
resource "kubernetes_manifest" "ENIconfig" {
for_each = module.pods_subnet_addrs.network_cidr_blocks
manifest = {
"apiVersion" = "crd.k8s.amazonaws.com/v1alpha1"
"kind" = "ENIConfig"
"metadata" = {
"name" = each.key
}
"spec" = {
"securityGroups" = [
aws_security_group.worker_node.id,
]
"subnet" = aws_subnet.pod_subnets[each.key].id
}
}
}
However, when I run Terraform I get the following error:
Provider "registry.terraform.io/hashicorp/kubernetes" planned an invalid value for kubernetes_manifest.ENIconfig["eu-west-3a"].manifest: planned value cty.ObjectVal(map[string]cty.Value{"apiVersion":cty.StringVal("crd.k8s.amazonaws.com/v1alpha1"), "kind":cty.StringVal("ENIConfig"),"metadata":cty.ObjectVal(map[string]cty.Value{"name":cty.StringVal("eu-west-3a")}), "spec":cty.ObjectVal(map[string]cty.Value{"securityGroups":cty.TupleVal([]cty.Value{cty.StringVal("sg-07e264400925e9a4a")}),"subnet":cty.NullVal(cty.String)})}) does not match config value cty.ObjectVal(map[string]cty.Value{"apiVersion":cty.StringVal("crd.k8s.amazonaws.com/v1alpha1"),"kind":cty.StringVal("ENIConfig"),"metadata":cty.ObjectVal(map[string]cty.Value{"name":cty.StringVal("eu-west-3a")}), "spec":cty.ObjectVal(map[string]cty.Value{"securityGroups":cty.TupleVal([]cty.Value{cty.StringVal("sg-07e264400925e9a4a")}),"subnet":cty.UnknownVal(cty.String)})}).
Any idea what I'm doing wrong here?
It turns out that kubernetes_manifest cannot be planned with values that are not yet known, such as attributes of resources that haven't been created. Only values known at plan time can populate the resource.
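As a practical consequence (my workaround, not part of the original answer), you can create the subnets in a first, targeted run so that their IDs are already known when the manifests are planned:
# First create only the subnets (all for_each instances are matched)...
terraform apply -target='aws_subnet.pod_subnets'
# ...then apply the rest, now that the subnet IDs are known
terraform apply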

Azure Terraform function app deployment issue

I hope somebody can help me with this issue, because I don't understand what I am doing wrong.
I am trying to build an Azure function app and deploy a ZIP package (timer trigger) to it.
I have set up this code:
resource "azurerm_resource_group" "function-rg" {
location = "westeurope"
name = "resource-group"
}
data "azurerm_storage_account_sas" "sas" {
connection_string = azurerm_storage_account.sthriprdeurcsvtoscim.primary_connection_string
https_only = true
start = "2021-01-01"
expiry = "2023-12-31"
resource_types {
object = true
container = false
service = false
}
services {
blob = true
queue = false
table = false
file = false
}
permissions {
read = true
write = false
delete = false
list = false
add = false
create = false
update = false
process = false
}
}
resource "azurerm_app_service_plan" "ASP-rg-hri-prd-scim" {
location = azurerm_resource_group.function-rg.location
name = "ASP-rghriprdeurcsvtoscim"
resource_group_name = azurerm_resource_group.function-rg.name
kind = "functionapp"
maximum_elastic_worker_count = 1
per_site_scaling = false
reserved = false
sku {
capacity = 0
size = "Y1"
tier = "Dynamic"
}
}
resource "azurerm_storage_container" "deployments" {
name = "function-releases"
storage_account_name = azurerm_storage_account.sthriprdeurcsvtoscim.name
container_access_type = "private"
}
resource "azurerm_storage_blob" "appcode" {
name = "functionapp.zip"
storage_account_name = azurerm_storage_account.sthriprdeurcsvtoscim.name
storage_container_name = azurerm_storage_container.deployments.name
type = "Block"
source = "./functionapp.zip"
}
resource "azurerm_function_app" "func-hri-prd-eur-csv-to-scim" {
storage_account_name = azurerm_storage_account.sthriprdeurcsvtoscim.name
storage_account_access_key = azurerm_storage_account.sthriprdeurcsvtoscim.primary_access_key
app_service_plan_id = azurerm_app_service_plan.ASP-rg-hri-prd-scim.id
location = azurerm_resource_group.function-rg.location
name = "func-hri-prd-csv-to-scim"
resource_group_name = azurerm_resource_group.function-rg.name
app_settings = {
"WEBSITE_RUN_FROM_PACKAGE" = "https://${azurerm_storage_account.sthriprdeurcsvtoscim.name}.blob.core.windows.net/${azurerm_storage_container.deployments.name}/${azurerm_storage_blob.appcode.name}${data.azurerm_storage_account_sas.sas.sas}"
"FUNCTIONS_EXTENSION_VERSION" = "~3"
"FUNCTIONS_WORKER_RUNTIME" = "dotnet"
}
enabled = true
identity {
type = "SystemAssigned"
}
version = "~3"
enable_builtin_logging = false
}
resource "azurerm_storage_account" "sthriprdeurcsvtoscim" {
account_kind = "Storage"
account_replication_type = "LRS"
account_tier = "Standard"
allow_blob_public_access = false
enable_https_traffic_only = true
is_hns_enabled = false
location = azurerm_resource_group.function-rg.location
name = "sthriprdeurcsvtoscim"
resource_group_name = azurerm_resource_group.function-rg.name
}
It goes without saying that terraform apply works without any error. The configuration of the function app is correct and points to the right storage account. The storage account has a container with the ZIP file containing my Azure function code.
But when I go to the function app -> Functions, I don't see any function there.
Can somebody please help me understand what I am doing wrong here?
The function app is a .NET Core 3 function.
When you create a function app, it isn't set up for a Functions + Terraform workflow; it's set up for a Visual Studio Code + Functions deployment. We need to adjust the package.json so that it produces the ZIP file for us, and the .gitignore so that it ignores the Terraform build files. I use a bunch of helper NPM packages:
azure-functions-core-tools for the func command.
@ffflorian/jszip-cli to ZIP my files up.
mkdirp for creating directories.
npm-run-all and particularly the run-s command for executing things in order.
rimraf for deleting things.
Below is what the package.json looks like:
{
"name": "backend",
"version": "1.0.0",
"description": "",
"scripts": {
"func": "func",
"clean": "rimraf build",
"build:compile": "tsc",
"build:prune": "npm prune --production",
"prebuild:zip": "mkdirp --mode=0700 build",
"build:zip": "jszip-cli",
"build": "run-s clean build:compile build:zip",
"predeploy": "npm run build",
"deploy": "terraform apply"
},
"dependencies": {
},
"devDependencies": {
"azure-functions-core-tools": "^2.7.1724",
"#azure/functions": "^1.0.3",
"#ffflorian/jszip-cli": "^3.0.2",
"mkdirp": "^0.5.1",
"npm-run-all": "^4.1.5",
"rimraf": "^3.0.0",
"typescript": "^3.3.3"
}
}
npm run build will build the ZIP file.
npm run deploy will build the ZIP file and deploy it to Azure.
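If you combine this with the Terraform from the question, the blob's source just has to point at whatever the ZIP step writes. A minimal sketch, assuming the @ffflorian/jszip-cli configuration outputs build/functionapp.zip (the actual path depends on your ZIP config):
resource "azurerm_storage_blob" "appcode" {
  name                   = "functionapp.zip"
  storage_account_name   = azurerm_storage_account.sthriprdeurcsvtoscim.name
  storage_container_name = azurerm_storage_container.deployments.name
  type                   = "Block"
  # Path produced by `npm run build` in this setup (assumption)
  source                 = "./build/functionapp.zip"
}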
For complete information check Azure Function app with Terraform.

How to reuse deps for different tests in Bazel?

I am using Bazel to compile Scala.
Right now, my scala_test looks like this:
scala_test(
    name = "sample",
    srcs = [
        "a.scala",
        "b.scala",
        "c.scala",
        "d.scala",
    ],
    deps = [
        "//src/main/scala/.../dep1",
        "//src/main/scala/.../dep2",
        "//src/main/scala/.../dep3",
        "//src/main/scala/.../dep4",
    ],
)
In this case, Bazel cannot parallelize across these srcs, because they are grouped into a single scala_test. To enable automatic parallel testing, I would like to split the srcs into separate scala_test targets, like this:
scala_test(
    name = "sample1",
    srcs = [
        "a.scala",
    ],
    deps = [
        "//src/main/scala/.../dep1",
        "//src/main/scala/.../dep2",
        "//src/main/scala/.../dep3",
        "//src/main/scala/.../dep4",
    ],
)

scala_test(
    name = "sample2",
    srcs = [
        "b.scala",
    ],
    deps = [
        "//src/main/scala/.../dep1",
        "//src/main/scala/.../dep2",
        "//src/main/scala/.../dep3",
        "//src/main/scala/.../dep4",
    ],
)
...
The problem is that, I guess, Bazel tries to compile the deps for every scala_test. Is there any way to group dependencies and reuse them across different scala_test targets, for example via scala_library?
Sorry, I think Bazel caches the dependencies so I don't have to worry about compiling them again when running all tests.
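That said, if the goal is just to avoid repeating the list, BUILD files are Starlark, so the shared deps can be factored into a variable. A minimal sketch (the dep labels are the elided ones from the question, and the load path may differ by rules_scala version):
load("@io_bazel_rules_scala//scala:scala.bzl", "scala_test")

# Shared dependency list, defined once and reused by every test target.
COMMON_DEPS = [
    "//src/main/scala/.../dep1",
    "//src/main/scala/.../dep2",
    "//src/main/scala/.../dep3",
    "//src/main/scala/.../dep4",
]

scala_test(
    name = "sample1",
    srcs = ["a.scala"],
    deps = COMMON_DEPS,
)

scala_test(
    name = "sample2",
    srcs = ["b.scala"],
    deps = COMMON_DEPS,
)
This only removes duplication in the BUILD file; compilation of the deps themselves is shared through Bazel's cache either way.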

Kamon StatsD not sending metrics when I run my Scala application as a Docker container

When I run my Scala application with sbt run, it sends Kamon metrics to the graphite/grafana container. I then created a Docker image for the application and ran it as a container, and now it does not send metrics to the graphite/grafana container, even though both my application container and the graphite/grafana container run on the same Docker network.
The command I used to run the Grafana image is: docker run --network smart -d -p 80:80 -p 81:81 -p 2003:2003 -p 8125:8125/udp -p 8126:8126 8399049ce731
The Kamon configuration in application.conf is:
kamon {
auto-start=true
metric {
tick-interval = 1 seconds
filters {
akka-actor {
includes = ["*/user/*"]
excludes = [ "*/system/**", "*/user/IO-**", "**/kamon/**" ]
}
akka-router {
includes = ["*/user/*"]
excludes = [ "*/system/**", "*/user/IO-**", "**/kamon/**" ]
}
akka-dispatcher {
includes = ["*/user/*"]
excludes = [ "*/system/**", "*/user/IO-**", "*kamon*",
"*/kamon/*", "**/kamon/**" ]
}
trace {
includes = [ "**" ]
excludes = [ ]
}
}
}
# needed for: [error] Exception in thread "main" java.lang.ClassNotFoundException: local
internal-config {
akka.actor.provider = "akka.actor.LocalActorRefProvider"
}
statsd {
hostname = "127.0.0.1"
port = 8125
# Subscription patterns used to select which metrics will be pushed to StatsD.
# Note that first, metrics collection for your desired entities must be
# activated under the kamon.metrics.filters settings.
subscriptions {
histogram = [ "**" ]
min-max-counter = [ "**" ]
gauge = [ "**" ]
counter = [ "**" ]
trace = [ "**" ]
trace-segment = [ "**" ]
akka-actor = [ "**" ]
akka-dispatcher = [ "**" ]
akka-router = [ "**" ]
system-metric = [ "**" ]
http-server = [ "**" ]
}
metric-key-generator = kamon.statsd.SimpleMetricKeyGenerator
simple-metric-key-generator {
application = "my-application"
include-hostname = true
hostname-override = none
metric-name-normalization-strategy = normalize
}
}
modules {
kamon-scala.auto-start = yes
kamon-statsd.auto-start = yes
kamon-system-metrics.auto-start = yes
}
}
Your help will be very much appreciated.
It is necessary to add the AspectJ weaver as a Java agent when starting the application: -javaagent:aspectjweaver.jar
You can add the following settings to your project's sbt configuration:
.settings(
retrieveManaged := true,
libraryDependencies += "org.aspectj" % "aspectjweaver" % aspectJWeaverV)
The AspectJ weaver JAR will then be copied to ./lib_managed/jars/org.aspectj/aspectjweaver/aspectjweaver-[aspectJWeaverV].jar in your project root.
Then you can refer this JAR in your Dockerfile:
COPY ./lib_managed/jars/org.aspectj/aspectjweaver/aspectjweaver-*.jar /app-workdir/aspectjweaver.jar
WORKDIR /app-workdir
CMD ["java", "-javaagent:aspectjweaver.jar", "-jar", "app.jar"]