$geoWithin with mongoDB aggregate causes BadValue bad geo query - mongodb

I am trying to use a $geoWithin query in a aggregate pipeline, but I am getting a an
MongoError: exception: bad query: BadValue bad geo query: { $geoWithin: { $box: [ [ "13.618240356445312", "51.01343066212905" ], [ "13.865432739257812", "51.09662294502995" ] ] } }
My query is:
{
$match: {
'gps.coordinates.matched': {
$geoWithin: {
$box: [
[ swlng, swlat ],
[ nelng , nelat ]
]
}
}
}
},
{ $project : {shortGeohash: {$substr: ["$gps.geohash.original", 0, 11]}}},
{ $group: {_id: "$shortGeohash", count: {$sum:1}, originalDoc:{$push: "$$ROOT"}}}
The query only for $geoWithin as well $project...,$group work well on their own, but combined the error occurs.

I tried your query and it seems to actually work. I executed the query over a collection with documents such as this.
[{
"_id" : "5a2404674eb6d938c8f44856",
"code" : "M.12345",
"loc" : {
"type" : "Point",
"coordinates" : [
41.9009789,
12.5010465
]
}
},
...
]
The aggregation pipeline is this.
{
$match: {
'loc': {
$geoWithin: {
$box: [
[ 0, 0 ],
[ 5, 5 ]
]
}
}
}
},
{ $project : {subCode: {$substr: ["$code", 0, 4]}}},
{ $group: {_id: "$subCode", count: {$sum:1}, originalDoc:{$push: "$$ROOT"}}}
One of the results is this.
{
"_id" : "M.10",
"count" : 12.0,
"originalDoc" : [
{
"_id" : "5a2481c44eb6d92b6895633a",
"subCode" : "M.10"
},
.... //11 more items
]
}
Results are correctly returned with mongod v3.4.9.

It seems like $geoWithin is not one of the aggregation operators.
The reference example works, sadly, I am not aware of a way to add an aggregation to that.

Related

On MongoDb Aggregation lookup, does the let need special formating?

I am trying to use the MongoDB $lookup with the Uncorrelated Subqueries.
Using MongoDB 3.6.12 (support began on 3.6)
https://docs.mongodb.com/manual/reference/operator/aggregation/lookup/#join-conditions-and-uncorrelated-sub-queries
The following pipeline step is working, however if I swap out the first "userB" with the second, no results are returned.
{
from: 'friendships',
let: { requestuser: ObjectId("5c0a9c37b2365a002367df79"), postuser: ObjectId("5c0820ea17a69b00231627be") },
pipeline: [
{
$match : {
$or : [
{
"userA" : ObjectId("5c0820ea17a69b00231627be"),
"userB" : ObjectId("5c0a9c37b2365a002367df79")
// "userB" : '$$requestuser'
},
{
"userA" : ObjectId("5c0a9c37b2365a002367df79"),
"userB" : ObjectId("5c0820ea17a69b00231627be")
}
],
"accepted" : true
}
},
{
$project: {v: 'true'}
}
],
as: "match"}
Results with hard coded ObjectId:
"match" : [
{
"_id" : ObjectId("5d6171dd319401001fd326bf"),
"v" : "true"
}
]
Results using variable:
"match" : [
]
I feel like ObjectIds need special treatment. All the examples I could find are using simple variables like strings.
To verify the '$$requestUser' contained a value, I tested it on the projection:
"match" : [
{
"_id" : ObjectId("5d6171dd319401001fd326bf"),
"v" : ObjectId("5c0a9c37b2365a002367df79")
}
]
When you use un co-related sub queires, you need to use $expr to pass a variable.
You can try something like following.
{
$match: {
$expr: {
$or: [
{
$and:[
{
$eq: [ "userA", ObjectId("5c0820ea17a69b00231627be") ]
},
{
$eq: [ "userB", ObjectId("5c0a9c37b2365a002367df79") ]
},
{
$eq: [ "userB", "$$requestuser" ]
}
]
},
{
$and:[
{
$eq: [ "userA", ObjectId("5c0a9c37b2365a002367df79") ]
},
{
$eq: [ "userB", ObjectId("5c0820ea17a69b00231627be") ]
}
]
}
]
},
"accepted": true,
}
}
I have created a sample demo to show how $expr works inside the lookup : Sample demo for Uncorrelated Subquery

MongoDB: Matching points from one collection with polygons from another

I'm trying to match points in one collection with regions stored in another collection.
Here are examples of documents.
Points:
{
"_id" : ObjectId("5e36d904618c0ea59f1eb04f"),
"gps" : { "lat" : 50.073288, "lon" : 14.43979 },
"timeAdded" : ISODate("2020-02-02T15:13:22.096Z")
}
Regions:
{
"_id" : ObjectId("5e49a469afae4a11c4ff3cf7"),
"type" : "Feature",
"geometry" : {
"type" : "Polygon",
"coordinates" : [
[
[ -748397.88, -1049211.61 ],
[ -748402.77, -1049212.2 ],
...
[ -748410.41, -1049213.11 ],
[ -748403.05, -1049070.62 ]
]
]
},
"properties" : {
"Name" : "Region 1"
}
}
And the query I'm trying to construct is something like this:
db.points.aggregate([
{$project: {
coordinates: ["$gps.lon", "$gps.lat"]
}},
{$lookup: {
from: "regions", pipeline: [
{$match: {
coordinates: {
$geoWithin: {
$geometry: {
type: "Polygon",
coordinates: "$geometry.coordinates"
}
}
}
}}
],
as: "district"
}}
])
I'm getting an error:
assert: command failed: {
"ok" : 0,
"errmsg" : "Polygon coordinates must be an array",
"code" : 2,
"codeName" : "BadValue"
} : aggregate failed
I've noticed the structure of $geoWithin document is same as structure of one I have for each region. So I tried such query:
db.points.aggregate([
{$project: {
coordinates: ["$gps.lon", "$gps.lat"]
}},
{$lookup: {
from: "regions", pipeline: [
{$match: {
coordinates: {
$geoWithin: "$geometry.coordinates"
}
}}
],
as: "district"
}}
])
The error was same.
I looked up for geoqueries but surprisingly all found mentions had static region document instead of one taken from a collection. So I'm wondering - is it ever possible to map points with regions having that both document collections aren't static and taken from DB?
Unfortunately not possible
You could perform query below if $geometry could deal with MongoDB Aggregation Expressions.
db.points.aggregate([
{
$lookup: {
from: "regions",
let: {
coordinates: [
"$gps.lon",
"$gps.lat"
]
},
pipeline: [
{
$addFields: {
coordinates: "$$coordinates"
}
},
{
$match: {
coordinates: {
$geoWithin: {
$geometry: {
type: "Polygon",
coordinates: "$geometry.coordinates"
}
}
}
}
}
],
as: "district"
}
}
])

How to use $geoNear after $lookup

Hi i have the following problem:
I would like to use $geoNear (to count distance between two points) but after $loopback (and on collection that i joined).
This is the model for companyBases collection (i would like to join it):
{
"_id" : ObjectId("5d7cfe13f42e7345d967b378"),
"location" : {
"type" : "Point",
"coordinates" : [
20.633856,
49.761268
]
},
"vehicles" : [
{
"_id" : ObjectId("5d7cfe13f42e7345d967b340"),
...other fields that doesn't matter
}
]
}
This is vehicle collection:
{
"_id" : ObjectId("5d7cfe13f42e7345d967b340"),
...other fields that doesn't matter
}
I would like to join companyBase collection in aggregation on vehicles collection:
db.vehicles.aggregate([
{
$lookup: {
from: "companybases",
let: {
vehicleId: "$_id"
},
pipeline: [
{
$match: {
$expr: { $in: ["$$vehicleId", "$vehicles._id"] }
}
}
],
as: "companyBases"
}
},
{
$unwind: "$companyBases"
},
{
$geoNear: {
near: {
type: "Point",
coordinates: [50.02485, 20.0008]
},
distanceField: "distance",
spherical: true
}
}
]);
But it returns me:
{
"message" : "$geoNear is only valid as the first stage in a pipeline.",
"operationTime" : "Timestamp(1568472833, 1)",
"ok" : 0,
"code" : 40602,
"codeName" : "Location40602",
"$clusterTime" : {
"clusterTime" : "Timestamp(1568472833, 1)",
"signature" : {
"hash" : "AAAAAAAAAAAAAAAAAAAAAAAAAAA=",
"keyId" : "0"
}
},
"name" : "MongoError"
}
When i am doing the same pipeline on companybases collection it returns me documents with counted distance:
db.companybases.aggregate([
{
$geoNear: {
near: {
type: "Point",
coordinates: [50.02485, 20.0008]
},
distanceField: "distance",
spherical: true
}
}
]);
And result:
{
"_id" : ObjectId("5d7cfe13f42e7345d967b378"),
"location" : {
"type" : "Point",
"coordinates" : [
20.633856,
49.761268
]
},
"vehicles" : [
{
...some fields
},
],
...some fields
"distance" : 4209673.447019393
}
I realize that the error may be because of missing index on vehicles collection. So is there any way to calculate distance with $geoNear with $lookup ? Or maybe it's impossible and i have to do on my own ?
Simple solutions (you can put $geoNear in $lookup pipeline):
db.vehicles.aggregate([
{
$lookup: {
from: "companybases",
let: {
vehicleId: "$_id"
},
pipeline: [
{
$geoNear: {
near: {
type: "Point",
coordinates: [50.02485, 20.0008]
},
distanceField: "distance",
spherical: true
}
},
{
$match: {
$expr: { $in: ["$$vehicleId", "$vehicles._id"] }
}
}
],
as: "companyBases"
}
},
{
$unwind: "$companyBases"
}
]);
But that strongly impressed the performance (it takes at least 5 seconds), becuase $geoNear is used before match.
your error shows "message" : "$geoNear is only valid as the first stage in a pipeline.",
it is clear that you can't put '$geoNear' after $lookup
so You can only use $geoNear as the first stage of a pipeline.
just to exchange you agg order
use $geoNear at first then with $lookup

In MongoDB, how do I use a field in the document as input to a $geoWithin/$centerSphere expression?

I'm trying to write a MongoDB query that searches for documents within a radius centered on a specified location.
The query below works. It finds all documents that are within searching.radius radians of searching.coordinates.
However what I would like to do is add the current documents allowed_radius value to the searching.radius value, so that the allowed sphere is actually larger.
How can I phrase this query to make this possible?
Present Query:
collection.aggregate([
{
$project:{
location: "$location",
allowed_radius: "$allowed_radius"
}
},
{
$match: {
$and:
[
{ location: { $geoWithin: { $centerSphere: [ searching.coordinates, searching.radius ] }}},
{...},
...]
...}
]);
What I am trying to do (pseudo-query):
collection.aggregate([
{
$project:{
location: "$location",
allowed_radius: "$allowed_radius"
}
},
{
$match: {
$and:
[
{ location: { $geoWithin: { $centerSphere: [ searching.coordinates, { $add: [searching.radius, $allowed_radius]} ] }}},
{...},
...]
...}
]);
I tried using $geoWithin / $centerSphere, but couldn't make it work this way.
Here is another way of doing so, using the $geoNear operator:
Given this input:
db.collection.insert({
"airport": "LGW",
"id": 1,
"location": { type: "Point", coordinates: [-0.17818, 51.15609] },
"allowed_radius": 100
})
db.collection.insert({
"airport": "LGW",
"id": 2,
"location": { type: "Point", coordinates: [-0.17818, 51.15609] },
"allowed_radius": 0
})
db.collection.insert({
"airport": "ORY",
"id": 3,
"location": { type: "Point", coordinates: [2.35944, 48.72528] },
"allowed_radius": 10
})
And this index (which is required for $geoNear):
db.collection.createIndex( { location : "2dsphere" } )
With searching.radius = 1000:
db.collection.aggregate([
{ $geoNear: {
near: { "type" : "Point", "coordinates": [7.215872, 43.658411] },
distanceField: "distance",
spherical: true,
distanceMultiplier: 0.001
}},
{ $addFields: { radius: { "$add": ["$allowed_radius", 1000] } } },
{ $addFields: { isIn: { "$subtract": ["$distance", "$radius" ] } } },
{ $match: { isIn: { "$lte": 0 } } }
])
would return documents with id 1 (distance=1002 <= radius=1000+100) and 3 (distance=676 <= radius=1000+10) and discard id 2 (distance=1002 > 1000+0).
The distanceMultiplier parameter is used to bring back units to km.
$geoNear must be the first stage of an aggregation (due to the usage of the index I think), but one of the parameters of $geoNear is a match query on other fields.
Even if it requires the geospacial index, you can add additional dimensions to the index.
$geoNear doesn't take the location field as an argument, because it requires the collection to have a geospacial index. Thus $geoNear implicitly uses as location field (whatever the name of the field) the one indexed.
Finally, I'm pretty sure the last stages can be simplified.
The $geoNear stage is only used to project the distance on each record:
{ "airport" : "ORY", "distance" : 676.5790971238937, "location" : { "type" : "Point", "coordinates" : [ 2.35944, 48.72528 ] }, "allowed_radius" : 10, "id" : 3 }
{ "airport" : "LGW", "distance" : 1002.3351814526812, "location" : { "type" : "Point", "coordinates" : [ -0.17818, 51.15609 ] }, "allowed_radius" : 100, "id" : 1 }
{ "airport" : "LGW", "distance" : 1002.3351814526812, "location" : { "type" : "Point", "coordinates" : [ -0.17818, 51.15609 ] }, "allowed_radius" : 0, "id" : 2 }
In fact, the geoNear operator requires the use of the distanceField argument, which is used to project the computed distance on each record for the next stages of the query. At the end of the aggregation, returned records look like:
{
"airport" : "ORY",
"location" : { "type" : "Point", "coordinates" : [ 2.35944, 48.72528 ] },
"allowed_radius" : 10,
"id" : 3,
"distance" : 676.5790971238937,
"radius" : 1010,
"isIn" : -333.4209028761063
}
If necessary, you can remove fields produced by the query for the query (distance, radius, isIn) with a final $project stage. For instance: {"$project":{"distance":0}}

Performing case-statement in mongodb aggregation framework

I'm evaluating how well the MongoDB aggregation framework suits our needs as we are currently running on top of SQL Server. I'm having a hard time performing a specific query:
Say I have the following pseudo records (modeled as columns in a sql table and as a full document in a mongodb collection)
{
name: 'A',
timespent: 100,
},
{
name: 'B',
timespent: 200,
},
{
name: 'C',
timespent: 300,
},
{
name: 'D',
timespent: 400,
},
{
name: 'E',
timespent: 500,
}
I want to group the timespent field in to ranges and count the occurrences so I will get e.g. the following pseudo-records:
results{
0-250: 2,
250-450: 2,
450-650: 1
}
Note that these ranges (250, 450 and 650) are dynamic and will likely be altered over time by the user. In SQL we extracted the results with something like this:
select range, COUNT(*) as total from (
select case when Timespent <= 250 then '0-250'
when Timespent <= 450 then '200-450'
else '450-600' end as range
from TestTable) as r
group by r.range
Again, note that this sql is constructed dynamically by our app to fit the specific ranges available at any one time.
I'm struggling to find the appropriate constructs in the mongodb aggregation framework to perform such queries. I can query for the results of a single range by inserting a $match into the pipeline(i.e. getting the result of a single range) but I cannot grok how to extract all the ranges and their counts in a single pipeline query.
what corresponds to the "case" SQL statement in the aggregation framework, is the $cond operator (see manual). $cond statements can be nested to simulate "when-then" and "else", but I have chosen another approach, because it is easier to read (and to generate, see below): I'll use the $concat operator to write the range string, which then serves as grouping key.
So for the given collection:
db.xx.find()
{ "_id" : ObjectId("514919fb23700b41723f94dc"), "name" : "A", "timespent" : 100 }
{ "_id" : ObjectId("514919fb23700b41723f94dd"), "name" : "B", "timespent" : 200 }
{ "_id" : ObjectId("514919fb23700b41723f94de"), "name" : "C", "timespent" : 300 }
{ "_id" : ObjectId("514919fb23700b41723f94df"), "name" : "D", "timespent" : 400 }
{ "_id" : ObjectId("514919fb23700b41723f94e0"), "name" : "E", "timespent" : 500 }
the aggregate (hardcoded) looks like this:
db.xx.aggregate([
{ $project: {
"_id": 0,
"range": {
$concat: [{
$cond: [ { $lte: ["$timespent", 250] }, "range 0-250", "" ]
}, {
$cond: [ { $and: [
{ $gte: ["$timespent", 251] },
{ $lt: ["$timespent", 450] }
] }, "range 251-450", "" ]
}, {
$cond: [ { $and: [
{ $gte: ["$timespent", 451] },
{ $lt: ["$timespent", 650] }
] }, "range 450-650", "" ]
}]
}
}},
{ $group: { _id: "$range", count: { $sum: 1 } } },
{ $sort: { "_id": 1 } },
]);
and the result is:
{
"result" : [
{
"_id" : "range 0-250",
"count" : 2
},
{
"_id" : "range 251-450",
"count" : 2
},
{
"_id" : "range 450-650",
"count" : 1
}
],
"ok" : 1
}
In order to generate the aggregate command, you have to build the "range" projection as a JSON object ( or you could generate a string and then use JSON.parse(string) )
The generator looks like this:
var ranges = [ 0, 250, 450, 650 ];
var rangeProj = {
"$concat": []
};
for (i = 1; i < ranges.length; i++) {
rangeProj.$concat.push({
$cond: {
if: {
$and: [{
$gte: [ "$timespent", ranges[i-1] ]
}, {
$lt: [ "$timespent", ranges[i] ]
}]
},
then: "range " + ranges[i-1] + "-" + ranges[i],
else: ""
}
})
}
db.xx.aggregate([{
$project: { "_id": 0, "range": rangeProj }
}, {
$group: { _id: "$range", count: { $sum: 1 } }
}, {
$sort: { "_id": 1 }
}]);
which will return the same result as above.
Starting from MongoDB 3.4 we can use the $switch operator to perform a multi-switch statement in the $project stage.
The $group pipeline operator group the documents by "range" and return the "count" for each group using the $sum accumulator operator.
db.collection.aggregate(
[
{ "$project": {
"range": {
"$switch": {
"branches": [
{
"case": { "$lte": [ "$timespent", 250 ] },
"then": "0-250"
},
{
"case": {
"$and": [
{ "$gt": [ "$timespent", 250 ] },
{ "$lte": [ "$timespent", 450 ] }
]
},
"then": "251-450"
},
{
"case": {
"$and": [
{ "$gt": [ "$timespent", 450 ] },
{ "$lte": [ "$timespent", 650 ] }
]
},
"then": "451-650"
}
],
"default": "650+"
}
}
}},
{ "$group": {
"_id": "$range",
"count": { "$sum": 1 }
}}
]
)
With the following documents in our collection,
{ "_id" : ObjectId("514919fb23700b41723f94dc"), "name" : "A", "timespent" : 100 },
{ "_id" : ObjectId("514919fb23700b41723f94dd"), "name" : "B", "timespent" : 200 },
{ "_id" : ObjectId("514919fb23700b41723f94de"), "name" : "C", "timespent" : 300 },
{ "_id" : ObjectId("514919fb23700b41723f94df"), "name" : "D", "timespent" : 400 },
{ "_id" : ObjectId("514919fb23700b41723f94e0"), "name" : "E", "timespent" : 500 }
our query yields:
{ "_id" : "451-650", "count" : 1 }
{ "_id" : "251-450", "count" : 2 }
{ "_id" : "0-250", "count" : 2 }
We may want to add a $sort stage to the pipeline sort our document by range but this will only sort the documents in lexicographic order because of the type of "range".