MongoDB Query aggreate/group by and geometry/location/geoNear query - mongodb

I am trying to fetch the documents with geometry within a certain location, however only want to return a single document per UUID. For this project, in most cases, there are many documents for each UUID that match the $near selector, hence we get many documents with the same UUID.
Can anyone assist with completing the below query so it only returns a single document per uuid (most recent "date")?
db.device.find(
{
location:
{ $near :
{
$geometry: { type: "Point", coordinates: [ -73.9667, 40.78 ] },
$minDistance: 1000,
}
}
}
)
Here's an example of the collection:
{
"_id":ObjectId("5a4f1ff0fc6ded723265e6b0"),
"uuid":"user1",
"date": "2018-01-20 11:58:29.000",
"location":{
"type": "Point",
"coordinates":[
//remove for demo sake
]
}
},
{
"_id":ObjectId("5a62a245ce689f68245450a7"),
"uuid":"user2",
"date": "2018-01-20 11:58:07.000",
"location":{
"type": "Point",
"coordinates":[
//remove for demo sake
]
}
},
{
"_id":ObjectId("5a62a20fce689f7a14648c62"),
"uuid":"user1",
"date": "2018-01-20 11:58:39.000",
"location":{
"type": "Point",
"coordinates":[
//remove for demo sake
]
}
},
{
"_id":ObjectId("5a62a205ce689f7039203923"),
"uuid":"user1",
"date": "2018-01-20 11:58:49.000",
"location":{
"type": "Point",
"coordinates":[
//remove for demo sake
]
}
},
{
"_id":ObjectId("5a62a277ce689f703a3eacb3"),
"uuid":"user2",
"date": "2018-01-20 11:58:59.000",
"location":{
"type": "Point",
"coordinates":[
//remove for demo sake
]
}
}

When performing this kind of heavier operations, you can switch to using an aggregation pipeline.
Using this input:
{
"uuid": "user1",
"date": "2018-01-20 11:58:29.000",
"location": { "type": "Point", "coordinates":[-0.17818, 51.15609] }
},
{
"uuid": "user2",
"date": "2018-01-20 11:58:07.000",
"location": { "type": "Point", "coordinates":[2.35944, 48.72528] }
},
{
"uuid": "user1",
"date": "2018-01-20 11:58:39.000",
"location": { "type": "Point", "coordinates": [1.45414, 43.61132] }
},
{
"uuid": "user1",
"date": "2018-01-20 11:58:49.000",
"location": { "type": "Point", "coordinates":[-6.24889, 53.33306] }
},
{
"uuid": "user2",
"date": "2018-01-20 11:58:59.000",
"location": { "type": "Point", "coordinates":[-3.68248, 40.47184] }
}
Using this index:
db.device.createIndex( { location : "2dsphere" } )
This pipeline should perform what you want:
db.device.aggregate([
{ $match: { location: { $geoWithin: { $centerSphere: [ [ -0.17818, 51.15609 ], 0.1232135647961246 ] } } } },
{ $sort: { "date": -1 } },
{ $group: { _id: { uuid: "$uuid" }, users: { $push: { "uuid": "$uuid", "date": "$date", "location": "$location" } } } },
{ $project: { user: { $arrayElemAt: ["$users", 0] } } }
])
I first adapted the find/$near operator to an aggregation equivalent ($geoWithin/$centerSphere). It matches locations within 0.123 radians (488 kms (0.123*3963.2)).
I then directly sort by date, this way when documents will then be grouped by user, I will be able to easily select the first per user.
I then group by user.
And finally for each user, as I have a value produced by $group which is an array of the user documents (sorted), I just extract the first item of the array with $arrayElemAt.
This produces:
{
"_id" : { "uuid" : "user2" },
"user": {
"uuid": "user2",
"date": "2018-01-20 11:58:07.000",
"location": { "type": "Point", "coordinates": [ 2.35944, 48.72528 ] }
}
}
{
"_id": { "uuid" : "user1" },
"user": {
"uuid": "user1",
"date": "2018-01-20 11:58:49.000",
"location": { "type": "Point", "coordinates": [ -6.24889, 53.33306 ] }
}
}

Related

How to get a 'distanceField' in Atlas search?

When I use $geoNear I can return a calculated distance by including the "distanceField" which is extremely useful when displaying items and their distance from a location. My code for this is:
$geoNear: {
near: {
type: "Point",
coordinates: [
req.body.position.lng ,
req.body.position.lat
]
},
distanceField: "dist.calculated",
maxDistance: req.body.distance,
spherical: true
When using Atlas search, I need the same or similar "distanceField", but havn't been able to find a solution. I am open to other workarounds. Here's my $search pipeline:
"$search": {
"index": "searchResults",
"compound": {
"must": [
{
"text": {
"query": `${req.body.query}`,
path: [
"name",
"description",
"tag", ],
"fuzzy": {
"maxEdits": 2,
"prefixLength": 4
}
}
},
{
"geoWithin": {
"circle": {
"center": {
"type": "Point",
"coordinates": [
req.body.position.lng,
req.body.position.lat
]
},
"radius": req.body.distance,
},
"path": "location",
}
}
],
"filter":[{
"text": {
"query": "Active",
"path": "active",
}
}]
}
}
}```

$geoNear for a list embeded locations and return distance for all location in list

I have an user collection:
{
"name": "David",
"age": 20,
"addresses": [
{
"radius": 10000,
"location": {
"type": "Point",
"coordinates": [106.785299, 20.999999]
}
},
{
"radius": 30000,
"location": {
"type": "Point",
"coordinates": [105.785299, 20.979733]
}
}
]
}
Each user will have one or more address. I want to calculate the distance between these addresses with a point, then using calculated distance to compare with radius of each address. If distance < radius then keep address else remove address from addresses list. I am using below query:
db.collection.aggregrate(
{
"$geoNear": {
"near": {"type": "Point", "coordinates": [ 105.823620, 21.006047 ]},
"distanceField": "distance",
"key": "addresses.location"
}
}
)
But this query only return the distance of nearest address, like this:
{
"name": "David",
"age": 20,
"addresses": [
{
"radius": 10000,
"location": {
"type": "Point",
"coordinates": [105.785299, 20.979733]
}
},
{
"radius": 30000,
"location": {
"type": "Point",
"coordinates": [105.785299, 20.979733]
}
}
],
"distance": 110000 // <--- distance is added here, just for nearest addrest
}
My expected result:
{
"name": "David",
"age": 20,
"addresses": [
{
"radius": 10000,
"location": {
"type": "Point",
"coordinates": [105.785299, 20.979733]
},
"distance": 2000``// <------ add distance here for each addesss`
},
{
"radius": 30000,
"location": {
"type": "Point",
"coordinates": [105.785299, 20.979733]
},
"distance": 30000 // <------ add distance here for each addesss
}
]
}
So next stage I can compare distance with radius and keep proper adddress
Anybody know how to do it ? thanks
You need to store each address in an individual document:
{
"_id" : ObjectId("5ec77d127df107cd889d567d"),
"name" : "David",
"age" : 20,
"addresses" : {
"radius" : 10000,
"location" : {
"type" : "Point",
"coordinates" : [
105.785299,
20.979733
]
}
}
},
{
"_id" : ObjectId("5ec77f7843732e8f9a63bf67"),
"name" : "David",
"age" : 20,
"addresses" : {
"radius" : 30000,
"location" : {
"type" : "Point",
"coordinates" : [
105.795299,
20.989733
]
}
}
}
Now, we perform $geoNear and $group stages:
db.user.aggregate([
{
"$geoNear": {
"near": {
"type": "Point",
"coordinates": [
105.823620,
21.006047
]
},
"distanceField": "distance",
"key": "addresses.location"
}
},
{
"$group": {
"_id": "$name",
"name": {
"$first": "$name"
},
"age": {
"$first": "$age"
},
"addresses": {
"$push": {
"$mergeObjects": [
"$addresses",
{
"distance": "$distance"
}
]
}
}
}
}
])

Mongo Aggregation using $Max

I have a collection that stores history, i.e. a new document is created every time a change is made to the data, I need to extract fields based on the max value of a date field, however my query keeps returning either all of the dates or requires me to push the fields into an array which make the data hard to analyze for an end-user.
Expected output as CSV:
MAX(DATE), docID, url, type
1579719200216, 12371, www.foodnetwork.com, food
1579719200216, 12371, www.cnn.com, news,
1579719200216, 12371, www.wikipedia.com, info
Sample Doc:
{
"document": {
"revenueGroup": "fn",
"metaDescription": "",
"metaData": {
"audit": {
"lastModified": 1312414124,
"clientId": ""
},
"entities": [],
"docId": 1313943,
"url": ""
},
"rootUrl": "",
"taggedImages": {
"totalSize": 1,
"list": [
{
"image": {
"objectId": "woman-reaching-for-basket",
"caption": "",
"url": "",
"height": 3840,
"width": 5760,
"owner": "Facebook",
"alt": "Woman reaching for basket"
},
"tags": {
"totalSize": 4,
"list": []
}
}
]
},
"title": "The 8 Best Food Items of 2020",
"socialTitle": "The 8 Best Food Items of 2020",
"primaryImage": {
"objectId": "woman-reaching-for-basket.jpg",
"caption": "",
"url": "",
"height": 3840,
"width": 5760,
"owner": "Hero Images / Getty Images",
"alt": "Woman reaching for basket in laundry room"
},
"subheading": "Reduce your footprint with these top-performing diets",
"citations": {
"list": []
},
"docId": 1313943,
"revisionId": "1313943_1579719200216",
"templateType": "LIST",
"documentState": {
"activeDate": 579719200166,
"state": "ACTIVE"
}
},
"url": "",
"items": {
"totalSize": "",
"list": [
{
"type": "recipe",
"data": {
"comInfo": {
"list": [
{
"type": "food",
"id": "https://www.foodnetwork.com"
}
]
},
"type": ""
},
"id": 4,
"uuid": "1313ida-qdad3-42c3-b41d-223q2eq2j"
},
{
"type": "recipe",
"data": {
"comInfo": {
"list": [
{
"type": "news",
"id": "https://www.cnn.com"
},
{
"type": "info",
"id": "https://www.wikipedia.com"
}
]
},
"type": "PRODUCT"
},
"id": 11,
"uuid": "318231jc-da12-4475-8994-283u130d32"
}
]
},
"vertical": "food"
}
Below query:
db.collection.aggregate([
{
$match: {
vertical: "food",
"document.documentState.state": "ACTIVE",
"document.templateType": "LIST"
}
},
{
$unwind: "$document.items"
},
{
$unwind: "$document.items.list"
},
{
$unwind: "$document.items.list.contents"
},
{
$unwind: "$document.items.list.contents.list"
},
{
$match: {
"document.items.list.contents.list.type": "recipe",
"document.revenueGroup": "fn"
}
},
{
$sort: {
"document.revisionId": -1
}
},
{
$group: {
_id: {
_id: {
docId: "$document.docId",
date: {$max: "$document.revisionId"}
},
url: "$document.items.list.contents.list.data.comInfo.list.id",
type: "$document.items.list.contents.list.data.comInfo.list.type"
}
}
},
{
$project: {
_id: 1
}
},
{
$sort: {
"document.items.list.contents.list.id": 1, "document.revisionId": -1
}
}
], {
allowDiskUse: true
})
First of all, you need to go through the documentation of the $group aggregation here.
you should be doing this instead:
{
$group: {
"_id": "$document.docId"
"date": {
$max: "$document.revisionId"
},
"url": {
$first: "$document.items.list.contents.list.data.comInfo.list.id"
},
"type": {
$first:"$document.items.list.contents.list.data.comInfo.list.type"
}
}
}
This will give you the required output.

How to group multiple documents in mongodb aggregation

I have a collection named offers and a sample documents are below,
{
"offerId": "3a06d230-5836-44c2-896b-f5bfb6b27a77",
"outlets": {
"storeUuid": "b3da5136-15a4-4593-aabd-4788f7d80f19",
"location": {
"type": "Point",
"coordinates": [
77,
22
]
}
}"startTime": "2018-04-06T08:03:37.954Z",
"endTime": "2018-04-07T07:35:00.046Z"
},
{
"offerId": "3a06d230-5836-44c2-896b-f5bfb6b27a77",
"outlets": {
"storeUuid": "f18a9a9e-539e-4a9e-b313-d947e2ce76de",
"location": {
"type": "Point",
"coordinates": [
77,
22
]
}
},
"startTime": "2018-04-06T08:03:37.954Z",
"endTime": "2018-04-07T07:35:00.046Z"
},
{
"offerId": "e6c1f140-6407-4481-9a18-56789d90f549",
"outlets": {
"storeUuid": "b3cdd08d-f7f5-4544-8279-08489974148c",
"location": {
"type": "Point",
"coordinates": [
77,
22
]
}
},
"startTime": "2018-04-05T12:30:37.954Z",
"endTime": "2018-04-08T12:38:00.046Z"
},
{
"offerId": "e6c1f140-6407-4481-9a18-56789d90f549",
"outlets": {
"storeUuid": "09d6fc18-9d5c-4b4f-8de1-c6f555b8a370",
"location": {
"type": "Point",
"coordinates": [
77,
22
]
}
},
"startTime": "2018-04-05T12:30:37.954Z",
"endTime": "2018-04-08T12:38:00.046Z"
},
{
"offerId": "e6c1f140-6407-4481-9a18-56789d90f549",
"outlets": {
"storeUuid": "bf71e102-9da1-47b5-81e1-98d27f20bcf4",
"location": {
"type": "Point",
"coordinates": [
77,
22
]
}
},
"startTime": "2018-04-05T12:30:37.954Z",
"endTime": "2018-04-08T12:38:00.046Z"
}
I want to group by offerId and result should be
[
{
"offerId": "e6c1f140-6407-4481-9a18-56789d90f549",
"outlet": [
{
"storeUuid": "bf71e102-9da1-47b5-81e1-98d27f20bcf4",
"location": {
"type": "Point",
"coordinates": [
77,
22
]
}
},
{
"storeUuid": "09d6fc18-9d5c-4b4f-8de1-c6f555b8a370",
"location": {
"type": "Point",
"coordinates": [
77,
22
]
}
},
{
"storeUuid": "b3cdd08d-f7f5-4544-8279-08489974148c",
"location": {
"type": "Point",
"coordinates": [
77,
22
]
}
}
],
"startTime": "2018-04-05T12:30:37.954Z",
"endTime": "2018-04-08T12:38:00.046Z"
},
{
"offerId": "3a06d230-5836-44c2-896b-f5bfb6b27a77",
"outlet": [
{
"storeUuid": "f18a9a9e-539e-4a9e-b313-d947e2ce76de",
"location": {
"type": "Point",
"coordinates": [
77,
22
]
}
},
{
"storeUuid": "b3da5136-15a4-4593-aabd-4788f7d80f19",
"location": {
"type": "Point",
"coordinates": [
77,
22
]
}
}
],
"startTime": "2018-04-06T08:03:37.954Z",
"endTime": "2018-04-07T07:35:00.046Z"
}
]
My aggregation query so far,
db.offers.aggregate([
{
$group: {
_id: "$offerId",
outlet: {
$addToSet: "$outlets"
}
}
}
])
Any help would be appreciated
Do it like this
Add a projection of the fields that you want.
Group by your desired field
Create a new propertie and push to it the nested fields
db.getCollection('offers').aggregate([
{ $project : { offerId : 1 , outlets : 1, startTime: 1, endTime: 1 } },
{ $group: {
_id: "$offerId" ,
outlet: {
$push: {
storeUuid : "$outlets.storeUuid",
location: "$outlets.location"
}
},
startTime: { "$first": "$startTime"},
endTime: { "$first": "$endTime"}
}
}
])
$addToSet: Returns an array of all unique values that results from
applying an expression to each document in a group of documents that
share the same group by key. Order of the elements in the output array
is unspecified. can be used for
$push: Returns an array of all values that result from applying an
expression to each document in a group of documents that share the
same group by key.
In Your case $push operator is required:
db.offers.aggregate([
{$unwind:'$outlets'},
{$group:{_id:'$offerId',outlets:{$push:'$outlets'}}}
])
For more:
https://docs.mongodb.com/manual/reference/operator/aggregation/push/#grp._S_push
https://docs.mongodb.com/manual/reference/operator/aggregation/addToSet/
For complete list of Aggregation pipeline operators
https://docs.mongodb.com/manual/reference/operator/aggregation/

Using mongodb aggregate to match documents and get all values for a field

In Mongo 3.4
I have a collection with documents in the format:
Type1:
{
"Level1": {
"#version": "genR",
"#revision": "aux",
"Level2": {
"container": {
"type": "ARRAY",
"categories": [
{
"category": [
{
"Type": "STRING",
"Value": "Currency"
},
{
"Type": "STRING",
"Value": "EUR"
}
]
},
{
"category": [
{
"Type": "STRING",
"Value": "Portfolio"
},
{
"Type": "STRING",
"Value": "ABCDEF"
}
]
},
]
}
}
}
}
Type 2:
{
"Level1": {
"#version": "genR",
"#revision": "aux",
"Level2": {
"container": {
"type": "ARRAY",
"categories": [
{
"category": [
{
"Type": "STRING",
"Value": "Currency"
},
{
"Type": "STRING",
"Value": "EUR"
}
]
},
{
"category": [
{
"Type": "STRING",
"Value": "Portfolio"
},
{
"Type": "STRING",
"Value": "ABCDEF"
}
]
},
{
"category": [
{
"Type": "STRING",
"Value": "Short Description"
},
{
"Type": "STRING",
"Value": "Cash Only"
}
]
},
]
}
}
}
}
How do i write an aggregate statement so that I get ALL the Currency Values, ONLY from the documents where Portfolio matches a certain value.
I have been using pymongo's aggregate framework as below:
pipeline = [{"$unwind":"$Level1.Level2.container.categories"},{"$unwind":"$Level1.Level2.container.categories.category"},{"$match":{"Level1.Level2.container.categories.category.Value":"Portfolio"}}]
pprint(db.command('aggregate',collection,pipeline=pipeline))
But no results. Pymongo is a little confusing. Even if someone can point the general approach, it would really help.
The expected response assuming 4 matching documents (each with varying number of category items) is:
{'Currency': [{'Level1': {'Level2': {'container': {'categories': {'category': {'Value': 'EUR'}}}}}},
{'Level1': {'Level2': {'container': {'categories': {'category': {'Value': 'EUR'}}}}}},
{'Level1': {'Level2': {'container': {'categories': {'category': {'Value': 'USD'}}}}}},
{'Level1': {'Level2': {'container': {'categories': {'category': {'Value': 'EUR'}}}}}}]}
Your structure is not ideal but you can use below query.
The below $match stage $ands two conditions. Looks in category array ($elemMatch) under categories ($elemMatch) array for elements satisfying both ($all) Portfolio match with ABCDEF value condition followed by condition for element with Currrency value.
$unwind stage is to break down the categories followed by $match to keep the Currency category embedded array documents.
$unwind stage is to break down the category followed by $match to remove the Currency Value embedded document.
Final two stages is to $group + $push the remaining data into embedded array and $project the Currency value.
You can run one stage at a time to view the intermediate output for better understanding.
db.collection.aggregate(
{ $match :
{ $and :
[
{ "Level1.Level2.container.categories":
{ $elemMatch:
{ "category":
{ $all:
[
{ $elemMatch : { "Type": "STRING", "Value": "Portfolio" } },
{ $elemMatch : { "Type": "STRING", "Value": "ABCDEF" } }
]
}
}
}
},
{ "Level1.Level2.container.categories":
{ $elemMatch:
{ "category":
{ $elemMatch : { "Type": "STRING", "Value": "Currency" } }
}
}
}
]
}
},
{ $unwind : "$Level1.Level2.container.categories" },
{ $match : { "Level1.Level2.container.categories.category.Value": "Currency" } },
{ $unwind : "$Level1.Level2.container.categories.category" },
{ $match : { "Level1.Level2.container.categories.category.Value": { $ne : "Currency" } } },
{ $group: { _id: null, "Currency": { $push: "$$ROOT" } } },
{ $project: { _id: 0, "Currency.Level1.Level2.container.categories.category.Value": 1 } } )