Running MongoDB $geoWithin without a [long, lat] array

I have a MongoDB $geoWithin query as follows:
db.test.find(
  {
    'loc': {
      $geoWithin: {
        $geometry: {
          type: "Polygon",
          coordinates: [[list of co-ordinates]]
        }
      }
    }
  }
);
So here the query runs on the loc field, which is an array of [lng, lat] values. Unfortunately, in my data the lat and lng values are in 2 different fields, like:
{
  lat: 12,
  long: 122
}
In this case, how can I run the above query?

The best thing you can do really is to transform your documents to store the data better. By preference you should probably go for GeoJSON format. But more on that later.
Fortunately, since $geoWithin does not actually "require" an index ( but it really still is the better option to have one ) you can actually do the transformation "on the fly" with the aggregation framework instead:
Transform "On the Fly"
Hoping you have at least MongoDB 2.6, there is $map:
db.collection.aggregate([
  // Transform to array
  { "$project": {
    "location": {
      "$map": {
        "input": ["lng","lat"],
        "as": "el",
        "in": {
          "$cond": [
            { "$eq": [ "$$el", "lng" ] },
            "$long",
            "$lat"
          ]
        }
      }
    }
  }},
  // Then match
  { "$match": {
    "location": {
      "$geoWithin": {
        "$geometry": {
          "type": "Polygon",
          "coordinates": [[list of co-ordinates]]
        }
      }
    }
  }}
])
MongoDB 3.2 has a much simpler syntax:
db.collection.aggregate([
  // Transform to array - pretty simple huh!
  { "$project": {
    "location": ["$long","$lat"]
  }},
  // Then match
  { "$match": {
    "location": {
      "$geoWithin": {
        "$geometry": {
          "type": "Polygon",
          "coordinates": [[list of co-ordinates]]
        }
      }
    }
  }}
])
Or if you still have MongoDB 2.4 - Upgrade! Okay, use this then:
db.collection.aggregate([
  // Add an array field
  { "$project": {
    "long": 1,
    "lat": 1,
    "location": { "$const": [ "A", "B" ] }
  }},
  // Unwind it
  { "$unwind": "$location" },
  // Group back and map it!
  { "$group": {
    "_id": "$_id",
    "location": {
      "$push": {
        "$cond": [
          { "$eq": [ "$location", "A" ] },
          "$long",
          "$lat"
        ]
      }
    }
  }},
  // Then match
  { "$match": {
    "location": {
      "$geoWithin": {
        "$geometry": {
          "type": "Polygon",
          "coordinates": [[list of co-ordinates]]
        }
      }
    }
  }}
])
Tranform "Permanently"
But really the best case is to change the document structure permanantly. The modern way to do this is in bulk with something like:
var ops = [];
db.collection.find({}).forEach(function(doc) {
  ops.push({
    "updateOne": {
      "filter": { "_id": doc._id },
      "update": {
        "$set": {
          "location": {
            "type": "Point",
            "coordinates": [doc.long, doc.lat]
          }
        },
        "$unset": { "long": "", "lat": "" }
      }
    }
  });
  // Send only once per 1000 queued operations
  if ( ops.length % 1000 == 0 ) {
    db.collection.bulkWrite(ops);
    ops = [];
  }
})

// Clear remaining queue
if ( ops.length > 0 )
  db.collection.bulkWrite(ops);
But generally speaking, loop the source documents and update each one to create the new "location" field. Then of course "index" it:
db.collection.createIndex({ "location": "2dsphere" })
Now that the documents look like that and actually have an index, you can use regular $geoWithin queries directly, which will also run faster thanks to the indexed data.
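As a quick sketch, a direct query against the transformed documents might then look like this (the polygon ring below is a hypothetical placeholder, not real co-ordinates):
db.collection.find({
  "location": {
    "$geoWithin": {
      "$geometry": {
        "type": "Polygon",
        // GeoJSON positions are [lng, lat] pairs, and the first and
        // last points must be identical to close the ring
        "coordinates": [[
          [ 120.0, 10.0 ],
          [ 124.0, 10.0 ],
          [ 124.0, 14.0 ],
          [ 120.0, 14.0 ],
          [ 120.0, 10.0 ]
        ]]
      }
    }
  }
})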

Related

Mongo DB: How to aggregate on self Collection to pull Value(s) associated to the IDs in Hierarchical Dataset

I'm trying to write an aggregation in Mongo which would produce a result similar to SQL.
I'm now trying to achieve the same in Mongo with the above collection.
Please suggest how to build the Mongo aggregation in order to achieve my output.
Unwind both module_details and module_child then match them.
[
  {
    "$unwind": "$module.module_details.data"
  },
  {
    "$unwind": "$module.module_child.data"
  },
  {
    "$match": {
      "$expr": {
        "$eq": [
          "$module.module_details.data.module_child_id",
          "$module.module_child.data.module_child_id"
        ]
      }
    }
  },
  {
    "$project": {
      "_id": 0,
      "module_id": "$module.module_details.data.module_id",
      "name": "$module.module_child.data.name",
      "value": "$module.module_details.data.value"
    }
  }
]
You probably need to match on module_id as well. However, it was not part of the question.
[
  {
    "$match": {
      "module_id": "9898"
    }
  },
  {
    "$unwind": "$module.module_details.data"
  },
  {
    "$unwind": "$module.module_child.data"
  },
  {
    "$match": {
      "$expr": {
        "$eq": [
          "$module.module_details.data.module_child_id",
          "$module.module_child.data.module_child_id"
        ]
      }
    }
  },
  {
    "$project": {
      "_id": 0,
      "module_id": "$module.module_details.data.module_id",
      "name": "$module.module_child.data.name",
      "value": "$module.module_details.data.value"
    }
  }
]

MongoDB Aggregate Query to find the documents with missing values

I have a huge collection of objects where data is stored for different employees.
{
  "employee": "Joe",
  "areAllAttributesMatched": false,
  "characteristics": [
    {
      "step": "A",
      "name": "house",
      "score": "1"
    },
    {
      "step": "B",
      "name": "car"
    },
    {
      "step": "C",
      "name": "job",
      "score": "3"
    }
  ]
}
There are cases where the score for an object is completely missing, and I want to find all of those entries in the database.
I want the data in a format that makes it easy to find out which employee is missing the score for which step and which name.
In order to do this, I have written the following query, but it seems I am going wrong somewhere, because it is not displaying the output.
db.collection.aggregate([
  {
    "$unwind": "$characteristics"
  },
  {
    "$match": {
      "characteristics.score": {
        "$exists": false
      }
    }
  },
  {
    "$project": {
      "employee": 1,
      "name": "$characteristics.name",
      "step": "$characteristics.step",
      "_id": 0
    }
  }
])
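For the sample document above, the desired rows would presumably look like this (an illustrative sketch, since the question's format example is not shown):
{ "employee": "Joe", "step": "B", "name": "car" }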
You need to use $exists to check for existence.
playground
You can use $ifNull to handle both cases: 1. the score field is missing entirely, and 2. the score is null.
db.collection.aggregate([
  {
    "$unwind": "$characteristics"
  },
  {
    "$match": {
      "$expr": {
        "$eq": [
          { "$ifNull": [ "$characteristics.score", null ] },
          null
        ]
      }
    }
  },
  {
    "$group": {
      "_id": null,
      "documents": {
        "$push": {
          "employee": "$employee",
          "name": "$characteristics.name",
          "step": "$characteristics.step"
        }
      }
    }
  },
  {
    "$project": {
      "_id": false
    }
  }
])
Here is the Mongo playground for your reference.
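Given the sample document in the question, the pipeline above should produce output shaped roughly like this (an illustrative sketch, not verified output):
[
  { "documents": [ { "employee": "Joe", "name": "car", "step": "B" } ] }
]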

How to select related events from the same table?

How do I express the following self-join query in MongoDB?
SELECT e.user_id AS user_id,
       e.datetime AS started_at,
       (SELECT MIN(datetime)  -- taking the closest "end" event datetime of that userId
        FROM events
        WHERE type = "end" AND
              user_id = e.user_id AND
              datetime > e.datetime) AS end_at
FROM events AS e
WHERE e.type = "start"
Over the following event data:
{"_id" : "1", "type": "start", "datetime": "2022-02-01T10:15Z", "userId": "1"},
{"_id" : "2", "type": "end", "datetime": "2022-02-01T10:20Z", "userId": "1"},
{"_id" : "3", "type": "start", "datetime": "2022-02-01T10:16Z", "userId": "2"},
{"_id" : "4", "type": "end", "datetime": "2022-02-01T10:21Z", "userId": "2"},
{"_id" : "5", "type": "start", "datetime": "2022-02-02T11:01Z", "userId": "1"},
{"_id" : "6", "type": "end", "datetime": "2022-02-02T11:02Z", "userId": "1"}
The expected result should look like:
user_id | started_at        | end_at
1       | 2022-02-01T10:15Z | 2022-02-01T10:20Z
2       | 2022-02-01T10:16Z | 2022-02-01T10:21Z
1       | 2022-02-02T11:01Z | 2022-02-02T11:02Z
Maybe something like this:
db.collection.aggregate([
  {
    $sort: {
      "datetime": 1
    }
  },
  {
    $project: {
      "d": {
        k: "$type",
        v: "$datetime"
      },
      userId: 1
    }
  },
  {
    $group: {
      _id: "$userId",
      e: {
        $push: "$d"
      }
    }
  },
  {
    $addFields: {
      e: {
        $map: {
          input: {
            $range: [
              0,
              { $size: "$e" },
              2
            ]
          },
          as: "index",
          in: {
            $slice: [ "$e", "$$index", 2 ]
          }
        }
      }
    }
  },
  {
    $unwind: "$e"
  },
  {
    $project: {
      events: {
        "$arrayToObject": "$e"
      }
    }
  },
  {
    $project: {
      userId: "$_id",
      start_at: "$events.start",
      end_at: "$events.end",
      _id: 0
    }
  }
])
Explained:
( The solution will work only if the user events start / end sequentially )
1. Sort the documents by datetime.
2. Rename the fields type & datetime to k, v ( suitable for $arrayToObject ).
3. Group the documents per userId ( note this solution has the limitation that the total number of events must not exceed 16MB per userId ).
4. Split the events into date/time pairs ( start + end, considering a user cannot start a new event while the previous one has not finished ); see the sketch after this list.
5. $unwind the events array.
6. Convert each start/end array to an object.
7. Project the fields as per the expected output.
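A small sketch of the pairing in step 4, with hypothetical values, showing how $range and $slice turn the flat sorted array into consecutive pairs:
// Hypothetical grouped array for one user:
// e = [ { k: "start", v: "t1" }, { k: "end", v: "t2" },
//       { k: "start", v: "t3" }, { k: "end", v: "t4" } ]
//
// { $range: [ 0, { $size: "$e" }, 2 ] }  => [ 0, 2 ]
// { $slice: [ "$e", 0, 2 ] }  => [ { k: "start", v: "t1" }, { k: "end", v: "t2" } ]
// { $slice: [ "$e", 2, 2 ] }  => [ { k: "start", v: "t3" }, { k: "end", v: "t4" } ]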
playground
Not sure what the exact use case is, but in general it looks a bit more practical to add a sessionId to every event document, so that if a user can start parallel sessions the start/end events can be correlated more easily based on the sessionId. A possible document shape is sketched below.
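For example, the sample events might carry a sessionId field like this (the sessionId values here are invented for illustration):
{ "_id": "1", "type": "start", "datetime": "2022-02-01T10:15Z", "userId": "1", "sessionId": "s-100" },
{ "_id": "2", "type": "end",   "datetime": "2022-02-01T10:20Z", "userId": "1", "sessionId": "s-100" }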
Here's a pipeline that closely (exactly?) follows your SQL. I converted the string datetime to ISODate to ensure comparisons were done properly, but perhaps this is unnecessary.
db.collection.aggregate([
  {
    // match each start
    "$match": { "type": "start" }
  },
  {
    // lookup ends for userId in collection
    "$lookup": {
      "from": "collection",
      "localField": "userId",
      "foreignField": "userId",
      "let": {
        "isoDate": {
          "$dateFromString": {
            "dateString": "$datetime",
            "format": "%Y-%m-%dT%H:%MZ"
          }
        }
      },
      "pipeline": [
        {
          "$match": {
            "type": "end",
            "$expr": {
              "$gt": [
                {
                  "$dateFromString": {
                    "dateString": "$datetime",
                    "format": "%Y-%m-%dT%H:%MZ"
                  }
                },
                "$$isoDate"
              ]
            }
          }
        }
      ],
      "as": "endArray"
    }
  },
  {
    // output desired fields
    "$project": {
      "_id": 0,
      "userId": 1,
      "started_at": "$datetime",
      "end_at": {
        // assumes original collection was sorted
        "$first": "$endArray.datetime"
      }
    }
  }
])
Try it on mongoplayground.net.
Here's another pipeline that uses "$setWindowFields", but it's not ideal. I don't know how to filter "$setWindowFields" "output" given the allowed operators, etc., but it works. Improvement comments welcome!
db.collection.aggregate([
  {
    // add winField sorted array to each doc
    // containing the userId docs following
    // the current doc
    "$setWindowFields": {
      "partitionBy": "$userId",
      "sortBy": { "datetime": 1 },
      "output": {
        "winField": {
          "$push": "$$CURRENT",
          "window": {
            "documents": [ 1, "unbounded" ]
          }
        }
      }
    }
  },
  {
    // just keep start docs
    "$match": { "type": "start" }
  },
  {
    // sorting on start datetime
    "$sort": { "datetime": 1 }
  },
  {
    // output desired fields
    "$project": {
      "_id": 0,
      "userId": 1,
      "started_at": "$datetime",
      "end_at": {
        // grab first end datetime
        "$getField": {
          "field": "datetime",
          "input": {
            "$first": {
              "$filter": {
                "input": "$winField",
                "cond": { "$eq": [ "$$this.type", "end" ] }
              }
            }
          }
        }
      }
    }
  }
])
Try it on mongoplayground.net.

How to sum values in a nested date range in MongoDB

I need to sum the values for 2018-06-01 through 2018-06-30 for each document in the collection. Each key in "days" is a different date and value. What should the mongo aggregate command look like? The result should look something like:
{ "_id": "Product_123", "June_Sum": value }
That's really not a great structure for the sort of operation you now want to do. The whole point of keeping data in such a format is that you "increment" it as you go.
For example:
var now = Date.now(),
    today = new Date(now - ( now % ( 1000 * 60 * 60 * 24 ))).toISOString().substr(0,10);

var product = "Product_123";

db.counters.updateOne(
  {
    "month": today.substr(0,7),
    "product": product
  },
  {
    "$inc": {
      [`dates.${today}`]: 1,
      "totals": 1
    }
  },
  { "upsert": true }
)
In that way, each subsequent update with $inc both increments the "key" used for the "date" and increments the "totals" property of the matched document. So after a few iterations you would end up with something like:
{
  "_id" : ObjectId("5af395c53945a933add62173"),
  "product" : "Product_123",
  "month" : "2018-05",
  "dates" : {
    "2018-05-10" : 2,
    "2018-05-09" : 1
  },
  "totals" : 3
}
If you're not actually doing that then you "should" be since it's the intended usage pattern for such a structure.
Without keeping a "totals" or similar type of entry within the document(s) storing these keys, the only methods left for "aggregation" in processing are to effectively coerce the "keys" into an "array" form.
MongoDB 3.6 with $objectToArray
db.collection.aggregate([
  // Only consider documents with entries within the range
  { "$match": {
    "$expr": {
      "$anyElementTrue": {
        "$map": {
          "input": { "$objectToArray": "$days" },
          "in": {
            "$and": [
              { "$gte": [ "$$this.k", "2018-06-01" ] },
              { "$lt": [ "$$this.k", "2018-07-01" ] }
            ]
          }
        }
      }
    }
  }},
  // Aggregate for the month
  { "$group": {
    "_id": "$product", // <-- or whatever your key for the value is
    "total": {
      "$sum": {
        "$sum": {
          "$map": {
            "input": { "$objectToArray": "$days" },
            "in": {
              "$cond": {
                "if": {
                  "$and": [
                    { "$gte": [ "$$this.k", "2018-06-01" ] },
                    { "$lt": [ "$$this.k", "2018-07-01" ] }
                  ]
                },
                "then": "$$this.v",
                "else": 0
              }
            }
          }
        }
      }
    }
  }}
])
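For reference, the $objectToArray step above is what makes the "days" keys addressable at all; it converts the object into an array of k/v documents, e.g. (input values hypothetical):
// { "days": { "2018-06-01": 5, "2018-06-02": 3 } }
// { "$objectToArray": "$days" }
//   => [ { "k": "2018-06-01", "v": 5 }, { "k": "2018-06-02", "v": 3 } ]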
Other versions with mapReduce
db.collection.mapReduce(
  // Taking the same presumption on your un-named key for "product"
  function() {
    Object.keys(this.days)
      .filter( k => k >= "2018-06-01" && k < "2018-07-01")
      .forEach(k => emit(this.product, this.days[k]));
  },
  function(key, values) {
    return Array.sum(values);
  },
  {
    "out": { "inline": 1 },
    "query": {
      "$where": function() {
        return Object.keys(this.days).some(k => k >= "2018-06-01" && k < "2018-07-01")
      }
    }
  }
)
Both are pretty horrible, since you need to calculate whether the "keys" fall within the required range just to select the documents, and even then you still need to filter through the keys in those documents again in order to decide whether to accumulate each one or not.
Also noting here that if your "Product_123" is also the "name of a key" in the document and NOT a "value", then you're performing even more "gymnastics" simply to convert that "key" into a "value" form, which is how databases do things, and is the whole point of the otherwise unnecessary coercion going on here.
Better Option
So as opposed to the handling as originally shown where you "should" be accumulating "as you go" with every write to the document(s) at hand, the better option than needing "processing" in order to coerce into an array format is to simply put the data into an array in the first place:
{
  "_id" : ObjectId("5af395c53945a933add62173"),
  "product" : "Product_123",
  "month" : "2018-05",
  "dates" : [
    { "day": "2018-05-09", "value": 1 },
    { "day": "2018-05-10", "value": 2 }
  ],
  "totals" : 3
}
These are infinitely better for purposes of query and further analysis:
db.counters.aggregate([
  { "$match": {
    // "month": "2018-05" // <-- or really just that, since it's there
    "dates": {
      "$elemMatch": {
        "day": { "$gte": "2018-05-01", "$lt": "2018-06-01" }
      }
    }
  }},
  { "$group": {
    "_id": null,
    "total": {
      "$sum": {
        "$sum": {
          "$map": {
            "input": {
              "$filter": {
                "input": "$dates",
                "cond": {
                  "$and": [
                    { "$gte": [ "$$this.day", "2018-05-01" ] },
                    { "$lt": [ "$$this.day", "2018-06-01" ] }
                  ]
                }
              }
            },
            "in": "$$this.value"
          }
        }
      }
    }
  }}
])
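For the sample document shown above, this would return something like (illustrative):
{ "_id" : null, "total" : 3 }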
That is of course really efficient, and it deliberately avoids the "total" field that is already there for demonstration only. But of course you keep the "running accumulation" on writes by doing:
db.counters.updateOne(
  { "product": product, "month": today.substr(0,7), "dates.day": today },
  { "$inc": { "dates.$.value": 1, "total": 1 } }
)
Which is really simple. Adding upserts adds a "little" more complexity:
// A "batch" of operations with bulkWrite
db.counter.bulkWrite([
// Incrementing the matched element
{ "udpdateOne": {
"filter": {
"product": product,
"month": today.substr(0,7)},
"dates.day": today
},
"update": {
"$inc": { "dates.$.value": 1, "total": 1 }
}
}},
// Pushing a new "un-matched" element
{ "updateOne": {
"filter": {
"product": product,
"month": today.substr(0,7)},
"dates.day": { "$ne": today }
},
"update": {
"$push": { "dates": { "day": today, "value": 1 } },
"$inc": { "total": 1 }
}
}},
// "Upserting" a new document were not matched
{ "updateOne": {
"filter": {
"product": product,
"month": today.substr(0,7)},
},
"update": {
"$setOnInsert": {
"dates": [{ "day": today, "value": 1 }],
"total": 1
}
},
"upsert": true
}}
])
But generally you're getting the "best of both worlds" by having something simple to accumulate "as you go", as well as something that's easy and efficient to query and do further analysis on later.
The overall moral of the story is to "choose the right structure" for what you actually want to do. Don't put things into "keys" which are clearly intended to be used as "values", since it's an anti-pattern which just adds complexity and inefficiency to the rest of your purposes, even if it seemed right for a "single" purpose when you originally stored it that way.
NOTE: Also, I'm not really advocating storing "strings" for "dates" in any way here. As noted, the better approach is to use "values" where you really mean "values" you intend to use. When storing date data as a "value", it is always far more efficient and practical to store it as a BSON Date, and NOT a "string".
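For instance, the array entries above would then be written with real Date values rather than strings (a minimal sketch, values hypothetical):
// preferred: a real BSON Date, which compares and sorts natively
{ "day": ISODate("2018-05-10T00:00:00Z"), "value": 2 }
// rather than the string form used in the examples above
{ "day": "2018-05-10", "value": 2 }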

How to find match in documents in Mongo and Mongo aggregation?

I have the following JSON structure in a mongo collection:
{
  "students": [
    { "name": "ABC", "fee": 1233 },
    { "name": "PQR", "fee": 345 }
  ],
  "studentDept": [
    { "name": "ABC", "dept": "A" },
    { "name": "XYZ", "dept": "X" }
  ]
},
{
  "students": [
    { "name": "XYZ", "fee": 133 },
    { "name": "LMN", "fee": 56 }
  ],
  "studentDept": [
    { "name": "XYZ", "dept": "X" },
    { "name": "LMN", "dept": "Y" },
    { "name": "ABC", "dept": "P" }
  ]
}
Now I want to calculate the following output:
if students.name = studentDept.name,
then my result should be as below:
{
  "name": "ABC",
  "fee": 1233,
  "dept": "A"
},
{
  "name": "XYZ",
  "fee": 133,
  "dept": "X"
},
{
  "name": "LMN",
  "fee": 56,
  "dept": "Y"
}
Do I need to use mongo aggregation, or is it possible to get the above output without using aggregation?
What you are really asking here is how to make MongoDB return something that is actually quite different from the form in which you store it in your collection. The standard query operations do allow a "limited" form of "projection", but even as the title on the page shared in that link suggests, this is really only about "limiting" the fields to display in results based on what is present in your document already.
So any form of "alteration" requires some form of aggregation, which with both the aggregate and mapReduce operations allows you to "re-shape" the document results into a form that is different from the input. Perhaps also the main thing people miss with the aggregation framework in particular is that it is not just all about "aggregating", and in fact the "re-shaping" concept is core to its implementation.
So in order to get the results you want, you can take an approach like this, which should be suitable for most cases:
db.collection.aggregate([
  { "$unwind": "$students" },
  { "$unwind": "$studentDept" },
  { "$group": {
    "_id": "$students.name",
    "tfee": { "$first": "$students.fee" },
    "tdept": {
      "$min": {
        "$cond": [
          { "$eq": [
            "$students.name",
            "$studentDept.name"
          ]},
          "$studentDept.dept",
          false
        ]
      }
    }
  }},
  { "$match": { "tdept": { "$ne": false } } },
  { "$sort": { "_id": 1 } },
  { "$project": {
    "_id": 0,
    "name": "$_id",
    "fee": "$tfee",
    "dept": "$tdept"
  }}
])
Or alternatively, just "filter out" the cases where the two "name" fields do not match and then project the content with the fields you want, if crossing content between documents is not important to you:
db.collection.aggregate([
  { "$unwind": "$students" },
  { "$unwind": "$studentDept" },
  { "$project": {
    "_id": 0,
    "name": "$students.name",
    "fee": "$students.fee",
    "dept": "$studentDept.dept",
    "same": { "$eq": [ "$students.name", "$studentDept.name" ] }
  }},
  { "$match": { "same": true } },
  { "$project": {
    "name": 1,
    "fee": 1,
    "dept": 1
  }}
])
From MongoDB 2.6 and upwards you can even do the same thing "inline" to the document between the two arrays. You still want to reshape that array content in your final output, but it is possibly done a little faster:
db.collection.aggregate([
  // Compares entries in each array within the document
  { "$project": {
    "students": {
      "$map": {
        "input": "$students",
        "as": "stu",
        "in": {
          "$setDifference": [
            { "$map": {
              "input": "$studentDept",
              "as": "dept",
              "in": {
                "$cond": [
                  { "$eq": [ "$$stu.name", "$$dept.name" ] },
                  {
                    "name": "$$stu.name",
                    "fee": "$$stu.fee",
                    "dept": "$$dept.dept"
                  },
                  false
                ]
              }
            }},
            [false]
          ]
        }
      }
    }
  }},
  // Students is now an array of arrays. So unwind it twice
  { "$unwind": "$students" },
  { "$unwind": "$students" },
  // Rename the fields and exclude
  { "$project": {
    "_id": 0,
    "name": "$students.name",
    "fee": "$students.fee",
    "dept": "$students.dept"
  }}
])
So where you want to essentially "alter" the structure of the output, you need to use one of the aggregation tools to do it. And you can, even if you are not really aggregating anything.