Mongodb Aggregation orderless items in Group _id - mongodb

I am storing messages between users in a collection with this schema:
{
"_id" : ObjectId("5b23c455e3fce278f9e8d05f"), //message id
"f" : ObjectId("5ad13aaa1ba073601cc16bca"), //sender userid
"c" : "Hi", //contents
"t" : ObjectId("5ad2de5c691a4008cf6923b4"), //reciever userid
}
I am trying to query db to generate a list of current user conversations just like whatsapp list with the last message embedded using this aggregation:
db.getCollection('message').aggregate(
[
{ $match: { $or: [ { f: ObjectId("5ad13aaa1ba073601cc16bca") }, {t:ObjectId("5ad13aaa1ba073601cc16bca")} ] } },
{
$group : {
_id :{f:"$f",t:"$t"},
c: { $push: "$$ROOT" }
}
}
]
)
Result is:
{
"_id" : {
"f" : ObjectId("5ad13aaa1ba073601cc16bca"),
"t" : ObjectId("5ad2de5c691a4008cf6923b4")
},
"c" : [
{
"_id" : ObjectId("5b23c455e3fce278f9e8d05f"),
"f" : ObjectId("5ad13aaa1ba073601cc16bca"),
"c" : "Hi",
"t" : ObjectId("5ad2de5c691a4008cf6923b4"),
"d" : ISODate("2018-06-15T13:48:34.000Z"),
}
]
},
{
"_id" : {
"f" : ObjectId("5ad2de5c691a4008cf6923b4"),
"t" : ObjectId("5ad13aaa1ba073601cc16bca")
},
"c" : [
{
"_id" : ObjectId("5b235fea43966a767d2d9604"),
"f" : ObjectId("5ad2de5c691a4008cf6923b4"),
"c" : "Hello",
"t" : ObjectId("5ad13aaa1ba073601cc16bca"),
"d" : ISODate("2018-06-15T06:40:07.000Z"),
}
]
}
As you can see, there is a conversation between 5ad13aaa1ba073601cc16bca and 5ad2de5c691a4008cf6923b4. The group acts on f and t with their order. But we do just need to find conversations regardless of the order of f and t. Thus, the result document should be just like this with the latest message embedded:
{
"_id" : {
"x" : ObjectId("5ad13aaa1ba073601cc16bca"),
"y" : ObjectId("5ad2de5c691a4008cf6923b4")
},
"c" : [
{
"_id" : ObjectId("5b23c455e3fce278f9e8d05f"),
"f" : ObjectId("5ad13aaa1ba073601cc16bca"),
"c" : "Hi",
"t" : ObjectId("5ad2de5c691a4008cf6923b4"),
"d" : ISODate("2018-06-15T13:48:34.000Z"),
}
]
}
How can I handle this with aggregation? Any suggestions? Thanks.

Of course, you can handle it with an aggregation! As your _ids are ObjectIds, you can compare them with relational operators, therefore you can use $max and $min on them:
db.getCollection('message').aggregate([
{$match: {$or: [{f: _id}, {t: _id}]}},
{$group: {
_id: {
x: {$max: ['$f', '$t']},
y: {$min: ['$f', '$t']}
},
c: {$push: '$$ROOT'}}
}
])

Related

Mongoose, filter subdocument array by date

[{
"_id" : ObjectId("5f3d0f13fd6fd6667f8f56d6"),
"name" : "A",
"prices" : [
{
"_id" : ObjectId("5f3d0f16fd6fd6667f8f57fb"),
"d" : ISODate("2019-08-19T00:00:00.000Z"),
"h" : 182.1,
},
{
"_id" : ObjectId("5f3d0f16fd6fd6667f8f57fc"),
"d" : ISODate("2019-08-20T00:00:00.000Z"),
"h" : 182.1,
},
{
"_id" : ObjectId("5f3d0f16fd6fd6667f81f57fc"),
"d" : ISODate("2019-08-21T00:00:00.000Z"),
"h" : 182.1,
}
]
}]
Input:
from: '2019-08-20'
to: '2019-08-21'
Exepected output
[{
"_id" : ObjectId("5f3d0f13fd6fd6667f8f56d6"),
"name" : "A",
"prices" : [
{
"_id" : ObjectId("5f3d0f16fd6fd6667f8f57fc"),
"d" : ISODate("2019-08-20T00:00:00.000Z"),
"h" : 182.1,
},
{
"_id" : ObjectId("5f3d0f16fd6fd6667f81f57fc"),
"d" : ISODate("2019-08-21T00:00:00.000Z"),
"h" : 182.1,
}
]
}]
So I want to filter prices array so it only returns items within the given date range based on variable d
So some form of aggregation.
mongoose.model("stock").aggregate(...)
Some combination of $unwind $filter, $gte, $gle
You can do as below
db.collection.aggregate([
{
$project: {
items: {
$filter: {
input: "$prices",
as: "price",
cond: {
"$and": [//only date conditions
{
$gte: [
"$$price.d",
new Date("2019-08-20")
]
},
{
$lte: [
"$$price.d",
new Date("2019-08-21")
]
}
]
}
}
}
}
}
])
play

Find duplicate key in embedded sub document in mongodb

I am trying to craft a query that will allow me to find duplicate keys in subdocument in MongoDB.
It needs to be able to query any number of documents and see what keys are duplicated across them in a subdocument. The key of my subdocument is called attributes and I need to be able to target a particular query of documents and pull out duplicate attribute keys that they all share.
EDIT:
I forgot to mention that I do not know the names of the attributes ahead of time. I need to be able to essentially select distinct attributes that they share and aggregate the values.
Collection Sample:
[
{
sku: '123',
attributes: {
size: 'L',
custom: 7
}
},
{
sku: '456',
attributes: {
size: 'M'
}
},
{
sku: 'abc',
attributes: {
material: 'cotton'
size: 'S'
}
}
]
Desired Result (if possible):
{
size: [' S', 'M', 'L']
}
If the desired result is not possible I would at least like to be able to get back [ 'size' ]
This process needs to be optimized as much as possible and I just cant seem to get a query just right to return what I need, any help is greatly appreciated =)
Here is what I have so far
db.getCollection('myCollection').aggregate([
{ $match: {
_id: { $in: [ObjectId("55158b0bd6076278295cf022"), ObjectId("55158b0bd6076278295cf021"), ObjectId("55158b0bd6076278295cf01f") ] }
}
},
{ $project: { attributes: 1 }},
{ $group: { _id: '$attributes' } }
])
Which products this output:
{
"result" : [
{
"_id" : {
"shirt_size" : "S",
"shirt_color" : "Blue",
"custom_attr" : "adsfasdf"
}
},
{
"_id" : {
"shirt_size" : "M",
"shirt_color" : "Green"
}
},
{
"_id" : {
"shirt_size" : "L",
"shirt_color" : "Red"
}
}
],
"ok" : 1.0000000000000000,
"$gleStats" : {
"lastOpTime" : Timestamp(1427475045, 1),
"electionId" : ObjectId("54f7c1edf8e5ff44cec194b6")
}
}
I feel like it is close and I am just missing the last step :(
I think you need to $unwind the array, and then $group it and use $sum to count the appearance, then everything with sum > 1 is a duplicate.
Links:
http://docs.mongodb.org/manual/reference/operator/aggregation/unwind/
http://docs.mongodb.org/manual/reference/operator/aggregation/group/
http://docs.mongodb.org/manual/reference/operator/aggregation/sum/
The $addToSet(aggregation) returns an array of unique values - http://docs.mongodb.org/manual/reference/operator/aggregation/addToSet/
Using the following aggregation (get unique sizes per Doc):
db.coll1.aggregate([
{$unwind : "$testdoc"},
{$group : {_id: "$_id", size: {$addToSet: "$testdoc.attributes.size"}}}
])
Gives the following result:
{
"result" : [
{
"_id" : ObjectId("551621fe6155a7741a0d328a"),
"size" : [
"M",
"L"
]
},
{
"_id" : ObjectId("551621fe6155a7741a0d328b"),
"size" : [
"L"
]
},
{
"_id" : ObjectId("551621fe6155a7741a0d3289"),
"size" : [
"S",
"M",
"L"
]
}
],
"ok" : 1
}
The following aggregation returns unique sizes across all docs:
db.coll1.aggregate([
{$unwind : "$testdoc"},
{$group :
{_id: "AllSizes", size: {$addToSet: "$testdoc.attributes.size"}}} ])
Result:
{
"result" : [
{
"_id" : "AllSizes",
"size" : [
"S",
"M",
"L"
]
}
],
"ok" : 1
}
Based on the following Docs:
> db.coll1.find().pretty()
{
"_id" : ObjectId("551621fe6155a7741a0d3289"),
"testdoc" : [
{
"sku" : "123",
"attributes" : {
"size" : "L",
"custom" : 7
}
},
{
"sku" : "456",
"attributes" : {
"size" : "M"
}
},
{
"sku" : "abc",
"attributes" : {
"material" : "cotton",
"size" : "S"
}
}
]
}
{
"_id" : ObjectId("551621fe6155a7741a0d328a"),
"testdoc" : [
{
"sku" : "123",
"attributes" : {
"size" : "L",
"custom" : 7
}
},
{
"sku" : "456",
"attributes" : {
"size" : "M"
}
},
{
"sku" : "abc",
"attributes" : {
"material" : "cotton",
"size" : "M"
}
}
]
}
{
"_id" : ObjectId("551621fe6155a7741a0d328b"),
"testdoc" : [
{
"sku" : "123",
"attributes" : {
"size" : "L",
"custom" : 7
}
},
{
"sku" : "456",
"attributes" : {
"size" : "L"
}
},
{
"sku" : "abc",
"attributes" : {
"material" : "cotton",
"size" : "L"
}
}
]
}

$avg in mongodb aggregation

Document looks like this:
{
"_id" : ObjectId("361de42f1938e89b179dda42"),
"user_id" : "u1",
"evaluator_id" : "e1",
"candidate_id" : ObjectId("54f65356294160421ead3ca1"),
"OVERALL_SCORE" : 150,
"SCORES" : [
{ "NAME" : "asd", "OBTAINED_SCORE" : 30}, { "NAME" : "acd", "OBTAINED_SCORE" : 36}
]
}
Aggregation function:
db.coll.aggregate([ {$unwind:"$SCORES"}, {$group : { _id : { user_id : "$user_id", evaluator_id : "$evaluator_id"}, AVG_SCORE : { $avg : "$SCORES.OBTAINED_SCORE" }}} ])
Suppose if there are two documents with same "user_id" (say u1) and different "evaluator_id" (say e1 and e2).
For example:
1) Average will work like this ((30 + 20) / 2 = 25). This is working for me.
2) But for { evaluator_id : "e1" } document, score is 30 for { "NAME" : "asd" } and { evaluator_id : "e2" } document, score is 0 for { "NAME" : "asd" }. In this case, I want the AVG_SCORE to be 30 only (not (30 + 0) / 2 = 15).
Is it possible through aggregation??
Could any one help me out.
It's possible by placing a $match between the $unwind and $group aggregation pipelines to first filter the arrays which match the specified condition to include in the average computation and that is, score array where the obtained score is not equal to 0 "SCORES.OBTAINED_SCORE" : { $ne : 0 }
db.coll.aggregate([
{
$unwind: "$SCORES"
},
{
$match : {
"SCORES.OBTAINED_SCORE" : { $ne : 0 }
}
},
{
$group : {
_id : {
user_id : "$user_id",
evaluator_id : "$evaluator_id"
},
AVG_SCORE : {
$avg : "$SCORES.OBTAINED_SCORE"
}
}
}
])
For example, the aggregation result for this document:
{
"_id" : ObjectId("5500aaeaa7ef65c7460fa3d9"),
"user_id" : "u1",
"evaluator_id" : "e1",
"candidate_id" : ObjectId("54f65356294160421ead3ca1"),
"OVERALL_SCORE" : 150,
"SCORES" : [
{
"NAME" : "asd",
"OBTAINED_SCORE" : 0
},
{
"NAME" : "acd",
"OBTAINED_SCORE" : 36
}
]
}
will yield:
{
"result" : [
{
"_id" : {
"user_id" : "u1",
"evaluator_id" : "e1"
},
"AVG_SCORE" : 36
}
],
"ok" : 1
}

Mongodb Time Series operations and generation

I've got a Mongodb Collection with this kind of docs :
{
"_id" : ObjectId("53cb898bed4bd6c24ae07a9f"),
"account" : "C1"
"created_on" : ISODate("2014-10-01T01:23:00.000Z")
"value" : 253
}
and
{
"_id" : ObjectId("52cb898bed4bd6c24ae06a9e"),
"account" : "C2"
"created_on" : ISODate("2014-10-01T01:23:00.000Z")
"value" : 9381
}
There is a document every minutes for C1 and C2.
I would like to generate data for an other account "C0" which will be equal to : (C2 - C1)*0.25
So the aim is to generate data for every minutes in the collection.
According to you, is it possible to do that in mongo shell ?
Thank you very much :)
The logic to solve this problem, is as below:
a) group all the records by created_on date.
b) get the value of both the documents in each group.
c) calculate the difference the C2 and C1 documents for each group.
d) In case one of the documents is missing difference
would be the value of the existing document.
d) project a document with value as (difference*.25) in each group.
e) insert the projected document to the collection.
I would like to propose two solutions to this, the first one would be on your assumption,
There is a document every minutes for C1 and C2.
So for every created_on time, there would be only two documents, C1 and C2.
db.time.aggregate([ {
$match : {
"account" : {
$in : [ "C1", "C2" ]
}
}
}, {
$group : {
"_id" : "$created_on",
"first" : {
$first : "$value"
},
"second" : {
$last : "$value"
},
"count" : {
$sum : 1
}
}
}, {
$project : {
"_id" : 0,
"value" : {
$multiply : [ {
$cond : [ {
$lte : [ "$count", 1 ]
}, "$first", {
$subtract : [ "$first", "$second" ]
} ]
}, 0.25 ]
},
"created_on" : "$_id",
"account" : {
$literal : "C0"
}
}
} ]).forEach(function(doc) {
doc.value = Math.abs(doc.value);
db.time.insert(doc);
});
The second solution is based on real-time scenarios. For a particular created_on time, there can be 'n' number of C1 documents and 'm' number of C2 documents with different values, but we would need only one 'C0' document representing the differences, for that particular created_on time. You would need an extra $group pipeline operator as below:
db.time.aggregate([ {
$match : {
"account" : {
$in : [ "C1", "C2" ]
}
}
}, {
$group : {
"_id" : {
"created_on" : "$created_on",
"account" : "$account"
},
"created_on" : {
$first : "$created_on"
},
"values" : {
$sum : "$value"
}
}
}, {
$group : {
"_id" : "$created_on",
"first" : {
$first : "$values"
},
"second" : {
$last : "$values"
},
"count" : {
$sum : 1
}
}
}, {
$project : {
"_id" : 0,
"value" : {
$multiply : [ {
$cond : [ {
$lte : [ "$count", 1 ]
}, "$first", {
$subtract : [ "$first", "$second" ]
} ]
}, 0.25 ]
},
"created_on" : "$_id",
"account" : {
$literal : "C0"
}
}
} ]).forEach(function(doc) {
doc.value = Math.abs(doc.value);
db.time.insert(doc);
});

Aggregate of different subtypes in document of a collection

abstract document in collection md given:
{
vals : [{
uid : string,
val : string|array
}]
}
the following, partially correct aggregation is given:
db.md.aggregate(
{ $unwind : "$vals" },
{ $match : { "vals.uid" : { $in : ["x", "y"] } } },
{
$group : {
_id : { uid : "$vals.uid" },
vals : { $addToSet : "$vals.val" }
}
}
);
that may lead to the following result:
"result" : [
{
"_id" : {
"uid" : "x"
},
"vals" : [
[
"24ad52bc-c414-4349-8f3a-24fd5520428e",
"e29dec2f-57d2-43dc-818a-1a6a9ec1cc64"
],
[
"5879b7a4-b564-433e-9a3e-49998dd60b67",
"24ad52bc-c414-4349-8f3a-24fd5520428e"
]
]
},
{
"_id" : {
"uid" : "y"
},
"vals" : [
"0da5fcaa-8d7e-428b-8a84-77c375acea2b",
"1721cc92-c4ee-4a19-9b2f-8247aa53cfe1",
"5ac71a9e-70bd-49d7-a596-d317b17e4491"
]
}
]
as x is the result aggregated on documents containing an array rather than a string, the vals in the result is an array of arrays. what i look for in this case is to have a flattened array (like the result for y).
for me it seems like that what i want to achieve by one aggegration call only, is currently not supported by any given operation as e.g. a type conversion cannot be done or unwind expectes in every case an array as input type.
is map reduce the only option i have? if not ... any hints?
thanks!
You can use the aggregation to do the computation you want without changing your schema (though you might consider changing your schema simply to make queries and aggregations of this field easier to write).
I broke up the pipeline into multiple steps for readability. I also simplified your document slightly, again for readability.
Sample input:
> db.md.find().pretty()
{
"_id" : ObjectId("512f65c6a31a92aae2a214a3"),
"uid" : "x",
"val" : "string"
}
{
"_id" : ObjectId("512f65c6a31a92aae2a214a4"),
"uid" : "x",
"val" : "string"
}
{
"_id" : ObjectId("512f65c6a31a92aae2a214a5"),
"uid" : "y",
"val" : "string2"
}
{
"_id" : ObjectId("512f65e8a31a92aae2a214a6"),
"uid" : "y",
"val" : [
"string3",
"string4"
]
}
{
"_id" : ObjectId("512f65e8a31a92aae2a214a7"),
"uid" : "z",
"val" : [
"string"
]
}
{
"_id" : ObjectId("512f65e8a31a92aae2a214a8"),
"uid" : "y",
"val" : [
"string1",
"string2"
]
}
Pipeline stages:
> project1 = {
"$project" : {
"uid" : 1,
"val" : 1,
"isArray" : {
"$cond" : [
{
"$eq" : [
"$val.0",
[ ]
]
},
true,
false
]
}
}
}
> project2 = {
"$project" : {
"uid" : 1,
"valA" : {
"$cond" : [
"$isArray",
"$val",
[
null
]
]
},
"valS" : {
"$cond" : [
"$isArray",
null,
"$val"
]
},
"isArray" : 1
}
}
> unwind = { "$unwind" : "$valA" }
> project3 = {
"$project" : {
"_id" : 0,
"uid" : 1,
"val" : {
"$cond" : [
"$isArray",
"$valA",
"$valS"
]
}
}
}
Final aggregation:
> db.md.aggregate(project1, project2, unwind, project3, group)
{
"result" : [
{
"_id" : "z",
"vals" : [
"string"
]
},
{
"_id" : "y",
"vals" : [
"string1",
"string4",
"string3",
"string2"
]
},
{
"_id" : "x",
"vals" : [
"string"
]
}
],
"ok" : 1
}
If you modify your schema using always "vals.val" field as an array field (even when the record contains only one element) you can do it easily as follows:
db.test_col.insert({
vals : [
{
uid : "uuid1",
val : ["value1"]
},
{
uid : "uuid2",
val : ["value2", "value3"]
}]
});
db.test_col.insert(
{
vals : [{
uid : "uuid2",
val : ["value4", "value5"]
}]
});
Using this approach you only need to use two $unwind operations: one unwinds the "parent" array and the second unwinds every "vals.val" value. So, querying like
db.test_col.aggregate(
{ $unwind : "$vals" },
{ $unwind : "$vals.val" },
{
$group : {
_id : { uid : "$vals.uid" },
vals : { $addToSet : "$vals.val" }
}
}
);
You can obtain your expected value:
{
"result" : [
{
"_id" : {
"uid" : "uuid2"
},
"vals" : [
"value5",
"value4",
"value3",
"value2"
]
},
{
"_id" : {
"uid" : "uuid1"
},
"vals" : [
"value1"
]
}
],
"ok" : 1
}
And no, you can't execute this query using your current schema, since $unwind fails when the field isn't an array field.