Match Distinct Count from Multiple Arrays - mongodb

Now I have a collection col including docs like follows:
{
"_id": 1,
"shares": [{
"fundcode": "000001",
"lastshares": 1230.20,
"agencyno": "260",
"netno": "260"
},{
"fundcode": "000002",
"lastshares": 213124.00,
"agencyno": "469",
"netno": "001"
},{
"fundcode": "000003",
"lastshares": 10000.80,
"agencyno": "469",
"netno": "002"
}
],
"trade": [{
"fundcode": "000001",
"c_date": "20160412",
"agencyno": "260",
"netno": "260",
"bk_tradetype": "122",
"confirmbalance": 1230.20,
"cserialno": "10110000119601",
"status": "1"
},{
"fundcode": "000002",
"c_date": "20160506",
"agencyno": "469",
"netno": "001",
"bk_tradetype": "122",
"confirmbalance": 213124.00,
"cserialno": "10110000119602",
"status": "1"
},{
"fundcode": "000003",
"c_date": "20170507",
"agencyno": "469",
"netno": "002",
"bk_tradetype": "122",
"confirmbalance": 10000.80,
"netvalue": 1.0000,
"cserialno": "10110000119602",
"status": "1"
}
]
}
How can I express a selection like the following SQL as a MongoDB query?
SELECT _id
FROM col
WHERE col.shares.lastshares > 1000
AND col.trade.agencyno = '469'
GROUP BY _id
HAVING COUNT(DISTINCT col.shares.fundcode) > 2
AND COUNT(DISTINCT col.trade.fundcode) > 2
I tried the $unwind, $group, $match aggregate pipeline twice, but I did not get the right answer. Thanks for any help.

It doesn't really help that the supplied sample does not meet the conditions, but that is only because the "trade" array produces just 2 distinct matches, which is not enough to meet the "greater than 2" constraint in the query.
The structure is certainly different than in an RDBMS, so "sub-queries" do not apply, but at least you modelled these as arrays. Ideally we would not use $unwind here at all.
And therefore all we need to do is "count" the "distinct" matches from within the arrays. This can basically be applied within a $redact using $map, $setDifference and $size as the main operations:
db.getCollection('collection').aggregate([
{ "$match": {
"shares.lastshares": { "$gt": 1000 },
"trade.agencyno": "469"
}},
{ "$redact": {
"$cond": {
"if": {
"$and": [
{ "$gt": [
{ "$size": {
"$setDifference": [
{ "$map": {
"input": "$shares",
"as": "el",
"in": {
"$cond": {
"if": { "$gt": [ "$$el.lastshares", 1000 ] },
"then": "$$el.fundcode",
"else": false
}
}
}},
[false]
]
}},
2
]},
{ "$gt": [
{ "$size": {
"$setDifference": [
{ "$map": {
"input": "$trade",
"as": "el",
"in": {
"$cond": {
"if": { "$eq": [ "$$el.agencyno", "469" ] },
"then": "$$el.fundcode",
"else": false
}
}
}},
[false]
]
}},
2
]}
]
},
"then": "$$KEEP",
"else": "$$PRUNE"
}
}},
/*
{ "$addFields": {
"shares": {
"$filter": {
"input": "$shares",
"as": "el",
"cond": { "$gt": [ "$$el.lastshares", 1000 ] }
}
},
"trade": {
"$filter": {
"input": "$trade",
"as": "el",
"cond": { "$eq": [ "$$el.agencyno", "469" ] }
}
}
}}
*/
])
That makes it basically compatible with MongoDB 2.6 and upwards. The commented-out $addFields stage (which itself needs MongoDB 3.4) is only there so you can see the results of the "filter"; it is not needed, since the query in the question asks for "just the document _id". Returning the whole document takes less work, so add a $project for just the _id on the end if you really want that.
Also, to taste, you can use $filter instead with MongoDB 3.2 and later releases, but the syntax in this case is actually a little longer:
db.getCollection('collection').aggregate([
{ "$match": {
"shares.lastshares": { "$gt": 1000 },
"trade.agencyno": "469"
}},
{ "$redact": {
"$cond": {
"if": {
"$and": [
{ "$gt": [
{ "$size": {
"$setDifference": [
{ "$map": {
"input": {
"$filter": {
"input": "$shares",
"as": "el",
"cond": { "$gt": [ "$$el.lastshares", 1000 ] }
}
},
"as": "el",
"in": "$$el.fundcode"
}},
[]
]
}},
2
]},
{ "$gt": [
{ "$size": {
"$setDifference": [
{ "$map": {
"input": {
"$filter": {
"input": "$trade",
"as": "el",
"cond": { "$eq": [ "$$el.agencyno", "469" ] }
}
},
"as": "el",
"in": "$$el.fundcode"
}},
[]
]
}},
2
]}
]
},
"then": "$$KEEP",
"else": "$$PRUNE"
}
}},
/*
{ "$addFields": {
"shares": {
"$filter": {
"input": "$shares",
"as": "el",
"cond": { "$gt": [ "$$el.lastshares", 1000 ] }
}
},
"trade": {
"$filter": {
"input": "$trade",
"as": "el",
"cond": { "$eq": [ "$$el.agencyno", "469" ] }
}
}
}}
*/
])
The basic principle here is that the part with:
having (count(distinct fundcode))...
is achieved by applying $size and $setDifference to the array content "filtered" by the conditions. In fact the "GROUP BY" parts are not even required, since the "array" already represents the relationship in a "grouped" form. Think of the overall $redact statement as the "HAVING" here.
If your MongoDB is truly ancient and you cannot use those forms, then it still is possible with $unwind. And this time we $addToSet to get the "distinct" entries:
// Count the distinct fundcodes in each array using only $unwind / $group:
// unwind one array, re-apply its filter, and rebuild it as a set of fundcodes.
db.getCollection('collection').aggregate([
// Pre-select documents with at least one qualifying element in each array.
{ "$match": {
"shares.lastshares": { "$gt": 1000 },
"trade.agencyno": "469"
}},
// One document per "shares" element, keeping only the elements over 1000.
{ "$unwind": "$shares" },
{ "$match": {
"shares.lastshares": { "$gt": 1000 },
}},
// Back to one document per _id: "shares" becomes the set of distinct fundcodes,
// while the original "trade" array is carried through via $first.
{ "$group": {
"_id": "$_id",
"shares": { "$addToSet": "$shares.fundcode" },
"trade": { "$first": "$trade" }
}},
// Same treatment for "trade": unwind, filter on agencyno, collect distinct fundcodes.
{ "$unwind": "$trade" },
{ "$match": {
"trade.agencyno": "469"
}},
{ "$group": {
"_id": "$_id",
"shares": { "$first": "$shares" },
"trade": { "$addToSet": "$trade.fundcode" }
}},
// The "HAVING" step: index 2 exists only when a set holds at least three
// entries, i.e. "greater than two" distinct fundcodes.
{ "$match": {
"shares.2": { "$exists": true },
"trade.2": { "$exists": true }
}}
])
Where in this case the "HAVING" is represented by the $match clause, where the notations such as "shares.2": { "$exists": true } actually ask if the array being tested actually has a "third index", which in turn means it has "greater than two", which is the point of the condition.
But the document only has "two" matches
As noted it would have helped your question if you actually supplied a document that matched the conditions you asked for. Unfortunately the supplied document falls short of the required number of matches for the "trade" array in the document.
To make the condition match the supplied document, we instead use $gte ("greater than or equal to") 2 on the "trade" condition:
db.getCollection('collection').aggregate([
{ "$match": {
"shares.lastshares": { "$gt": 1000 },
"trade.agencyno": "469"
}},
{ "$redact": {
"$cond": {
"if": {
"$and": [
{ "$gt": [
{ "$size": {
"$setDifference": [
{ "$map": {
"input": "$shares",
"as": "el",
"in": {
"$cond": {
"if": { "$gt": [ "$$el.lastshares", 1000 ] },
"then": "$$el.fundcode",
"else": false
}
}
}},
[false]
]
}},
2
]},
{ "$gte": [
{ "$size": {
"$setDifference": [
{ "$map": {
"input": "$trade",
"as": "el",
"in": {
"$cond": {
"if": { "$eq": [ "$$el.agencyno", "469" ] },
"then": "$$el.fundcode",
"else": false
}
}
}},
[false]
]
}},
2
]}
]
},
"then": "$$KEEP",
"else": "$$PRUNE"
}
}},
{ "$addFields": {
"shares": {
"$filter": {
"input": "$shares",
"as": "el",
"cond": { "$gt": [ "$$el.lastshares", 1000 ] }
}
},
"trade": {
"$filter": {
"input": "$trade",
"as": "el",
"cond": { "$eq": [ "$$el.agencyno", "469" ] }
}
}
}}
])
Which outputs in that form as:
{
"_id" : 1.0,
"shares" : [
{
"fundcode" : "000001",
"lastshares" : 1230.2,
"agencyno" : "260",
"netno" : "260"
},
{
"fundcode" : "000002",
"lastshares" : 213124.0,
"agencyno" : "469",
"netno" : "001"
},
{
"fundcode" : "000003",
"lastshares" : 10000.8,
"agencyno" : "469",
"netno" : "002"
}
],
"trade" : [
{
"fundcode" : "000002",
"c_date" : "20160506",
"agencyno" : "469",
"netno" : "001",
"bk_tradetype" : "122",
"confirmbalance" : 213124.0,
"cserialno" : "10110000119602",
"status" : "1"
},
{
"fundcode" : "000003",
"c_date" : "20170507",
"agencyno" : "469",
"netno" : "002",
"bk_tradetype" : "122",
"confirmbalance" : 10000.8,
"netvalue" : 1.0,
"cserialno" : "10110000119602",
"status" : "1"
}
]
}
Or with the $unwind, relax the length to test for 2 positions:
db.getCollection('collection').aggregate([
{ "$match": {
"shares.lastshares": { "$gt": 1000 },
"trade.agencyno": "469"
}},
{ "$unwind": "$shares" },
{ "$match": {
"shares.lastshares": { "$gt": 1000 },
}},
{ "$group": {
"_id": "$_id",
"shares": { "$addToSet": "$shares.fundcode" },
"trade": { "$first": "$trade" }
}},
{ "$unwind": "$trade" },
{ "$match": {
"trade.agencyno": "469"
}},
{ "$group": {
"_id": "$_id",
"shares": { "$first": "$shares" },
"trade": { "$addToSet": "$trade.fundcode" }
}},
{ "$match": {
"shares.2": { "$exists": true },
"trade.1": { "$exists": true }
}}
])
Which returns:
{
"_id" : 1.0,
"shares" : [
"000003",
"000002",
"000001"
],
"trade" : [
"000003",
"000002"
]
}
But of course both identify the "document" that meets the conditions, which is what the original query asks for, and therefore it's the same basic result regardless of the content returned. You can always $project just the _id if you must.

Related

how to filter the product and optimize mongodb query?

How to filter the product and optimize mongodb query,
We would like to get popular products based on some conditions, i.e. how many orders, views and likes each product has.
db.products.aggregate([
{
"$lookup": {
"from": "orders",
"localField": "_id",
"foreignField": "product_id",
"as": "orders"
}
},
{
"$addFields": {
"orderCount": {
"$size": {
"$cond": [
{
"$isArray": "$orders"
},
"$orders",
[]
]
}
}
}
},
{
"$addFields": {
"likeCount": {
"$size": {
"$cond": [
{
"$isArray": "$likes"
},
"$likes",
[]
]
}
}
}
},
{
"$addFields": {
"sumCount": {
"$sum": [
"$orderCount",
"$likeCount",
"$view"
]
}
}
},
{
$sort: {
"sumCount": -1
}
}
])
https://mongoplayground.net/p/fIG3-yHGuV6
I currently have to use multiple $addFields stages. What would be the best option to get the products that have the most orders, likes and views? Please guide.
thanks
I would suggest 2 corrections,
The size of orders does not need the "is it an array?" check, because the $lookup stage always returns an array.
You can do both operations for orderCount and likeCount in a single $addFields stage
Your final query would be:
db.products.aggregate([
{
"$lookup": {
"from": "orders",
"localField": "_id",
"foreignField": "product_id",
"as": "orders"
}
},
{
"$addFields": {
"orderCount": { "$size": "$orders" },
"likeCount": {
"$size": {
"$cond": [{ "$isArray": "$likes" }, "$likes", []]
}
}
}
},
{
"$addFields": {
"sumCount": {
"$sum": ["$orderCount", "$likeCount", "$view"]
}
}
},
{ "$sort": { "sumCount": -1 } }
])
Playground
You can also use projection to minimize the code
{
"$project": {
"likes": 1,
"orderCount": {
"$size": {
"$cond": {
"if": {
"$isArray": [
"$orders"
]
},
"then": "$orders",
"else": []
}
}
},
"likeCount": {
"$size": {
"$cond": {
"if": {
"$isArray": [
"$likes"
]
},
"then": "$likes",
"else": []
}
}
},
"views": {
"$ifNull": [
"$view",
0
]
}
}
},
https://mongoplayground.net/p/qUNftLP_-PN
check the mongoplayground.

Use $addToSet condition vise in mongodb

I have the MongoDB query below, in which I am using $addToSet. Now I want to apply it conditionally.
Worksheet.aggregate([
{
"$group": {
"_id": null,
"todayBilling": {
"$sum": {
"$cond": [{ "$and" : [ { "$eq": [ "$isBilling", true] }, { $eq: [ "$date",new Date(moment().format('l'))]}] },"$hours",0 ]
}
},
"todayProjects": { "$addToSet": "$projectId" }
},
},
{ "$addFields": { "todayProjects": { "$size": "$todayProjects" }}},
{
"$lookup":{
"from": "projects",
"let": {},
"pipeline": [
{
"$group": { "_id": null, "count": { "$sum": 1 } }
}
],
"as": "totalProjects"
}
},
{'$unwind':'$totalProjects'}
])
Now I want todayProjects to count only the projects from documents dated today. That is, where "todayProjects": { "$addToSet": "$projectId" } appears, I want to apply $cond with the condition below:
{ $eq: [ "$date",new Date(moment().format('l'))]}

MongoDB: Get all $matched elements individually from an array

I'm trying to get all matched elements individually, here is the sample data and the query.
// json
[
{
"name": "Mr Cool",
"ican": [
{
"subcategory": [
{
"id": "5bffdba824488b182ec86f8d", "name": "Cricket"
},
{
"id": "5bffdba824488b182ec86f8c", "name": "Footbal"
}
],
"category": "5bffdba824488b182ec86f88",
"name": "Sports"
}
]
}
]
// query
// Select documents that have any subcategory named in the list, returning only
// _id and name (one result per document, not one per matching subcategory).
// NOTE: the sample document above spells the second subcategory "Footbal", so
// "Football" here does not match that element.
db.collection.aggregate([
{
"$match": {
"ican.subcategory.name": { $in: ["Cricket","Football"] }
}
},
{
"$project": { "_id": 1, "name": 1, }
}
])
I'm getting the combined result, but I need each matching record individually. I tried $all and $elemMatch but got the same response. How can I get the results shown below? I'm using aggregate because I will be using the $geoNear pipeline stage for getting the nearby users.
// current result
[
{
"_id": ObjectId("5a934e000102030405000000"),
"name": "Mr Cool"
}
]
// expected result
[
{
"_id": ObjectId("5a934e000102030405000000"),
"name": "Mr Cool",
"subcategory": "Cricket"
},
{
"_id": ObjectId("5a934e000102030405000000"),
"name": "Mr Cool",
"subcategory": "Footbal"
}
]
Thank you
Try this Mongo Playground
// Emit one { _id, name, subcategory } document per matching subcategory entry.
db.col.aggregate([
  // Cheap pre-filter so only documents containing a wanted subcategory get unwound.
  {"$match" : {"ican.subcategory.name": { "$in": ["Cricket","Football"] }}},
  // Flatten both array levels: one document per subcategory entry.
  {"$unwind" : "$ican"},
  {"$unwind" : "$ican.subcategory"},
  // Keep only the entries that actually match.
  {"$match" : {"ican.subcategory.name": { "$in": ["Cricket","Football"] }}},
  // Shape each entry directly (_id is included by default). This replaces the
  // former $group(_id: null)/$push + $unwind + $replaceRoot round trip, which
  // produced the same documents but first accumulated every match into a single
  // document, and a single document is subject to the BSON size limit.
  {"$project" : {"name" : 1, "subcategory" : "$ican.subcategory.name"}}
])
You can use the aggregation below, which filters the nested arrays first and only applies $unwind to the already-filtered result, for better performance:
// Emit one { _id, name, subcategory } document per matching subcategory,
// filtering inside the arrays before any $unwind takes place.
db.collection.aggregate([
  // Pre-select documents that contain at least one wanted subcategory.
  { "$match": { "ican.subcategory.name": { "$in": ["Cricket","Football"] }}},
  { "$project": {
    // Fold every "ican" entry into one flat array of matching rows.
    "ican": {
      "$reduce": {
        "input": "$ican",
        "initialValue": [],
        "in": {
          "$concatArrays": [
            { "$filter": {
              // One candidate row per subcategory of the current "ican" entry.
              // _id is copied into the row because $replaceRoot below discards
              // the top-level fields of the document.
              "input": {
                "$map": {
                  "input": "$$this.subcategory",
                  "as": "s",
                  "in": { "_id": "$_id", "name": "$name", "subcategory": "$$s.name" }
                }
              },
              "as": "fil",
              // Must list the same names as the $match above; with only
              // ["Football"] here the "Cricket" rows were silently dropped.
              "cond": { "$in": ["$$fil.subcategory", ["Cricket","Football"]] }
            }},
            "$$value"
          ]
        }
      }
    }
  }},
  // One output document per surviving row, promoted to the top level.
  { "$unwind": "$ican" },
  { "$replaceRoot": { "newRoot": "$ican" }}
])

How we can use $toUpper with array fields?

How can we use $toUpper with an array field? I have the following query, which compares the array field 'locations' with an array of upper-case items. My problem is how to convert the values of the locations field to upper case before comparing them with the array.
var array = ["KABUL","KAPISA","WARDAK","LOGAR","PARWAN","BAGHLAN","NANGARHAR","LAGHMAN",
"BAMYAN","PANJSHER","KHOST","GHAZNI","KUNARHA","PAKTYA","PAKTIKA","KUNDUZ",
"NOORISTAN","SAMANGAN","TAKHAR","DAYKUNDI","BADAKHSHAN","BALKH","GHOR",
"UROZGAN","FARYAB","ZABUL","SAR-E-PUL","NIMROZ","JAWZJAN","HELMAND","BADGHIS",
"KANDAHAR","FARAH","HERAT"];
db.getCollection('test').aggregate([
{ "$project": {
"locations": {
"$map": {
"input": {
"$setIntersection": ["$locations", array ]
},
"in": { "k": "$$this", "v": 1 }
}
}
}},
{ "$unwind": "$locations" },
{ "$group": {
"_id": "$locations.k",
"v": { "$sum": "$locations.v" }
}},
{ "$sort": { "_id": 1 } },
{ "$group": {
"_id": null,
"obj": { "$push": { "k": "$_id", "v": "$v" } }
}},
{ "$replaceRoot": {
"newRoot": { "$arrayToObject": "$obj" }
}}
])
locations field is like :
"locations" : [
"Afghanistan",
"Kabul",
.....
],
Using $map to transform "each" element of course:
// Replacement $project stage: an inner $map upper-cases every element of
// "locations" before $setIntersection compares it with `array`; the outer $map
// then emits a { k, v: 1 } pair for each surviving value.
{ "$project": {
"locations": {
"$map": {
"input": {
"$setIntersection": [
{ "$map": { "input": "$locations", "in": { "$toUpper": "$$this" } } },
array
]
},
"in": { "k": "$$this", "v": 1 }
}
}
}},

Get property with highest value from key value pair

In MongoDB, I have documents with a structure like this:
{
_id: "123456...", // an ObjectId
name: "foobar",
classification: {
class_1: 0.45,
class_2: 0.11,
class_3: 0.44
}
}
Using the aggregation pipeline, is it possible to give me an object that contains the highest classification? So, given the above, I would like something like this as result:
{
_id: "123456...", // an ObjectId
name: "foobar",
classification: "class_1"
}
I thought I could use $unwind but the classification property is not an array.
For what it's worth: I know there will always be three properties in classification, so it's ok to hard-code the keys in the query.
You should probably note here that every technique applied is essentially based on "coercion" of the "key/value" pairs into an "array" format for comparison and extraction. So the real lesson to learn is that your document "should" in fact store this as an "array" instead. But on to the techniques.
If you have MongoDB 3.4.4 or later then you can use $objectToArray to turn the "keys" into an array so you can get the value:
Dynamic
db.collection.aggregate([
{ "$addFields": {
"classification": {
"$arrayElemAt": [
{ "$map": {
"input": {
"$filter": {
"input": { "$objectToArray": "$classification" },
"as": "c",
"cond": {
"$eq": [
"$$c.v",
{ "$max": {
"$map": {
"input": { "$objectToArray": "$classification" },
"as": "c",
"in": "$$c.v"
}
}}
]
}
}
},
"as": "c",
"in": "$$c.k",
}},
0
]
}
}}
])
Otherwise just do the transformation as you iterate the cursor if you do not really need it for further aggregation. As a basic JavaScript example:
// Client-side equivalent: replace each document's "classification" object with
// the name of its highest-valued key (the first such key when values tie).
db.collection.find().map(d => {
  const scores = d.classification;
  const keys = Object.keys(scores);
  // Compute the maximum once per document rather than once per key.
  const top = Math.max(...keys.map(k => scores[k]));
  // find() yields the first key holding the maximum, or undefined when there
  // are no keys - the same result as filter(...)[0].
  return Object.assign(d, { classification: keys.find(k => scores[k] === top) });
});
And that's also the same basic logic that you apply using mapReduce if you were actually aggregating something.
Both produce:
/* 1 */
{
"_id" : "123456...",
"name" : "foobar",
"classification" : "class_1"
}
HardCoding
As for the "hardcoding" case, which you say is okay: you can construct it like this with $switch, supplying $max with each of the values:
// Replace the "classification" sub-document with the name of its highest-valued
// key, using the three hard-coded key names.
db.collection.aggregate([
  { "$addFields": {
    "classification": {
      "$let": {
        // Compute the maximum once and reuse it in every branch.
        "vars": {
          "max": {
            "$max": [
              "$classification.class_1",
              "$classification.class_2",
              "$classification.class_3"
            ]
          }
        },
        "in": {
          "$switch": {
            // Branches are tested in order, so on a tie the first listed key wins.
            "branches": [
              { "case": { "$eq": [ "$classification.class_1", "$$max" ] }, "then": "class_1" },
              { "case": { "$eq": [ "$classification.class_2", "$$max" ] }, "then": "class_2" },
              { "case": { "$eq": [ "$classification.class_3", "$$max" ] }, "then": "class_3" }
            ],
            // Without a default, $switch raises an error when no branch matches
            // (e.g. a document that has none of the three keys) and aborts the
            // whole aggregation; yield null for such documents instead.
            "default": null
          }
        }
      }
    }
  }}
])
Which gives rise to then actually being able to write that out longer using $cond, and then the only real constraint is the change in $max for MongoDB 3.2, which allowed an array of arguments as opposed to its previous role as an "accumulator only":
db.collection.aggregate([
{ "$addFields": {
"classification": {
"$let": {
"vars": {
"max": {
"$max": [
"$classification.class_1",
"$classification.class_2",
"$classification.class_3"
]
}
},
"in": {
"$cond": {
"if": { "$eq": [ "$classification.class_1", "$$max" ] },
"then": "class_1",
"else": {
"$cond": {
"if": { "$eq": [ "$classification.class_2", "$$max" ] },
"then": "class_2",
"else": "class_3"
}
}
}
}
}
}
}}
])
If you were "really" constrained then you could "force" the "max" through a separate pipeline stage using $map and $unwind on the array then $group again. This would make the operations compatible with MongoDB 2.6:
db.collection.aggregate([
{ "$project": {
"name": 1,
"classification": 1,
"max": {
"$map": {
"input": [1,2,3],
"as": "e",
"in": {
"$cond": {
"if": { "$eq": [ "$$e", 1 ] },
"then": "$classification.class_1",
"else": {
"$cond": {
"if": { "$eq": [ "$$e", 2 ] },
"then": "$classification.class_2",
"else": "$classification.class_3"
}
}
}
}
}
}
}},
{ "$unwind": "$max" },
{ "$group": {
"_id": "$_id",
"name": { "$first": "$name" },
"classification": { "$first": "$classification" },
"max": { "$max": "$max" }
}},
{ "$project": {
"name": 1,
"classification": {
"$cond": {
"if": { "$eq": [ "$classification.class_1", "$max" ] },
"then": "class_1",
"else": {
"$cond": {
"if": { "$eq": [ "$classification.class_2", "$max" ] },
"then": "class_2",
"else": "class_3"
}
}
}
}
}}
])
And going really ancient, then we can instead $unwind from $const, which was (and still is) a "hidden" and undocumented operator equal in function to $literal (which is technically aliased to it) in modern versions, but also with the alternate syntax to $cond as an "array" ternary operation this then becomes compatible with all versions since the aggregation framework existed:
db.collection.aggregate([
{ "$project": {
"name": 1,
"classification": 1,
"temp": { "$const": [1,2,3] }
}},
{ "$unwind": "$temp" },
{ "$group": {
"_id": "$_id",
"name": { "$first": "$name" },
"classification": { "$first": "$classification" },
"max": {
"$max": {
"$cond": [
{ "$eq": [ "$temp", 1 ] },
"$classification.class_1",
{ "$cond": [
{ "$eq": [ "$temp", 2 ] },
"$classification.class_2",
"$classification.class_3"
]}
]
}
}
}},
{ "$project": {
"name": 1,
"classification": {
"$cond": [
{ "$eq": [ "$max", "$classification.class_1" ] },
"class_1",
{ "$cond": [
{ "$eq": [ "$max", "$classification.class_2" ] },
"class_2",
"class_3"
]}
]
}
}}
])
But it is of course possible, even if extremely messy.
You can use the $indexOfArray operator to locate the $max value in classification and then project its key, using $objectToArray (available from version 3.4.4) to convert the classification embedded document into an array of key/value pairs.
db.collection.aggregate([
{
"$addFields": {
"classification": {
"$let": {
"vars": {
"classificationkv": {
"$objectToArray": "$classification"
}
},
"in": {
"$let": {
"vars": {
"classificationmax": {
"$arrayElemAt": [
"$$classificationkv",
{
"$indexOfArray": [
"$$classificationkv.v",
{
"$max": "$$classificationkv.v"
}
]
}
]
}
},
"in": "$$classificationmax.k"
}
}
}
}
}
}
])
In the end, I went with a simpler solution, though not as generic as the other ones posted here. I used a switch/case statement:
{'$project': {'_id': 1, 'name': 1,
'classification': {'$switch': {
'branches': [
{'case': {'$and': [{'$gt': ['$classification.class_1', '$classification.class_2']},
{'$gt': ['$classification.class_1', '$classification.class_3']}]},
'then': "class1"},
{'case': {'$and': [{'$gt': ['$classification.class_2', '$classification.class_1']},
{'$gt': ['$classification.class_2', '$classification.class_3']}]},
'then': "class_2"},
{'case': {'$and': [{'$gt': ['$classification.class_3', '$classification.class_1']},
{'$gt': ['$classification.class_3', '$classification.class_2']}]},
'then': "class_3"}],
'default': ''}}
}}
This works for me, but the other answers might be a better option, YMMV.