MongoDB aggregate uniqueness and count at the same time with higher-level averages

Consider this dataset. For each name, we wish to find the average of x plus the distinct set and count of game. For Steve, this is avg(x) ≈ 19.67, with game A appearing 2 times and game B once. For Bob, this is avg(x) = 60.5, with game B appearing 4 times:
{"name":"Steve", "game": "A", x:7},
{"name":"Steve", "game": "A", x:21},
{"name":"Steve", "game": "B", x:31},
{"name":"Bob", "game": "B", x:41},
{"name":"Bob", "game": "B", x:51},
{"name":"Bob", "game": "B", x:71},
{"name":"Bob", "game": "B", x:79},
{"name":"Jill", "game": "A", x:61},
{"name":"Jill", "game": "B", x:71},
{"name":"Jill", "game": "C", x:81},
{"name":"Jill", "game": "D", x:91}
EDIT: The answer is below, but I'm leaving this incomplete solution as a stepping stone.
I am really close with this. Note we cannot use $addToSet because it is "lossy": duplicates collapse into the set, so the per-game counts are lost. So instead, we group by player and game to get the full list, then in a second group capture the list size:
db.foo2.aggregate([
{$group: {_id:{n:"$name",g:"$game"}, z:{$push: "$x"} }}
,{$group: {_id:"$_id.n",
avgx: {$avg: "$z"},
games: {$push: {name: "$_id.g", num: {$size:"$z"}}}
}}
]);
which yields:
{
"_id" : "Steve",
"avgx" : null,
"games" : [ {"name":"A", "num":2 },
{"name":"B", "num":1 }
]
}
{
"_id" : "Bob",
"avgx" : null,
"games" : [ {"name":"B", "num":4 } ]
}
but I just cannot seem to get avgx working properly. If I needed the average within the game type, that would be easy, but I need it across the player, and $avg as a $group accumulator returns null when given an array.
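For reference, one way to salvage this two-stage shape: since the accumulator cannot average an array, carry the per-game sums and counts through the second group and divide at the end. A sketch, not the accepted approach (which follows further below):
db.foo2.aggregate([
{$group: {_id:{n:"$name",g:"$game"}, z:{$push: "$x"} }}
,{$group: {_id:"$_id.n",
  sumx: {$sum: {$sum: "$z"}},  // inner $sum adds up the array; outer $sum accumulates across games
  cntx: {$sum: {$size: "$z"}},
  games: {$push: {name: "$_id.g", num: {$size:"$z"}}}
}}
,{$addFields: {avgx: {$divide: ["$sumx", "$cntx"]}}}
,{$project: {sumx: 0, cntx: 0}}
]);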

Try this: group by name to compute the average across the player in one pass, then deduplicate the pushed games with $reduce. (Note that num below ends up holding the first x value seen for each game, not the per-game count the question asks for.)
db.collection.aggregate([
  {
    $group: {
      _id: "$name",
      avg: { $avg: "$x" },
      gamesUnFiltered: { $push: { name: "$game", num: "$x" } }
    }
  },
  {
    $addFields: {
      games: {
        $reduce: {
          input: "$gamesUnFiltered",
          initialValue: [],
          in: {
            $cond: [
              // keep only the first entry seen for each game name
              { $not: [{ $in: ["$$this.name", "$$value.name"] }] },
              { $concatArrays: [["$$this"], "$$value"] },
              "$$value"
            ]
          }
        }
      }
    }
  },
  { $project: { gamesUnFiltered: 0 } }
])
Output:
[
{
"_id": "Bob",
"avg": 60.5,
"games": [
{
"name": "B",
"num": 41
}
]
},
{
"_id": "Steve",
"avg": 19.666666666666668,
"games": [
{
"name": "B",
"num": 31
},
{
"name": "A",
"num": 7
}
]
},
{
"_id": "Jill",
"avg": 76,
"games": [
{
"name": "D",
"num": 91
},
{
"name": "C",
"num": 81
},
{
"name": "B",
"num": 71
},
{
"name": "A",
"num": 61
}
]
}
]

Got it! You need an extra $unwind and use $first to "carry" the a field from stage to stage. I threw in total_games for extra info. In general, the "group-unwind-first" pattern is a way to aggregate one or more things, then "reset" to the unaggregated state to perform additional operations, with the aggregate values traveling along with each doc.
db.foo2.aggregate([
{$group: {_id:"$name", a:{$avg:"$x"}, g:{$push: "$game"} }}
,{$unwind: "$g"}
,{$group: {_id:{name:"$_id",game:"$g"}, a:{$first:"$a"}, n:{$sum:1}}}
,{$group: {_id:"$_id.name",
a:{$first:"$a"},
total_games: {$sum:"$n"},
games: {$push: {name:"$_id.game",n:"$n"}}
}}
]);
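Against the sample data this yields (modulo document and array order, which $group does not guarantee):
{ "_id" : "Steve", "a" : 19.666666666666668, "total_games" : 3, "games" : [ { "name" : "A", "n" : 2 }, { "name" : "B", "n" : 1 } ] }
{ "_id" : "Bob", "a" : 60.5, "total_games" : 4, "games" : [ { "name" : "B", "n" : 4 } ] }
{ "_id" : "Jill", "a" : 76, "total_games" : 4, "games" : [ { "name" : "A", "n" : 1 }, { "name" : "B", "n" : 1 }, { "name" : "C", "n" : 1 }, { "name" : "D", "n" : 1 } ] }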

Remove multiple objects from nested array

I'm trying to clean my collection with a single update query. I need to remove some deeply nested objects without breaking other objects. Here is a good solution provided by @rickhg12hs:
Remove multiple objects from deeply nested array
but it has a small drawback: it breaks the content of the _a._p object when there is no _a._p.s object inside.
And the original solution provided by @nimrod serok:
Remove multiple elements from deep nested array with single update query
but it has another issue: when "_a._p.s.c", "_a._p.s.d" or "_a._p.s.a" is missing, it adds objects with null values instead, which of course is not expected.
Playground test
These are two example original documents:
[
{
"_id": ObjectId("5c05984246a0201286d4b57a"),
f: "x",
"_a": [
{
"_onlineStore": {}
},
{
"_p": {
"s": {
"a": {
"t": [
{
id: 1,
"dateP": "20200-09-20",
did: "x",
dst: "y",
den: "z"
},
{
id: 2,
"dateP": "20200-09-20"
}
]
},
"c": {
"t": [
{
id: 3,
"dateP": "20300-09-22"
},
{
id: 4,
"dateP": "20300-09-23",
did: "x",
dst: "y",
den: "z"
},
{
id: 5,
"dateP": "20300-09-23"
}
]
}
}
}
}
]
},
{
"_id": ObjectId("5c05984246a0201286d4b57b"),
f: "x",
"_a": [
{
"_onlineStore": {}
},
{
"_p": {
_t: "Some field",
_x: "Some other field"
}
}
]
}
]
Expected result after update:
[
{
"_a": [
{
"_onlineStore": {}
},
{
"_p": {
"s": {
"a": {
"t": [
{
"dateP": "20200-09-20",
"den": "z",
"did": "x",
"dst": "y",
"id": 1
}
]
},
"c": {
"t": [
{
"dateP": "20300-09-23",
"den": "z",
"did": "x",
"dst": "y",
"id": 4
}
]
}
}
}
}
],
"_id": ObjectId("5c05984246a0201286d4b57a"),
"f": "x"
},
{
"_a": [
{
"_onlineStore": {}
},
{
"_p": {
_t: "Some field",
_x: "Some other field"
}
}
],
"_id": ObjectId("5c05984246a0201286d4b57b"),
"f": "x"
}
]
The goal is, with a single update query, to remove any objects under _a._p.s.[a|c|d].t where the fields did, dst and den are missing, but without breaking other _a._p objects where _a._p.s does not exist.
It looks like a small change to @rickhg12hs's answer can solve this: leave an _a element untouched when _p or _p.s is missing; otherwise rebuild each subdocument of s, keeping only the t entries that contain all of did, dst and den:
db.collection.update({},
[
  {$set: {
    _a: {$map: {
      input: "$_a",
      as: "elem",
      in: {$cond: [
        // pass the element through untouched when _p or _p.s is missing
        {$or: [
          {$eq: [{$type: "$$elem._p"}, "missing"]},
          {$eq: [{$type: "$$elem._p.s"}, "missing"]}
        ]},
        "$$elem",
        {
          _p: {s: {
            // rebuild s from its key/value pairs, whatever the keys (a, c, d, ...)
            $arrayToObject: {$map: {
              input: {$objectToArray: "$$elem._p.s"},
              as: "anyKey",
              in: {
                k: "$$anyKey.k",
                v: {
                  t: {$filter: {
                    input: "$$anyKey.v.t",
                    as: "t",
                    // keep an entry only if its keys include all of did, dst, den
                    cond: {$setIsSubset: [
                      ["did", "dst", "den"],
                      {$map: {
                        input: {$objectToArray: "$$t"},
                        in: "$$this.k"
                      }}
                    ]}
                  }}
                }
              }
            }}
          }}
        }
      ]}
    }}
  }}
],
{
  "multi": true
})
See how it works on the playground example
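If the $setIsSubset test over the key list feels indirect, an equivalent survival condition for the inner $filter checks each field's $type directly ($type returns the string "missing" for absent fields). This variant is an editor's sketch, not part of the original answer:
cond: {$and: [
  {$ne: [{$type: "$$t.did"}, "missing"]},
  {$ne: [{$type: "$$t.dst"}, "missing"]},
  {$ne: [{$type: "$$t.den"}, "missing"]}
]}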

2-level group by for objects in an array

Good day SO Community,
I would like to ask for your help in creating the correct aggregation pipeline for sample data:
[
{
"group": "A",
"subgroup": "A1",
"name": "Abby"
},
{
"group": "A",
"subgroup": "A2",
"name": "Andy"
},
{
"group": "A",
"subgroup": "A2",
"name": "Amber"
},
{
"group": "B",
"subgroup": "B1",
"name": "Bart"
}
]
I want to group by group first, then within each group, group by subgroup.
The names should be collected under their respective subgroup, and each level should show its actual count.
My expected output is as follows:
[
{
"_id": "B",
"count": 1,
"subgroup": [
{
"_id": "B1",
"count": 1,
"names": ["Bart"]
}
]
},
{
"_id": "A",
"count": 3,
"subgroup": [
{
"_id": "A1",
"count": 1,
"names":[ "Abby"]
},
{
"_id": "A2",
"count": 2,
"names": ["Amber", "Andy"]
}
]
}
]
I have tried this pipeline but it's not grouping the subgroups.
{
  "$group": {
    "_id": "$group",
    "subgroup": {
      "$addToSet": {
        "_id": "$subgroup",
        "name": "$name",
        count: { $sum: 1 }
      }
    },
    count: { $sum: 1 }
  }
}
The aggregation pipeline and actual output can be seen in the playground:
https://mongoplayground.net/p/MO1fCf21Rez
Thank you!
$group - Group by group and subgroup. Count the documents and push each name into a names array.
$group - Group by group. Sum the subgroup counts into the total count and push an object per subgroup into the subgroup array.
db.students.aggregate([
  {
    $group: {
      _id: { group: "$group", subgroup: "$subgroup" },
      names: { $push: "$name" },
      count: { $sum: 1 }
    }
  },
  {
    "$group": {
      "_id": "$_id.group",
      "subgroup": {
        $addToSet: {
          "_id": "$_id.subgroup",
          "names": "$names",
          count: "$count"
        }
      },
      count: { $sum: "$count" }
    }
  }
])
Demo @ Mongo Playground
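One caveat: $addToSet does not guarantee the order of the subgroup array. If you need a deterministic order (the expected output lists A1 before A2), MongoDB 5.2+ offers $sortArray; appending a stage like the sketch below (an optional extension, not part of the answer above) sorts each array:
db.students.aggregate([
  // ... the two $group stages from above ...
  // sort each subgroup array by its _id (requires MongoDB 5.2+ for $sortArray)
  { $set: { subgroup: { $sortArray: { input: "$subgroup", sortBy: { _id: 1 } } } } }
])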

MongoDB score results based on simple matches

I'm trying to create a simple search algorithm that will try to match against a first name, last name, and/or set of tags, as an example:
[
{
"key": 1,
"fname": "Bob",
"lname": "Smith",
"tags": [
"a",
"b",
"c"
]
},
{
"key": 2,
"fname": "John",
"lname": "Jacob",
"tags": [
"c",
"d",
"e"
]
},
{
"key": 3,
"fname": "Will",
"lname": "Smith",
"tags": [
"a",
"b",
"c"
]
}
]
This works with the following, but I can only get the tags count. Basically, what I'm going for here is to match first name, last name, or tags, and for each match store a "point":
db.collection.aggregate([
  {
    $match: {
      $or: [
        { "fname": "Will" },
        { "lname": "Smith" },
        { tags: { $in: ["b", "c"] } }
      ]
    }
  },
  {
    $project: {
      tagsMatchCount: {
        $size: { "$setIntersection": [["b", "c"], "$tags"] }
      }
    }
  },
  { "$sort": { tagsMatchCount: -1 } }
])
Here's the sandbox I'm playing with: https://mongoplayground.net/p/DFJQZY-dfb5
Query
Create a document to hold the matches, each in a separate field.
Add one extra field, total.
Keep only the documents with at least 1 match.
You can also sort afterwards by any of the 3 match types, or by the total, like
{"$sort": {"points.total": -1}}
If you have an index that can be used, remove my $match and add your own match as the first stage, as in your example.
Test code here
db.collection.aggregate([
  {"$set":
    {"points":
      {"fname": {"$cond": [{"$eq": ["$fname", "Will"]}, 1, 0]},
       "lname": {"$cond": [{"$eq": ["$lname", "Smith"]}, 1, 0]},
       "tags": {"$size": {"$setIntersection": ["$tags", ["b", "c"]]}}}}},
  {"$set":
    {"points.total":
      {"$add": ["$points.fname", "$points.lname", "$points.tags"]}}},
  {"$match": {"$expr": {"$gt": ["$points.total", 0]}}}
])

MongoDB: Transform array of objects to array of arrays

I have a collection named "records" that contains documents in the following form:
{
"name": "a"
"items": [
{
"a": "5",
"b": "1",
"c": "2"
},
{
"a": "6",
"b": "3",
"c": "7"
}
]
}
I want to keep the data just as it is in the database (to make the data easy to read and interpret). But I'd like to run a query that returns the data in the following form:
{
"name": "a"
"items": [
["5", "1", "2"],
["6", "3", "7"],
]
}
Is this possible with pymongo? I know I can run a query and translate the documents using Python, but I'd like to avoid iterating over the query result if possible.
"I have a table named 'records'" - in MongoDB, that is called a collection.
"Is this possible with pymongo?" - Yes.
"Any pointers on how to approach this would be super helpful!"
I'd suggest using a view to transform your data during a query in MongoDB. This way, you can get the transformed data, and also apply find to the already-transformed data if you need to.
db.createCollection(
"view_name",
{"viewOn": "original_collection_name",
"pipeline": [{$unwind: "$items"},
{$project: {name: 1, items: {$objectToArray: "$items"}}},
{$project: {name: 1, items: {$concatArrays: ["$items.v"]}}},
{$group: {_id: "$_id", name: {$first: "$name"},
items: {$push: "$items"}}}]
}
)
> db.view_name.find({name: "a"})
{ "_id" : ObjectId("5fc3dbb69cb76f866582620f"), "name" : "a", "items" : [ [ "5", "1", "2" ], [ "6", "3", "7" ] ] }
> db.view_name.find({"items": {$in: [["5", "1", "2"]]}})
{ "_id" : ObjectId("5fc3dbb69cb76f866582620f"), "name" : "a", "items" : [ [ "5", "1", "2" ], [ "6", "3", "7" ] ] }
> db.view_name.find()
{ "_id" : ObjectId("5fc3dbb69cb76f866582620f"), "name" : "a", "items" : [ [ "5", "1", "2" ], [ "6", "3", "7" ] ] }
Query:
db.original_collection_name.aggregate([
{$unwind: "$items"},
{$project: {name: 1, items: {$objectToArray: "$items"}}},
{$project: {name: 1, items: {$concatArrays: ["$items.v"]}}},
{$group: {_id: "$_id", name: {$first: "$name"}, items: {$push: "$items"}}}])
Using $objectToArray and $map transformations:
// { name: "a", items: [ { a: "5", b: "1", c: "2" }, { a: "6", b: "3", c: "7" } ] }
db.collection.aggregate([
{ $set: { items: { $map: { input: "$items", as: "x", in: { $objectToArray: "$$x" } } } } },
// {
// name: "a",
// items: [
// [ { k: "a", v: "5" }, { k: "b", v: "1" }, { k: "c", v: "2" } ],
// [ { k: "a", v: "6" }, { k: "b", v: "3" }, { k: "c", v: "7" } ]
// ]
// }
{ $set: { items: { $map: { input: "$items", as: "x", in: "$$x.v" } } } }
])
// { name: "a", items: [["5", "1", "2"], ["6", "3", "7"]] }
This maps each element of items to its key/value array form, such that { field: "value" } becomes [ { k: "field", v: "value" } ]. This way, whatever the field names, we can easily access each value through v, which is the role of the second $set stage: "$$x.v".
This has the benefit of avoiding heavy stages such as $unwind/$group.
Note that you can also nest the second $map within the first, but that's probably less readable; a sketch of that variant follows.
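For reference, a sketch of that nested variant (same output, one $set stage):
db.collection.aggregate([
  { $set: { items: { $map: {
      input: "$items",
      as: "x",
      // convert each object to k/v pairs and extract the values in one pass
      in: { $map: { input: { $objectToArray: "$$x" }, as: "kv", in: "$$kv.v" } }
  } } } }
])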

How to pop() the last element of an array with Aggregate

I have data in below format:
{
"Array1" : [
"A",
"B",
"C",
"D",
"E"
],
"tag": "X"
}
{
"Array1" : [
"A",
"B",
"C",
"X",
"Y"
],
"tag": "X"
}
{
"Array1" : [
"A",
"B",
"C",
"L",
"M"
],
"tag": "U"
}
And I need to perform a pop on Array1 during the aggregate so that the last element is ignored. I am trying the command below:
aggregate([
  {$unwind: "$Array1"},
  {$group: {_id: "$Array1", count: {$sum: 1}}}
])
Similarly, would it be possible to ignore the first element of the array?
Edit: Expected output:
{
"A": 3,
"B": 3,
"C": 3,
"D": 1,
"X": 1,
"L": 1
}
I'm going to skip the PHP translation because it's both late at night for me and also quite trivial. But the basic process is this:
db.collection.aggregate([
  { "$unwind": "$Array1" },
  { "$group": {
    "_id": "$_id",
    "Array1": { "$push": "$Array1" },
    "last": { "$last": "$Array1" }
  }},
  { "$project": {
    "Array1": {
      "$setDifference": [
        "$Array1",
        // $map over a one-element array is just a trick to wrap "$last" in an array
        { "$map": { "input": ["A"], "as": "el", "in": "$last" } }
      ]
    }
  }}
])
If your array items are not actually unique, or the order is important so the "set" operator there messes with this, then do this instead:
db.collection.aggregate([
  { "$unwind": "$Array1" },
  { "$group": {
    "_id": "$_id",
    "Array1": { "$push": "$Array1" },
    "last": { "$last": "$Array1" }
  }},
  { "$unwind": "$Array1" },
  // $redact walks each unwound doc and prunes those matching the saved last element
  { "$redact": {
    "$cond": {
      "if": { "$eq": [ "$Array1", "$last" ] },
      "then": "$$PRUNE",
      "else": "$$KEEP"
    }
  }},
  { "$group": {
    "_id": "$_id",
    "Array1": { "$push": "$Array1" }
  }}
])
In either case, you are essentially comparing the $last element found in the array with the whole array and removing it from the selection.
But personally, unless you need this type of operation for further aggregation, do it in client code. Or wait for the next release of MongoDB, where the new $slice operator makes this simple:
db.collection.aggregate([
  { "$project": {
    "Array1": {
      "$slice": [
        "$Array1",
        0,
        // take everything except the final element
        { "$subtract": [ { "$size": "$Array1" }, 1 ] }
      ]
    }
  }}
])
All produce (in varying forms, given the "set" operation) output like:
{
"_id" : ObjectId("55cb4ef04f67f8a950c7b8fa"),
"Array1" : [
"A",
"B",
"C",
"D"
]
}
{
"_id" : ObjectId("55cb4ef04f67f8a950c7b8fb"),
"Array1" : [
"A",
"B",
"C",
"X"
]
}
{
"_id" : ObjectId("55cb4ef04f67f8a950c7b8fc"),
"Array1" : [
"A",
"B",
"C",
"L"
]
}
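As for the follow-up question about ignoring the first element instead: the same $slice approach works by starting at position 1. A sketch along the same lines:
db.collection.aggregate([
  { "$project": {
    "Array1": {
      "$slice": [
        "$Array1",
        1,
        // take everything after the first element
        { "$subtract": [ { "$size": "$Array1" }, 1 ] }
      ]
    }
  }}
])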