Grouping data in MongoDB using stored JSON data

This is the data I have in my mongo db:
{
    "_id": ObjectId("556d1c7716efd4a035d8e473"),
    "products": [
        {
            "gtin": 77770000222313,
            "gpc": 10000068
        },
        {
            "gtin": 77770000222312,
            "gpc": 10000068
        }
    ]
}
How do I aggregate this so that I get the gpc value with an array of the gtins under it? Something like:
{
    "gpc": 10000068,
    "gtin": [77770000222312, 77770000222313]
}

Use the aggregation framework:
db.collection.aggregate([
    { $unwind: "$products" },
    { $group: { _id: "$products.gpc", gtin: { $push: "$products.gtin" } } },
    { $project: { gpc: "$_id", gtin: 1, _id: 0 } }
])
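On the sample document above, both products share the same gpc, so this should produce a single result roughly like:
{
    "gtin": [77770000222313, 77770000222312],
    "gpc": 10000068
}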

Related

How do I use $unwind and then $group in the same mongodb query

I have the following mongodb structure...
[
    {
        track: 'Newcastle',
        time: '17:30',
        date: '22/04/2022',
        bookmakers: [
            {
                bookmaker: 'Coral',
                runners: [
                    {
                        runner: 'John',
                        running: true,
                        odds: 3.2
                    },
                    ...
                ]
            },
            ...
        ]
    },
    ...
]
I'm trying to filter the bookmakers array of each document so that it only includes the objects that match the specified bookmaker values, for example:
{ 'bookmakers.bookmaker': { $in: ['Coral', 'Bet365'] } }
At the moment I'm using the following query to select only the bookmakers that were specified, but I need to put the documents back together after they've been separated by the $unwind. Is there a way I can do this using $group?
await HorseRacingOdds.aggregate([
    { $unwind: "$bookmakers" },
    {
        $group: {
            _id: "$_id",
            bookmakers: "$bookmakers"
        }
    },
    {
        $project: {
            "_id": 0,
            "__v": 0,
            "lastUpdate": 0
        }
    }
])
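For reference, putting the unwound documents back together with $group needs an accumulator for every field you want to keep. A minimal sketch, assuming you also $match the unwound bookmakers first and using the field names from the sample documents:
await HorseRacingOdds.aggregate([
    { $unwind: "$bookmakers" },
    // keep only the unwound entries for the requested bookmakers
    { $match: { "bookmakers.bookmaker": { $in: ["Coral", "Bet365"] } } },
    {
        $group: {
            _id: "$_id",
            // $first carries the top-level fields through, $push rebuilds the array
            track: { $first: "$track" },
            time: { $first: "$time" },
            date: { $first: "$date" },
            bookmakers: { $push: "$bookmakers" }
        }
    }
])
That said, the $addFields/$filter approach below avoids the unwind and regroup round trip entirely.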
How about a plain $addFields with $filter?
db.collection.aggregate([
    {
        "$addFields": {
            "bookmakers": {
                "$filter": {
                    "input": "$bookmakers",
                    "as": "b",
                    "cond": {
                        "$in": ["$$b.bookmaker", ["Coral", "Bet365"]]
                    }
                }
            }
        }
    },
    {
        $project: {
            "_id": 0,
            "__v": 0,
            "lastUpdate": 0
        }
    }
])
Here is the Mongo playground for your reference.
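On the sample structure above, each document keeps its top-level fields while its bookmakers array is reduced to the matching entries, roughly:
{
    track: 'Newcastle',
    time: '17:30',
    date: '22/04/2022',
    bookmakers: [
        { bookmaker: 'Coral', runners: [ ... ] }
        // ...only 'Coral' and 'Bet365' entries remain
    ]
}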

Find all objects that have duplicated values in MongoDB

How can I return all documents from a collection where the name is the same in all of them?
For example, in this case name: "John".
[
    {
        _id: 1,
        name: "John",
        last: "Smith"
    },
    {
        _id: 8,
        name: "John",
        last: "Snow"
    },
    {
        _id: 16,
        name: "John",
        last: "McKay"
    }
]
You can use $group in an aggregation to return all documents that share the same name:
db.collection.aggregate([
    {
        "$group": {
            "_id": "$name",
            "orig": { "$push": "$$ROOT" }
        }
    },
    {
        "$addFields": {
            "sizeOrig": { $size: "$orig" }
        }
    },
    {
        // keep only names that occur more than once
        "$match": {
            sizeOrig: { $gt: 1 }
        }
    },
    { $unwind: "$orig" },
    {
        "$replaceRoot": { "newRoot": "$orig" }
    }
])
Example: https://mongoplayground.net/p/DfTA6_pUaRA
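For the three sample documents this pipeline returns all of them, since they all share name: "John":
[
    { _id: 1, name: "John", last: "Smith" },
    { _id: 8, name: "John", last: "Snow" },
    { _id: 16, name: "John", last: "McKay" }
]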
But if you only want a single document per duplicated value, the $group stage alone is enough:
db.collection.aggregate([
    {
        "$group": {
            "_id": "$name",
            "orig": { "$push": "$$ROOT" }
        }
    }
])
https://mongoplayground.net/p/hd1z77cdtp0
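For the sample data that yields one document per name, with the originals collected under orig:
[
    {
        "_id": "John",
        "orig": [
            { "_id": 1, "name": "John", "last": "Smith" },
            { "_id": 8, "name": "John", "last": "Snow" },
            { "_id": 16, "name": "John", "last": "McKay" }
        ]
    }
]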
You can also just use find({}) on the collection or model; it returns an array containing all the documents in the collection. For example, if you have a model named "Employees", you can do the following after connecting to the db:
Employees.find({});
This returns an array with all the documents in the Employees collection; once you have it, you can iterate over it.
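A minimal sketch of that iteration, assuming a Mongoose-style Employees model and grouping by name in application code to spot the duplicates:
const employees = await Employees.find({});

// group the fetched documents by name in application code
const byName = {};
for (const emp of employees) {
    if (!byName[emp.name]) byName[emp.name] = [];
    byName[emp.name].push(emp);
}

// keep only the names that occur more than once
const duplicates = Object.values(byName).filter(group => group.length > 1);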

How to compute frequency for multiple fields using a single pipeline in MongoDB?

Is it possible to calculate the frequency of multiple fields with a single query in MongoDB? I can do that with separate $group stages for each field. How can I optimize it and build one pipeline that can do the job for all items?
I have the following pipeline in MongoDB 4.5
{
    $match: {
        field1: { $in: ['value1', 'value2'] },
        field2: { $in: ['v1', 'v2'] }
    }
},
{
    $group: {
        _id: {
            field1: '$field1',
            field2: '$field2'
        },
        frequency: { $sum: 1.0 }
    }
}
From this, I obtain data like the following:
{
    "_id": { "field1": "value1", "field2": "v1" },
    "frequency": 7.0
},
{
    "_id": { "field1": "value1", "field2": "v2" },
    "frequency": 3.0
},
{
    "_id": { "field1": "value2", "field2": "v1" },
    "frequency": 4.0
}
The result that I am trying to get is:
{
    "field1": {
        "value1": 10.0,
        "value2": 4.0
    },
    "field2": {
        "v1": 11.0,
        "v2": 3.0
    }
}
Convert the required fields into key-value array form using $objectToArray
$unwind to deconstruct the converted array
$group by key and value and count with $sum
$group by key and construct the array of value/count pairs
$group by null and construct the array of fields, converting each value array with $arrayToObject
$replaceRoot with $arrayToObject to promote the resulting array to the root document
db.collection.aggregate([
    {
        $match: {
            field1: { $in: ["value1", "value2"] },
            field2: { $in: ["v1", "v2"] }
        }
    },
    {
        $project: {
            arr: {
                $objectToArray: {
                    field1: "$field1",
                    field2: "$field2"
                }
            }
        }
    },
    { $unwind: "$arr" },
    {
        $group: {
            _id: { k: "$arr.k", v: "$arr.v" },
            count: { $sum: 1 }
        }
    },
    {
        $group: {
            _id: "$_id.k",
            arr: { $push: { k: "$_id.v", v: "$count" } }
        }
    },
    {
        $group: {
            _id: null,
            arr: { $push: { k: "$_id", v: { $arrayToObject: "$arr" } } }
        }
    },
    { $replaceRoot: { newRoot: { $arrayToObject: "$arr" } } }
])
Playground
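With the sample counts above, the final document combines the per-field frequencies:
{
    "field1": { "value1": 10, "value2": 4 },
    "field2": { "v1": 11, "v2": 3 }
}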

Total of all group totals using MongoDB

I built this aggregation pipeline, and I want to add a field containing the global total of all the group totals.
{ "$match": query },
{ "$sort": cursor.sort },
{ "$group": {
_id: { key:"$paymentFromId"},
items: {
$push: {
_id:"$_id",
value:"$value",
transaction:"$transaction",
paymentMethod:"$paymentMethod",
createdAt:"$createdAt",
...
}
},
count:{$sum:1},
total:{$sum:"$value"}
}}
{
//i want to get
...project groups , goupsTotal , groupsCount
}
,{
"$skip":cursor.skip
},{
"$limit":cursor.limit
},
])
You need to use $facet (available from MongoDB 3.4) to apply multiple pipelines to the same set of documents:
first pipeline: $skip and $limit the grouped docs
second pipeline: calculate the total of all groups
{ "$match": query },
{ "$sort": cursor.sort },
{ "$group": {
_id: { key:"$paymentFromId"},
items: {
$push: "$$CURRENT"
},
count:{$sum:1},
total:{$sum:"$value"}
}
},
{
$facet: {
docs: [
{ $skip:cursor.skip },
{ $limit:cursor.limit }
],
overall: [
{$group: {
_id: null,
groupsTotal: {$sum: '$total'},
groupsCount:{ $sum: '$count'}
}
}
]
}
The final output will be:
{
    docs: [ ... ],    // array of { _id, items, count, total }
    overall: [ ... ]  // single-element array holding { groupsTotal, groupsCount }
}
PS: I've replaced the items in the third stage ($group) with $$CURRENT, which pushes the whole document, for the sake of simplicity; if you need custom properties, specify them as before.
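Since every $facet output is an array, reading the overall numbers in application code looks roughly like this (a sketch, assuming pipeline holds the stages above and collection is a Node driver collection handle):
const [page] = await collection.aggregate(pipeline).toArray();

const groups = page.docs; // the paged group documents
// overall is an array; it is empty when no documents matched
const { groupsTotal = 0, groupsCount = 0 } = page.overall[0] || {};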
I did it this way: project the $group result into a new field doc, and $sum the sub-totals.
{
    $project: {
        "doc": {
            "_id": "$_id",
            "total": "$total",
            "items": "$items",
            "count": "$count"
        }
    }
},
{
    $group: {
        "_id": null,
        "globalTotal": { $sum: "$doc.total" },
        "result": { $push: "$doc" }
    }
},
{
    $project: {
        "result": 1,
        // paging: "result": { $slice: ["$result", cursor.skip, cursor.limit] },
        "_id": 0,
        "globalTotal": 1
    }
}
The output:
[
    {
        globalTotal: 121500,
        result: [ {group1}, {group2}, {group3}, ... ]
    }
]

MongoDB - aggregation $push if conditional

I am trying to aggregate a batch of documents. There are two fields in the documents I would like to $push; let's say they are the "_id" and "A" fields. I only want to $push "_id" and "A" if "A" is $gt 0.
I tried two approaches.
First one.
db.collection.aggregate([{
    "$group": {
        "_id": null,
        "field": {
            "$push": {
                "$cond": [
                    { "$gt": ["$A", 0] },
                    { "id": "$_id", "A": "$A" },
                    null
                ]
            }
        },
        "secondField": { "$push": "$B" }
    }
}])
But this will push a null value to "field" and I don't want it.
Second one.
db.collection.aggregate([{
    "$group": {
        "_id": null,
        "field": {
            "$cond": [
                { "$gt": ["$A", 0] },
                { "$push": { "id": "$_id", "A": "$A" } },
                null
            ]
        },
        "secondField": { "$push": "$B" }
    }
}])
The second one simply doesn't work...
Is there a way to skip the $push in else case?
ADDED:
Example input documents:
{
    "_id": objectid(1),
    "A": 2,
    "B": "One"
},
{
    "_id": objectid(2),
    "A": 3,
    "B": "Two"
},
{
    "_id": objectid(3),
    "B": "Three"
}
Expected Output:
{
    "field": [
        { "A": 2, "_id": objectid(1) },
        { "A": 3, "_id": objectid(2) }
    ],
    "secondField": ["One", "Two", "Three"]
}
You can use "$$REMOVE":
This system variable was added in version 3.6 (see the MongoDB docs).
db.collection.aggregate([{
    $group: {
        _id: null,
        field: {
            $push: {
                $cond: [
                    { $gt: ["$A", 0] },
                    { id: "$_id", A: "$A" },
                    "$$REMOVE"
                ]
            }
        },
        secondField: { $push: "$B" }
    }
}])
This way you don't have to filter out the nulls afterwards.
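On the example documents this gives the desired result (the document without "A" adds nothing to field but still contributes to secondField), roughly:
{
    "_id": null,
    "field": [
        { "id": objectid(1), "A": 2 },
        { "id": objectid(2), "A": 3 }
    ],
    "secondField": ["One", "Two", "Three"]
}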
This is my answer to the question, after reading the post suggested by @Veeram:
db.collection.aggregate([
    {
        "$group": {
            "_id": null,
            "field": {
                "$push": {
                    "$cond": [
                        { "$gt": ["$A", 0] },
                        { "id": "$_id", "A": "$A" },
                        null
                    ]
                }
            },
            "secondField": { "$push": "$B" }
        }
    },
    {
        "$project": {
            // $setDifference drops the nulls pushed for documents without "A"
            "field": { "$setDifference": ["$field", [null]] },
            "secondField": "$secondField"
        }
    }
])
One more option is to use the $filter operator:
db.collection.aggregate([
    {
        $group: {
            _id: null,
            field: { $push: { id: "$_id", A: "$A" } },
            secondField: { $push: "$B" }
        }
    },
    {
        $project: {
            field: {
                $filter: {
                    input: "$field",
                    as: "item",
                    cond: { $gt: ["$$item.A", 0] }
                }
            },
            secondField: "$secondField"
        }
    }
])
In the first step you build the combined array, and in the second step you filter it.
The same idea extends to nested objects; for example, in this $group stage a task is skipped entirely when it has no id, and the assignee sub-object is omitted when there is no assignee:
$group: {
    _id: '$_id',
    tasks: {
        $addToSet: {
            $cond: {
                if: { $eq: [{ $ifNull: ['$tasks.id', ''] }, ''] },
                then: '$$REMOVE',
                else: {
                    id: '$tasks.id',
                    description: '$tasks.description',
                    assignee: {
                        $cond: {
                            if: { $eq: [{ $ifNull: ['$tasks.assignee._id', ''] }, ''] },
                            // '$$REMOVE' omits the assignee field instead of storing an undefined value
                            then: '$$REMOVE',
                            else: {
                                id: '$tasks.assignee._id',
                                name: '$tasks.assignee.name',
                                thumbnail: '$tasks.assignee.thumbnail',
                                status: '$tasks.assignee.status'
                            }
                        }
                    }
                }
            }
        }
    }
}