Query that combines $project, $unwind & $group - mongodb

I have the following data set:
{
"_id" : ObjectId("57684f2b61f2af6d49fa6dbd"),
"firstname" : "First1",
"surname" : "Sur1",
"email" : "first1#sur1.com",
"goals" : [
{
"gId" : "base1",
"recordDate" : ISODate("2016-06-21T20:05:48.972Z")
},
{
"gId" : "base2",
"recordDate" : ISODate("2016-06-21T20:05:48.972Z")
},
{
"gId" : "base1",
"recordDate" : ISODate("2016-06-21T20:05:48.972Z")
}
]
}
I need the following result:
{
"_id" : ObjectId("57684f2b61f2af6d49fa6dbd"),
"firstname" : "First1",
"surname" : "Sur1",
"email" : "first1#sur1.com",
"goals" : [
{
"gId" : "base1",
"count" : 2
},
{
"gId" : "base2",
"count" : 1
}
]
}
So far I played around with the aggregate query but I couldn't find the solution to my problem. My query looks like that but it doesn't work. The first bit $project runs fine on its own and so does $unwind and $group but I don't know how I can combine it together.
db.getCollection('users').aggregate(
{
$project : {
firstname: "$firstname",
surname: "$surname",
email: "$email",
goals: "$goals"
}
},
{
$unwind: '$goals'
},
{
$group: {
gid: '$goals.gId',
count: {'$sum': 1}
}
}
)
Thanks in advance,
Tom

Try it with the following pipeline
db.getCollection('users').aggregate(
{
$unwind: '$goals'
},
{
$group: {
_id: {
firstname: "$firstname",
surname: "$surname",
email: "$email",
gId: "$goals.gId"
},
count: {'$sum': 1}
}
},
{
$group: {
_id: {
firstname: "$_id.firstname",
surname: "$_id.surname",
email: "$_id.email"
},
goals: {
$push: {
gId: "$_id.gId",
count: "$count"
}
}
}
},
{
$project: {
_id: 0,
firstname: "$_id.firstname",
surname: "$_id.surname",
email: "$_id.email",
goals: 1
}
}
)
Result looks like this
{
"goals" : [
{
"gId" : "base2",
"count" : 1.0
},
{
"gId" : "base1",
"count" : 2.0
}
],
"firstname" : "First1",
"surname" : "Sur1",
"email" : "first1#sur1.com"
}

Related

I'm having trouble with a mongodb function that should be finding duplicates

I have the following function that is supposed to be returning the _id value of duplicates based on the email key. However, no matter what I've tried, I can't get the function to return anything other than any empty object. What am I missing here? Or is there a better approach I should be considering?
var duplicates = [];
db.medicallists
.aggregate([
{
$group: {
_id: {
email: "$email"
},
duplicate_ids: { $addToSet: "$_id" },
count: { $sum: 1 }
}
},
{
$match: {
count: { $gt: 1 }
}
},
], { allowDiskUse: true })
.forEach(function(doc) {
doc.duplicate_ids.shift(); // First element skipped for deleting
doc.duplicate_ids.forEach(function(dupId) {
duplicates.push(dupId); // Getting all duplicate ids
});
});
printjson(duplicates);
EDIT:
Here is a sample document:
{
_id : 5a2fed0c8023cf7ea2346067,
primary_spec : "Addiction Medicine",
first_name : "John",
last_name : "Sample",
city : "Las Vegas",
state : "NV",
phone : "1111111111",
fax : "1111111111",
email : "sample#aol.com"
}
I have tested your specific query on a similar data set and it works fine. printjson(duplicates); prints out all the duplicate ids.
Also this will, in fact, remove all the duplicate entries based on email:
db.collection.aggregate([
{
$group: {
_id: {
email: "$email"
},
duplicate_ids: {
$push: "$_id"
},
count: {
$sum: 1
}
}
},
{
$match: {
count: {
$gt: 1
}
}
},
]).forEach(function(doc){
doc.duplicate_ids.shift();
db.collection.remove({
_id: {
$in: doc.duplicate_ids
}
});
})
My starting set was:
{
"_id" : ObjectId("6014331de1ef9ab1f708ddd9"),
"item" : "card",
"email" : "zzz#yahoo.com"
}
{
"_id" : ObjectId("6014331de1ef9ab1f708ddda"),
"item" : "card",
"email" : "eee#yahoo.com"
}
{
"_id" : ObjectId("6014331de1ef9ab1f708dddb"),
"item" : "card",
"email" : "zzz#yahoo.com"
}
{
"_id" : ObjectId("6014331de1ef9ab1f708dddc"),
"item" : "card",
"email" : "aaa#yahoo.com"
}
After running the query, it turned to:
{
"_id" : ObjectId("6014331de1ef9ab1f708ddd9"),
"item" : "card",
"email" : "zzz#yahoo.com"
}
{
"_id" : ObjectId("6014331de1ef9ab1f708ddda"),
"item" : "card",
"email" : "eee#yahoo.com"
}
{
"_id" : ObjectId("6014331de1ef9ab1f708dddc"),
"item" : "card",
"email" : "aaa#yahoo.com"
}
Tested on MongoDB server version: 4.2.7

Mongo Db query to get distinct records

I have below collections in DB around 1 million records. Hpw to get distinct eventID and eventName
from the collections in D for any particular date like 29-07-2020?
{
"_id" : 1814099,
"eventId" : "LAS012",
"eventName" : "CustomerTab",
"timeStamp" : ISODate("2018-12-31T20:09:09.820Z"),
"eventMethod" : "click",
"resourceName" : "CustomerTab",
"targetType" : "",
"resourseUrl" : "",
"operationName" : "",
"functionStatus" : "",
"results" : "",
"pageId" : "CustomerPage",
"ban" : "290824901",
"jobId" : "87377713",
"wrid" : "87377713",
"jobType" : "IBJ7FXXS",
"Uid" : "sc343x",
"techRegion" : "W",
"mgmtReportingFunction" : "N",
"recordPublishIndicator" : "Y",
"__v" : 0
}
You can use distinct, for example to fetch unique eventID:
let eventIds = await db.collection.distinct('eventID', {
"timeStamp": {
$gte: ISODate("2018-12-30T00:00:00.000Z"),
$lt: ISODate("2018-12-31T00:00:00.000Z")
}
})
If you want to retrieve both fields at the same time you'll have to use an aggregation:
db.collection.aggregate([
{
$match: {
"timeStamp": {
$gte: ISODate("2018-12-30T00:00:00.000Z"),
$lt: ISODate("2018-12-31T00:00:00.000Z")
}
}
},
{
$facet: {
eventIds: [
{
$group: {
_id: "$eventID"
}
}
],
eventName: [
{
$group: {
_id: "$eventName"
}
}
]
}
}
])
And if eventID and eventName are linked to one another:
db.collection.aggregate([
{
$match: {
"timeStamp": {
$gte: ISODate("2018-12-30T00:00:00.000Z"),
$lt: ISODate("2018-12-31T00:00:00.000Z")
}
}
},
{
$group: {
_id: {eventID: "$eventID", eventName: "$eventName"}
}
}
])

Get average per year and label?

I can use this query to get the average sqmPrice for a myArea
db.getCollection('sold').aggregate([
{$match:{}},
{$group: {_id: "$myArea", "sqmPrice": {$avg: "$sqmPrice"} }}
])
Output:
[
{
"_id" : "Yttre Aspudden",
"sqmPrice" : 48845.7777777778
},
{
"_id" : "Hägerstensåsen",
"sqmPrice" : 120
}
]
I would like to group this by year, ideally an object that looks like this:
{
"Yttre Aspudden": {
2008: 1232,
2009: 1244
...
}
...
}
but the formatting is not the most important.
Here is a sample object, I would like to use soldDate:
{
"_id" : ObjectId("5beca41c78f21248ab47f4a6"),
"location" : {
"address" : {
"streetAddress" : "Ljusstöparbacken 26C"
},
"position" : {
"latitude" : 59.31427884,
"longitude" : 18.00892421
},
"namedAreas" : [
"Hägersten-Liljeholmen"
],
"region" : {
"municipalityName" : "Stockholm",
"countyName" : "Stockholms län"
},
"distance" : {
"ocean" : 3777
}
},
"listPrice" : 1895000,
"rent" : 1959,
"floor" : 1,
"livingArea" : 38.5,
"source" : {
"name" : "Fastighetsbyrån",
"id" : 1573,
"type" : "Broker",
"url" : "http://www.fastighetsbyran.se/"
},
"rooms" : 1.5,
"published" : ISODate("2018-11-02T20:55:19.000Z"),
"constructionYear" : 1959,
"objectType" : "Lägenhet",
"booliId" : 3278478,
"soldDate" : ISODate("2018-11-14T00:00:00.000Z"),
"soldPrice" : 2620000,
"soldPriceSource" : "bid",
"url" : "https://www.booli.se/annons/3278478",
"publishedDays" : 1735,
"soldDays" : 1747,
"daysUp" : 160,
"street" : "Ljusstöparbacken",
"streetYear" : "Ljusstöparbacken Hägersten-Liljeholmen 1959",
"yearDay" : 318,
"yearWeek" : 46,
"roughSize" : 40,
"sqmPrice" : 49221,
"myArea" : "Gröndal",
"hotlist" : true
}
You need to generate your keys dynamically so you have to use $arrayToObject. To build an object which aggregates the data you need three $group stages and to create new root of your document you can use $replaceRoot, try:
db.sold.aggregate([
{ $group: {_id: { area: "$myArea", year: { $year: "$soldDate" } }, "sqmPrice": {$avg: "$sqmPrice"} }},
{ $group: { _id: "$_id.area", avgs: { $push: { k: { $toString: "$_id.year" }, v: "$sqmPrice" } } } },
{ $group: { _id: null, areas: { $push: { k: "$_id", v: { $arrayToObject: "$avgs" } } } } },
{ $replaceRoot: { newRoot: { $arrayToObject: "$areas" } } }
])

MongoDB $group will not allow me to $project extra fields

I almost got this one working, but I simply cannot figure out why the $project part does not work for normal fields....
This is "invoice" table:
{
"_id" : "AS6D0",
"invoiceNumber" : 23,
"bookingId" : "AS6D0",
"createDate" : 1490369414,
"dueDate" : 1490369414,
"invoiceLines" : [
{
"lineText" : "Rent Price",
"amountPcs" : "8 x 7500",
"amountTotal" : 60000
},
{
"lineText" : "Discount(TIKO10)",
"amountPcs" : "10%",
"amountTotal" : -10000
},
{
"lineText" : "Final cleaning",
"amountPcs" : "1 x 5000",
"amountTotal" : 5000
},
{
"lineText" : "Reservation fee paid already",
"amountPcs" : "1 x -20000",
"amountTotal" : -20000
}
],
"managerId" : "4M4KE"
}
And this is my query
db.getCollection('invoice').aggregate([
{
$match: {
bookingId: "AS6D0"
}
},
{
$unwind: "$invoiceLines"
},
{
$group: {
_id: "$_id",
sum: {$sum: "$invoiceLines.amountTotal"}
}
},
{
$project:{
"_id" : 0,
"invoiceNumber" : 1,
"dueDate" : 1,
"sum" : 1
}
}
])
I get the _id and the sum, but it wont show invoiceNumber and dueDate
You could use a trick like this :
db.getCollection('invoice').aggregate([
{ $match: { } },
{ $unwind: "$invoiceLines" },
{ $group: { _id: "$_id",
sum: {$sum: "$invoiceLines.amountTotal"},
invoiceNumber: { $addToSet: "$invoiceNumber" },
dueDate: { $addToSet: "$dueDate" } } }
]);
Thanks to Mateo, this is what I ended up with:
(I do the unwind on the fields to avoid single value arrays)
Update : You don't have to $addToSet to reduce the fields into single value arrays and $unwind. Use $first instead.
db.getCollection('invoice').aggregate([
{
$match: {
bookingId: "AS6D0"
}
},
{
$unwind: "$invoiceLines"
},
{
$group: {
_id: "$_id",
sum: {$sum: "$invoiceLines.amountTotal"},
invoiceNumber: { $first: "$invoiceNumber" },
dueDate: { $first: "$dueDate" }
}
},
{
$project:{
"_id" : 0,
"invoiceNumber" : 1,
"dueDate" : 1,
"sum" : 1
}
}
])

MongoDb get distinct items after grouping

I'm using mongodb with the following collection sample
{
"_id" : ObjectId("5703750ca9c436386c4814c9"),
"user_id" : NumberLong(17),
"activitytype_id" : NumberLong(1),
"created_date" : ISODate("2015-10-03T03:52:03.000Z")
},
{
"_id" : ObjectId("5703750ca9c436386c4814ca"),
"s_id" : NumberLong(132919),
"user_id" : NumberLong(17),
"activitytype_id" : NumberLong(4),
"created_date" : ISODate("2016-03-18T17:13:43.000Z")
},
{
"_id" : ObjectId("5703750ca9c436386c4814cb"),
"s_id" : NumberLong(215283),
"user_id" : NumberLong(17),
"activitytype_id" : NumberLong(4),
"created_date" : ISODate("2015-10-03T04:12:33.000Z")
}
,
{
"_id" : ObjectId("5703750ca9c436386c4814cc"),
"s_id" : NumberLong(360888),
"user_id" : NumberLong(17),
"activitytype_id" : NumberLong(4),
"created_date" : ISODate("2015-10-03T04:12:41.000Z")
}
This is my aggregation pipeline
db.activitylogs.aggregate([
{ $group: {
_id: {
user_id: "$user_id",
activitytype_id: "$activitytype_id"
},
activity_log_docs: {
$addToSet: {
s_id: "$s_id",
friend_id: "$friend_id",
playlist_id: "$playlist_id",
created_date:"$created_date"
}
}
}},
])
I need to get distinct s_id in activity_log_docs.
here is a screenshot for the result,
screen shot for the result
i need to avoid duplicated s_id in activity_log_docs array, so i will get distinct s_id
I think something like this should do :
db.activitylogs.aggregate([
{ $group: {
_id: {
user_id: "$user_id",
activitytype_id: "$activitytype_id" ,
s_id:"$s_id"
},
friend_id: {$first:"$friend_id"}}},
playlist_id: {$first:"$playlist_id"}}},
created_date: {$first:"$created_date"}}},
{ $group: {
_id: {
user_id: "$_id.user_id",
activitytype_id: "$_id.activitytype_id"
},
activity_log_docs: {
$addToSet: {
s_id: "$_id.s_id",
friend_id: "$friend_id",
playlist_id: "$playlist_id",
created_date:"$created_date"
}
}
}},
])
But please double check your own field's name.