How to get single document with group that have duplicate key in MongoDB - mongodb

I have an aggregate as follows:
[
{
"$project" : {
"country_code" : "$country_code",
"event" : "$event",
"user_id" : "$user_id",
"os" : "$os",
"register_time" : "$register_time",
"channel" : "$channel"
}
},
{
"$match" : {
"channel" : "000001",
"register_time" : {
"$gt" : ISODate("2016-06-01T00:00:00Z"),
"$lt" : ISODate("2016-06-30T23:59:00Z")
},
"event" : "Register_with_number"
}
},
{
"$group" : {
"_id" : {
"country_code" : "$country_code",
"user_id" : "$user_id",
"os" : "$os",
"channel" : "$channel",
"register_time" : "$register_time"
},
"count" : {
"$sum" : 1
}
}
}
]
And the result is as follows. As you can see, for the country_code IN there are two records with the same user_id but different register_time values. How can I get only one record when the user_id is the same?
{ "_id" : { "country_code" : "US", "user_id" : "d2a0fe91", "os" : "Android", "channel" : "000001", "register_time" : ISODate("2016-06-30T22:47:43Z") }, "count" : 1 }
{ "_id" : { "country_code" : "US", "user_id" : "77911591", "os" : "Android", "channel" : "000001", "register_time" : ISODate("2016-06-30T19:47:21Z") }, "count" : 1 }
{ "_id" : { "country_code" : "IN", "user_id" : "1b72fd12", "os" : "Android", "channel" : "000001", "register_time" : ISODate("2016-06-30T19:17:28Z") }, "count" : 1 }
{ "_id" : { "country_code" : "IN", "user_id" : "1b72fd12", "os" : "Android", "channel" : "000001", "register_time" : ISODate("2016-06-30T19:15:13Z") }, "count" : 1 }
{ "_id" : { "country_code" : "ID", "user_id" : "045f1637", "os" : "Android", "channel" : "000001", "register_time" : ISODate("2016-06-30T19:02:19Z") }, "count" : 1 }

There are several solutions, as you did not mention what the document should look like when there are several documents with the same user but different register_time.
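The following changes your last $group stage: register_time is removed from the group key (so each user yields one document), and the stage either keeps an array of all register_time values with $push or - if you just need one - keeps one of them with $first. Note that $first / $last only return a well-defined value when the documents reach $group in a defined order, so if you add a $sort on register_time before the $group, you can use $first / $last to keep the earliest / latest register_time per user, which is perhaps your desired result.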
"$group" : {
"_id" : {
"country_code" : "$country_code",
"user_id" : "$user_id",
"os" : "$os",
"channel" : "$channel",
},
"register_times" : {
$push: "$register_time"
},
"any_register_time" : {
$first: "$register_time"
},
"count" : {
"$sum" : 1
}
}

Related

Find element from document

I've migrated a MySQL database to MongoDB for a project. My database is about a pharmacy. I have a collection of factures (invoices), each of which contains the list of medicines sold. I'm trying to find the medicine that was sold the most.
{
"_id" : ObjectId("5c3c71f2760c4f47c701fe13"),
"cliente" : {
"tlmv" : "910987654",
"nome" : "Josefina Vivida da Paz",
"nif" : "122133144",
"pontos" : NumberLong(0),
"id" : NumberLong(2),
"pass" : "1eab06cab995dfeb32b6b7c709b8a6c62cabacfe",
"email" : "josefina#hotmail.pt"
},
"data_f" : ISODate("2018-06-03T00:00:01Z"),
"data_s" : ISODate("2018-06-02T23:55:59Z"),
"desconto" : 0,
"funcionario" : {
"tlmv" : "934567123",
"nome" : "Pedro Jorge Rito Lima",
"ordenado" : 800.32,
"iban" : "PT 50 2751 3262 76598707612",
"pass" : "3cfa1c281281ffe4f5db2ccfbe7a17f8a9479808",
"niss" : "14385639201",
"id" : NumberLong(2),
"cedula" : "54321"
},
"id" : NumberLong(15),
"id_c" : NumberLong(2),
"id_func" : NumberLong(2),
"medicamentos" : [
{
"categoria" : "Analgésico",
"receita" : "N",
"des" : "Ben-U-Ron 500",
"qt" : 20,
"formato" : "granulado",
"qt_v" : NumberLong(1),
"pos" : "A12",
"lab" : "Laboratório do Rio Ave",
"preco_l" : 2.51,
"un" : "un",
"preco" : 2.51,
"preco_v" : 2.51,
"id" : NumberLong(1),
"stock" : NumberLong(21)
},
{
"categoria" : "Estatina",
"receita" : "S",
"des" : "Sinvastatina",
"qt" : 30,
"formato" : "comprimido",
"qt_v" : NumberLong(1),
"pos" : "K23",
"lab" : "Mylan",
"preco_l" : 16.45,
"un" : "un",
"preco" : 16.45,
"preco_v" : 16.45,
"id" : NumberLong(6),
"stock" : NumberLong(25)
}
],
"pontos_r" : NumberLong(10),
"pontos_u" : NumberLong(0),
"total" : 18.96
}
So my objective is to count every medicine ("medicamento"), grouped by its description ("des"), similar to COUNT with GROUP BY in MySQL. Any ideas how? The document above is a single facture.
You need $unwind to get a medicine per document and then $group with $sum to get count per medicine, try:
db.collection.aggregate([
{
$unwind: "$medicamentos"
},
{
$group: {
_id: "$medicamentos.des",
count: { $sum: 1 }
}
}
])

How to query in mongodb to get distinct record with count

I have a collection whose name is transactions.
Here are some sample documents from the transactions collection:
{
"_id" : ObjectId("58aaec83f1dc6914082afe31"),
"amount" : "33.00",
"coordinates" : {
"lat" : "4.8168",
"lon" : "36.4909"
},
"cuisine" : "Mexican",
"date" : ISODate("0062-02-22T11:46:52.738+05:30"),
"location" : {
"address" : "2414 Trudie Rue",
"city" : "West Alisa",
"state" : "New York",
"zip" : "10000"
},
"place_name" : "Outdoors",
"place_type" : "Wooden"
},
{
"_id" : ObjectId("58aaec83f1dc6914082afe32"),
"amount" : "557.00",
"coordinates" : {
"lat" : "-36.6784",
"lon" : "131.3698"
},
"cuisine" : "Australian",
"date" : ISODate("1294-10-04T19:53:15.562+05:30"),
"location" : {
"address" : "5084 Buckridge Cove",
"city" : "Sylviaview",
"state" : "Hawaii",
"zip" : "51416-6918"
},
"place_name" : "Toys",
"place_type" : "Cotton"
},
{
"_id" : ObjectId("58aaec83f1dc6914082afe33"),
"amount" : "339.00",
"coordinates" : {
"lat" : "45.1468",
"lon" : "91.4097"
},
"cuisine" : "Mexican",
"date" : ISODate("1568-11-25T02:54:53.046+05:30"),
"location" : {
"address" : "94614 Harry Island",
"city" : "Cartwrightside",
"state" : "Louisiana",
"zip" : "18825"
},
"place_name" : "Clothing",
"place_type" : "Frozen"
},
{
"_id" : ObjectId("58aaec83f1dc6914082afe34"),
"amount" : "173.00",
"coordinates" : {
"lat" : "-57.2738",
"lon" : "19.6381"
},
"cuisine" : "Australian",
"date" : ISODate("0804-05-07T03:00:07.724+05:30"),
"location" : {
"address" : "1933 Lewis Street",
"city" : "Aufderharville",
"state" : "Louisiana",
"zip" : "23416"
},
"place_name" : "Beauty",
"place_type" : "Fresh"
},
{
"_id" : ObjectId("58aaec83f1dc6914082afe34"),
"amount" : "173.00",
"coordinates" : {
"lat" : "-57.2738",
"lon" : "19.6381"
},
"cuisine" : "Australian",
"date" : ISODate("0804-05-07T03:00:07.724+05:30"),
"location" : {
"address" : "1933 Lewis Street",
"city" : "Aufderharville",
"state" : "Louisiana",
"zip" : "23416"
},
"place_name" : "Beauty",
"place_type" : "Fresh"
}
I want to get the list of distinct cuisine with total count
Output
{
"name" : 'Mexican',
"count" : '2'
},
{
"name" : 'Australian',
"count" : '3'
},
I could have done this easily with MySQL, but I don't know how to do it in MongoDB as I'm new to it.
I have tried the following, but it returned nothing:
db.transactions.aggregate(
{$group: {_id:'$cuisine'},count:{$sum:1}}
).result;
Please try the code below. You should group by cuisine the records and get the count of them. Later in project pipeline you can define the final look.
db.transactions.aggregate([
{ $group: { _id: "$cuisine", count: { $sum: 1 } } },
{ $project:{ _id: 0, name: "$_id", count:"$count" } }
]);

mongodb sort with skip and limit not sort the record according to index

I am trying to do pagination with Mongo using skip and limit.
I want to get the page records sorted by register_time. In the database, the records are sorted by index and not by register_time.
How can I make multiple pages of records (multiple skips) follow the same register_time order ?
db.collection.aggregate(
[ { "$project" : { "os" : "$os",
"register_time" : "$register_time",
"channel" : "$channel",
"event" : "$event",
"user_id" : "$user_id" } },
{ "$match" : { "register_time" :
{ "$gt" : ISODate("2016-06-23T00:00:00Z"),
"$lt" : ISODate("2050-06-25T23:59:00Z") },
"event" : "Register_with_number",
"channel" : "001" } },
{ "$group" : { "_id" :
{ "register_time" : "$register_time",
"user_id" : "$user_id",
"os" : "$os",
"channel" : "$channel" },
"count" : { "$sum" : 1 } } },
{"$skip":4},
{"$limit":10},
{ "$sort" : {"_id.register_time" : -1 } } ])
And here is the skip result
{ "_id" : { "register_time" : ISODate("2016-06-24T08:49:36Z"), "user_id" : "65675f96", "os" : "Android", "channel" : "040401" }, "count" : 1 }
{ "_id" : { "register_time" : ISODate("2016-06-24T06:29:56Z"), "user_id" : "f61d0572", "os" : "Android", "channel" : "040401" }, "count" : 1 }
{ "_id" : { "register_time" : ISODate("2016-06-24T04:13:31Z"), "user_id" : "d7d1349d", "os" : "Android", "channel" : "040401" }, "count" : 1 }
{ "_id" : { "register_time" : ISODate("2016-06-24T03:40:13Z"), "user_id" : "ecea2908", "os" : "Android", "channel" : "040401" }, "count" : 1 }
And here is the second skip result:
{ "_id" : { "register_time" : ISODate("2016-06-24T09:05:13Z"), "user_id" : "6fde06a6", "os" : "Android", "channel" : "040401" }, "count" : 1 }
{ "_id" : { "register_time" : ISODate("2016-06-24T07:47:46Z"), "user_id" : "1e5e5712", "os" : "Android", "channel" : "040401" }, "count" : 1 }
{ "_id" : { "register_time" : ISODate("2016-06-24T05:34:55Z"), "user_id" : "47dfaa32", "os" : "Android", "channel" : "040401" }, "count" : 1 }
{ "_id" : { "register_time" : ISODate("2016-06-24T05:15:03Z"), "user_id" : "70960ae2", "os" : "Android", "channel" : "040401" }, "count" : 1 }
As you can see, the two page's register_time are not following the same order.
How can I make different pages (skips) follow one consistent order? Thanks.
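You need to move $sort before $skip and $limit.
With $skip and $limit first, you are cutting an arbitrary slice out of the $group output (whose order is not guaranteed), and the later $sort only orders the documents inside that slice, so pages are not consistent with each other.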
The order matters.
Remember that $sort must be before $skip and $limit. If sort is after them, weird results might be returned.
And if you want to add lookups, the $lookup element should be at the end.
Example:
aggregate([
{ "$match" : { "type" : "xyz" } },
{ "$sort" : { "createdDateTime" : -1 } },
{ "$skip" : 50 },
{ "$limit" : 10 },
{ "$lookup" : {....

Mongodb Aggregation to count element pairs and individual elements

I have following data:
{ "id" : 1, "lsPairs" :[{"location" : "L0", "service" : "S0" }]}
{ "id" : 2, "lsPairs" :[{"location" : "L0", "service" : "S0" },{"location" : "L1", "service" : "S1"}]}
{ "id" : 3, "lsPairs" :[{"location" : "L0", "service" : "S0" },{"location" : "L1", "service" : "S1"}, {"location" : "L2", "service" : "S2"}]}
{ "id" : 4, "lsPairs" :[{"location" : "L0", "service" : "S0" },{"location" : "L1", "service" : "S1"},{"location" : "L2", "service" : "S2"}, {"location" : "L3", "service" : "S3"}]}
I want to get location count, service count and (location,service) pair count
{ "_id" : "L3" , "count" : 1}
{ "_id" : "L2" , "count" : 2}
{ "_id" : "L1" , "count" : 3}
{ "_id" : "L0" , "count" : 4}
{ "_id" : "S3" , "count" : 1}
{ "_id" : "S2" , "count" : 2}
{ "_id" : "S1" , "count" : 3}
{ "_id" : "S0" , "count" : 4}
{ "_id" : { "loc" : "L2" , "srv" : "S2"} , "count" : 2}
{ "_id" : { "loc" : "L1" , "srv" : "S1"} , "count" : 3}
{ "_id" : { "loc" : "L3" , "srv" : "S3"} , "count" : 1}
{ "_id" : { "loc" : "L0" , "srv" : "S0"} , "count" : 4}
Currently I run the group operation three times, each time grouping by a different _id.
Is there a way to get all of these results with a single group?
You will need to deconstruct the array with $unwind then $group the documents.
collection.aggregate([
{ $unwind: "$lsPairs" },
{ $group: {
_id: {
"loc": "$lsPairs.location",
"srv": "$lsPairs.service"
},
"count": { $sum: 1 }
}}
])
Output
{ "_id" : { "loc" : "L3", "srv" : "S3" }, "count" : 1 }
{ "_id" : { "loc" : "L2", "srv" : "S2" }, "count" : 2 }
{ "_id" : { "loc" : "L1", "srv" : "S1" }, "count" : 3 }
{ "_id" : { "loc" : "L0", "srv" : "S0" }, "count" : 4 }
Save the first-round (location, service) pair counts to a collection and reuse it.
db.locservice.aggregate([ {$unwind:"$lsPairs"},
{$group:{_id:"$lsPairs",count: { $sum: 1}}},
{$sort:{_id:1}},
{$out:"lsp"} ])
Take location from temp collection and group it.
db.lsp.aggregate([{$project:{_id:0, loc:"$_id.location", count:1}},
{$group:{_id:"$loc", cnt:{$sum:"$count"}}}, {$sort:{_id:1}} ])
Take service from temp collection and group it.
db.lsp.aggregate([{$project:{_id:0, srv:"$_id.service", count:1}},
{$group:{_id:"$srv", cnt:{$sum:"$count"}}}, {$sort:{_id:1}} ])
In the following, I push the location and the service into two arrays. Can I group on both arrays at the same time?
db.locservice.aggregate([ {$unwind:"$lsPairs"},
{$group:{_id:"$lsPairs",count: { $sum: 1},
locs:{$push:{item:"$lsPairs.location"}},
srvs:{$push:{item:"$lsPairs.service"}}}},
{$project:{count:1, locs:1, srvs:1}} ])
{ "_id" : { "location" : "L3", "service" : "S3" }, "count" : 1, "locs" : [ { "item" : "L3" } ], "srvs" : [ { "item" : "S3" } ] }
{ "_id" : { "location" : "L2", "service" : "S2" }, "count" : 2, "locs" : [ { "item" : "L2" }, { "item" : "L2" } ], "srvs" : [ { "item" : "S2" }, { "item" : "S2" } ] }
{ "_id" : { "location" : "L1", "service" : "S1" }, "count" : 3, "locs" : [ { "item" : "L1" }, { "item" : "L1" }, { "item" : "L1" } ], "srvs" : [ { "item" : "S1" }, { "item" : "S1" }, { "item" : "S1" } ] }
{ "_id" : { "location" : "L0", "service" : "S0" }, "count" : 4, "locs" : [ { "item" : "L0" }, { "item" : "L0" }, { "item" : "L0" }, { "item" : "L0" } ], "srvs" : [ { "item" : "S0" }, { "item" : "S0" }, { "item" : "S0" }, { "item" : "S0" } ] }

Aggregation framework performance on a 10M collection

I have a collection of 10M documents, that is a pre-aggregation of daily events.
A simple $group took more than 8s, is this performance normal ?
Some data from the profiler:
{
"op" : "command",
"ns" : "analytics.$cmd",
"command" : {
"aggregate" : "aggregation",
"pipeline" : [
{
"$group" : {
"_id" : "",
"hits" : {
"$sum" : "$hits"
}
}
}
]
},
"ntoreturn" : 1,
"keyUpdates" : 0,
"numYield" : 15,
"lockStats" : {
"timeLockedMicros" : {
"r" : NumberLong(17169805),
"w" : NumberLong(0)
},
"timeAcquiringMicros" : {
"r" : NumberLong(8582619),
"w" : NumberLong(294)
}
},
"responseLength" : 78,
"millis" : 8594,
"ts" : ISODate("2013-12-04T15:57:38.217Z"),
"client" : "127.0.0.1",
"allUsers" : [ ],
"user" : ""
}
Here is one single document
{
"_id" : ObjectId("529e21ee67e807418500daeb"),
"date" : ISODate("2012-09-19T00:00:00Z"),
"hits" : 1,
"infos" : {
"sourceValue" : NumberLong(1),
"eventType" : "createUser",
"sourceType" : "user",
"instance" : "xxx",
"targetType" : "user",
"targetValue" : NumberLong(15)
}
}