MongoDB groupby query - mongodb

I have colletions containing records like
{ "type" : "me", "tid" : "1" }
{ "type" : "me", "tid" : "1" }
{ "type" : "me", "tid" : "1" }
{ "type" : "you", "tid" : "1" }
{ "type" : "you", "tid" : "1" }
{ "type" : "me", "tid" : "2" }
{ "type" : "me", "tid" : "2"}
{ "type" : "you", "tid" : "2"}
{ "type" : "you", "tid" : "2" }
{ "type" : "you", "tid" : "2"}
I have want result like below
[
{"tid" : "1","me" : 3,"you": 2},
{"tid" : "2","me" : 2,"you": 3}
]
I have tried group and; aggregate queries doesn't get required result format.
below is the group query.
db.coll.group({
key: {tid : 1,type:1},
cond: { tid : { "$in" : [ "1","2"]} },
reduce: function (curr,result) {
result.total = result.total + 1
},
initial: { total : 0}
})
it result is like
[
{"tid" : "1", "type" : "me" ,"total": 3 },
{"tid" : "1","type" : "you" ,"total": 2 },
{"tid" : "2", "type" : "me" ,"total": 2 },
{"tid" : "2","type" : "you" ,"total": 3 }
]
following is aggregate query
db.coll.aggregate([
{$match : { "tid" : {"$in" : ["1","2"]}}},
{$group : { _id : {tid : "$tid",type : "$type"},total : {"$sum" : 1}}}
])
gives following result
{
"result" :
[
{"_id" : {"tid" : "1","type" : "me"},"total" : 3},
{"_id" : {"tid" : "2","type" : "me" },"total" : 2},
{"_id" : {"tid" : "2","type" : "you"},"total" : 3}
]
"ok" : 1
}
it is possible to obtain I specified result or I have to do some manipulation in my code.
Thanks

If you change your aggregation to this:
db.so.aggregate([
{ $match : { "tid" : { "$in" : ["1", "2"] } } },
{ $group : {
_id : { tid : "$tid", type : "$type" },
total : { "$sum" : 1 }
} },
{ $group : {
_id : "$_id.tid",
values: { $push: { type: "$_id.type", total: '$total' } }
} }
])
Then your output is:
{
"result" : [
{
"_id" : "1",
"values" : [
{ "type" : "you", "total" : 2 },
{ "type" : "me", "total" : 3 }
]
},
{
"_id" : "2",
"values" : [
{ "type" : "me", "total" : 2 },
{ "type" : "you", "total" : 3 }
]
}
],
"ok" : 1
}
Although that is not the same as what you want, it is going to be the closest that you can get. And in your application, you can easily pull out the values in the same was as with what you would like to get out of it.
Just keep in mind, that in general you can not promote a value (you, me) to a key — unless your key is of a limited set (3-4 items max).

Related

Mongodb - Return all the associative documents with value for the key derived from another query

I have a document of following structure:
{
"Type" : "Request",
"Cat" : "A",
"ID" : 10
}
{
"Type" : "Processed",
"Cat" : "A",
"ID" : 10
}
{
"Type" : "Receieved",
"Cat" : "A",
"ID" : 10
}
{
"Type" : "Receieved",
"Cat" : "B",
"ID" : 11
}
{
"Type" : "Processed",
"Cat" : "C",
"ID" : 12
}
I want documents:
Those documents with Type: "Processed" and get its ID
And all the associated documents with the ID got from above (1st step).
I need the results to be like this:
{
"Type" : "Request"
"Cat" : "A"
"ID" : 10
}
{
"Type" : "Processed"
"Cat" : "A"
"ID" : 10
}
{
"Type" : "Receieved"
"Cat" : "A"
"ID" : 10
}
{
"Type" : "Processed"
"Cat" : "C"
"ID" : 12
}
Can someone help me on how to achieve this ? I used elemmatch under $match in aggregate - but its not working as expected.
You can try something like
db.collection.aggregate([
{$project : {
"ID":1,
"doc.Type" : "$Type",
"doc.Cat" : "$Cat",
"doc.ID" : "$ID"
}
}
{$group : {
_id : "$ID",
docs : {$push : doc}
}
},
{$match : {
"docs.Type":"Processed"
}
},
{$unwind : "$docs"},
{$project : {
_id : 0,
docs : 0,
"Type" : "$docs.Type",
"Cat" : "$docs.Cat",
"ID" : "$docs.ID"
}
}
])

Mongodb : get whether a document is the latest with a field value and filter on the result

I am trying to port an existing SQL schema into Mongo.
We have document tables, with sometimes several times the same document, with a different revision but the same reference. I want to get only the latest revisions of the documents.
A sample input data:
{
"Uid" : "xxx",
"status" : "ACCEPTED",
"reference" : "DOC305",
"code" : "305-D",
"title" : "Document 305",
"creationdate" : ISODate("2011-11-24T15:13:28.887Z"),
"creator" : "X"
},
{
"Uid" : "xxx",
"status" : "COMMENTED",
"reference" : "DOC306",
"code" : "306-A",
"title" : "Document 306",
"creationdate" : ISODate("2011-11-28T07:23:18.807Z"),
"creator" : "X"
},
{
"Uid" : "xxx",
"status" : "COMMENTED",
"reference" : "DOC306",
"code" : "306-B",
"title" : "Document 306",
"creationdate" : ISODate("2011-11-28T07:26:49.447Z"),
"creator" : "X"
},
{
"Uid" : "xxx",
"status" : "ACCEPTED",
"reference" : "DOC501",
"code" : "501-A",
"title" : "Document 501",
"creationdate" : ISODate("2011-11-19T06:30:35.757Z"),
"creator" : "X"
},
{
"Uid" : "xxx",
"status" : "ACCEPTED",
"reference" : "DOC501",
"code" : "501-B",
"title" : "Document 501",
"creationdate" : ISODate("2011-11-19T06:40:32.957Z"),
"creator" : "X"
}
Given this data, I want this result set (sometimes I want only the last revision, sometimes I want all revisions with an attribute telling me whether it's the latest):
{
"Uid" : "xxx",
"status" : "ACCEPTED",
"reference" : "DOC305",
"code" : "305-D",
"title" : "Document 305",
"creationdate" : ISODate("2011-11-24T15:13:28.887Z"),
"creator" : "X",
"lastrev" : true
},
{
"Uid" : "xxx",
"status" : "COMMENTED",
"reference" : "DOC306",
"code" : "306-B",
"title" : "Document 306",
"creationdate" : ISODate("2011-11-28T07:26:49.447Z"),
"creator" : "X",
"lastrev" : true
},
{
"Uid" : "xxx",
"status" : "ACCEPTED",
"reference" : "DOC501",
"code" : "501-B",
"title" : "Document 501",
"creationdate" : ISODate("2011-11-19T06:40:32.957Z"),
"creator" : "X",
"lastrev" : true
}
I already have a bunch of filters, sorting, and skip/limit (for pagination of data), so the final result set should be mindful of these constraints.
The current "find" query (built with the .Net driver), which filters fine but gives me all revisions of each document:
coll.find(
{ "$and" : [
{ "$or" : [
{ "deletedid" : { "$exists" : false } },
{ "deletedid" : null }
] },
{ "$or" : [
{ "taskid" : { "$exists" : false } },
{ "taskid" : null }
] },
{ "objecttypeuid" : { "$in" : ["xxxxx"] } }
] },
{ "_id" : 0, "Uid" : 1, "lastrev" : 1, "title" : 1, "code" : 1, "creator" : 1, "owner" : 1, "modificator" : 1, "status" : 1, "reference": 1, "creationdate": 1 }
).sort({ "creationdate" : 1 }).skip(0).limit(10);
Using another question, I have been able to build this aggregation, which gives me the latest revision of each document, but with not enough attributes in the result:
coll.aggregate([
{ $sort: { "creationdate": 1 } },
{
$group: {
"_id": "$reference",
result: { $last: "$creationdate" },
creationdate: { $last: "$creationdate" }
}
}
]);
I would like to integrating the aggregate with the find query.
I have found the way to mix aggregation and filtering:
coll.aggregate(
[
{ $match: {
"$and" : [
{ "$or" : [
{ "deletedid" : { "$exists" : false } },
{ "deletedid" : null }
] },
{ "$or" : [
{ "taskid" : { "$exists" : false } },
{ "taskid" : null }
] },
{ "objecttypeuid" : { "$in" : ["xxx"] } }
]
}
},
{ $sort: { "creationdate": 1 } },
{ $group: {
"_id": "$reference",
"doc": { "$last": "$$ROOT" }
}
},
{ $sort: { "doc.creationdate": 1 } },
{ $skip: skip },
{ $limit: limit }
],
{ allowDiskUse: true }
);
For each result node, this gives me a "doc" node with the document data. It has too much data still (it's missing projections), but it's a start.
Translated in .Net:
FilterDefinitionBuilder<BsonDocument> filterBuilder = Builders<BsonDocument>.Filter;
FilterDefinition<BsonDocument> filters = filterBuilder.Empty;
filters = filters & (filterBuilder.Not(filterBuilder.Exists("deletedid")) | filterBuilder.Eq("deletedid", BsonNull.Value));
filters = filters & (filterBuilder.Not(filterBuilder.Exists("taskid")) | filterBuilder.Eq("taskid", BsonNull.Value));
foreach (var f in fieldFilters) {
filters = filters & filterBuilder.In(f.Key, f.Value);
}
var sort = Builders<BsonDocument>.Sort.Ascending(orderby);
var group = new BsonDocument {
{ "_id", "$reference" },
{ "doc", new BsonDocument("$last", "$$ROOT") }
};
var aggregate = coll.Aggregate(new AggregateOptions { AllowDiskUse = true })
.Match(filters)
.Sort(sort)
.Group(group)
.Sort(sort)
.Skip(skip)
.Limit(rows);
return aggregate.ToList();
I'm pretty sure there are better ways to do this, though.
You answer is pretty close. Instead of $last, $max is better.
About $last operator:
Returns the value that results from applying an expression to the last document in a group of documents that share the same group by a field. Only meaningful when documents are in a defined order.
Get the last revision in each group, see code below in mongo shell:
db.collection.aggregate([
{
$group: {
_id: '$reference',
doc: {
$max: {
"creationdate" : "$creationdate",
"code" : "$code",
"Uid" : "$Uid",
"status" : "$status",
"title" : "$title",
"creator" : "$creator"
}
}
}
},
{
$project: {
_id: 0,
Uid: "$doc.Uid",
status: "$doc.status",
reference: "$_id",
code: "$doc.code",
title: "$doc.title",
creationdate: "$doc.creationdate",
creator: "$doc.creator"
}
}
]).pretty()
The output as your expect:
{
"Uid" : "xxx",
"status" : "ACCEPTED",
"reference" : "DOC501",
"code" : "501-B",
"title" : "Document 501",
"creationdate" : ISODate("2011-11-19T06:40:32.957Z"),
"creator" : "X"
}
{
"Uid" : "xxx",
"status" : "COMMENTED",
"reference" : "DOC306",
"code" : "306-B",
"title" : "Document 306",
"creationdate" : ISODate("2011-11-28T07:26:49.447Z"),
"creator" : "X"
}
{
"Uid" : "xxx",
"status" : "ACCEPTED",
"reference" : "DOC305",
"code" : "305-D",
"title" : "Document 305",
"creationdate" : ISODate("2011-11-24T15:13:28.887Z"),
"creator" : "X"
}

Delete portion of Sub Document Collection in Mongodb

I am trying to delete a section of sub documents from a collections..
Tried (and many others):
db.collection.remove( {'Segments': {$gte: '20150612141038' }} )
db.collection.remove( { 'Segments.$' : {$gte: '0150612141038' }} )
db.collection.deleteMany( { 'Segments.$' : {$gte: '0150612141038' }} )
db.collection.deleteOne( { 'Segments.$' : {$eq: '0150612141038' }} )
just can't figure out what I am doing wrong...
{
"_id" : ObjectId("56e32c5147e030bc0a000035"),
"Date" : "2015-06-12",
"UnitID" : "5",
"Segments" : {
"20150612141037" : {
"Parameters" : {
"647" : "0",
"649" : "16",
"653" : "0",
"655" : "0",
"656" : "0",
"658" : "98",
"664" : "447.0486",
"666" : "442.7083",
"677" : "122.8004",
}
},
"20150612141038" : {
"Parameters" : {
"658" : "96",
"664" : "451.3889",
"666" : "447.0486",
"677" : "122.7892"
}
},
"20150612141039" : {
"Parameters" : {
"658" : "44",
"664" : "442.7083",
"677" : "122.8004",
"704" : "1"
}
},
First of all I think your queries are wrong.
If you try this query to the mongoShell:
db.bios.find({'Segments': {$gte: '20150612141038'}})
you don't have anything as output because 20150612141038 is a document and not a field. Maybe you can reshape your schema to set 20150612141038 as a field and Segments as an array of documents:
{
"_id" : ObjectId("56e32c5147e030bc0a000035"),
"Date" : "2015-06-12",
"UnitID" : "5",
"Segments" : [
{
"segmentId" : "20150612141037",
"Parameters" : {
"647" : "0",
"649" : "16",
"653" : "0",
"655" : "0",
"656" : "0",
"658" : "98",
"664" : "447.0486",
"666" : "442.7083",
"677" : "122.8004"
}
},
{
"segmentId" : "20150612141038",
"Parameters" : {
"658" : "96",
"664" : "451.3889",
"666" : "447.0486",
"677" : "122.7892"
}
},
{
"segmentId" : "20150612141039",
"Parameters" : {
"658" : "44",
"664" : "442.7083",
"677" : "122.8004",
"704" : "1"
}
}
]
}
and then you can use $pull operator to remove documents from Segments array where the field Segments.segmentId has a certain value:
db.collection.update({},
{$pull:{Segments:{"segmentId":{$eq:"20150612141039"}}}},
{ multi: true}
)

MongoDB Aggregation - return default value for documents that don't match query

I'm having trouble figuring out the right aggregation pipe operations to return the results I need.
I have a collection similar to the following :-
{
"_id" : "writer1",
"Name" : "writer1",
"Website" : "website1",
"Reviews" : [
{
"Film" : {
"Name" : "Jurassic Park",
"Genre" : "Action"
},
"Score" : 4
},
{
"Technology" : {
"Name" : "Mad Max",
"Genre" : "Action"
},
"Score" : 5
}
]
}
{
"_id" : "writer2",
"Name" : "writer2",
"Website" : "website1",
"Reviews" : [
{
"Technology" : {
"Name" : "Mad Max",
"Genre" : "Action"
},
"Score" : 5
}
]
}
And this is my aggregation so far : -
db.writers.aggregate([
{ "$unwind" : "$Reviews" },
{ "$match" : { "Reviews.Film.Name" : "Jurassic Park" } },
{ "$group" : { "_id" : "$Website" , "score" : { "$avg" : "$Reviews.Score" },
writers :{ $push: { name:"$Name", score:"$Reviews.Score" } }
}}
])
This returns only writers who have a review of the matching film and also only websites that have at least 1 writer who has reviewed the film,
however, I need to return all websites containing a list of their all writers, with a score of 0 if they haven't written a review for the specified film.
so, I am currently getting : -
{ "_id" : "website1", "score" : 4, "writers" : [ { "name" : "writer1", "score" : 4 } ] }
When I actually need : -
{ "_id" : "website1", "score" : 2, "writers" : [ { "name" : "writer1", "score" : 4 },{ "name" :"writer2", "score" : 0 } ] }
Can anyone point me in the right direction?
Cheers

Mongodb Aggregation to count element pairs and individual elements

I have following data:
{ "id" : 1, "lsPairs" :[{"location" : "L0", "service" : "S0" }]}
{ "id" : 2, "lsPairs" :[{"location" : "L0", "service" : "S0" },{"location" : "L1", "service" : "S1"}]}
{ "id" : 3, "lsPairs" :[{"location" : "L0", "service" : "S0" },{"location" : "L1", "service" : "S1"}, {"location" : "L2", "service" : "S2"}]}
{ "id" : 4, "lsPairs" :[{"location" : "L0", "service" : "S0" },{"location" : "L1", "service" : "S1"},{"location" : "L2", "service" : "S2"}, {"location" : "L3", "service" : "S3"}]}`
I want to get location count, service count and (location,service) pair count
{ "_id" : "L3" , "count" : 1}
{ "_id" : "L2" , "count" : 2}
{ "_id" : "L1" , "count" : 3}
{ "_id" : "L0" , "count" : 4}
{ "_id" : "S3" , "count" : 1}
{ "_id" : "S2" , "count" : 2}
{ "_id" : "S1" , "count" : 3}
{ "_id" : "S0" , "count" : 4}
{ "_id" : { "loc" : "L2" , "srv" : "S2"} , "count" : 2}
{ "_id" : { "loc" : "L1" , "srv" : "S1"} , "count" : 3}
{ "_id" : { "loc" : "L3" , "srv" : "S3"} , "count" : 1}
{ "_id" : { "loc" : "L0" , "srv" : "S0"} , "count" : 4}`
Now I run group function three times, group different id.
Any idea for using one group to get these result?
You will need to deconstruct the array with $unwind then $group the documents.
collection.aggregate([
{ $unwind: "$lsPairs" },
{ $group: {
_id: {
"loc": "$lsPairs.location",
"srv": "$lsPairs.service"
},
"count": { $sum: 1 }
}}
])
Output
{ "_id" : { "loc" : "L3", "srv" : "S3" }, "count" : 1 }
{ "_id" : { "loc" : "L2", "srv" : "S2" }, "count" : 2 }
{ "_id" : { "loc" : "L1", "srv" : "S1" }, "count" : 3 }
{ "_id" : { "loc" : "L0", "srv" : "S0" }, "count" : 4 }
Keep the first round location-service pair to a collection and reused it.
db.locservice.aggregate([ {$unwind:"$lsPairs"},
{$group:{_id:"$lsPairs",count: { $sum: 1}}},
{$sort:{_id:1}},
{$out:"lsp"} ])
Take location from temp collection and group it.
db.lsp.aggregate([{$project:{_id:0, loc:"$_id.location", count:1}},
{$group:{_id:"$loc", cnt:{$sum:"$count"}}}, {$sort:{_id:1}} ])
Take service from temp collection and group it.
db.lsp.aggregate([{$project:{_id:0, srv:"$_id.service", count:1}},
{$group:{_id:"$srv", cnt:{$sum:"$count"}}}, {$sort:{_id:1}} ])
The following I add location and service to array, can I group two array same time
db.locservice.aggregate([ {$unwind:"$lsPairs"},
{$group:{_id:"$lsPairs",count: { $sum: 1},
locs:{$push:{item:"$lsPairs.location"}},
srvs:{$push:{item:"$lsPairs.service"}}}},
{$project:{count:1, locs:1, srvs:1}} ])
{ "_id" : { "location" : "L3", "service" : "S3" }, "count" : 1, "locs" : [ { "item" : "L3" } ], "srvs" : [ { "item" : "S3" } ] }
{ "_id" : { "location" : "L2", "service" : "S2" }, "count" : 2, "locs" : [ { "item" : "L2" }, { "item" : "L2" } ], "srvs" : [ { "item" : "S2" }, { "item" : "S2" } ] }
{ "_id" : { "location" : "L1", "service" : "S1" }, "count" : 3, "locs" : [ { "item" : "L1" }, { "item" : "L1" }, { "item" : "L1" } ], "srvs" : [ { "item" : "S1" }, { "item" : "S1" }, { "item" : "S1" } ] }
{ "_id" : { "location" : "L0", "service" : "S0" }, "count" : 4, "locs" : [ { "item" : "L0" }, { "item" : "L0" }, { "item" : "L0" }, { "item" : "L0" } ], "srvs" : [ { "item" : "S0" }, { "item" : "S0" }, { "item" : "S0" }, { "item" : "S0" } ] }