MongoDB query to break ties and remove duplicates - mongodb

I have documents which have a Version, URL, and DateAdded field (among others but these are the relevant ones).
I'd like to find all documents where the Version is "5.5" and the DateAdded is less than or equal to January 1, 2013. That's pretty straightforward, but I also want the following behavior:
If two or more documents have the same URL, only return the one with the most recent DateAdded (provided, again, that is is less than or equal to January 1, 2013). It would be great if all of this could be expressed in a single query (but my main concern is performance).
I've been doing this last bit of filtering in my client code (outside of MongoDB) but this ends up being inefficient, not to mention inelegant.
I've also tried using Mongo's MapReduce functionality to accomplish the same thing but this is extremely slow, as it appears to copy much of my collection to another collection.
Is there a performant solution?

This should do the trick.
Example data:
db.foo.insert({ "_id" : ObjectId("528bd5bded29286a62959513"), "Version" : "5.3", "URL" : "foo.bar.com/asdfwoaef", "DateAdded" : ISODate("2012-10-05T00:00:00Z") })
db.foo.insert({ "_id" : ObjectId("528bd5e8ed29286a62959514"), "Version" : "5.6", "URL" : "foo.bar.com/asdfwoaef", "DateAdded" : ISODate("2012-12-05T00:00:00Z") })
db.foo.insert({ "_id" : ObjectId("528bd621ed29286a62959515"), "Version" : "5.5", "URL" : "foo.bar.com/aafoobbb", "DateAdded" : ISODate("2012-11-04T00:00:00Z") })
db.foo.insert({ "_id" : ObjectId("528bd629ed29286a62959516"), "Version" : "5.5", "URL" : "foo.bar.com/aafoobbb", "DateAdded" : ISODate("2012-11-05T00:00:00Z") })
db.foo.insert({ "_id" : ObjectId("528bd642ed29286a62959517"), "Version" : "5.5", "URL" : "foo.bar.com/aafoobbb", "DateAdded" : ISODate("2013-01-02T00:00:00Z") })
db.foo.insert({ "_id" : ObjectId("528bd744ed29286a62959518"), "Version" : "5.5", "URL" : "foo.bar.com/ccbarcc", "DateAdded" : ISODate("2013-01-02T00:00:00Z") })
db.foo.insert({ "_id" : ObjectId("528bd780ed29286a62959519"), "Version" : "5.5", "URL" : "foo.bar.com/ccbarcc", "DateAdded" : ISODate("2012-04-05T00:00:00Z") })
Pipeline:
pipeline = [
{
"$match" : {
"Version" : "5.5",
"DateAdded" : {
"$lt" : ISODate("2013-01-01T00:00:00Z")
}
}
},
{
"$sort" : {
"URL" : 1,
"DateAdded" : -1
}
},
{
"$group" : {
"_id" : "$URL",
"doc" : {
"$first" : {
"id" : "$_id",
"DateAdded" : "$DateAdded"
}
}
}
}
]
db.foo.aggregate(pipeline)
And here is the result:
{
"result" : [
{
"_id" : "foo.bar.com/ccbarcc",
"doc" : {
"id" : ObjectId("528bd780ed29286a62959519"),
"DateAdded" : ISODate("2012-04-05T00:00:00Z")
}
},
{
"_id" : "foo.bar.com/aafoobbb",
"doc" : {
"id" : ObjectId("528bd629ed29286a62959516"),
"DateAdded" : ISODate("2012-11-05T00:00:00Z")
}
}
],
"ok" : 1
}

Related

MongoDB between date range query - nested property

How to write query to return object between dates? Query should search nested property. I use $gte and $lte but it doesn't seem to work as I expected. I want to return 'task' object which has got history.startTime between two dates.
db.tasks.find({'history.startTime' : { '$gte': ISODate("2017-02-04T00:00:00.000Z"), '$lt': ISODate("2017-02-05T23:00:00.000Z")} }).pretty()
{
"_id" : ObjectId("588f53c5d00baa2558fd56ae"),
"desc" : "test3",
"category" : "Category1",
"project" : "Project1",
"_creator" : "582afb3800c1bc1f203edf39",
"history" : [
{
"startTime" : ISODate("2017-02-06T11:49:42.570Z"),
"stopTime" : ISODate("2017-02-06T11:49:45.725Z"),
"_id" : ObjectId("589862d9449b4629f8dbaba7"),
"dt" : 3.155
},
{
"startTime" : ISODate("2017-02-06T08:53:53.086Z"),
"stopTime" : ISODate("2017-02-06T11:47:58.098Z"),
"_id" : ObjectId("5898626e449b4629f8dbaba6"),
"dt" : 10445.012
},
{
"startTime" : ISODate("2017-01-30T15:30:46.287Z"),
"stopTime" : ISODate("2017-01-30T15:32:52.979Z"),
"_id" : ObjectId("588f5c2cd00baa2558fd56b0"),
"dt" : 126.692
},
{
"startTime" : ISODate("2017-01-30T13:55:09.738Z"),
"stopTime" : ISODate("2017-01-30T14:55:13.974Z"),
"_id" : ObjectId("588f53d1d00baa2558fd56af"),
"dt" : 3604.236
}
],
"isCompleted" : false,
"isPerforming" : false,
"duration" : 14179.095000000001,
"updated" : ISODate("2017-02-06T11:49:45.725Z"),
"creationDate" : ISODate("2017-01-30T14:55:01.045Z"),
"__v" : 4
}
It is an array. Your query won't work. You have to use $elemMatch.
db.tasks.find({
'history': {
$elemMatch: {
startTime: {
$gte: ISODate("2017-02-04T00:00:00.000Z"),
$lte: ISODate("2017-02-05T23:00:00.000Z")
}
}
}
});

Mongo query to return latest entry

Hi I have the following data structure in mongodb:
{
"_id" : "4087322f-1ad0-4595-935f-b41ef6d87306",
"lastModifiedDate" : ISODate("2016-11-22T20:48:05.904Z"),
"notes" : " I do not like him that much",
"peopleId" : "1121",
"status" : "asses"
}
I wish to query the data such that it will return the latest value for each peopleId which also matches a chosen status. I've been looking into $group however doing the equal status is causing some issue.
I have got as far as grouping the values by peopleId using:
.aggregate([
{$group : {_id : "$peopleId", "allData" : {$push : "$$ROOT"}}},
{$sort : { lastModifiedDate : -1}}
]).pretty()
To give give me these results:
{
"_id" : "1123",
"allData" : [
{
"_id" : "c9d6a6ce-104d-414a-8d89-512e556d7aba",
"lastModifiedDate" : ISODate("2016-11-22T20:55:35.662Z"),
"notes" : "He's in!",
"peopleId" : "1123",
"status" : "done"
},
{
"_id" : "14e522d8-6cae-42ee-ad52-d5cf3cae0c29",
"lastModifiedDate" : ISODate("2016-11-22T20:56:46.128Z"),
"notes" : "He's in!",
"peopleId" : "1123",
"status" : "asses"
}
]
}
{
"_id" : "1121",
"allData" : [
{
"_id" : "d2c3e5da-8696-4dcc-a5cd-1f2657f0192c",
"lastModifiedDate" : ISODate("2016-11-22T20:46:35.097Z"),
"notes" : " I do not like him",
"peopleId" : "1121",
"status" : "pending"
},
{
"_id" : "4087322f-1ad0-4595-935f-b41ef6d87306",
"lastModifiedDate" : ISODate("2016-11-22T20:48:05.904Z"),
"notes" : " I do not like him that much",
"peopleId" : "1121",
"status" : "asses"
},
{
"_id" : "be4fa5bd-da8c-4c1a-9010-6a86afd11dbc",
"lastModifiedDate" : ISODate("2016-11-22T20:49:27.268Z"),
"notes" : "He's in!\nTue Nov 22 12:49:27 PST 2016 He's TF OUT",
"peopleId" : "1121",
"status" : "done"
}
]
}
From this I need to extract the most recent entry based on lastModifiedDate and then check that is matches a given status.
Here are the solutions.
Solution 1:-
If you want all the fields in the collection in the output.
db.collection.aggregate([
{$match : {"status" : "asses"}},
{$sort : {peopleId : 1, lastModifiedDate : -1}},
{$group : {_id : "$peopleId", "allData" : {$push : "$$ROOT"}}},
{$project: {_id :0, allData : {$slice : ["$allData", 0, 1]}}}
]);
Output:-
{
"allData" : [
{
"_id" : ObjectId("5834c004ba41f1f22e600c7f"),
"lastModifiedDate" : ISODate("2016-11-24T20:48:05.904Z"),
"notes" : " I do not like him that much",
"peopleId" : "1",
"status" : "asses"
}
]
}
Solution 2:-
If you don't want all the fields from the collection in the output.
db.collection.aggregate([
{$match : {"status" : "asses"}},
{$sort : {peopleId : 1, lastModifiedDate : -1}},
{$group : {_id : "$peopleId", lastModDate : {$first : "$lastModifiedDate"}}}
])
Output:-
{
"_id" : "1",
"lastModDate" : ISODate("2016-11-24T20:48:05.904Z")
}
i created a collection with following docs
{ "_id" : "4087322f-1ad0-4595-935f-b41ef6d87306", "lastModifiedDate" : ISODate("2016-11-22T20:48:05.904Z"), "notes" : " I do not like him that much",
"peopleId" : "1121", "status" : "asses" }
{ "_id" : "4087322f-1ad0-4595-935f-b41ef6d87305", "lastModifiedDate" : ISODate("2016-10-22T20:48:05.904Z"), "notes" : " I do not like him that much",
"peopleId" : "1121", "status" : "asses" }
{ "_id" : "4087322f-1ad0-4595-935f-b41ef6d87304", "lastModifiedDate" : ISODate("2016-11-21T20:48:05.904Z"), "notes" : " I do not like him that much",
"peopleId" : "1121", "status" : "asses" }
and if i execute the following query
db.mel.find().sort({lastModifiedDate:-1}).limit(1)
it gives me
{ "_id" : "4087322f-1ad0-4595-935f-b41ef6d87306", "lastModifiedDate" : ISODate("2016-11-22T20:48:05.904Z"), "notes" : " I do not like him that much",
"peopleId" : "1121", "status" : "asses" }
which is latest as per the lastmodifeddate.

Inconsistent query results with embedded documents on MongoDB

I've got a collection called payments with an example of its document shown below:
{
"_id" : ObjectId("579b5ee817e3aaac2f0aebc1"),
"updatedAt" : ISODate("2016-07-29T11:04:01.209-03:00"),
"createdAt" : ISODate("2016-07-29T10:49:28.113-03:00"),
"createdBy" : ObjectId("5763f56010cd7b03008147d4"),
"contract" : ObjectId("578cb907f1575f0300d84d09"),
"recurrence" : [
{
"when" : ISODate("2016-05-29T11:03:45.606-03:00"),
"_id" : ObjectId("579b6241ea945e3631f64e2d"),
"transaction" : {
"createdAt" : ISODate("2016-05-29T11:03:45.608-03:00"),
"tid" : "9999999999999999B01A",
"status" : 4,
"code" : "00",
"message" : "Transação autorizada"
},
"status" : "PAGO"
},
{
"when" : ISODate("2016-06-29T11:03:45.608-03:00"),
"_id" : ObjectId("579b6241ea945e3631f64e2c"),
"transaction" : {
"createdAt" : ISODate("2016-06-29T11:03:45.608-03:00"),
"tid" : "9999999999999999B01A",
"status" : 4,
"code" : "00",
"message" : "Transação autorizada"
},
"status" : "PAGO"
},
{
"when" : ISODate("2016-07-29T11:03:45.608-03:00"),
"_id" : ObjectId("579b6241ea945e3631f64e2b"),
"status" : "ERRO",
"transaction" : {
"code" : "56",
"createdAt" : ISODate("2016-07-29T11:04:01.196-03:00"),
"message" : "Autorização negada",
"status" : 5,
"tid" : "1006993069000730B88A"
}
},
{
"when" : ISODate("2016-07-30T11:03:45.608-03:00"),
"_id" : ObjectId("579b6241ea945e3631f64e2a"),
"status" : "PENDENTE"
},
{
"when" : ISODate("2016-07-31T11:03:45.608-03:00"),
"_id" : ObjectId("579b6241ea945e3631f64e29"),
"status" : "PENDENTE"
},
{
"when" : ISODate("2016-08-01T11:03:45.608-03:00"),
"_id" : ObjectId("579b6241ea945e3631f64e28"),
"status" : "PENDENTE"
}
],
"status" : "PAGO",
"conditions" : {
"originalValue" : 7406.64,
"totalValue" : 7400,
"upfrontValue" : 1500,
"upfrontInstallments" : 3,
"balanceInstallments" : 9
},
"__v" : 0,
"transaction" : {
"code" : "00",
"createdAt" : ISODate("2016-07-29T10:49:46.610-03:00"),
"message" : "Transação autorizada",
"status" : 6,
"tid" : "1006993069000730AF5A"
}
}
If I run the query below, I get the desired document shown above:
db.payments.find({ "recurrence.transaction.tid": "1006993069000730B88A" })
However, if I run this other query, MongoDB returns my entire collection (presumably because it didn't match the subdocument's id):
db.payments.find({ "recurrence._id": ObjectId("579b6241ea945e3631f64e2b") })
Both queries should return the same result! I also checked some other questions including this one so unless I'm going crazy I'm doing the same thing. Not sure why the inconsistent results though.
Tryout this:
db.payments.find({ recurrence : { $elemMatch: { "transaction.tid": "1006993069000730B88A"} } }).pretty()

Get document based on multiple criteria of embedded collection

I have the following document, I need to search for multiple items from the embedded collection"items".
Here's an example of a single SKU
db.sku.findOne()
{
"_id" : NumberLong(1192),
"description" : "Uploaded via CSV",
"items" : [
{
"_id" : NumberLong(2),
"category" : DBRef("category", NumberLong(1)),
"description" : "840 tag visual",
"name" : "840 Visual Mini Round",
"version" : NumberLong(0)
},
{
"_id" : NumberLong(7),
"category" : DBRef("category", NumberLong(2)),
"description" : "Maxi",
"name" : "Maxi",
"version" : NumberLong(0)
},
{
"_id" : NumberLong(11),
"category" : DBRef("category", NumberLong(3)),
"description" : "Button",
"name" : "Button",
"version" : NumberLong(0)
},
{
"_id" : NumberLong(16),
"category" : DBRef("category", NumberLong(4)),
"customizationFields" : [
{
"_class" : "CustomizationField",
"_id" : NumberLong(1),
"displayText" : "Custom Print 1",
"fieldName" : "customPrint1",
"listOrder" : 1,
"maxInputLength" : 12,
"required" : false,
"version" : NumberLong(0)
},
{
"_class" : "CustomizationField",
"_id" : NumberLong(2),
"displayText" : "Custom Print 2",
"fieldName" : "customPrint2",
"listOrder" : 2,
"maxInputLength" : 17,
"required" : false,
"version" : NumberLong(0)
}
],
"description" : "2 custom lines of farm print",
"name" : "Custom 2",
"version" : NumberLong(2)
},
{
"_id" : NumberLong(20),
"category" : DBRef("category", NumberLong(5)),
"description" : "Color Red",
"name" : "Red",
"version" : NumberLong(0)
}
],
"skuCode" : "NF-USDA-XC2/SM-BC-R",
"version" : 0,
"webCowOptions" : "840miniwithcust2"
}
There are repeat items.id throughout the embedded collection. Each Sku is made up of multiple items, all combinations are unique, but one item will be part of many Skus.
I'm struggling with the query structure to get what I'm looking for.
Here are a few things I have tried:
db.sku.find({'items._id':2},{'items._id':7})
That one only returns items with the id of 7
db.sku.find({items:{$all:[{_id:5}]}})
That one doesn't return anything, but it came up when looking for solutions. I found about it in the MongoDB manual
Here's an example of a expected result:
sku:{ "_id" : NumberLong(1013),
"items" : [ { "_id" : NumberLong(5) },
{ "_id" : NumberLong(7) },
{ "_id" : NumberLong(12) },
{ "_id" : NumberLong(16) },
{ "_id" :NumberLong(2) } ] },
sku:
{ "_id" : NumberLong(1014),
"items" : [ { "_id" : NumberLong(5) },
{ "_id" : NumberLong(7) },
{ "_id" : NumberLong(2) },
{ "_id" : NumberLong(16) },
{ "_id" :NumberLong(24) } ] },
sku:
{ "_id" : NumberLong(1015),
"items" : [ { "_id" : NumberLong(5) },
{ "_id" : NumberLong(7) },
{ "_id" : NumberLong(12) },
{ "_id" : NumberLong(2) },
{ "_id" :NumberLong(5) } ] }
Each Sku that comes back has both a item of id:7, and id:2, with any other items they have.
To further clarify, my purpose is to determine how many remaining combinations exist after entering the first couple of items.
Basically a customer will start specifying items, and we'll weed it down to the remaining valid combinations. So Sku.items[0].id=5 can only be combined with items[1].id=7 or items[1].id=10 …. Then items[1].id=7 can only be combined with items[2].id=20 … and so forth
The goal was to simplify my rules for purchase, and drive it all from the Sku codes. I don't know if I dug a deeper hole instead.
Thank you,
On the part of extracting the sku with item IDs 2 and 7, when I recall correctly, you have to use $elemMatch:
db.sku.find({'items' :{ '$all' :[{ '$elemMatch':{ '_id' : 2 }},{'$elemMatch': { '_id' : 7 }}]}} )
which selects all sku where there is each an item with _id 2 and 7.
You can use aggregation pipelines
db.sku.aggregate([
{"$unwind": "$sku.items"},
{"$group": {"_id": "$_id", "items": {"$addToSet":{"_id": "$items._id"}}}},
{"$match": {"items._id": {$all:[2,7]}}}
])

Mongodb pull data from subarray

Hi I have below mongodb collection
{
"_id" : ObjectId("53ce993639203f573671d3f5"),
"user_id" : NumberLong(51),
"buses" : [
{
"slot_id" : NumberLong(50),
"status" : NumberLong(3),
"bus_id" : NumberLong(8)
},
{
"slot_id" : NumberLong(67),
"status" : NumberLong(3),
"bus_id" : NumberLong(12)
}
]
}
i want to pull sub array where bus_id=8.
Final result i want to be like this
{
"_id" : ObjectId("53ce993639203f573671d3f5"),
"user_id" : NumberLong(51),
"buses" : [
{
"slot_id" : NumberLong(67),
"status" : NumberLong(3),
"bus_id" : NumberLong(12)
}
]
}
When i tried with below query
db.collectionname.update({},{$pull: {buses: {bus_id:8}}},{multi: true})
I got below error in console,
Cannot apply $pull/$pullAll modifier to non-array
Can any one please suggest me how to achieve this,and also need php mongodb query also.
Thanks in Advance
Worked fine for me for your sample document:
> db.bus.findOne()
{
"_id" : ObjectId("53ce993639203f573671d3f5"),
"user_id" : NumberLong(51),
"buses" : [
{
"slot_id" : NumberLong(50),
"status" : NumberLong(3),
"bus_id" : NumberLong(8)
},
{
"slot_id" : NumberLong(67),
"status" : NumberLong(3),
"bus_id" : NumberLong(12)
}
]
}
> db.bus.update({}, { "$pull" : { "buses" : { "bus_id" : 8 } } }, { "multi" : true })
WriteResult({ "nMatched" : 1, "nUpserted" : 0, "nModified" : 1 })
> db.bus.findOne()
{
"_id" : ObjectId("53ce993639203f573671d3f5"),
"user_id" : NumberLong(51),
"buses" : [
{
"slot_id" : NumberLong(67),
"status" : NumberLong(3),
"bus_id" : NumberLong(12)
}
]
}
The cause of the problem is that some buses element is not an array. What does the query
> db.bus.find({ "buses.0" : { "$exists" : 0}, "buses" : { "$ne" : [] } })
return? This query finds documents where there is no 0th element of the array and the array is not empty, so it should return documents where buses is not an array.