Mongodb merge chunk with $maxkey value on sharded cluster - mongodb

I have the shard key {thread_id:1,_id:1} on the collection "post",
and I want to merge the following 2 chunks:
{
"_id" : "forum.post-thread_id_\"547dc7c2de2cf22b688b4572\"_id_ObjectId('549c519660e24b65118b456c')",
"lastmod" : Timestamp(3012, 3),
"lastmodEpoch" : ObjectId("50829c0e172de38a3398f72c"),
"ns" : "forum.post",
"min" : {
"thread_id" : "547dc7c2de2cf22b688b4572",
"_id" : ObjectId("549c519660e24b65118b456c")
},
"max" : {
"thread_id" : ObjectId("50901d4e1dd7198161000063"),
"_id" : ObjectId("50901d4e1dd7198161000068")
},
"shard" : "shard3"
}
{
"_id" : "forum.post-thread_id_ObjectId('50901d4e1dd7198161000063')_id_ObjectId('50901d4e1dd7198161000068')",
"lastmod" : Timestamp(604, 0),
"lastmodEpoch" : ObjectId("50829c0e172de38a3398f72c"),
"ns" : "forum.post",
"min" : {
"thread_id" : ObjectId("50901d4e1dd7198161000063"),
"_id" : ObjectId("50901d4e1dd7198161000068")
},
"max" : {
"thread_id" : {
"$maxKey" : 1
},
"_id" : {
"$maxKey" : 1
}
},
"shard" : "shard3"
}
They need to be merged because thread_id is supposed to be a string. In the current state, the 1st chunk holds all new data (string -> ObjectId()), and the 2nd chunk only holds documents whose thread_id is an "ObjectId()".
I have tried this command :
reference
db.runCommand({
mergeChunks : 'forum.post',
bounds : [{
thread_id : "547dc7c2de2cf22b688b4572",
_id : ObjectId("549c519660e24b65118b456c")
}, {
thread_id : {
$type : 127
},
_id : {
$type : 127
}
}
]
})
And i got this error :
{
"ok" : 0,
"errmsg" : "shard key bounds [{
thread_id: " 547dc7c2de2cf22b688b4572 ",
_id: ObjectId(" 549c519660e24b65118b456c ")
},{
thread_id: { $type: 127 }, _id: { $type: 127 } })
are not valid for shard key pattern { thread_id: 1.0, _id: 1.0 }"
}
Does anyone know how to fix this ?
Mongodb Version 2.4.9

It appears that the "mergeChunks" command is only available in later versions (2.6.x and above).
I solved my problem using this command:
db.runCommand({
mergeChunks : 'forum.post',
bounds : [{
thread_id : "547dc7c2de2cf22b688b4572",
_id : ObjectId("549c519660e24b65118b456c")
}, {
thread_id : MaxKey,
_id : MaxKey
}
]
})
It appears that for a "write" we should use the MaxKey value directly:
db.foo.insert({ id : MaxKey });
For a "query" we should use $type 127 (the BSON type number of MaxKey):
db.foo.find({ id : { $type : 127 } });

Related

How to group mongo sub document

I am a newbie to Mongo. I am trying to group by values in a subdocument, and my collection has the following structure:
{
"_id" : ObjectId("589d4e4b270f8b1635d400b1"),
"myShopId" : 439,
"products" : [
{
"productId" : "1234",
"productName" : "sambarpowder 500 gm",
"productCategory" : "masala",
"mrp" : "90",
"_id" : ObjectId("589d595f6da20b72fe006ea9")
},
{
"productId" : "5678",
"productName" : "moong dhal 200 gms",
"productCategory" : "dhal",
"mrp" : "38 ",
"_id" : ObjectId("589d595f6da20b72fe006eaa")
},
{
"productId" : "5678",
"productName" : "moong dhal 200 gms",
"productCategory" : "dhal",
"mrp" : "38 ",
"_id" : ObjectId("589d595f6da20b72fe006eaa")
}
],
"isAlive" : 1,
"__v" : 3
}
Here, I want to do a group by on the products.
For example, in MySQL:
select productCategory from products where shopId = '439' groupby productCategory
How can I achieve this group by on a Mongo subdocument?
My expected output is like:
category : [{
productCategory : masala
_id : ObjectId("589d595f6da20b72fe006ea9")
},
{
productCategory : dhal
_id : ObjectId("589d595f6da20b72fe006eaa")
}
]
Hope this will help,
db.test.aggregate([{
$match: {
myShopId: 439
}
}, {
$unwind: "$products"
}, {
$group: {
_id: {
"productCategory": "$products.productCategory"
},
"id": {
$first: "$products._id"
}
}
}])
Output:
{ "_id" : { "productCategory" : "dhal" }, "id" : ObjectId("589d595f6da20b72fe006eaa") }
{ "_id" : { "productCategory" : "masala" }, "id" : ObjectId("589d595f6da20b72fe006ea9") }

MongoDB Aggregate Slow Performance When Using Sort

I have a collection (TV show episodes) with more than 1,200,000 documents;
here is my schema:
var episodeSchema = new Schema({
imdbId: { type : String },
showId: {type : String},
episodeId: { type : String },
episodeIdNumber:{ type : Number },
episodeTitle:{ type : String },
showTitle:{type : String},
seasonNumber:{type : Number},
episodeNumber:{type : Number},
airDate : {type : String},
summary:{type : String}
});
I created indexes on episodeTitle, episodeIdNumber, seasonNumber, episodeNumber, episodeId and showId.
Now I use a MongoDB aggregate $group to get all the episodes of a TV show.
Here is the aggregate query I used:
episode.aggregate( [
{ $match : { showId : "scorpion" } },
{$sort:{"episodeNumber":-1}},
{ $group: {
_id: "$seasonNumber", count: { $sum: 1 } ,
episodes : { $push: { episodeId : "$episodeId" , episodeTitle: "$episodeTitle" , episodeNumber: "$episodeNumber" , seasonNumber: "$seasonNumber" , airDate: "$airDate" } }
} }
,
{ $sort : { _id : -1 } }
] )
Now when I run this query it takes more than 2605.907 ms. After some digging I found out why it is slow: it is because of {$sort:{"episodeNumber":-1}}; without {$sort:{"episodeNumber":-1}} it takes around 19.178 ms to run.
As I mentioned above, I created an index on the episodeNumber field and, following the MongoDB Aggregation Pipeline Optimization documentation,
I placed the sort right after the match, so basically everything should be fine and I didn't do anything wrong.
So after this I thought something was wrong with my indexes, so I removed the episodeNumber index and reindexed, but the time was the same; nothing changed.
Finally, I tried running the aggregate group query without the episodeNumber index, and surprisingly it was faster! It takes around 20.118 ms.
I want to know why this happened. Aren't indexes supposed to make queries faster?
Update
query explain output :
{
"waitedMS" : NumberLong(0),
"stages" : [
{
"$cursor" : {
"query" : {
"showId" : "scorpion"
},
"sort" : {
"episodeNumber" : -1
},
"fields" : {
"airDate" : 1,
"episodeId" : 1,
"episodeNumber" : 1,
"episodeTitle" : 1,
"seasonNumber" : 1,
"_id" : 0
},
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "test.episodes",
"indexFilterSet" : false,
"parsedQuery" : {
"showId" : {
"$eq" : "scorpion"
}
},
"winningPlan" : {
"stage" : "EOF"
},
"rejectedPlans" : [ ]
}
}
},
{
"$group" : {
"_id" : "$seasonNumber",
"count" : {
"$sum" : {
"$const" : 1
}
},
"episodes" : {
"$push" : {
"episodeId" : "$episodeId",
"episodeTitle" : "$episodeTitle",
"episodeNumber" : "$episodeNumber",
"seasonNumber" : "$seasonNumber",
"airDate" : "$airDate"
}
}
}
},
{
"$sort" : {
"sortKey" : {
"_id" : -1
}
}
}
],
"ok" : 1
}

execStats is always empty in MongoDB "aggregate" commands profiling results

I am trying to profile the performance of an aggregation pipeline, specifically checking whether indices are used, how many objects are scanned, etc.
I'm setting the DB to full profiling:
db.setProfilingLevel(2)
But then in the db's 'system.profile' collection, in the result record for the aggregation command, the execStats is always empty.
Here is the full result for the command:
{
"op" : "command",
"ns" : "mydb.$cmd",
"command" : {
"aggregate" : "mycolection",
"pipeline" : [{
"$match" : {
"date" : {
"$gte" : "2013-11-26"
}
}
}, {
"$sort" : {
"user_id" : 1
}
}, {
"$project" : {
"user_id" : 1,
"_id" : 0
}
}, {
"$group" : {
"_id" : "$user_id",
"agg_val" : {
"$sum" : 1
}
}
}],
"allowDiskUse" : true
},
"keyUpdates" : 0,
"numYield" : 16,
"lockStats" : {
"timeLockedMicros" : {
"r" : NumberLong(3143653),
"w" : NumberLong(0)
},
"timeAcquiringMicros" : {
"r" : NumberLong(140),
"w" : NumberLong(3)
}
},
"responseLength" : 4990,
"millis" : 3237,
"execStats" : { },
"ts" : ISODate("2014-11-26T16:20:59.576Z"),
"client" : "127.0.0.1",
"allUsers" : [],
"user" : ""
}
Support for execStats in the aggregate command was added in MongoDB 3.4.

Mongodb pull data from subarray

Hi, I have the MongoDB collection below:
{
"_id" : ObjectId("53ce993639203f573671d3f5"),
"user_id" : NumberLong(51),
"buses" : [
{
"slot_id" : NumberLong(50),
"status" : NumberLong(3),
"bus_id" : NumberLong(8)
},
{
"slot_id" : NumberLong(67),
"status" : NumberLong(3),
"bus_id" : NumberLong(12)
}
]
}
I want to pull the sub-array element where bus_id=8.
I want the final result to look like this:
{
"_id" : ObjectId("53ce993639203f573671d3f5"),
"user_id" : NumberLong(51),
"buses" : [
{
"slot_id" : NumberLong(67),
"status" : NumberLong(3),
"bus_id" : NumberLong(12)
}
]
}
When i tried with below query
db.collectionname.update({},{$pull: {buses: {bus_id:8}}},{multi: true})
I got below error in console,
Cannot apply $pull/$pullAll modifier to non-array
Can anyone please suggest how to achieve this? I also need the equivalent PHP MongoDB query.
Thanks in advance.
Worked fine for me for your sample document:
> db.bus.findOne()
{
"_id" : ObjectId("53ce993639203f573671d3f5"),
"user_id" : NumberLong(51),
"buses" : [
{
"slot_id" : NumberLong(50),
"status" : NumberLong(3),
"bus_id" : NumberLong(8)
},
{
"slot_id" : NumberLong(67),
"status" : NumberLong(3),
"bus_id" : NumberLong(12)
}
]
}
> db.bus.update({}, { "$pull" : { "buses" : { "bus_id" : 8 } } }, { "multi" : true })
WriteResult({ "nMatched" : 1, "nUpserted" : 0, "nModified" : 1 })
> db.bus.findOne()
{
"_id" : ObjectId("53ce993639203f573671d3f5"),
"user_id" : NumberLong(51),
"buses" : [
{
"slot_id" : NumberLong(67),
"status" : NumberLong(3),
"bus_id" : NumberLong(12)
}
]
}
The cause of the problem is that in some document the buses field is not an array. What does the query
> db.bus.find({ "buses.0" : { "$exists" : 0}, "buses" : { "$ne" : [] } })
return? This query finds documents where there is no 0th element of the array and the array is not empty, so it should return documents where buses is not an array.

MongoDB groupby query

I have a collection containing records like:
{ "type" : "me", "tid" : "1" }
{ "type" : "me", "tid" : "1" }
{ "type" : "me", "tid" : "1" }
{ "type" : "you", "tid" : "1" }
{ "type" : "you", "tid" : "1" }
{ "type" : "me", "tid" : "2" }
{ "type" : "me", "tid" : "2"}
{ "type" : "you", "tid" : "2"}
{ "type" : "you", "tid" : "2" }
{ "type" : "you", "tid" : "2"}
I have want result like below
[
{"tid" : "1","me" : 3,"you": 2},
{"tid" : "2","me" : 2,"you": 3}
]
I have tried group and aggregate queries, but they don't give the required result format.
Below is the group query:
db.coll.group({
key: {tid : 1,type:1},
cond: { tid : { "$in" : [ "1","2"]} },
reduce: function (curr,result) {
result.total = result.total + 1
},
initial: { total : 0}
})
it result is like
[
{"tid" : "1", "type" : "me" ,"total": 3 },
{"tid" : "1","type" : "you" ,"total": 2 },
{"tid" : "2", "type" : "me" ,"total": 2 },
{"tid" : "2","type" : "you" ,"total": 3 }
]
following is aggregate query
db.coll.aggregate([
{$match : { "tid" : {"$in" : ["1","2"]}}},
{$group : { _id : {tid : "$tid",type : "$type"},total : {"$sum" : 1}}}
])
gives following result
{
"result" :
[
{"_id" : {"tid" : "1","type" : "me"},"total" : 3},
{"_id" : {"tid" : "2","type" : "me" },"total" : 2},
{"_id" : {"tid" : "2","type" : "you"},"total" : 3}
]
"ok" : 1
}
Is it possible to obtain the result I specified, or do I have to do some manipulation in my code?
Thanks
If you change your aggregation to this:
db.so.aggregate([
{ $match : { "tid" : { "$in" : ["1", "2"] } } },
{ $group : {
_id : { tid : "$tid", type : "$type" },
total : { "$sum" : 1 }
} },
{ $group : {
_id : "$_id.tid",
values: { $push: { type: "$_id.type", total: '$total' } }
} }
])
Then your output is:
{
"result" : [
{
"_id" : "1",
"values" : [
{ "type" : "you", "total" : 2 },
{ "type" : "me", "total" : 3 }
]
},
{
"_id" : "2",
"values" : [
{ "type" : "me", "total" : 2 },
{ "type" : "you", "total" : 3 }
]
}
],
"ok" : 1
}
Although that is not the same as what you want, it is going to be the closest that you can get. And in your application, you can easily pull out the values in the same way as with what you would like to get out of it.
Just keep in mind, that in general you can not promote a value (you, me) to a key — unless your key is of a limited set (3-4 items max).