mongodb count two condition one time? - mongodb

I need to count two query like below:
1. data[k]['success'] = common.find({'url': {"$regex": k}, 'result.titile':{'$ne':''}}).count()
2. data[k]['fail'] = common.find({'url': {"$regex": k}, 'result.titile':''}).count()
I think it would be more efficient if mongodb can work like below:
result = common.find({'url': {"$regex": k})
count1 = result.find({'result.titile':{'$ne':''}})
count2 = result.count() - count1
//result do not have find or count method, just for example
Two count are basing same search condition{'url': {"$regex": k}, splited by {'result.titile':{'$ne':''}} or not.
Is there some build-in way to do these without writing custom js?

The async method would be the preferred one if at all your client supports it.
You could also aggregate as below:
$match the docs which have the urls.
$group by the _id as null, and take the $sum of all documents. We need those documents, to get the sum of those which do not have a title, so just accumulate them using the $push operator.
$unwind the documents.
$match those which have a non-empty title.
$group, and get the $sum.
$project the desired result.
sample code:
// Computes, in one pipeline, how many URL-matching documents have a non-empty
// "result.titile" and how many do not (the latter by subtraction).
db.t.aggregate([
// Stage 1: keep only documents whose "url" matches the regex k.
{$match:{"url":{"$regex":k}}},
// Stage 2: collapse everything into a single group, counting the documents and
// keeping each whole document ($$ROOT) in "docs" so it can be filtered again later.
// NOTE(review): all matching documents end up inside one group document — this may
// hit document-size limits on large result sets; confirm before using at scale.
{$group:{"_id":null,
"count_of_url_matching_docs":{$sum:1},
"docs":{$push:"$$ROOT"}}},
// Stage 3: turn the accumulated array back into one document per original document.
{$unwind:"$docs"},
// Stage 4: keep only documents whose title is not the empty string.
// If no document passes this filter, the pipeline produces no output at all.
{$match:{"docs.result.titile":{$ne:""}}},
// Stage 5: regroup, carrying the total over with $first and counting the titled documents.
{$group:{"_id":null,
"count_of_url_matching_docs":{$first:"$count_of_url_matching_docs"},
"count_of_docs_with_titles":{$sum:1}}},
// Stage 6: emit the titled count and (total - titled) as "count_difference".
{$project:{"_id":0,
"count_of_docs_with_titles":"$count_of_docs_with_titles",
"count_difference":{$subtract:[
"$count_of_url_matching_docs",
"$count_of_docs_with_titles"]}}}
])
Test data:
db.t.insert([
{"url":"s","result":{"titile":1}},
{"url":"s","result":{"titile":""}},
{"url":"s","result":{"titile":""}},
{"url":"s","result":{"titile":2}}
])
Test Result:
{ "count_of_docs_with_titles" : 2, "count_difference" : 2 }

Use .aggregate() with a conditional key for grouping via $cond:
// Counts "success" and "fail" documents in a single pass: after filtering by URL,
// the $cond expression assigns every document to one of two group keys and
// $sum: 1 counts the members of each group.
common.aggregate([
  // Keep only documents whose "url" matches the regex k.
  { "$match": { "url": { "$regex": k } } },
  { "$group": {
    "_id": {
      "$cond": {
        // The field is spelled "titile" in the question's queries and data;
        // testing a differently spelled field would put every document in "success".
        "if": { "$ne": [ "$result.titile", "" ] },
        "then": "success",
        "else": "fail"
      }
    },
    "count": { "$sum": 1 }
  }}
])
However it is actually more efficient to run both queries in parallel if your environment supports it, such as with nodejs
// Issues both counts at the same time and collects them once both have finished.
// `results` is an array in task order: [ { success: <n> }, { fail: <m> } ].
async.parallel(
  [
    // Task 1: documents matching the URL whose title is not the empty string.
    function(callback) {
      common.count({
        "url": { "$regex": k },
        // The field is spelled "titile" in the question's documents.
        "result.titile": { "$ne": "" }
      }, function(err,count) {
        callback(err,{ "success": count });
      });
    },
    // Task 2: documents matching the URL whose title is the empty string.
    function(callback) {
      common.count({
        "url": { "$regex": k },
        "result.titile": ""
      }, function(err,count) {
        callback(err,{ "fail": count });
      });
    }
  ],
  // Final callback: receives the first error, if any, or both results.
  function(err,results) {
    if (err) throw err;
    console.log(results);
  }
)
Which makes sense really since each item is not being tested and each result can actually run on the server at the same time.

Related

get document with same 3 fields in a collection

I have a collection with more than 1000 documents, and some documents have the same value in some fields; I need to get those.
the collection is:
[{_id,fields1,fields2,fields3,etc...}]
what query can i use to get all the elements that have the same 3 fields for example:
[
{_id:1,fields1:'a',fields2:1,fields3:'z'},
{_id:2,fields1:'a',fields2:1,fields3:'z'},
{_id:3,fields1:'f',fields2:2,fields3:'g'},
{_id:4,fields1:'f',fields2:2,fields3:'g'},
{_id:5,fields1:'j',fields2:3,fields3:'g'},
]
i need to get
[
{_id:2,fields1:'a',fields2:1,fields3:'z'},
{_id:4,fields1:'f',fields2:2,fields3:'g'},
]
In this way I can easily get a list of "duplicates" that I can delete if needed; it's not really important whether I get ids 2 and 4 or 1 and 3,
but 5 would never be included as it's not 'duplicated'
EDIT:
sorry but i forgot to mention that there are some document with null value i need to exclude those
This is the perfect use case of window field. You can use $setWindowFields to compute $rank in the grouping/partition you want. Then, get those rank not equal to 1 to get the duplicates.
// Returns the "extra" copies of documents that share the same
// (fields1, fields2, fields3) triple, i.e. every document in a duplicate set
// except the one with the smallest _id.
db.collection.aggregate([
// Stage 1: drop documents where any of the three fields is null.
{
$match: {
fields1: {
$ne: null
},
fields2: {
$ne: null
},
fields3: {
$ne: null
}
}
},
// Stage 2: partition by the three fields and rank the documents of each
// partition by ascending _id; the result is stored in "duplicateRank".
{
"$setWindowFields": {
"partitionBy": {
fields1: "$fields1",
fields2: "$fields2",
fields3: "$fields3"
},
"sortBy": {
"_id": 1
},
"output": {
"duplicateRank": {
"$rank": {}
}
}
}
},
// Stage 3: rank 1 is the first document of each partition; everything else is a duplicate.
{
$match: {
duplicateRank: {
$ne: 1
}
}
},
// Stage 4: remove the helper field so the output has the original document shape.
{
$unset: "duplicateRank"
}
])
Mongo Playground
I think you can try this aggregation query:
First, group by the fields you want to check for repeated values.
It creates an array with the _ids that are repeated.
Then get only where there is more than one ($match).
And last project to get the desired output. I've used the first _id found.
// Finds combinations of (fields1, fields2, fields3) that occur more than once
// and returns one representative document per combination.
db.collection.aggregate([
  // Stage 1: group by the three fields and collect the _ids of every member.
  {
    "$group": {
      "_id": {
        "fields1": "$fields1",
        "fields2": "$fields2",
        "fields3": "$fields3"
      },
      "duplicatesIds": {
        "$push": "$_id"
      }
    }
  },
  // Stage 2: keep only groups that contain more than one document.
  {
    "$match": {
      "$expr": {
        "$gt": [
          {
            "$size": "$duplicatesIds"
          },
          1
        ]
      }
    }
  },
  // Stage 3: rebuild the original document shape, using the first collected _id.
  // Each output field must read from the group-key entry of the same name;
  // previously fields2 and fields3 were read from each other's key.
  {
    "$project": {
      "_id": {
        "$arrayElemAt": [
          "$duplicatesIds",
          0
        ]
      },
      "fields1": "$_id.fields1",
      "fields2": "$_id.fields2",
      "fields3": "$_id.fields3"
    }
  }
])
Example here

Mongodb aggregate match query with priority on full match

I am attempting to do a mongodb regex query on a field. I'd like the query to prioritize a full match if it finds one and then partials afterwards.
For instance if I have a database full of the following entries.
{
"username": "patrick"
},
{
"username": "robert"
},
{
"username": "patrice"
},
{
"username": "pat"
},
{
"username": "patter"
},
{
"username": "john_patrick"
}
And I query for the username 'pat' I'd like to get back the results with the direct match first, followed by the partials. So the results would be ordered ['pat', 'patrick', 'patrice', 'patter', 'john_patrick'].
Is it possible to do this with a mongo query alone? If so could someone point me towards a resource detailing how to accomplish it?
Here is the query that I am attempting to use to perform this.
db.accounts.aggregate({ $match :
{
$or : [
{ "usernameLowercase" : "pat" },
{ "usernameLowercase" : { $regex : "pat" } }
]
} })
Given your precise example, this could be accomplished in the following way - if your real world scenario is a little bit more complex you may hit problems, though:
// Returns every account whose username contains "pat", ordered so that the exact
// match comes first, then usernames starting with "pat", then all remaining matches.
// The search term "pat" and its length (3) are hard-coded in the stages below.
db.accounts.aggregate([{
$match: {
"username": /pat/i // find all documents that somehow match "pat" in a case-insensitive fashion
}
}, {
$addFields: {
"exact": {
$eq: [ "$username", "pat" ] // add a field that indicates if a document matches exactly
},
"startswith": {
$eq: [ { $substr: [ "$username", 0, 3 ] }, "pat" ] // add a field that indicates if a document matches at the start
}
}
}, {
$sort: {
"exact": -1, // sort by our primary temporary field
"startswith": -1 // sort by our secondary temporary field
}
}, {
$project: {
"exact": 0, // get rid of the "exact" field,
"startswith": 0 // same for "startswith"
}
}])
Another way would be using $facet which may prove a bit more powerful by enabling more complex scenarios but slower (several people here will hate me, though, for this proposal):
// Alternative ordering approach: collect exact matches and partial matches in two
// separate sub-pipelines, concatenate them (exact first), and then restore the
// one-document-per-result shape.
db.accounts.aggregate([{
$facet: { // run two pipelines against all documents
"exact": [{ // this one will capture all exact matches
$match: {
"username": "pat"
}
}],
"others": [{ // this one will capture all others
$match: {
"username": { $ne: "pat", $regex: /pat/i }
}
}]
}
}, {
$project: {
"result": { // merge the two arrays
$concatArrays: [ "$exact", "$others" ]
}
}
}, {
$unwind: "$result" // flatten the resulting array into separate documents
}, {
$replaceRoot: { // restore the original document structure
"newRoot": "$result"
}
}])

How to find records in a bson object

var otherLanguages=[ "English","Arabic","French"];
var first, second;
db.collection.find({ $and: [ { "Language" : { $nin : otherLanguages} },{"Language":{ $ne:null}} ]}).forEach(function(obj){
shell out 341 docs one by one. In these docs,I want to find out documents that satisfy two if statements. Later, I want to collect the count it.
if (obj.find({ $and: [{'POS': { $eq: "Past" } },{'Desp': { $ne: null } }] })) { first= first+1;}
if (obj.find({ $and: [{'POS': { $eq: "Past" } },{'Desp': { $eq: null } }] })) {second= second+1;}
});
print (first,second)
I know that I cannot use find() function on the obj, but Is there a way to search on this "bson obj" to find the count.
If this is not feasible, then please suggest a way to get the desired result.
If I understand your question correctly you can achieve that by using the aggregation framework like so:
// Counts the "Past" documents in the wanted languages, split by whether "Desp"
// equals null. `otherLanguages` is the array declared in the question's snippet.
// The two stages are passed as separate arguments rather than as an array.
db.collection.aggregate({
// filter out all documents that you don't care about
$match: {
"Language": { $nin: otherLanguages, $ne: null },
"POS": "Past"
},
}, {
// then split into groups...
$group: {
_id: { $eq: [ "$Desp", null ] }, // ...one for the "eq: null" and one for the "ne: null"
"count": { $sum: 1 } // ...and count the number of documents in each group
}
})

Find empty documents in a database

I have queried an API which is quite inconsistent and therefore does not return objects for all numerical indexes (but most of them). To further go on with .count() on the numerical index I've been inserting empty documents with db.collection.insert({})
My question now is: how would I find and count these objects?
Something like db.collection.count({}) won't work obviously.
Thanks for any idea!
Use the $where operator. The Javascript expression returns only documents containing a single key. (that single key being the documents "_id" key)
// Counts documents that have exactly one key; the JavaScript expression is
// evaluated with `this` bound to each document.
db.collection.find({ "$where": "return Object.keys(this).length == 1" }).count()
For MongoDB 3.4.4 and newer, consider running the following aggregate pipeline which uses $objectToArray (which is available from MongoDB 3.4.4 and newer versions) to get the count of those empty documents/null fields:
// Counts documents that have no second key, without using server-side JavaScript.
db.collection.aggregate([
// Stage 1: convert the whole document ($$ROOT) into an array of key/value entries.
{ "$project": {
"hashmaps": { "$objectToArray": "$$ROOT" }
} },
// Stage 2: keep only the key names ("k") of those entries.
{ "$project": {
"keys": "$hashmaps.k"
} },
// Stage 3: a single group that adds 1 for every document whose key list has no
// element at index 1. When the second key is absent, $ifNull substitutes 0,
// the $eq against 0 is true, and $cond contributes 1; otherwise it contributes 0.
{ "$group": {
"_id": null,
"count": { "$sum": {
"$cond": [
{
"$eq":[
{
"$ifNull": [
{ "$arrayElemAt": ["$keys", 1] },
0
]
},
0
]
},
1,
0
]
} }
} }
]);

Aggregation in mongodb for nested documents

I have a document in the following format:
"summary":{
"HUL":{
"hr_0":{
"ts":None,
"Insights":{
"sentiments":{
"pos":37,
"neg":3,
"neu":27
},
"topics":[
"Basketball",
"Football"
],
"geo":{
"locations":{
"Delhi":34,
"Kolkata":56,
"Pune":79,
"Bangalore":92,
"Mumbai":54
},
"mst_act":{
"loc":Bangalore,
"lat_long":None
}
}
}
},
"hr_1":{....},
"hr_2":{....},
.
.
"hr_23":{....}
I want to run an aggregation in pymongo that sums up the pos, neg and neu sentiments for all hours of the day "hr_0" to "hr_23".
I am having trouble in constructing the pipeline command in order to do this as the fields I am interested in are in nested dictionaries. Would really appreciate your suggestions.
Thanks!
It's going to be pretty difficult to come up with an aggregation pipeline that will give you the desired aggregates because your document schema has some dynamic keys which you can't use as an identifier expression in the group operator pipeline.
However, a workaround using the current schema would be to iterate over the find cursor and extract the values you want to add up within the loop. Something like the following:
# Running totals for each sentiment, accumulated over every document and every hour.
pos_total = 0
neg_total = 0
neu_total = 0

cursor = db.collection.find()
for doc in cursor:
    # The hourly buckets live under the dynamic keys "hr_0" .. "hr_23".
    for i in range(0, 24):
        sentiments = doc["summary"]["HUL"]["hr_" + str(i)]["Insights"]["sentiments"]
        pos_total += sentiments["pos"]
        neg_total += sentiments["neg"]
        # Previously read from the undefined name `ddoc`, which raised NameError.
        neu_total += sentiments["neu"]

print(pos_total)
print(neg_total)
print(neu_total)
If you could do with changing the schema, then the following schema would be ideal for using the aggregation framework:
{
"summary": {
"HUL": [
{
"_id": "hr_0",
"ts": None,
"Insights":{
"sentiments":{
"pos":37,
"neg":3,
"neu":27
},
"topics":[
"Basketball",
"Football"
],
"geo":{
"locations":{
"Delhi":34,
"Kolkata":56,
"Pune":79,
"Bangalore":92,
"Mumbai":54
},
"mst_act":{
"loc":Bangalore,
"lat_long":None
}
}
}
},
{
"_id": "hr_1",
"ts": None,
"Insights":{
"sentiments":{
"pos":37,
"neg":3,
"neu":27
},
...
}
},
...
{
"_id": "hr_23",
"ts": None,
"Insights":{
"sentiments":{
"pos":37,
"neg":3,
"neu":27
},
...
}
}
]
}
}
The aggregation pipeline that would give you the required totals is:
// Flattens the "summary.HUL" array so that each hourly entry becomes its own
// document, then sums the three sentiment counters per group.
var pipeline = [
{
"$unwind": "$summary.HUL"
},
{
"$group": {
// The group key is the hourly entry's _id, so this yields one result per
// "hr_N" label. NOTE(review): for a single grand total over all hours a
// constant key (e.g. null) would be needed — confirm which output is wanted.
"_id": "$summary.HUL._id",
"pos_total": { "$sum": "$summary.HUL.Insights.sentiments.pos" },
"neg_total": { "$sum": "$summary.HUL.Insights.sentiments.neg" },
"neu_total": { "$sum": "$summary.HUL.Insights.sentiments.neu" },
}
}
]
result = db.collection.aggregate(pipeline)