I'm looking for a query that acts as $setIsSubset, except accounting for duplicate values.
For example, [1,1,2,3] is a subset of [1,2,3,4], because sets don't have duplicate values.
How can I write a query such that [1,1,2,3] is not a subset of [1,2,3,4]?
An example of expected outputs:
INPUT | TARGET | RESULT
[1] [1,2,3,4] TRUE
[1,2,3] [1,2,3,4] TRUE
[1,1,2,3] [1,2,3,4] FALSE
[1,2,3,4] [1,2,3,4] TRUE
[1,3] [1,2,3,4] TRUE
[1,11,5] [1,2,3,4] FALSE
[1,2,2,3] [1,2,3,4] FALSE
I would suggest not doing such heavy processing in a MongoDB query, since the same task is easy in any programming language. But if you still need it in MongoDB, the following query produces the expected output, provided both the input and target arrays are sorted.
// Builds the $reduce expression that rewrites an array field into strings of
// the form "<value>-<k>", where k is how many equal values directly precede
// the element. Equal values must be adjacent (i.e. the array is sorted),
// because each element is only compared with the one that follows it.
function tagRepeatedValues(arrayPath) {
  return {
    $reduce: {
      input: arrayPath,
      initialValue: {
        data: [],
        postfix: 0,
        index: 0,
        // Look-ahead for the first element: the element at position 1.
        nextElem: { $arrayElemAt: [arrayPath, 1] }
      },
      in: {
        // Append "<this>-<postfix>" to the strings collected so far.
        data: {
          $concatArrays: [
            "$$value.data",
            [{ $concat: [{ $toString: "$$this" }, "-", { $toString: "$$value.postfix" }] }]
          ]
        },
        // The following element continues the run when it equals this one;
        // otherwise the counter restarts at 0.
        postfix: {
          $cond: [
            { $eq: ["$$this", "$$value.nextElem"] },
            { $sum: ["$$value.postfix", 1] },
            0
          ]
        },
        // Look-ahead for the next iteration (two positions past the current index).
        nextElem: { $arrayElemAt: [arrayPath, { $sum: ["$$value.index", 2] }] },
        index: { $sum: ["$$value.index", 1] }
      }
    }
  };
}

db.collection.aggregate([
  {
    // Tag both arrays so that repeated values stay distinct under set operations.
    $project: {
      modifiedInput: tagRepeatedValues("$input"),
      modifiedTarget: tagRepeatedValues("$target")
    }
  },
  {
    // Matched when no tagged input value is absent from the tagged target.
    $project: {
      _id: 0,
      matched: {
        $eq: [
          { $size: { $setDifference: ["$modifiedInput.data", "$modifiedTarget.data"] } },
          0
        ]
      }
    }
  }
]).pretty()
Data set:
{
"_id" : ObjectId("5d6e005db674d5c90f46d355"),
"input" : [
1
],
"target" : [
1,
2,
3,
4
]
}
{
"_id" : ObjectId("5d6e005db674d5c90f46d356"),
"input" : [
1,
2,
3
],
"target" : [
1,
2,
3,
4
]
}
{
"_id" : ObjectId("5d6e005db674d5c90f46d357"),
"input" : [
1,
1,
2,
3
],
"target" : [
1,
2,
3,
4
]
}
{
"_id" : ObjectId("5d6e005db674d5c90f46d358"),
"input" : [
1,
2,
3,
4
],
"target" : [
1,
2,
3,
4
]
}
{
"_id" : ObjectId("5d6e005db674d5c90f46d359"),
"input" : [
1,
3
],
"target" : [
1,
2,
3,
4
]
}
{
"_id" : ObjectId("5d6e005db674d5c90f46d35a"),
"input" : [
1,
5,
11
],
"target" : [
1,
2,
3,
4
]
}
{
"_id" : ObjectId("5d6e005db674d5c90f46d35b"),
"input" : [
1,
2,
2,
3
],
"target" : [
1,
2,
3,
4
]
}
Output:
{ "matched" : true }
{ "matched" : true }
{ "matched" : false }
{ "matched" : true }
{ "matched" : true }
{ "matched" : false }
{ "matched" : false }
Explanation: To keep repeated values from being collapsed by the set operation, a postfix counter is appended to each value. For example, [1,1,1,2,3,3,4,4] would become ["1-0","1-1","1-2","2-0","3-0","3-1","4-0","4-1"]. After both the input and target arrays are converted, the set difference is calculated. It's a match if the size of the set difference is zero.
You can try below aggregation:
// Values to test as a duplicate-aware subset of each document's `target`.
let input = [1,2,3];
// Derived from `input` rather than typed by hand, so the size used in the
// final comparison cannot drift out of sync when `input` is edited.
let inputSize = input.length;
db.collection.aggregate([
  {
    // A single-argument $setUnion removes duplicates from `target`.
    $project: {
      uniqueTarget: { $setUnion: [ "$target" ] }
    }
  },
  {
    $addFields: {
      // Starting from uniqueTarget, each input element filters out the
      // target element equal to it (if there is one).
      filtered: {
        $reduce: {
          input: input,
          initialValue: "$uniqueTarget",
          in: {
            $filter: {
              input: "$$value",
              as: "current",
              cond: { $ne: [ "$$this", "$$current" ] }
            }
          }
        }
      }
    }
  },
  {
    // If every input element removed exactly one target element, the sizes
    // differ by inputSize. A repeated or unknown input element removes
    // nothing, so the sizes no longer line up and result is false.
    $project: {
      result: {
        $eq: [
          { $size: "$filtered" },
          { $subtract: [ { $size: "$uniqueTarget" }, inputSize ] }
        ]
      }
    }
  }
])
It starts with $setUnion to ensure there are no duplicates in the target array. Then $reduce iterates through input and removes the currently processed element from the target. Every iteration should remove exactly one element, so the expected $size of filtered equals the $size of uniqueTarget minus inputSize.
Mongo Playground (1)
Mongo Playground (2)
Related
I have a collection:
{
values: [null, null, 1, 2, 3, 4.6],
}
I want to receive a property which tells me if any of those values is a number.
I've tried:
{
$project: {
hasNumber: {
$in: [{ $eq: [{ $type: '$$CURRENT' }, 'number'] }, '$values'],
},
},
}
but it doesn't work, is something like this possible with aggregations?
Please try this :
// Adds `hasNumber`: true when at least one element of `values` passes the type test below.
db.yourCollectionName.aggregate([{
$project: {
values: 1, hasNumber: {
// The $filter output is non-empty exactly when some element matched.
$gt: [{
$size: {
$filter:
{
input: "$values",
as: "value",
// Only elements whose $type is 'int' match; a double such as 4.6 does not.
cond: { $eq: [{ $type: '$$value' }, 'int'] }
// To also count doubles, replace the cond above with this one:
//cond: { $or :[ {$eq: [{ $type: '$$value' }, 'int']} , {$eq: [{ $type: '$$value' }, 'double']}] }
}
}
}, 0]
}
}
}])
Collection Data :
/* 1 */
{
"_id" : ObjectId("5e14d9dd627ef78236ea77e3"),
"values" : [
null,
null,
1,
2,
3,
4.6
]
}
/* 2 */
{
"_id" : ObjectId("5e14d9e4627ef78236ea785f"),
"values" : [
null,
null
]
}
/* 3 */
{
"_id" : ObjectId("5e14decc627ef78236eb12d3"),
"values" : [
"1",
4.6
]
}
Result :
/* 1 */
{
"_id" : ObjectId("5e14d9dd627ef78236ea77e3"),
"values" : [
null,
null,
1,
2,
3,
4.6
],
"hasNumber" : true
}
/* 2 */
{
"_id" : ObjectId("5e14d9e4627ef78236ea785f"),
"values" : [
null,
null
],
"hasNumber" : false
}
/* 3 */ // If we're checking for double this hasNumber will be true
{
"_id" : ObjectId("5e14decc627ef78236eb12d3"),
"values" : [
"1",
4.6
],
"hasNumber" : false
}
Debugging your code...
$in: [{ $eq: [{ $type: '$$CURRENT' }, 'number'] }, '$values'],
You are checking if false is inside $values.
Explanation:
'$$CURRENT' returns raw document
{ $type: '$$CURRENT' } returns 'object'
$eq:['object', 'number'] will always return false
$in: [false, '$values'] will be false (the boolean false is not one of the values)
I've solved with $convert operator
// Projects `hasNumber` by converting every element of `values` to int and
// comparing the converted array with the original array.
// NOTE(review): the $eq compares the two arrays as a whole, so this looks like
// it yields true only when every element survives the conversion unchanged,
// not when any single element is a number - confirm against the requirement.
db.collection.aggregate([
{
$project: {
hasNumber: {
// NOTE(review): the $eq already produces a boolean, so this $cond appears redundant.
$cond: [
{
$eq: [
{
$map: {
input: "$values",
in: {
$convert: {
input: "$$this",
to: "int",
// -999 stands in for elements that cannot be converted or are null.
onError: -999,
onNull: -999
}
}
}
},
"$values"
]
},
true,
false
]
}
}
}
])
{
_id: ObjectId("5dbdacc28cffef0b94580dbd"),
"comments" : [
{
"_id" : ObjectId("5dbdacc78cffef0b94580dbf"),
"replies" : [
{
"_id" : ObjectId("5dbdacd78cffef0b94580dc0")
},
]
},
]
}
How can I count the number of elements in comments and add the number of replies to it?
My approach is do 2 query like this:
1. total elements of replies
// For one post, emits one document per comment whose `total` is the size of
// that comment's `replies` array.
db.posts.aggregate([
{$match: {_id:ObjectId("5dbdacc28cffef0b94580dbd")}},
// One output document per element of `comments`.
{ $unwind: "$comments",},
{$project:{total:{$size:"$comments.replies"} , _id: 0} }
])
2. count total elements of comments
// Counts the top-level comments of one post.
db.posts.aggregate([
  { $match: { _id: ObjectId("5dbdacc28cffef0b94580dbd") } },
  // Measure `comments` itself: "$comments.replies" is the list of replies
  // arrays, which is not what this step is meant to count.
  { $project: { total: { $size: "$comments" }, _id: 0 } }
])
Then I sum up both. Is there a better way to write a single query that returns the total number of comments plus replies?
You can use $reduce and $concatArrays to "merge" an inner "array of arrays" into a single list and measure the $size of that. Then simply $add the two results together:
// Adds `totalBoth` to the matched post: the number of comments plus the
// number of replies across all of its comments.
db.posts.aggregate([
{ "$match": { _id:ObjectId("5dbdacc28cffef0b94580dbd") } },
{ "$addFields": {
"totalBoth": {
"$add": [
// Number of comments.
{ "$size": "$comments" },
// Number of replies: "$comments.replies" is an array of arrays, so the
// inner arrays are concatenated into one list before it is measured.
{ "$size": {
"$reduce": {
"input": "$comments.replies",
"initialValue": [],
"in": {
"$concatArrays": [ "$$value", "$$this" ]
}
}
}}
]
}
}}
])
Note that an expression like $comments.replies produces an "array of arrays", hence the operation to merge these into a single array whose elements can all be counted.
Try using the $unwind to flatten the list you get from the $project before using $count.
This is another way of getting the result.
Input documents:
{ "_id" : 1, "array1" : [ { "array2" : [ { id: "This is a test!"}, { id: "test1" } ] }, { "array2" : [ { id: "This is 2222!"}, { id: "test 222" }, { id: "222222" } ] } ] }
{ "_id" : 2, "array1" : [ { "array2" : [ { id: "aaaa" }, { id: "bbbb" } ] } ] }
The query:
// Per document, totals the size of `array1` and the sizes of every nested
// `array2`, then adds the two totals together.
db.arrsizes2.aggregate( [
// Two independent sub-pipelines over the same input documents.
{ $facet: {
// One entry per document: the length of array1.
array1Sizes: [
{ $project: { array1Size: { $size: "$array1" } } }
],
// One entry per array1 element: the length of its array2.
array2Sizes: [
{ $unwind: "$array1" },
{ $project: { array2Size: { $size: "$array1.array2" } } },
],
} },
// Merge both facet outputs into one list and emit one document per entry.
{ $project: { result: { $concatArrays: [ "$array1Sizes", "$array2Sizes" ] } } },
{ $unwind: "$result" },
// Regroup by the original document _id; an entry lacking one of the two
// size fields leaves that sum unchanged.
{ $group: { _id: "$result._id", total1: { $sum: "$result.array1Size" }, total2: { $sum: "$result.array2Size" } } },
{ $addFields: { total: { $add: [ "$total1", "$total2" ] } } },
] )
The output:
{ "_id" : 2, "total1" : 1, "total2" : 2, "total" : 3 }
{ "_id" : 1, "total1" : 2, "total2" : 5, "total" : 7 }
//Sample collection
db.grades.insertMany([{ _id: 1, quizzes: [ 1, 2, 3 ] },
{ _id: 2, quizzes: [ ] },
{ _id: 3, quizzes: [ 3, 8, 9 ] }])
Below is the query i am using and getting results.
// Product of sum: adds every quiz grade to itself, then multiplies the
// resulting values together for each document.
db.grades.aggregate(
[
{ $project:
{ sumof:
{
// Map each grade to grade + grade.
$map:
{
input: "$quizzes",
as: "grade",
in: { $sum :[ "$$grade", "$$grade" ] }
}
}
}
},
{ $project :
{ "productOfSum":
// Multiply all mapped values, starting from 1. For an empty `sumof`
// array the result is the initial value 1 (see the output for _id 2).
{ $reduce :
{ input : '$sumof',
initialValue: 1,
in : {$multiply :["$$value","$$this"]}
}
}
}
}
]
)
The output of the query is as below.
{"_id":1,"productOfSum":48}
{"_id":2,"productOfSum":1}
{"_id":3,"productOfSum":1728}
Can anyone advise how and why, for "_id: 2", the value is coming out as 1 even though the array is empty?
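$reduce returns its initialValue (1 here) when the input array is empty, which is why _id: 2 yields 1. To get 0 for an empty array instead, replace an empty quizzes array with [0] before the other stages. Try this: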
// Same product-of-sum pipeline, preceded by a stage that swaps an empty
// `quizzes` array for [0] so that the final product becomes 0 instead of 1.
db.grades.aggregate(
[
{$project:
{quizzes:
// Empty array -> [0]; any other array is passed through unchanged.
{$cond:
[{$eq: [{$size: "$quizzes"}, 0]} , [0], "$quizzes"] }
}
},
{ $project:
{ sumof:
{
// Map each grade to grade + grade.
$map:
{
input: "$quizzes",
as: "grade",
in: { $sum :[ "$$grade", "$$grade" ] }
}
}
}
},
{ $project :
{ "productOfSum":
// Multiply all mapped values, starting from 1.
{ $reduce :
{ input : '$sumof',
initialValue: 1,
in : {$multiply :["$$value","$$this"]}
}
}
}
}
]
)
I have the following documents:
{ "col1": "camera", "fps": 1, "lat": 3 },
{ "col1": "camera", "fps": 3, "lat": 2 }
{ "col1": "camera", "foo": 9, "bar": 7 }
{ "col1": "camera", "bar": 8, "bar": 1 }
{ "col1": "camera", "check": 4, "lat": 3 }
How can I get the following:
{ "fps": 2, "lat": 3, "bar": 3, "check": 1, "foo": 1 }
Where each of these values is the number of occurrences (count) of each key (fps appears twice, foo appears once, etc.).
Aggregate
Here is a solution (I named my collection cameras) if the keys are known in advance:
// Counts, over the whole collection, in how many documents each known key is
// present with a non-null value.
//
// Presence is tested against null instead of a 0 placeholder, so a document
// that stores the value 0 under a key is still counted for that key.
db.cameras.aggregate([
  {
    $group: {
      // A constant _id puts every document into one group.
      _id: null,
      // $ifNull maps a missing or null field to null; such documents add 0,
      // all others add 1.
      fps: {
        $sum: { $cond: [ { $eq: [ { $ifNull: [ "$fps", null ] }, null ] }, 0, 1 ] }
      },
      lat: {
        $sum: { $cond: [ { $eq: [ { $ifNull: [ "$lat", null ] }, null ] }, 0, 1 ] }
      },
      bar: {
        $sum: { $cond: [ { $eq: [ { $ifNull: [ "$bar", null ] }, null ] }, 0, 1 ] }
      },
      foo: {
        $sum: { $cond: [ { $eq: [ { $ifNull: [ "$foo", null ] }, null ] }, 0, 1 ] }
      },
      check: {
        $sum: { $cond: [ { $eq: [ { $ifNull: [ "$check", null ] }, null ] }, 0, 1 ] }
      }
    }
  }
])
Results :
{
"result" : [
{
"_id" : null,
"fps" : 2,
"lat" : 3,
"bar" : 2,
"foo" : 1
}
],
"ok" : 1
}
MapReduce
Another solution if keys are unknown is mapReduce :
// Counts how many documents contain each key, without knowing the keys in advance.
db.cameras.mapReduce(
// Map: emit (key, 1) for every key of the current document; this includes
// keys such as _id and col1, as the results below show.
function() {
var keys = Object.keys(this);
keys.forEach(function(key) {
emit(key, 1);
});
},
// Reduce: add up the 1s emitted for a key.
// Array.sum is not standard JavaScript - presumably the mongo shell helper; verify in your environment.
function(key, values) {
return Array.sum(values);
},
{
// An empty query selects every document.
query: {},
// Return the results in the command response rather than writing them to a collection.
out: {
inline: 1
}
}
)
Results :
{
"results" : [
{
"_id" : "_id",
"value" : 5
},
{
"_id" : "bar",
"value" : 2
},
{
"_id" : "check",
"value" : 1
},
{
"_id" : "col1",
"value" : 5
},
{
"_id" : "foo",
"value" : 1
},
{
"_id" : "fps",
"value" : 2
},
{
"_id" : "lat",
"value" : 3
}
],
"timeMillis" : 1,
"counts" : {
"input" : 5,
"emit" : 19,
"reduce" : 5,
"output" : 7
},
"ok" : 1,
}
But the result is not exactly in the same format.
Let's say I have a collection:
[
{
_id: product_a,
values: [
{ id: 1, value: 0 },
{ id: 2, value: 1 },
{ id: 3, value: 2 },
]
},
{
_id: product_b,
values: [
{ id: 1, value: 1 },
{ id: 2, value: 2 },
{ id: 3, value: 2 },
]
},
// etc ...
];
Is there any way to query this collection by aggregating the product of a sub-set of "values"?
If I query { values: [ 1, 3 ] }, I would get something like:
[
{
_id: product_a,
result: 0 // since 0 * 2 = 0
},
{
_id: product_b,
result: 2 // since 1 * 2 = 2
},
// etc ...
];
Here is how you can do it with the schema the way you have it using 2.2 aggregation framework. Note that this would be a lot simpler if the id/value pairs were stored with id as key.
Your aggregation pipeline:
// Pipeline computing, per document, the product of the `value` fields of the
// `values` entries whose id is 1 and 3.
[
// One document per element of `values`.
{
"$unwind" : "$values"
},
// Keep only the entries with the requested ids.
{
"$match" : {
"values.id" : {
"$in" : [
1,
3
]
}
}
},
// Put each entry's value into the column for its id; the other column gets
// the placeholder -1.
{
"$project" : {
"_id" : 1,
"val1" : {
"$cond" : [
{
"$eq" : [
"$values.id",
1
]
},
"$values.value",
-1
]
},
"val3" : {
"$cond" : [
{
"$eq" : [
"$values.id",
3
]
},
"$values.value",
-1
]
}
}
},
// Collapse back to one document per _id, taking the largest entry of each column.
// NOTE(review): $max only prefers the real value over the -1 placeholder when
// the real value is greater than -1 - confirm values are never below -1.
{
"$group" : {
"_id" : "$_id",
"val1" : {
"$max" : "$val1"
},
"val3" : {
"$max" : "$val3"
}
}
},
// Multiply the two selected values.
{
"$project" : {
"_id" : 1,
"result" : {
"$multiply" : [
"$val1",
"$val3"
]
}
}
}
]
Note that the last step would do the job if the structure of your document was:
{_id: "product_x", values: [ {id1: value}, {id2: value} etc. ]}
Now run the query from the shell with:
> db.collection.aggregate(pipeline)
Or from your code via db.runCommand({"aggregate":"collection","pipeline":[...]})