MongoDB Aggregation Query Self Join - mongodb

I have this data:
[
{ "_id": 1, "B": 1, "C": 1 },
{ "_id": 2, "B": 1, "C": 1 },
{ "_id": 3, "B": 1, "C": 2 },
{ "_id": 5, "B": 2, "C": 1 },
{ "_id": 6, "B": 2, "C": 1 },
{ "_id": 7, "B": 2, "C": 3 },
{ "_id": 8, "B": 3, "C": 1 },
{ "_id": 9, "B": 3, "C": 1 },
{ "_id": 10, "B": 3, "C": 1 }
]
What I want to know:
I would like to get all distinct "B" values for which no document exists with a "C" value of 2 or 3.
For the given data the result should be only 3, because every other "B" has at least one row with a "C" value of 2 or 3.
In SQL I would create a left outer join to get this.

The following query should do
// Return every "B" for which NO document has "C" equal to 2 or 3.
// A plain distinct("B", {C: {$nin: [2, 3]}}) filters individual documents, so it
// still returns any "B" that has at least one row with another "C" value
// ([1, 2, 3] for the sample data). Grouping first lets us test all "C" values of
// a "B" at once: $nin on an array field matches only when no element is listed.
db.getCollection('foo').aggregate([
    // One document per "B", carrying the set of its "C" values.
    { $group: { _id: "$B", cValues: { $addToSet: "$C" } } },
    // Keep the groups whose "C" values contain neither 2 nor 3.
    { $match: { cValues: { $nin: [2, 3] } } },
    // Expose the group key under its original name.
    { $project: { _id: 0, B: "$_id" } }
]);

I found out this query returns the correct result:
// Step 1: collect every "B" that has at least one document with "C" of 2 or 3.
var excludedBs = db.mytest.distinct("B", { "C": { $in: [2, 3] } });
// Step 2: return the distinct "B" values that are not in that exclusion list.
db.mytest.distinct("B", { "B": { $nin: excludedBs } });

Related

Postgresql - How to get an array of key values from a jsonb array of objects

Is it possible to convert a jsonb array of objects to an array with only the values from a specific key without using unnest?
Another way to ask this is, in PSQL can you pick a value by key name from an array of objects and map it into an array?
This shows the format of the data to convert:
-- Sample input: one jsonb array of objects per row, exposed as column "data".
SELECT
    t.data
FROM (
    VALUES
        ('[{"a": 1, "b": 5 },{"a": 2, "b": 6 },{"a": 3, "b": 7 },{"a": 4, "b": 8 }]'::jsonb),
        ('[{"a": 2, "b": 5 },{"a": 4, "b": 6 },{"a": 5, "b": 7 },{"a": 6, "b": 8 }]'::jsonb),
        ('[{"a": 7, "b": 5 },{"a": 8, "b": 6 },{"a": 9, "b": 7 },{"a": 10, "b": 8 }]'::jsonb)
) AS t (data);
Result
data
----------------------
[{"a": 1, "b": 5}, {"a": 2, "b": 6}, {"a": 3, "b": 7}, {"a": 4, "b": 8}]
[{"a": 2, "b": 5}, {"a": 4, "b": 6}, {"a": 5, "b": 7}, {"a": 6, "b": 8}]
[{"a": 7, "b": 5}, {"a": 8, "b": 6}, {"a": 9, "b": 7}, {"a": 10, "b": 8}]
This is the required result where data from the "a" key has been mapped into the array
data
---------------------
[1,2,3,4]
[2,4,5,6]
[7,8,9,10]
You can do this with the keyvalue method of jsonpath.
-- Map the "a" value of every object in each jsonb array into a new jsonb array.
-- $[*] visits each array element, .keyvalue() expands an object into
-- {"key", "value", "id"} entries, and the filter keeps the entries whose key is "a".
-- Inside a jsonpath filter the current item is written @ (not #).
-- When a single known key is wanted, the shorter path '$[*].a' yields the same arrays.
WITH t AS (
    VALUES
        ('[{"a": 1, "b": 5 },{"a": 2, "b": 6 },{"a": 3, "b": 7 },{"a": 4, "b": 8 }]'::jsonb),
        ('[{"a": 2, "b": 5 },{"a": 4, "b": 6 },{"a": 5, "b": 7 },{"a": 6, "b": 8 }]'::jsonb),
        ('[{"a": 7, "b": 5 },{"a": 8, "b": 6 },{"a": 9, "b": 7 },{"a": 10, "b": 8 }]'::jsonb)
)
SELECT jsonb_path_query_array(column1, '$[*].keyvalue() ? (@.key == "a").value')
FROM t;

Find and update pairs in collection with matching and non-matching/inverse values

I need to find matching pairs in a single collection based on the values of their fields, and then update them.
Let's say I have the following collection:
[{ "_id": "a", "client": 1, "tag": "", "debit": 12, "credit": -20 },
{ "_id": "b", "client": 1, "tag": "G", "debit": -12, "credit": 20 } ,
{ "_id": "c", "client": 1, "tag": "G", "debit": 12, "credit": 20 },
{ "_id": "d", "client": 2, "tag": "", "debit": 13, "credit": 0 },
{ "_id": "e", "client": 2, "tag": "G", "debit": -13, "credit": 0 },
{ "_id": "f", "client": 3, "tag": "", "debit": 13, "credit": 0 },
{ "_id": "g", "client": 3, "tag": "G", "debit": 14, "credit": 0 },
{ "_id": "h", "client": 4, "tag": "", "debit": 0, "credit": 0 }]
Now for two documents to build a pair they have to:
have the same client
one has the "" tag, one has the "G" tag
one has to have the inverse value of debit OR credit
bonus: ignore 0 if possible (since -0 == 0, I could also just remove the field)
bonus: if multiple pairs with "G" are found only take one of the two
So the example collection would result in the following pairs:
("a", "b") or ("a", "c"):
a.client == b.client && a.tag == "" && b.tag == "G" && a.debit == -b.debit
for "c": a.credit == -c.credit
("d", "e"):
normal pair: d.client == e.client && d.tag == "" && e.tag == "G" && d.debit == -e.debit
("f", "g") DON'T match, because 0 doesn't count
"h" has no match
The result could look something like this:
[{ "pair": ["a", "b"] }, { "pair": ["d", "e"] }]
The goal is to update all documents who appear in any pair, so this could also just be a list of ids.
Final goal:
[{ "_id": "a", "client": 1, "tag": "", "debit": 12, "credit": -20, "s": true },
{ "_id": "b", "client": 1, "tag": "G", "debit": -12, "credit": 20, "s": true } ,
{ "_id": "c", "client": 1, "tag": "G", "debit": 12, "credit": 20 },
{ "_id": "d", "client": 2, "tag": "", "debit": 13, "credit": 0, "s": true },
{ "_id": "e", "client": 2, "tag": "G", "debit": -13, "credit": 0, "s": true },
{ "_id": "f", "client": 3, "tag": "", "debit": 13, "credit": 0 },
{ "_id": "g", "client": 3, "tag": "G", "debit": 14, "credit": 0 },
{ "_id": "h", "client": 4, "tag": "", "debit": 0, "credit": 0 }]
I know I can group by client but how do I group by non matching fields?
And how can I compare the values of two documents, since I have to compare each documents with all others..
In SQL I solved it with this statement, maybe this helps or makes it clearer (I know it doesn't filter duplicates, the one with duplicates is too long to get the point across):
-- Pair every untagged document (d1) with a 'G'-tagged document (d2) of the same
-- client whose debit or credit is the exact inverse. Zero amounts are skipped,
-- because 0 = -0 would otherwise pair documents that merely share an empty field.
-- String literals use single quotes: in ANSI SQL double quotes delimit identifiers.
-- Duplicate pairs are intentionally not filtered here.
SELECT
    d1.id,
    d2.id
FROM documents AS d1
INNER JOIN documents AS d2
    ON d2.client = d1.client
WHERE d1.tag = ''
    AND d2.tag = 'G'
    AND (
        (d1.debit <> 0 AND d1.debit = -d2.debit)
        OR (d1.credit <> 0 AND d1.credit = -d2.credit)
    );
Thanks :)
I hope you get a more succinct answer, but here's one way to update (using "$merge") the documents in the collection using your logic to identify them as pairs.
// Flag both members of every matched pair with "s": true, writing the result
// back into the same collection.
// NOTE(review): combining localField/foreignField with "let"/"pipeline" in one
// $lookup needs a recent server (MongoDB 5.0+ per the $lookup docs) - confirm.
db.documents.aggregate([
  // Start from the untagged side of each potential pair.
  { "$match": { "tag": "" } },

  // Keep a pristine copy of the untagged document before $lookup adds "pairs".
  { "$set": { "docStore": "$$ROOT" } },

  // Find the "G"-tagged documents of the same client whose credit or debit is
  // the inverse of ours; zero amounts never count as a match.
  {
    "$lookup": {
      "from": "documents",
      "localField": "client",
      "foreignField": "client",
      "let": { "credit": "$credit", "debit": "$debit" },
      "pipeline": [
        {
          "$match": {
            "$expr": {
              "$and": [
                { "$eq": ["$tag", "G"] },
                {
                  "$or": [
                    {
                      "$and": [
                        { "$ne": ["$$credit", 0] },
                        { "$eq": [{ "$add": ["$$credit", "$credit"] }, 0] }
                      ]
                    },
                    {
                      "$and": [
                        { "$ne": ["$$debit", 0] },
                        { "$eq": [{ "$add": ["$$debit", "$debit"] }, 0] }
                      ]
                    }
                  ]
                }
              ]
            }
          }
        }
      ],
      "as": "pairs"
    }
  },

  // Drop untagged documents that found no partner ("pairs" is always an array).
  { "$match": { "pairs": { "$ne": [] } } },

  // Build the pair from the stored original and the first partner only.
  {
    "$project": {
      "_id": 0,
      "pair": ["$docStore", { "$first": "$pairs" }]
    }
  },

  // One output document per pair member, each flagged with "s": true.
  { "$unwind": "$pair" },
  { "$replaceWith": { "$mergeObjects": ["$pair", { "s": true }] } },

  // Merge the flagged documents back into the source collection.
  { "$merge": { "into": "documents" } }
])
Try it on mongoplayground.net.

Grouping multiple documents with nested array of objects in MongoDB

I have documents with the following structure:
x = {
"scalar": 1,
"array": [
{"key": 1, "value": 2},
{"key": 2, "value": 3},
],
"array2": [
{"key": 1, "value": 2},
{"key": 2, "value": 3},
],
}
and
y = {
"scalar": 2,
"array": [
{"key": 1, "value": 3},
{"key": 3, "value": 0},
],
"array2": [
{"key": 1, "value": 3},
{"key": 3, "value": 0},
],
}
The end results I'm trying to find is this
{
"scalar": 3, # SUM of scalar
"array": [
{"key": 1, "value": 5}, # SUM by key = 1
{"key": 2, "value": 3},
{"key": 3, "value": 0},
],
"array2": [
{"key": 1, "value": 5}, # SUM by key = 1
{"key": 2, "value": 3},
{"key": 3, "value": 0},
],
}
I've tried to use double $unwind and then do push by. I'm thinking of using $reduce to get the final results
Query
One way to do it is with $facet: you want three independent groupings, and $facet lets you split the pipeline into three separate parts so that the unwinds do not get mixed. I think this is the simplest way to do it.
Test code here
// Sum "scalar" across all documents and sum the "value" of each "key" inside
// "array" and "array2". $facet runs the three groupings independently so the
// $unwind of one array never multiplies the rows of the other.
db.collection.aggregate([
  {
    "$facet": {
      // Total of the scalar field over the whole collection.
      "scalar": [
        { "$project": { "scalar": 1 } },
        { "$group": { "_id": null, "sum": { "$sum": "$scalar" } } },
        { "$unset": ["_id"] }
      ],
      // Per-key totals for "array".
      "array": [
        { "$project": { "array": 1 } },
        { "$unwind": { "path": "$array" } },
        { "$group": { "_id": "$array.key", "sum": { "$sum": "$array.value" } } },
        { "$project": { "_id": 0, "key": "$_id", "value": "$sum" } },
        // $group emits its groups in no guaranteed order; sort for a stable,
        // key-ordered result.
        { "$sort": { "key": 1 } }
      ],
      // Per-key totals for "array2" (same steps as for "array").
      "array2": [
        { "$project": { "array2": 1 } },
        { "$unwind": { "path": "$array2" } },
        { "$group": { "_id": "$array2.key", "sum": { "$sum": "$array2.value" } } },
        { "$project": { "_id": 0, "key": "$_id", "value": "$sum" } },
        { "$sort": { "key": 1 } }
      ]
    }
  },
  // The "scalar" facet is a one-element array; unwrap it to a plain number.
  {
    "$set": {
      "scalar": { "$arrayElemAt": ["$scalar.sum", 0] }
    }
  }
])
Another alternative is to unwind both arrays, but then the unwinds and groups get mixed, which I think makes things complicated.
Also, I think $reduce can't be used for grouping in MongoDB, because we can't construct dynamic paths.
Suppose we group with $reduce and have accumulated this data (key = key, value = value):
{"1" : 5 , "2" : 3}
Now we see {"key": 1, "value": 5}: how can we check whether the data above contains 1 as a key? We can't construct dynamic paths such as $$this.1. The only way is to convert the object to an array and back to an object, which would be slow.

Sort multiple condition MongoDB

I have collection of user, and this is the following of documents :
{ "_id": 1, "name": "A", "online": 1, "like": 10, "score": 1 },
{ "_id": 2, "name": "B", "online": 0, "like": 9, "score": 0 },
{ "_id": 3, "name": "C", "online": 0, "like": 8, "score": 1 },
{ "_id": 4, "name": "D", "online": 1, "like": 8, "score": 0 },
{ "_id": 5, "name": "E", "online": 1, "like": 7, "score": 1 },
{ "_id": 6, "name": "F", "online": 0, "like": 10, "score": 1 },
{ "_id": 7, "name": "G", "online": 0, "like": 5, "score": 0 },
{ "_id": 8, "name": "H", "online": 0, "like": 13, "score": 0 }
{ "_id": 9, "name": "I", "online": 0, "like": 6, "score": 0 }
I want to list the users ordered by several conditions: online users come first, sorted by most liked; after them come the offline users, sorted by score and then by most liked. The rules are:
If online is 1, sort by like descending.
If online is 0 and score is 1, sort by score descending.
If online is 0 and score is 0, sort by like descending.
So, the result can be like :
{ "_id": 1, "name": "A", "online": 1, "like": 10, "score": 1 },
{ "_id": 4, "name": "D", "online": 1, "like": 8, "score": 0 },
{ "_id": 5, "name": "E", "online": 1, "like": 7, "score": 1 },
{ "_id": 6, "name": "F", "online": 0, "like": 10, "score": 1 },
{ "_id": 3, "name": "C", "online": 0, "like": 8, "score": 1 },
{ "_id": 8, "name": "H", "online": 0, "like": 13, "score": 0 }
{ "_id": 2, "name": "B", "online": 0, "like": 9, "score": 0 },
{ "_id": 9, "name": "I", "online": 0, "like": 6, "score": 0 },
{ "_id": 7, "name": "G", "online": 0, "like": 5, "score": 0 }
I have finished until point 2, my query following :
// Order online users by "like" and offline users by "score" (rules 1 and 2).
// "sort" is a computed key: "like" for online users, "score" otherwise.
db.users.aggregate([
  {
    $project: {
      "_id": 1,
      "name": 1,
      "online": 1,
      "like": 1,
      "score": 1,
      "sort": {
        $cond: {
          // "online" is stored as the number 1/0; in an aggregation
          // expression 1 is not equal to true, so compare with 1.
          "if": { $eq: ["$online", 1] },
          "then": "$like",
          "else": "$score"
        }
      }
    }
  },
  {
    // Online users first, then the computed key; "_id" breaks remaining ties
    // so the order is deterministic.
    $sort: {
      "online": -1,
      "sort": -1,
      "_id": 1
    }
  },
  { $skip: 0 },
  { $limit: 9 }
])
But I have the current result following :
{ "_id": 1, "name": "A", "online": 1, "like": 10, "score": 1 },
{ "_id": 4, "name": "D", "online": 1, "like": 8, "score": 0 },
{ "_id": 5, "name": "E", "online": 1, "like": 7, "score": 1 },
{ "_id": 6, "name": "F", "online": 0, "like": 10, "score": 1 },
{ "_id": 3, "name": "C", "online": 0, "like": 8, "score": 1 },
{ "_id": 2, "name": "B", "online": 0, "like": 9, "score": 0 },
{ "_id": 7, "name": "G", "online": 0, "like": 5, "score": 0 },
{ "_id": 8, "name": "H", "online": 0, "like": 13, "score": 0 }
{ "_id": 9, "name": "I", "online": 0, "like": 6, "score": 0 },
As you can see, based on rule 3 the document { "_id": 8, "name": "H", "online": 0, "like": 13, "score": 0 } should come first among the users whose score is 0.
First, create an additional field called point with the value (1 - online) * score.
Then sort the data by:
online desc
point desc (when online is 1, point is always 0; when online is 0, point equals score)
like desc
You can use this query
// Sort with a computed "point" key: (1 - online) * score.
db.yourtable.aggregate([
  {
    $project: {
      "id": 1,
      "name": 1,
      "online": 1,
      "like": 1,
      "score": 1,
      // 0 for online users, the score itself for offline users.
      "point": {
        $multiply: [
          { $subtract: [1, "$online"] },
          "$score"
        ]
      }
    }
  },
  // Online users first, then by point, then by like - all descending.
  {
    $sort: {
      "online": -1,
      "point": -1,
      "like": -1
    }
  }
]);
Please check below query :
// Sort with three computed keys, each of which is 0 for the users it does not
// apply to, so it only orders its own group.
db.getCollection('yourTable').aggregate([
  {
    $project: {
      "id": 1,
      "name": 1,
      "online": 1,
      "like": 1,
      "score": 1,
      // "like" for online users, 0 for everyone else.
      "onlineSortLike": { $cond: [{ $eq: ["$online", 1] }, "$like", 0] },
      // "score" for offline users, 0 for everyone else.
      "sortOfflineScore": { $cond: [{ $eq: ["$online", 0] }, "$score", 0] },
      // "like" for offline users, 0 for everyone else.
      "sortOfflineScoreLike": { $cond: [{ $eq: ["$online", 0] }, "$like", 0] }
    }
  },
  {
    // Online first; then each computed key, all descending.
    $sort: {
      "online": -1,
      "onlineSortLike": -1,
      "sortOfflineScore": -1,
      "sortOfflineScoreLike": -1
    }
  },
  { $skip: 0 },
  { $limit: 9 }
])

MongoDB $or query with index

I need to make a query that uses an index in MongoDB; below is a minimal example of my real case.
I have the following collection with the following data:
devsrv(mongod-3.0.4) test> db.teste.find()
{
"_id": ObjectId("57b324c341aaa4b930ef3b92"),
"a": 1,
"b": 1
}
{
"_id": ObjectId("57b324c941aaa4b930ef3b93"),
"a": 1,
"b": 2
}
{
"_id": ObjectId("57b324cd41aaa4b930ef3b94"),
"a": 1,
"b": 3
}
{
"_id": ObjectId("57b324d141aaa4b930ef3b95"),
"a": 1,
"b": 4
}
{
"_id": ObjectId("57b324d541aaa4b930ef3b96"),
"a": 1,
"b": 5
}
{
"_id": ObjectId("57b324da41aaa4b930ef3b97"),
"a": 1,
"b": 6
}
{
"_id": ObjectId("57b324df41aaa4b930ef3b98"),
"a": 1,
"b": 7
}
{
"_id": ObjectId("57b324e441aaa4b930ef3b99"),
"a": 1,
"b": 8
}
{
"_id": ObjectId("57b324f341aaa4b930ef3b9a"),
"a": 1,
"b": ""
}
{
"_id": ObjectId("57b324f641aaa4b930ef3b9b"),
"a": 1,
"b": " "
}
{
"_id": ObjectId("57b324fc41aaa4b930ef3b9c"),
"a": 1,
"b": null
}
{
"_id": ObjectId("57b3250341aaa4b930ef3b9d"),
"a": 1
}
{
"_id": ObjectId("57b46ace41aaa4b930ef3b9e"),
"a": 2
}
And I have the following indexes:
devsrv(mongod-3.0.4) test> db.teste.getIndexes()
[
{
"v": 1,
"key": {
"_id": 1
},
"name": "_id_",
"ns": "test.teste"
},
{
"v": 1,
"key": {
"a": 1,
"b": 1
},
"name": "a_1_b_1",
"ns": "test.teste"
},
{
"v": 1,
"key": {
"b": 1
},
"name": "b_1",
"ns": "test.teste"
}
]
And I need make a query equal this:
devsrv(mongod-3.0.4) test> db.teste.find({$or:[{"b":null},{"b":""},{"b":" "},{"b":{$lt:3}}],"a":1}).explain("executionStats")
{
"queryPlanner": {
"plannerVersion": 1,
"namespace": "test.teste",
"indexFilterSet": false,
"parsedQuery": {
"$and": [
{
"$or": [
{
"b": {
"$eq": null
}
},
{
"b": {
"$eq": ""
}
},
{
"b": {
"$eq": " "
}
},
{
"b": {
"$lt": 3
}
}
]
},
{
"a": {
"$eq": 1
}
}
]
},
"winningPlan": {
"stage": "FETCH",
"filter": {
"a": {
"$eq": 1
}
},
"inputStage": {
"stage": "FETCH",
"filter": {
"$or": [
{
"b": {
"$eq": null
}
},
{
"b": {
"$eq": ""
}
},
{
"b": {
"$eq": " "
}
},
{
"b": {
"$lt": 3
}
}
]
},
"inputStage": {
"stage": "IXSCAN",
"keyPattern": {
"b": 1
},
"indexName": "b_1",
"isMultiKey": false,
"direction": "forward",
"indexBounds": {
"b": [
"[null, null]",
"[-inf.0, 3.0)",
"[\"\", \"\"]",
"[\" \", \" \"]"
]
}
}
}
},
"rejectedPlans": [
{
"stage": "FETCH",
"filter": {
"$or": [
{
"b": {
"$eq": null
}
},
{
"b": {
"$eq": ""
}
},
{
"b": {
"$eq": " "
}
},
{
"b": {
"$lt": 3
}
}
]
},
"inputStage": {
"stage": "IXSCAN",
"keyPattern": {
"a": 1,
"b": 1
},
"indexName": "a_1_b_1",
"isMultiKey": false,
"direction": "forward",
"indexBounds": {
"a": [
"[1.0, 1.0]"
],
"b": [
"[MinKey, MaxKey]"
]
}
}
}
]
},
"executionStats": {
"executionSuccess": true,
"nReturned": 6,
"executionTimeMillis": 0,
"totalKeysExamined": 8,
"totalDocsExamined": 14,
"executionStages": {
"stage": "FETCH",
"filter": {
"a": {
"$eq": 1
}
},
"nReturned": 6,
"executionTimeMillisEstimate": 0,
"works": 10,
"advanced": 6,
"needTime": 2,
"needFetch": 0,
"saveState": 0,
"restoreState": 0,
"isEOF": 1,
"invalidates": 0,
"docsExamined": 7,
"alreadyHasObj": 7,
"inputStage": {
"stage": "FETCH",
"filter": {
"$or": [
{
"b": {
"$eq": null
}
},
{
"b": {
"$eq": ""
}
},
{
"b": {
"$eq": " "
}
},
{
"b": {
"$lt": 3
}
}
]
},
"nReturned": 7,
"executionTimeMillisEstimate": 0,
"works": 8,
"advanced": 7,
"needTime": 1,
"needFetch": 0,
"saveState": 0,
"restoreState": 0,
"isEOF": 1,
"invalidates": 0,
"docsExamined": 7,
"alreadyHasObj": 0,
"inputStage": {
"stage": "IXSCAN",
"nReturned": 7,
"executionTimeMillisEstimate": 0,
"works": 8,
"advanced": 7,
"needTime": 1,
"needFetch": 0,
"saveState": 0,
"restoreState": 0,
"isEOF": 1,
"invalidates": 0,
"keyPattern": {
"b": 1
},
"indexName": "b_1",
"isMultiKey": false,
"direction": "forward",
"indexBounds": {
"b": [
"[null, null]",
"[-inf.0, 3.0)",
"[\"\", \"\"]",
"[\" \", \" \"]"
]
},
"keysExamined": 8,
"dupsTested": 0,
"dupsDropped": 0,
"seenInvalidated": 0,
"matchTested": 0
}
}
}
},
"serverInfo": {
"host": "devsrv",
"port": 27017,
"version": "3.0.4",
"gitVersion": "0481c958daeb2969800511e7475dc66986fa9ed5"
},
"ok": 1
}
But MongoDB isn't using the two indexes together.
Each $or term is effectively a separate query, so it helps to structure your query so that each term aligns with the index you're hoping to use. In this case that means moving the a: 1 part inside of each $or term:
// Repeat the a: 1 condition inside every $or term so that each term lines up
// with the compound {a: 1, b: 1} index.
db.teste.find({
  $or: [
    { a: 1, b: null },
    { a: 1, b: "" },
    { a: 1, b: " " },
    { a: 1, b: { $lt: 3 } }
  ]
}).explain('executionStats')
The explain output shows that the a_1_b_1 is used for this query.
But you can simplify this a bit more by using $in to combine the first three terms into one:
// Same query, with the three equality terms folded into a single $in.
db.teste.find({
  $or: [
    { a: 1, b: { $in: [null, "", " "] } },
    { a: 1, b: { $lt: 3 } }
  ]
}).explain('executionStats')
This is also able to use the a_1_b_1 index.
The code is
// Keep the original query shape and name the compound index via hint().
db.teste.explain("executionStats").find({
  a: 1,
  $or: [
    { b: null },
    { b: "" },
    { b: " " },
    { b: { $lt: 3 } }
  ]
}).hint({ a: 1, b: 1 })
Be careful with the hint command as the query optimizer chooses the most efficient query by measuring actual performance of the query with every suitable index.