MongoDB sort by join result with aggregate lookup - mongodb

I have two collections, user and post:
> db.user.find().pretty()
{
"_id" : ObjectId("5d1473bc1b48d9309580a9de"),
"user_id" : NumberLong(1),
"region" : "US",
"is_join" : true
}
{
"_id" : ObjectId("5d1473bc1b48d9309580a9df"),
"user_id" : NumberLong(2),
"region" : "KR",
"is_join" : true
}
{
"_id" : ObjectId("5d1473bc1b48d9309580a9e0"),
"user_id" : NumberLong(3),
"region" : "US",
"is_join" : true
}
{
"_id" : ObjectId("5d1473bc1b48d9309580a9e1"),
"user_id" : NumberLong(4),
"region" : "US",
"is_join" : true
}
{
"_id" : ObjectId("5d1487fc1b48d9321ff5dc1f"),
"user_id" : NumberLong(5),
"region" : "US",
"is_join" : true
}
> db.post.find().pretty()
{
"_id" : ObjectId("5d1473bc1b48d9309580a9e2"),
"post_id" : NumberLong(1),
"user_id" : NumberLong(3),
"body" : "hi"
}
{
"_id" : ObjectId("5d1473bc1b48d9309580a9e3"),
"post_id" : NumberLong(2),
"user_id" : NumberLong(1),
"body" : "hello"
}
{
"_id" : ObjectId("5d1473bc1b48d9309580a9e4"),
"post_id" : NumberLong(3),
"user_id" : NumberLong(2),
"body" : "go"
}
{
"_id" : ObjectId("5d1473bc1b48d9309580a9e5"),
"post_id" : NumberLong(4),
"user_id" : NumberLong(4),
"body" : "python"
}
{
"_id" : ObjectId("5d14941b1b48d93314907345"),
"post_id" : NumberLong(5),
"user_id" : NumberLong(1),
"body" : "aa"
}
I want to join them using the MongoDB aggregation $lookup stage.
So I wrote a query like this:
db.user.aggregate([
{
'$match': {
'region': 'US',
}
},
{
'$lookup': {
'from': 'post',
'localField': 'user_id',
'foreignField': 'user_id',
'as': 'user'
}
},
{
'$project': {
'_id': 0,
'user.post_id': 1
}
}
])
Result
{ "user" : [ { "post_id" : NumberLong(2) }, { "post_id" : NumberLong(5) } ] }
{ "user" : [ { "post_id" : NumberLong(1) } ] }
{ "user" : [ { "post_id" : NumberLong(4) } ] }
{ "user" : [ ] }
As you can see, the post_id values are not ordered.
But I want them sorted in ascending order, as shown below.
Desired result
{ "user" : [ { "post_id" : NumberLong(1) } ] }
{ "user" : [ { "post_id" : NumberLong(2) } ] }
{ "user" : [ { "post_id" : NumberLong(4) } ] }
{ "user" : [ { "post_id" : NumberLong(5) } ] }
Even Better
{ "user" : [ 1, 2, 4, 5 ] }
How can I modify aggregate query?
Thanks.

You can do the following to get what you want:
1) $unwind the user array
2) use $sort to sort all documents with post_id
3) use $group as the next stage in your aggregation query, and push user.post_id to form one sorted user array from all the docs:
db.user.aggregate([
{
'$match': {
'region': 'US',
}
},
{
'$lookup': {
'from': 'post',
'localField': 'user_id',
'foreignField': 'user_id',
'as': 'user'
}
},
{
$unwind : {
path : "$user",
preserveNullAndEmptyArrays : true
}
},
{
$sort : {
'user.post_id' : 1
}
},
{
$group : {
_id : null,
user : {
$push : "$user.post_id"
}
}
}
])
See the MongoDB documentation on $sort, $unwind and $group for more information.

Related

$lookup nested external id after $group

I want to replace the external user ids with the real user properties after $lookup, my data:
"comments" : [
{
"user_Id" : ObjectId("aaa"),
"content" : "aaaa",
"rep" : [
{
"user_Id" : ObjectId("bbb"),
"comment" : "bbbb",
},
{
"user_Id" : ObjectId("ccc"),
"comment" : "cccc",
}
]
},
{
"user_Id" : ObjectId("ddd"),
"content" : "ddd",
"rep" : [ ]
}
]
User collection:
"users" : [
{
"_id" : ObjectId("aaa"),
"name" : "user1",
"email" : "test1#test.com",
},
{
"_id" : ObjectId("bbb"),
"username" : "user2",
"email" : "test2#test.com",
}
]
What I want to achieve:
"comments" : [
{
"user" : {
"_id" : ObjectId("aaa"),
"name" : "user1",
"email" : "test1#test.com",
}
"content" : "aaaa",
"rep" : [
{
"userId" : {
"_id" : ObjectId("bbb"),
"username" : "user2",
"email" : "test2#test.com",
},
"comment" : "bbbb",
},
{
"user" : {
"_id" : ObjectId("aaa"),
"name" : "user1",
"email" : "test1#test.com",
},
"comment" : "cccc",
}
]
},
{
"user" : {
"_id" : ObjectId("bbb"),
"username" : "user2",
"email" : "test2#test.com",
},
"content" : "ddd",
"rep" : [ ]
}
]
Right now I have managed to get the user info from the external id, but I'm going crazy trying to get the user object inside the replies too. I tried grouping again after the first $group, but it didn't work. This is what I did:
db.pages.aggregate([
{
$match: { _id: ObjectId('5db599f3fffdee1c822269e0b3') }
},
{
$project: {
comments: 1,
}
},
{ $unwind: '$comments' },
{
$lookup:
{
from: 'users',
localField: 'comments.user_Id',
foreignField: '_id',
as: 'us'
}
},
{ $unwind: '$us' },
{
$group: {
_id: {
user: {
id: '$us._id',
name: '$us.username',
email: '$us.email',
},
comments: {
comment: '$comments.comment',
rep: '$comments.rep'
},
}
}
}
]).pretty()

Multiple condition on same column mongodb

{
"_id" : ObjectId("5ccfe06e2434de5c345d058e"),
"user_id" : 1,
"event" : "eventA",
"channel_id" : "1098",
"channel_node_id" : "2177",
"channel_name" : "New work",
"created_at" : NumberLong("1557127278000"),
"updated_at" : NumberLong("1557127278000"),
"user_raw_data" : {
"Name" : "akhilesh",
"Mobile" : "1234567890",
"Email" : "akhilesh#test.com"
},
{
"_id" : ObjectId("5ccfe06e2434de5c345d058e"),
"user_id" : 1
"event" : "eventB,
"channel_id" : "1098",
"channel_node_id" : "2177",
"channel_name" : "New work",
"created_at" : NumberLong("1557127278000"),
"updated_at" : NumberLong("1557127278000"),
"user_raw_data" : {
"Name" : "akhilesh",
"Mobile" : "1234567890",
"Email" : "akhilesh#test.com"
},
{
"_id" : ObjectId("5ccfe06e2434de5c345d058e"),
"user_id" : 1
"event" : "eventC,
"channel_id" : "1098",
"channel_node_id" : "2178",
"channel_name" : "New work",
"created_at" : NumberLong("1557127278000"),
"updated_at" : NumberLong("1557127278000"),
"user_raw_data" : {
"Name" : "akhilesh",
"Mobile" : "1234567890",
"Email" : "akhilesh#test.com"
}
,
{
"_id" : ObjectId("5ccfe06e2434de5c345d058e"),
"user_id" : 2
"event" : "eventA,
"channel_id" : "1098",
"channel_node_id" : "2178",
"channel_name" : "New work",
"created_at" : NumberLong("1557127278000"),
"updated_at" : NumberLong("1557127278000"),
"user_raw_data" : {
"Name" : "akhilesh",
"Mobile" : "1234567890",
"Email" : "akhilesh#test.com"
}
Now I want to get the count of events performed by the users so far. For eventA the count will be 2, for eventB 1, and for eventC 1. But this query has multiple conditions: I want the counts only for users who have performed (eventA or eventB) and eventC. So in that case, from the documents above, the events of user id 2 will not be considered because that user has not performed (eventA or eventB).
Also, along with matching events using multiple and/or conditions, I want to apply a filter on user_raw_data, so my query should look like this:
db.web_channel_events.aggregate([
{
$match: {
"channel_id": "1098",
"channel_node_id": "2177"
}
},
{
$group: {
"_id": {
"user_id": "$user_id",
"event": "$event"
},
"count": {
$sum: 1
}
}
},
{
$group: {
"_id": "$_id.user_id",
"event_details": {
$push: {
"k": "$_id.event",
"v": "$count"
}
}
}
},
{
$match: {
$and: [
{
$or: [
{
"event_details.k": "eventA",
"event_details.v": {
"$gte": 1
}
},
{
"event_details.k": "eventB",
"event_details.v": {
"$gte": 1
}
}
]
},
{
"event_details.k": "eventC",
"event_details.v": {
"$gte": 1
}
},
{
"user_raw_data.Name": "akhilesh"
}
]
}
},
{
"$unwind": "$event_details"
},
{
$group: {
"_id": "$event_details.k",
"count": {
$sum: "$event_details.v"
}
}
}
]).pretty();
The following query can get us the expected output:
db.collection.aggregate([
{
$group:{
"_id":{
"user_id":"$user_id",
"event":"$event"
},
"user_id":{
$first:"$user_id"
},
"event":{
$first:"$event"
},
"count":{
$sum:1
}
}
},
{
$group:{
"_id":"$user_id",
"user_id":{
$first:"$user_id"
},
"event_details":{
$push:{
"k":"$event",
"v":"$count"
}
}
}
},
{
$addFields:{
"event_details":{
$arrayToObject:"$event_details"
}
}
},
{
$match:{
$and:[
{
$or:[
{
"event_details.eventA":{
$gt:0
}
},
{
"event_details.eventB":{
$gt:0
}
}
]
},
{
"event_details.eventC":{
$gt:0
}
}
]
}
},
{
$group:{
"_id":null,
"eventA":{
$sum:"$event_details.eventA"
},
"eventB":{
$sum:"$event_details.eventB"
},
"eventC":{
$sum:"$event_details.eventC"
}
}
},
{
$project:{
"_id":0,
"event_details.eventA":"$eventA",
"event_details.eventB":"$eventB",
"event_details.eventC":"$eventC"
}
}
]).pretty()
Data set:
{
"_id" : ObjectId("5ccfe06e2434de5c345d0588"),
"event" : "eventA",
"user_id" : 1,
"created_at" : NumberLong("1557127278000"),
"updated_at" : NumberLong("1557127278000")
}
{
"_id" : ObjectId("5d8b132388edcf85b0aaa609"),
"event" : "eventA",
"user_id" : 1,
"created_at" : NumberLong("1557127278000"),
"updated_at" : NumberLong("1557127278000")
}
{
"_id" : ObjectId("5d8b132388edcf85b0aaa60a"),
"event" : "eventB",
"user_id" : 1,
"created_at" : NumberLong("1557127278000"),
"updated_at" : NumberLong("1557127278000")
}
{
"_id" : ObjectId("5d8b132388edcf85b0aaa60b"),
"event" : "eventC",
"user_id" : 1,
"created_at" : NumberLong("1557127278000"),
"updated_at" : NumberLong("1557127278000")
}
{
"_id" : ObjectId("5d8b132388edcf85b0aaa60c"),
"event" : "eventC",
"user_id" : 1,
"created_at" : NumberLong("1557127278000"),
"updated_at" : NumberLong("1557127278000")
}
{
"_id" : ObjectId("5d8b132388edcf85b0aaa60d"),
"event" : "eventC",
"user_id" : 2,
"created_at" : NumberLong("1557127278000"),
"updated_at" : NumberLong("1557127278000")
}
Output:
{ "event_details" : { "eventA" : 2, "eventB" : 1, "eventC" : 2 } }
Query analysis:
1. Group the data by user_id and event, and count the occurrences of each event per user.
2. Group by user_id only, pushing each event and its count into an event_details array as key-value pairs.
3. Convert the event_details array into an object.
4. Apply the condition that the event counts for (A or B) and C must be greater than 0.
5. Sum up the individual event counts.

MongoDB perform join with aggregate

I have two collections User and Post.
User
user_id
region
is_join
Post
post_id
user_id
body
is_block
From the perspective of a relational database, the User:Post relationship is 1:N.
Each user can write multiple posts.
For example, the documents currently look like this:
User
> db.user.find()
{ "_id" : ObjectId("5d15e41a1b48d9417ebc28d2"), "user_id" : NumberLong(1), "region" : "US", "is_join" : true }
{ "_id" : ObjectId("5d15e41a1b48d9417ebc28d5"), "user_id" : NumberLong(2), "region" : "KR", "is_join" : true }
{ "_id" : ObjectId("5d15e41a1b48d9417ebc28d8"), "user_id" : NumberLong(3), "region" : "US", "is_join" : true }
{ "_id" : ObjectId("5d15e41a1b48d9417ebc28da"), "user_id" : NumberLong(4), "region" : "KR", "is_join" : false }
{ "_id" : ObjectId("5d15e41a1b48d9417ebc28dc"), "user_id" : NumberLong(5), "region" : "US", "is_join" : true }
Post
> db.post.find()
{ "_id" : ObjectId("5d15e41a1b48d9417ebc28d3"), "post_id" : NumberLong(1), "user_id" : NumberLong(1), "body" : "first", "is_block" : false }
{ "_id" : ObjectId("5d15e41a1b48d9417ebc28d4"), "post_id" : NumberLong(4), "user_id" : NumberLong(1), "body" : "fourth", "is_block" : false }
{ "_id" : ObjectId("5d15e41a1b48d9417ebc28d6"), "post_id" : NumberLong(2), "user_id" : NumberLong(2), "body" : "second", "is_block" : false }
{ "_id" : ObjectId("5d15e41a1b48d9417ebc28d7"), "post_id" : NumberLong(3), "user_id" : NumberLong(2), "body" : "third", "is_block" : false }
{ "_id" : ObjectId("5d15e41a1b48d9417ebc28d9"), "post_id" : NumberLong(5), "user_id" : NumberLong(3), "body" : "fifth", "is_block" : true }
{ "_id" : ObjectId("5d15e41a1b48d9417ebc28db"), "post_id" : NumberLong(6), "user_id" : NumberLong(4), "body" : "sixth", "is_block" : false }
{ "_id" : ObjectId("5d15e41a1b48d9417ebc28dd"), "post_id" : NumberLong(7), "user_id" : NumberLong(5), "body" : "seven", "is_block" : true }
{ "_id" : ObjectId("5d15e41a1b48d9417ebc28de"), "post_id" : NumberLong(8), "user_id" : NumberLong(5), "body" : "eight", "is_block" : false }
To perform the join via aggregate, several conditions have to be applied:
1. User region='US', is_join=true
2. Posts created by the users matched in condition 1
3. Post is_block=false
4. Sort by post_id
5. (Optional) If a user has not written any posts, exclude that user. I know this can be done through preserveNullAndEmptyArrays, but I think it causes a performance issue.
Desired result
{
"posts" : [
{
"post_id" : NumberLong(1),
"body" : "first",
"is_block" : false
},
{
"post_id" : NumberLong(2),
"body" : "second",
"is_block" : false
},
{
"post_id" : NumberLong(3),
"body" : "third",
"is_block" : false
},
{
"post_id" : NumberLong(4),
"body" : "fourth",
"is_block" : false
},
{
"post_id" : NumberLong(8),
"body" : "eight",
"is_block" : false
}
]
}
post_id = 5 was excluded by is_block=true
post_id = 6 was excluded by is_join=false
post_id = 7 was excluded by is_block=true
And the whole result is sorted by post_id.
I'm new to MongoDB, so maybe I'm thinking too much in terms of a relational database,
and I don't know whether this can be done in NoSQL.
Is there any way to do it?
Any suggestions would be much appreciated.
Thanks.
You can achieve this result using a $lookup stage with a pipeline.
const region = 'US';
const is_join = true;
const is_block = false;
const query = [
{
$match: {
region: region,
is_join: is_join
}
},
{
$lookup: {
let: { user_id: "$user_id" },
from: 'posts',
pipeline: [
{
$match: {
$expr: {
$and: [
{ $eq: ["$$user_id", "$user_id"], },
{ $eq: ["$is_block", is_block] }
]
}
}
},
{
$sort: {
post_id: 1
}
}
],
as: "posts"
}
},
{
$unwind: "$posts"
},
{
$group:{
_id: "mygroup",
posts: {
$push: {
post_id: "$posts.post_id",
body: "$posts.body",
is_block: "$posts.is_block",
}
}
}
},
{
$project:{
_id: false
}
}
]
db.users.aggregate(query)
OUTPUT
{
"posts" : [
{
"post_id" : 1,
"body" : "first",
"is_block" : false
},
{
"post_id" : 4,
"body" : "fourth",
"is_block" : false
},
{
"post_id" : 8,
"body" : "eight",
"is_block" : false
}
]
}
Try this -
To join two collections, you can use an aggregation like this:
User.aggregate([{
'$match': { 'region':'US', 'is_join': true } // match from users collection
}, {
$lookup: { // it will aggregate the result from both collection
from: 'posts',
localField: '_id',
foreignField: 'user_id',
as: 'posts'
}
},
{"$unwind":"$posts"},
{$match : { "posts.is_block" : false } }, // check inside post collection
{$sort : { "posts.post_id" : 1}}, // sort the data
], (err, users) => {
if (err) return callback(err, null);
console.log('users :', users);
});

MongoDB 3.4 MongoDBRef dereferencing with .NET MongoDB Driver

I have following collections:
Collection A
{
"_id" : ObjectId("5aaa3b170e26ed1eba223ba9"),
"name" : "A1",
"ref" : {
"$ref" : "B",
"$id" : ObjectId("5aaa33740e26ed1eba223ba1")
}
}
{
"_id" : ObjectId("5aaa3b170e26ed1eba223baa"),
"name" : "A2",
"ref" : {
"$ref" : "C",
"$id" : ObjectId("5aaa33740e26ed1eba223ba2")
}
}
Collection B
{
"_id" : ObjectId("5aaa33740e26ed1eba223ba1"),
"name" : "B1"
}
...
Collection C
{
"_id" : ObjectId("5aaa33740e26ed1eba223ba2"),
"name" : "C1"
}
...
Is it possible to get the following result?
{
"_id" : ObjectId("5aaa3b170e26ed1eba223ba9"),
"name" : "A1",
"result" : [
{
"name" : "B1"
}
]
}
{
"_id" : ObjectId("5aaa3b170e26ed1eba223baa"),
"name" : "A2",
"result" : [
{
"name" : "C1"
}
]
}
I tried it with $project and $lookup, unfortunately without success.
Here is an example:
db.A.aggregate([
{$project: {
name : 1,
refId: {$arrayElemAt: [{$objectToArray:"$ref"},1]},
refCol: {$arrayElemAt: [{$objectToArray:"$ref"},0]},
}
},
{$lookup : {
from : "refCol.v",
localField : "refId.v",
foreignField : "_id",
as : "result"
}
},
{$project : {"result._id" : 0, refId : 0, refCol : 0}}
])
In this example I can't reference the "refCol.v" field in the $lookup function.
Does anyone have a tip or a better solution for me?
You can achieve this with the populate() method:
const collectionA = require('../models/collectionA');
collectionA.find({}).populate({
path: 'collectionB',
select: {
'name': 1,
'_id': 0
}
})
}).then((data) => {
if (data) {
res.send(data);
}
}).catch((err) => {
res.send(err);
})

mongodb aggregation $group and then $push a object

this is my data :
> db.bookmarks.find({"userId" : "56b9b74bf976ab70ff6b9999"}).pretty()
{
"_id" : ObjectId("56c2210fee4a33579f4202dd"),
"userId" : "56b9b74bf976ab70ff6b9999",
"items" : [
{
"itemId" : "28",
"timestamp" : "2016-02-12T18:07:28Z"
},
{
"itemId" : "29",
"timestamp" : "2016-02-12T18:07:29Z"
},
{
"itemId" : "30",
"timestamp" : "2016-02-12T18:07:30Z"
},
{
"itemId" : "31",
"timestamp" : "2016-02-12T18:07:31Z"
},
{
"itemId" : "32",
"timestamp" : "2016-02-12T18:07:32Z"
},
{
"itemId" : "33",
"timestamp" : "2016-02-12T18:07:33Z"
},
{
"itemId" : "34",
"timestamp" : "2016-02-12T18:07:34Z"
}
]
}
I want to have something like this (ideally the _id would become the userId too):
{
"_id" : "56b9b74bf976ab70ff6b9999",
"items" : [
{ "itemId": "32", "timestamp": "2016-02-12T18:07:32Z" },
{ "itemId": "31", "timestamp": "2016-02-12T18:07:31Z" },
{ "itemId": "30", "timestamp": "2016-02-12T18:07:30Z" }
]
}
What I have now :
> db.bookmarks.aggregate(
... { $match: { "userId" : "56b9b74bf976ab70ff6b9999" } },
... { $unwind: '$items' },
... { $sort: { 'items.timestamp': -1} },
... { $skip: 2 },
... { $limit: 3},
... { $group: { '_id': '$userId' , items: { $push: '$items.itemId' } } }
... ).pretty()
{ "_id" : "56b9b74bf976ab70ff6b9999", "items" : [ "32", "31", "30" ] }
I tried reading the MongoDB documentation and found out that I can use $push, but somehow I cannot find a way to push such an object, which is not defined anywhere in the whole object. I want to have the timestamp as well, but I don't know how I should modify the $group (or other stages) to do so. Thanks for helping!
This code, which I tested in the MongoDB 3.2.1 shell, should give you the output format that you want:
> db.bookmarks.aggregate(
{ "$match" : { "userId" : "Ursula" } },
{ "$unwind" : "$items" },
{ "$sort" : { "items.timestamp" : -1 } },
{ "$skip" : 2 },
{ "$limit" : 3 },
{ "$group" : { "_id" : "$userId", items: { "$push" : { "myPlace" : "$items.itemId", "myStamp" : "$items.timestamp" } } } } ).pretty()
Running the above will produce this output:
{
"_id" : "Ursula",
"items" : [
{
"myPlace" : "52",
"myStamp" : ISODate("2016-02-13T18:07:32Z")
},
{
"myPlace" : "51",
"myStamp" : ISODate("2016-02-13T18:07:31Z")
},
{
"myPlace" : "50",
"myStamp" : ISODate("2016-02-13T18:07:30Z")
}
]
}
In MongoDB version 3.2.x, you can also use the $out operator in the very last stage of the aggregation pipeline, and have the output of the aggregation query written to a collection. Here is the code I used:
> db.bookmarks.aggregate(
{ "$match" : { "userId" : "Ursula" } },
{ "$unwind" : "$items" },
{ "$sort" : { "items.timestamp" : -1 } },
{ "$skip" : 2 },
{ "$limit" : 3 },
{ "$group" : { "_id" : "$userId", items: { "$push" : { "myPlace" : "$items.itemId", "myStamp" : "$items.timestamp" } } } },
{ "$out" : "ursula" } )
This gives me a collection named "ursula":
> show collections
ursula
and I can query that collection:
> db.ursula.find().pretty()
{
"_id" : "Ursula",
"items" : [
{
"myPlace" : "52",
"myStamp" : ISODate("2016-02-13T18:07:32Z")
},
{
"myPlace" : "51",
"myStamp" : ISODate("2016-02-13T18:07:31Z")
},
{
"myPlace" : "50",
"myStamp" : ISODate("2016-02-13T18:07:30Z")
}
]
}
>
Last of all, this is the input document I used in the aggregation query. You can compare this document to how I coded the aggregation query to see how I built the new items array.
> db.bookmarks.find( { "userId" : "Ursula" } ).pretty()
{
"_id" : ObjectId("56c240ed55f2f6004dc3b25c"),
"userId" : "Ursula",
"items" : [
{
"itemId" : "48",
"timestamp" : ISODate("2016-02-13T18:07:28Z")
},
{
"itemId" : "49",
"timestamp" : ISODate("2016-02-13T18:07:29Z")
},
{
"itemId" : "50",
"timestamp" : ISODate("2016-02-13T18:07:30Z")
},
{
"itemId" : "51",
"timestamp" : ISODate("2016-02-13T18:07:31Z")
},
{
"itemId" : "52",
"timestamp" : ISODate("2016-02-13T18:07:32Z")
},
{
"itemId" : "53",
"timestamp" : ISODate("2016-02-13T18:07:33Z")
},
{
"itemId" : "54",
"timestamp" : ISODate("2016-02-13T18:07:34Z")
}
]
}