I am new to the mongo database. Using Robo3t software
I have to find out duplicate values inside an array based on channel_id.
I did a research and found that aggregation needs to be used to do grouping and find respective count.
I have developed the following query but results are not as expected.
Sample Documents:
{
"_id" : ObjectId("59b674d141b47e5401897d31"),
"subscribed_channels" : [
{
"channel_id" : "1001",
"channel_name" : "StarPlus",
"channelPrice":"100"
},
{
"channel_id" : "1002",
"channel_name" : "StarGold",
"channelPrice":"75"
},
{
"channel_id" : "1001",
"channel_name" : "StarPlus",
"channelPrice":"100"
},
{
"channel_id" : "1003",
"channel_name" : "SetMax",
"channelPrice":"80"
}
],
"viewer_account_id" : "59b6745b41b47e5401143b3d",
"public_id_type" : "PHONE_NUMBER",
"viewer_id" : "+919322264403",
"role" : "CONSUMER",
"active" : true,
"date_time_created" : NumberLong(1505129681330),
"date_time_modified" : NumberLong(1569320824387)
}
{
"_id" : ObjectId("59b674d141b47e5401897d31"),
"subscribed_channels" : [
{
"channel_id" : "1001",
"channel_name" : "StarPlus",
"channelPrice":"100"
},
{
"channel_id" : "1002",
"channel_name" : "StarGold",
"channelPrice":"75"
},
{
"channel_id" : "1001",
"channel_name" : "StarPlus",
"channelPrice":"100"
},
{
"channel_id" : "1001",
"channel_name" : "StarPlus",
"channelPrice":"100"
}
],
"viewer_account_id" : "59b6745b41b47e5401143c56",
"public_id_type" : "PHONE_NUMBER",
"viewer_id" : "+919322264404",
"role" : "CONSUMER",
"active" : true,
"date_time_created" : NumberLong(1505129681330),
"date_time_modified" : NumberLong(1569320824387)
}
Above are just 2 records of document viewers
Query :
db.getCollection('viewers').aggregate([
{
"$group" :
{_id:{
//viewer_id:"$consumer_id",
enterprise_id:"$subscribed_channels.channel_id",
},
"viewer_id": {
$first: "$viewer_id"
},
count:{$sum:1}
}},
{
"$match": {"count": { "$gt": 1 }}
}
])
Actual Output :
{
"_id" : {
"enterprise_id" : [
"1001",
"1001",
"1002",
"1003"
]
},
"consumer_id" : "+919322264403",
"count" : 2.0
}
{
"_id" : {
"enterprise_id" : [
"1001",
"1002",
"1001",
"1001
]
},
"consumer_id" : "+919322264404",
"count" : 2.0
}
Expected Output :
I want to group based on subscribed_channels.channel_id and get a count respectively
{
"_id" : {
"enterprise_id" : [
"1001",
"1001",
"1002",
"1003"
]
},
"consumer_id" : "+919322264403",
"count" : 2.0
}
{
"_id" : {
"enterprise_id" : [
"1001",
"1001",
"1001",
"1002
]
},
"consumer_id" : "+919322264404",
"count" : 3.0
}
Grouping is not happening based on channel_id, also the count is incorrect.
The count is not even giving me no of channel-id subscribed, also not giving duplicate channel_ids.
Please guide me in building a query that gives the correct result.
Try below query :
Query :
db.collection.aggregate([
/** project only needed fields & transform fields as you like */
{
$project: {
customer_id: "$viewer_id",
enterprise_id: "$subscribed_channels.channel_id",
count: {
/** Subtract size of original array & newly formed array which has unique values to get count of duplicates */
$subtract: [
{
$size: "$subscribed_channels.channel_id" // get size of original array
},
{
$size: {
$setUnion: ["$subscribed_channels.channel_id", []] // This will give you an array with unique elements & get size of it
}
}
]
}
}
}
]);
Test : MongoDB-Playground
Related
Here is my question.
This is my sample records
{
"_id" : ObjectId("5d9b69fae4757402b4b4ca0d"),
"status_changed_utc" : [
{
"status" : NumberInt(1),
"time" : ISODate("2019-05-20T23:03:10.000+0000")
},
{
"status" : NumberInt(2),
"time" : ISODate("2019-05-23T23:04:03.000+0000")
},
{
"status" : NumberInt(4),
"time" : ISODate("2019-05-23T23:05:06.000+0000")
},
{
"status" : NumberInt(5),
"time" : ISODate("2019-05-23T23:05:07.000+0000")
},
{
"status" : NumberInt(6),
"time" : ISODate("2019-05-23T23:05:09.000+0000")
}
],
"requested_completion_utc" : ISODate("2019-05-22T23:05:09.000+0000")
},
{
"_id" : ObjectId("5d9b69fae4757402b4b4ca1e"),
"status_changed_utc" : [
{
"status" : NumberInt(1),
"time" : ISODate("2019-06-20T23:03:10.000+0000")
},
{
"status" : NumberInt(2),
"time" : ISODate("2019-07-23T23:04:03.000+0000")
},
{
"status" : NumberInt(4),
"time" : ISODate("2019-07-23T23:05:06.000+0000")
},
{
"status" : NumberInt(5),
"time" : ISODate("2019-05-23T23:05:07.000+0000")
},
{
"status" : NumberInt(6),
"time" : ISODate("2019-07-23T23:05:09.000+0000")
}
],
"requested_completion_utc" : ISODate("2019-08-22T23:05:09.000+0000")
},
I expect to find out the record which the date of the "requested_completion_utc" field larger than the date from the "status_changed_utc" field when "status_changed_utc.status" is NumberInt(2).
In this example, I expected to get the second record.
Except for $unwind function, is there any other ways to handle this issue?
Thanks
If the NumberInt(2) is always in the second position of the array. It should be pretty easy.
db.whatever.find({ $expr: { $gt: [ "$requested_completion_utc" , "$status_changed_utc.1" ] } })
the requirement is to find the record that meet the following standard, how to write the query without using unwind?
requested_completion_utc > status_changed_utc.time and status_changed_utc.status=2, where the status_changed_utc.time is from the record that has status_changed_utc.status=2
Eventually, we found the answer.
db.getCollection("test").aggregate(
// Pipeline
[
// Stage 1
{
$match: {
{
$expr: {
$gt:[
{
$size: {
$filter:{
"input":"$status_changed_utc",
"as":"doc",
"cond":{
$and: [
{
$eq:["$$doc.status",2]
},
{
$gt:["$$doc.time", "$requested_completion_utc"]
}
]
}
}
}
},
0
]
}
}
}
},
]
);
I am having problems aggregating my Product Document in MongoDB.
My Product Document is:
{
"_id" : ObjectId("5d81171c2c69f45ef459e0af"),
"type" : "T-Shirt",
"name" : "Panda",
"description" : "Panda's are cool.",
"image" : ObjectId("5d81171c2c69f45ef459e0ad"),
"created_at" : ISODate("2019-09-17T18:25:48.026+01:00"),
"is_featured" : false,
"sizes" : [
"XS",
"S",
"M",
"L",
"XL"
],
"tags" : [ ],
"pricing" : {
"price" : 26,
"sale_price" : 8
},
"categories" : [
ObjectId("5d81171b2c69f45ef459e086"),
ObjectId("5d81171b2c69f45ef459e087")
],
"sku" : "5d81171c2c69f45ef459e0af"
},
And my Category Document is:
{
"_id" : ObjectId("5d81171b2c69f45ef459e087"),
"name" : "Art",
"description" : "These items are our artsy options.",
"created_at" : ISODate("2019-09-17T18:25:47.196+01:00")
},
My aim is to perform aggregation on the Product Document in order to count the number of items within each Category. So I have the Category "Art", I need to count the products are in the "Art" Category:
My current aggregate:
db.product.aggregate(
{ $unwind : "$categories" },
{
$group : {
"_id" : { "name" : "$name" },
"doc" : { $push : { "category" : "$categories" } },
}
},
{ $unwind : "$doc" },
{
$project : {
"_id" : 0,
"name" : "$name",
"category" : "$doc.category"
}
},
{
$group : {
"_id" : "$category",
"name": { "$first": "$name" },
"items_in_cat" : { $sum : 1 }
}
},
{ "$sort" : { "items_in_cat" : -1 } },
)
Which does actually work but not as I need:
{
"_id" : ObjectId("5d81171b2c69f45ef459e082"),
"name" : null, // Why is the name of the category no here?
"items_in_cat" : 4
},
As we can see the name is null. How can I aggregate the output to be:
{
"_id" : ObjectId("5d81171b2c69f45ef459e082"),
"name" : "Art",
"items_in_cat" : 4
},
We need to use $lookup to fetch the name from Category collection.
The following query can get us the expected output:
db.product.aggregate([
{
$unwind:"$categories"
},
{
$group:{
"_id":"$categories",
"items_in_cat":{
$sum:1
}
}
},
{
$lookup:{
"from":"category",
"let":{
"id":"$_id"
},
"pipeline":[
{
$match:{
$expr:{
$eq:["$_id","$$id"]
}
}
},
{
$project:{
"_id":0,
"name":1
}
}
],
"as":"categoryLookup"
}
},
{
$unwind:{
"path":"$categoryLookup",
"preserveNullAndEmptyArrays":true
}
},
{
$project:{
"_id":1,
"name":{
$ifNull:["$categoryLookup.name","NA"]
},
"items_in_cat":1
}
}
]).pretty()
Data set:
Collection: product
{
"_id" : ObjectId("5d81171c2c69f45ef459e0af"),
"type" : "T-Shirt",
"name" : "Panda",
"description" : "Panda's are cool.",
"image" : ObjectId("5d81171c2c69f45ef459e0ad"),
"created_at" : ISODate("2019-09-17T17:25:48.026Z"),
"is_featured" : false,
"sizes" : [
"XS",
"S",
"M",
"L",
"XL"
],
"tags" : [ ],
"pricing" : {
"price" : 26,
"sale_price" : 8
},
"categories" : [
ObjectId("5d81171b2c69f45ef459e086"),
ObjectId("5d81171b2c69f45ef459e087")
],
"sku" : "5d81171c2c69f45ef459e0af"
}
Collection: category
{
"_id" : ObjectId("5d81171b2c69f45ef459e086"),
"name" : "Art",
"description" : "These items are our artsy options.",
"created_at" : ISODate("2019-09-17T17:25:47.196Z")
}
{
"_id" : ObjectId("5d81171b2c69f45ef459e087"),
"name" : "Craft",
"description" : "These items are our artsy options.",
"created_at" : ISODate("2019-09-17T17:25:47.196Z")
}
Output:
{
"_id" : ObjectId("5d81171b2c69f45ef459e087"),
"items_in_cat" : 1,
"name" : "Craft"
}
{
"_id" : ObjectId("5d81171b2c69f45ef459e086"),
"items_in_cat" : 1,
"name" : "Art"
}
I'm struggling to identified duplicated elements in my MongoDB records, here is my problem :
I have a Mongo collection named "elements".
Example of a record in this collection :
{
"_id" : ObjectId("5d1b2204e851271e80c824b6"),
"name" : "A",
"items" : [
{
"ref_id" : ObjectId("5d1b2204e851271e80c823d7"),
"_id" : ObjectId("5d1b2205e851271e80c82534")
},
{
"ref_id" : ObjectId("5d1b2204e851271e80c823d6"),
"_id" : ObjectId("5d1b2205e851271e80c82533")
},
{
"ref_id" : ObjectId("5d1b2204e851271e80c823d8"),
"_id" : ObjectId("5d1b2205e851271e80c82532")
},
{
"ref_id" : ObjectId("5d1b2204e851271e80c823d5"),
"_id" : ObjectId("5d1b3048e851271e80c826a5")
},
{
"ref_id" : ObjectId("5d1b2204e851271e80c823d5"),
"_id" : ObjectId("5d1b3048e851271e80c826ad")
}
]
}
I would like to identify records where the array "items" contains objects with the same "ref_id".
In my example we can see that the last two objects of the "items" array have the same "ref_id" : ObjectId("5d1b2204e851271e80c823d5").
I tried a bunch of aggregate function but unfortunately couldn't came out with a solution.
The following query can get us the expected output:
db.elements.aggregate([
{
$unwind:"$items"
},
{
$group:{
"_id":"$_id",
"root":{
$first:"$$ROOT"
},
"items":{
$push:"$items"
},
"distinctItems":{
$addToSet: "$items.ref_id"
}
}
},
{
$match:{
$expr:{
$ne:[
{
$size:"$items"
},
{
$size:"$distinctItems"
}
]
}
}
},
{
$addFields:{
"root.items":"$items"
}
},
{
$replaceRoot:{
"newRoot":"$root"
}
}
]).pretty()
Data set:
{
"_id" : ObjectId("5d1b2204e851271e80c824b6"),
"name" : "A",
"items" : [
{
"ref_id" : ObjectId("5d1b2204e851271e80c823d7"),
"_id" : ObjectId("5d1b2205e851271e80c82534")
},
{
"ref_id" : ObjectId("5d1b2204e851271e80c823d6"),
"_id" : ObjectId("5d1b2205e851271e80c82533")
},
{
"ref_id" : ObjectId("5d1b2204e851271e80c823d8"),
"_id" : ObjectId("5d1b2205e851271e80c82532")
},
{
"ref_id" : ObjectId("5d1b2204e851271e80c823d5"),
"_id" : ObjectId("5d1b3048e851271e80c826a5")
},
{
"ref_id" : ObjectId("5d1b2204e851271e80c823d5"),
"_id" : ObjectId("5d1b3048e851271e80c826ad")
}
]
}
{
"_id" : ObjectId("5d654b9d7d0ab652c42315f2"),
"name" : "B",
"items" : [
{
"ref_id" : ObjectId("5d1b2204e851271e80c823d7"),
"_id" : ObjectId("5d1b2205e851271e80c82534")
},
{
"ref_id" : ObjectId("5d1b2204e851271e80c823d6"),
"_id" : ObjectId("5d1b2205e851271e80c82533")
},
{
"ref_id" : ObjectId("5d1b2204e851271e80c823d8"),
"_id" : ObjectId("5d1b2205e851271e80c82532")
}
]
}
Output:
{
"_id" : ObjectId("5d1b2204e851271e80c824b6"),
"name" : "A",
"items" : [
{
"ref_id" : ObjectId("5d1b2204e851271e80c823d7"),
"_id" : ObjectId("5d1b2205e851271e80c82534")
},
{
"ref_id" : ObjectId("5d1b2204e851271e80c823d6"),
"_id" : ObjectId("5d1b2205e851271e80c82533")
},
{
"ref_id" : ObjectId("5d1b2204e851271e80c823d8"),
"_id" : ObjectId("5d1b2205e851271e80c82532")
},
{
"ref_id" : ObjectId("5d1b2204e851271e80c823d5"),
"_id" : ObjectId("5d1b3048e851271e80c826a5")
},
{
"ref_id" : ObjectId("5d1b2204e851271e80c823d5"),
"_id" : ObjectId("5d1b3048e851271e80c826ad")
}
]
}
Explanation: We are populating an array of distinct ref_id from each document and matching if the size of the populated array is equal to the size of actual items array.
this is my data :
> db.bookmarks.find({"userId" : "56b9b74bf976ab70ff6b9999"}).pretty()
{
"_id" : ObjectId("56c2210fee4a33579f4202dd"),
"userId" : "56b9b74bf976ab70ff6b9999",
"items" : [
{
"itemId" : "28",
"timestamp" : "2016-02-12T18:07:28Z"
},
{
"itemId" : "29",
"timestamp" : "2016-02-12T18:07:29Z"
},
{
"itemId" : "30",
"timestamp" : "2016-02-12T18:07:30Z"
},
{
"itemId" : "31",
"timestamp" : "2016-02-12T18:07:31Z"
},
{
"itemId" : "32",
"timestamp" : "2016-02-12T18:07:32Z"
},
{
"itemId" : "33",
"timestamp" : "2016-02-12T18:07:33Z"
},
{
"itemId" : "34",
"timestamp" : "2016-02-12T18:07:34Z"
}
]
}
I want to have something like (actually i hope the _id can become userId too) :
{
"_id" : "56b9b74bf976ab70ff6b9999",
"items" : [
{ "itemId": "32", "timestamp": "2016-02-12T18:07:32Z" },
{ "itemId": "31", "timestamp": "2016-02-12T18:07:31Z" },
{ "itemId": "30", "timestamp": "2016-02-12T18:07:30Z" }
]
}
What I have now :
> db.bookmarks.aggregate(
... { $match: { "userId" : "56b9b74bf976ab70ff6b9999" } },
... { $unwind: '$items' },
... { $sort: { 'items.timestamp': -1} },
... { $skip: 2 },
... { $limit: 3},
... { $group: { '_id': '$userId' , items: { $push: '$items.itemId' } } }
... ).pretty()
{ "_id" : "56b9b74bf976ab70ff6b9999", "items" : [ "32", "31", "30" ] }
i tried to read the document in mongo and find out i can $push, but somehow i cannot find a way to push such object, which is not defined anywhere in the whole object. I want to have the timestamp also.. but i don't know how should i modified the $group (or others??) to do so. thanks for helping!
This code, which I tested in the MongoDB 3.2.1 shell, should give you the output format that you want:
> db.bookmarks.aggregate(
{ "$match" : { "userId" : "Ursula" } },
{ "$unwind" : "$items" },
{ "$sort" : { "items.timestamp" : -1 } },
{ "$skip" : 2 },
{ "$limit" : 3 },
{ "$group" : { "_id" : "$userId", items: { "$push" : { "myPlace" : "$items.itemId", "myStamp" : "$items.timestamp" } } } } ).pretty()
Running the above will produce this output:
{
"_id" : "Ursula",
"items" : [
{
"myPlace" : "52",
"myStamp" : ISODate("2016-02-13T18:07:32Z")
},
{
"myPlace" : "51",
"myStamp" : ISODate("2016-02-13T18:07:31Z")
},
{
"myPlace" : "50",
"myStamp" : ISODate("2016-02-13T18:07:30Z")
}
]
}
In MongoDB version 3.2.x, you can also use the $out operator in the very last stage of the aggregation pipeline, and have the output of the aggregation query written to a collection. Here is the code I used:
> db.bookmarks.aggregate(
{ "$match" : { "userId" : "Ursula" } },
{ "$unwind" : "$items" },
{ "$sort" : { "items.timestamp" : -1 } },
{ "$skip" : 2 },
{ "$limit" : 3 },
{ "$group" : { "_id" : "$userId", items: { "$push" : { "myPlace" : "$items.itemId", "myStamp" : "$items.timestamp" } } } },
{ "$out" : "ursula" } )
This gives me a collection named "ursula":
> show collections
ursula
and I can query that collection:
> db.ursula.find().pretty()
{
"_id" : "Ursula",
"items" : [
{
"myPlace" : "52",
"myStamp" : ISODate("2016-02-13T18:07:32Z")
},
{
"myPlace" : "51",
"myStamp" : ISODate("2016-02-13T18:07:31Z")
},
{
"myPlace" : "50",
"myStamp" : ISODate("2016-02-13T18:07:30Z")
}
]
}
>
Last of all, this is the input document I used in the aggregation query. You can compare this document to how I coded the aggregation query to see how I built the new items array.
> db.bookmarks.find( { "userId" : "Ursula" } ).pretty()
{
"_id" : ObjectId("56c240ed55f2f6004dc3b25c"),
"userId" : "Ursula",
"items" : [
{
"itemId" : "48",
"timestamp" : ISODate("2016-02-13T18:07:28Z")
},
{
"itemId" : "49",
"timestamp" : ISODate("2016-02-13T18:07:29Z")
},
{
"itemId" : "50",
"timestamp" : ISODate("2016-02-13T18:07:30Z")
},
{
"itemId" : "51",
"timestamp" : ISODate("2016-02-13T18:07:31Z")
},
{
"itemId" : "52",
"timestamp" : ISODate("2016-02-13T18:07:32Z")
},
{
"itemId" : "53",
"timestamp" : ISODate("2016-02-13T18:07:33Z")
},
{
"itemId" : "54",
"timestamp" : ISODate("2016-02-13T18:07:34Z")
}
]
}
i have a document like this :
{
"ExtraFields" : [
{
"value" : "print",
"fieldID" : ObjectId("5535627631efa0843554b0ea")
},
{
"value" : "14",
"fieldID" : ObjectId("5535627631efa0843554b0eb")
},
{
"value" : "POLYE",
"fieldID" : ObjectId("5535627631efa0843554b0ec")
},
{
"value" : "30",
"fieldID" : ObjectId("5535627631efa0843554b0ed")
},
{
"value" : "0",
"fieldID" : ObjectId("5535627631efa0843554b0ee")
},
{
"value" : "0",
"fieldID" : ObjectId("5535627731efa0843554b0ef")
},
{
"value" : "0",
"fieldID" : ObjectId("5535627831efa0843554b0f0")
},
{
"value" : "42",
"fieldID" : ObjectId("5535627831efa0843554b0f1")
},
{
"value" : "30",
"fieldID" : ObjectId("5535627831efa0843554b0f2")
},
{
"value" : "14",
"fieldID" : ObjectId("5535627831efa0843554b0f3")
},
{
"value" : "19",
"fieldID" : ObjectId("5535627831efa0843554b0f4")
}
],
"id" : ObjectId("55369e60733e4914550832d0"), "title" : "A product"
}
what i want is to match one or more sets from the ExtraFields array. For example, all the products that contain the values print and 30. Since a value may be found in more than one fieldID (like 0 or true) we need to create a set like
WHERE (fieldID : ObjectId("5535627631efa0843554b0ea"), value : "print")
Where i'm having problems is when querying more than one fields. The pipeline i came up with is :
db.products.aggregate([
{'$unwind': '$ExtraFields'},
{
'$match': {
'$and': [{
'$and': [{'ExtraFields.value': {'$in': ["A52A2A"]}}, {
'ExtraFields.fieldID': ObjectId("5535627631efa0843554b0ea")
}]
}
,
{
'$and': [{'ExtraFields.value': '14'}, {'ExtraFields.fieldID': ObjectId("5535627631efa0843554b0eb")}]
}
]
}
},
]);
This returns zero results, but this is what i want to do in theory. Match all items that contain set 1 AND all that contain set 2.
The end result should look like a faceted search output :
[
{
"_id" : {
"values" : "18",
"fieldID" : ObjectId("5535627831efa0843554b0f3")
},
"count" : 2
},
{
"_id" : {
"values" : "33",
"fieldID" : ObjectId("5535627831efa0843554b0f2")
},
"count" : 1
}
]
Any ideas?
You could try the following aggregation pipeline
db.products.aggregate([
{
"$match": {
"ExtraFields.value": { "$in": ["A52A2A", "14"] },
"ExtraFields.fieldID": {
"$in": [
ObjectId("5535627631efa0843554b0ea"),
ObjectId("5535627631efa0843554b0eb")
]
}
}
},
{
"$unwind": "$ExtraFields"
},
{
"$match": {
"ExtraFields.value": { "$in": ["A52A2A", "14"] },
"ExtraFields.fieldID": {
"$in": [
ObjectId("5535627631efa0843554b0ea"),
ObjectId("5535627631efa0843554b0eb")
]
}
}
},
{
"$group": {
"_id": {
"value": "$ExtraFields.value",
"fieldID": "$ExtraFields.fieldID"
},
"count": {
"$sum": 1
}
}
}
])
With the sample document provided, this gives the output:
/* 1 */
{
"result" : [
{
"_id" : {
"value" : "14",
"fieldID" : ObjectId("5535627631efa0843554b0eb")
},
"count" : 1
}
],
"ok" : 1
}