Finding/Counting Duplicate Values in Array in MongoDB - mongodb

I am new to the mongo database. Using Robo3t software
I have to find out duplicate values inside an array based on channel_id.
I did some research and found that aggregation needs to be used to group the documents and get the respective counts.
I have developed the following query but results are not as expected.
Sample Documents:
{
"_id" : ObjectId("59b674d141b47e5401897d31"),
"subscribed_channels" : [
{
"channel_id" : "1001",
"channel_name" : "StarPlus",
"channelPrice":"100"
},
{
"channel_id" : "1002",
"channel_name" : "StarGold",
"channelPrice":"75"
},
{
"channel_id" : "1001",
"channel_name" : "StarPlus",
"channelPrice":"100"
},
{
"channel_id" : "1003",
"channel_name" : "SetMax",
"channelPrice":"80"
}
],
"viewer_account_id" : "59b6745b41b47e5401143b3d",
"public_id_type" : "PHONE_NUMBER",
"viewer_id" : "+919322264403",
"role" : "CONSUMER",
"active" : true,
"date_time_created" : NumberLong(1505129681330),
"date_time_modified" : NumberLong(1569320824387)
}
{
"_id" : ObjectId("59b674d141b47e5401897d31"),
"subscribed_channels" : [
{
"channel_id" : "1001",
"channel_name" : "StarPlus",
"channelPrice":"100"
},
{
"channel_id" : "1002",
"channel_name" : "StarGold",
"channelPrice":"75"
},
{
"channel_id" : "1001",
"channel_name" : "StarPlus",
"channelPrice":"100"
},
{
"channel_id" : "1001",
"channel_name" : "StarPlus",
"channelPrice":"100"
}
],
"viewer_account_id" : "59b6745b41b47e5401143c56",
"public_id_type" : "PHONE_NUMBER",
"viewer_id" : "+919322264404",
"role" : "CONSUMER",
"active" : true,
"date_time_created" : NumberLong(1505129681330),
"date_time_modified" : NumberLong(1569320824387)
}
Above are just 2 sample documents from the viewers collection.
Query :
db.getCollection('viewers').aggregate([
{
"$group" :
{_id:{
//viewer_id:"$consumer_id",
enterprise_id:"$subscribed_channels.channel_id",
},
"viewer_id": {
$first: "$viewer_id"
},
count:{$sum:1}
}},
{
"$match": {"count": { "$gt": 1 }}
}
])
Actual Output :
{
"_id" : {
"enterprise_id" : [
"1001",
"1001",
"1002",
"1003"
]
},
"consumer_id" : "+919322264403",
"count" : 2.0
}
{
"_id" : {
"enterprise_id" : [
"1001",
"1002",
"1001",
"1001"
]
},
"consumer_id" : "+919322264404",
"count" : 2.0
}
Expected Output :
I want to group based on subscribed_channels.channel_id and get a count respectively
{
"_id" : {
"enterprise_id" : [
"1001",
"1001",
"1002",
"1003"
]
},
"consumer_id" : "+919322264403",
"count" : 2.0
}
{
"_id" : {
"enterprise_id" : [
"1001",
"1001",
"1001",
"1002"
]
},
"consumer_id" : "+919322264404",
"count" : 3.0
}
Grouping is not happening based on channel_id, also the count is incorrect.
The count gives me neither the number of channel_ids subscribed nor the number of duplicate channel_ids.
Please guide me in building a query that gives the correct result.

Try below query :
Query :
// For every viewer document, output the viewer id, the list of subscribed channel ids,
// and how many entries in that list are redundant repeats of an id already present.
db.collection.aggregate([
/** Project only the needed fields; the output field names are chosen here. */
{
$project: {
customer_id: "$viewer_id",
enterprise_id: "$subscribed_channels.channel_id",
count: {
/**
 * count = (total number of channel ids) - (number of distinct channel ids).
 * Example: ["1001", "1002", "1001", "1003"] has 4 ids, 3 of them distinct, so count is 1.
 */
$subtract: [
{
$size: "$subscribed_channels.channel_id" // number of ids in the original array, duplicates included
},
{
$size: {
$setUnion: ["$subscribed_channels.channel_id", []] // union with an empty array drops duplicates, leaving only the distinct ids
}
}
]
}
}
}
]);
Test : MongoDB-Playground

Related

How to use mongodb query to compare the element in DB to another element in an array stored in same collection

Here is my question.
This is my sample records
{
"_id" : ObjectId("5d9b69fae4757402b4b4ca0d"),
"status_changed_utc" : [
{
"status" : NumberInt(1),
"time" : ISODate("2019-05-20T23:03:10.000+0000")
},
{
"status" : NumberInt(2),
"time" : ISODate("2019-05-23T23:04:03.000+0000")
},
{
"status" : NumberInt(4),
"time" : ISODate("2019-05-23T23:05:06.000+0000")
},
{
"status" : NumberInt(5),
"time" : ISODate("2019-05-23T23:05:07.000+0000")
},
{
"status" : NumberInt(6),
"time" : ISODate("2019-05-23T23:05:09.000+0000")
}
],
"requested_completion_utc" : ISODate("2019-05-22T23:05:09.000+0000")
},
{
"_id" : ObjectId("5d9b69fae4757402b4b4ca1e"),
"status_changed_utc" : [
{
"status" : NumberInt(1),
"time" : ISODate("2019-06-20T23:03:10.000+0000")
},
{
"status" : NumberInt(2),
"time" : ISODate("2019-07-23T23:04:03.000+0000")
},
{
"status" : NumberInt(4),
"time" : ISODate("2019-07-23T23:05:06.000+0000")
},
{
"status" : NumberInt(5),
"time" : ISODate("2019-05-23T23:05:07.000+0000")
},
{
"status" : NumberInt(6),
"time" : ISODate("2019-07-23T23:05:09.000+0000")
}
],
"requested_completion_utc" : ISODate("2019-08-22T23:05:09.000+0000")
},
I expect to find the records whose "requested_completion_utc" date is later than the "time" of the "status_changed_utc" entry whose "status" is NumberInt(2).
In this example, I expected to get the second record.
Except for $unwind function, is there any other ways to handle this issue?
Thanks
If the NumberInt(2) entry is always in the second position of the array, it should be pretty easy.
// Inside $expr, "$status_changed_utc.1" is read as a field named "1", not as array index 1,
// so pick the second element's "time" explicitly with $arrayElemAt (indexes are zero-based).
db.whatever.find({ $expr: { $gt: [ "$requested_completion_utc", { $arrayElemAt: [ "$status_changed_utc.time", 1 ] } ] } })
the requirement is to find the record that meet the following standard, how to write the query without using unwind?
requested_completion_utc > status_changed_utc.time and status_changed_utc.status=2, where the status_changed_utc.time is from the record that has status_changed_utc.status=2
Eventually, we found the answer.
// Return the documents that have at least one status_changed_utc entry with status 2
// whose time is earlier than the document's requested_completion_utc (no $unwind needed).
db.getCollection("test").aggregate(
  // Pipeline
  [
    // Stage 1
    {
      // $expr goes directly under $match; wrapping it in an extra { } is a syntax error.
      $match: {
        $expr: {
          // Keep the document when the filtered array is non-empty.
          $gt: [
            {
              $size: {
                $filter: {
                  "input": "$status_changed_utc",
                  "as": "doc",
                  "cond": {
                    $and: [
                      // Only the entry recorded for status 2 is relevant.
                      { $eq: ["$$doc.status", 2] },
                      // requested_completion_utc must be later than that entry's time.
                      { $gt: ["$requested_completion_utc", "$$doc.time"] }
                    ]
                  }
                }
              }
            },
            0
          ]
        }
      }
    }
  ]
);

Problems aggregating MongoDB

I am having problems aggregating my Product Document in MongoDB.
My Product Document is:
{
"_id" : ObjectId("5d81171c2c69f45ef459e0af"),
"type" : "T-Shirt",
"name" : "Panda",
"description" : "Panda's are cool.",
"image" : ObjectId("5d81171c2c69f45ef459e0ad"),
"created_at" : ISODate("2019-09-17T18:25:48.026+01:00"),
"is_featured" : false,
"sizes" : [
"XS",
"S",
"M",
"L",
"XL"
],
"tags" : [ ],
"pricing" : {
"price" : 26,
"sale_price" : 8
},
"categories" : [
ObjectId("5d81171b2c69f45ef459e086"),
ObjectId("5d81171b2c69f45ef459e087")
],
"sku" : "5d81171c2c69f45ef459e0af"
},
And my Category Document is:
{
"_id" : ObjectId("5d81171b2c69f45ef459e087"),
"name" : "Art",
"description" : "These items are our artsy options.",
"created_at" : ISODate("2019-09-17T18:25:47.196+01:00")
},
My aim is to perform aggregation on the Product Document in order to count the number of items within each Category. So if I have the Category "Art", I need to count the products that are in the "Art" Category:
My current aggregate:
db.product.aggregate(
{ $unwind : "$categories" },
{
$group : {
"_id" : { "name" : "$name" },
"doc" : { $push : { "category" : "$categories" } },
}
},
{ $unwind : "$doc" },
{
$project : {
"_id" : 0,
"name" : "$name",
"category" : "$doc.category"
}
},
{
$group : {
"_id" : "$category",
"name": { "$first": "$name" },
"items_in_cat" : { $sum : 1 }
}
},
{ "$sort" : { "items_in_cat" : -1 } },
)
Which does actually work but not as I need:
{
"_id" : ObjectId("5d81171b2c69f45ef459e082"),
"name" : null, // Why is the name of the category no here?
"items_in_cat" : 4
},
As we can see the name is null. How can I aggregate the output to be:
{
"_id" : ObjectId("5d81171b2c69f45ef459e082"),
"name" : "Art",
"items_in_cat" : 4
},
We need to use $lookup to fetch the name from Category collection.
The following query can get us the expected output:
// Count products per category id, then attach each category's name from the "category" collection.
db.product.aggregate([
// Stage 1: emit one document per (product, category id) pair.
{
$unwind:"$categories"
},
// Stage 2: count the unwound documents for each category id.
{
$group:{
"_id":"$categories",
"items_in_cat":{
$sum:1
}
}
},
// Stage 3: look up the category whose _id equals the grouped id, keeping only its name.
{
$lookup:{
"from":"category",
"let":{
"id":"$_id"
},
"pipeline":[
{
$match:{
$expr:{
$eq:["$_id","$$id"]
}
}
},
{
$project:{
"_id":0,
"name":1
}
}
],
"as":"categoryLookup"
}
},
// Stage 4: flatten the lookup result; groups with no matching category are kept, not dropped.
{
$unwind:{
"path":"$categoryLookup",
"preserveNullAndEmptyArrays":true
}
},
// Stage 5: shape the output, falling back to "NA" when no category name was found.
{
$project:{
"_id":1,
"name":{
$ifNull:["$categoryLookup.name","NA"]
},
"items_in_cat":1
}
}
]).pretty()
Data set:
Collection: product
{
"_id" : ObjectId("5d81171c2c69f45ef459e0af"),
"type" : "T-Shirt",
"name" : "Panda",
"description" : "Panda's are cool.",
"image" : ObjectId("5d81171c2c69f45ef459e0ad"),
"created_at" : ISODate("2019-09-17T17:25:48.026Z"),
"is_featured" : false,
"sizes" : [
"XS",
"S",
"M",
"L",
"XL"
],
"tags" : [ ],
"pricing" : {
"price" : 26,
"sale_price" : 8
},
"categories" : [
ObjectId("5d81171b2c69f45ef459e086"),
ObjectId("5d81171b2c69f45ef459e087")
],
"sku" : "5d81171c2c69f45ef459e0af"
}
Collection: category
{
"_id" : ObjectId("5d81171b2c69f45ef459e086"),
"name" : "Art",
"description" : "These items are our artsy options.",
"created_at" : ISODate("2019-09-17T17:25:47.196Z")
}
{
"_id" : ObjectId("5d81171b2c69f45ef459e087"),
"name" : "Craft",
"description" : "These items are our artsy options.",
"created_at" : ISODate("2019-09-17T17:25:47.196Z")
}
Output:
{
"_id" : ObjectId("5d81171b2c69f45ef459e087"),
"items_in_cat" : 1,
"name" : "Craft"
}
{
"_id" : ObjectId("5d81171b2c69f45ef459e086"),
"items_in_cat" : 1,
"name" : "Art"
}

MongoDB - Find duplicated elements in record property

I'm struggling to identify duplicated elements in my MongoDB records. Here is my problem:
I have a Mongo collection named "elements".
Example of a record in this collection :
{
"_id" : ObjectId("5d1b2204e851271e80c824b6"),
"name" : "A",
"items" : [
{
"ref_id" : ObjectId("5d1b2204e851271e80c823d7"),
"_id" : ObjectId("5d1b2205e851271e80c82534")
},
{
"ref_id" : ObjectId("5d1b2204e851271e80c823d6"),
"_id" : ObjectId("5d1b2205e851271e80c82533")
},
{
"ref_id" : ObjectId("5d1b2204e851271e80c823d8"),
"_id" : ObjectId("5d1b2205e851271e80c82532")
},
{
"ref_id" : ObjectId("5d1b2204e851271e80c823d5"),
"_id" : ObjectId("5d1b3048e851271e80c826a5")
},
{
"ref_id" : ObjectId("5d1b2204e851271e80c823d5"),
"_id" : ObjectId("5d1b3048e851271e80c826ad")
}
]
}
I would like to identify records where the array "items" contains objects with the same "ref_id".
In my example we can see that the last two objects of the "items" array have the same "ref_id" : ObjectId("5d1b2204e851271e80c823d5").
I tried a bunch of aggregate functions but unfortunately couldn't come up with a solution.
The following query can get us the expected output:
// Return the documents whose "items" array contains at least one repeated ref_id.
db.elements.aggregate([
// Stage 1: emit one document per element of "items".
{
$unwind:"$items"
},
// Stage 2: regroup by the original _id, collecting every item and the set of distinct ref_ids.
{
$group:{
"_id":"$_id",
// Keeps the other fields of the document; its "items" is a single unwound element at this point.
"root":{
$first:"$$ROOT"
},
// All items, duplicates included.
"items":{
$push:"$items"
},
// $addToSet stores each ref_id once only.
"distinctItems":{
$addToSet: "$items.ref_id"
}
}
},
// Stage 3: keep the document only when the two sizes differ, i.e. some ref_id occurs more than once.
{
$match:{
$expr:{
$ne:[
{
$size:"$items"
},
{
$size:"$distinctItems"
}
]
}
}
},
// Stage 4: put the full items array back into the saved document.
{
$addFields:{
"root.items":"$items"
}
},
// Stage 5: promote the saved document to the top level, dropping the helper fields.
{
$replaceRoot:{
"newRoot":"$root"
}
}
]).pretty()
Data set:
{
"_id" : ObjectId("5d1b2204e851271e80c824b6"),
"name" : "A",
"items" : [
{
"ref_id" : ObjectId("5d1b2204e851271e80c823d7"),
"_id" : ObjectId("5d1b2205e851271e80c82534")
},
{
"ref_id" : ObjectId("5d1b2204e851271e80c823d6"),
"_id" : ObjectId("5d1b2205e851271e80c82533")
},
{
"ref_id" : ObjectId("5d1b2204e851271e80c823d8"),
"_id" : ObjectId("5d1b2205e851271e80c82532")
},
{
"ref_id" : ObjectId("5d1b2204e851271e80c823d5"),
"_id" : ObjectId("5d1b3048e851271e80c826a5")
},
{
"ref_id" : ObjectId("5d1b2204e851271e80c823d5"),
"_id" : ObjectId("5d1b3048e851271e80c826ad")
}
]
}
{
"_id" : ObjectId("5d654b9d7d0ab652c42315f2"),
"name" : "B",
"items" : [
{
"ref_id" : ObjectId("5d1b2204e851271e80c823d7"),
"_id" : ObjectId("5d1b2205e851271e80c82534")
},
{
"ref_id" : ObjectId("5d1b2204e851271e80c823d6"),
"_id" : ObjectId("5d1b2205e851271e80c82533")
},
{
"ref_id" : ObjectId("5d1b2204e851271e80c823d8"),
"_id" : ObjectId("5d1b2205e851271e80c82532")
}
]
}
Output:
{
"_id" : ObjectId("5d1b2204e851271e80c824b6"),
"name" : "A",
"items" : [
{
"ref_id" : ObjectId("5d1b2204e851271e80c823d7"),
"_id" : ObjectId("5d1b2205e851271e80c82534")
},
{
"ref_id" : ObjectId("5d1b2204e851271e80c823d6"),
"_id" : ObjectId("5d1b2205e851271e80c82533")
},
{
"ref_id" : ObjectId("5d1b2204e851271e80c823d8"),
"_id" : ObjectId("5d1b2205e851271e80c82532")
},
{
"ref_id" : ObjectId("5d1b2204e851271e80c823d5"),
"_id" : ObjectId("5d1b3048e851271e80c826a5")
},
{
"ref_id" : ObjectId("5d1b2204e851271e80c823d5"),
"_id" : ObjectId("5d1b3048e851271e80c826ad")
}
]
}
Explanation: We build an array of the distinct ref_id values for each document and keep the document only if the size of that array differs from the size of the actual items array, which means at least one ref_id is repeated.

mongodb aggregation $group and then $push a object

this is my data :
> db.bookmarks.find({"userId" : "56b9b74bf976ab70ff6b9999"}).pretty()
{
"_id" : ObjectId("56c2210fee4a33579f4202dd"),
"userId" : "56b9b74bf976ab70ff6b9999",
"items" : [
{
"itemId" : "28",
"timestamp" : "2016-02-12T18:07:28Z"
},
{
"itemId" : "29",
"timestamp" : "2016-02-12T18:07:29Z"
},
{
"itemId" : "30",
"timestamp" : "2016-02-12T18:07:30Z"
},
{
"itemId" : "31",
"timestamp" : "2016-02-12T18:07:31Z"
},
{
"itemId" : "32",
"timestamp" : "2016-02-12T18:07:32Z"
},
{
"itemId" : "33",
"timestamp" : "2016-02-12T18:07:33Z"
},
{
"itemId" : "34",
"timestamp" : "2016-02-12T18:07:34Z"
}
]
}
I want to have something like (actually i hope the _id can become userId too) :
{
"_id" : "56b9b74bf976ab70ff6b9999",
"items" : [
{ "itemId": "32", "timestamp": "2016-02-12T18:07:32Z" },
{ "itemId": "31", "timestamp": "2016-02-12T18:07:31Z" },
{ "itemId": "30", "timestamp": "2016-02-12T18:07:30Z" }
]
}
What I have now :
> db.bookmarks.aggregate(
... { $match: { "userId" : "56b9b74bf976ab70ff6b9999" } },
... { $unwind: '$items' },
... { $sort: { 'items.timestamp': -1} },
... { $skip: 2 },
... { $limit: 3},
... { $group: { '_id': '$userId' , items: { $push: '$items.itemId' } } }
... ).pretty()
{ "_id" : "56b9b74bf976ab70ff6b9999", "items" : [ "32", "31", "30" ] }
I read the MongoDB documentation and found that I can use $push, but I cannot find a way to push an object like this, which is not defined anywhere in the original document. I also want the timestamp, but I don't know how I should modify the $group (or other stages) to do so. Thanks for helping!
This code, which I tested in the MongoDB 3.2.1 shell, should give you the output format that you want:
> db.bookmarks.aggregate(
{ "$match" : { "userId" : "Ursula" } },
{ "$unwind" : "$items" },
{ "$sort" : { "items.timestamp" : -1 } },
{ "$skip" : 2 },
{ "$limit" : 3 },
{ "$group" : { "_id" : "$userId", items: { "$push" : { "myPlace" : "$items.itemId", "myStamp" : "$items.timestamp" } } } } ).pretty()
Running the above will produce this output:
{
"_id" : "Ursula",
"items" : [
{
"myPlace" : "52",
"myStamp" : ISODate("2016-02-13T18:07:32Z")
},
{
"myPlace" : "51",
"myStamp" : ISODate("2016-02-13T18:07:31Z")
},
{
"myPlace" : "50",
"myStamp" : ISODate("2016-02-13T18:07:30Z")
}
]
}
In MongoDB version 3.2.x, you can also use the $out operator in the very last stage of the aggregation pipeline, and have the output of the aggregation query written to a collection. Here is the code I used:
> db.bookmarks.aggregate(
{ "$match" : { "userId" : "Ursula" } },
{ "$unwind" : "$items" },
{ "$sort" : { "items.timestamp" : -1 } },
{ "$skip" : 2 },
{ "$limit" : 3 },
{ "$group" : { "_id" : "$userId", items: { "$push" : { "myPlace" : "$items.itemId", "myStamp" : "$items.timestamp" } } } },
{ "$out" : "ursula" } )
This gives me a collection named "ursula":
> show collections
ursula
and I can query that collection:
> db.ursula.find().pretty()
{
"_id" : "Ursula",
"items" : [
{
"myPlace" : "52",
"myStamp" : ISODate("2016-02-13T18:07:32Z")
},
{
"myPlace" : "51",
"myStamp" : ISODate("2016-02-13T18:07:31Z")
},
{
"myPlace" : "50",
"myStamp" : ISODate("2016-02-13T18:07:30Z")
}
]
}
>
Last of all, this is the input document I used in the aggregation query. You can compare this document to how I coded the aggregation query to see how I built the new items array.
> db.bookmarks.find( { "userId" : "Ursula" } ).pretty()
{
"_id" : ObjectId("56c240ed55f2f6004dc3b25c"),
"userId" : "Ursula",
"items" : [
{
"itemId" : "48",
"timestamp" : ISODate("2016-02-13T18:07:28Z")
},
{
"itemId" : "49",
"timestamp" : ISODate("2016-02-13T18:07:29Z")
},
{
"itemId" : "50",
"timestamp" : ISODate("2016-02-13T18:07:30Z")
},
{
"itemId" : "51",
"timestamp" : ISODate("2016-02-13T18:07:31Z")
},
{
"itemId" : "52",
"timestamp" : ISODate("2016-02-13T18:07:32Z")
},
{
"itemId" : "53",
"timestamp" : ISODate("2016-02-13T18:07:33Z")
},
{
"itemId" : "54",
"timestamp" : ISODate("2016-02-13T18:07:34Z")
}
]
}

mongodb aggregation match multiple $and on the same field

i have a document like this :
{
"ExtraFields" : [
{
"value" : "print",
"fieldID" : ObjectId("5535627631efa0843554b0ea")
},
{
"value" : "14",
"fieldID" : ObjectId("5535627631efa0843554b0eb")
},
{
"value" : "POLYE",
"fieldID" : ObjectId("5535627631efa0843554b0ec")
},
{
"value" : "30",
"fieldID" : ObjectId("5535627631efa0843554b0ed")
},
{
"value" : "0",
"fieldID" : ObjectId("5535627631efa0843554b0ee")
},
{
"value" : "0",
"fieldID" : ObjectId("5535627731efa0843554b0ef")
},
{
"value" : "0",
"fieldID" : ObjectId("5535627831efa0843554b0f0")
},
{
"value" : "42",
"fieldID" : ObjectId("5535627831efa0843554b0f1")
},
{
"value" : "30",
"fieldID" : ObjectId("5535627831efa0843554b0f2")
},
{
"value" : "14",
"fieldID" : ObjectId("5535627831efa0843554b0f3")
},
{
"value" : "19",
"fieldID" : ObjectId("5535627831efa0843554b0f4")
}
],
"id" : ObjectId("55369e60733e4914550832d0"), "title" : "A product"
}
what i want is to match one or more sets from the ExtraFields array. For example, all the products that contain the values print and 30. Since a value may be found in more than one fieldID (like 0 or true) we need to create a set like
WHERE (fieldID : ObjectId("5535627631efa0843554b0ea"), value : "print")
Where i'm having problems is when querying more than one fields. The pipeline i came up with is :
db.products.aggregate([
{'$unwind': '$ExtraFields'},
{
'$match': {
'$and': [{
'$and': [{'ExtraFields.value': {'$in': ["A52A2A"]}}, {
'ExtraFields.fieldID': ObjectId("5535627631efa0843554b0ea")
}]
}
,
{
'$and': [{'ExtraFields.value': '14'}, {'ExtraFields.fieldID': ObjectId("5535627631efa0843554b0eb")}]
}
]
}
},
]);
This returns zero results, but this is what i want to do in theory. Match all items that contain set 1 AND all that contain set 2.
The end result should look like a faceted search output :
[
{
"_id" : {
"values" : "18",
"fieldID" : ObjectId("5535627831efa0843554b0f3")
},
"count" : 2
},
{
"_id" : {
"values" : "33",
"fieldID" : ObjectId("5535627831efa0843554b0f2")
},
"count" : 1
}
]
Any ideas?
You could try the following aggregation pipeline
// Count how many ExtraFields entries exist per (value, fieldID) pair among the selected values and fields.
// NOTE(review): "value" and "fieldID" are matched independently in both $match stages, so an entry
// with value "14" under the other listed fieldID would also pass. If each value must be paired with
// one specific fieldID, $elemMatch / explicit $or pairs are needed instead — confirm the intent.
db.products.aggregate([
// Stage 1: pre-filter whole documents before unwinding.
{
"$match": {
"ExtraFields.value": { "$in": ["A52A2A", "14"] },
"ExtraFields.fieldID": {
"$in": [
ObjectId("5535627631efa0843554b0ea"),
ObjectId("5535627631efa0843554b0eb")
]
}
}
},
// Stage 2: emit one document per ExtraFields entry.
{
"$unwind": "$ExtraFields"
},
// Stage 3: repeat the filter so that only the matching entries remain.
{
"$match": {
"ExtraFields.value": { "$in": ["A52A2A", "14"] },
"ExtraFields.fieldID": {
"$in": [
ObjectId("5535627631efa0843554b0ea"),
ObjectId("5535627631efa0843554b0eb")
]
}
}
},
// Stage 4: count the remaining entries per (value, fieldID) pair.
{
"$group": {
"_id": {
"value": "$ExtraFields.value",
"fieldID": "$ExtraFields.fieldID"
},
"count": {
"$sum": 1
}
}
}
])
With the sample document provided, this gives the output:
/* 1 */
{
"result" : [
{
"_id" : {
"value" : "14",
"fieldID" : ObjectId("5535627631efa0843554b0eb")
},
"count" : 1
}
],
"ok" : 1
}