Number of movie of genre 'drama' by country - mongodb

I have a collection as follow,
[
{
"_id": "movie:1",
"title": "Vertigo",
"year": 1958,
"genre": "drama",
"country": "DE"
},
{
"_id": "movie:2",
"title": "Alien",
"year": 1979,
"genre": "Science-fiction",
"country": "USA"
},
{
"_id": "movie:3",
"title": "Sacrifice",
"year": 1986,
"genre": "drama",
"summary": null,
"country": "FR"
}
]
I have the following query,
db.moviesEmbedded.aggregate([
{"$group" : {_id:{country:"$country", genre: "$genre", movie: "$title"}, movies:{$sum:1} } } ]
But it doesn't give me the correct output. Can someone please help me with this?

You have to $match first to filter the desired values. So you need this query:
db.collection.aggregate({
"$match": {
"genre": "drama"
}
},
{
"$group": {
"_id": "$country",
"total": {
"$sum": 1
}
}
})
Example here.
Note that you can output the total or multiple values in this way if you prefer:
db.collection.aggregate({
"$match": {
"genre": "drama"
}
},
{
"$group": {
"_id": "$country",
"movie": {
"$push": {
"title": "$title",
"year": "$year"
}
}
}
})
Example here

Related

Compare duplicates by Score By Not Max Field or Earlier Date

I need to compare duplicated documents and get the duplicated ones with the Lowest Score.
If the Score between two duplicates is Equal, then get the one with earlier date.
{
"_id": UUID("c77c72de-edd8-4576-a72c-983cf93a0f31"),
"DocumentId": "05240423067",
"Name": "John Doe",
"CreationDate": ISODate("0001-01-01T00:00:00.000+00:00"),
"Score": 5,
},
{
"_id": UUID("b5a7d404-a341-45dd-b875-864cd1e6bda2"),
"DocumentId": "05240423067",
"Name": "John Doe",
"CreationDate": ISODate("2021-07-17T00:00:00.000+00:00"),
"Score": 2
},
{
"_id": UUID("9efddd23-4b6b-4e96-ab43-b24a080107db"),
"DocumentId": "05240423067",
"Name": "John Doe",
"CreationDate": ISODate("2021-07-10T00:00:00.000+00:00"),
"Score": 2
},
{
"_id": UUID("f1a063a5-f9dd-4998-b6aa-df2071dd8677"),
"DocumentId": "88313825863",
"Name": "Marcus Joseph",
"CreationDate": ISODate("2021-07-17T00:00:00.000+00:00"),
"Score": 2
},
{
"_id": UUID("e3262f8e-bd6a-49e8-abe5-c3c1a4e49900"),
"DocumentId": "88313825863",
"Name": "Marcus Joseph",
"CreationDate": ISODate("0001-01-01T00:00:00.000+00:00"),
"Score": 1
}
Later, the resulting documents will be deleted.
Expected Result:
{
"_id": UUID("b5a7d404-a341-45dd-b875-864cd1e6bda2"),
"DocumentId": "05240423067",
"Name": "John Doe",
"CreationDate": ISODate("2021-07-17T00:00:00.000+00:00"),
"Score": 2 // Return Documents with the **Lowest Score**
},
{
"_id": UUID("9efddd23-4b6b-4e96-ab43-b24a080107db"),
"DocumentId": "05240423067",
"Name": "John Doe",
"CreationDate": ISODate("2021-07-10T00:00:00.000+00:00"),
"Score": 2 // Return Documents with the **Lowest Score**
},
{
"_id": UUID("e3262f8e-bd6a-49e8-abe5-c3c1a4e49900"),
"DocumentId": "88313825863",
"Name": "Marcus Joseph",
"CreationDate": ISODate("0001-01-01T00:00:00.000+00:00"),
"Score": 2 // If both Scores Equal, Compare CreationDate earlier
}
Mongo Version 4.2.21
This would be easier with some of the newer "$group" accumulators introduced in more recent versions of MongoDB, but here's one way you could do it.
db.collection.aggregate([
{
"$group": {
"_id": "$DocumentId",
"count": {"$sum": 1},
"docs": {"$push": "$$ROOT"}
}
},
{ // if only 1, keep it
"$match": {
"$expr": {"$gt": ["$count", 1]}
}
},
{ // find the doc to keep
"$set": {
"keepDoc": {
"$reduce": {
"input": "$docs",
"initialValue": {
"Score": {"$minKey": 1}
},
"in": {
"$switch": {
"branches": [
{
"case": {"$gt": ["$$this.Score", "$$value.Score"]},
"then": "$$this"
},
{
"case": {"$eq": ["$$this.Score", "$$value.Score"]},
"then": {
"$cond": [
{"$gt": ["$$this.CreationDate", "$$value.CreationDate"]},
"$$this",
"$$value"
]
}
}
],
"default": "$$value"
}
}
}
}
}
},
{ // get docs other than keepDoc
"$project": {
"_id": 0,
"expiredDocs": {
"$filter": {
"input": "$docs",
"cond": {"$ne": ["$$this", "$keepDoc"]}
}
}
}
},
{"$unwind": "$expiredDocs"},
{"$replaceWith": "$expiredDocs"}
])
Try it on mongoplayground.net.
N.B.: On mongoplayground.net, there's no easy way that I know of to enter binary UUID values in the BSON configuration, so I just used strings. It should be inconsequential to the pipeline.

MongoDB Unwind Error: cannot encode object of type: <class 'set'>

hope you're fine.
I cannot seem to find a way to aggregate the following document by 'equity id'.
{
"_id": {
"$oid": "6001dc246192c700013e8252"
},
"user": "blablabla",
"_type": "User::Individual",
"created_at": {
"$date": "2021-01-15T18:17:11.130Z"
},
"integrations": [{
"_id": {
"$oid": "6001dc62e7a0970001258da8"
},
"status": "completed",
"authentication_failed_msg": null
}],
"portfolios": [{
"_id": {
"$oid": "6001dc62e7a0970001258da9"
},
"_type": "SimplePortfolio",
"transactions": [{
"_id": {
"$oid": "6001dc62e7a0970001258daa"
},
"settlement_period": 2,
"expenses": 0,
"source": "integration",
"_type": "Transaction::Equity::Buy",
"date": {
"$date": "2020-03-02T00:00:00.000Z"
},
"shares": 100,
"price": 13.04,
"equity_id": "abcd"
}, {
"_id": {
"$oid": "6001dc62e7a0970001258dab"
},
"settlement_period": 2,
"expenses": 0,
"source": "integration",
"_type": "Transaction::Equity::Buy",
"date": {
"$date": "2020-03-02T00:00:00.000Z"
},
"shares": 1000,
"price": 1.03,
"equity_id": "efgh"
I tried something like
db.collection.aggregate([{"$unwind": {'$portfolios.transactions'}},
{"$group" : {"_id": "$equity_id"}}])
Got error InvalidDocument: cannot encode object: {'$portfolios.transactions'}, of type: <class 'set'>
Ideally what I want a list grouped by user and equity_id and a sum of its shares. Does anyone know if the error is caused by my aggregation or the document structure?
You should $unwind twice.
db.collection.aggregate([
{
"$unwind": "$portfolios"
},
{
"$unwind": "$portfolios.transactions"
},
{
"$group": {
"_id": "$portfolios.transactions.equity_id"
}
}
])
mongoplayground

How to have multiple nested arrays in an array

I have the collection data from a csv file with header. When i run my query
db.ties.aggregate(
[
{
$group:
{
_id: { "SHOP": "$SHOP" },
isLinkedTo: { $push: { "PERSON": "$PERSON", "CITY": "$CITY", "ROOM": "$ROOM", "STYLE": "$STYLE", "hasDonated": {"DATE": "$DATE", "OBJECT": "$OBJECT", "COST": "$COST", "COLOR": "$COLOR", "PAYMENT": "$PAYMENT"}}}
}
},
{ $out: "ties"}
],
{ allowDiskUse: true }
)
I have like result:
{
"_id": {
"Shop": "FirstShopNameCovered"
},
"isLinkedTo": [{
"PERSON": "Carleen",
"CITY": "Rome",
"ROOM": "Kitchen",
"STYLEPREFERED": "Modern",
"hasDonated": {
"DATE": "2019-10-11",
"OBJECT": "Set of dishes",
"COST": 72,
"COLOR": "White",
"PAYMENT": "Credit card"
}
}, {
"PERSON": "Carleen",
"CITY": "Rome",
"ROOM": "Kitcher",
"STYLEPREFERED": "Modern",
"hasDonated": {
"DATE": "2018-10-26",
"OBJECT": "Set of chairs",
"COST": 353,
"COLOR": "Grey",
"PAYMENT": "Coupon"
}
}, {
"PERSON": "Pernick",
"CITY": "Venezia",
"ROOM": "Bathroom",
"STYLE": "Minimalist",
"hasDonated": {
"DATE": "2018-09-18",
"OBJECT": "Mirror",
"COST": 68,
"COLOR": "Brown",
"PAYMENT": "Credit card"
}
}
You can see that there is replicated the Person "PERSON": "Carleen" with all data with 2 different arrays hasDonated.
I wish have something like this result, with person not replicated that contains all hasDonated arrays where he is present:
"_id": {
"Shop": "NameCovered"
},
"isLinkedTo": [{
"PERSON": "Carleen",
"CITY": "Rome",
"ROOM": "Kitchen",
"STYLE": "RetrĂ²",
"hasDonated": {
"DATE": "2019-10-11",
"OBJECT": "Set of dishes",
"COST": 72,
"COLOR": "White",
"PAYMENT": "Credit card"
},
{
"DATE": "2018-10-26",
"OBJECT": "Chair",
"COST": 53,
"COLOR": "Grey",
"PAYMENT": "Coupon"
}
}, {
"PERSON": "Pernick",
"CITY": "Venezia",
"ROOM": "Bathroom",
"STYLE": "Minimalist",
"hasDonated": {
"DATE": "2018-09-18",
"OBJECT": "Mirror",
"COST": 68,
"COLOR": "Brown",
"PAYMENT": "Credit card"
}
How can I do to have the result like this?
First we need to $unwind to flat the array. Then group the hasDonated using $group where unique is found by combination of "_id" and "PERSON" as you mentioned.
[
{
"$unwind": "$isLinkedTo"
},
{
$group: {
_id: {
_id: "$_id",
per: "$isLinkedTo.PERSON"
},
isLinkedTo: {
$first: {
PERSON: "$isLinkedTo.PERSON",
CITY: "$isLinkedTo.CITY",
ROOM: "$isLinkedTo.ROOM",
STYLEPREFERED: "$isLinkedTo.STYLEPREFERED"
}
},
hasDonated: {
$addToSet: "$isLinkedTo.hasDonated"
}
}
},
{
$addFields: {
_id: "$_id._id",
"isLinkedTo.hasDonated": "$hasDonated"
}
},
{
$project: {
hasDonated: 0
}
},
{
$group: {
_id: "$_id",
isLinkedTo: {
$push: "$isLinkedTo"
}
}
}
]
Working Mongo playground

How to find most common value for specific categories in MongoDB?

I have a dataset in MongoDB that looks like this:
{ "name": "Tom's", "category": "coffee shop" },
{ "name": "Red Lobster", "category": "restaurant" },
{ "name": "Tom's", "category": "coffee shop" },
{ "name": "Starbucks", "category": "coffee shop" },
{ "name": "Central Park", "category": "park" },
{ "name": "Office", "category": "office" },
{ "name": "Red Lobster", "category": "restaurant" },
{ "name": "Home", "category": "home" },
{ ... } // and so on
How can I find the most common value for specific categories? For example, the most common occurring value for coffee shop and restaurant should be Tom's and Red Lobster, respectively.
My current $aggregate query only seems to list the most common occurring value among ALL of the dataset:
db.collection.aggregate(
{ "$group": { "_id": { "name": "$name" }, "count": { "$sum":1 } }},
{ "$group": { "_id": "$_id.name", "count": { "$sum": "$count" } }},
{ "$sort": { "count":-1 }}
)
You can try the below query.
$group on category and name to get the count for each category and name combination.
$sort the input documents by category and count desc.
$group on category with $first to pick the document with most occurrences.
db.collection_name.aggregate([
{
"$group": {
"_id": {
"category": "$category",
"name": "$name"
},
"count": {
"$sum": 1
}
}
},
{
"$sort": {
"_id.category": 1,
"count": -1
}
},
{
"$group": {
"_id": {
"category": "$_id.category"
},
"name": {
"$first": "$_id.name"
},
"count": {
"$first": "$count"
}
}
}
])

MongoDB aggregate results into nested array

I'm quite new to MongoDB and I am currently facing a situation. Below are 2 sample records from the whole database that I have :
{
"_id": 1,
"Record": 1,
"Link": [ "https://wikileaks.org/plusd/cables/1979PANAMA06344_e.html" ],
"Location": [ "USA", "PAN", "USA", "USA", "PAN" ],
"Organization": [ "GN", "SOUTHCOM", "UCMJ", "PRC" ],
"Date": [ "2016" ],
"People": [ "P.Walter" ]
}
{
"_id": 2,
"Record": 2,
"Link": [ "https://wikileaks.org/gifiles/docs/11/111533_-latam-centam-brief-110822-.html" ],
"Location": [ "NIC", "GTM", "JAM", "GTM", "PAN" ],
"Organization": [ "CENTAM", "Calibre Mining Corporation", "STRATFOR", "Alder Resources" ],
"Date": [ "2013" ],
"People": [ "Daniel Ortega", "Hugo Chavez", "Paulo Gregoire" ]
}
Basically, I'm trying to get an output like this :
{
"Country": "US",
"Years": [
{
"Year": "2016",
"Links": [ "https://wikileaks.org/gifiles/docs/11/111533_-latam-centam-brief-110822-.html",
"https://wikileaks.org/plusd/cables/1979PANAMA06344_e.html",
"https://wikileaks.org/gifiles/docs/90/9058_wax-12312008-csv-.html" ]
},
{
"Year": "2013",
"Links": [ ""https://wikileaks.org/gifiles/docs/11/111533_-latam-centam-brief-110822-.html",
"https://wikileaks.org/plusd/cables/1979PANAMA06344_e.html",
"https://wikileaks.org/gifiles/docs/90/9058_wax-12312008-csv-.html" ]
}
]
"Link_Count": 6
}
{
"Country": "UK",
"Years": [
{
"Year": "2009",
"Links": [ "https://wikileaks.org/gifiles/docs/11/111533_-latam-centam-brief-110822-.html",
"https://wikileaks.org/plusd/cables/1979PANAMA06344_e.html",
"https://wikileaks.org/gifiles/docs/90/9058_wax-12312008-csv-.html" ]
},
{
"Year": "2011",
"Links": [ ""https://wikileaks.org/gifiles/docs/11/111533_-latam-centam-brief-110822-.html",
"https://wikileaks.org/plusd/cables/1979PANAMA06344_e.html"]
}
]
"Link_Count": 5
}
I've tried to aggregate it, but I couldn't achieve what I want like I've given in the output. Here's my query :
db.test.aggregate([
{
"$unwind": "$Location"
},
{
"$group" : {
"_id": {
"Country": "$Location",
"Year": "$Date",
"Links": "$Link"
},
Loc: {
$addToSet: "$Location"
}
}
},
{
"$unwind": "$Loc"
},
{
"$group": {
"_id": "$Loc",
"Years": { "$push": {
"Year": "$_id.Year",
"Links": "$_id.Links"
}
}
}
}
]).toArray()
I used $unwind and $addToSet on $Location because there are duplicates found within $Location. I'm open to any suggestions or solution so please do tell! Thanks in advance!
You can use :
db.test.aggregate([{
"$unwind": "$Location"
}, {
"$unwind": "$Date"
}, {
"$unwind": "$Link"
}, {
"$group": {
"_id": {
"Country": "$Location",
"Year": "$Date"
},
Links: {
$addToSet: "$Link"
}
}
}, {
"$group": {
"_id": "$_id.Country",
Years: {
$push: {
"Year": "$_id.Year",
"Links": "$Links"
}
},
Link_Count: { $sum: { $size: "$Links" } }
}
}])
The idea is to $unwind all arrays to be able to $push link into a new array, and to count the grouped record with $size for the last $group stage.