Mongo sorted list complexity - mongodb

For the following sorted list:
{
sorted_list : [{name : <string>,score : <Number>}]
}
What are the complexities of the following commands (in 'O' notations)?
Find:
collection.find( { _id: 1}, { sorted_list: { $slice: [ <skip>, <limit> ] } } )
Insert:
collection.update(
{ _id: 1 },
{
$push: {
sorted_list: {
$each: [ { name: 3, score: 8 }, { name: 4, score: 7 }, { name: 5, score: 6 } ],
$sort: { score: 1 }
}
}
}
)
Remove:
collection.update({"sorted_list.name": name},{ $pull: { "sorted_list.name": <name> } },{ multi: true });
EDIT
Let's assume ther following index exists:
{ "sorted_list.name" : 1}

Related

MongoDB add grand total to sortByCount() in an aggregation pipeline

I have grouped all the users by country, but I would also like to have a row showing the grand total (users are tagged to a single country in our use case).
Data Model / Sample Input
The collection is filled with objects representing a country (name) and each contains a list of user objects in an array under users.
{ _id: ObjectId("..."),
name: 'SG',
type: 'COUNTRY',
increment: 200,
users:
[ ObjectId("..."),
ObjectId("..."),
...
Query
db.collection.aggregate([{$match:{type:"COUNTRY"}},{$unwind:"$users"},{$sortByCount:"$name"}])
Current Results
{ _id: 'SG', count: 76 }
{ _id: 'IN', count: 6 }
{ _id: 'US', count: 4 }
{ _id: 'FR', count: 3 }
{ _id: 'UK', count: 2 }
{ _id: 'RU', count: 1 }
{ _id: 'CO', count: 1 }
{ _id: 'DK', count: 1 }
{ _id: 'ID', count: 1 }
{ _id: 'PH', count: 1 }
Expected Results
{ _id: 'SG', count: 76 }
{ _id: 'IN', count: 6 }
{ _id: 'US', count: 4 }
{ _id: 'FR', count: 3 }
{ _id: 'UK', count: 2 }
{ _id: 'RU', count: 1 }
{ _id: 'CO', count: 1 }
{ _id: 'DK', count: 1 }
{ _id: 'ID', count: 1 }
{ _id: 'PH', count: 1 }
{ _id: null, count: 96 } <<< TOTAL COUNT ADDED
Any tips to achieve this without resorting to complex or dirty tricks?
You can also try using $facet to calculate counts by country name and total count, and then combine them together. Something like this:
db.collection.aggregate([
{
$match: {
type: "COUNTRY"
}
},
{
"$unwind": "$users"
},
{
"$facet": {
"groupCountByCountry": [
{
"$sortByCount": "$name"
}
],
"totalCount": [
{
"$group": {
"_id": null,
"count": {
"$sum": 1
}
}
}
]
}
},
{
"$project": {
array: {
"$concatArrays": [
"$groupCountByCountry",
"$totalCount"
]
}
}
},
{
"$unwind": "$array"
},
{
"$replaceRoot": {
"newRoot": "$$ROOT.array"
}
}
])
Here's the playground link.
I recommend just doing this in memory as the alternative is "hacky" but in order to achieve this in Mongo you just need to group all documents, add a new documents and unwind again, like so:
db.collection.aggregate([
{
$group: {
_id: null,
roots: {
$push: "$$ROOT"
},
sum: {
$sum: "$count"
}
}
},
{
$addFields: {
roots: {
"$concatArrays": [
"$roots",
[
{
_id: null,
count: "$sum"
}
]
]
}
}
},
{
$unwind: "$roots"
},
{
$replaceRoot: {
newRoot: "$roots"
}
}
])
Mongo Playground

MongoDB get count of field per season from MM/DD/YYYY date field

I am facing a problem in MongoDB. Suppose, I have the following collection.
{ id: 1, issueDate: "07/05/2021", code: "31" },
{ id: 2, issueDate: "12/11/2020", code: "14" },
{ id: 3, issueDate: "02/11/2021", code: "98" },
{ id: 4, issueDate: "01/02/2021", code: "14" },
{ id: 5, issueDate: "06/23/2020", code: "14" },
{ id: 6, issueDate: "07/01/2020", code: "31" },
{ id: 7, issueDate: "07/05/2022", code: "14" },
{ id: 8, issueDate: "07/02/2022", code: "20" },
{ id: 9, issueDate: "07/02/2022", code: "14" }
The date field is in the format MM/DD/YYYY. My goal is to get the count of items with each season (spring (March-May), summer (June-August), autumn (September-November) and winter (December-February).
The result I'm expecting is:
count of fields for each season:
{ "_id" : "Summer", "count" : 6 }
{ "_id" : "Winter", "count" : 3 }
top 2 codes (first and second most recurring) per season:
{ "_id" : "Summer", "codes" : {14, 31} }
{ "_id" : "Winter", "codes" : {14, 98} }
How can this be done?
You should never store date/time values as string, store always proper Date objects.
You can use $setWindowFields opedrator for that:
db.collection.aggregate([
// Convert string into Date
{ $set: { issueDate: { $dateFromString: { dateString: "$issueDate", format: "%m/%d/%Y" } } } },
// Determine the season (0..3)
{
$set: {
season: { $mod: [{ $toInt: { $divide: [{ $add: [{ $subtract: [{ $month: "$issueDate" }, 1] }, 1] }, 3] } }, 4] }
}
},
// Count codes per season
{
$group: {
_id: { season: "$season", code: "$code" },
count: { $count: {} },
}
},
// Rank occurrence of codes per season
{
$setWindowFields: {
partitionBy: "$_id.season",
sortBy: { count: -1 },
output: {
rank: { $denseRank: {} },
count: { $sum: "$count" }
}
}
},
// Get only top 2 ranks
{ $match: { rank: { $lte: 2 } } },
// Final grouping
{
$group: {
_id: "$_id.season",
count: { $first: "$count" },
codes: { $push: "$_id.code" }
}
},
// Some cosmetic for output
{
$set: {
season: {
$switch: {
branches: [
{ case: { $eq: ["$_id", 0] }, then: 'Winter' },
{ case: { $eq: ["$_id", 1] }, then: 'Spring' },
{ case: { $eq: ["$_id", 2] }, then: 'Summer' },
{ case: { $eq: ["$_id", 3] }, then: 'Autumn' },
]
}
}
}
}
])
Mongo Playground
I will give you clues,
You need to use $group with _id as $month on issueDate, use accumulator $sum to get month wise count.
You can divide month by 3, to get modulo, using $toInt, $divide, then put them into category using $cond.
Another option:
db.collection.aggregate([
{
$addFields: {
"season": {
$switch: {
branches: [
{
case: {
$in: [
{
$substr: [
"$issueDate",
0,
2
]
},
[
"06",
"07",
"08"
]
]
},
then: "Summer"
},
{
case: {
$in: [
{
$substr: [
"$issueDate",
0,
2
]
},
[
"03",
"04",
"05"
]
]
},
then: "Spring"
},
{
case: {
$in: [
{
$substr: [
"$issueDate",
0,
2
]
},
[
"12",
"01",
"02"
]
]
},
then: "Winter"
}
],
default: "No date found."
}
}
}
},
{
$group: {
_id: {
s: "$season",
c: "$code"
},
cnt1: {
$sum: 1
}
}
},
{
$sort: {
cnt1: -1
}
},
{
$group: {
_id: "$_id.s",
codes: {
$push: "$_id.c"
},
cnt: {
$sum: "$cnt1"
}
}
},
{
$project: {
_id: 0,
season: "$_id",
count: "$cnt",
codes: {
"$slice": [
"$codes",
2
]
}
}
}
])
Explained:
Add one more field for season based on $switch per month(extracted from issueDate string)
Group to collect per season/code.
$sort per code DESCENDING
group per season to form an array with most recurring codes in descending order.
Project the fields to the desired output and $slice the codes to limit only to the fist two most recurring.
Comment:
Indeed keeping dates in string is not a good idea in general ...
Playground

Mongo query on last sorted array item

Would like to query the following to obtain all item documents such that the last sale (ordered by soldDate) has a status of 2.
db.items.insertMany([
{ item: 1,
sales: [
{ soldDate: ISODate("2021-10-04"), status: 1 },
{ soldDate: ISODate("2021-10-05"), status: 2 }
]
},
{ item: 2,
sales: [
{ soldDate: ISODate("2021-09-29"), status: 3 },
{ soldDate: ISODate("2021-09-24"), status: 1 }
]
},
{ item: 3,
sales: [
{ soldDate: ISODate("2021-06-01"), status: 3 },
{ soldDate: ISODate("2021-06-12"), status: 2 },
{ soldDate: ISODate("2021-06-07"), status: 1 }
]
}
]);
So in this example, the query would return the following two documents:
{ item: 1,
sales: [
{ soldDate: ISODate("2021-10-04"), status: 1 },
{ soldDate: ISODate("2021-10-05"), status: 2 } // triggered by this
]
},
{ item: 3,
sales: [
{ soldDate: ISODate("2021-06-01"), status: 3 },
{ soldDate: ISODate("2021-06-12"), status: 2 }, // triggered by this
{ soldDate: ISODate("2021-06-07"), status: 1 }
]
}
Thanks for any help.
You stated: ordered by soldDate which can actually mean two things. Perhaps you want the documents sorted by the array, or perhaps you mean the array is sorted. I assumed the later.
Solution (Array sorted)
db.items.aggregate([
{ $match: { "sales.status": 2} },
{ $unwind: "$sales" },
{ $sort: { "item": 1, "sales.soldDate": 1} },
{ $group: { "_id": "$_id", "item": { $first: "$item" }, "sales": { $push: "$sales" } } }
])
Results
Enterprise replSet [primary] barrydb> db.items.aggregate([
... { $match: { "sales.status": 2} },
... { $unwind: "$sales" },
... { $sort: { "item": 1, "sales.soldDate": 1} },
... { $group: { "_id": "$_id", "item": { $first: "$item" }, "sales": { $push: "$sales" } } }
... ])
[
{
_id: ObjectId("617064519be05d9f1cbab346"),
item: 1,
sales: [
{ soldDate: ISODate("2021-10-04T00:00:00.000Z"), status: 1 },
{ soldDate: ISODate("2021-10-05T00:00:00.000Z"), status: 2 }
]
},
{
_id: ObjectId("617064519be05d9f1cbab348"),
item: 3,
sales: [
{ soldDate: ISODate("2021-06-01T00:00:00.000Z"), status: 3 },
{ soldDate: ISODate("2021-06-07T00:00:00.000Z"), status: 1 },
{ soldDate: ISODate("2021-06-12T00:00:00.000Z"), status: 2 }
]
}
]
But, to be complete here is a solution if you want the documents sorted (and the array not necessarily sorted).
Solution (Documents sorted)
db.items.aggregate([
{ $match: { "sales.status": 2} },
{ $sort: { "sales.soldDate": 1} }
])
Results
Enterprise replSet [primary] barrydb> db.items.aggregate([
... { $match: { "sales.status": 2} },
... { $sort: { "sales.soldDate": 1} }
... ])
[
{
_id: ObjectId("617064519be05d9f1cbab348"),
item: 3,
sales: [
{ soldDate: ISODate("2021-06-01T00:00:00.000Z"), status: 3 },
{ soldDate: ISODate("2021-06-12T00:00:00.000Z"), status: 2 },
{ soldDate: ISODate("2021-06-07T00:00:00.000Z"), status: 1 }
]
},
{
_id: ObjectId("617064519be05d9f1cbab346"),
item: 1,
sales: [
{ soldDate: ISODate("2021-10-04T00:00:00.000Z"), status: 1 },
{ soldDate: ISODate("2021-10-05T00:00:00.000Z"), status: 2 }
]
}
]
EDIT - After re-reading I believe you want only where the record having a status of 2 is also has the greatest date in the array
Solution (Only last having status of value 2 - docs and array unsorted)
db.items.aggregate([
{ $unwind: "$sales" },
{ $sort: { "item": 1, "sales.soldDate": -1} },
{ $group: { "_id": "$_id", "item": { $first: "$item" }, "sales": { $push: "$sales" } } },
{ $match : { "sales.0.status" : 2 } }
])
Results
Enterprise replSet [primary] barrydb> db.items.aggregate([
... { $unwind: "$sales" },
... { $sort: { "item": 1, "sales.soldDate": -1} },
... { $group: { "_id": "$_id", "item": { $first: "$item" }, "sales": { $push: "$sales" } } },
... { $match : { "sales.0.status" : 2 } }
... ])
[
{
_id: ObjectId("617064519be05d9f1cbab346"),
item: 1,
sales: [
{ soldDate: ISODate("2021-10-05T00:00:00.000Z"), status: 2 },
{ soldDate: ISODate("2021-10-04T00:00:00.000Z"), status: 1 }
]
},
{
_id: ObjectId("617064519be05d9f1cbab348"),
item: 3,
sales: [
{ soldDate: ISODate("2021-06-12T00:00:00.000Z"), status: 2 },
{ soldDate: ISODate("2021-06-07T00:00:00.000Z"), status: 1 },
{ soldDate: ISODate("2021-06-01T00:00:00.000Z"), status: 3 }
]
}
]
EDIT - Add Self Referencing Lookup
db.items.aggregate([
{ $unwind: "$sales" },
{ $sort: { "item": 1, "sales.soldDate": -1} },
{ $group: { "_id": "$_id", "item": { $first: "$item" }, "sales": { $push: "$sales" } } },
{ $match : { "sales.0.status" : 2 } },
{ $lookup : {
from: "items",
localField: "_id",
foreignField: "_id",
as: "results"
}
},
{ $unwind: "$results" },
{ $replaceRoot: { "newRoot": "$results" } }
])
With the self-referencing lookup we are treating MongoDB as a relational database. We find the documents that meet our requirements, but in doing so we have destroyed the original shape and content. By performing a lookup on the same records we can restore the shape but at a performance penalty.
Retain Copy
Rather than performing a lookup, which has a performance concern, a different approach is to leverage memory on the server. Keep a copy of the original while moving through the pipeline and manipulating the original to identify desired records...
db.items.aggregate([
{ $addFields: { "_original": "$$ROOT" } },
{ $unwind: "$sales" },
{ $sort: { "item": 1, "sales.soldDate": -1} },
{ $group: { "_id": "$_id", "_original": { $first: "$_original" }, "sales_status": { $push: "$sales.status" } } },
{ $match : { "sales_status.0" : 2 } },
{ $replaceRoot: { "newRoot": "$_original" } }
])
In this example we keep a copy of the original in the field _original then once we have identified the records we want we pivot the root back to _original. This may put pressure on the WiredTiger cache as we are keeping a duplicate of all selected records in memory during the execution of the pipeline. A $lookup approach also has this memory concern. Two queries would eliminate the cache pressure issues, but behaves like a $lookup and would not perform as well.

MongoDB use two unwind on aggregate for getting the value of repetition count

I have a dataset like this:
{
"_id" : ObjectId("5bacc9295af10e2764648baa"),
"slug" : ["Maruti", "Honda"],
"page" : "Ford"
},
{
"_id" : ObjectId("5bacc9295af10e2764648bab"),
"slug" : ["Maruti", "Honda", "Tata"],
"page" : "Hyundai"
},
{
"_id" : ObjectId("5bacc9295af10e2764648bac"),
"slug" : ["Maruti"],
"page" : "Ford"
},
{
"_id" : ObjectId("5bacc9295af10e2764648bad"),
"slug" : ["Ford", "Hyundai"],
"page" : "Tata"
}
Now if I want to get the repetition count of Page then I will Do the Aggregate Query Like this:
MyCollectionName.aggregate([
{ $unwind: { path: "$page" } },
{ $group: { _id: "$page", count: { $sum: 1 } } },
{
$project: {
_id: 0,
vehiclename: "$_id",
count: { $multiply: ["$count", 1] }
}
},
{ $sort: { count: -1 } }
])
.then(data => {
console.log(data)
//get the result like this which is fine
[
{ vehiclename : 'Ford', count: 2},
{ vehiclename : 'Hyundai', count: 1},
{ vehiclename : 'Tata', count: 1}
]
})
.catch(e => {
console.log(e)
})
Similarly if I do for Slug then my Query will be like this:
MyCollectionName.aggregate([
{ $unwind: { path: "$slug" } },
{ $group: { _id: "$slug", count: { $sum: 1 } } },
{
$project: {
_id: 0,
vehiclename: "$_id",
count: { $multiply: ["$count", 1] }
}
},
{ $sort: { count: -1 } }
])
.then(data => {
console.log(data)
//get the result like this which is fine
[
{ vehiclename : 'Maruti', count: 3},
{ vehiclename : 'Honda', count: 2},
{ vehiclename : 'Tata', count: 1},
{ vehiclename : 'Ford', count: 1},
{ vehiclename : 'Hyundai', count: 1}
]
})
.catch(e => {
console.log(e)
})
Now I want to do this on Single query Instead of Seperate query.
I am bit confused of using unwind and after getting the both combination value on a single query.
Desired output will be like this:
[
{ vehiclename : 'Maruti', count: 3},
{ vehiclename : 'Ford', count: 3},
{ vehiclename : 'Honda', count: 2},
{ vehiclename : 'Tata', count: 2},
{ vehiclename : 'Hyundai', count: 1}
]
Any help is really Appreciated.
I got the solution. Please notify if I am doing something wrong..
MyCollectionName.aggregate([
{
$facet: {
groupByPage: [
{ $unwind: "$page" },
{
$group: {
_id: "$page",
count: { $sum: 1 }
}
}
],
groupBySlug: [
{ $unwind: "$slug" },
{
$group: {
_id: "$slug",
count: { $sum: 1 }
}
}
]
}
},
{
$project: {
pages: {
$concatArrays: ["$groupByPage", "$groupBySlug"]
}
}
},
{ $unwind: "$pages" },
{
$group: {
_id: "$pages._id",
count: { $sum: "$pages.count" }
}
},
{ $sort: { count: -1 } }
])
.then(data => {
console.log(data)
})
.catch(e => {
console.log(e)
})

Grouping and counting across documents?

I have a collection with documents similar to the following format:
{
departure:{name: "abe"},
arrival:{name: "tom"}
},
{
departure:{name: "bob"},
arrival:{name: "abe"}
}
And to get output like so:
{
name: "abe",
departureCount: 1,
arrivalCount: 1
},
{
name: "bob",
departureCount: 1,
arrivalCount: 0
},
{
name: "tom",
departureCount: 0,
arrivalCount: 1
}
I'm able to get the counts individually by doing a query for the specific data like so:
db.sched.aggregate([
{
"$group":{
_id: "$departure.name",
departureCount: {$sum: 1}
}
}
])
But I haven't figured out how to merge the arrival and departure name into one document along with counts for both. Any suggestions on how to accomplish this?
You should use a $map to split your doc into 2, then $unwind and $group..
[
{
$project: {
dep: '$departure.name',
arr: '$arrival.name'
}
},
{
$project: {
f: {
$map: {
input: {
$literal: ['dep', 'arr']
},
as: 'el',
in : {
type: '$$el',
name: {
$cond: [{
$eq: ['$$el', 'dep']
}, '$dep', '$arr']
}
}
}
}
}
},
{
$unwind: '$f'
}, {
$group: {
_id: {
'name': '$f.name'
},
departureCount: {
$sum: {
$cond: [{
$eq: ['$f.type', 'dep']
}, 1, 0]
}
},
arrivalCount: {
$sum: {
$cond: [{
$eq: ['$f.type', 'arr']
}, 1, 0]
}
}
}
}, {
$project: {
_id: 0,
name: '$_id.name',
departureCount: 1,
arrivalCount: 1
}
}
]