How to extract number from a string in mongodb query? - mongodb

The only problem with the commented-out query is that "$DAMAGE_PROPERTY" is stored in formats such as 0K, 1K, 250K, 5.20M, and $toDecimal/$toDouble are not able to convert those strings to a number. My goal is to extract the number from the string inside the query itself rather than iterating over the result array.
// Sums DAMAGE_PROPERTY for the matching Indiana storm events and logs the
// total expressed in thousands ("K"), which is the unit the log line prints.
try {
  const db = client.db(dbName);
  const collection = db.collection("Storm");
  const query = {
    STATE: "INDIANA",
    EVENT_TYPE: event,
    DAMAGE_PROPERTY: { $nin: ["", null] },
    YEAR: { $gte: startYear, $lte: endYear },
  };
  // --> aggregation failed.
  // let res = await collection
  // .aggregate([
  // { $match: query },
  // {
  // $project: {
  // document: "$$ROOT",
  // damage: {
  // $sum: {
  // $toDouble: "$DAMAGE_PROPERTY", //Cause of Error: no conversion
  // },
  // },
  // },
  // },
  // ])
  // .toArray();
  // console.log(res);
  // ---> Solution I want to avoid
  const res = await collection.find(query).toArray();

  // parseFloat() stops at the first non-numeric character, so "5.20M" and
  // "5.20K" both parse to 5.2. Scale by the suffix so every amount is in
  // thousands before it is added. Extend this table if more suffixes appear.
  const thousandsPerUnit = { K: 1, M: 1000 };
  let totalDamage = 0;
  for (const { DAMAGE_PROPERTY: raw } of res) {
    const amount = Number.parseFloat(raw);
    if (Number.isNaN(amount)) {
      // A single bad value would otherwise turn the whole total into NaN.
      console.warn(`Skipping unparseable DAMAGE_PROPERTY: ${raw}`);
      continue;
    }
    const suffix = String(raw).trim().slice(-1).toUpperCase();
    // Unknown or missing suffix: keep the number as-is (the previous behaviour).
    totalDamage += amount * (thousandsPerUnit[suffix] ?? 1);
  }
  console.log(
    `Total Damage to property in Indiana from ${startYear} to ${endYear} due to ${event} is ${totalDamage}K`
  );
} catch (err) {
  console.log(err);
} finally {
  // close() returns a promise; await it so the connection is released before moving on.
  await client.close();
}
Test Data in json:
{
"BEGIN_YEARMONTH": 200809,
"BEGIN_DAY": 14,
"BEGIN_TIME": 830,
"END_YEARMONTH": 200809,
"END_DAY": 14,
"END_TIME": 1030,
"EPISODE_ID": 21247,
"YEAR": 2008,
"DEATHS_DIRECT": 0,
"DEATHS_INDIRECT": 0,
"DAMAGE_PROPERTY": "5.20M",
},

Providing another solution separately to avoid clutter and confusion. Answer #1:
// Totals DAMAGE_PROPERTY strings such as "5.20M" / "9.5K" on the server:
// the last character is the unit, everything before it is the number.
// NOTE(review): values are assumed to be non-empty strings ending in a unit
// letter; filter out "" / null beforehand.
db.collection.aggregate([
  {
    $group: {
      _id: null,
      // Accumulate with $sum directly instead of $push-ing every value into
      // one array first: the result is the same, but no unbounded array has
      // to fit into a single document.
      totalDamage: {
        $sum: {
          $let: {
            vars: {
              // $strLenCP counts code points, so slice with $substrCP as well;
              // mixing it with the byte-based $substrBytes breaks on multi-byte text.
              damage: {
                $toDouble: {
                  $substrCP: ["$DAMAGE_PROPERTY", 0, { $subtract: [{ $strLenCP: "$DAMAGE_PROPERTY" }, 1] }]
                }
              },
              unit: {
                $substrCP: ["$DAMAGE_PROPERTY", { $subtract: [{ $strLenCP: "$DAMAGE_PROPERTY" }, 1] }, 1]
              }
            },
            in: {
              $switch: {
                branches: [
                  // NOTE(review): the factors 100 and 10 are kept from the original
                  // answer (they yield 615 for "5.20M" + "9.5K"); use 1000000 / 1000
                  // if absolute amounts are wanted.
                  { case: { $eq: ["$$unit", "M"] }, then: { $multiply: ["$$damage", 100] } },
                  { case: { $eq: ["$$unit", "K"] }, then: { $multiply: ["$$damage", 10] } }
                  // You can have more cases in future
                ],
                // A non-numeric default is ignored by $sum, i.e. it counts as 0.
                default: "Error: No unit matched." // or 0
              }
            }
          }
        }
      }
    }
  },
  // Drop the constant group key so only totalDamage is returned.
  { $project: { _id: 0 } }
]);
Output:
{
"totalDamage" : 615
}

UPDATE based on comments:
// Totals DAMAGE_PROPERTY by splitting each string on its unit letter.
db.collection.aggregate([
  {
    $group: {
      _id: null,
      // $sum is used as the accumulator directly, so the individual values
      // never have to be collected into one (potentially huge) array first.
      totalDamage: {
        $sum: {
          $cond: {
            // Splitting on "K" yields a single piece when the string has no "K".
            if: { $lt: [{ $size: { $split: ["$DAMAGE_PROPERTY", "K"] } }, 2] },
            // No "K": treat the value as millions. A string with neither
            // letter also ends up here.
            then: {
              $multiply: [{ $toDouble: { $first: { $split: ["$DAMAGE_PROPERTY", "M"] } } }, 100]
            },
            // Has "K": the text before it is the number.
            else: {
              $multiply: [{ $toDouble: { $first: { $split: ["$DAMAGE_PROPERTY", "K"] } } }, 10]
            }
          }
        }
      }
    }
  },
  // Drop the constant group key so only totalDamage is returned.
  { $project: { _id: 0 } }
]);
Output for the updated query
{
"totalDamage" : 615
}
What is $cond doing?
STEP 1: Try to split DAMAGE_PROPERTY by "K".
// Example 1: split_result = ["5.2M"]
// Example 2: split_result = ["9.5", ""]
if the length of split_result array is less than 2:
Try to split DAMAGE_PROPERTY by "M"
// For example: split_result = ["5.2", ""]
typecast the first string to decimal
return the result
else:
Split the DAMAGE_PROPERTY by "K"
// For example: split_result = ["9.5", ""]
typecast the first string to decimal
return the result
Try this:
// Adds up the numeric part of every DAMAGE_PROPERTY string.
// The K / M suffix is only stripped, not applied as a scale factor, so
// "5.20M" and "9.5K" contribute 5.2 and 9.5 respectively.
db.collection.aggregate([
  {
    $group: {
      _id: null,
      // Sum while grouping instead of pushing every value into an array and
      // summing afterwards; the output is identical without the large
      // intermediate array.
      totalDamage: {
        $sum: {
          $toDouble: {
            $cond: {
              // A single piece after splitting on "K" means there is no "K".
              if: { $lt: [{ $size: { $split: ["$DAMAGE_PROPERTY", "K"] } }, 2] },
              then: { $first: { $split: ["$DAMAGE_PROPERTY", "M"] } },
              else: { $first: { $split: ["$DAMAGE_PROPERTY", "K"] } }
            }
          }
        }
      }
    }
  },
  // Drop the constant group key so only totalDamage is returned.
  { $project: { _id: 0 } }
]);
Output:
{
"totalDamage" : 14.7
}
Test data:
/* 1 createdAt:3/12/2021, 3:22:08 PM*/
{
"_id" : ObjectId("604b39c84b5860176c2254e2"),
"DAMAGE_PROPERTY" : "5.20M"
},
/* 2 createdAt:3/12/2021, 3:22:08 PM*/
{
"_id" : ObjectId("604b39c84b5860176c2254e3"),
"DAMAGE_PROPERTY" : "9.5K"
}

Related

How to get this pipeline to return exactly one document?

I am running the following aggregation pipeline:
// Pipeline: keep documents whose `aaa` equals 'bbb', then fold them into a
// single group (constant key '') carrying the sum of `num`.
const agg = [
  { $match: { aaa: 'bbb' } },
  { $group: { _id: '', total: { $sum: '$num' } } },
];
My problem is, when $match matches nothing, the pipeline returns 0 documents. How do I get the pipeline to always return 1 document?
In MongoDB version 6.0 you can do it like this one:
// Variant for MongoDB 6.0: sum `num` over the matching documents and try to
// guarantee that exactly one document comes back.
db.collection.aggregate([
{ $match: { aaa: 'bbb' } },
{
$group: {
_id: null,
total: { $sum: "$num" }
}
},
// NOTE(review): presumably meant to materialise a document with total = 0
// when the stages above produced nothing - confirm against the $densify
// documentation for the `bounds` semantics.
{
$densify: {
field: "total",
range: { step: 1, bounds: [0, 0] }
}
},
// A document without an _id gets MaxKey as _id; any other _id is kept.
{ $set: { _id: { $cond: [{ $eq: [{ $type: "$_id" }, "missing"] }, MaxKey, "$_id"] } } },
// Ascending sort followed by $limit keeps only the document with the smallest _id.
{ $sort: { _id: 1 } },
{ $limit: 1 }
])
In version < 6.0 you can try this one:
// Variant for versions before 6.0: compute the real result and a zero
// fallback side by side, then merge them into one document.
db.collection.aggregate([
{
$facet: {
// Real result: sum of `num` over the matching documents (may be empty).
data: [
{ $match: { aaa: 'bbb' } },
{ $group: { _id: null, total: { $sum: "$num" } } }
],
// Fallback: takes one arbitrary document and reduces it to { _id: MaxKey, total: 0 }.
// NOTE(review): looks like this sub-pipeline is empty too when the collection
// itself has no documents - verify.
default: [
{ $limit: 1 },
{ $group: { _id: null, total: { $sum: 0 } } },
{ $set: { _id: MaxKey } }
]
}
},
// `data` is listed last in $mergeObjects, so its fields replace the fallback's when present.
{ $replaceWith: { $mergeObjects: [{ $first: "$default" }, { $first: "$data" }] } },
])
Or this one:
// Variant using $unionWith: append a fallback document and keep whichever
// document sorts first.
db.collection.aggregate([
{ $match: { aaa: 'bbb' } },
{ $group: { _id: null, total: { $sum: "$num" } } },
{
$unionWith: {
coll: "collection",
// Fallback: one arbitrary document rewritten to { _id: MaxKey, total: 0 }.
pipeline: [
{ $limit: 1 },
{ $set: { _id: MaxKey, total: 0 } },
{ $project: { _id: 1, total: 1 } }
]
}
},
// Ascending sort on _id, then keep only the first document.
// NOTE(review): presumably the real result (_id: null) sorts ahead of the
// MaxKey fallback - confirm against the BSON comparison order.
{ $sort: { _id: 1 } },
{ $limit: 1 }
])

Lodash `countBy` equivalent in MongoDB?

Let's say I have the input docs below:
[
{
"_id": "6225ca4052e7c226e2dd836d",
"data": [
"07",
"07",
"12",
"19",
"07",
"32"
]
},
{
"_id": "6225ca4052e7c226e2dd888f",
"data": [
"99",
"97",
"52",
"99",
"58",
"92"
]
}
]
I want to count the occurrences of every element in data string array per document. In JS, I can use countBy. How can I achieve the same using MongoDB Aggregation Framework?
I have tried to $reduce but MongoDB seems to not support assigning dynamic field to object.
{
$reduce: {
input: '$data',
initialValue: {},
in: { // assign `$$this` with count to `$$value`, but failed! }
}
}
Below is the desired output.
[
{
"_id": "6225ca4052e7c226e2dd836d",
"freqs": {
"12": 1,
"19": 1,
"32": 1,
"07": 3
}
},
{
"_id": "6225ca4052e7c226e2dd888f",
"freqs": {
"52": 1,
"58": 1,
"92": 1,
"97": 1,
"99": 2
}
}
]
// countBy equivalent: for every document, count how often each element of
// its `data` array occurs and return the counts as an object in `freqs`.
// Documents whose `data` array is empty or missing produce no output
// because $unwind drops them.
db.collection.aggregate([
  // One document per array element.
  { $unwind: "$data" },
  // Count per (source document, value). Grouping on the value alone would
  // merge equal values that come from different documents.
  {
    $group: {
      _id: { doc: "$_id", value: "$data" },
      count: { $sum: 1 }
    }
  },
  // Collect the { k, v } pairs of each source document.
  {
    $group: {
      _id: "$_id.doc",
      freqs: { $push: { k: "$_id.value", v: "$count" } }
    }
  },
  // Turn the pairs into an object such as { "07": 3, "12": 1 }.
  { $set: { freqs: { $arrayToObject: "$freqs" } } }
])
mongoplayground
// countBy via server-side JavaScript: replaces each document's `data` array
// with an object that maps every element to its number of occurrences.
// NOTE(review): $function requires server-side JavaScript to be enabled - confirm for your deployment.
db.collection.aggregate([
{
$set: {
data: {
$function: {
// The function receives the array and increments one counter per distinct element.
body: "function(d) {let obj = {}; d.forEach(e => {if(obj[e]==null) { obj[e]=1; }else{ obj[e]++; }}); return obj;}",
args: [
"$data"
],
lang: "js"
}
}
}
}
])
mongoplayground

Mongodb loop through every distinct values and select tags using aggregate (facet)

I have collection like this:
{
"labels": [{
"description": "Dog"
}, {
"description": "Red"
}, {
"description": "XXX"
}]
}
{
"labels": [{
"description": "Cat"
}, {
"description": "XXX"
}, {
"description": "Yellow"
}]
}
{
"labels": [{
"description": "Dog"
}, {
"description": "Red"
}, {
"description": "Yellow"
}]
}
{
"labels": [{
"description": "Bird"
}, {
"description": "XXX"
}, {
"description": "XXX"
}]
}
I want to filter for example only "Red" and "Yellow" colors from ALL elements and output document like this:
// because "Dog" appears 2 times so total = 2
{
description: "Dog",
total: 2,
colors: [
{ "_id": "Red", total: 2 },
{ "_id": "Yellow", total: 1 }
]
}
{
description: "Cat",
total: 1,
colors: [
{ "_id": "Yellow", total: 1 }
]
}
{
description: "Bird",
total: 1,
colors: []
}
{
description: "Red",
total: 2,
colors: [
{ _id: "Yellow", total: 1 }
]
}
{
description: "XXX",
total: 4,
colors: [
{ _id: "Yellow", total: 1 }
]
}
I can do this by using collection.distinct('labels.description') and then iterating through every single element + make a separate collection.count({ 'labels.description': 'Dog' }) like this:
for (...)
db.collection.aggregate([
{
"$match": {
"labels.description": valueFromLoop // (e.g. Dog)
}
},
{ $unwind : "$labels" },
{
"$group": {
"_id": "$labels.description",
"count": { "$sum": 1 }
}
},
{
"$match": {
"$or": [
{ "_id": "Red" },
{ "_id": "Yellow" }
]
}
},
{
"$sort": {
"count": -1
}
}
])
I want to do this in a single aggregation or mapReduce so that I could easily output it to new collection using $out instead of using Bulk operations separately, however I don't know if it's possible.
Try this:
// For every label description: how often it occurs, plus per-colour counts of
// the colours (from `filter`) that appear next to it in the same document.
// NOTE(review): a later comment in this thread reports that this pipeline
// does not count all tags properly - verify before relying on the totals.
let filter = ["Red", "Yellow"];
db.testcollection.aggregate([
// Keep a copy of the full labels array; `labels` itself is unwound next.
{
$addFields: { bkp: "$labels" }
},
{ $unwind: "$labels" },
// Reduce the copy to sibling labels that differ from the current label and are listed in `filter`.
{
$addFields: {
bkp: {
$filter: {
input: "$bkp",
as: "item",
cond: {
$and: [
{ $ne: ["$$item.description", "$labels.description"] },
{ $in: ["$$item.description", filter] }
]
}
}
}
}
},
// One document per remaining colour; labels without any colour are kept as well.
{
$unwind: {
path: "$bkp",
preserveNullAndEmptyArrays: true
}
},
// Count per (label, colour) pair; key2 is `false` when there is no colour.
{
$group: {
_id: {
key1: "$labels.description",
key2: { $ifNull: ["$bkp.description", false] }
},
total: { $sum: 1 }
}
},
// Fold the pairs back into one document per label.
{
$group: {
_id: "$_id.key1",
description: { $first: "$_id.key1" },
// Adds 1 for every pair that has a colour and the pair's own count otherwise.
total: {
$sum: {
$cond: {
if: { $first: [["$_id.key2"]] },
then: 1,
else: "$total"
}
}
},
// Pairs without a colour are left out of the array via $$REMOVE.
colors: {
$push: {
$cond: {
if: { $first: [["$_id.key2"]] },
then: {
_id: "$_id.key2",
total: "$total"
},
else: "$$REMOVE"
}
}
}
}
},
{ $project: { _id: 0 } }
]);
For some reason with code from both answers it does not count all tags properly.
I'm posting what works:
// Per label description: number of occurrences (`total`) and, for each of the
// listed colour names found in the same documents, how often it co-occurs.
// The result is written to the `backgrounds_meta` collection.
db.collection.aggregate([
// `result` holds the document's labels whose description is one of the eight colour names.
{
$project: {
labels: 1,
result: {
$filter: {
input: "$labels",
as: "label",
cond: {
$or: [
{ $eq: ["$$label.description", "Blue"] },
{ $eq: ["$$label.description", "Red"] },
{ $eq: ["$$label.description", "Black-and-white"] },
{ $eq: ["$$label.description", "Purple"] },
{ $eq: ["$$label.description", "Orange"] },
{ $eq: ["$$label.description", "Yellow"] },
{ $eq: ["$$label.description", "Green"] },
{ $eq: ["$$label.description", "Teal"] }
]
}
}
}
}
},
// One document per label entry.
{
$unwind: "$labels"
},
// Group by label description; `x` collects one array of colour names per unwound entry.
{
"$group": {
_id: "$labels.description",
x: {
$push: "$result.description"
},
total: { "$sum": 1 }
}
},
// Flatten `x` from an array of arrays into a single array.
{
$project: {
x: {
$reduce: {
input: '$x',
initialValue: [],
in: {$concatArrays: ['$$value', '$$this']}
}
},
total: 1
}
},
// `y` is presumably the distinct set of colour names found in `x` - TODO confirm.
{
$project: {
x: 1,
y: { $setUnion: "$x" },
total: 1
}
},
// For every colour in `y`, count how many times it appears in `x`.
{
$project: {
_id: 0,
description: "$_id",
"colors": {
$map: {
input: "$y",
as: "item",
in: {
_id: "$$item",
count: {
$size: {
$filter: {
input: "$x",
as: "itemx",
cond: {
$eq: ["$$item", "$$itemx"]
}
}
}
}
}
}
},
total: 1
}
},
// Persist the result into a collection.
{
$out: "backgrounds_meta"
}
])
// Per label description: number of occurrences (`total`) plus a `colours`
// array with counts for the "Yellow"/"Red" labels seen alongside it.
db.test2.aggregate([
// `colours` holds the document's labels whose description is "Yellow" or "Red".
{
$project: {
labels:1,
colours: {
$filter: {
input: "$labels",
as: "label",
cond: {
$or: [
{$eq:["Yellow","$$label.description"]},
{$eq:["Red", "$$label.description"]}
]
}
}
}
}
},
// One document per label entry.
{$unwind:"$labels"},
// Group by label description; $addToSet keeps each distinct colour-name array only once.
{$group:{
_id: "$labels.description",
total: {$sum:1},
colours: {$addToSet:"$colours.description"}
}},
// Flatten the set of arrays into a single array of colour names.
{
$project:{
_id:0,
description:"$_id",
total:1,
colours: {
$reduce:{
input: "$colours",
initialValue: [],
in: {$concatArrays: ["$$value", "$$this"]}
}
}
}
},
// One document per colour name; labels without colours are kept.
{
$unwind: {
path:"$colours",preserveNullAndEmptyArrays: true
}
},
// Count per (description, total, colour); entries without a colour count as 0.
{
$group:{
_id:{
description:"$description",
total:"$total",
colour:"$colours"
},
count: {
$sum: {$cond:[{$ifNull:["$colours",false]},1,0]}
}
}
},
// Regroup per description; zero-count entries are dropped via $$REMOVE.
{
$group:{
_id:{
description:"$_id.description",
total:"$_id.total"
},
colours: {
$push: {
$cond: [{$gt:["$count",0]},
{
"_id":"$_id.colour",
total:"$count"
},
"$$REMOVE"
]
}
}
}
},
// Lift description and total out of the group key.
{
$project: {
_id:0,
description: "$_id.description",
total: "$_id.total",
colours: 1
}
}
]);
**Edit In your answer, you are missing the Yellows for Red and Dog because you are taking the first item from $result with $arrayElemAt: ["$result.description", 0].
If description is a colour, do you also want to include the counts for itself in colours?
Never mind, you've updated the answer

Mongoose subquery

I have a collection that looks like below:
[
{
"orderNum": "100",
"createdTime": ISODate("2020-12-01T21:00:00.000Z"),
"amount": 100,
"memo": "100memo",
"list": [
1
]
},
{
"orderNum": "200",
"createdTime": ISODate("2020-12-01T21:01:00.000Z"),
"amount": 200,
"memo": "200memo",
"list": [
1,
2
]
},
{
"orderNum": "300",
"createdTime": ISODate("2020-12-01T21:02:00.000Z"),
"amount": 300,
"memo": "300memo"
},
{
"orderNum": "400",
"createdTime": ISODate("2020-12-01T21:03:00.000Z"),
"amount": 400,
"memo": "400memo"
},
]
and I'm trying to get the total amount of orders that were created before order# 300 (so order#100 and #200, total amount is 300).
Does anyone know how to get it via Mongoose?
You can use this one:
// Total `amount` of all orders that sort before order "300".
// NOTE(review): orderNum is a string in the sample data, so the sort is presumably lexicographic - verify.
db.collection.aggregate([
{ $sort: { orderNum: 1 } }, // by default the order of documents in a collection is undetermined
{ $group: { _id: null, data: { $push: "$$ROOT" } } }, // put all documents into one document
{ $set: { data: { $slice: ["$data", { $indexOfArray: ["$data.orderNum", "300"] }] } } }, // keep only the elements in front of order "300"
{ $unwind: "$data" }, // transform back to documents
{ $replaceRoot: { newRoot: "$data" } },
{ $group: { _id: null, total_amount: { $sum: "$amount" } } } // make summary
])
Actually it is not needed to $unwind and $group, so the shortcut would be this:
// Shortcut: sum the amounts straight from the sliced array instead of
// unwinding and grouping again.
db.collection.aggregate([
// Sort first so that the array built below has a defined order.
{ $sort: { orderNum: 1 } },
// Collect every document into one array.
{ $group: { _id: null, data: { $push: "$$ROOT" } } },
// Keep only the elements in front of order "300".
{ $set: { data: { $slice: ["$data", { $indexOfArray: ["$data.orderNum", "300"] }] } } },
// "$data.amount" resolves to the array of amounts, which $sum adds up.
{ $project: { total_amount: { $sum: "$data.amount" } } }
])
But the answer from @turivishal is even better.
Update for additional field
// Single $set stage: trims `data` to the orders in front of "300" and copies
// the memo stored at the position of order "300" into `memo`.
{
$set: {
data: { $slice: ["$data", { $indexOfArray: ["$data.orderNum", "300"] }] },
// Reads the memo at the index where orderNum equals "300".
memo: { $arrayElemAt: [ "$data.memo", { $indexOfArray: ["$data.orderNum", "300"] } ] }
}
}
or
{ $set: { data: { $slice: ["$data", { $indexOfArray: ["$data.orderNum", "300"] }] } } },
{ $set: { memo: { $last: { "$data.memo" } } },
$match orderNum less than 300
$group by null and get totalAmount using $sum of amount
// Mongoose variant: total `amount` of all orders whose orderNum is below "300".
YourSchemaModel.aggregate([
// NOTE(review): orderNum is a string in the sample data, so $lt presumably
// compares text rather than numbers - verify for order numbers of other lengths.
{ $match: { orderNum: { $lt: "300" } } },
// Fold every remaining order into one document holding the sum.
{
$group: {
_id: null,
totalAmount: { $sum: "$amount" }
}
}
])
Playground

Problem with grouping with conditional sum in MongoDB

I'm trying to partition some documents by each of the previous 6 months and then compute the conditional sum of certain fields that satisfy certain conditions. The problem I'm having is that I can't see a way to do this in a less verbose manner. I'm basically looking for a way to iterate over the previous 6 months and then get the conditional sum of the field.
Below is what I've done :
// One $facet branch per month; this one totals the January 2020 bookings,
// split by approval status.
[
  {
    $facet: {
      JanuaryTotal: [
        // Half-open range: from 1 January up to, but not including, 1 February.
        {
          $match: {
            bookingDate: {
              $gte: ISODate('2020-01-01T00:00:00.000Z'),
              $lt: ISODate('2020-02-01T00:00:00.000Z')
            }
          }
        },
        // Route the cost into the column matching the booking's status.
        // `cost` is a string in the sample document ("222"); $sum ignores
        // non-numeric values, so convert it to a number first.
        {
          $project: {
            _id: 0,
            approved: {
              $cond: [{ $eq: ['$approvalStatus', 'approved'] }, { $toDouble: '$cost' }, 0]
            },
            pending: {
              $cond: [{ $eq: ['$approvalStatus', 'pending'] }, { $toDouble: '$cost' }, 0]
            },
            denied: {
              $cond: [{ $eq: ['$approvalStatus', 'denied'] }, { $toDouble: '$cost' }, 0]
            }
          }
        },
        // Collapse the month into a single document with one total per status.
        {
          $group: {
            _id: null,
            sumApproved: { $sum: '$approved' },
            // Key spelling corrected from 'summPending' to match its siblings.
            sumPending: { $sum: '$pending' },
            sumDenied: { $sum: '$denied' }
          }
        }
      ]
    }
  }
]
Sample Document :
{
"_id":"5e45b621da68610f13aa0cba",
"type":"international",
"booking":"2222222",
"bookingId":"22222222",
"chatId":"Q22222222",
"approvalStatus":"approved",
"pax": "2",
"cost":"222",
"charged":"222",
"chargedGCT": "222.22",
"commRate": "2",
"commUSD":"22.22",
"commJMD":"2222.02",
"bookingDate": ISODate('2020-02-01T18:00:55.000+00:00),
"tourDate": ISODate('2020-02-03T18:00:55.000+00:00'),
"clientName":"test",
"agent":{
"agentName":"test",
"agentId":{
"$numberInt":"1"
}
}
}
Is there a way to iterate through the previous 6 months in a more succinct manner?
You can try below query :
db.collection.aggregate([
/** Match docs fall under previous 6 months */
{
$match: {
'bookingDate': {
'$gte': ISODate('2019-09-01T00:00:00.000Z'),
'$lt': ISODate('2020-02-01T00:00:00.000Z')
}
}
},
/** This project is to reduce document size by opting lesser fields - Optional if dataset size is less */
{ $project: { _id: 0, cost: { $toInt: '$cost' }, approvalStatus: 1, bookingDate: 1 } },
/** Grouping on month & approvalStatus + cost */
{ $group: { _id: { month: { $month: "$bookingDate" }, approvalStatus: '$approvalStatus' }, cost: { $sum: '$cost' } } },
/** Grouping on month pushing { approvalStatus + cost } objects to data field */
{ $group: { _id: '$_id.month', data: { $push: { approvalStatus: '$_id.approvalStatus', cost: '$cost' } } } },
/** converting month numbers to string Ex.:- 1 as 'Jan' - Optional if no need to be converted */
{
$project: {
_id: 0, data: 1, month: {
$let: {
vars: {
monthsInString: ['', 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'July', 'Aug', 'Sept', 'Oct', 'Nov', 'Dec']
},
in: {
$arrayElemAt: ['$$monthsInString', '$_id']
}
}
}
}
}
])
Test : MongoDB-Playground
// Per-month cost totals, split by approval status, for the bookings between
// 1 September 2019 and 1 February 2020.
db.collection.aggregate([
  {
    $match: {
      bookingDate: {
        $gte: ISODate("2019-09-01T00:00:00.000Z"),
        // Exclusive upper bound: with $lte a booking at exactly midnight on
        // 1 February would be counted and would open a separate February group.
        $lt: ISODate("2020-02-01T00:00:00.000Z")
      }
    }
  },
  {
    $group: {
      // Month number (1-12) of the booking date.
      _id: { $month: "$bookingDate" },
      // Each total only adds the cost of documents with the matching status;
      // cost is converted with $toDouble before it is summed.
      sumApproved: {
        $sum: { $cond: [{ $eq: ["$approvalStatus", "approved"] }, { $toDouble: "$cost" }, 0] }
      },
      sumPending: {
        $sum: { $cond: [{ $eq: ["$approvalStatus", "pending"] }, { $toDouble: "$cost" }, 0] }
      },
      sumDenied: {
        $sum: { $cond: [{ $eq: ["$approvalStatus", "denied"] }, { $toDouble: "$cost" }, 0] }
      }
    }
  },
  // Expose the group key as monthNumber and pass the three totals through.
  {
    $project: {
      _id: 0,
      monthNumber: "$_id",
      sumApproved: "$sumApproved",
      sumPending: "$sumPending",
      sumDenied: "$sumDenied"
    }
  }
]);
https://mongoplayground.net/p/iTyRuiQc8EI