mongodb math operations on two fields in collection - mongodb

Hello, I have an exercise to filter all countries where the GDP per person is greater than 0.05. I need to use the population from the latest year. Also, the country code should have at least 3 characters. My collection looks like this:
mondial.countries
{
"_id" : ObjectId("581cb5a519ec2deb4ba71c03"),
"name" : "Germany",
"code" : "GER",
"capital" : "RN-Niamey-Niamey",
"area" : 1267000,
"gdp" : 7304,
"inflation" : 1.9,
"unemployment" : null,
"independence" : ISODate("1960-08-03T00:00:00Z"),
"government" : "republic",
"population" : [
{
"year" : 1950,
"value" : 2559703
},
{
"year" : 1960,
"value" : 3337141
},
{
"year" : 1970,
"value" : 4412638
},
{
"year" : 1977,
"value" : 5102990
},
{
"year" : 1988,
"value" : 7251626
},
{
"year" : 1997,
"value" : 9113001
},
{
"year" : 2001,
"value" : 11060291
},
{
"year" : 2012,
"value" : 17138707
}
]
}
For this example I have to take the population from the year 2012 and divide it by gdp, and then display it if it's greater than 50000. I have been trying with a function in JS, but I don't know how to show the fields where the result of my operation is greater than 5000. What is the easiest way to do this?
// Asker's attempt: iterate over the matching countries and print population / gdp.
// NOTE(review): "code" is a string in the sample document, so {$gte: 3} compares it
// against a number rather than testing its length.
var countries = db.mondial.countries.find({
"code": {$gte: 3},
});
while(countries.hasNext()) {
// Despite its name, "gdp" holds the whole country document returned by the cursor.
gdp = countries.next()
// NOTE(review): "countries" is the cursor, not a document, and "population" in the
// sample is an array of {year, value} entries, so this division cannot yield a number.
gdpresult = countries.population / gdp.gdp
print(gdpresult)
}

I don't know if I understood correctly, but see if this helps:
// Countries with a code of at least 3 characters whose latest population divided
// by gdp exceeds the threshold from the question (adjust 50000 as needed).
db.mondial.countries.aggregate([
  {
    $match: {
      // $strLenCP only accepts strings, so exclude null/missing codes first.
      code: { $type: "string" },
      // $divide fails on a zero divisor; this also drops null/missing gdp.
      gdp: { $gt: 0 },
      // Comparing "$code" with 3 directly would compare a string to a number;
      // the requirement is about the length of the code.
      $expr: { $gte: [{ $strLenCP: "$code" }, 3] }
    }
  },
  {
    // Pick the population entry that carries the highest year.
    $addFields: {
      latest: {
        $arrayElemAt: [
          {
            $filter: {
              input: "$population",
              as: "p",
              cond: { $eq: ["$$p.year", { $max: "$population.year" }] }
            }
          },
          0
        ]
      }
    }
  },
  {
    $project: {
      name: 1,
      code: 1,
      gdp: 1,
      year: "$latest.year",
      population: "$latest.value",
      gdpresult: { $divide: ["$latest.value", "$gdp"] }
    }
  },
  // Countries without any population entry yield a null gdpresult and are dropped here too.
  { $match: { gdpresult: { $gt: 50000 } } }
])

Related

Counting distinct number of users from beginning

I have a MongoDB aggregation pipeline that has been frustrating me for a while now, because it never seems to be accurate or correct to my needs. The aim is to count the number of new unique users each day per chatbot, starting from the very beginning.
Here's what my pipeline looks like right now.
// Asker's pipeline, intended to count new users per bot per day.
[
{
// Keep bot_id/customer_id and use the incoming log time, falling back to the outcome log time.
"$project" : {
"_id" : 0,
"bot_id" : 1,
"customer_id" : 1,
"timestamp" : {
"$ifNull" : [
'$incoming_log.created_at', '$outcome_log.created_at'
]
}
}
},
{
// Take the first 10 characters of the timestamp and parse them back with $dateFromString.
"$project" : {
"customer_id" : 1,
"bot_id" : 1,
"timestamp" : {
"$dateFromString" : {
"dateString" : {
"$substr" : [
"$timestamp", 0, 10
]
}
}
}
}
},
{
// One document per customer with the set of bots seen for that customer.
// NOTE(review): there is no $sort before this stage, so $first depends on input order,
// and grouping by customer_id alone gives every bot of a customer the same timestamp.
"$group" : {
"_id" : "$customer_id",
"timestamp" : {
"$first" : "$timestamp"
},
"bot_id" : {
"$addToSet" : "$bot_id"
}
}
},
{
// Back to one document per (customer, bot) pair.
"$unwind" : "$bot_id"
},
{
// Here "$_id" is the customer_id produced by the previous $group.
"$group" : {
"_id" : {
"bot_id" : "$bot_id",
"customer_id" : "$_id"
},
"timestamp" : {
"$first" : "$timestamp"
}
}
},
{
// Flatten the compound _id into top-level fields.
"$project" : {
"_id" : 0,
"timestamp" : 1,
"customer_id" : "$_id.customer_id",
"bot_id" : "$_id.bot_id"
}
},
{
// Count the (customer, bot) pairs per timestamp and bot.
"$group" : {
"_id": {
"timestamp" : "$timestamp",
"bot_id" : "$bot_id"
},
"new_users" : {
"$sum" : 1
}
}
},
{
// Final output shape: timestamp, bot_id, new_users.
"$project" : {
"_id" : 0,
"timestamp" : "$_id.timestamp",
"bot_id" : "$_id.bot_id",
"new_users" : 1
}
}
]
Some sample data for an idea of what the data looks like...
{
"mid" : "...",
"bot_id" : "...",
"bot_name" : "JOBBY",
"customer_id" : "U122...",
"incoming_log" : {
"created_at" : ISODate("2020-12-08T09:14:16.237Z"),
"event_payload" : "",
"event_type" : "text"
},
"outcome_log" : {
"created_at" : ISODate("2020-12-08T09:14:18.145Z"),
"distance" : 0.25,
"incoming_msg" : "🥺"
}
}
My expected outcome is something along the lines of:
{
"new_users" : 1187.0,
"timestamp" : ISODate("2021-01-27T00:00:00.000Z"),
"bot_id" : "5ffd......."
},
{
"new_users" : 1359.0,
"timestamp" : ISODate("2021-01-27T00:00:00.000Z"),
"bot_id" : "6def......."
}
Have I overcomplicated my pipeline somewhere? I seem to get a reasonable number of new users per bot each day, but for some reason my colleague tells me that the number is too high. I need some tips, please!
I have really no idea what you are looking for.
"The aim is to count the number of new unique users each day per chatbot, starting from the very beginning."
What is "new unique users"? What do you mean by "starting from the very beginning"? You ask for count per day but you use {"$group": {"_id": "$customer_id", "timestamp": { "$first": "$timestamp" } } }
For me your grouping does not make any sense. With only one single sample document, it is almost impossible to guess what you like to count.
Regarding group per day: I prefer to work always with Date values, rather than strings. It is less error prone. Maybe you have to consider time zones, because UTC midnight is not your local midnight. When you work with Dates then you have better control over it.
The $project stages are useless when you do $group afterwards. Typically you have only one $project stage at the end.
So, here is something to start with:
// New users per bot per day: a customer counts as "new" for a bot on the first
// (UTC) day that customer appears in that bot's logs.
db.collection.aggregate([
  {
    // Event time: incoming log time, falling back to the outcome log time.
    $set: {
      day: {
        $dateToParts: {
          date: { $ifNull: ["$incoming_log.created_at", "$outcome_log.created_at"] }
        }
      }
    }
  },
  {
    // Grouping by customer_id alone would lose the bot; key on the (bot, customer) pair.
    // $min gives the earliest day regardless of document order (no $sort/$first needed).
    $group: {
      _id: { bot_id: "$bot_id", customer_id: "$customer_id" },
      first_seen: {
        $min: { $dateFromParts: { year: "$day.year", month: "$day.month", day: "$day.day" } }
      }
    }
  },
  {
    // Each pair contributes exactly once, on its first day.
    $group: {
      _id: { bot_id: "$_id.bot_id", timestamp: "$first_seen" },
      new_users: { $sum: 1 }
    }
  },
  {
    $project: {
      _id: 0,
      bot_id: "$_id.bot_id",
      timestamp: "$_id.timestamp",
      new_users: 1
    }
  },
  { $sort: { timestamp: 1, bot_id: 1 } }
]);

MongoDB: get sum by year/month with nested values

I'm trying to sum (spending by month/year) of a collection with nested amounts - with no luck.
This is the collection (extract):
[
{
"_id" : ObjectId("5faaf88d0657287993e541a5"),
"segment" : {
"l1" : "Segment A",
"l2" : "001"
},
"invoiceNo" : "2020.10283940",
"invoicePos" : 3,
"date" : ISODate("2019-09-06T00:00:00.000Z"),
"amount" : {
"document" : {
"amount" : NumberDecimal("125.000000000000"),
"currCode" : "USD"
},
"local" : {
"amount" : NumberDecimal("123.800000000000"),
"currCode" : "CHF"
},
"global" : {
"amount" : NumberDecimal("123.800000000000"),
"currCode" : "CHF"
}
}
},
...
]
I would like to sum up the aggregated invoice volume per month in "global" currency.
I tried this query on MongoDB:
// Asker's attempt: total "amount.global.amount" per month/year.
// NOTE(review): the stages are passed as separate arguments rather than as one array.
db.invoices.aggregate(
// Derive month and year from the invoice date and keep the nested amount object.
{$project : {
month : {$month : "$date"},
year : {$year : "$date"},
amount : 1
}},
// "amount" is an embedded document in the sample above, not an array.
{$unwind: '$amount'},
// One output document per (month, year) with the summed global amount.
{$group : {
_id : {month : "$month" ,year : "$year" },
total : {$sum : "$amount.global.amount"}
}})
I am getting as result this:
/* 1 */
{
"_id" : ObjectId("5faaf88d0657287993e541a5"),
"amount" : {
"document" : {
"amount" : NumberDecimal("125.000000000000"),
"currCode" : "USD"
},
"local" : {
"amount" : NumberDecimal("123.800000000000"),
"currCode" : "CHF"
},
"global" : {
"amount" : NumberDecimal("123.800000000000"),
"currCode" : "CHF"
}
},
"month" : 9,
"year" : 2019
}
/* 2 */
{
"_id" : ObjectId("5faaf88d0657287993e541ac"),
"amount" : {
"document" : {
"amount" : NumberDecimal("105.560000000000"),
"currCode" : "CHF"
},
"local" : {
"amount" : NumberDecimal("105.560000000000"),
"currCode" : "CHF"
},
"global" : {
"amount" : NumberDecimal("105.560000000000"),
"currCode" : "CHF"
}
},
"month" : 11,
"year" : 2020
}
This however does not sum up all invoices per month, but looks like single invoice lines - no aggregation.
I would like to get a result like this:
[
{
"month": 11,
"year": 2020,
"amount" : NumberDecimal("99999.99")
},
{
"month": 10,
"year": 2020,
"amount" : NumberDecimal("99999.99")
},
{
"month": 9,
"year": 2020,
"amount" : NumberDecimal("99999.99")
}
]
What is wrong with my query?
Would this be helpful?
db.invoices.aggregate([
  // Bucket invoices by the calendar month and year of "date" and total the global amount.
  {
    $group: {
      _id: { month: { $month: "$date" }, year: { $year: "$date" } },
      total: { $sum: "$amount.global.amount" }
    }
  },
  // Newest period first.
  { $sort: { "_id.year": -1, "_id.month": -1 } }
])
Playground
If you need any extra explanation let me know, but the code is pretty short and self-explanatory.
In principle your aggregation pipeline is fine, but there are a few mistakes:
An aggregation pipeline expects an array
$unwind is useless, because $amount is not an array. One element in -> one document out
You can use date function directly
So, short and simple:
db.invoices.aggregate([
  {
    // Date operators work directly in the $group key; no prior $project or $unwind is needed.
    $group: {
      _id: { year: { $year: "$date" }, month: { $month: "$date" } },
      amount: { $sum: "$amount.global.amount" }
    }
  },
  // $group does not order its output; list the newest month first, as in the expected result.
  { $sort: { "_id.year": -1, "_id.month": -1 } },
  // Flatten to the requested { month, year, amount } shape.
  { $project: { _id: 0, month: "$_id.month", year: "$_id.year", amount: 1 } }
])

Group based on discrete date ranges

I am new to MongoDB and I've been struggling to get a specific query to work without any luck.
I have a collection with millions of documents having a date and an amount, I want to get the aggregations for specific periods of time.
For example, I want to get the count, amount summations for the periods between 1/1/2015 - 15/1/2015 and between 1/2/2015 - 15/2/2015
A sample collection is
{ "_id" : "148404972864202083547392254", "account" : "3600", "amount" : 50, "date" : ISODate("2017-01-01T12:02:08.642Z")}
{ "_id" : "148404972864202085437392254", "account" : "3600", "amount" : 50, "date" : ISODate("2017-01-03T12:02:08.642Z")}
{ "_id" : "148404372864202083547392254", "account" : "3600", "amount" : 70, "date" : ISODate("2017-01-09T12:02:08.642Z")}
{ "_id" : "148404972864202083547342254", "account" : "3600", "amount" : 150, "date" : ISODate("2017-01-22T12:02:08.642Z")}
{ "_id" : "148404922864202083547392254", "account" : "3600", "amount" : 200, "date" : ISODate("2017-02-02T12:02:08.642Z")}
{ "_id" : "148404972155502083547392254", "account" : "3600", "amount" : 30, "date" : ISODate("2017-02-7T12:02:08.642Z")}
{ "_id" : "148404972864202122254732254", "account" : "3600", "amount" : 10, "date" : ISODate("2017-02-10T12:02:08.642Z")}
For date ranges between 1/1/2017 - 10/1/2017 and 1/2/2017 - 10/2/2017 the output would be like this:
1/1/2017 - 10/1/2017 - count =3, amount summation: 170
10/2/2017 - 15/2/2017 - count =2, amount summation: 40
Is it possible to work with such different date ranges? The code would be in Java, but as an example in mongo, can someone please help me?
There must be a more elegant solution than this. Anyways you can wrap it into a function and generalize date related arguments.
First, you need to make a projection at the same time deciding into which range an item goes (note the huge $switch expression). By default, an item goes into 'null' range.
Then, you filter out results that didn't match your criteria (i.e. range != null).
The very last step is to group items by the range and make all needed calculations.
// Half-open [from, to) periods to report on. Add an entry here to add a period;
// the $switch branches below are generated from this table.
var ranges = [
    { from : ISODate("2017-01-01T00:00:00.000Z"), to : ISODate("2017-01-10T00:00:00.000Z") },
    { from : ISODate("2017-02-01T00:00:00.000Z"), to : ISODate("2017-02-10T00:00:00.000Z") }
];

db.items.aggregate([
    // Tag every item with the label of the period it falls into (null when it matches none).
    { $project : {
        amount : true,
        account : true,
        date : true,
        range : {
            $switch : {
                branches : ranges.map(function (r) {
                    return {
                        case : {
                            $and : [
                                { $gte : [ "$date", r.from ] },
                                { $lt : [ "$date", r.to ] }
                            ]
                        },
                        // Label such as "01/01/2017 - 10/01/2017".
                        then : { $concat : [
                            { $dateToString: { format: "%d/%m/%Y", date: r.from } },
                            { $literal : " - " },
                            { $dateToString: { format: "%d/%m/%Y", date: r.to } }
                        ] }
                    };
                }),
                default : null
            }
        }
    } },
    // Drop items that fell into no period.
    { $match : { range : { $ne : null } } },
    // Count and total per period label.
    { $group : {
        _id : "$range",
        count : { $sum : 1 },
        "amount summation" : { $sum : "$amount" }
    } }
])
Based on your data it will give the following results*:
{ "_id" : "01/02/2017 - 10/02/2017", "count" : 2, "amount summation" : 230 }
{ "_id" : "01/01/2017 - 10/01/2017", "count" : 3, "amount summation" : 170 }
*I believe you have few typos in your questions, that's why the data look different.

Multiplication on nested fields on MongoDB

In a database in MongoDB I am trying to group some data by their date (one group for each day of the year), and then add an additional field that would be the result of the multiplication of two of the already existing fields.
The data structure is:
{
"_id" : ObjectId("567a7c6d9da4bc18967a3947"),
"units" : 3.0,
"price" : 50.0,
"name" : "Name goes here",
"datetime" : ISODate("2015-12-23T10:50:21.560+0000")
}
I first tried a two stage approach using $project and then $group like this
// First attempt: compute unitsprice per document, then bucket the documents by calendar day.
db.things.aggregate(
[
{
// Pass the original fields through and add unitsprice = price * units.
$project: {
"_id" : 1,
"name" : 1,
"units" : 1,
"price" : 1,
"datetime":1,
"unitsprice" : { $multiply: [ "$price", "$units" ] }
}
},
{
// Group key: day / month / year extracted from datetime.
$group: {
"_id" : {
"day" : {
"$dayOfMonth" : "$datetime"
},
"month" : {
"$month" : "$datetime"
},
"year" : {
"$year" : "$datetime"
}
},
// Collect every projected document (including unitsprice) that falls on that day.
"things" : {
"$push" : "$$ROOT"
}
}
}
],
)
In this case, the first step (the $project) gives the expected output (with the expected value of unitsprice), but then when doing the second $group step, it outputs this error:
"errmsg":"$multiply only supports numeric types, not String",
"code":16555
I tried also turning around things, doing the $group step first and then the $project
// Second attempt: bucket by calendar day first, then try to add unitsprice.
db.things.aggregate(
[
{
// Group key: day / month / year extracted from datetime.
$group: {
"_id" : {
"day" : {
"$dayOfMonth" : "$datetime"
},
"month" : {
"$month" : "$datetime"
},
"year" : {
"$year" : "$datetime"
}
},
// $$ROOT is the original document here, so each element of things carries price and units.
"things" : {
"$push" : "$$ROOT"
}
}
},
{
$project: {
"_id" : 1,
"things":{
"name" : 1,
"units" : 1,
"price" : 1,
"datetime":1,
// NOTE(review): after $group the documents only have _id and things, so "$price" and
// "$units" no longer exist at the top level, consistent with the null result reported below.
"unitsprice" : { $multiply: [ "$price", "$units" ] }
}
}
}
],
);
But in this case, the result of the multiplication is: unitsprice:null
Is there any way of doing this multiplication? Also, it would be nice to do it in a way that the output would not have nested fields, so it would look like:
{"_id":
"units":
"price":
"name":
"datetime":
"unitsprice":
}
Thanks in advance
PS:I am running MongoDB 3.2
Finally found the error. During the import, a few of the price fields were created as strings. Surprisingly, the error didn't come up when first doing the multiplication in the $project step (the output was normal until it reached the first wrong field, then it stopped), but only when doing the $group step.
In order to find the text fields I used this query:
// List the documents whose price was stored as a string (BSON type 2).
db.things.find({ price: { $type: "string" } });
Thanks for the hints

Order by value in timeseries mongodb

I have a timeseries collection like this ( mongodb documentation sample)
_id: "20101010/site-1/apache_pb.gif",
metadata: {
date: ISODate("2000-10-10T00:00:00Z"),
site: "site-1",
page: "/apache_pb.gif" },
daily: 5468426,
hourly: {
"0": 227850,
"1": 210231,
"2" : 12344,
"23": 20457 },
minute: {
"0": 3612,
"1": 3241,
...
"1439": 2819 }
What is the best solution, using the aggregation framework, to sort by the values of hourly? For example, I want to order the hourly entries from the lowest to the highest value, in order to get something like this:
{
"2": 12344,
"23" : 20457,
"1" : 21031,
"0" : 227850
}
Thanks
Hi, I ran into the same problem; at that time I changed my document structure as below:
{
"_id" : "20101010/site-1/apache_pb.gif",
"metadata" : {
"date" : ISODate("2000-10-10T00:00:00Z"),
"site" : "site-1",
"page" : "/apache_pb.gif"
},
"daily" : 5468426,
"hourly" : [
{
"hour" : 0,
"value" : 227850
},
{
"hour" : 1,
"value" : 210231
},
{
"hour" : 2,
"value" : 12344
},
{
"hour" : 23,
"value" : 20457
}
],
"minute" : [
{
"min" : 0,
"value" : 3612
},
{
"min" : 1,
"value" : 3241
},
{
"min" : 1439,
"value" : 2819
}
]
}
In your case you want to sort the hourly data by value from lowest to highest, so I wrote the following aggregation query, which may solve your problem:
// Return, per document, its hourly entries ordered from the lowest to the highest value.
db.collectionName.aggregate([
  // One document per hourly entry; the source document's _id is kept on each of them.
  { "$unwind": "$hourly" },
  { "$project": { "hour": "$hourly.hour", "value": "$hourly.value" } },
  // The question asks for ordering by value, ascending (not by hour).
  { "$sort": { "value": 1 } },
  // Reassemble per source document instead of merging every document into one group;
  // $push appends in the order documents arrive, i.e. the sorted order.
  {
    "$group": {
      "_id": "$_id",
      "hourlyData": { "$push": { "hour": "$hour", "value": "$value" } }
    }
  }
]).pretty()