mongodb sum aggregation return float instead of integer - mongodb

I have documents whose embedded history field looks like this:
first_pn_data = {'_cls': 'InstallationEmbeddedHistory',
'coverage': 0.8197712971965834,
'date': datetime.datetime(2020, 9, 7, 0, 0),
'estimated_installs': 39349022,
'popularity_rank': 0,
'rank': 1851}
second_pn_data = {'_cls': 'InstallationEmbeddedHistory',
'coverage': 0.8197712971965834,
'date': datetime.datetime(2020, 9, 7, 0, 0),
'estimated_installs': 23412618,
'popularity_rank': 0,}
The estimated_installs field is an integer.
when I run the following query:
# Aggregation pipeline: total estimated installs per history date for two packages.
query = [
# Stage 1: keep only the documents whose package_name is one of the two listed.
{
"$match": {
"package_name": {
"$in": ["first_pn", "second_pn"]
}
}
},
# Stage 2: emit one document per element of the embedded `history` array.
{
"$unwind": "$history"
},
# Stage 3: group the unwound entries by their date and add up estimated_installs.
# NOTE(review): $sum presumably yields a float as soon as any summed value is stored
# as a float, so a fractional total suggests at least one history entry is not an
# integer — verify the stored types.
{
"$group": {
"_id": "$history.date",
"total": {
"$sum": "$history.estimated_installs"
}
}
}
]
the result for the above date is:
{'_id': datetime.datetime(2020, 9, 7, 0, 0), 'total': 62761640.54968266}
while I expect the total=62761640.
I don't know why the result is float and has a decimal.
Can someone help me with this?

Related

Trying to aggregate based on substring matches in mongodb 3.2

Let's say my collection has documents with ExpName field and Rname field. Expname are all of the type - exp_1, exp_2 etc. Rname is a character string with 4 dashes for example. "As-34rt-d3r5-4453f-er4"
I need to aggregate by experiment name and by the Rname with its last two dash-separated segments removed. For the example above, that would be "As-34rt-d3r5".
question 1) how do i incorporate this in one table?
question 2) I solved this in a dirty fashion for one experiment: because the number of characters seemed to be almost the same, I could just take the first 13 characters, which happened to be the substring without the last two segments. Is there a correct way to do this if the text is not so uniform?
// Asker's first attempt: counts the exp_1 documents per 13-character prefix of RName.
db.getCollection('rest01').aggregate(
// Stage 1: restrict the input to a single experiment.
{$match : {ExpName : "exp_1"}},
// Stage 2: the object literal repeats the `_id` key; in a JavaScript object literal the
// later value replaces the earlier one, so only the $substr expression (characters 0-12
// of RName) ends up as the group key.
{$group: {_id :"$ExpName",_id : {$substr : ["$RName", 0,13]}, total: { $sum:1 }}
})
Ideally I would like to have a result that says Expname, Rnamesubstring, count. This code snippet was for exp_1 one alone. Is it even possible to get it all in one result?
Here is how you could do that:
// Count documents per (ExpName, first three dash-separated parts of RName).
db.getCollection('rest01').aggregate([
  {
    // Stage 1: keep ExpName and break RName apart at every dash into "splitRName".
    $project: {
      "ExpName": 1,
      "splitRName": { $split: ["$RName", "-"] }
    }
  },
  {
    // Stage 2: the group key combines ExpName with parts 0, 1 and 2 of the split
    // RName, re-joined with dashes; `total` counts the documents in each group.
    $group: {
      _id: {
        "ExpName": "$ExpName",
        "Rnamesubstring": {
          $concat: [
            { $arrayElemAt: ["$splitRName", 0] },
            "-",
            { $arrayElemAt: ["$splitRName", 1] },
            "-",
            { $arrayElemAt: ["$splitRName", 2] }
          ]
        }
      },
      total: { $sum: 1 }
    }
  }
]);
In case you want to do it in MongoDB v3.2 (as stated in your comment), here is something that is not exactly pretty but works:
// MongoDB 3.2 variant (no $split): group by ExpName plus the part of RName that
// precedes its third dash, and count the documents in each group.
db.getCollection('rest01').aggregate({
$group: {
_id: { // the group key is a compound of two parts:
"ExpName": "$ExpName", // 1) the experiment name as-is
"Rnamesubstring": { // 2) RName cut off just before its third dash
$substr:
[
"$RName",
0, // start at the first character
{ // length = index of the third dash, or 1000 when there is none
$ifNull:
[
{
$arrayElemAt:
[{
$filter: {
input: {
$map: {
// candidate character positions; dashes beyond index 30 are not seen unless this list is extended
input: [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30 /* add numbers as required */ ],
as: "index",
in: {
$cond: {
if: { $eq: [ "-", { $substr: [ "$RName", "$$index", 1 ] } ] }, // is the single character at this position a dash?
then: "$$index", // yes: keep the position
else: null // no: mark it for removal by the $filter below
}
}
}
},
as: "item",
cond: { $ne: [ null, "$$item" ] } // drop the null markers, leaving only dash positions
}
},
2 ] // element 2 of the dash positions = index of the third dash
},
1000 // fallback length when no third dash was found (malformed or missing RName): take the whole string
]
}
]
}
},
total: { $sum: 1 } // number of documents per group
}
})
Update 2: You seem to be having some data related issues as per your comments (so either missing RName values or improperly structured ones, i.e. without the required number of sections with dashes in between). I have updated the above statement for v3.2 to deal with these rows. You may want to find out, though, which rows actually cause this behaviour. They can be easily identified using the following statement:
// Diagnostic: list the documents for which no third dash is found among the first
// 31 characters of RName (including documents without an RName at all).
db.getCollection('rest01').aggregate([
  {
    $project: {
      _id: 1,
      RName: 1,
      // Index of the third dash in RName; the field is left out when there is none.
      "Rnamesubstring": {
        $arrayElemAt: [
          {
            $filter: {
              input: {
                $map: {
                  // Character positions to probe; extend the list for longer strings.
                  input: [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30 ],
                  as: "index",
                  in: {
                    // Keep the position when the character there is a dash, otherwise null.
                    $cond: {
                      if: { $eq: ["-", { $substr: ["$RName", "$$index", 1] }] },
                      then: "$$index",
                      else: null
                    }
                  }
                }
              },
              as: "item",
              // Discard the nulls so only dash positions remain.
              cond: { $ne: [null, "$$item"] }
            }
          },
          2
        ]
      }
    }
  },
  {
    // Only documents where the projected field could not be computed.
    $match: { "Rnamesubstring": { $exists: false } }
  }
]);

Mongo 3.2 query timeseries value at specific time

I have some timeseries data stored in Mongo with one document per account, like so:
{
"account_number": 123,
"times": [
datetime(2017, 1, 2, 12, 34, 56),
datetime(2017, 3, 4, 17, 18, 19),
datetime(2017, 3, 11, 0, 1, 11),
]
"values": [
1,
10,
9001,
]
}
So, to be clear in the above representation account 123 has a value of 1 from 2017-01-02 12:34:56 until it changes to 10 on 2017-03-04 17:18:19, which then changes to 9001 at 2017-03-11, 00:01:11.
There are many accounts and each account's data is all different (could be at different times and could have more or fewer value changes than other accounts).
I'd like to query for each user's value at a given time, e.g. "What was each user's value at 2017-01-30 02:03:04?" This would return 1 for the above account, as it was set to 1 before the given time and did not change until after the given time.
It looks like $zip would be useful, but that's only available in Mongo 3.4, and I'm using 3.2 and have no plans to upgrade soon.
Edit:
I can get a small part of the way there using:
> db.account_data.aggregate([{$unwind: '$times'}, {$unwind: '$values'}])
which returns something like:
{"account_number": 123, "times": datetime(2017, 1, 2, 12, 34, 56), "values": 1},
{"account_number": 123, "times": datetime(2017, 1, 2, 12, 34, 56), "values": 10},
#...
which isn't quite right as it is returning the cross product of times/values
This is possible using only 3.2 features. I tested with the Mingo library
var mingo = require('mingo')
// Sample account; timestamps carry an explicit "Z" so they are parsed as UTC on every machine.
var data = [{
  "account_number": 123,
  "times": [
    new Date("2017-01-02T12:34:56Z"),
    new Date("2017-03-04T17:18:19Z"),
    new Date("2017-03-11T00:01:11Z")
  ],
  "values": [1, 10, 9001]
}]
var maxDate = new Date("2017-01-30T02:03:04Z")
// 1. filter dates down to those less or equal to the maxDate
// 2. take the size of the filtered date array ("reached")
// 3. if nothing was reached yet, the account has no value at maxDate -> null
//    (without this guard the index would be -1, and $arrayElemAt counts negative
//    indexes from the END of the array, i.e. it would return the latest value)
// 4. otherwise subtract 1 from the size to get the index of the corresponding value
//    and look it up in the "values" array into the new "valueAtDate" field
// 5. project the extra fields
// Assumes "times" is sorted ascending and parallel to "values" — confirm for real data.
var result = mingo.aggregate(data, [{
  $project: {
    valueAtDate: {
      $let: {
        vars: {
          reached: {
            $size: {
              $filter: { input: "$times", as: "time", cond: { $lte: [ "$$time", maxDate ] } }
            }
          }
        },
        in: {
          $cond: [
            { $gt: [ "$$reached", 0 ] },
            { $arrayElemAt: [ "$values", { $subtract: [ "$$reached", 1 ] } ] },
            null
          ]
        }
      }
    },
    values: 1,
    times: 1
  }
}])
console.log(result)
// Outputs
[ { valueAtDate: 1,
values: [ 1, 10, 9001 ],
times:
[ 2017-01-02T12:34:56.000Z,
2017-03-04T17:18:19.000Z,
2017-03-11T00:01:11.000Z ] } ]
Not sure how to do the same with MongoDb 3.2, however from 3.4 you can do the following query:
// MongoDB 3.4+: return the value recorded at one exact timestamp of each document.
db.test.aggregate([
  {
    $project: {
      // Search with a real Date: a string such as "2017,3,11,0,1,11" never equals a
      // stored date, so the lookup would always report "not found" (-1).
      index: { $indexOfArray: [ "$times", new Date("2017-03-11T00:01:11Z") ] },
      values: true
    }
  },
  {
    $project: {
      // Only dereference a real position; with -1, $arrayElemAt would silently
      // return the LAST element of "values" instead of signalling a miss.
      resultValue: {
        $cond: [
          { $gte: [ "$index", 0 ] },
          { $arrayElemAt: [ "$values", "$index" ] },
          null
        ]
      }
    }
  }
])

Sum operation on documents with different structures

I'm trying to use the aggregation pipeline to build a report table (with sorting, paging, etc.) using MongoDB for my back end. I ran into a problem: my documents do not all share the same schema, but I need to add up all the values. I tried to use the $unwind stage, but it only works on arrays, so I have run out of alternatives.
Sample Documents
{
"_id": "first",
"apple": 6,
"pears": 7,
"total_fruits": "13"
},
{
"_id": "second",
"apple": 6,
"bananas": 2,
"total_fruits": "8"
}
Desired Result
{
"_id": "result",
"apple": 12,
"pears": 7,
"bananas": 2,
"total_fruits": "21"
}
Instead of aggregating, why not use mapReduce()?
/**
 * Sum one field across all documents of a collection with an inline map-reduce.
 *
 * Documents that do not have the field are skipped, and values are converted with
 * Number() so numeric strings such as "13" are added rather than concatenated.
 *
 * @param {string} collectionName - name of the collection to read from `db`.
 * @param {string} keyName - name of the field to sum.
 * @returns {*} whatever `db[collectionName].mapReduce` returns for inline output.
 */
function sumKeyFromCollection(collectionName, keyName) {
  // The map function is executed with `this` bound to the current document.
  // `keyName` is handed over through the `scope` option below, because a local
  // variable of this function is not available where the map function runs.
  var map = function () {
    var raw = this[keyName];
    if (raw !== undefined && raw !== null) {
      emit(keyName, Number(raw));
    }
  };
  // Adds up the emitted numbers; also valid when fed its own partial sums.
  var reduce = function (key, values) {
    var sum = 0;
    for (var i = 0; i < values.length; i++) {
      sum += values[i];
    }
    return sum;
  };
  return db[collectionName].mapReduce(map, reduce, {
    out: { inline: 1 },
    scope: { keyName: keyName }
  });
}
For your example documents:
{
"_id": "first",
"apple": 6,
"pears": 7,
"total_fruits": "13"
},
{
"_id": "second",
"apple": 6,
"bananas": 2,
"total_fruits": "8"
}
You could call the function on each field name:
console.log(sumKeyFromCollection("collectionName", "apple"));
> { "_id": "...", "apple": 12 }
Then use each of them to produce your resultant document:
// Every field that appears in the desired result document ("pears" included),
// summed one field at a time.
var fruits = [ "apple", "pears", "bananas", "total_fruits" ];
var results = [];
for (var i = 0; i < fruits.length; i++) {
  results.push(sumKeyFromCollection("collectionName", fruits[i]));
}
EDIT Also, to get every field name in a collection programmatically:
/**
 * List every top-level field name used by at least one document of a collection.
 *
 * Runs a map-reduce that emits each key of each document and writes the result to
 * the collection "<collectionName>_keys", then reads the distinct keys back.
 *
 * @param {string} collectionName - name of the collection to inspect.
 * @returns {*} the distinct `_id` values of the output collection, i.e. the field names.
 */
function getAllCollectionFieldNames(collectionName) {
  // Derive the output collection from the input so different collections do not
  // overwrite each other's key lists.
  var outputName = collectionName + "_keys";
  var mr = db.runCommand({
    "mapreduce": collectionName,
    // One emission per field of the current document; the value is irrelevant.
    "map": function () {
      for (var key in this) { emit(key, null); }
    },
    "reduce": function (key, stuff) { return null; },
    "out": outputName
  });
  // The command reply names the collection that holds the results.
  return db[mr.result].distinct("_id");
}
Which returns an array of unique field names (even if they only appear in one document).

mongo equivalent of sql query

i need to build a mongo query to get results from a collection which has the same structure as the following sql.
click for picture of table structure
my sql query:
-- Outer query: collapse the ordered rows to one row per (year_week, type).
-- NOTE(review): the outer SELECT * returns non-aggregated columns; presumably the intent
-- is to get the highest-points row of each group via the inner ORDER BY — confirm that
-- the MySQL version and SQL mode in use actually guarantee this.
SELECT * FROM (
-- Inner query: rows of person 100 within the date range, tagged with their year/week.
SELECT
db.date,
db.points,
db.type,
db.name,
db.rank,
YEARWEEK( db.date ) AS year_week
FROM _MyDatabase db
WHERE
db.personId = 100 AND
db.date BETWEEN '2012-10-01' AND '2015-09-30'
-- Order by week, then type, then highest points first.
ORDER BY
YEARWEEK( db.date ),
db.type,
db.points DESC
) x
-- One output row per week (newest week first) and type.
GROUP BY
x.year_week DESC,
x.type;
the result looks like this
date points type name rank year_week
-------------------------------------------------
23.10.2014 2000 1 Fish 2 201442
12.10.2014 2500 1 Fish 2 201441
16.10.2014 800 2 Fish 2 201441
i have tried different group / aggregate queries so far, but i couldn't get a similar result. hopefully one of you has more mongo experience than i and can give me a hint on how to solve this.
You would want something like this:
// Date bounds of the report. JavaScript months are zero-based: 9 = October, 8 = September.
var start = new Date(2012, 9, 1);
var end = new Date(2015, 8, 30);

// Aggregation equivalent of the SQL report: best-scoring row per (year_week, type).
var pipeline = [
    {
        // WHERE personId = 100 AND date BETWEEN start AND end
        "$match": {
            "personId": 100,
            "date": { "$gte": start, "$lte": end }
        }
    },
    {
        "$project": {
            "date": 1, "points": 1, "type": 1, "name": 1, "rank": 1,
            // year * 100 + week, e.g. 201442, so that weeks of different years stay apart.
            // NOTE(review): $week and MySQL YEARWEEK() may number the days before the
            // first Sunday of a year differently — confirm if those days matter.
            "year_week": {
                "$add": [
                    { "$multiply": [ { "$year": "$date" }, 100 ] },
                    { "$week": "$date" }
                ]
            }
        }
    },
    {
        // Highest points first inside each (year_week, type), so $first below picks it.
        "$sort": {
            "year_week": 1,
            "type": 1,
            "points": -1
        }
    },
    {
        // One document per (year_week, type), carrying the columns of its top row.
        "$group": {
            "_id": {
                "year_week": "$year_week",
                "type": "$type"
            },
            "date": { "$first": "$date" },
            "points": { "$first": "$points" },
            "name": { "$first": "$name" },
            "rank": { "$first": "$rank" }
        }
    },
    {
        // GROUP BY year_week DESC, type: newest week first, then by type.
        "$sort": {
            "_id.year_week": -1,
            "_id.type": 1
        }
    }
];
// Execute the pipeline built above against the "_MyDatabase" collection.
db.getCollection("_MyDatabase").aggregate(pipeline);

MongoDB Map/Reduce only working in inline-mode

I wrote map/reduce/finalize functions which work fine in my tests with the out: {inline: true} option.
But if I try to save the result in a collection using out: {replace: 'test'} (or merge, etc.), I do not get the same result.
Does anyone have a clue what I am doing wrong?
Thx
Inline:
db.runCommand({mapreduce: 'source', map: map_deliverstat, reduce: reduce_deliverstat, finalize: finalize_deliverstat, out: {inline:1}})
{
"_id": {
"date": ISODate("2012-03-13T00:00:00Z"),
"customerid": 469
},
"value": {
"date": ISODate("2012-03-13T00:00:00Z"),
"customerid": NumberLong(469),
"sum": 294,
"nomarker": 42,
"marker": 252,
"product1": 34,
"product2": 22,
"product3": 20,
"product4": 19,
"product5": 16
}
}
Replace:
db.runCommand({mapreduce: 'source', map: map_deliverstat, reduce: reduce_deliverstat, out: {replace: 'test'}, finalize: finalize_deliverstat})
{
"_id": {
"date": ISODate("2012-03-13T00:00:00Z"),
"customerid": 469
},
"value": {
"date": ISODate("2012-03-13T00:00:00Z"),
"customerid": NumberLong(469),
"sum": 2,
"nomarker": 0,
"marker": 2,
"product1": 0,
"product2": 0,
"product3": 0,
"product4": 0,
"product5": 0
}
}
Here is another way of running map-reduce; modify it as needed and see if it works for you.
The following query counts the number of customers grouped by state:
// Map: one emission per customer document, keyed by its state, with a count of 1.
var map = function () {
  emit({ state: this.CustomerState }, { count: 1 });
};
// Reduce: add up the counts for one state. The returned object has exactly the
// shape that map emits ({count: n}), so it stays correct when the reducer is fed
// its own partial results, and every stored value has the same structure whether
// or not a key went through reduce. The state itself is already available in the key.
var reduce = function (key, values) {
  var count = 0;
  values.forEach(function (v) {
    count += v.count;
  });
  return { count: count };
};
// Create the output collection up front, then run the map-reduce over customer_info
// and store its results in that collection.
db.createCollection('map_temp')
db.customer_info.mapReduce(map, reduce, {out:'map_temp'})
You will have the result saved in map_temp collection