MongoDB group data melt - mongodb

Say I have a small dataset:
[
{"A": 0, "B": 0, "X": 100, "Y": 100},
{"A": 1, "B": 0, "X": 50, "Y": 55},
{"A": 0, "B": 1, "X": 25, "Y": 30},
{"A": 1, "B": 1, "X": 1, "Y": 6}
]
I also have a pipeline where the final stage is a group:
[
{
"$group": {
"_id": {
"classification1": {
"$eq": ["$A", 1]
},
"classification2": {
"$eq": ["$B", 1]
}
},
"countX": {"$sum": "$X"},
"countY": {"$sum": "$Y"}
}
}
]
The output of this pipeline:
[
{"_id": {"classification1": false, "classification2": false}, "countX": 100, "countY": 100},
{"_id": {"classification1": true, "classification2": false}, "countX": 50, "countY": 55},
{"_id": {"classification1": false, "classification2": true}, "countX": 25, "countY": 30},
{"_id": {"classification1": true, "classification2": true}, "countX": 1, "countY": 6}
]
What pipeline steps would I need to reach a melted format like this?
[
{"name": "classification1", "countX": 51, "countY": 61},
{"name": "classification2", "countX": 26, "countY": 36}
]
Note that this transformation counts document 1 from the previous stage zero times (both conditions are false there), and counts document 4 twice (both conditions are true there).
I have written a Javascript function for this, but Javascript functions cannot be invoked from the pipeline (aggregation pipelines must be serializable). Unfortunately, that means I have to unload the data from the DB, run the script on the data, and then load the transformed data back in as a temporary collection to finish the rest of the pipeline after this stage.
Any assistance is greatly appreciated.

I did some reading on facets. Somewhat verbose, but this query provides melted data in the proper format:
[
{
"$group": {
"_id": {
"classification1": {
"$eq": ["$A", 1]
},
"classification2": {
"$eq": ["$B", 1]
}
},
"countX": {"$sum": "$X"},
"countY": {"$sum": "$Y"}
}
},
{
"$facet": {
"classification1": [
{"$match": {"_id.classification1": true}},
{"$group": {"_id": null, "X": {"$sum": "$countX"}, "Y": {"$sum": "$countY"}}},
{"$addFields": {"name": "classification1"}}
],
"classification2": [
{"$match": {"_id.classification2": true}},
{"$group": {"_id": null, "X": {"$sum": "$countX"}, "Y": {"$sum": "$countY"}}},
{"$addFields": {"name": "classification2"}}
]
}
},
{
"$project": {"combine": {"$setUnion": ["$classification1", "$classification2"]}}
},
{
"$unwind": "$combine"
},
{
"$replaceRoot": {"newRoot": "$combine"}
},
{
"$project": {"_id": 0}
}
]

Related

MongoDB field must be an array

Currently I have a collection with the following documents:
[
{
"_id": ObjectId("628e6bd640643f97d6517c75"),
"company": "bau",
"current_version": 0,
"form_name": "don't know",
"history": [],
"id": "23421123-24a9-4a45-a12f-27a330152ax3",
"is_active": True,
"user_id": "999",
},
{
"_id": ObjectId("628eaffe4b8ae2ccdeb9305c"),
"company": "vrau",
"current_version": 0,
"form_name": "exemplo",
"history": [
{
"content": [
{
"field_id": 0,
"label": "insira um texto",
"placeholder": "qualquer texto",
"type": "text",
}
],
"layout": [
{"field_id": 0, "h": 10, "type": "text", "w": 100, "x": 0, "y": 0}
],
"responses": [
{
"client_id": 100,
"response_date": "2020-01-02",
"values": [{"field_id": 0, "value": "um texto"}],
},
{
"client_id": 2,
"response_date": "2020-01-01",
"values": [{"field_id": 0, "value": "roi"}],
},
],
"version": 0,
}
],
"id": "33b66684-24a9-4a45-a12f-27a330152ac8",
"is_active": True,
"user_id": "1",
},
]
I want to change the response from the client_id = '2', but I'm receiving the following error:
pymongo.errors.WriteError: The field 'history.0.responses.1' must be an array but is of type object in document {_id: ObjectId('628eaffe4b8ae2ccdeb9305c')}, full error: {'index': 0, 'code': 2, 'errmsg': "The field 'history.0.responses.1' must be an array but is of type object in document {_id: ObjectId('628eaffe4b8ae2ccdeb9305c')}"}
I don't know what I'm doing wrong, and this error doesn't make sense to me because responses is an array.
my current query:
collection.update_many(
{"id": "33b66684-24a9-4a45-a12f-27a330152ac8", "history.version": 0},
{
"$push": {
"history.$[h].responses.$[r]": {
"client_id": 2,
"response_date": "2020-01-01",
"values": [{"field_id": 0, "value": "roi"}],
}
}
},
array_filters=[{"h.version": 0}, {"r.client_id": "2"}],
)
Is there another way to do it?
It is because you are also performing filter on r, which already resolves to object level in responses array.
You can simply abandon the r arrayFilter if you simply want to push to responses array.
collection.update_many(
{"id": "33b66684-24a9-4a45-a12f-27a330152ac8", "history.version": 0},
{
"$push": {
"history.$[h].responses": {
"client_id": 2,
"response_date": "2020-01-01",
"values": [{"field_id": 0, "value": "roi"}],
}
}
},
array_filters=[{"h.version": 0}],
)
Here is the Mongo playground for your reference. (in native js syntax)
You should use $set instead of $push if you want to update the entry instead of adding an entry. In your given example, the client_id is an int while your arrayFilter uses a string. It could cause a problem if that is not intended.
collection.update_many(
{"id": "33b66684-24a9-4a45-a12f-27a330152ac8", "history.version": 0},
{
"$set": {
"history.$[h].responses.$[r]": {
"client_id": 2,
"response_date": "2020-01-01",
"values": [{"field_id": 0, "value": "roi"}],
}
}
},
array_filters=[{"h.version": 0}, {"r.client_id": 2}],
)
Here is the Mongo playground for your reference. (in native js syntax)

How do I summarize tags by category in mongodb

I have a collection that is shaped like this:
[
{
_id: ObjectId("5d8e8c9b8f8b9b7b7a8b4567"),
tags: {
language: [ 'en' ],
industries: [ 'agency', 'travel' ],
countries: [ 'ca', 'us' ],
regions: [ 'north-america' ],
}
},
{
_id: ObjectId("5d8e8c9b8f8b9b7b7a8b4568"),
tags: {
language: [ 'en', 'fr' ],
industries: [ 'travel' ],
countries: [ 'ca' ]
}
},
{
_id: ObjectId("5d8e8c9b8f8b9b7b7a8b4569"),
tags: {
language: [ 'en' ],
industries: [ 'agency', 'travel' ],
countries: [ 'ca', 'us' ],
regions: [ 'south-america' ]
}
},
]
and I would like to generate this as a result...
{
//* count of all documents
"count": 3,
//* count of all documents that contain any slug within the given category
"countWithCategorySlug": {
"language": 3,
"industries": 3,
"countries": 3,
"regions": 2
},
//* per category: count of documents that contain that slug in the givin category
"language": {
"en": 3,
"fr": 1
},
"industries": {
"agency": 2,
"travel": 3,
},
"countries": {
"ca": 3,
"us": 2
},
"regions": {
"north-america": 1,
"south-america": 1
}
}
super stuck so any help would be appreciated. :)
The number of categories is unknown, and I have a code solution that queries the list of distinct categories and slugs, then for each one generates a $group stage... The resulting query is excessively big and there needs to be a better way... the problem is that I have absolutely no idea how to optimize it...
Query
The first part, before the facet, is done to separate them and produce 1 document for each value, like
[{
"type": "language",
"value": "en",
"_id": ObjectId("5d8e8c9b8f8b9b7b7a8b4567")
},
{
"type": "industries",
"value": "agency",
"_id": ObjectId("5d8e8c9b8f8b9b7b7a8b4567")
},
{
"type": "industries",
"value": "travel",
"_id": ObjectId("5d8e8c9b8f8b9b7b7a8b4567")
},
{
"type": "countries",
"value": "ca",
"_id": ObjectId("5d8e8c9b8f8b9b7b7a8b4567")
}]
and then facet with 3 fields and count the documents
and after that, transformations to place the data on keys like the expected output
Playmongo
aggregate(
[{"$set": {"tags": {"$objectToArray": "$tags"}}},
{"$set":
{"tags":
{"$map":
{"input": "$tags",
"in": {"type": "$$this.k", "value": "$$this.v", "_id": "$_id"}}}}},
{"$unwind": "$tags"},
{"$replaceRoot": {"newRoot": "$tags"}},
{"$unwind": "$value"},
{"$facet":
{"count":
[{"$group": {"_id": null, "count": {"$addToSet": "$_id"}}},
{"$set": {"count": {"$size": "$count"}}}],
"category":
[{"$group": {"_id": "$type", "count": {"$addToSet": "$_id"}}},
{"$set": {"count": {"$size": "$count"}}}],
"values":
[{"$group":
{"_id": "$value",
"type": {"$first": "$type"},
"values": {"$addToSet": "$_id"}}},
{"$set": {"values": {"$size": "$values"}}},
{"$group":
{"_id": "$type",
"values":
{"$push":
{"type": "$type", "value": "$_id", "count": "$values"}}}}]}},
{"$set":
{"count":
{"$getField":
{"field": "count", "input": {"$arrayElemAt": ["$count", 0]}}},
"category":
{"$arrayToObject":
[{"$map":
{"input": "$category",
"in": {"k": "$$this._id", "v": "$$this.count"}}}]},
"values":
{"$arrayToObject":
[{"$map":
{"input": "$values",
"in":
{"k": "$$this._id",
"v":
{"$arrayToObject":
[{"$map":
{"input": "$$this.values",
"in": {"k": "$$this.value", "v": "$$this.count"}}}]}}}}]}}}])
Outputs
[{
"count": 3,
"category": {
"countries": 3,
"industries": 3,
"regions": 2,
"language": 3
},
"values": {
"regions": {
"south-america": 1,
"north-america": 1
},
"countries": {
"us": 2,
"ca": 3
},
"language": {
"fr": 1,
"en": 3
},
"industries": {
"agency": 2,
"travel": 3
}
}
}]

Grouping multiple documents with nested array of objects in MongoDB

I'm having documents that are having this structures
x = {
"scalar": 1,
"array": [
{"key": 1, "value": 2},
{"key": 2, "value": 3},
],
"array2": [
{"key": 1, "value": 2},
{"key": 2, "value": 3},
],
}
and
y = {
"scalar": 2,
"array": [
{"key": 1, "value": 3},
{"key": 3, "value": 0},
],
"array2": [
{"key": 1, "value": 3},
{"key": 3, "value": 0},
],
}
The end results I'm trying to find is this
{
"scalar": 3, # SUM of scalar
"array": [
{"key": 1, "value": 5}, # SUM by key = 1
{"key": 2, "value": 3},
{"key": 3, "value": 0},
],
"array2": [
{"key": 1, "value": 5}, # SUM by key = 1
{"key": 2, "value": 3},
{"key": 3, "value": 0},
],
}
I've tried to use a double $unwind and then group with $push. I'm thinking of using $reduce to get the final results.
Query
One way to do it is with $facet: you want 3 groupings and facet can do that — it breaks the work into 3 separate parts so the unwinds don't mix. I think this is the simplest way to do it.
Test code here
db.collection.aggregate([
{
"$facet": {
"scalar": [
{
"$project": {
"scalar": 1
}
},
{
"$group": {
"_id": null,
"sum": {
"$sum": "$scalar"
}
}
},
{
"$unset": [
"_id"
]
}
],
"array": [
{
"$project": {
"array": 1
}
},
{
"$unwind": {
"path": "$array"
}
},
{
"$group": {
"_id": "$array.key",
"sum": {
"$sum": "$array.value"
}
}
},
{
"$project": {
"_id": 0,
"key": "$_id",
"value": "$sum"
}
}
],
"array2": [
{
"$project": {
"array2": 1
}
},
{
"$unwind": {
"path": "$array2"
}
},
{
"$group": {
"_id": "$array2.key",
"sum": {
"$sum": "$array2.value"
}
}
},
{
"$project": {
"_id": 0,
"key": "$_id",
"value": "$sum"
}
}
]
}
},
{
"$set": {
"scalar": {
"$arrayElemAt": [
"$scalar.sum",
0
]
}
}
}
])
An alternative is to unwind both arrays, but then the unwinds and groups would be mixed, making things complicated, I think.
Also, $reduce can't be used for grouping in MongoDB, I think, because we can't construct dynamic paths.
If group-reduce and have this data (key=key value=value)
{"1" : 5 , "2" : 3}
And when we see {"key": 1, "value": 5}, how can we check whether the above data contains 1 as a key? We can't construct dynamic paths, like $$this.1. The only way is to convert it to an array and back to an object, which would be very slow.

How do you `$push` to a nested MongoDB array conditionally so that there are never 2 consecutive values the same?

Data structure
I collect readings from probes located at different locations. Readings are recorded as {'value': <float>, 'when': <timestamp>} in an array of readings and are ordered in ascending time order. The readings array is in a subdocument accessed by probe_id.
{'location_id': ObjectId('6118d887066a0b17c9a4a531'),
'probes': [
{
'probe_id': ObjectId('6118d887066a0b17c9a4a530'),
'readings': [
{'value': 42, 'when': Date("2021-08-12T05:25:28.905Z")},
{'value': 37, 'when': Date("2021-08-12T08:34:30.405Z")},
{'value': 43, 'when': Date("2021-08-12T12:56:45.043Z")},
...
]
},
...
]
}
Full specification
Readings can be inserted into readings out of time order.
readings must remain sorted by ascending when.
There must never be 2 items in succession in readings with the same value.
When a $push would violate rule 3 then the new item must replace the old if (and only if) it has an earlier when (if when is a lower value), otherwise the readings should remain untouched.
The transaction must be atomic.
Bonus points
If a probe subdocument with probe_id is not present in probes then create the {'probe_id':ObjectId(), 'readings':[]} subdocument during the $push.
Possible cases
empty list - Add item
push back, different value from preceding* member - Add item
push back, same value as preceding member - Do nothing (don't push)
push middle, different value from preceding and following** member - Add item
push middle, same value as preceding member - Do nothing (don't push)
push middle, same value as following member - Replace following member
push front, different value from following member - Add item
push front, same value as following member - Replace following member
*"preceding" = earlier in time (lower when)
**"following" = later in time (higher when)
Notes
Items with identical when may either be ignored or replaced or be pushed (provided value between all items with identical when are different)
From the point of optimization, we normally push back (insert with a more recent, higher-valued when).
Current Code
Using PyMongo:
result = collection.update_one(
{'_id': location_id },
{'$push': {'probes.$[probe].readings': {'$each': [new_reading], '$sort': {'when': 1}}}},
array_filters = [{'probe.id': probe_id}],
)
if result.modified_count == 0:
collection.update_one(
{'_id': location_id },
{'$push': {'probes': {'id': probe_id, 'readings': [new_reading]}}},
)
Clearly this code does nothing to assert that consecutive items are of different values. It also fails to push the required subdocument if that is not present without 2 database calls. Is the above specification possible?
Solution 1
Online example for inserting {"value" : 43 ,"when" : 11}
Push middle, same value as following member - Replace following member
Test code here
All possible cases (its 9 cases, 7 + 2 add on empty probes("bonus" case) or only empty readings)
---------------------Empty probes(case 1(the extra case) empty probes)-------------------------------
Before add
{"_id": {"$oid": "61345b2aefdf45b6128444f2"}, "location_id": 1, "probes": []}
Case : Only 1 case exists Member : {value 41, when 3}
{"_id": {"$oid": "61345b2aefdf45b6128444f2"}, "location_id": 1, "probes": [{"probe_id": 4, "readings": [{"value": 41, "when": 3}]}]}
---------------------Empty readings(case 1 empty readings)-------------------------------
Before add
{"_id": {"$oid": "61345b68efdf45b612844650"}, "location_id": 1, "probes": [{"probe_id": 2, "readings": []}]}
Case : Only 1 case exists Member : {value 41, when 3}
{"_id": {"$oid": "61345b68efdf45b612844650"}, "location_id": 1, "probes": [{"probe_id": 2, "readings": [{"value": 41, "when": 3}]}]}
---------------------Not empty readings-------------------------------
Before add
{"_id": {"$oid": "61346a33efdf45b6128496b7"}, "location_id": 1, "probes": [{"probe_id": 2, "readings": [{"value": 42, "when": 5}, {"value": 37, "when": 10}, {"value": 43, "when": 15}, {"value": 41, "when": 20}]}]}
Case : NoConflict start[push front, different value from following member - Add item] Member : {value 41, when 3}
{"_id": {"$oid": "61346a33efdf45b6128496b7"}, "location_id": 1, "probes": [{"probe_id": 2, "readings": [{"value": 41, "when": 3}, {"value": 42, "when": 5}, {"value": 37, "when": 10}, {"value": 43, "when": 15}, {"value": 41, "when": 20}]}]}
Case : Conflict start[push front, same value as following member - Replace following member] Member : {value 42, when 3}
{"_id": {"$oid": "61346a33efdf45b6128496b7"}, "location_id": 1, "probes": [{"probe_id": 2, "readings": [{"value": 42, "when": 3}, {"value": 37, "when": 10}, {"value": 43, "when": 15}, {"value": 41, "when": 20}]}]}
Case : NoConflict middle[push middle, different value from preceding and following** member - Add item] Member : {value 42, when 11}
{"_id": {"$oid": "61346a33efdf45b6128496b7"}, "location_id": 1, "probes": [{"probe_id": 2, "readings": [{"value": 42, "when": 5}, {"value": 37, "when": 10}, {"value": 42, "when": 11}, {"value": 43, "when": 15}, {"value": 41, "when": 20}]}]}
Case : Conflict middle[push middle, same value as preceding member - Do nothing (don't push)] Member : {value 37, when 11}
{"_id": {"$oid": "61346a33efdf45b6128496b7"}, "location_id": 1, "probes": [{"probe_id": 2, "readings": [{"value": 42, "when": 5}, {"value": 37, "when": 10}, {"value": 43, "when": 15}, {"value": 41, "when": 20}]}]}
Case : Conflict middle[push middle, same value as following member - Replace following member] Member : {value 43, when 11}
{"_id": {"$oid": "61346a33efdf45b6128496b7"}, "location_id": 1, "probes": [{"probe_id": 2, "readings": [{"value": 42, "when": 5}, {"value": 37, "when": 10}, {"value": 43, "when": 11}, {"value": 41, "when": 20}]}]}
Case : NoConflict end[push back, different value from preceding* member - Add item] Member : {value 47, when 21}
{"_id": {"$oid": "61346a33efdf45b6128496b7"}, "location_id": 1, "probes": [{"probe_id": 2, "readings": [{"value": 42, "when": 5}, {"value": 37, "when": 10}, {"value": 43, "when": 15}, {"value": 41, "when": 20}, {"value": 47, "when": 21}]}]}
Case : Conflict end[push back, same value as preceding member - Do nothing (don't push)] Member : {value 41, when 21}
{"_id": {"$oid": "61346a33efdf45b6128496b7"}, "location_id": 1, "probes": [{"probe_id": 2, "readings": [{"value": 42, "when": 5}, {"value": 37, "when": 10}, {"value": 43, "when": 15}, {"value": 41, "when": 20}]}]}
Query
It's big but it's fast and atomic — all done in 1 query. It doesn't sort the array; it inserts in the right position to keep it sorted.
to move this code to your code
replace the 4 with new ObjectID() (add the method call)
replace the 1 with the location ObjectID
replace the 2 with the prob_id ObjectID
i used numbers instead of dates for easy testing, but no need to change the query for that
first $set is to define the new ObjectID(replace the 4 with the method call) in case probes is empty create the probe with empty readings
$map to get inside the array
$reduce to find the position where we will add the new member
the when field defines the position, the query keep the array sorted by adding in the right place
*we could use reduce to make the array not just find the position
but if the array had >500 members because of concat it would be so slow not usable.
when we have the position we check 4 basic cases
add at empty readings (1 case)
add at start (2 cases)
add at middle (3 cases)
add at end (2 cases)
each one of them has other cases if Conflict of value or not
db.collection.update({
"location_id": 1
},
[
{
"$set": {
"prob-id": {
"$cond": [
{
"$eq": [
"$probes",
[]
]
},
4,
2
]
}
}
},
{
"$set": {
"probes": {
"$cond": [
{
"$eq": [
"$probes",
[]
]
},
[
{
"probe_id": "$prob-id",
"readings": []
}
],
"$probes"
]
}
}
},
{
"$set": {
"probes": {
"$map": {
"input": "$probes",
"as": "m1",
"in": {
"$cond": [
{
"$ne": [
"$$m1.probe_id",
"$prob-id"
]
},
"$$m1",
{
"$mergeObjects": [
"$$m1",
{
"readings": {
"$let": {
"vars": {
"size_position": {
"$reduce": {
"input": "$$m1.readings",
"initialValue": [
0,
null,
null
],
"in": {
"$let": {
"vars": {
"index_pos": "$$value",
"m2": "$$this"
},
"in": {
"$let": {
"vars": {
"index": {
"$arrayElemAt": [
"$$index_pos",
0
]
},
"pos": {
"$arrayElemAt": [
"$$index_pos",
1
]
}
},
"in": {
"$cond": [
{
"$and": [
{
"$eq": [
"$$pos",
null
]
},
{
"$gt": [
"$$m2.when",
11
]
}
]
},
[
{
"$add": [
"$$index",
1
]
},
"$$index"
],
[
{
"$add": [
"$$index",
1
]
},
"$$pos"
]
]
}
}
}
}
}
}
}
},
"in": {
"$let": {
"vars": {
"asize": {
"$arrayElemAt": [
"$$size_position",
0
]
},
"position": {
"$arrayElemAt": [
"$$size_position",
1
]
}
},
"in": {
"$switch": {
"branches": [
{
"case": {
"$eq": [
"$$asize",
0
]
},
"then": [
{
"value": 43,
"when": 11
}
]
},
{
"case": {
"$eq": [
"$$position",
null
]
},
"then": {
"$let": {
"vars": {
"prv_member": {
"$arrayElemAt": [
"$$m1.readings",
{
"$subtract": [
"$$asize",
1
]
}
]
}
},
"in": {
"$cond": [
{
"$eq": [
"$$prv_member.value",
43
]
},
"$$m1.readings",
{
"$concatArrays": [
"$$m1.readings",
[
{
"value": 43,
"when": 11
}
]
]
}
]
}
}
}
},
{
"case": {
"$eq": [
"$$position",
0
]
},
"then": {
"$let": {
"vars": {
"next_member": {
"$arrayElemAt": [
"$$m1.readings",
0
]
}
},
"in": {
"$cond": [
{
"$eq": [
"$$next_member.value",
43
]
},
{
"$cond": [
{
"$lt": [
11,
"$$next_member.when"
]
},
{
"$concatArrays": [
[
{
"value": 43,
"when": 11
}
],
{
"$slice": [
"$$m1.readings",
1,
"$$asize"
]
}
]
},
"$$m1.readings"
]
},
{
"$concatArrays": [
[
{
"value": 43,
"when": 11
}
],
"$$m1.readings"
]
}
]
}
}
}
}
],
"default": {
"$let": {
"vars": {
"next_member": {
"$arrayElemAt": [
"$$m1.readings",
"$$position"
]
},
"prv_member": {
"$arrayElemAt": [
"$$m1.readings",
{
"$subtract": [
"$$position",
1
]
}
]
}
},
"in": {
"$switch": {
"branches": [
{
"case": {
"$and": [
{
"$ne": [
"$$next_member.value",
43
]
},
{
"$ne": [
"$$prv_member.value",
43
]
}
]
},
"then": {
"$concatArrays": [
{
"$slice": [
"$$m1.readings",
0,
"$$position"
]
},
[
{
"value": 43,
"when": 11
}
],
{
"$slice": [
"$$m1.readings",
"$$position",
{
"$add": [
"$$asize",
1
]
}
]
}
]
}
},
{
"case": {
"$eq": [
"$$prv_member.value",
43
]
},
"then": "$$m1.readings"
}
],
"default": {
"$concatArrays": [
{
"$slice": [
"$$m1.readings",
0,
"$$position"
]
},
[
{
"value": 43,
"when": 11
}
],
{
"$slice": [
"$$m1.readings",
{
"$add": [
"$$position",
1
]
},
{
"$add": [
"$$asize",
1
]
}
]
}
]
}
}
}
}
}
}
}
}
}
}
}
}
]
}
]
}
}
}
}
},
{
"$unset": [
"prob-id"
]
}
])
Solution 2
Online example for {"probe_id" : 2, "readings" : [{"value" :43 ,"when" : 11}]}
Test code here
All possible cases (its 9 cases, 7 + 2 add on empty probes("bonus" case) or only empty readings)
---------------------Empty probes-------------------------------
Before add
{"_id": {"$oid": "61356ae9fdad8624e245e271"}, "location_id": 1, "probes": []}
Case : Only 1 case exists[empty list - Add item(the bonus case)] Member : {:probe_id 2, :readings [{value 42, when 3}]}
{"_id": {"$oid": "61356ae9fdad8624e245e279"}, "location_id": 1, "probes": [{"readings": [{"value": 42, "when": 3}], "probe_id": 2}]}
---------------------Empty readings-------------------------------
Before add
{"_id": {"$oid": "61356b73fdad8624e245e58c"}, "location_id": 1, "probes": [{"probe_id": 2, "readings": []}]}
Case : Only 1 case exists[empty list - Add item] Member : {:probe_id 2, :readings [{value 42, when 3}]}
{"_id": {"$oid": "61356b73fdad8624e245e594"}, "location_id": 1, "probes": [{"readings": [{"value": 42, "when": 3}], "probe_id": 2}]}
---------------------Not empty readings-------------------------------
Before add
{"_id": {"$oid": "613569f7fdad8624e245dd01"}, "location_id": 1, "probes": [{"readings": [{"value": 42, "when": 5}, {"value": 37, "when": 10}, {"value": 43, "when": 15}, {"value": 41, "when": 20}], "probe_id": 2}]}
Case : NoConflict start[push front, different value from following member - Add item] Member : {:probe_id 2, :readings [{value 41, when 3}]}
{"_id": {"$oid": "61356a2dfdad8624e245de34"}, "location_id": 1, "probes": [{"readings": [{"value": 41, "when": 3}, {"value": 42, "when": 5}, {"value": 37, "when": 10}, {"value": 43, "when": 15}, {"value": 41, "when": 20}], "probe_id": 2}]}
Case : Conflict start[push front, same value as following member - Replace following member] Member : {:probe_id 2, :readings [{value 42, when 3}]}
{"_id": {"$oid": "61356a2dfdad8624e245de3d"}, "location_id": 1, "probes": [{"readings": [{"value": 42, "when": 3}, {"value": 37, "when": 10}, {"value": 43, "when": 15}, {"value": 41, "when": 20}], "probe_id": 2}]}
Case : NoConflict middle[push middle, different value from preceding and following** member - Add item] Member : {:probe_id 2, :readings [{value 42, when 11}]}
{"_id": {"$oid": "61356a2dfdad8624e245de46"}, "location_id": 1, "probes": [{"readings": [{"value": 42, "when": 5}, {"value": 37, "when": 10}, {"value": 42, "when": 11}, {"value": 43, "when": 15}, {"value": 41, "when": 20}], "probe_id": 2}]}
Case : Conflict middle[push middle, same value as preceding member - Do nothing (don't push)] Member : {:probe_id 2, :readings [{value 37, when 11}]}
{"_id": {"$oid": "61356a2efdad8624e245de4f"}, "location_id": 1, "probes": [{"readings": [{"value": 42, "when": 5}, {"value": 37, "when": 10}, {"value": 43, "when": 15}, {"value": 41, "when": 20}], "probe_id": 2}]}
Case : Conflict middle[push middle, same value as following member - Replace following member] Member : {:probe_id 2, :readings [{value 43, when 11}]}
{"_id": {"$oid": "61356a2efdad8624e245de59"}, "location_id": 1, "probes": [{"readings": [{"value": 42, "when": 5}, {"value": 37, "when": 10}, {"value": 43, "when": 11}, {"value": 41, "when": 20}], "probe_id": 2}]}
Case : NoConflict end[push back, different value from preceding* member - Add item] Member : {:probe_id 2, :readings [{value 47, when 21}]}
{"_id": {"$oid": "61356a2efdad8624e245de62"}, "location_id": 1, "probes": [{"readings": [{"value": 42, "when": 5}, {"value": 37, "when": 10}, {"value": 43, "when": 15}, {"value": 41, "when": 20}, {"value": 47, "when": 21}], "probe_id": 2}]}
Case : Conflict end[push back, same value as preceding member - Do nothing (don't push)] Member : {:probe_id 2, :readings [{value 41, when 21}]}
{"_id": {"$oid": "61356a2efdad8624e245de6b"}, "location_id": 1, "probes": [{"readings": [{"value": 42, "when": 5}, {"value": 37, "when": 10}, {"value": 43, "when": 15}, {"value": 41, "when": 20}], "probe_id": 2}]}
Query
uses another method, with $lookup
adds the reading at the end, sorts the array by when
removes an element if the previous member has the same value
(this way we keep only the members with the smaller when)
uses $merge for the update; it's not possible to do this with update, even with
a pipeline, because we need $lookup and $group etc.
*$merge requires a unique index on location_id, else it won't work
*the online example doesn't have the merge; the query below has it
Drawback
if someone else adds readings to the same location_id while we are updating that location_id document (it's a very small time window), but still, if
concurrency at the millisecond level is important, it might cause data loss.
*You can use the logic of this query and parts of the code with transaction if you prefer smaller but more queries.
For example: add at the end and sort the array (1 query), remove duplicate values (second query), etc. But you don't need solution 2 or transactions — solution 1 does everything in 1 query and is atomic.
db.testcoll.aggregate([
{
"$match": {
"location_id": {
"$eq": 1
}
}
},
{
"$set": {
"prob-id": {
"$cond": [
{
"$eq": [
"$probes",
[]
]
},
4,
2
]
}
}
},
{
"$set": {
"probes": {
"$cond": [
{
"$eq": [
"$probes",
[]
]
},
[
{
"probe_id": "$prob-id",
"readings": []
}
],
"$probes"
]
}
}
},
{
"$lookup": {
"from": "testcoll1",
"let": {
"probes": "$probes"
},
"pipeline": [
{
"$set": {
"probes": {
"$concatArrays": [
"$$probes",
[
{
"probe_id": 2,
"readings": [
{
"value": 43,
"when": 11
}
]
}
]
]
}
}
},
{
"$unwind": {
"path": "$probes"
}
},
{
"$unwind": {
"path": "$probes.readings"
}
},
{
"$sort": {
"probes.probe_id": 1,
"probes.readings.when": 1
}
},
{
"$replaceRoot": {
"newRoot": "$probes"
}
}
],
"as": "probes"
}
},
{
"$set": {
"probes": {
"$reduce": {
"input": "$probes",
"initialValue": [],
"in": {
"$let": {
"vars": {
"ps": "$$value",
"p": "$$this"
},
"in": {
"$let": {
"vars": {
"prv_p": {
"$last": "$$ps"
}
},
"in": {
"$cond": [
{
"$and": [
"$$prv_p",
{
"$eq": [
"$$prv_p.readings.value",
"$$p.readings.value"
]
},
{
"$eq": [
"$$prv_p.probe_id",
"$$p.probe_id"
]
},
{
"$eq": [
"$$p.probe_id",
2
]
}
]
},
"$$ps",
{
"$concatArrays": [
"$$ps",
[
"$$p"
]
]
}
]
}
}
}
}
}
}
}
}
},
{
"$lookup": {
"from": "testcoll1",
"let": {
"probes": "$probes"
},
"pipeline": [
{
"$set": {
"probes": "$$probes"
}
},
{
"$unwind": {
"path": "$probes"
}
},
{
"$replaceRoot": {
"newRoot": "$probes"
}
},
{
"$group": {
"_id": "$probe_id",
"readings": {
"$push": "$readings"
}
}
},
{
"$set": {
"probe_id": "$_id"
}
},
{
"$project": {
"_id": 0
}
}
],
"as": "probes"
}
},
{
"$unset": [
"_id",
"prob-id"
]
},
{
"$merge": {
"into": {
"db": "testdb",
"coll": "testcoll"
},
"on": [
"location_id"
],
"whenMatched": "replace",
"whenNotMatched": "discard"
}
}
])

MongoDB - Average fields values over documents

I have a collection with this schema:
{
"fields":
{
"field1": [
{"name": "abc", "value": 2},
{"name": "bcd", "value": 4},
{"name": "cde", "value": 6}
],
"field2": [
{"name": "dec", "value": 3},
{"name": "das", "value": 8},
{"name": "pam", "value": 10}
]
}
},
{
"fields":
{
"field1": [
{"name": "abc", "value": 7},
{"name": "cde", "value": 12}
],
"field2": [
{"name": "dec", "value": 3},
{"name": "das", "value": 8},
{"name": "pam", "value": 10}
]
}
}
What I'm trying to obtain is e.g. the average values of all members of 'field1', evaluating 0 if a member exist in a document but not in another (like 'bcd').
So in this example I should get:
{
'_id': 'abc',
'avg': 4.5
},
{
'_id': 'bcd',
'avg': 2
},
{
'_id': 'cde',
'avg': 9
}
I wrote this aggregation query but I'm pretty sure there is something wrong with it:
db.statuses.aggregate([
{
$unwind: '$fields.field1'
},
{
$group: {
_id: '$fields.field1.name',
avg: {
$avg: '$fields.field1.value'
}
}
},
{
$sort: {
avg: -1
}
}
])
I think I should add a step before the average calculation in which I have to build an array of all values for each name (0 if the name does not exist in a document), and then evaluate the average on these arrays. Am I right?
How could I do this?