how to convert mongodb fields to subdocument fields? - mongodb

sorry it may be silly question, mongodb contains thousands of documents .
hard to be changed manually
original format
{"name" : "aaaa",
"price" : 111,
"ing1" : "abcd",
"ing1Conc" : 50 ,
"ing2" : "wxyz",
"ing2conc": 100}
needed to be converted to
{"name" : "aaaa",
"price" : 111,
"content":[
{ "ing1" : "abcd", "ing1Conc" : 50},
{ "ing2" : "wxyz", "ing2conc": 100}
]
}

The trivial solution would be this one:
db.collection.aggregate([
{
$project: {
name: 1,
price: 1,
content: [
{ ing1: "$ing1", ing1Conc: "$ing1Conc" },
{ ing2: "$ing1", ing2conc: "$ing2conc" }
]
}
}
])
A more generic solution would be this one:
db.collection.aggrega
{
$project: {
name: 1,
price: 1,
data: {
$filter: {
input: { $objectToArray: "$$ROOT" },
cond: { $regexMatch: { input: "$$this.k", regex: "^ing\\d+" } }
}
}
}
},
{ $unwind: "$data" },
{ $set: { i: { $regexFind: { input: "$data.k", regex: "\\d+" } } } },
{ $set: { i: "$i.match" } },
{
$group: {
_id: {
name: "$name",
price: "$price",
i: "$i"
},
content: { $push: "$data" }
}
},
{ $sort: { "_id.i": 1 } },
{ $set: { content: { $arrayToObject: "$content" } } },
{
$group: {
_id: { name: "$_id.name", price: "$_id.price" },
content: { $push: "$content" }
}
},
{ $replaceRoot: { newRoot: { $mergeObjects: [ "$$ROOT", "$_id" ] } } },
{ $unset: "_id" }
])
Mongo Playground
However, I think this structure is still bad. I would suggest something like
content: {
ing: [ "abcd", "wxyz"],
conc: [ 50, 100 ]
}
content: [
{ ing: "abcd", conc: 50 },
{ ing: "wxyz", conc :100 }
]
content: [
{ idx: 1, ing: "abcd", conc: 50 },
{ idx: 2, ing: "wxyz", conc: 100 }
]

Related

mongo / mongoose aggregation pipeline query for survey data

I am trying to write a query to get all of the results of some survey data stored in a mongo. The tricky part is some questions are radio questions with a single answer, and some questions are multi-select type questions, some are values that need to be averaged, so I want to perform different aggregations depending on the type of question.
The results are stored in a schema like this, with each item in the array being a survey response.
[
{
metaData: {
survey: new ObjectId("62206ea0b31be3535abac547")
},
answers: {
'question1': 'a',
'question2': 'a',
'question3': ['a','c'],
'question4': 3
},
createdAt: 2022-03-03T07:30:40.517Z,
},
{
metaData: {
survey: new ObjectId("62206ea0b31be3535abac547"),
},
answers: {
'question1': 'a',
'question2': 'b',
'question3': ['a','c'],
'question4': 2
},
createdAt: 2022-03-03T07:30:40.518Z,
},
{
metaData: {
survey: new ObjectId("62206ea0b31be3535abac547"),
},
answers: {
'question1': 'b',
'question2': 'c',
'question3': ['b']
'question4': 1
},
createdAt: 2022-03-03T07:30:40.518Z,
}
]
question1 and question2 are radio questions, so there can be only 1 answer, whereas question 3 is a multi-select, so the user can have multiple answers. Question 4 is a value that needs to be averaged.
I think there is some way to accomplish this in a single aggregation pipeline with some combination of facets, grouping, filters, projections, etc, but I am stuck.
I'd like to get a final result that looks like this
{
'question1' : {
'a' : 2,
'b' : 1
},
'question2' : {
'a' : 1,
'b' : 1,
'c' : 1,
},
'question3' : {
'a' : 2,
'b' : 1,
'c' : 2,
},
'question4' : 2 //avg (3+2+1)/3
}
OR even better:
{
'radio': {
'question1' : {
'a' : 2,
'b' : 1
},
'question2' : {
'a' : 1,
'b' : 1,
'c' : 1,
},
},
'multi': {
'question3' : {
'a' : 2,
'b' : 1,
'c' : 2,
}
},
'avg' : {
'question4' : 2
}
}
My pipeline would look something like this:
Response.aggregate([
{ $match: { 'metaData.survey': surveyId} }, // filter only for the specific survey
{ $project: { // I assume I have to turn the answers into an array
"answers": { $objectToArray: "$answers" },
"createdAt": "$createdAt"
}
},
// maybe facet here?
// conceptually, In the next stage I'd want to bucket the questions
// by type with something like below, then perform the right type of
// aggregation depending on the question type
// if $in [$$answers.k ['question1, 'question2']] group by k, v and count
// if $in [$$answers.k ['question3']] unwind and count each unique value?
// { $facet : { radio: [], multi:[]}}
])
Basically, I know which question Id is a radio or a multi-select, I'm just trying to figure out how to format the pipeline to achieve the desired output based on the questionId being in a known array.
Bonus points if I can figure out how to also group the by day/month based on the createdAt time
db.collection.aggregate([
{
$match: {}
},
{
$project: { answers: { $objectToArray: "$answers" } }
},
{
$unwind: "$answers"
},
{
$unwind: "$answers.v"
},
{
$group: {
_id: "$answers",
c: { "$sum": 1 }
}
},
{
$group: {
_id: "$_id.k",
v: { "$push": { k: "$_id.v", v: "$c" } }
}
},
{
$group: {
_id: null,
v: { "$push": { k: "$_id", v: { "$arrayToObject": "$v" } } }
}
},
{
$set: { v: { $arrayToObject: "$v" } }
},
{
$replaceWith: "$v"
}
])
mongoplayground
db.collection.aggregate([
{
$match: {}
},
{
$project: { answers: { $objectToArray: "$answers" } }
},
{
$unwind: "$answers"
},
{
$set: {
"answers.type": {
$switch: {
branches: [
{
case: { $isArray: "$answers.v" },
then: "multi"
},
{
case: { $eq: [ { $type: "$answers.v" }, "string" ] },
then: "radio"
},
{
case: { $isNumber: "$answers.v" },
then: "avg"
}
],
default: "other"
}
}
}
},
{
$unwind: "$answers.v"
},
{
$group: {
_id: "$answers",
c: { $sum: 1 }
}
},
{
$group: {
_id: "$_id.k",
type: { $first: "$_id.type" },
v: {
$push: {
k: { $toString: "$_id.v" },
v: "$c"
}
}
}
},
{
$group: {
_id: "$type",
v: {
$push: {
k: "$_id",
v: { $arrayToObject: "$v" }
}
}
}
},
{
$group: {
_id: null,
v: {
$push: {
k: "$_id",
v: { $arrayToObject: "$v" }
}
}
}
},
{
$set: { v: { $arrayToObject: "$v" } }
},
{
$replaceWith: "$v"
},
{
$set: {
avg: {
$arrayToObject: {
$map: {
input: { $objectToArray: "$avg" },
as: "s",
in: {
k: "$$s.k",
v: {
$avg: {
$map: {
input: { $objectToArray: "$$s.v" },
as: "x",
in: { $multiply: [ { $toInt: "$$x.k" }, "$$x.v" ] }
}
}
}
}
}
}
}
}
}
])
mongoplayground

Lodash `countBy` equivalent in MongoDB?

Let's say I have the input docs below:
[
{
"_id": "6225ca4052e7c226e2dd836d",
"data": [
"07",
"07",
"12",
"19",
"07",
"32"
]
},
{
"_id": "6225ca4052e7c226e2dd888f",
"data": [
"99",
"97",
"52",
"99",
"58",
"92"
]
}
]
I want to count the occurrences of every element in data string array per document. In JS, I can use countBy. How can I achieve the same using MongoDB Aggregation Framework?
I have tried to $reduce but MongoDB seems to not support assigning dynamic field to object.
{
$reduce: {
input: '$data',
initialValue: {},
in: { // assign `$$this` with count to `$$value`, but failed! }
}
}
Below is the desired output.
[
{
"_id": "6225ca4052e7c226e2dd836d",
"freqs": {
"12": 1,
"19": 1,
"32": 1,
"07": 3
}
},
{
"_id": "6225ca4052e7c226e2dd888f",
"freqs": {
"52": 1,
"58": 1,
"92": 1,
"97": 1,
"99": 2
}
}
]
db.collection.aggregate([
{
$match: {}
},
{
$unwind: "$data"
},
{
$group: {
_id: "$data",
c: { $sum: 1 },
id: { $first: "$_id" }
}
},
{
$group: {
_id: "$id",
data: { $push: { k: "$_id", v: "$c" } }
}
},
{
$set: {
data: { $arrayToObject: "$data" }
}
}
])
mongoplayground
db.collection.aggregate([
{
$set: {
data: {
$function: {
body: "function(d) {let obj = {}; d.forEach(e => {if(obj[e]==null) { obj[e]=1; }else{ obj[e]++; }}); return obj;}",
args: [
"$data"
],
lang: "js"
}
}
}
}
])
mongoplayground

Mongodb loop through every distinct values and select tags using aggregate (facet)

I have collection like this:
{
"labels": [{
"description": "Dog"
}, {
"description": "Red"
}, {
"description": "XXX"
}]
}
{
"labels": [{
"description": "Cat"
}, {
"description": "XXX"
}, {
"description": "Yellow"
}]
}
{
"labels": [{
"description": "Dog"
}, {
"description": "Red"
}, {
"description": "Yellow"
}]
}
{
"labels": [{
"description": "Bird"
}, {
"description": "XXX"
}, {
"description": "XXX"
}]
}
I want to filter for example only "Red" and "Yellow" colors from ALL elements and output document like this:
// because "Dog" appears 2 times so total = 2
{
description: "Dog",
total: 2,
colors: [
{ "_id": "Red", total: 2 },
{ "_id": "Yellow", total: 1 }
]
}
{
description: "Cat",
total: 1,
colors: [
{ "_id": "Yellow", total: 1 }
]
}
{
description: "Bird",
total: 1,
colors: []
}
{
description: "Red",
total: 2,
colors: [
{ _id: "Yellow", total: 1 }
]
}
{
description: "XXX",
total: 4,
colors: [
{ _id: "Yellow", total: 1 }
]
}
I can do this by using collection.distinct('labels.description') and then iterating through every single element + make a separate collection.count({ 'labels.description': 'Dog' }) like this:
for (...)
db.collection.aggregate([
{
"$match": {
"labels.description": valueFromLoop // (e.g. Dog)
}
},
{ $unwind : "$labels" },
{
"$group": {
"_id": "$labels.description",
"count": { "$sum": 1 }
}
},
{
"$match": {
"$or": [
{ "_id": "Red" },
{ "_id": "Yellow" }
]
}
},
{
"$sort": {
"count": -1
}
}
])
I want to do this in a single aggregation or mapReduce so that I could easily output it to new collection using $out instead of using Bulk operations separately, however I don't know if it's possible.
Try this:
let filter = ["Red", "Yellow"];
db.testcollection.aggregate([
{
$addFields: { bkp: "$labels" }
},
{ $unwind: "$labels" },
{
$addFields: {
bkp: {
$filter: {
input: "$bkp",
as: "item",
cond: {
$and: [
{ $ne: ["$$item.description", "$labels.description"] },
{ $in: ["$$item.description", filter] }
]
}
}
}
}
},
{
$unwind: {
path: "$bkp",
preserveNullAndEmptyArrays: true
}
},
{
$group: {
_id: {
key1: "$labels.description",
key2: { $ifNull: ["$bkp.description", false] }
},
total: { $sum: 1 }
}
},
{
$group: {
_id: "$_id.key1",
description: { $first: "$_id.key1" },
total: {
$sum: {
$cond: {
if: { $first: [["$_id.key2"]] },
then: 1,
else: "$total"
}
}
},
colors: {
$push: {
$cond: {
if: { $first: [["$_id.key2"]] },
then: {
_id: "$_id.key2",
total: "$total"
},
else: "$$REMOVE"
}
}
}
}
},
{ $project: { _id: 0 } }
]);
For some reason with code from both answers it does not count all tags properly.
I'm posting what works:
db.collection.aggregate([
{
$project: {
labels: 1,
result: {
$filter: {
input: "$labels",
as: "label",
cond: {
$or: [
{ $eq: ["$$label.description", "Blue"] },
{ $eq: ["$$label.description", "Red"] },
{ $eq: ["$$label.description", "Black-and-white"] },
{ $eq: ["$$label.description", "Purple"] },
{ $eq: ["$$label.description", "Orange"] },
{ $eq: ["$$label.description", "Yellow"] },
{ $eq: ["$$label.description", "Green"] },
{ $eq: ["$$label.description", "Teal"] }
]
}
}
}
}
},
{
$unwind: "$labels"
},
{
"$group": {
_id: "$labels.description",
x: {
$push: "$result.description"
},
total: { "$sum": 1 }
}
},
{
$project: {
x: {
$reduce: {
input: '$x',
initialValue: [],
in: {$concatArrays: ['$$value', '$$this']}
}
},
total: 1
}
},
{
$project: {
x: 1,
y: { $setUnion: "$x" },
total: 1
}
},
{
$project: {
_id: 0,
description: "$_id",
"colors": {
$map: {
input: "$y",
as: "item",
in: {
_id: "$$item",
count: {
$size: {
$filter: {
input: "$x",
as: "itemx",
cond: {
$eq: ["$$item", "$$itemx"]
}
}
}
}
}
}
},
total: 1
}
},
{
$out: "backgrounds_meta"
}
])
db.test2.aggregate([
{
$project: {
labels:1,
colours: {
$filter: {
input: "$labels",
as: "label",
cond: {
$or: [
{$eq:["Yellow","$$label.description"]},
{$eq:["Red", "$$label.description"]}
]
}
}
}
}
},
{$unwind:"$labels"},
{$group:{
_id: "$labels.description",
total: {$sum:1},
colours: {$addToSet:"$colours.description"}
}},
{
$project:{
_id:0,
description:"$_id",
total:1,
colours: {
$reduce:{
input: "$colours",
initialValue: [],
in: {$concatArrays: ["$$value", "$$this"]}
}
}
}
},
{
$unwind: {
path:"$colours",preserveNullAndEmptyArrays: true
}
},
{
$group:{
_id:{
description:"$description",
total:"$total",
colour:"$colours"
},
count: {
$sum: {$cond:[{$ifNull:["$colours",false]},1,0]}
}
}
},
{
$group:{
_id:{
description:"$_id.description",
total:"$_id.total"
},
colours: {
$push: {
$cond: [{$gt:["$count",0]},
{
"_id":"$_id.colour",
total:"$count"
},
"$$REMOVE"
]
}
}
}
},
{
$project: {
_id:0,
description: "$_id.description",
total: "$_id.total",
colours: 1
}
}
]);
**Edit In your answer, you are missing the Yellows for Red and Dog because you are taking the first item from $result with $arrayElemAt: ["$result.description", 0].
If description is a colour, do you also want to include the counts for itself in colours?
Never mind, you've updated the answer

MongoDB $match on object not considering key

Please help compose a request.
[{
name: 'Peter',
items: {
a: [1, 2, 3, 6],
b: [0, 1],
c: [4, 1]
},
name: 'Joe',
items: {
d: [2, 3],
e: [0, 1],
},
name: 'Luk',
items: {
f: [0, 1, 8],
},
}]
filter - items.xxx> 5
result - "Peter" and "Luk"
Is this what you are looking for?
db.collection.aggregate([
{ $set: { items: { $objectToArray: "$items" } } },
{
$set: {
items: {
$reduce: {
input: "$items.v",
initialValue: [],
in: { $concatArrays: ["$$this", "$$value"] }
}
}
}
},
{ $match: { items: { $elemMatch: { $gte: 5 } } } },
{ $project: { name: 1 } }
])
Result:
{
"_id" : ObjectId("5e3d7ed0ac00d5ec3a32125f"),
"name" : "Peter"
}
{
"_id" : ObjectId("5e3d7ed0ac00d5ec3a321261"),
"name" : "Luk"
}
If you need the full documents, you can use this one:
db.collection.aggregate([
{
$facet:
{
filter: [
{ $set: { items: { $objectToArray: "$items" } } },
{
$set: {
items: {
$reduce: {
input: "$items.v",
initialValue: [],
in: { $concatArrays: ["$$this", "$$value"] }
}
}
}
},
{ $match: { items: { $elemMatch: { $gte: 5 } } } },
{ $project: { name: 1 } }
],
in: [{ $match: {} }],
}
},
{ $unwind: "$in" },
{ $match: { $expr: { $in: ["$in._id", "$filter._id"] } } },
{ $replaceRoot: { newRoot: "$in" } }
])

MongoDB - group by on facet result

Provided I have following collections
Customers
[
{
uuid: "first",
culture: "it-it"
},
{
uuid: "second",
culture: "de-de"
}
]
Vehicles
[
{
model: "mymodel",
users: [
{
uuid: "first",
isOwner: true,
createdOn: "2019-05-15T06: 00: 00"
}
]
},
{
model: "mymodel",
users: [
{
uuid: "first",
isOwner: false,
createdOn: "2019-05-15T06: 00: 00"
},
{
uuid: "second",
isOwner: true,
createdOn: "2019-05-15T06: 00: 00"
}
]
}
]
And following query:
db.customers.aggregate([
{
$lookup: {
from: "vehicles",
let: {
uuid: "$uuid"
},
pipeline: [
{
$match: {
$expr: {
$in: [
"$$uuid",
"$users.uuid"
]
}
}
},
{
$project: {
model: 1,
users: {
$filter: {
input: "$users",
as: "user",
cond: {
$eq: [
"$$user.uuid",
"$$uuid"
]
}
}
}
}
},
{
$unwind: "$users"
},
{
$replaceRoot: {
newRoot: {
isOwner: "$users.isOwner",
createdOn: "$users.createdOn"
}
}
}
],
as: "vehicles"
}
},
{
$facet: {
"createdOn": [
{
$match: {
"vehicles": {
$elemMatch: {
isOwner: true,
$and: [
{
"createdOn": {
$gte: "2019-05-15T00: 00: 00"
}
},
{
"createdOn": {
$lt: "2019-05-16T00: 00: 00"
}
}
]
}
}
}
},
{
$project: {
culture: 1,
count: {
$size: "$vehicles"
}
}
},
{
$group: {
_id: 0,
"total": {
$sum: "$count"
}
}
}
]
}
},
{
$project: {
"CreatedOn": {
$arrayElemAt: [
"$CreatedOn.total",
0
]
}
}
}
])
I get following result:
[
{
"createdOn": 2
}
]
What I would like to achieve is a result as follows:
[
{
culture: "it-it",
results: {
"createdOn": 1
}
},
{
culture: "de-de",
results: {
"createdOn": 1
}
}
]
But I cannot seem to figure out where I can group so that I can get that result.
Can someone show me the way to do this?
The query is more complex with more metrics so this is a trimmed down version of what I have.
I tried grouping everywhere but fail to get the desired result I want.
The following query can get us the expected output:
db.customers.aggregate([
{
$lookup: {
"from": "vehicles",
"let": {
"uuid": "$uuid"
},
"pipeline": [
{
$unwind: "$users"
},
{
$match: {
$expr: {
$and: [
{
$eq: ["$users.uuid", "$$uuid"]
},
{
$eq: ["$users.isOwner", true]
},
{
$gte: ["$users.createdOn", "2019-05-15T00: 00: 00"]
},
{
$lte: ["$users.createdOn", "2019-05-16T00: 00: 00"]
}
]
}
}
},
{
$count:"totalVehicles"
}
],
as: "vehiclesInfo"
}
},
{
$unwind: {
"path": "$vehiclesInfo",
"preserveNullAndEmptyArrays": true
}
},
{
$group: {
"_id": "$culture",
"culture": {
$first: "$culture"
},
"createdOn": {
$sum: "$vehiclesInfo.totalVehicles"
}
}
},
{
$project: {
"_id": 0,
"culture": 1,
"results.createdOn": "$createdOn"
}
}
]).pretty()
Output:
{ "culture" : "de-de", "results" : { "createdOn" : 1 } }
{ "culture" : "it-it", "results" : { "createdOn" : 1 } }