Filter key/value array by value in mongodb - mongodb

I have this sample data:
[
{
"id": 1,
"marcadores": [
{ "k": "G", "v": "00" },
{ "k": "1", "v": "A" }
]
},
{
"id": 2,
"marcadores": [
{ "k": "1", "v": "A" }
]
},
{
"id": 3,
"marcadores": [
{ "k": "G", "v": "03" },
{ "k": "P", "v": "55" }
]
}
]
I would like to filter those documents with these criteria:
marcadores.k: "G" and marcadores.v: { $ne: "00" } ($elemMatch). In the example, "id": 3 meets this criteria
or
document has no marcadores.k: "G". In the example, "id": 2 meets this criteria.
Expected output:
[
{
"id": 2,
"marcadores": [
{ "k": "1", "v": "A" }
]
},
{
"id": 3,
"marcadores": [
{ "k": "G", "v": "03" },
{ "k": "P", "v": "55" }
]
}
]
What's the best/cleanest way to solve this query?
You can use this playground.

Using find we can use $not with $elemMatch:
// Return every document whose "marcadores" array holds no element that is
// simultaneously k = "G" and v = "00". $elemMatch ties both conditions to the
// same array element; $not inverts the match for the whole document.
db.collection.find({
  marcadores: {
    $not: {
      $elemMatch: {"k": "G", "v": "00"}
    }
  }
})
See how it works on the playground example - find

Related

Calculate running total across for different groups by day

I'm trying to aggregate a collection of transactions into a running total of owners by day.
The initial collection looks like this:
[
{ "to": "A", "from": "0", "ts": 1 },
{ "to": "A", "from": "0", "ts": 1 },
{ "to": "B", "from": "0", "ts": 1 },
{ "to": "B", "from": "0", "ts": 2 },
{ "to": "C", "from": "0", "ts": 3 },
{ "to": "A", "from": "B", "ts": 4 }
]
What I would like to get is something like this:
[
{
"ts": 1,
"holdings": [
{ "owner": "0", "holdings": -3 },
{ "owner": "A", "holdings": 2 },
{ "owner": "B", "holdings": 1 }
]
},
{
"ts": 2,
"holdings": [
{ "owner": "0", "holdings": -4 },
{ "owner": "A", "holdings": 2 },
{ "owner": "B", "holdings": 2 }
]
},
{
"ts": 4,
"holdings": [
{ "owner": "0", "holdings": -5 },
{ "owner": "A", "holdings": 3 },
{ "owner": "B", "holdings": 1 },
{ "owner": "C", "holdings": 1 }
]
}
]
I've already understood how to generate this for a single ts that I'm setting, but I don't know how to do it across all ts.
The aggregation pipeline for a single ts looks like this:
// Computes each owner's net holdings over all transactions with ts < 6,
// i.e. the running total for one hand-picked cut-off rather than for every ts.
db.collection.aggregate([
// start with: { "to": "A", "from": "0", "ts": 1 }
{
// create a doc with an array holding one entry per side of the transfer
// (+1 for the receiver, -1 for the sender); _id is kept by default:
// { "_id": ObjectId("5a934e000102030405000000"),
// "data": [ { "change": 1, "owner": "A", "ts": 1 },
// { "change": -1, "owner": "0", "ts": 1 } ] }
$project: {
data: [
{
owner: '$to',
ts: '$ts',
change: 1,
},
{
owner: '$from',
ts: '$ts',
change: -1,
},
],
},
},
{
// unwind the array into 2 docs (both keep the _id of the source doc):
// { "_id": ObjectId("5a934e000102030405000000"), "data": { "change": 1, "owner": "A", "ts": 1 } },
// { "_id": ObjectId("5a934e000102030405000000"), "data": { "change": -1, "owner": "0", "ts": 1 } },
$unwind: '$data',
},
{
// promote the embedded "data" doc to be the whole doc (the wrapper and _id are gone):
// { "change": 1, "owner": "A", "ts": 1 },
// { "change": -1, "owner": "0", "ts": 1 }
$replaceRoot: {
newRoot: '$data',
},
},
{
// select the cut-off: only entries with ts strictly below 6 are totalled
$match: {
ts: {
$lt: 6,
},
},
},
{
// sum the +1/-1 changes, grouped by owner
$group: {
_id: '$owner',
//_id: null,
holdings: {
$sum: '$change',
},
},
},
])
This gives the correct result for a particular day (selected in the match stage). I don't understand how I can now generalize that to all days.
One way to do it is using $setWindowFields, which has a built-in accumulation:
// Running total of holdings per owner, reported once per ts.
// NOTE: an owner is listed under a ts only if it took part in a transaction
// at that ts; owners that were idle at a ts are not carried forward into it.
db.collection.aggregate([
  // Split every transfer into two ledger entries:
  // +1 for the receiver ("to") and -1 for the sender ("from").
  {
    $project: {
      ts: "$ts",
      data: [{owner: "$to", change: 1}, {owner: "$from", change: -1}]
    }
  },
  {$unwind: "$data"},
  // Net change per (ts, owner) pair.
  {
    $group: {
      _id: {ts: "$ts", owner: "$data.owner"},
      holdings: {$sum: "$data.change"}
    }
  },
  // Accumulate each owner's net changes in ts order, from the first
  // document of the partition up to the current one ($setWindowFields
  // needs MongoDB 5.0 or newer).
  {
    $setWindowFields: {
      partitionBy: "$_id.owner",
      sortBy: {"_id.ts": 1},
      output: {
        cumulativeHoldings: {
          $sum: "$holdings",
          window: {documents: ["unbounded", "current"]}
        }
      }
    }
  },
  // $push appends in arrival order, so sort first to get every
  // "holdings" array ordered by owner.
  {$sort: {"_id.ts": 1, "_id.owner": 1}},
  // Collect the per-owner running totals of each ts into one document.
  {
    $group: {
      _id: "$_id.ts",
      holdings: {$push: {owner: "$_id.owner", holdings: "$cumulativeHoldings"}}
    }
  },
  // $group does not guarantee any output order; return the days chronologically.
  {$sort: {_id: 1}}
])
Playground

Mongodb print count of unique values from multiple fields

I got the following documents of a collection (let's name it myCollection):
{
"_id": {
"$oid": "601a75a0c9a338f09f238816"
},
"Sample": "lie50",
"Chromosome": "chr10",
"Position": {
"$numberLong": "47663"
},
"Reference": "C",
"Mutation": "T",
"Run": "Run_test",
"SYMBOL": "TUBB8"
},
{
"_id": {
"$oid": "601a75a0c9a338f09f238817"
},
"Sample": "lie50",
"Chromosome": "chr10",
"Position": {
"$numberLong": "47876"
},
"Reference": "T",
"Mutation": "C",
"Run": "Run_test",
"SYMBOL": "TUBB8"
},
{
"_id": {
"$oid": "601a75a0c9a338f09f238818"
},
"Sample": "lie50",
"Chromosome": "chr10",
"Position": {
"$numberLong": "48005"
},
"Reference": "G",
"Mutation": "A",
"Run": "Run_test",
"SYMBOL": "TUBB8"
},
{
"_id": {
"$oid": "601a75a0c9a338f09f238819"
},
"Sample": "lie12",
"Chromosome": "chr10",
"Position": {
"$numberLong": "48005"
},
"Reference": "G",
"Mutation": "A",
"Run": "Run_test",
"SYMBOL": "TUBB8"
}
I am interested in printing the distinct count of the values from the fields Chromosome, Position, Reference, and Mutation. This means to count the unique fields of the following entries:
"Chromosome": "chr10", "Position": 47663, "Reference": "C", "Mutation": "T"
"Chromosome": "chr10", "Position": 47876, "Reference": "T", "Mutation": "C"
"Chromosome": "chr10", "Position": 48005, "Reference": "G", "Mutation": "A"
"Chromosome": "chr10", "Position": 48005, "Reference": "G", "Mutation": "A"
which should be 3 distinct rows.
I have checked multiple questions like this one on how to print the distinct values for one field or using $unwind/$project.
For the latter, I thought why not concatenate the 4 fields and then print the number using length with $unwind/$project?
I managed to get that far:
// Attempt at counting distinct Chromosome/Position/Reference/Mutation rows by
// concatenating the four fields into one string per document.
db.myCollection.aggregate(
[
{
$group:
{
// a null _id puts every document of the collection into one single group
_id: null,
newfield: {
// $addToSet keeps each distinct concatenated string only once
$addToSet:
{
$concat:
[
"$Chromosome",
"_",
// Position is converted to a string before it is concatenated
{"$toString":"$Position"},
"_",
"$Reference",
"_",
"$Mutation"
]
}
}
}
},
{
// emit one document per distinct string collected above
$unwind: "$newfield"
},
{
// drop the (null) group _id so only "newfield" is returned
$project: { _id: 0 }
}
// NOTE(review): all distinct strings are first accumulated in the array of one
// group document; on a very large collection that array may presumably exceed
// the maximum document size - confirm before relying on this approach.
// As described in the surrounding text, appending .length does not yield a count.
]).length
However, adding .length to this query does not return anything, whereas without it the query returns:
{ "newfield" : "chr10_47663_C_T" }
{ "newfield" : "chr10_47876_T_C" }
{ "newfield" : "chr10_48005_G_A" }
For information, my actual data contains 2 billion documents.
Pass the fields as the _id of the $group stage, and use a $count stage to get the total number of distinct combinations instead of returning all the documents:
// Count the distinct (Chromosome, Position, Reference, Mutation) combinations.
db.myCollection.aggregate(
  [
    // One group per distinct combination; the compound _id is the grouping
    // key, so no string concatenation or type conversion is needed.
    {
      $group: {
        _id: {
          Chromosome: "$Chromosome",
          Position: "$Position",
          Reference: "$Reference",
          Mutation: "$Mutation"
        }
      }
    },
    // Return only the number of groups instead of the groups themselves.
    { $count: "count" }
  ],
  // Grouping a very large collection can exceed the per-stage memory limit;
  // allow the stage to spill to temporary files instead of failing.
  { allowDiskUse: true }
)
Playground

MongoDB, How to associate array fields for statistics

Example JSON:
{
"groups": [
{
"_id": 1,
"name": "g1"
},
{
"_id": 2,
"name": "g2"
}
],
"items": [
{
"_id": 1,
"name": "item1",
"gid": 1
},
{
"_id": 2,
"name": "item2",
"gid": 2
}
]
}
How can I associate the two arrays and count the items? I tried to use aggregate, but I didn't get the results I wanted.
Required Result:
Or, if it can directly return all the items associated with each group, that would be perfect:
{"groups": [
{
"_id": 1,
"name": "g1",
"count": 1,
"items": [
{
"_id": 1,
"name": "item1"
}
]
},
{
"_id": 2,
"name": "g2",
"count": 1,
"items": [
{
"_id": 2,
"name": "item2"
}
]
}
]}
// Pair every entry of "groups" with the entries of "items" that point at it
// (items.gid == groups._id), then rebuild one document per group carrying
// the number of matching items and the items themselves.
db.getCollection('collection').aggregate([
  // Expand both arrays so that each document holds exactly one
  // (group, item) combination.
  {
    $unwind: {
      path: "$groups",
      preserveNullAndEmptyArrays: true
    }
  },
  {
    $unwind: {
      path: "$items",
      preserveNullAndEmptyArrays: true
    }
  },
  // Keep a combination only when the item references the group;
  // every other combination is dropped entirely.
  {
    $redact: {
      $cond: {
        if: { $eq: ["$groups._id", "$items.gid"] },
        then: "$$KEEP",
        else: "$$PRUNE"
      }
    }
  },
  // Flatten the group fields and trim the item down to _id and name.
  {
    $project: {
      _id: 1,
      groups_id: "$groups._id",
      group_name: "$groups.name",
      item_data: {
        _id: "$items._id",
        name: "$items.name"
      }
    }
  },
  // Fold the surviving combinations back into one document per group.
  {
    $group: {
      _id: "$groups_id",
      name: { $first: "$group_name" },
      count: { $sum: 1 },
      items: { $push: "$item_data" }
    }
  }
])

Get key value pair result of activities in Mongodb

Get a key/value pair result for the activities: each inner array of activities should be placed under a key taken from the term of its first element.
INPUT
[
{
"_id": "diamond",
"activities": [
[
{
"term": "11",
"sport_name": "football"
}
]
]
},
{
"_id": "topaz",
"activities": [
[
{
"term": "12",
"sport_name": "football"
}
],
[
{
"term": "11",
"sport_name": "football"
},
{
"term": "11",
"sport_name": "hand ball"
}
]
]
}
]
OUTPUT
[
{
"_id": "diamond",
"activities": [
{
"11": [
{
"term": "11",
"sport_name": "football"
}
]
}
]
},
{
"_id": "topaz",
"activities": [
{
"12": [
{
"term": "12",
"sport_name": "football"
}
]
},
{
"11": [
{
"term": "11",
"sport_name": "football"
},
{
"term": "11",
"sport_name": "hand ball"
}
]
}
]
}
]
You can use the aggregation below:
// Turn every inner array of "activities" into its own single-key object,
// keyed by the "term" of the first element of that inner array, e.g.
// [ [ {term: "11", ...} ] ]  ->  [ { "11": [ {term: "11", ...} ] } ]
db.collection.aggregate([
  { "$project": {
    "activities": {
      // Map over the outer array so the result stays an array with one
      // object per inner array; inner arrays that share the same first
      // term therefore cannot overwrite each other.
      "$map": {
        "input": "$activities",
        "in": {
          // The argument is a one-element list of k/v pairs, which
          // $arrayToObject converts into a single-key object.
          "$arrayToObject": [[
            {
              // "$$this.term" is the list of terms of the inner array;
              // its first entry becomes the key.
              "k": { "$arrayElemAt": ["$$this.term", 0] },
              "v": "$$this"
            }
          ]]
        }
      }
    }
  }}
])
MongoPlayground

MongoDB projection. Operator $add field|expression array awareness or after $slice

I've got collection that looks like:
[{
"org": "A",
"type": "simple",
"payFor": 3,
"price": 100
},
{
"org": "A",
"type": "custom",
"payFor": 2,
"price": 115
},
{
"org": "B",
"type": "simple",
"payFor": 1,
"price": 110
},
{
"org": "B",
"type": "custom",
"payFor": 2,
"price": 200
},
{
"org": "B",
"type": "custom",
"payFor": 4,
"price": 220
}]
I need a query that groups by "org", where the payment for each "type" covers only the first "payFor" prices of that "type".
I'm trying to use the result of a $slice expression in $add, but this does not work.
pipeline:
[{
"$group": {
"_id": {
"org": "$org",
"type": "$type"
},
"payFor": {
"$max": "$payFor"
},
"count": {
"$sum": 1
},
"prices": {
"$push": "$price"
}
}
},
{
"$group": {
"_id": "$_id.org",
"payments": {
"$push": {
"type": "$_id.type",
"forFirst": "$payFor",
"sum": {
"$cond": [
{
"$gte": [
"$payFor",
"$count"
]
},
{
"$add": {
"$prices": {
"$slice": "$count"
}
}
},
{
"$add": "$prices"
}
]
}
}
}
}
}]
I know that it is possible to unwind the prices and pick only the first "payFor" of them, but the real collections are much richer than the example above, and this operation would add unnecessary overhead.
Need some advice from community. Please. Thanks.