How to match string within embedded array or doc in MongoDB? - mongodb

After searching for a whole day, I doubt whether MongoDB can fulfill the requirement below:
Q: How can I find the documents that meet the following condition?
In the last array element of students_replies, there is a reply from a student whose name contains the string 'ason'.
id_1: first_school, students_replies: [
{Date:20210101, replies: [
{name: jack, reply: 'I do not like this idea'},
{name: jason, reply: 'I would rather stay at home'},
{name: charles, reply: 'I have an plan to improve'},
]},
{Date:20210401, replies: [
...]},
{Date:20210801, replies: [
...]},
]
id_2: second_shool, students_replies: [..]
id_3: third_shool, students_replies: [...]
Mongoplayground

Use $slice and $regex
For your example this becomes:
db.collection.aggregate([
  // Stage 1: keep `key` and cut students_replies down to its final entry.
  // $slice with -1 returns a one-element array, so last_reply is still an array.
  {
    $project: {
      key: 1,
      last_reply: { $slice: ["$students_replies", -1] }
    }
  },
  // Stage 2: keep the documents where a reply name inside that entry matches "ason".
  {
    $match: {
      "last_reply.replies.name": { $regex: "ason" }
    }
  }
])
https://mongoplayground.net/p/a9piw2WQ8n6

Since you need last array element of students_replies, use $arrayElemAt
db.collection.aggregate([
  {
    $match: {
      $expr: {
        // Join every name of the last students_replies entry into one
        // comma-separated string and test that string against the pattern.
        $regexMatch: {
          input: {
            $reduce: {
              input: { $arrayElemAt: ["$students_replies.replies", -1] },
              initialValue: "",
              in: {
                $concat: [
                  "$$value",
                  // $concat yields null as soon as one argument is null, which
                  // would hide every later name; treat a missing name as "".
                  { $ifNull: ["$$this.name", ""] },
                  ","
                ]
              }
            }
          },
          regex: "ason"
        }
      }
    }
  },
  // Drop the (potentially large) array from the result.
  { $project: { students_replies: 0 } }
])
mongoplayground
another answer
db.collection.aggregate([
  {
    $match: {
      $expr: {
        // Match when at least one name in the last entry contains "ason".
        $ne: [
          {
            $filter: {
              input: {
                $map: {
                  // When students_replies is empty or missing, $arrayElemAt
                  // yields nothing and $map/$filter would return null; since
                  // null != [] is true, such documents would match by
                  // mistake. Fall back to an empty list instead.
                  input: {
                    $ifNull: [
                      { $arrayElemAt: ["$students_replies.replies", -1] },
                      []
                    ]
                  },
                  as: "r",
                  in: "$$r.name"
                }
              },
              as: "s",
              cond: { $regexMatch: { input: "$$s", regex: "ason" } }
            }
          },
          []
        ]
      }
    }
  },
  // Drop the (potentially large) array from the result.
  { $project: { students_replies: 0 } }
])
mongoplayground

Related

$filter inside $reduce or inside $map from array without unwind

I need some help:
I want to optimize this query to be faster. It needs to take all docs with server:"strong" and keep only the events with events.eventType:"log", but without separate $unwind and filter stages; maybe by somehow adding a $filter inside the $reduce stage.
example single document:
{
server: "strong",
events: [
{
eventType: "log",
createdAt: "2022-01-23T10:26:11.214Z",
visitorInfo: {
visitorId: "JohnID"
}
}
current aggregation query:
db.collection.aggregate([
  // Only documents coming from the "strong" server.
  { $match: { server: "strong" } },
  {
    $project: {
      // Fold over events, collecting every visitorId and the distinct visitorIds.
      total: {
        $reduce: {
          input: "$events",
          initialValue: { visitor: [], uniquevisitor: [] },
          in: {
            // Appended unconditionally: one entry per event.
            visitor: {
              $concatArrays: ["$$value.visitor", ["$$this.visitorInfo.visitorId"]]
            },
            // Appended only when the id has not been collected yet.
            uniquevisitor: {
              $cond: [
                { $in: ["$$this.visitorInfo.visitorId", "$$value.uniquevisitor"] },
                "$$value.uniquevisitor",
                { $concatArrays: ["$$value.uniquevisitor", ["$$this.visitorInfo.visitorId"]] }
              ]
            }
          }
        }
      }
    }
  }
])
expected output , two lists with unique visitorId & list of all visitorId:
[
{
"total": {
"uniquevisitor": [
"JohnID"
],
"visitor": [
"JohnID",
"JohnID"
]
}
}
]
playground
In the example query no filter is added for events.eventType:"log" , how can this be implemented without $unwind?
I am not sure this approach is more optimized than yours, but it might help:
$filter to iterate loop of events and filter by eventType
$let to declare a variable events and store the above filters result
return array of visitor by using dot notation $$events.visitorInfo.visitorId
return array of unique visitor uniquevisitor by using dot notation $$events.visitorInfo.visitorId and $setUnion operator
db.collection.aggregate([
  { $match: { server: "strong" } },
  {
    $project: {
      total: {
        $let: {
          // events: only the entries whose eventType is "log".
          vars: {
            events: {
              $filter: {
                input: "$events",
                as: "event",
                cond: { $eq: ["$$event.eventType", "log"] }
              }
            }
          },
          in: {
            // Dot notation over an array collects the field from every element.
            visitor: "$$events.visitorInfo.visitorId",
            // $setUnion over a single array removes the duplicates.
            uniquevisitor: { $setUnion: ["$$events.visitorInfo.visitorId"] }
          }
        }
      }
    }
  }
])
Playground
Or similar approach without $let and two $project stages,
db.collection.aggregate([
  { $match: { server: "strong" } },
  // First $project: replace events with only its "log" entries.
  {
    $project: {
      events: {
        $filter: {
          input: "$events",
          as: "event",
          cond: { $eq: ["$$event.eventType", "log"] }
        }
      }
    }
  },
  // Second $project: all visitor ids, and the same list without duplicates.
  {
    $project: {
      total: {
        visitor: "$events.visitorInfo.visitorId",
        uniquevisitor: { $setUnion: ["$events.visitorInfo.visitorId"] }
      }
    }
  }
])
Playground

How to update values in string array in all documents? - MongoDB

I have in my collection this structure:
{
_id: ObjectId('...'),
images: [
"images/key1",
"images/key2",
"images/key3",
"images/key4"
],
.... ,
....
}
So, I want to update all documents to:
{
_id: ObjectId('...'),
images: [
"key1",
"key2",
"key3",
"key4"
],
.... ,
....
}
Replacing in all values 'images/' with ''. Thanks 😁
You could do it with an aggregation-pipeline update like this:
match every document ({}), then in an $addFields stage use $map over the array, $split each value on "images/" and keep the last element.
db.collection.update(
  {},
  [
    {
      // $set is the alias of $addFields: overwrite images in place.
      $set: {
        images: {
          $map: {
            input: "$images",
            as: "path",
            // Split on the prefix and keep whatever follows the last occurrence.
            in: { $last: { $split: ["$$path", "images/"] } }
          }
        }
      }
    }
  ],
  { multi: true }
)
https://mongoplayground.net/p/6fDBAlpKDBj
or use this
db.collection.update(
  {},
  [
    {
      $addFields: {
        images: {
          $map: {
            input: "$images",
            as: "i",
            in: {
              // Take the LAST piece (index -1), not index 1: a value without
              // the "images/" prefix splits into a single piece, so index 1
              // does not exist and the value would be replaced by null.
              // With -1 such values are left unchanged, which also makes it
              // safe to run the update more than once.
              $arrayElemAt: [{ $split: ["$$i", "images/"] }, -1]
            }
          }
        }
      }
    }
  ],
  { multi: true }
)
(the same as the first query, but with $last replaced by $arrayElemAt)
https://mongoplayground.net/p/ecHMquZGazy

MongoDB get all documents where array values match

I have the following structure of document:
{
"input": {
"fields": [
{
"name": "last_name_hebrew",
"text": "test1",
},
],
},
"output": {
"fields": [
{
"name": "last_name_hebrew",
"text": "test1"
},
],
},
},
I want to get all documents where input.fields contains an object whose name is last_name_hebrew and whose text equals the text of the object with the same name in output.fields.
For example in the given structure it would return this documents because input.fields.name is last_name_hebrew and text is equal to the text in output.
Note I cannot guarantee that fields array in either input or output will have name: last_name_hebrew in the array.
How can I do so?
This is my try to first force the arrays to have document with name of last_name_hebrew:
// Require an element named last_name_hebrew in both input.fields and output.fields.
db.collection.find({
  "input.fields": { $elemMatch: { name: "last_name_hebrew" } },
  "output.fields": { $elemMatch: { name: "last_name_hebrew" } }
})
But now I need to compare the text values.
Your first 2 condition with $elemMatch is correct
Add an $expr match: first find the element named last_name_hebrew in input.fields using $filter and take the first element of the filtered result with $arrayElemAt; do the same for output.fields; then compare the two objects with $eq.
db.collection.find({
  // Both arrays must contain an element named last_name_hebrew.
  "input.fields": { $elemMatch: { name: "last_name_hebrew" } },
  "output.fields": { $elemMatch: { name: "last_name_hebrew" } },
  // Compare the first such element of each array as whole objects.
  $expr: {
    $eq: [
      {
        $arrayElemAt: [
          {
            $filter: {
              input: "$input.fields",
              as: "field",
              cond: { $eq: ["$$field.name", "last_name_hebrew"] }
            }
          },
          0
        ]
      },
      {
        $arrayElemAt: [
          {
            $filter: {
              input: "$output.fields",
              as: "field",
              cond: { $eq: ["$$field.name", "last_name_hebrew"] }
            }
          },
          0
        ]
      }
    ]
  }
});
Playground
Second option: if you want to go with more specific to match exact 2 fields name and text both just need to add $let operator to return fields from filter,
db.collection.find({
  // Both arrays must contain an element named last_name_hebrew.
  "input.fields": { $elemMatch: { name: "last_name_hebrew" } },
  "output.fields": { $elemMatch: { name: "last_name_hebrew" } },
  // Compare only name and text of the first such element on each side.
  $expr: {
    $eq: [
      {
        $let: {
          vars: {
            inField: {
              $arrayElemAt: [
                {
                  $filter: {
                    input: "$input.fields",
                    as: "field",
                    cond: { $eq: ["$$field.name", "last_name_hebrew"] }
                  }
                },
                0
              ]
            }
          },
          in: { name: "$$inField.name", text: "$$inField.text" }
        }
      },
      {
        $let: {
          vars: {
            outField: {
              $arrayElemAt: [
                {
                  $filter: {
                    input: "$output.fields",
                    as: "field",
                    cond: { $eq: ["$$field.name", "last_name_hebrew"] }
                  }
                },
                0
              ]
            }
          },
          in: { name: "$$outField.name", text: "$$outField.text" }
        }
      }
    ]
  }
})
Playground
Third option: for more specific to check both fields in loop,
first filter the matching elements by name in input field using $filter
pass above filter result in another filter
filter to match name and text field in output field, if its not [] empty then return filter result
$ne to check return result is not [] empty
db.collection.find({
  // Both arrays must contain an element named last_name_hebrew.
  "input.fields": { $elemMatch: { name: "last_name_hebrew" } },
  "output.fields": { $elemMatch: { name: "last_name_hebrew" } },
  $expr: {
    // Match when at least one candidate input field has a partner in output.fields.
    $ne: [
      {
        $filter: {
          // Candidates: input fields named last_name_hebrew.
          input: {
            $filter: {
              input: "$input.fields",
              as: "candidate",
              cond: { $eq: ["$$candidate.name", "last_name_hebrew"] }
            }
          },
          as: "inField",
          // Keep a candidate when some output field has the same name and text.
          cond: {
            $ne: [
              {
                $filter: {
                  input: "$output.fields",
                  as: "outField",
                  cond: {
                    $and: [
                      { $eq: ["$$outField.name", "$$inField.name"] },
                      { $eq: ["$$outField.text", "$$inField.text"] }
                    ]
                  }
                }
              },
              []
            ]
          }
        }
      },
      []
    ]
  }
})
Playground
You will have to use an aggregation pipeline to achieve this, there are several ways to do so, here is one example:
db.collection.aggregate([
  {
    $match: {
      $expr: {
        // Match when at least one input field named last_name_hebrew has a
        // text that also occurs among the output field texts.
        $gt: [
          {
            $size: {
              $filter: {
                // $filter returns null for a missing array and $size then
                // raises an error that aborts the whole aggregation, so a
                // document without input.fields is treated as having none.
                input: { $ifNull: ["$input.fields", []] },
                as: "inputField",
                cond: {
                  $and: [
                    { $eq: ["$$inputField.name", "last_name_hebrew"] },
                    {
                      $setIsSubset: [
                        ["$$inputField.text"],
                        // $setIsSubset requires arrays on both sides; fall
                        // back to an empty set when output.fields is missing.
                        { $ifNull: ["$output.fields.text", []] }
                      ]
                    }
                  ]
                }
              }
            }
          },
          0
        ]
      }
    }
  }
])
Mongo Playground
One thing to note is that with this query there are no restrictions on the output.fields.name (as it was not required), if you do require the names to match then you can drop the .text field in the $setIsSubset operator.

Splitting an alphanumeric string like "3a" or "32ab" in mongodb aggregation pipeline

I would like to split an alphanumeric string like 3a into "3" and "a". Please help if any one has an idea. I can't use the $split in mongodb aggregation.
I'm not sure that this is efficient, but this answer may give you a solution.
Since we can't use regex in $split,
First stage - divide the sentence into words and store in char[]
Flat the char[] using $unwind
Categorize all string into strings[] and all numbers into numbers[] using $facet. Here we use $match with regex
Then combined as what you need.
Assume this is your string.
{
char:"32ab"
}
The mongo script might be,
// Per-document variant of the steps listed above. The $unwind + $facet form
// regroups with _id: null, so on a collection holding more than one document
// the characters of ALL documents end up merged into a single result.
// Here each document is processed on its own and keeps its _id.
// Requires MongoDB 4.2+ for $regexFindAll.
db.collection.aggregate([
  {
    $project: {
      // All letters of `char`, in their original order.
      string: {
        $reduce: {
          input: { $regexFindAll: { input: "$char", regex: "[A-Za-z]+" } },
          initialValue: "",
          in: { $concat: ["$$value", "$$this.match"] }
        }
      },
      // Everything that is not a letter, joined and converted to an int.
      // onError/onNull give null (instead of an error) when there are no
      // digits or the remainder is not a valid integer.
      number: {
        $convert: {
          input: {
            $reduce: {
              input: { $regexFindAll: { input: "$char", regex: "[^A-Za-z]+" } },
              initialValue: "",
              in: { $concat: ["$$value", "$$this.match"] }
            }
          },
          to: "int",
          onError: null,
          onNull: null
        }
      }
    }
  }
])
And the output is
{
string : "ab",
number: 32
}

Query to find connected components in mongodb graph collection?

I want to group the connected component in the mongodb collection.
Example:
{ '_id': 1, 'data': '...', 'similar_id': [2,3,4] }
{ '_id': 2, 'data': '...', 'similar_id': [1] }
{ '_id': 3, 'data': '...', 'similar_id': [1,4] }
{ '_id': 4, 'data': '...', 'similar_id': [1,3] }
{ '_id': 7, 'data': '...', 'similar_id': [2,3,4] }
{ '_id': 5, 'data': '...', 'similar_id': [6] }
{ '_id': 6, 'data': '...', 'similar_id': [5] }
Diagram for above network.
So I want a query which can find connected components.
{ '_id': ..., 'groups': {[1,2,3,4], [5,6], [7]} }
The result may not need to look like above but only in some form such that they are separated in different groups.
It ain't pretty but this is what I got, a brief description of my strategy was initially creating two groups of nodes. one contains node that are "connected" (i.e both x=>y and y=>x edges exist). and the other are potential single nodes. meaning they had one or zero of the x=>y or y=>x edges.
Once achieving this all we have to do is reducing the array by connecting connected nodes.
Mind you, I fully believe this is not the "best" way to achieve the result you want, as I just focused on getting it done without overthinking performance or redundancy. With that said, I consider myself a Mongo enthusiast and I would definitely say I struggled with this a little. For me this is usually a red flag that says my schema or db solution is wrong (maybe use a graph db?). Again, these are just my opinions and it's entirely possible I just tangled myself up with this pipeline.
It's worth mentioning I considered an approach using $graphLookup; however, on a fully connected or nearly fully connected graph this requires a depth of n, where n = number of nodes. Eventually I decided against it, although this approach might be viable if you have any prior knowledge that can limit the depth to a certain constant.
db.collection.aggregate([
  // One document per directed edge; nodes without neighbours are kept.
  { $unwind: { path: "$similar_id", preserveNullAndEmptyArrays: true } },
  // A node without neighbours becomes a self-pair so it still reaches the "single" facet.
  { $addFields: { similar_id: { $ifNull: ["$similar_id", "$_id"] } } },
  { $sort: { _id: 1, similar_id: -1 } },
  // Normalise every edge to [smaller, larger] so x=>y and y=>x share one key.
  {
    $addFields: {
      tmpId: {
        $cond: [
          { $gt: ["$similar_id", "$_id"] },
          ["$_id", "$similar_id"],
          ["$similar_id", "$_id"]
        ]
      }
    }
  },
  // sum == 2: the edge exists in both directions; sum == 1: one direction only (or a self-pair).
  { $group: { _id: "$tmpId", sum: { $sum: 1 } } },
  {
    $facet: {
      // Nodes that appear on a one-directional edge: candidates for single-node groups.
      single: [
        { $match: { sum: 1 } },
        { $unwind: "$_id" },
        { $group: { _id: null, potentionals: { $addToSet: "$_id" } } }
      ],
      // Mutual edges, merged into connected groups.
      clusters: [
        { $match: { sum: 2 } },
        { $group: { _id: null, edges: { $addToSet: "$_id" } } },
        {
          $project: {
            // Every node that takes part in at least one mutual edge.
            all: {
              $reduce: {
                input: "$edges",
                initialValue: [],
                in: { $setUnion: ["$$this", "$$value"] }
              }
            },
            // For each edge: merge the edge and EVERY subgroup it touches
            // into one subgroup, and keep the untouched subgroups as they are.
            // Merging all touched subgroups together matters when an edge
            // bridges two subgroups built earlier; extending each of them
            // separately would leave two overlapping groups.
            groups: {
              $reduce: {
                input: "$edges",
                initialValue: [],
                in: {
                  $let: {
                    vars: {
                      edge: "$$this",
                      touching: {
                        $filter: {
                          input: "$$value",
                          as: "subgroup",
                          cond: {
                            $gt: [
                              { $size: { $setIntersection: ["$$subgroup", "$$this"] } },
                              0
                            ]
                          }
                        }
                      },
                      apart: {
                        $filter: {
                          input: "$$value",
                          as: "subgroup",
                          cond: {
                            $eq: [
                              { $size: { $setIntersection: ["$$subgroup", "$$this"] } },
                              0
                            ]
                          }
                        }
                      }
                    },
                    in: {
                      $concatArrays: [
                        "$$apart",
                        [
                          {
                            // With no touching subgroup this is just the edge
                            // itself, i.e. a new subgroup is started.
                            $reduce: {
                              input: "$$touching",
                              initialValue: "$$edge",
                              in: { $setUnion: ["$$value", "$$this"] }
                            }
                          }
                        ]
                      ]
                    }
                  }
                }
              }
            }
          }
        }
      ]
    }
  },
  { $unwind: { path: "$single", preserveNullAndEmptyArrays: true } },
  { $unwind: { path: "$clusters", preserveNullAndEmptyArrays: true } },
  {
    $project: {
      // Final result: the merged clusters plus one single-node group for every
      // candidate that is not already part of a cluster. The $ifNull guards
      // cover an empty facet (no mutual edges, or no one-directional edges),
      // which would otherwise make $size fail or turn groups into null.
      groups: {
        $concatArrays: [
          { $ifNull: ["$clusters.groups", []] },
          {
            $map: {
              input: {
                $filter: {
                  input: { $ifNull: ["$single.potentionals", []] },
                  as: "pot",
                  cond: {
                    $eq: [
                      {
                        $size: {
                          $setIntersection: [
                            ["$$pot"],
                            { $ifNull: ["$clusters.all", []] }
                          ]
                        }
                      },
                      0
                    ]
                  }
                }
              },
              as: "single",
              in: ["$$single"]
            }
          }
        ]
      }
    }
  }
])
MongoPlayground
Sorry for such late reply, but maybe other will find this useful.
You can try using NetworkX library in Python.
1st unwind similar_id to have documents with pairs {'_id':1,'similar_id':2}
import networkx as nx

# Unwind similar_id into one document per (_id, similar_id) pair.
# preserveNullAndEmptyArrays keeps documents whose similar_id is empty or
# missing, so isolated nodes are not lost.
unwind = {'$unwind': {'path': '$similar_id', 'preserveNullAndEmptyArrays': True}}
pipeline = [unwind]
cursor = db.collection.aggregate(pipeline)

# Build a directed graph first so the direction of every link is known.
G = nx.DiGraph()
for c in cursor:
    G.add_node(c['_id'])  # registers the node even when it has no neighbours
    if c.get('similar_id') is not None:
        G.add_edge(c['_id'], c['similar_id'])

# Keep only links present in both directions; in the example, 7 points to
# 2, 3 and 4 but nothing points back, and the expected result lists [7] alone.
mutual = G.to_undirected(reciprocal=True)

all_clusters = list(nx.connected_components(mutual))  # a list of all connected components
len(all_clusters)  # number of connected components