MongoDB Aggregation on multiple nested arrays - mongodb

I'm trying to work out how to query a document which has two layers of nested arrays.
{
"_id" : ObjectId("5d7fb679d76f3bbf82ed952e"),
"org-name" : "Shropshire Community Health NHS Trust",
"domain" : "shropscommunityhealth.nhs.uk",
"subdomains" : [
{
"name" : "www.shropscommunityhealth.nhs.uk",
"firstSeen" : "2015-10-17 01:10:00",
"a_rr" : "195.49.146.9",
"data_retrieved" : ISODate("2019-09-16T17:21:11.468Z"),
"asn" : 21472,
"asn_org" : "ServerHouse Ltd",
"city" : "Portsmouth",
"country" : "United Kingdom",
"shodan" : {
"ports" : [
{
"port" : 443,
"cpe" : "cpe:/a:microsoft:internet_information_server:8.5",
"product" : "Microsoft IIS httpd"
},
{
"port" : 80,
"cpe" : "cpe:/o:microsoft:windows",
"product" : "Microsoft HTTPAPI httpd"
}
],
"timestamp" : ISODate("2019-09-16T17:21:12.659Z")
}
},
{
"name" : "www2.shropscommunityhealth.nhs.uk",
"firstSeen" : "2017-06-23 16:55:00",
"a_rr" : "80.175.25.17",
"data_retrieved" : ISODate("2019-09-16T17:21:12.663Z"),
"asn" : 8607,
"asn_org" : "Timico Limited",
"city" : null,
"country" : "United Kingdom",
"shodan" : {
"timestamp" : ISODate("2019-09-16T17:21:13.664Z")
}
}
]
}
I want to be able to search through the collection and return all of the subdomains where where there is a match on the port number supplied. So far I've tried (in PyMongo)
result = db.aggregate([{'$match': {'subdomains.shodan.ports.port': port}},
{'$project': {
'subdomains': {'$filter': {
'input': '$subdomains.shodan.ports',
'cond': {'$eq': ['$$this.port', port]}
}}
}}])
When I run this I don't get any results back at all. I've played around with my $filter but can't seem to get any results out. I'm using a similar aggregation for querying within just the subdomains array and it works fine, I'm just struggling with the array within an array and wondering if I need a different approach.

Try aggregate pipeline below:
db.collection.aggregate([
{
$unwind: "$subdomains"
},
{
$match: {
"subdomains.shodan.ports": {
$elemMatch: {
port: 443
},
$ne: null
}
}
},
{
$group: {
_id: "$_id",
"org-name": {
$last: "$org-name"
},
"domain": {
$last: "$domain"
},
"subdomains": {
$push: "$subdomains"
}
}
}
])
giving output:
[
{
"_id": ObjectId("5d7fb679d76f3bbf82ed952e"),
"domain": "shropscommunityhealth.nhs.uk",
"org-name": "Shropshire Community Health NHS Trust",
"subdomains": [
{
"a_rr": "195.49.146.9",
"asn": 21472,
"asn_org": "ServerHouse Ltd",
"city": "Portsmouth",
"country": "United Kingdom",
"data_retrieved": ISODate("2019-09-16T17:21:11.468Z"),
"firstSeen": "2015-10-17 01:10:00",
"name": "www.shropscommunityhealth.nhs.uk",
"shodan": {
"ports": [
{
"cpe": "cpe:/a:microsoft:internet_information_server:8.5",
"port": 443,
"product": "Microsoft IIS httpd"
},
{
"cpe": "cpe:/o:microsoft:windows",
"port": 80,
"product": "Microsoft HTTPAPI httpd"
}
],
"timestamp": ISODate("2019-09-16T17:21:12.659Z")
}
}
]
}
]

The following query can get us the expected output:
db.collection.aggregate([
{
$project:{
"subdomains":{
$filter:{
"input":"$subdomains",
"as":"subdomain",
"cond":{
$in:[
443,
{
$ifNull:[
"$$subdomain.shodan.ports.port",
[]
]
}
]
}
}
}
}
}
]).pretty()
Data set:
{
"_id" : ObjectId("5d7fb679d76f3bbf82ed952e"),
"org-name" : "Shropshire Community Health NHS Trust",
"domain" : "shropscommunityhealth.nhs.uk",
"subdomains" : [
{
"name" : "www.shropscommunityhealth.nhs.uk",
"firstSeen" : "2015-10-17 01:10:00",
"a_rr" : "195.49.146.9",
"data_retrieved" : ISODate("2019-09-16T17:21:11.468Z"),
"asn" : 21472,
"asn_org" : "ServerHouse Ltd",
"city" : "Portsmouth",
"country" : "United Kingdom",
"shodan" : {
"ports" : [
{
"port" : 443,
"cpe" : "cpe:/a:microsoft:internet_information_server:8.5",
"product" : "Microsoft IIS httpd"
},
{
"port" : 80,
"cpe" : "cpe:/o:microsoft:windows",
"product" : "Microsoft HTTPAPI httpd"
}
],
"timestamp" : ISODate("2019-09-16T17:21:12.659Z")
}
},
{
"name" : "www2.shropscommunityhealth.nhs.uk",
"firstSeen" : "2017-06-23 16:55:00",
"a_rr" : "80.175.25.17",
"data_retrieved" : ISODate("2019-09-16T17:21:12.663Z"),
"asn" : 8607,
"asn_org" : "Timico Limited",
"city" : null,
"country" : "United Kingdom",
"shodan" : {
"timestamp" : ISODate("2019-09-16T17:21:13.664Z")
}
}
]
}
Output:
{
"_id" : ObjectId("5d7fb679d76f3bbf82ed952e"),
"org-name" : "Shropshire Community Health NHS Trust",
"domain" : "shropscommunityhealth.nhs.uk",
"subdomains" : [
{
"name" : "www.shropscommunityhealth.nhs.uk",
"firstSeen" : "2015-10-17 01:10:00",
"a_rr" : "195.49.146.9",
"data_retrieved" : ISODate("2019-09-16T17:21:11.468Z"),
"asn" : 21472,
"asn_org" : "ServerHouse Ltd",
"city" : "Portsmouth",
"country" : "United Kingdom",
"shodan" : {
"ports" : [
{
"port" : 443,
"cpe" : "cpe:/a:microsoft:internet_information_server:8.5",
"product" : "Microsoft IIS httpd"
},
{
"port" : 80,
"cpe" : "cpe:/o:microsoft:windows",
"product" : "Microsoft HTTPAPI httpd"
}
],
"timestamp" : ISODate("2019-09-16T17:21:12.659Z")
}
}
]
}

Related

Requests in mongodb

I have three objects. I am trying to fing the parents who haven't got a job. I am writing this code:
db.getCollection('students').find({
'parents.profession':{$exists: false}
})
I have no mistakes, but it is loking for me users who haven't got a parents. What am i doing wrong
My Objects:
{
"_id" : ObjectId("60c1bd314ed90f98fbbf9d5b"),
"name" : "Ivan",
"class" : 2.0,
"lessons" : [
"basic"
],
"avgScore" : 4.2,
"parents" : [
{
"gender" : "male",
"name" : "Ivan",
"profession" : "trainer"
},
{
"gender" : "female",
"name" : "Vika"
}
]
}
{
"_id" : ObjectId("60c1bd314ed90f98fbbf9d5d"),
"name" : "Kostya",
"class" : 2.0,
"lessons" : [
"basic"
],
"avgScore" : 4.24,
"parents" : [
{
"gender" : "male",
"name" : "Ivan",
"profession" : "blogger"
},
{
"gender" : "male",
"name" : "Andriy",
"profession" : "blogger"
}
]
}
Use $elemMatch:
db.collection.find({
"parents": {
"$elemMatch": {
profession: {
$exists: false
}
}
}
})
Here is the working example: https://mongoplayground.net/p/XT8JJdZ9L5H

Spring Data Mongo - How to get the nested distinct array for nested value?

I'm taking a reference from : Spring Data Mongo - Perform Distinct, but doesn't wants to pull embedded documents in results and asking another questions.
I want to find technology list where "subdeptCd" : "1D". How can we do that ?
{
"firstName" : "Laxmi",
"lastName" : "Dekate",
.....
.......
.....
"departments" : {
"deptCd" : "Tax",
"deptName" : "Tax Handling Dept",
"status" : "A",
"subdepts" : [
{
"subdeptCd" : "1D",
"subdeptName" : "Tax Clearning",
"desc" : "",
"status" : "A"
"technology" : [
{
"technologyCd" : "4C",
"technologyName" : "Cloud Computing",
"desc" : "This is best certficate",
"status" : "A"
}
]
}
]
},
},
{
"firstName" : "Neha",
"lastName" : "Parate",
.....
.......
.....
"departments" : {
"deptCd" : "Tax Rebate",
"deptName" : "Tax Rebate Handling Dept",
"status" : "A",
"subdepts" : [
{
"subdeptCd" : "1D",
"subdeptName" : "Tax Clearning",
"desc" : "",
"status" : "A"
"technology" : [
{
"technologyCd" : "9C",
"technologyName" : "Spring Cloud",
"desc" : "This is best certficate post Google",
"status" : "A"
}
]
}
]
},
}
You can get distinct technologies (technology array elements) with this aggregation:
db.depts.aggregate( [
{
$unwind: "$departments.subdepts"
},
{
$unwind: "$departments.subdepts.technology"
},
{
$match: { "departments.subdepts.subdeptCd": "1D" }
},
{
$group: { _id: "$departments.subdepts.technology.technologyCd", tech: { $first: "$departments.subdepts.technology" } }
},
{
$replaceRoot: { newRoot: "$tech" }
}
] )

Problems aggregating MongoDB

I am having problems aggregating my Product Document in MongoDB.
My Product Document is:
{
"_id" : ObjectId("5d81171c2c69f45ef459e0af"),
"type" : "T-Shirt",
"name" : "Panda",
"description" : "Panda's are cool.",
"image" : ObjectId("5d81171c2c69f45ef459e0ad"),
"created_at" : ISODate("2019-09-17T18:25:48.026+01:00"),
"is_featured" : false,
"sizes" : [
"XS",
"S",
"M",
"L",
"XL"
],
"tags" : [ ],
"pricing" : {
"price" : 26,
"sale_price" : 8
},
"categories" : [
ObjectId("5d81171b2c69f45ef459e086"),
ObjectId("5d81171b2c69f45ef459e087")
],
"sku" : "5d81171c2c69f45ef459e0af"
},
And my Category Document is:
{
"_id" : ObjectId("5d81171b2c69f45ef459e087"),
"name" : "Art",
"description" : "These items are our artsy options.",
"created_at" : ISODate("2019-09-17T18:25:47.196+01:00")
},
My aim is to perform aggregation on the Product Document in order to count the number of items within each Category. So I have the Category "Art", I need to count the products are in the "Art" Category:
My current aggregate:
db.product.aggregate(
{ $unwind : "$categories" },
{
$group : {
"_id" : { "name" : "$name" },
"doc" : { $push : { "category" : "$categories" } },
}
},
{ $unwind : "$doc" },
{
$project : {
"_id" : 0,
"name" : "$name",
"category" : "$doc.category"
}
},
{
$group : {
"_id" : "$category",
"name": { "$first": "$name" },
"items_in_cat" : { $sum : 1 }
}
},
{ "$sort" : { "items_in_cat" : -1 } },
)
Which does actually work but not as I need:
{
"_id" : ObjectId("5d81171b2c69f45ef459e082"),
"name" : null, // Why is the name of the category no here?
"items_in_cat" : 4
},
As we can see the name is null. How can I aggregate the output to be:
{
"_id" : ObjectId("5d81171b2c69f45ef459e082"),
"name" : "Art",
"items_in_cat" : 4
},
We need to use $lookup to fetch the name from Category collection.
The following query can get us the expected output:
db.product.aggregate([
{
$unwind:"$categories"
},
{
$group:{
"_id":"$categories",
"items_in_cat":{
$sum:1
}
}
},
{
$lookup:{
"from":"category",
"let":{
"id":"$_id"
},
"pipeline":[
{
$match:{
$expr:{
$eq:["$_id","$$id"]
}
}
},
{
$project:{
"_id":0,
"name":1
}
}
],
"as":"categoryLookup"
}
},
{
$unwind:{
"path":"$categoryLookup",
"preserveNullAndEmptyArrays":true
}
},
{
$project:{
"_id":1,
"name":{
$ifNull:["$categoryLookup.name","NA"]
},
"items_in_cat":1
}
}
]).pretty()
Data set:
Collection: product
{
"_id" : ObjectId("5d81171c2c69f45ef459e0af"),
"type" : "T-Shirt",
"name" : "Panda",
"description" : "Panda's are cool.",
"image" : ObjectId("5d81171c2c69f45ef459e0ad"),
"created_at" : ISODate("2019-09-17T17:25:48.026Z"),
"is_featured" : false,
"sizes" : [
"XS",
"S",
"M",
"L",
"XL"
],
"tags" : [ ],
"pricing" : {
"price" : 26,
"sale_price" : 8
},
"categories" : [
ObjectId("5d81171b2c69f45ef459e086"),
ObjectId("5d81171b2c69f45ef459e087")
],
"sku" : "5d81171c2c69f45ef459e0af"
}
Collection: category
{
"_id" : ObjectId("5d81171b2c69f45ef459e086"),
"name" : "Art",
"description" : "These items are our artsy options.",
"created_at" : ISODate("2019-09-17T17:25:47.196Z")
}
{
"_id" : ObjectId("5d81171b2c69f45ef459e087"),
"name" : "Craft",
"description" : "These items are our artsy options.",
"created_at" : ISODate("2019-09-17T17:25:47.196Z")
}
Output:
{
"_id" : ObjectId("5d81171b2c69f45ef459e087"),
"items_in_cat" : 1,
"name" : "Craft"
}
{
"_id" : ObjectId("5d81171b2c69f45ef459e086"),
"items_in_cat" : 1,
"name" : "Art"
}

Delete portion of Sub Document Collection in Mongodb

I am trying to delete a section of sub documents from a collections..
Tried (and many others):
db.collection.remove( {'Segments': {$gte: '20150612141038' }} )
db.collection.remove( { 'Segments.$' : {$gte: '0150612141038' }} )
db.collection.deleteMany( { 'Segments.$' : {$gte: '0150612141038' }} )
db.collection.deleteOne( { 'Segments.$' : {$eq: '0150612141038' }} )
just can't figure out what I am doing wrong...
{
"_id" : ObjectId("56e32c5147e030bc0a000035"),
"Date" : "2015-06-12",
"UnitID" : "5",
"Segments" : {
"20150612141037" : {
"Parameters" : {
"647" : "0",
"649" : "16",
"653" : "0",
"655" : "0",
"656" : "0",
"658" : "98",
"664" : "447.0486",
"666" : "442.7083",
"677" : "122.8004",
}
},
"20150612141038" : {
"Parameters" : {
"658" : "96",
"664" : "451.3889",
"666" : "447.0486",
"677" : "122.7892"
}
},
"20150612141039" : {
"Parameters" : {
"658" : "44",
"664" : "442.7083",
"677" : "122.8004",
"704" : "1"
}
},
First of all I think your queries are wrong.
If you try this query to the mongoShell:
db.bios.find({'Segments': {$gte: '20150612141038'}})
you don't have anything as output because 20150612141038 is a document and not a field. Maybe you can reshape your schema to set 20150612141038 as a field and Segments as an array of documents:
{
"_id" : ObjectId("56e32c5147e030bc0a000035"),
"Date" : "2015-06-12",
"UnitID" : "5",
"Segments" : [
{
"segmentId" : "20150612141037",
"Parameters" : {
"647" : "0",
"649" : "16",
"653" : "0",
"655" : "0",
"656" : "0",
"658" : "98",
"664" : "447.0486",
"666" : "442.7083",
"677" : "122.8004"
}
},
{
"segmentId" : "20150612141038",
"Parameters" : {
"658" : "96",
"664" : "451.3889",
"666" : "447.0486",
"677" : "122.7892"
}
},
{
"segmentId" : "20150612141039",
"Parameters" : {
"658" : "44",
"664" : "442.7083",
"677" : "122.8004",
"704" : "1"
}
}
]
}
and then you can use $pull operator to remove documents from Segments array where the field Segments.segmentId has a certain value:
db.collection.update({},
{$pull:{Segments:{"segmentId":{$eq:"20150612141039"}}}},
{ multi: true}
)

How to find count of a key in one document using mongoDB?

I have following structure in my collection:
users:[
{
"name":"ABC",
"address":{
"city":"London",
"country":"UK",
}
},
{
"name":"XYZ",
"address":{
"city":"London",
"country":"UK",
}
},
{
"name":"PQR",
"address":{
"city":"NewYork",
"country":"US",
}
}
]
I want count of number of occurrences of 'city' key in 'address' and 'name' as a result.
I want to query above collection and want following output:
[{
"name":"ABC",
"city":"London",
"count":2
},{
"name":"XYZ",
"city":"London",
"count":2
}, {
"name":"PQR",
"city":"NewYork",
"count":1
}
]
I simulated your collection
{
"_id" : ObjectId("547c30ae371ea419f07b9550"),
"users" : [
{
"name" : "ABC",
"address" : {
"city" : "London",
"country" : "UK"
}
},
{
"name" : "XYZ",
"address" : {
"city" : "London",
"country" : "UK"
}
},
{
"name" : "PQR",
"address" : {
"city" : "NewYork",
"country" : "US"
}
}
]
}
And then I use aggregate framework
db.coll.aggregate([
{
$unwind:"$users"
},
{
$group:{
_id:"$users.address.city",
name:{$push:"$users.name"},
city:{$first:"$users.address.city"},
count:{$sum:1}
}
},{
$unwind:"$name"
},{
$project:{
_id:0,
"city":"$_id",
"name":1,
"city":1,
"count":1
}
}])
result:
{
"result" : [
{
"name" : "PQR",
"city" : "NewYork",
"count" : 1
},
{
"name" : "ABC",
"city" : "London",
"count" : 2
},
{
"name" : "XYZ",
"city" : "London",
"count" : 2
}
],
"ok" : 1
}
UPDATE AFTER QUESTION
I added a new Document
{
"_id" : ObjectId("547c394c371ea419f07b9551"),
"users" : [
{
"address" : {
"city" : "Livorno",
"country" : "LI"
}
},
{
"address" : {
"city" : "Livorno",
"country" : "LI"
}
},
{
"address" : {
"city" : "NewYork",
"country" : "US"
}
}
]
}
and new Query
db.coll.aggregate([
{
$unwind:"$users"
},
{
$group:{
_id:"$users.address.city",
"name": {
$push:{"$ifNull": ["$users.name","$_id"]}
},
city:{$first:"$users.address.city"},
count:{$sum:1}
}
},{
$unwind:"$name"
},{
$project:{
_id:0,
"city":"$_id",
"name":1,
"city":1,
"count":1
}
}])
Result:
{
"result" : [
{
"name" : "PQR",
"city" : "NewYork",
"count" : 2
},
{
"name" : ObjectId("547c394c371ea419f07b9551"),
"city" : "NewYork",
"count" : 2
},
{
"name" : ObjectId("547c394c371ea419f07b9551"),
"city" : "Livorno",
"count" : 2
},
{
"name" : ObjectId("547c394c371ea419f07b9551"),
"city" : "Livorno",
"count" : 2
},
{
"name" : "ABC",
"city" : "London",
"count" : 2
},
{
"name" : "XYZ",
"city" : "London",
"count" : 2
}
],
"ok" : 1
}