PostgreSQL query on Json having nested Array - postgresql

I have below json data stored in Postgres DB with column as json:
{"users": [
{
"id": 1,
"data": [
{
"pos": "BA",
"endDate": "2022-07-31",
"startDate": "2022-01-01"
},
{
"pos": "BA2",
"endDate": "2022-09-30",
"startDate": "2022-08-01"
},
{
"pos": "BA3",
"endDate": "2023-03-31",
"startDate": "2022-10-01"
},
{
"pos": "BA4",
"endDate": "2023-06-08",
"startDate": "2023-04-01"
}
]
},
{
"id": 2,
"data": [
{
"pos": "BA",
"endDate": "2022-07-31",
"startDate": "2022-01-01"
},
{
"pos": "BA2",
"endDate": "2022-09-30",
"startDate": "2022-08-01"
},
{
"pos": "BA3",
"endDate": "2023-03-31",
"startDate": "2022-10-01"
},
{
"pos": "BA4",
"endDate": "2023-06-08",
"startDate": "2023-04-01"
}
]
},
{
"id": 3,
"data": [
{
"pos": "BA",
"endDate": "2022-07-31",
"startDate": "2022-01-01"
},
{
"pos": "BA2",
"endDate": "2022-09-30",
"startDate": "2022-08-01"
},
{
"pos": "BA3",
"endDate": "2023-03-31",
"startDate": "2022-10-01"
},
{
"pos": "BA4",
"endDate": "2023-06-08",
"startDate": "2023-04-01"
}
]
}
]
}
I need to write a query to filter the data based on the startDate and endDate.
So if the startDate is 2022-01-01 and endDate is 2022-12-31 then query should return below json:
{"users": [
{
"id": 1,
"data": [
{
"pos": "BA",
"endDate": "2022-07-31",
"startDate": "2022-01-01"
},
{
"pos": "BA2",
"endDate": "2022-09-30",
"startDate": "2022-08-01"
}
]
},
{
"id": 2,
"data": [
{
"pos": "BA",
"endDate": "2022-07-31",
"startDate": "2022-01-01"
},
{
"pos": "BA2",
"endDate": "2022-09-30",
"startDate": "2022-08-01"
}
]
},
{
"id": 3,
"data": [
{
"pos": "BA",
"endDate": "2022-07-31",
"startDate": "2022-01-01"
},
{
"pos": "BA2",
"endDate": "2022-09-30",
"startDate": "2022-08-01"
}
]
}
]
}
Can someone help me with the query ?

Well, this is a bit complicated. Inside-out, first flatten the JSON field then filter and finally re-aggregate into a JSON object again. Please note however that it is worth reviewing the data design. A normalized one would make things way better and easier. Please also note that it is extremely difficult to maintain data quality and consistency in a complex composite JSON field while supported out-of-the-box in a proper normalized model.
select json_build_object('users', json_agg(t))
from
(
select jvalue ->> 'id' as id, json_agg(j) as data
from
json_array_elements
(
'{"users":[
{"id":1,"data":[{"pos":"BA","endDate":"2022-07-31","startDate":"2022-01-01"},{"pos":"BA2","endDate":"2022-09-30","startDate":"2022-08-01"},{"pos":"BA3","endDate":"2023-03-31","startDate":"2022-10-01"},{"pos":"BA4","endDate":"2023-06-08","startDate":"2023-04-01"}]},
{"id":2,"data":[{"pos":"BA","endDate":"2022-07-31","startDate":"2022-01-01"},{"pos":"BA2","endDate":"2022-09-30","startDate":"2022-08-01"},{"pos":"BA3","endDate":"2023-03-31","startDate":"2022-10-01"},{"pos":"BA4","endDate":"2023-06-08","startDate":"2023-04-01"}]},
{"id":3,"data":[{"pos":"BA","endDate":"2022-07-31","startDate":"2022-01-01"},{"pos":"BA2","endDate":"2022-09-30","startDate":"2022-08-01"},{"pos":"BA3","endDate":"2023-03-31","startDate":"2022-10-01"},{"pos":"BA4","endDate":"2023-06-08","startDate":"2023-04-01"}]}
]}'::json -> 'users'
) as jvalue
cross join lateral json_array_elements(jvalue -> 'data') as j(value_object)
where (value_object ->> 'startDate')::date >= '2022-01-01'
and (value_object ->> 'endDate')::date <= '2022-12-31'
group by id
) as t;

Related

mongodb agregate and filter data

I try to filter some results data from mongodb with mongoose in javascript.
This is my json structure:
{
"name": "john",
"firstname": "doe",
"yearold": 22,
"recipes": [
{
"title": "cheesecake",
"data": [
{
"name": "egg",
"label": "Eggs for",
"value": 6,
"unit": "piece"
},
{
"name": "oil",
"label": "Specific oil",
"unit": "oz",
"value": 0.2
},
{
"name": "flour",
"label": "Wholemel flour",
"value": 450,
"unit": "gr"
}
]
},
{
"title": "cake",
"data": [
{
"name": "egg",
"label": "Eggs for",
"value": 6,
"unit": "piece"
},
{
"name": "flour",
"label": "Wholemel flour",
"value": 500,
"unit": "gr"
},
]
}
]
}
In some case i need to return json data with hiding some values. For example I have a list that specifies all the values ​​to hide
hidekeys=["egg"];
and i would like to get this:
{
"name": "john",
"firstname": "doe",
"yearold": 22,
"recipes": [
{
"title": "cheesecake",
"data": [
{
"name": "egg",
"label": "Eggs for",
"value": #######,
"unit": "piece"
},
{
"name": "oil",
"label": "Specific oil",
"unit": "oz",
"value": 0.2
},
{
"name": "flour",
"label": "Wholemel flour",
"value": 450,
"unit": "gr"
}
]
},
{
"title": "cake",
"data": [
{
"name": "egg",
"label": "Eggs for",
"value": #######,
"unit": "piece"
},
{
"name": "flour",
"label": "Wholemel flour",
"value": 500,
"unit": "gr"
},
]
}
]
}
For each recipe i need to hide ingredient value if it is specified in hidekeys.
I tried something with $project and $cond but it doesnt works
Here's a quick way of how to achieve this using $map
const hidekeys = ["egg"];
db.collection.aggregate([
{
$addFields: {
recipes: {
$map: {
input: "$recipes",
as: "recipe",
in: {
$mergeObjects: [
"$$recipe",
{
data: {
$map: {
input: "$$recipe.data",
as: "datum",
in: {
"$mergeObjects": [
"$$datum",
{
$cond: [
{
"$setIsSubset": [
[
"$$datum.name"
],
hidekeys
]
},
{
value: "#####"
},
{
value: "$$datum.value"
}
]
}
]
}
}
}
}
]
}
}
}
}
}
])
Mongo Playground

Mongdb aggregate do second group inside first group stage output

I don't know if it's either possible but I would like to obtain a specific output from an aggregate pipeline.
Exemples objects:
{
"_id": "6001d736e6dc1c55e893158d",
"manager": "6000da590ed6253807158216",
"label": "Test",
"identifier": "Test",
"interval": 11,
"unit": "X",
"created_at": "2021-01-15T17:56:06.749Z",
"updated_at": "2021-01-21T12:21:35.670Z",
"__v": 0
},
{
"_id": "6030236f976756b0b2d74556",
"manager": "6022f3285752fec73393bda2",
"label": "Temperature salon",
"identifier": "DS18B20_TEMP",
"interval": 60,
"unit": "°C",
"created_at": "2021-02-19T20:45:35.847Z",
"updated_at": "2021-02-19T20:45:35.847Z",
"__v": 0
}
I'm trying to obtain a group by date AND by unit(field in object), i succeed to do it separatly but i can't find a solution to do both in the same pipeline.
Expected output if i do the first group by month:
{
"_id": "2021-01-00T00:00:00.000Z",
"X": objectsArray[],
"°C": objetcsArray[]
},
{
"_id": "2021-02-00T00:00:00.000Z",
"X": objectsArray[],
"°C": objetcsArray[]
}
What i have for the moment with this group:
{
'_id': {
'$add': [
{ '$subtract': [
{ '$subtract': [ '$created_at', new Date(0) ] },
{ '$mod': [
{ '$subtract': [ '$created_at', new Date(0) ] },
this.millisecondsIn(interval),
]},
]},
new Date(0),
]
},
sensors: {
$addToSet: '$$ROOT',
},
}
{
"_id": "2021-01-21T00:00:00.000Z",
"sensors": [
{
"_id": "601ab8f623224a5387c6252d",
"manager": "6000da590ed6253807158216",
"label": "Test",
"identifier": "Test2",
"interval": 60,
"unit": "°C",
"created_at": "2021-02-03T14:53:42.538Z",
"updated_at": "2021-02-03T14:53:42.538Z",
"__v": 0
},
{
"_id": "6029ad3dda9bafb99cf0b4d5",
"manager": "6022f3285752fec73393bda2",
"label": "Test sensor 1",
"identifier": "RANDOMID",
"interval": 60,
"unit": "°C",
"created_at": "2021-02-14T23:07:41.255Z",
"updated_at": "2021-02-14T23:07:41.255Z",
"__v": 0
}
]
},
{
"_id": "2020-12-24T00:00:00.000Z",
"sensors": [
{
"_id": "6001917f41c38212a477a2ce",
"manager": "6000da590ed6253807158216",
"label": "Test label",
"identifier": "TEst id",
"interval": 10,
"unit": "%",
"created_at": "2021-01-15T12:58:39.514Z",
"updated_at": "2021-01-16T19:08:40.239Z",
"__v": 0
},
{
"_id": "6001d736e6dc1c55e893158d",
"manager": "6000da590ed6253807158216",
"label": "Test",
"identifier": "Test",
"interval": 11,
"unit": "X",
"created_at": "2021-01-15T17:56:06.749Z",
"updated_at": "2021-01-21T12:21:35.670Z",
"__v": 0
}
]
},
{
"_id": "2021-02-18T00:00:00.000Z",
"sensors": [
{
"_id": "6030238d976756b0b2d74557",
"manager": "6022f3285752fec73393bda2",
"label": "Taux d'humidité salon",
"identifier": "DHT_22_HUM",
"interval": 60,
"unit": "%",
"created_at": "2021-02-19T20:46:05.042Z",
"updated_at": "2021-02-19T20:46:05.042Z",
"__v": 0
},
{
"_id": "60302357976756b0b2d74555",
"manager": "6022f3285752fec73393bda2",
"label": "Temperature salon",
"identifier": "DTH_22_TEMP",
"interval": 60,
"unit": "°C",
"created_at": "2021-02-19T20:45:11.071Z",
"updated_at": "2021-02-19T20:45:11.071Z",
"__v": 0
},
{
"_id": "6030236f976756b0b2d74556",
"manager": "6022f3285752fec73393bda2",
"label": "Temperature salon",
"identifier": "DS18B20_TEMP",
"interval": 60,
"unit": "°C",
"created_at": "2021-02-19T20:45:35.847Z",
"updated_at": "2021-02-19T20:45:35.847Z",
"__v": 0
}
]
}
Does anyone know if the wanted output is possible and if it is, how ?
Thanks
The general steps would be:
$group by unit and date, pushing all of the documents into a sensors array
$project to create a new field with {k: <unit value>, v: <sensors array>}
$group by date, pushing the new field into an array
$project with $arrayToObject to convert array
$addFields to include the date in the new object
$replaceRoot to promote the new object
Thank you so much Joe, after few hours and many tries I end up with this:
this.sensorModel
.aggregate()
.group({
'_id': {
date: { '$year': '$created_at' },
'unit': '$unit',
},
test: { '$addToSet': '$$ROOT' },
})
.project({
newField: {
k: '$_id.unit',
v: '$test',
}
})
.group({
'_id': '$_id.date',
data: { '$addToSet': '$newField', },
})
.project({
'sensors': { $arrayToObject: '$data', },
})
.replaceRoot({
$arrayToObject: [[{k: {$toString: '$_id'}, v: '$sensors'}]]
})
.exec();
The result is almost perfect:
[
{
"2021": {
"°C": [
{
"_id": "6029ad3dda9bafb99cf0b4d5",
"manager": "6022f3285752fec73393bda2",
"label": "Test sensor 1",
"identifier": "RANDOMID",
"interval": 60,
"unit": "°C",
"created_at": "2021-02-14T23:07:41.255Z",
"updated_at": "2021-02-14T23:07:41.255Z",
"__v": 0
},
],
"X": [
{
"_id": "6001d736e6dc1c55e893158d",
"manager": "6000da590ed6253807158216",
"label": "Test",
"identifier": "Test",
"interval": 11,
"unit": "X",
"created_at": "2021-01-15T17:56:06.749Z",
"updated_at": "2021-01-21T12:21:35.670Z",
"__v": 0
}
],
"%": [
{
"_id": "6030238d976756b0b2d74557",
"manager": "6022f3285752fec73393bda2",
"label": "Taux d'humidité salon",
"identifier": "DHT_22_HUM",
"interval": 60,
"unit": "%",
"created_at": "2021-02-19T20:46:05.042Z",
"updated_at": "2021-02-19T20:46:05.042Z",
"__v": 0
},
]
}
}
]

MongoDb extract array

I have a mongodb which I want to extract some specific data.
Here is my json:
{
"jobs" : [
{
"id": 554523,
"code": "1256-554523",
"name": "Banco de Talentos",
"status": "published",
"type": "vacancy_type_effective",
"publicationType": "external",
"numVacancies": 1,
"departmentId": 108141,
"departmentName": "FUTURAS OPORTUNIDADES",
"roleId": 169970,
"roleName": "BANCO DE TALENTOS",
"createdAt": "2020-10-30T12:23:48.572Z",
"updatedAt": "2020-12-30T23:21:30.403Z",
"branchId": null,
"branchName": null
},
{
"id": 616834,
"code": "1256-616834",
"name": "YYYYYY (o) YYYYY",
"status": "frozen",
"type": "vacancy_type_effective",
"publicationType": "external",
"numVacancies": 1,
"departmentId": 109190,
"departmentName": "TESTE TESTE",
"roleId": 165712,
"roleName": "SL - TESTE PL",
"createdAt": "2020-12-16T14:17:36.187Z",
"updatedAt": "2021-01-29T17:08:43.613Z",
"branchId": 120448,
"branchName": "TESTE TESTE1"
}
],
"application": [
{
"id": 50707344,
"score": 40.251965332031254,
"partnerName": "indeed",
"endedAt": null,
"createdAt": "2020-12-21T11:21:30.587Z",
"updatedAt": "2021-02-18T22:02:35.866Z",
"tags": {},
"candidate": {
"birthdate": "1986-04-04",
"id": 578615,
"name": "TESTE",
"lastName": "TESTE TESTE",
"email": "teste#teste.com.br",
"identificationDocument": "34356792807",
"countryOfOrigin": "BR",
"linkedinProfileUrl": "teste",
"gender": "female",
"mobileNumber": "+5511972319799",
"phoneNumber": "(11)2463-2039"
},
"job": {
"id": 619713,
"name": "XXXXde XXXX Pleno"
},
"manualCandidate": null,
"currentStep": {
"id": 3527370,
"name": "Cadastro",
"status": "done"
}
},
{
"id": 50707915,
"score": 3.75547943115234E+1,
"partnerName": "indeed",
"endedAt": null,
"createdAt": "2020-12-21T11:31:31.877Z",
"updatedAt": "2021-02-18T14:07:06.605Z",
"tags": {},
"candidate": {
"birthdate": "1971-10-02",
"id": 919358,
"name": "TESTE TESTE",
"lastName": "SILVA",
"email": "teste.teste#teste.com",
"identificationDocument": "3232323232",
"countryOfOrigin": "BR",
"linkedinProfileUrl": "teste/",
"gender": "female",
"mobileNumber": "11 94021- 5521",
"phoneNumber": "+5511995685247"
},
"job": {
"id": 619713,
"name": "Analista de XXXXX Pleno"
},
"manualCandidate": null,
"currentStep": {
"id": 3527370,
"name": "Cadastro",
"status": "done"
}
}
]
}
My question is: How can I extract only the array objects in jobs and application separately? Anyone knows the code in Mongodb for do this?
I need do this task for after I can insert the extract separated data in different collections.
Thanks a lot.
Unfortunately there is no real "good" way of doing this. Here is an example of how I would do it by using $facet and other operators to manipulate the structure
db.collection.aggregate([
{
$match: {
/** your query*/
}
},
{
$facet: {
jobs: [
{
$unwind: "$jobs"
},
{
$replaceRoot: {
newRoot: "$jobs"
}
}
],
applications: [
{
$unwind: "$application"
},
{
$replaceRoot: {
newRoot: "$application"
}
}
]
}
},
{
$addFields: {
"merged": {
"$concatArrays": [
"$jobs",
"$applications"
]
}
}
},
{
$unwind: "$merged"
},
{
"$replaceRoot": {
"newRoot": "$merged"
}
}
])
Mongo Playground
I would personally just do it in code after you fetched the document.

Find specific field in array by last date and updating it

I have the document below, and I want to update in exams, the last exam with status "started" (according to date), I need to add to "done" another number
{
"_id": ObjectID("5d2a371cec7eaf00119df614"),
"firstname": "Laura",
"lastname": "Warriner",
"image": "2019-07-14t05:25:23.352z_13.jpg",
"exams": [
{
"examId": "Ba6apmRmz",
"name": "general",
"language": "es",
"version": 2,
"testId": "2000",
"status": [
{
"status": "created",
"date": ISODate("2019-09-08T11:49:42.124Z")
},
{
"status": "started",
"date": ISODate("2019-09-08T11:55:09.873Z"),
"done": [1]
},
{
"status": "started",
"date": ISODate("2019-09-09T12:01:57.886Z"),
"done": [2,3]
},
{
"status": "completed",
"date": ISODate("2019-09-09T12:01:57.886Z")
}
]
}
]
}
Thanks in advance

What's the correct syntax for given mongodb query?

Question: Write a MongoDB query to find the restaurants which locates in latitude value less than -95.754168.
Structure of 'restaurants' collection
{
"address": {
"building": "1007",
"coord": [ -73.856077, 40.848447 ],
"street": "Morris Park Ave",
"zipcode": "10462"
},
"borough": "Bronx",
"cuisine": "Bakery",
"grades": [
{ "date": { "$date": 1393804800000 }, "grade": "A", "score": 2 },
{ "date": { "$date": 1378857600000 }, "grade": "A", "score": 6 },
{ "date": { "$date": 1358985600000 }, "grade": "A", "score": 10 },
{ "date": { "$date": 1322006400000 }, "grade": "A", "score": 9 },
{ "date": { "$date": 1299715200000 }, "grade": "B", "score": 14 }
],
"name": "Morris Park Bake Shop",
"restaurant_id": "30075445"
}
Author's Answer is : db.restaurants.find({"address.coord" : {$lt : -95.754168}});
Is this the correct answer?
If no then what is the correct answer?
Resource: http://www.w3resource.com/mongodb-exercises/#PracticeOnline
You can go into your coord index like this
db.restaurants.find({"address.coord.0" : {$lt: -95.754168}});
According to the documentation here,
MongoDB uses the dot notation to access the elements of an array and to access the fields of an embedded document.
E.g. <array>.<index>
Note that the index is 0-based.