Spark Filtering rows in window functions

Spark Filtering rows in window functions - pyspark

I need to applying a window function is PySpark but have to ignore certain rows while doing it.
I have tried the below code.
from pyspark.sql import functions as F
from pyspark.sql.window import Window
df = (sc.parallelize([
{"id":"900","service":"MM", "guid":"43158A8E-3DF2-4FD2-90C9-B73411BBE683" ,"time":"2018-09-13 13:38:17.229" },
{"id":"900","service":"MM", "guid":"43158A8E-3DF2-4FD2-90C9-B73411BBE683" ,"time":"2018-09-13 13:38:17.242" },
{"id":"1527","service":"RA", "guid":"43158A8E-3DF2-4FD2-90C9-B73411BBE683" ,"time":"2018-10-17 14:52:02.331" },
{"id":"1527","service":"RT", "guid":"43158A8E-3DF2-4FD2-90C9-B73411BBE683" ,"time":"2018-10-17 14:52:02.490" },
{"id":"1527","service":"RP", "guid":"43158A8E-3DF2-4FD2-90C9-B73411BBE683" ,"time":"2018-10-17 14:52:02.647" },
{"id":"1504","service":"RA", "guid":"43158A8E-3DF2-4FD2-90C9-B73411BBE683" ,"time":"2018-10-17 22:28:25.095" },
{"id":"1504","service":"RT", "guid":"43158A8E-3DF2-4FD2-90C9-B73411BBE683" ,"time":"2018-10-17 22:28:25.253" },
{"id":"1504","service":"RP", "guid":"43158A8E-3DF2-4FD2-90C9-B73411BBE683" ,"time":"2018-10-17 22:28:25.372" },
{"id":"1504","service":"RV", "guid":"43158A8E-3DF2-4FD2-90C9-B73411BBE683" ,"time":"2018-10-17 22:28:25.732" },
{"id":"1504","service":"RA", "guid":"43158A8E-3DF2-4FD2-90C9-B73411BBE683" ,"time":"2018-11-09 02:05:53.445" },
{"id":"1504","service":"MT", "guid":"43158A8E-3DF2-4FD2-90C9-B73411BBE683" ,"time":"2018-11-09 02:05:53.643" },
{"id":"1504","service":"RA", "guid":"43158A8E-3DF2-4FD2-90C9-B73411BBE683" ,"time":"2018-11-09 02:05:53.924" },
{"id":"1504","service":"RT", "guid":"43158A8E-3DF2-4FD2-90C9-B73411BBE683" ,"time":"2018-11-09 02:05:54.094" },
{"id":"1504","service":"RP", "guid":"43158A8E-3DF2-4FD2-90C9-B73411BBE683" ,"time":"2018-11-09 02:05:54.243" },
{"id":"1504","service":"RV", "guid":"43158A8E-3DF2-4FD2-90C9-B73411BBE683" ,"time":"2018-11-09 02:05:54.732" },
{"id":"1504","service":"RA", "guid":"43158A8E-3DF2-4FD2-90C9-B73411BBE683" ,"time":"2018-11-11 20:52:30.764" },
{"id":"1504","service":"RV", "guid":"43158A8E-3DF2-4FD2-90C9-B73411BBE683" ,"time":"2018-11-11 20:52:31.099" },
{"id":"1504","service":"RT", "guid":"43158A8E-3DF2-4FD2-90C9-B73411BBE683" ,"time":"2018-11-11 20:52:33.363" },
{"id":"1504","service":"RV", "guid":"43158A8E-3DF2-4FD2-90C9-B73411BBE683" ,"time":"2018-11-11 20:52:33.677" },
{"id":"1504","service":"RP", "guid":"43158A8E-3DF2-4FD2-90C9-B73411BBE683" ,"time":"2018-11-11 20:52:39.572" }
]).toDF()
)
(
df
.withColumn
(
'rank',
F.when
(
(F.col('id') != 900),
F.row_number()
.over
(
Window.partitionBy
(
#F.when
#(
# (
# (F.col('id') != 90000)
#),
F.col('guid')
#)
)
.orderBy
(
F.col('time').asc()
)
)
)
)
.select
(
'id',
'service',
'guid',
'time',
'rank'
)
.show(truncate = False)
)
I almost have it but the row_numbers need to start from 1 instead of three in this case.
So in the rank column the number after the two nulls should be 1 instead of 3.

IIUC, you just need to add one temporary partition column with a value like id == 900 ? 0 : 1:
from pyspark.sql import Window, functions as F
# add `part` into partitionBy: (partition based on if id is 900)
win = Window.partitionBy('guid','part').orderBy('time')
# define part and then calculate rank
df = df.withColumn('part', F.when(df.id == 900, 0).otherwise(1)) \
.withColumn('rank', F.when(F.col('part')==1, F.row_number().over(win))) \
.drop('part')

Related

Sequelize Assosiative aggrigate function error

Error: SequelizeDatabaseError: column "receiving_product.id" must appear in the GROUP BY clause or be used in an aggregate function
Error: SequelizeDatabaseError: column "receiving_product.id" must appear in the GROUP BY clause or be used in an aggregate function
Error: SequelizeDatabaseError: column "receiving_product.id" must appear in the GROUP BY clause or be used in an aggregate function
Error: SequelizeDatabaseError: column "receiving_product.id" must appear in the GROUP BY clause or be used in an aggregate function
const singleRP = await db.receiving_product.findOne({
include: [
{
model: db.purchase_order,
as: "purchase_order",
include: [
{ model: db.vendor, as: "vendor" },
{
model: db.po_productlist,
as: "product_items",
attributes: [
"id",
"purchase_order_id",
"product_id",
"quantity",
"price",
"totalPrice",
"created_by",
"updated_by",
"tenant_id",
"createdAt",
"updatedAt",
[
db.sequelize.fn(
"SUM",
db.sequelize.col("purchase_order.product_items.receivinghistory.received_quantity")
),
"received_quantity",
],
],
include: [
{ model: db.product, as: "product" },
{
model: db.received_product,
as: "receivinghistory",
attributes: [
[
db.sequelize.fn(
"SUM",
db.sequelize.col("purchase_order.product_items.receivinghistory.received_quantity")
),
"received_quantity",
],
],
include: {
model: db.user,
as: "received_by",
},
},
{
model: db.product_serial,
as: "serials",
required: false,
where: {
rec_prod_id: id,
},
},
],
},
],
},
{
model: db.user,
as: "added_by",
include: {
model: db.role,
as: "roles",
},
},
],
where: {
[Op.and]: [
{
id,
tenant_id: TENANTID,
},
],
},
group: ['receiving_product.id']
});

Prisma - Sort by _sum

I do have simple groupBy query in my prisma which looks like this:
const groupBy = await prisma.referral.groupBy({
by: ['recommenderId'],
_sum: {
points: true,
},
});
What I am looking for is the way to sort by this _sum value.
The current response is:
{
"groupBy": [
{
"_sum": {
"points": 20000
},
"recommenderId": 3
},
{
"_sum": {
"points": 19000
},
"recommenderId": 2
},
{
"_sum": {
"points": 34000
},
"recommenderId": 1
}
]
}
What I need is to get is:
{
"groupBy": [
{
"_sum": {
"points": 34000
},
"recommenderId": 1
},
{
"_sum": {
"points": 20000
},
"recommenderId": 3
},
{
"_sum": {
"points": 19000
},
"recommenderId": 2
},
]
}
Based on documentation (https://www.prisma.io/docs/concepts/components/prisma-client/filtering-and-sorting#sorting) I tried something like this:
const groupBy = await prisma.referral.groupBy({
by: ['recommenderId'],
_sum: {
points: true,
},
orderBy: [
{
_sum: 'desc',
},
],
});
But with code I'm getting error:
Argument _sum: Got invalid value 'desc' on prisma.groupByReferral.
Provided String, expected ReferralSumOrderByAggregateInput

You can use _sum on different fields at the same time, so you also need to provide field name that you want to sort on:
const groupBy = await prisma.referral.groupBy({
by: ['recommenderId'],
_sum: {
points: true,
},
orderBy: [
{
_sum: {
// Add `points` key here
points: 'desc'
}
},
],
});

How do I count all the documents in a collection and use the cont in a controller, with MongoDB and Express.js?

I am working on a blogging application (click the link to see the GitHub repo) with Express (version 4.17.1), EJS and MongoDB (version 4.0.10).
Trying to paginate the posts I did the following, in the controller:
exports.getPosts = (req, res, next) => {
const perPage = 5;
const currPage = req.query.page ? parseInt(req.query.page) : 1;
let postsCount = 0;
const posts = Post.find({}, (err, posts) => {
postsCount = posts.length;
let pageDecrement = currPage > 1 ? 1 : 0;
let pageIncrement = postsCount >= perPage ? 1 : 0;
if (err) {
console.log('Error: ', err);
} else {
res.render('default/index', {
moment: moment,
layout: 'default/layout',
website_name: 'MEAN Blog',
page_heading: 'XPress News',
page_subheading: 'A MEAN Stack Blogging Application',
currPage: currPage,
posts: posts,
pageDecrement: pageDecrement,
pageIncrement: pageIncrement
});
}
})
.sort({
created_at: -1
})
.populate('category')
.limit(perPage)
.skip((currPage - 1) * perPage);
};
And in the view:
<a class="btn btn-primary <%= pageDecrement == 0 ? 'disabled' : '' %>" href="/?page=<%= currPage - pageDecrement %>">← Newer Posts</a>
and
<a class="btn btn-primary <%= pageIncrement == 0 ? 'disabled' : '' %>" href="/?page=<%= currPage + pageIncrement %>">Older Posts →</a>
That works fine unless there are is a number of posts equal to perPage x N, where N is an integer, in which case the "Older Posts" button becomes disabled one page too late.
That is because postsCount = posts.length counts the posts after they are limited by .skip((currPage - 1) * perPage).
So I need to count the posts from the model/collection and bring that count variable in the controller.
My model:
const mongoose = require('mongoose');
const postSchema = new mongoose.Schema({
title: {
type: String,
required: true
},
short_description: {
type: String,
required: true
},
full_text: {
type: String,
required: true
},
category: {
type: mongoose.Schema.Types.ObjectId,
ref: 'Category'
},
post_image: {
type: String,
required: false
},
updated_at: {
type: Date,
default: Date.now()
},
created_at: {
type: Date,
default: Date.now()
}
});
module.exports = mongoose.model('Post', postSchema);
How do I count all the documents in the posts collection and use that number in the posts controller?

This can be done easier with mongodb aggregation framework.
We use $facet aggregation to get the paginated data along with the total number of documents.
In aggregation framework we use $lookup instead of mongoose populate. $lookup returns an array, to get the first item in array we use $arrayElemAt operator inside $addFields.
Playground
And here is the code to apply to your app:
(The first $match aggregation is unnecessary here, but I put in in case you may need it in the future)
exports.getPosts = async (req, res, next) => {
const perPage = 5;
const currPage = req.query.page ? parseInt(req.query.page) : 1;
const skip = (currPage - 1) * perPage;
try {
const result = await Post.aggregate([{
$match: {},
},
{
$sort: {
created_at: -1,
},
},
{
$lookup: {
from: "categories",
localField: "category",
foreignField: "_id",
as: "category",
},
},
{
$addFields: {
category: {
$arrayElemAt: ["$category", 0],
},
},
},
{
$facet: {
totalRecords: [{
$count: "total",
}, ],
data: [{
$skip: skip,
},
{
$limit: perPage,
},
],
},
},
]);
let postsCount = result[0].totalRecords[0].total;
const pageCount = Math.ceil(postsCount / perPage);
const pageDecrement = currPage > 1 ? 1 : 0;
const pageIncrement = currPage < pageCount ? 1 : 0;
const posts = result[0].data;
res.render("default/index", {
moment: moment,
layout: "default/layout",
website_name: "MEAN Blog",
page_heading: "XPress News",
page_subheading: "A MEAN Stack Blogging Application",
currPage,
posts,
pageDecrement,
pageIncrement,
});
} catch (err) {
console.log("Error: ", err);
res.status(500).send("something went wrong");
}
};
By the way, in the post schema, for date fields you use default: Date.now(), this will cause the date value always the same value, it should be in this format: default: Date.now

Read $facet.
New in version 3.4.
Processes multiple aggregation pipelines within a single stage on the
same set of input documents. Each sub-pipeline has its own field in
the output document where its results are stored as an array of
documents.
Example: See here
db.collection.aggregate([
{
$facet: {
"count": [
{ $match: {} },
{ $count: "totalCount" }
],
"data": [
{ $match: {} },
{ $sort: { _id: -1 } },
{ $skip: 1 },
{ $limit: 2 }
]
}
}
])
Mongoose Version:
Model.aggregate([
{
$facet: {
"count": [
{ $match: {} },
{ $count: "totalCount" }
],
"data": [
{ $match: {} },
{ $sort: { _id: -1 } },
{ $skip: 1 },
{ $limit: 2 }
]
}
}
]).
then(res => console.log(res)).
catch(error => console.error('error', error));

In case of Mongoose you should use this:
https://mongoosejs.com/docs/api.html#aggregate_Aggregate-facet
Official Mongodb docs:
https://docs.mongodb.com/manual/reference/operator/aggregation/facet
General idea is to perform aggregation instead of multiple calls (1 for getting needed info + 1 to get the total count of documents)
You can perform 2 separate calls of course but it will hit your performance (not much for small data volumes but still...)
So you can get all needed data with .find() and then get count like this:
https://mongoosejs.com/docs/api.html#model_Model.count
PS. btw, use async/await instead of callbacks to avoid callback hell

Update's request for mongodb [duplicate]

I have a document structure that is deeply nested, like this:
{id: 1,
forecasts: [ {
forecast_id: 123,
name: "Forecast 1",
levels: [
{ level: "proven",
configs: [
{
config: "Custom 1",
variables: [{ x: 1, y:2, z:3}]
},
{
config: "Custom 2",
variables: [{ x: 10, y:20, z:30}]
},
]
},
{ level: "likely",
configs: [
{
config: "Custom 1",
variables: [{ x: 1, y:2, z:3}]
},
{
config: "Custom 2",
variables: [{ x: 10, y:20, z:30}]
},
]
}
]
},
]
}
I'm trying to update the collection to insert a new config, that looks like this:
newdata = {
config: "Custom 1",
variables: [{ x: 111, y:2222, z:3333}]
}
I'm trying something like this in mongo (in Python):
db.myCollection.update({"id": 1,
"forecasts.forecast-id": 123,
"forecasts.levels.level": "proven",
"forecasts.levels.configs.config": "Custom 1"
},
{"$set": {"forecasts.$.levels.$.configs.$": newData}}
)
I'm getting "Cannot apply the positional operator without a corresponding query field containing an array" error though. What is the proper way to do this in mongo? This is mongo v2.4.1.

Unfortunately, you can't use the $ operator more than once per key, so you have to use numeric values for the rest. As in:
db.myCollection.update({
"id": 1,
"forecasts.forecast-id": 123,
"forecasts.levels.level": "proven",
"forecasts.levels.configs.config": "Custom 1"
},
{"$set": {"forecasts.$.levels.0.configs.0": newData}}
)
MongoDB's support for updating nested arrays is poor. So you're best off avoiding their use if you need to update the data frequently, and consider using multiple collections instead.
One possibility: make forecasts its own collection, and assuming you have a fixed set of level values, make level an object instead of an array:
{
_id: 123,
parentId: 1,
name: "Forecast 1",
levels: {
proven: {
configs: [
{
config: "Custom 1",
variables: [{ x: 1, y:2, z:3}]
},
{
config: "Custom 2",
variables: [{ x: 10, y:20, z:30}]
},
]
},
likely: {
configs: [
{
config: "Custom 1",
variables: [{ x: 1, y:2, z:3}]
},
{
config: "Custom 2",
variables: [{ x: 10, y:20, z:30}]
},
]
}
}
}
Then you can update it using:
db.myCollection.update({
_id: 123,
'levels.proven.configs.config': 'Custom 1'
},
{ $set: { 'levels.proven.configs.$': newData }}
)

Managed to solve it with using mongoose:
All you need to know is the '_id's of all of the sub-document in the chain (mongoose automatically create '_id' for each sub-document).
for example -
SchemaName.findById(_id, function (e, data) {
if (e) console.log(e);
data.sub1.id(_id1).sub2.id(_id2).field = req.body.something;
// or if you want to change more then one field -
//=> var t = data.sub1.id(_id1).sub2.id(_id2);
//=> t.field = req.body.something;
data.save();
});
More about the sub-document _id method in mongoose documentation.
explanation:_id is for the SchemaName, _id1 for sub1 and _id2 for sub2 - you can keep chaining like that.
*You don't have to use findById method, but it's seem to me the most convenient as you need to know the rest of the '_id's anyway.

MongoDB has introduced ArrayFilters to tackle this issue in Version 3.5.2 and later.
New in version 3.6.
Starting in MongoDB 3.6, when updating an array field, you can specify
arrayFilters that determine which array elements to update.
[https://docs.mongodb.com/manual/reference/method/db.collection.update/#specify-arrayfilters-for-an-array-update-operations][1]
Let's say the Schema design as follows :
var ProfileSchema = new Schema({
name: String,
albums: [{
tour_name: String,
images: [{
title: String,
image: String
}]
}]
});
And Document created looks like this :
{
"_id": "1",
"albums": [{
"images": [
{
"title": "t1",
"url": "url1"
},
{
"title": "t2",
"url": "url2"
}
],
"tour_name": "london-trip"
},
{
"images": [.........]:
}]
}
Say I want to update the "url" of an image.
Given - "document id", "tour_name" and "title"
For this the update query :
Profiles.update({_id : req.body.id},
{
$set: {
'albums.$[i].images.$[j].title': req.body.new_name
}
},
{
arrayFilters: [
{
"i.tour_name": req.body.tour_name, "j.image": req.body.new_name // tour_name - current tour name, new_name - new tour name
}]
})
.then(function (resp) {
console.log(resp)
res.json({status: 'success', resp});
}).catch(function (err) {
console.log(err);
res.status(500).json('Failed');
})

This is a very OLD bug in MongoDB
https://jira.mongodb.org/browse/SERVER-831

I was facing same kind of problem today, and after lot of exploring on google/stackoverflow/github, I figured arrayFilters are the best solution to this problem. Which would work with mongo 3.6 and above.
This link finally saved my day: https://thecodebarbarian.com/a-nodejs-perspective-on-mongodb-36-array-filters.html
const OrganizationInformationSchema = mongoose.Schema({
user: {
_id: String,
name: String
},
organizations: [{
name: {
type: String,
unique: true,
sparse: true
},
rosters: [{
name: {
type: String
},
designation: {
type: String
}
}]
}]
}, {
timestamps: true
});
And using mongoose in express, updating the name of roster of given id.
const mongoose = require('mongoose');
const ControllerModel = require('../models/organizations.model.js');
module.exports = {
// Find one record from database and update.
findOneRosterAndUpdate: (req, res, next) => {
ControllerModel.updateOne({}, {
$set: {
"organizations.$[].rosters.$[i].name": req.body.name
}
}, {
arrayFilters: [
{ "i._id": mongoose.Types.ObjectId(req.params.id) }
]
}).then(response => {
res.send(response);
}).catch(err => {
res.status(500).send({
message: "Failed! record cannot be updated.",
err
});
});
}
}

It's fixed.
https://jira.mongodb.org/browse/SERVER-831
But this feature is available starting with the MongoDB 3.5.12 development version.
Note: This question asked on Aug 11 2013 and it's resolved on Aug 11 2017

Given how MongoDB doesn't appear to provide a good mechanism for this, I find it prudent to use mongoose to simply extract the element from the mongo collection using .findOne(...), run a for-loop search on its relevant subelements (seeking by say ObjectID), modify that JSON, then do Schema.markModified('your.subdocument'); Schema.save(); It's probably not efficient, but it is very simple and works fine.

I searched about this for about 5 hours and finally found the best and easiest solution:
HOW TO UPDATE NESTED SUB-DOCUMENTS IN MONGO DB
{id: 1,
forecasts: [ {
forecast_id: 123,
name: "Forecast 1",
levels: [
{
levelid:1221
levelname: "proven",
configs: [
{
config: "Custom 1",
variables: [{ x: 1, y:2, z:3}]
},
{
config: "Custom 2",
variables: [{ x: 10, y:20, z:30}]
},
]
},
{
levelid:1221
levelname: "likely",
configs: [
{
config: "Custom 1",
variables: [{ x: 1, y:2, z:3}]
},
{
config: "Custom 2",
variables: [{ x: 10, y:20, z:30}]
},
]
}
]
},
]}
Query:
db.weather.updateOne({
"_id": ObjectId("1"), //this is level O select
"forecasts": {
"$elemMatch": {
"forecast_id": ObjectId("123"), //this is level one select
"levels.levelid": ObjectId("1221") // this is level to select
}
}
},
{
"$set": {
"forecasts.$[outer].levels.$[inner].levelname": "New proven",
}
},
{
"arrayFilters": [
{ "outer.forecast_id": ObjectId("123") },
{ "inner.levelid": ObjectId("1221") }
]
}).then((result) => {
resolve(result);
}, (err) => {
reject(err);
});

Sharing my lessons learned. I faced the same requirement recently where i need to update a nested array item.
My structure is as follows
{
"main": {
"id": "ID_001",
"name": "Fred flinstone Inc"
},
"types": [
{
"typeId": "TYPE1",
"locations": [
{
"name": "Sydney",
"units": [
{
"unitId": "PHG_BTG1"
}
]
},
{
"name": "Brisbane",
"units": [
{
"unitId": "PHG_KTN1"
},
{
"unitId": "PHG_KTN2"
}
]
}
]
}
]
}
My requirement is to add some fields in a specific units[].
My solution is first to find the index of the nested array item (say foundUnitIdx)
The two techniques I used are
use the $set keyword
specify the dynamic field in $set using the [] syntax
query = {
"locations.units.unitId": "PHG_KTN2"
};
var updateItem = {
$set: {
["locations.$.units."+ foundUnitIdx]: unitItem
}
};
var result = collection.update(
query,
updateItem,
{
upsert: true
}
);
Hope this helps others. :)

EASY SOLUTION FOR Mongodb 3.2+
https://docs.mongodb.com/manual/reference/method/db.collection.replaceOne/
I had a similar situation and solved it like this. I was using mongoose, but it should still work in vanilla MongoDB. Hope it's useful to someone.
const MyModel = require('./model.js')
const query = {id: 1}
// First get the doc
MyModel.findOne(query, (error, doc) => {
// Do some mutations
doc.foo.bar.etc = 'some new value'
// Pass in the mutated doc and replace
MyModel.replaceOne(query, doc, (error, newDoc) => {
console.log('It worked!')
})
}
Depending on your use case, you might be able to skip the initial findOne()

Okkk.we can update our nested subdocument in mongodb.this is our schema.
var Post = new mongoose.Schema({
name:String,
post:[{
like:String,
comment:[{
date:String,
username:String,
detail:{
time:String,
day:String
}
}]
}]
})
solution for this schema
Test.update({"post._id":"58206a6aa7b5b99e32b7eb58"},
{$set:{"post.$.comment.0.detail.time":"aajtk"}},
function(err,data){
//data is updated
})

Mongodb update deeply nested subdocument

I have a document structure that is deeply nested, like this:
{id: 1,
forecasts: [ {
forecast_id: 123,
name: "Forecast 1",
levels: [
{ level: "proven",
configs: [
{
config: "Custom 1",
variables: [{ x: 1, y:2, z:3}]
},
{
config: "Custom 2",
variables: [{ x: 10, y:20, z:30}]
},
]
},
{ level: "likely",
configs: [
{
config: "Custom 1",
variables: [{ x: 1, y:2, z:3}]
},
{
config: "Custom 2",
variables: [{ x: 10, y:20, z:30}]
},
]
}
]
},
]
}
I'm trying to update the collection to insert a new config, that looks like this:
newdata = {
config: "Custom 1",
variables: [{ x: 111, y:2222, z:3333}]
}
I'm trying something like this in mongo (in Python):
db.myCollection.update({"id": 1,
"forecasts.forecast-id": 123,
"forecasts.levels.level": "proven",
"forecasts.levels.configs.config": "Custom 1"
},
{"$set": {"forecasts.$.levels.$.configs.$": newData}}
)
I'm getting "Cannot apply the positional operator without a corresponding query field containing an array" error though. What is the proper way to do this in mongo? This is mongo v2.4.1.

Unfortunately, you can't use the $ operator more than once per key, so you have to use numeric values for the rest. As in:
db.myCollection.update({
"id": 1,
"forecasts.forecast-id": 123,
"forecasts.levels.level": "proven",
"forecasts.levels.configs.config": "Custom 1"
},
{"$set": {"forecasts.$.levels.0.configs.0": newData}}
)
MongoDB's support for updating nested arrays is poor. So you're best off avoiding their use if you need to update the data frequently, and consider using multiple collections instead.
One possibility: make forecasts its own collection, and assuming you have a fixed set of level values, make level an object instead of an array:
{
_id: 123,
parentId: 1,
name: "Forecast 1",
levels: {
proven: {
configs: [
{
config: "Custom 1",
variables: [{ x: 1, y:2, z:3}]
},
{
config: "Custom 2",
variables: [{ x: 10, y:20, z:30}]
},
]
},
likely: {
configs: [
{
config: "Custom 1",
variables: [{ x: 1, y:2, z:3}]
},
{
config: "Custom 2",
variables: [{ x: 10, y:20, z:30}]
},
]
}
}
}
Then you can update it using:
db.myCollection.update({
_id: 123,
'levels.proven.configs.config': 'Custom 1'
},
{ $set: { 'levels.proven.configs.$': newData }}
)

Managed to solve it with using mongoose:
All you need to know is the '_id's of all of the sub-document in the chain (mongoose automatically create '_id' for each sub-document).
for example -
SchemaName.findById(_id, function (e, data) {
if (e) console.log(e);
data.sub1.id(_id1).sub2.id(_id2).field = req.body.something;
// or if you want to change more then one field -
//=> var t = data.sub1.id(_id1).sub2.id(_id2);
//=> t.field = req.body.something;
data.save();
});
More about the sub-document _id method in mongoose documentation.
explanation:_id is for the SchemaName, _id1 for sub1 and _id2 for sub2 - you can keep chaining like that.
*You don't have to use findById method, but it's seem to me the most convenient as you need to know the rest of the '_id's anyway.

MongoDB has introduced ArrayFilters to tackle this issue in Version 3.5.2 and later.
New in version 3.6.
Starting in MongoDB 3.6, when updating an array field, you can specify
arrayFilters that determine which array elements to update.
[https://docs.mongodb.com/manual/reference/method/db.collection.update/#specify-arrayfilters-for-an-array-update-operations][1]
Let's say the Schema design as follows :
var ProfileSchema = new Schema({
name: String,
albums: [{
tour_name: String,
images: [{
title: String,
image: String
}]
}]
});
And Document created looks like this :
{
"_id": "1",
"albums": [{
"images": [
{
"title": "t1",
"url": "url1"
},
{
"title": "t2",
"url": "url2"
}
],
"tour_name": "london-trip"
},
{
"images": [.........]:
}]
}
Say I want to update the "url" of an image.
Given - "document id", "tour_name" and "title"
For this the update query :
Profiles.update({_id : req.body.id},
{
$set: {
'albums.$[i].images.$[j].title': req.body.new_name
}
},
{
arrayFilters: [
{
"i.tour_name": req.body.tour_name, "j.image": req.body.new_name // tour_name - current tour name, new_name - new tour name
}]
})
.then(function (resp) {
console.log(resp)
res.json({status: 'success', resp});
}).catch(function (err) {
console.log(err);
res.status(500).json('Failed');
})

This is a very OLD bug in MongoDB
https://jira.mongodb.org/browse/SERVER-831

I was facing same kind of problem today, and after lot of exploring on google/stackoverflow/github, I figured arrayFilters are the best solution to this problem. Which would work with mongo 3.6 and above.
This link finally saved my day: https://thecodebarbarian.com/a-nodejs-perspective-on-mongodb-36-array-filters.html
const OrganizationInformationSchema = mongoose.Schema({
user: {
_id: String,
name: String
},
organizations: [{
name: {
type: String,
unique: true,
sparse: true
},
rosters: [{
name: {
type: String
},
designation: {
type: String
}
}]
}]
}, {
timestamps: true
});
And using mongoose in express, updating the name of roster of given id.
const mongoose = require('mongoose');
const ControllerModel = require('../models/organizations.model.js');
module.exports = {
// Find one record from database and update.
findOneRosterAndUpdate: (req, res, next) => {
ControllerModel.updateOne({}, {
$set: {
"organizations.$[].rosters.$[i].name": req.body.name
}
}, {
arrayFilters: [
{ "i._id": mongoose.Types.ObjectId(req.params.id) }
]
}).then(response => {
res.send(response);
}).catch(err => {
res.status(500).send({
message: "Failed! record cannot be updated.",
err
});
});
}
}

It's fixed.
https://jira.mongodb.org/browse/SERVER-831
But this feature is available starting with the MongoDB 3.5.12 development version.
Note: This question asked on Aug 11 2013 and it's resolved on Aug 11 2017

Given how MongoDB doesn't appear to provide a good mechanism for this, I find it prudent to use mongoose to simply extract the element from the mongo collection using .findOne(...), run a for-loop search on its relevant subelements (seeking by say ObjectID), modify that JSON, then do Schema.markModified('your.subdocument'); Schema.save(); It's probably not efficient, but it is very simple and works fine.

I searched about this for about 5 hours and finally found the best and easiest solution:
HOW TO UPDATE NESTED SUB-DOCUMENTS IN MONGO DB
{id: 1,
forecasts: [ {
forecast_id: 123,
name: "Forecast 1",
levels: [
{
levelid:1221
levelname: "proven",
configs: [
{
config: "Custom 1",
variables: [{ x: 1, y:2, z:3}]
},
{
config: "Custom 2",
variables: [{ x: 10, y:20, z:30}]
},
]
},
{
levelid:1221
levelname: "likely",
configs: [
{
config: "Custom 1",
variables: [{ x: 1, y:2, z:3}]
},
{
config: "Custom 2",
variables: [{ x: 10, y:20, z:30}]
},
]
}
]
},
]}
Query:
db.weather.updateOne({
"_id": ObjectId("1"), //this is level O select
"forecasts": {
"$elemMatch": {
"forecast_id": ObjectId("123"), //this is level one select
"levels.levelid": ObjectId("1221") // this is level to select
}
}
},
{
"$set": {
"forecasts.$[outer].levels.$[inner].levelname": "New proven",
}
},
{
"arrayFilters": [
{ "outer.forecast_id": ObjectId("123") },
{ "inner.levelid": ObjectId("1221") }
]
}).then((result) => {
resolve(result);
}, (err) => {
reject(err);
});

Sharing my lessons learned. I faced the same requirement recently where i need to update a nested array item.
My structure is as follows
{
"main": {
"id": "ID_001",
"name": "Fred flinstone Inc"
},
"types": [
{
"typeId": "TYPE1",
"locations": [
{
"name": "Sydney",
"units": [
{
"unitId": "PHG_BTG1"
}
]
},
{
"name": "Brisbane",
"units": [
{
"unitId": "PHG_KTN1"
},
{
"unitId": "PHG_KTN2"
}
]
}
]
}
]
}
My requirement is to add some fields in a specific units[].
My solution is first to find the index of the nested array item (say foundUnitIdx)
The two techniques I used are
use the $set keyword
specify the dynamic field in $set using the [] syntax
query = {
"locations.units.unitId": "PHG_KTN2"
};
var updateItem = {
$set: {
["locations.$.units."+ foundUnitIdx]: unitItem
}
};
var result = collection.update(
query,
updateItem,
{
upsert: true
}
);
Hope this helps others. :)

EASY SOLUTION FOR Mongodb 3.2+
https://docs.mongodb.com/manual/reference/method/db.collection.replaceOne/
I had a similar situation and solved it like this. I was using mongoose, but it should still work in vanilla MongoDB. Hope it's useful to someone.
const MyModel = require('./model.js')
const query = {id: 1}
// First get the doc
MyModel.findOne(query, (error, doc) => {
// Do some mutations
doc.foo.bar.etc = 'some new value'
// Pass in the mutated doc and replace
MyModel.replaceOne(query, doc, (error, newDoc) => {
console.log('It worked!')
})
}
Depending on your use case, you might be able to skip the initial findOne()

Okkk.we can update our nested subdocument in mongodb.this is our schema.
var Post = new mongoose.Schema({
name:String,
post:[{
like:String,
comment:[{
date:String,
username:String,
detail:{
time:String,
day:String
}
}]
}]
})
solution for this schema
Test.update({"post._id":"58206a6aa7b5b99e32b7eb58"},
{$set:{"post.$.comment.0.detail.time":"aajtk"}},
function(err,data){
//data is updated
})

We Keep Coding

iphone swift flutter scala powershell matlab mongodb postgresql perl eclipse

Spark Filtering rows in window functions - pyspark

Related

Sequelize Assosiative aggrigate function error

Prisma - Sort by _sum

How do I count all the documents in a collection and use the cont in a controller, with MongoDB and Express.js?

Update's request for mongodb [duplicate]

Mongodb update deeply nested subdocument

Categories

Resources