How do I make the next self query on MongoDB?
SELECT e.user_id AS user_id,
e.datetime AS started_at,
(SELECT MIN(datetime) ## taking the closest "end" event datetime of that userId ##
FROM events
WHERE type = "end" AND
user_id = e.user_id AND
datetime > e.datetime) AS end_at,
FROM events AS e
WHERE e.type = "start"
Over the next event data table:
{"_id" : "1", "type": "start", "datetime": "2022-02-01T10:15Z", "userId": "1"},
{"_id" : "2", "type": "end", "datetime": "2022-02-01T10:20Z", "userId": "1"},
{"_id" : "3", "type": "start", "datetime": "2022-02-01T10:16Z", "userId": "2"},
{"_id" : "4", "type": "end", "datetime": "2022-02-01T10:21Z", "userId": "2"},
{"_id" : "5", "type": "start", "datetime": "2022-02-02T11:01Z", "userId": "1"},
{"_id" : "6", "type": "end", "datetime": "2022-02-02T11:02Z", "userId": "1"}
The expected result should look like:
user_id
started_at
end_at
1
2022-02-01T10:15Z
2022-02-01T10:20Z
2
2022-02-01T10:16Z
2022-02-01T10:21Z
1
2022-02-02T11:01Z
2022-02-02T11:02Z
Maybe something like this:
db.collection.aggregate([
{
$sort: {
"datetime": 1
}
},
{
$project: {
"d": {
k: "$type",
v: "$datetime"
},
userId: 1
}
},
{
$group: {
_id: "$userId",
e: {
$push: "$d"
}
}
},
{
$addFields: {
e: {
$map: {
input: {
$range: [
0,
{
$size: "$e"
},
2
]
},
as: "index",
in: {
$slice: [
"$e",
"$$index",
2
]
}
}
}
}
},
{
$unwind: "$e"
},
{
$project: {
events: {
"$arrayToObject": "$e"
}
}
},
{
$project: {
userId: "$_id",
start_at: "$events.start",
end_at: "$events.end",
_id: 0
}
}
])
Explailed:
( The solution will work only if the user events start / end sequentially )
Sort the documents by datetime.
Rename the fields type & datetime to k,v ( suitable for $arrayToObject )
Group the documents per userId ( Note this solution has the limitation that total number of events must not exceed 16MB per userId)
Split the events per date/time pairs (start+end , considering user cannot start new event if the previous has not finished)
$unwind the events array
Convert start/end array to object.
Project the fields as per the expected output.
playground
Not sure what the exact use case is , but in general looks abit more practical if you add sessionId for every event document so if user can start paralel sessions the start/end events to be possible easier to correlate based on sessionId.
Here's a pipeline that closely (exactly?) follows your SQL. I converted the string datetime to ISODate to insure comparisons were done properly, but perhaps this is unecessary.
db.collection.aggregate([
{
// match each start
"$match": { "type": "start" }
},
{ // lookup ends for userId in collection
"$lookup": {
"from": "collection",
"localField": "userId",
"foreignField": "userId",
"let": {
"isoDate": {
"$dateFromString": {
"dateString": "$datetime",
"format": "%Y-%m-%dT%H:%MZ"
}
}
},
"pipeline": [
{
"$match": {
"type": "end",
"$expr": {
"$gt": [
{
"$dateFromString": {
"dateString": "$datetime",
"format": "%Y-%m-%dT%H:%MZ"
}
},
"$$isoDate"
]
}
}
}
],
"as": "endArray"
}
},
{ // output desired fields
"$project": {
"_id": 0,
"userId": 1,
"started_at": "$datetime",
"end_at": {
// assumes original collection was sorted
"$first": "$endArray.datetime"
}
}
}
])
Try it on mongoplayground.net.
Here's another pipeline that uses "$setWindowFields", but it's not ideal. I don't know how to filter "$setWindowFields" "output" given the allowed operators, etc., but it works. Improvement comments welcome!
db.collection.aggregate([
{
// add winField sorted array to each doc
// containing userId docs following
// current doc
"$setWindowFields": {
"partitionBy": "$userId",
"sortBy": { "datetime": 1 },
"output": {
"winField": {
"$push": "$$CURRENT",
"window": {
"documents": [ 1, "unbounded" ]
}
}
}
}
},
{
// just keep start docs
"$match": { "type": "start" }
},
{
// sorting on start datetime
"$sort": { "datetime": 1 }
},
{
// output desired fields
"$project": {
"_id": 0,
"userId": 1,
"started_at": "$datetime",
"end_at": {
// grab first end datetime
"$getField": {
"field": "datetime",
"input": {
"$first": {
"$filter": {
"input": "$winField",
"cond": { "$eq": [ "$$this.type", "end" ] }
}
}
}
}
}
}
}
])
Try it on mongoplayground.net.
Related
I have a collection with documents in below format: (shown below 2 sample document)
1st doc:
{
"date": 20221101,
"time":1500,
"productCode": "toycar",
"purchaseHistory": [
{
"clientid": 123,
"status": "SUCCESS"
},
{
"clientid": 456,
"status": "FAILURE"
}
]
}
2nd doc:
{
"date": 20221101,
"time": 1500,
"productCode": "toycar",
"purchaseHistory": [
{
"clientid": 890,
"status": "SUCCESS"
},
{
"clientid": 678,
"status": "SUCCESS"
}
]
}
I want to query above and print output in below format where purchaseHistory.status = 'SUCCESS' and date = 20221101:
{productCode:"toycar", "time": 1500, "docCount": 2, "purchaseHistCount":3}
How can I achieve this?
I tried below:
db.products.aggregate({
$match : {date:20221101, 'purchaseHistory.status':'SUCCESS'},
"$group": {
"_id": {
"pc": "$productCode",
"time": "$time"
},
"docCount": {$sum :1}
}
})
Something like this maybe:
db.collection.aggregate([
{
$match: {
date: 20221101,
"purchaseHistory.status": "SUCCESS"
}
},
{
"$addFields": {
"purchaseHistory": {
"$filter": {
"input": "$purchaseHistory",
"as": "ph",
"cond": {
$eq: [
"$$ph.status",
"SUCCESS"
]
}
}
}
}
},
{
$group: {
_id: {
t: "$time",
pc: "$productCode"
},
docCount: {
$sum: 1
},
purchaseHistCount: {
$sum: {
$size: "$purchaseHistory"
}
}
}
}
])
Explained:
Filter the matched documents.
Filter the purchaseHistory SUCCESS only.
Group the result to see count of matching documents & matching purchaseHistory.
Playground
I have a query as below, what it does it creates a link between two documents and find the last order date and users details like email, phone, etc. but on large data set it shows me timeout error any help would be much appreciated, and thanks in advance for the help
db.users.aggregate([
{
"$lookup": {
"from": "orders",
"let": {
"id": "$_id"
},
"pipeline": [
{
"$addFields": {
"owner": {
"$toObjectId": "$owner"
}
}
},
{
"$match": {
$expr: {
$eq: [
"$owner",
"$$id"
]
}
}
},
],
"as": "orders"
}
},
{
"$unwind": {
path: "$orders",
preserveNullAndEmptyArrays: false,
includeArrayIndex: "arrayIndex"
}
},
{
"$group": {
"_id": "$_id",
"order": {
"$last": "$orders.createdAt"
},
"userInfo": {
"$mergeObjects": {
name: "$name",
email: "$email",
phone: "$phone",
orderCount: "$orderCount"
}
}
}
},
{
"$project": {
name: "$userInfo.name",
email: "$userInfo.email",
phone: "$userInfo.phone",
orderCount: "$userInfo.orderCount",
lastOrder: "$order",
}
}
]
)
my documents look like the following for orders
{
"_id": ObjectId("607fbeeb0a752a66a7af40eb"),
"address": {
"loc": [
-1,
3
],
"_id": "5d35d55d3d081f486d0d401c",
"apartment": "",
"description": "ACcdg dfef"
},
"approvedAt": ISODate("2021-04-21T11:28:05.295+05:30"),
"assignedAt": null,
"billingAddress": {
"description": ""
},
"createdAt": ISODate("2021-04-21T11:28:04.449+05:30"),
"creditCard": "",
"deliveryDate": "04/21/21",
"deliveryDateObj": ISODate("2021-04-21T12:27:58.746+05:30"),
"owner": "609bd5831b912947ea51a9ac",
"products": [
"5a070c079b"
],
"updatedAt": ISODate("2021-04-21T11:28:05.295+05:30"),
}
and for users, it is like below
{
"_id": ObjectId("609bd5831b912947ea51a9ac"),
"updatedAt": ISODate("2021-05-12T18:47:55.291+05:30"),
"createdAt": ISODate("2021-05-12T18:47:55.213+05:30"),
"email": "1012#gmail.com",
"phone": "123",
"dob": "1996-04-10",
"password": "",
"stripeID": "",
"__t": "Customer",
"name": {
"first": "A",
"last": "b"
},
"orderCount": 1,
"__v": 0,
"forgottenPassword": ""
}
convert _id to string in lookup's let and you can remove $addFields from lookup pipeline
add $project stage in lookup pipeline and show only required fields
$project to show required fields and get last / max createdAt date use $max, you don't need to $unwind and $group operation
db.users.aggregate([
{
$lookup: {
from: "orders",
let: { id: { $toString: "$_id" } },
pipeline: [
{ $match: { $expr: { $eq: ["$owner", "$$id"] } } },
{
$project: {
_id: 0,
createdAt: 1
}
}
],
"as": "orders"
}
},
{
$project: {
email: 1,
name: 1,
orderCount: { $size: "$orders" },
phone: 1,
lastOrder: { $max: "$orders.createdAt" }
}
}
])
Playground
SUGGESTION:
You can save owner id in orders as objectId instead of string and whenever new order arrive store it as objectId, you can prevent conversation operator $toString operation
create an index in owner field to make lookup process faster.
I have figured out that after using createIndex for the owner field which is used to compare the owner in the orders from the users _id filed, so just after adding an db.orders.createIndex({ owner: 1 }), the query will run much faster and smoother
{
"orderNo": "123",
"bags": [{
"type": "small",
"products": [{
"id": "1",
"name": "ABC",
"returnable": true
}, {
"id": "2",
"name": "XYZ"
}
]
},{
"type": "big",
"products": [{
"id": "3",
"name": "PQR",
"returnable": true
}, {
"id": "4",
"name": "UVW"
}
]
}
]
}
I have orders collection where documents are in this format. I want to get a total count of products which has the returnable flag. e.g: for the above order the count should be 2. I am very new to MongoDB wanted to know how to write a query to find this out, I have tried few things but did not help:
this is what I tried but not worked:
db.orders.aggregate([
{ "$unwind": "$bags" },
{ "$unwind": "$bags.products" },
{ "$unwind": "$bags.products.returnable" },
{ "$group": {
"_id": "$bags.products.returnable",
"count": { "$sum": 1 }
}}
])
For inner array you can use $filter to check returnable flag and $size to get number of such items. For the outer one you can take advantage of $reduce to sum the values from inner arrays:
db.collection.aggregate([
{
$project: {
totalReturnable: {
$reduce: {
input: "$bags",
initialValue: 0,
in: {
$add: [
"$$value",
{
$size: {
$filter: {
input: "$$this.products",
as: "prod",
cond: {
$eq: [ "$$prod.returnable", true ]
}
}
}
]
}
}
}
}
}
}
])
Mongo Playground
I've been using MongoDB for just a week and I have problems achieving this result: I want to group my documents by date while also keeping track of the number of entries that have a certain field set to a certain value.
So, my documents look like this:
{
"_id" : ObjectId("5f3f79fc266a891167ca8f65"),
"recipe" : "A",
"timestamp" : ISODate("2020-08-22T09:38:36.306Z")
}
where recipe is either "A", "B" or "C". Right now I'm grouping the documents by date using this pymongo query:
mongo.db.aggregate(
# Pipeline
[
# Stage 1
{
"$project": {
"createdAt": {
"$dateToString": {
"format": "%Y-%m-%d",
"date": "$timestamp"
}
},
"progressivo": 1,
"temperatura_fusione": 1
}
},
# Stage 2
{
"$group": {
"_id": {
"createdAt": "$createdAt"
},
"products": {
"$sum": 1
}
}
},
# Stage 3
{
"$project": {
"label": "$_id.createdAt",
"value": "$products",
"_id": 0
}
}])
Which gives me results like this:
[{"label": "2020-08-22", "value": 1}, {"label": "2020-08-15", "value": 2}, {"label": "2020-08-11", "value": 1}, {"label": "2020-08-21", "value": 5}]
What I'd like to have is also the counting of how many times each recipe appears on every date. So, if for example on August 21 I have 2 entries with the "A" recipe, 3 with the "B" recipe and 0 with the "C" recipe, the desired output would be
{"label": "2020-08-21", "value": 5, "A": 2, "B":3, "C":0}
Do you have any tips?
Thank you!
You can do like following, what have you done is excellent. After that,
In second grouping, We just get total value and value of each recipe.
$map is used to go through/modify each objects
$arrayToObject is used to covert the array what we have done via map (key : value pair) to object
$ifNull is used for, sometimes your data might not have "A" or "B" or "C". But you need the value should be 0 if there is no name as expected output.
Here is the code
[
{
"$project": {
"createdAt": {
"$dateToString": {
"format": "%Y-%m-%d",
"date": "$timestamp"
}
},
recipe: 1,
"progressivo": 1,
"temperatura_fusione": 1
}
},
{
"$group": {
"_id": {
"createdAt": "$createdAt",
"recipeName": "$recipe",
},
"products": {
$sum: 1
}
}
},
{
"$group": {
"_id": "$_id.createdAt",
value: {
$sum: "$products"
},
recipes: {
$push: {
name: "$_id.recipeName",
val: "$products"
}
}
}
},
{
$project: {
"content": {
"$arrayToObject": {
"$map": {
"input": "$recipes",
"as": "el",
"in": {
"k": "$$el.name",
"v": "$$el.val"
}
}
}
},
value: 1
}
},
{
$project: {
_id: 1,
value: 1,
A: {
$ifNull: [
"$content.A",
0
]
},
B: {
$ifNull: [
"$content.B",
0
]
},
C: {
$ifNull: [
"$content.C",
0
]
}
}
}
]
Working Mongo playground
I have many tweets object like this:
{
"_id" : ObjectId("5a2f4a381cb29b482553e2c9"),
"user_id" : 21898942,
"created_at" : ISODate("2009-03-09T19:48:50Z"),
"id" : 1301923516,
"place" : "",
"retweet_count" : 0,
"tweet" : "Save the Date! March 28th Vietnamese Cooking Class! Call to Reserve 312.255.0088",
"favorite_count" : 0
"type": A
}
I'm using this code to qroup the tweets by date and by type:
pipeline = [
{
"$group": {
"_id": {
"date": {
"$dateToString": {
"format": "%Y-%m-%d",
"date": "$created_at"
}
},
"type": "$type"
},
"count": {
"$sum": 1
}
}
}
]
results = mongo.db.tweets.aggregate(pipeline)
Here is the result I get:
{
"_id": {
"date": "2009-03-17",
"type": A
},
"count": 4
,
{
"_id": {
"date": "2009-03-17",
"type": B
},
"count": 6
}
But now I want to have the result in this format:
{date: "2009-03-17", A: 4, B: 6, C: 9}
Is there anyway I can achieve this through aggregate directly?
Note: I'm using MongoDB and PyMongo
You can try the below aggregation query in 3.6 version.
Added the second group to create array of type and count value pairs followed by $mergeObjects to merge date key value with $arrayToObject, which produces create a type value key and count value pairs, to generate the expected response.
$replaceRoot to promote the document to the top level.
pipeline = [
{
"$group": {
"_id": {
"date": {
"$dateToString": {
"format": "%Y-%m-%d",
"date": "$created_at"
}
},
"type": "$type"
},
"count": {
"$sum": 1
}
}
},
{
"$group": {
"_id": "$_id.date",
"typeandcount": {
"$push": {
"k": "$_id.type",
"v": "$count"
}
}
}
},
{
"$replaceRoot": {
"newRoot": {
"$mergeObjects": [
{
"date": "$_id"
},
{
"$arrayToObject": "$typeandcount"
}
]
}
}
}
]
Mongo 3.4 version:
Replace the last stage with below
{
"$replaceRoot": {
"newRoot": {
"$arrayToObject": {
"$concatArrays": [
[
{
"k": "date",
"v": "$_id"
}
],
"$typeandcount"
]
}
}
}
}