Using mongo in golang, Expire Documents after a Specified Number of Seconds? - mongodb

I am trying something simple using the mongo-go-driver.
I insert some data into a collection, and I want it to be automatically deleted after a number of seconds.
I have read the following documentation: https://docs.mongodb.com/manual/tutorial/expire-data/#expire-documents-after-a-specified-number-of-seconds
Then I wrote something in Go, but it does not seem to work as I expected. Maybe there is something I did not get, or I am doing it the wrong way.
package main

import (
    "context"
    "fmt"
    "log"
    "time"

    "github.com/Pallinder/go-randomdata"
    "go.mongodb.org/mongo-driver/bson"
    "go.mongodb.org/mongo-driver/bson/primitive"
    "go.mongodb.org/mongo-driver/mongo"
    "go.mongodb.org/mongo-driver/mongo/options"
)

func main() {
    ctx := context.TODO()

    client, err := mongo.NewClient(options.Client().ApplyURI("mongodb://localhost:27017"))
    if err != nil {
        log.Fatal(err)
    }
    err = client.Connect(ctx)
    if err != nil {
        log.Fatal(err)
    }

    db := client.Database("LADB")
    col := db.Collection("LACOLL")

    // Add a TTL index to the collection:
    // the goal is to expire documents after only 1 second (test purposes).
    model := mongo.IndexModel{
        Keys:    bson.M{"createdAt": 1},
        Options: options.Index().SetExpireAfterSeconds(1),
    }
    ind, err := col.Indexes().CreateOne(ctx, model)
    if err != nil {
        log.Fatal(err)
    }
    fmt.Println(ind)

    // Insert a document every second.
    for i := 0; i < 5; i++ {
        name := randomdata.SillyName()
        res, err := col.InsertOne(ctx, NFT{Timestamp: time.Now(), CreatedAt: time.Now(), Name: name})
        if err != nil {
            log.Fatal(err)
        }
        fmt.Println("Inserted", name, "with id", res.InsertedID)
        time.Sleep(1 * time.Second)
    }

    // Display all documents.
    cursor, err := col.Find(ctx, bson.M{}, nil)
    if err != nil {
        log.Fatal(err)
    }
    var datas []NFT
    if err = cursor.All(ctx, &datas); err != nil {
        log.Fatal(err)
    }
    // I expect some documents to be gone already (fewer than 5).
    fmt.Println(datas)
}

type NFT struct {
    ID        primitive.ObjectID `bson:"_id,omitempty"`
    CreatedAt time.Time          `bson:"createdAt,omitempty"`
    Timestamp time.Time          `bson:"timestamp,omitempty"`
    Name      string             `bson:"name,omitempty"`
}

There's nothing wrong with your example, it works.
Please note that the expireAfterSeconds you specify is the duration after createdAt at which the document expires. That instant is the earliest time at which the document may be deleted, but there is no guarantee that the deletion will happen "immediately", exactly at that time.
Quoting from MongoDB docs: TTL indexes: Timing of the Delete Operation:
The TTL index does not guarantee that expired data will be deleted immediately upon expiration. There may be a delay between the time a document expires and the time that MongoDB removes the document from the database.
The background task that removes expired documents runs every 60 seconds. As a result, documents may remain in a collection during the period between the expiration of the document and the running of the background task.
Because the duration of the removal operation depends on the workload of your mongod instance, expired data may exist for some time beyond the 60 second period between runs of the background task.
As you can see, if a document expires, in the worst case it may take 60 seconds for the background task to kick in and start removing expired documents, and if there are many (or the database is under heavy load), it may take some additional time to delete all expired documents.
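For a quick check, one option is to wait past the TTL monitor's 60-second cycle before querying again. A minimal sketch, reusing the ctx and col handles from the example above:

// Wait longer than expireAfterSeconds plus the 60-second TTL monitor interval,
// then count what is left; by then the earlier inserts should have been removed.
time.Sleep(90 * time.Second)

count, err := col.CountDocuments(ctx, bson.M{})
if err != nil {
    log.Fatal(err)
}
fmt.Println("documents remaining after TTL cleanup:", count) // expected: 0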

Related

Golang automatically delete(TTL) data [duplicate]


(CursorNotFound) Cursor not found (namespace: 'dbName.collection', id: 885805460243113719)

The following is the code for fetching results from the DB, given a collection, a filter query, a sort query, and a limit.
func DBFetch(collection *mongo.Collection, filter interface{}, sort interface{}, limit int64) ([]bson.M, error) {
    findOptions := options.Find()
    findOptions.SetLimit(limit)
    findOptions.SetSort(sort)
    cursor, err := collection.Find(context.Background(), filter, findOptions)
    var result []bson.M
    if err != nil {
        logger.Client().Error(err.Error())
        sentry.CaptureException(err)
        cursor.Close(context.Background())
        return nil, err
    }
    if err = cursor.All(context.Background(), &result); err != nil {
        logger.Client().Error(err.Error())
        sentry.CaptureMessage(err.Error())
        return nil, err
    }
    return result, nil
}
I am using mongo-go-driver version 1.8.2.
MongoDB Community version 4.4.7, sharded with 2 shards.
Each shard has 30 CPUs in Kubernetes with 245 GB of memory and 1 replica.
The API receives about 200 rpm.
The API fetches the data from Mongo, formats it, and then serves it.
We are reading and writing on the primary.
Heavy writes occur approximately every hour.
We are getting timeouts within milliseconds (approx. 10ms-20ms).
As pointed out by @R2D2 in the comments, the cursor-not-found error occurs when the default cursor timeout (10 minutes) is exceeded and Go has not requested the next batch of data.
There are a couple of workarounds you can use to mitigate this error.
The first option is to set a batch size for your find query using the option below. By doing so, you are instructing MongoDB to send the data in chunks of the specified size rather than in larger batches. Note that this will usually increase the number of round trips between MongoDB and the Go server.
findOptions := options.Find()
findOptions.SetBatchSize(10) // <- Batch size is set to `10`
cursor, err := collection.Find(context.Background(), filter, findOptions)
Furthermore, you can set the NoCursorTimeout option, which will keep your MongoDB find query's cursor alive until you manually close it. This option is a double-edged sword, since you have to close the cursor manually once you no longer need it; otherwise, the cursor will stay in memory for a prolonged time.
findOptions := options.Find()
findOptions.SetNoCursorTimeout(true) // <- Applies no cursor timeout option
cursor, err := collection.Find(context.Background(), filter, findOptions)
// VERY IMPORTANT
_ = cursor.Close(context.Background()) // <- Don't forget to close the cursor
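If it fits your flow, a common Go idiom (not from the original answer) is to defer the close right after a successful Find, so the cursor is released on every return path:

cursor, err := collection.Find(context.Background(), filter, findOptions)
if err != nil {
    return nil, err // the cursor is nil here, so there is nothing to close
}
defer cursor.Close(context.Background()) // released on every return path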
Combining the above two options, your complete code will look like the following.
func DBFetch(collection *mongo.Collection, filter interface{}, sort interface{}, limit int64) ([]bson.M, error) {
    findOptions := options.Find()
    findOptions.SetLimit(limit)
    findOptions.SetSort(sort)
    findOptions.SetBatchSize(10)         // <- Batch size is set to `10`
    findOptions.SetNoCursorTimeout(true) // <- Applies the no-cursor-timeout option
    cursor, err := collection.Find(context.Background(), filter, findOptions)
    var result []bson.M
    if err != nil {
        //logger.Client().Error(err.Error())
        //sentry.CaptureException(err)
        // Note: the cursor is nil when Find returns an error, so there is nothing to close here.
        return nil, err
    }
    if err = cursor.All(context.Background(), &result); err != nil {
        //logger.Client().Error(err.Error())
        //sentry.CaptureMessage(err.Error())
        return nil, err
    }
    // VERY IMPORTANT
    _ = cursor.Close(context.Background()) // <- Don't forget to close the cursor
    return result, nil
}
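For illustration, a hypothetical call to DBFetch could look like this; the collection, filter, sort field, and limit below are placeholders, not taken from the question:

coll := client.Database("dbName").Collection("collection")
filter := bson.M{"status": "active"}          // assumed filter
sort := bson.D{{Key: "createdAt", Value: -1}} // assumed sort: newest first
docs, err := DBFetch(coll, filter, sort, 100)
if err != nil {
    log.Fatal(err)
}
fmt.Println("fetched", len(docs), "documents")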

Mongodb doesn't retrieve all documents in a collection with 2 million records using cursor

I have a collection with 2,000,000 records:
> db.events.count();
2000000
and I use the Go MongoDB client to connect to the database:
package main

import (
    "context"
    "encoding/json"
    "log"
    "time"

    "github.com/Shopify/sarama"
    "go.mongodb.org/mongo-driver/bson"
    "go.mongodb.org/mongo-driver/mongo"
    "go.mongodb.org/mongo-driver/mongo/options"
)

// runningtime and asyncProducer are helpers defined elsewhere in the program (omitted here).

func main() {
    ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
    defer cancel()

    client, err := mongo.Connect(ctx, options.Client().ApplyURI("mongodb://localhost:27888").SetAuth(options.Credential{
        Username: "mongoadmin",
        Password: "secret",
    }))
    if err != nil {
        panic(err)
    }
    defer func() {
        if err = client.Disconnect(ctx); err != nil {
            panic(err)
        }
    }()

    collection := client.Database("test").Collection("events")

    var bs int32 = 10000
    var b = true
    cur, err := collection.Find(context.Background(), bson.D{}, &options.FindOptions{
        BatchSize: &bs, NoCursorTimeout: &b})
    if err != nil {
        log.Fatal(err)
    }
    defer cur.Close(ctx)

    s, n := runningtime("retrive db from mongo and publish to kafka") // s and n feed the timing/reporting code omitted here
    count := 0
    for cur.Next(ctx) {
        var result bson.M
        err := cur.Decode(&result)
        if err != nil {
            log.Fatal(err)
        }
        bytes, err := json.Marshal(result)
        if err != nil {
            log.Fatal(err)
        }
        count++
        msg := &sarama.ProducerMessage{
            Topic: "hello",
            // Key: sarama.StringEncoder("aKey"),
            Value: sarama.ByteEncoder(bytes),
        }
        asyncProducer.Input() <- msg
    }
}
But the program only retrieves about 600,000 records instead of 2,000,000 every time I run it.
$ go run main.go
done
count = 605426
nErrors = 0
2020/09/18 11:23:43 End: retrive db from mongo and publish to kafka took 10.080603336s
I don't know why. I want to retrieve all 2,000,000 records. Thanks for any help.
Your loop fetching the results may end early because you are using the same ctx context, which has a 10-second timeout, for iterating over the results.
This means that if retrieving and processing the 2 million records (including connecting) takes more than 10 seconds, the context will be cancelled and the cursor will report an error.
Note that setting FindOptions.NoCursorTimeout to true only prevents the cursor from timing out due to inactivity; it does not override the used context's timeout.
Use another context for executing the query and iterating over the results, one that does not have a timeout, e.g. context.Background().
Also note that to construct the options for Find you can use the helper methods, so it may look as simple and as elegant as this:
options.Find().SetBatchSize(10000).SetNoCursorTimeout(true)
So the working code:
ctx2 := context.Background()
cur, err := collection.Find(ctx2, bson.D{},
    options.Find().SetBatchSize(10000).SetNoCursorTimeout(true))
// ...
for cur.Next(ctx2) {
    // ...
}
// Also check the error after the loop:
if err := cur.Err(); err != nil {
    log.Printf("Iterating over results failed: %v", err)
}

Ensure MongoDB expires data at dynamic time intervals and calls are idempotent

I am using MongoDB to save user-generated links in storage. The user can state how long they want the URL to be saved before it expires. Every user id is unique too.
Ideally, I would like my requests to be idempotent. I would like to make as many calls as I want without having to check whether an expiry value was set on a previous call.
My code below seems to give me either:
"Index with name: creationtime_1 already exists with different options" or
index does not exist.
This is my first run with MongoDB and I would appreciate any insights. I think I might have redundant checks in my code too, but I can't figure out how else to do it.
```
//mongo settings
sessionTTL := mgo.Index{
    Key:         []string{"creationtime"},
    Unique:      false,
    DropDups:    false,
    Background:  true,
    ExpireAfter: time.Hour * time.Duration(expires), // Expire after the `expires` duration
}

// START MONGODB
session, err := mgo.Dial(tokenMongo)
if err != nil {
    return "", err
}
defer session.Close()
//session.SetSafe(&mgo.Safe{})
// Optional. Switch the session to a monotonic behavior.

id := uuid.NewV4().String()
thistime := time.Now().Local()

// find index
err = session.DB("tokenresults").C("tokenurl").Find(bson.M{"id": id}).One(&result)
if err == nil {
    // Drop old values if they exist // can't drop if empty
    if err := session.DB("tokenresults").C("tokenurl").DropIndex("creationtime"); err != nil {
        return "", err
    }
}

// add stuff
c := session.DB("tokenresults").C("tokenurl")
err = c.Insert(&TokenUrl{id, tokenstring, thistime})
if err != nil {
    return "", err
}

// create index // add session ttl // can't create if it already exists
if err := session.DB("tokenresults").C("tokenurl").EnsureIndex(sessionTTL); err != nil {
    return "", err
}
```
The Solution
The approach is documented: Use a date field, set the value to the date the document expires, create a TTL-Index with ExpireAfterSeconds set to 0 and the MongoDB background TTL purging process will delete the expired documents.
Notes
However, there is some fuzziness in using TTL indices. Since it would be too costly to have a process for each document which is to be expired, waiting for the expiration time and then deleting the document, MongoDB chose a different solution. There is a background process which checks for expired documents once a minute. So there is no guarantee that your documents will expire immediately at their expiration time and a document might exist up to slightly under 2 minutes longer than the set date of expiration (missing the first run because of overload or whatever and only being deleted in the next run). Note however that this only occurs under very special circumstances. Usually, your documents get deleted within the minute of their expiration.
Explanation
What we basically do here is to add a field ExpirationDate and create a TTL index which is set to check for this expiration date. To which value this ExpirationDate is set is totally up to you. Use a Factory pattern to generate Sessions or whatever.
Note that there are some caveats explained in the code below.
package main

import (
    "flag"
    "fmt"
    "log"
    "time"

    mgo "gopkg.in/mgo.v2"
    "gopkg.in/mgo.v2/bson"
)

const (
    // SESSION_TIMEOUT is a fixed and relatively short
    // timeout for demo purposes
    SESSION_TIMEOUT = 1 * time.Minute
)

// Session is just a sample session struct
// with various session related data and the
// date on which a session should expire.
type Session struct {
    ID             bson.ObjectId `bson:"_id"`
    User           string
    Foo            string
    Bar            string
    ExpirationDate time.Time `bson:"expirationDate"`
}

// NewSession is just a simple helper method to
// return a session with a properly set expiration time.
func NewSession(user, foo, bar string) Session {
    // We use a static timeout here.
    // However, you can easily adapt this to use an arbitrary timeout.
    return Session{
        ID:             bson.NewObjectId(),
        User:           user,
        Foo:            foo,
        Bar:            bar,
        ExpirationDate: time.Now().Add(SESSION_TIMEOUT),
    }
}

var (
    mgohost string
    mgoport int
    db      string
    col     string
)

func init() {
    flag.StringVar(&mgohost, "host", "localhost", "MongoDB host")
    flag.IntVar(&mgoport, "port", 27017, "MongoDB port")
    flag.StringVar(&db, "db", "test", "MongoDB database")
    flag.StringVar(&col, "collection", "ttltest", "MongoDB collection")
}

func main() {
    flag.Parse()

    c, err := mgo.Dial(fmt.Sprintf("mongodb://%s:%d/%s", mgohost, mgoport, db))
    if err != nil {
        log.Fatalf("Error connecting to '%s:%d/%s': %s", mgohost, mgoport, db, err)
    }

    // We use a goroutine here in order to make sure
    // that even when EnsureIndex blocks, our program continues.
    go func() {
        log.Println("Ensuring sessionTTL index in background")
        // Request a connection from the pool
        m := c.DB(db).Session.Copy()
        defer m.Close()
        // We need to set this to 1 as 0 would fail to create the TTL index.
        // See https://github.com/go-mgo/mgo/issues/103 for details
        // This will expire the session within the minute after ExpirationDate.
        //
        // The TTL purging is done once a minute only.
        // See https://docs.mongodb.com/manual/core/index-ttl/#timing-of-the-delete-operation
        // for details
        if err := m.DB(db).C(col).EnsureIndex(mgo.Index{ExpireAfter: 1 * time.Second, Key: []string{"expirationDate"}}); err != nil {
            log.Printf("Error ensuring sessionTTL index: %s", err)
        }
        log.Println("sessionTTL index is ready")
    }()

    s := NewSession("mwmahlberg", "foo", "bar")

    if err := c.DB(db).C(col).Insert(&s); err != nil {
        log.Fatalf("Error inserting %#v into %s.%s: %s", s, db, col, err)
    }

    l := Session{}

    if err := c.DB(db).C(col).Find(nil).One(&l); err != nil {
        log.Fatalf("Could not load session from %s.%s: %s", db, col, err)
    }

    log.Printf("Session with ID %s loaded for user '%s' which will expire in %s", l.ID, l.User, time.Until(l.ExpirationDate))

    time.Sleep(2 * time.Minute)

    // Let's check if the session is still there.
    if n, err := c.DB(db).C(col).Count(); err != nil {
        log.Fatalf("Error counting documents in %s.%s: %s", db, col, err)
    } else if n > 0 {
        log.Fatalf("Oops! Something went wrong: the session has not expired yet!")
    }

    log.Println("All sessions were expired.")
}
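The code above uses the legacy mgo driver. As a sketch only (not part of the original answer), the same approach with the official mongo-go-driver would be a TTL index with ExpireAfterSeconds(0) on the date field, with each document carrying its own expiration date; the collection name, field names, lifetime, and the pre-existing client and ctx below are assumptions:

// TTL index on expirationDate with expireAfterSeconds = 0:
// each document is removed once its own expirationDate has passed,
// subject to the ~60-second TTL monitor interval discussed above.
coll := client.Database("test").Collection("ttltest")
_, err := coll.Indexes().CreateOne(ctx, mongo.IndexModel{
    Keys:    bson.M{"expirationDate": 1},
    Options: options.Index().SetExpireAfterSeconds(0),
})
if err != nil {
    log.Fatal(err)
}

// Every insert chooses its own lifetime by setting expirationDate.
_, err = coll.InsertOne(ctx, bson.M{
    "user":           "mwmahlberg",
    "expirationDate": time.Now().Add(30 * time.Minute), // assumed lifetime
})
if err != nil {
    log.Fatal(err)
}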

fetching the data from a mongodb in golang

I'm trying to fetch data from MongoDB in Go using the gopkg.in/mgo.v2 driver. The format of the data is not fixed, as in some documents will contain fields that other documents might not.
Here is the code for the same:
session, err := mgo.Dial("mongodb://root:root@localhost:27017/admin")
db := session.DB("test")
fmt.Println(reflect.TypeOf(db))
CheckError(err, "error")

result := make(map[string]string)
//query := make(map[string]string)
//query["_id"] = "3434"
err1 := db.C("mycollection").Find(nil).One(&result)
CheckError(err1, "error")

for k := range result {
    fmt.Println(k)
}
Now the data contained in the collection is { "_id" : "3434", "0" : 1 }; however, the for loop only prints _id. Shouldn't there be two keys, '_id' and '0'? Or am I doing something wrong here?
Oh, I found the solution:
the result variable should be of type bson.M, and then you can type-assert accordingly as you go deeper into the nested structure.
Give the following piece of code a try. It will help you fetch matching records from the database using a BSON object.
Do not forget to change the database name and collection name of your MongoDB in the code below. You also need to change the query parameter accordingly.
Happy coding...
package main

import (
    "context"
    "fmt"
    "time"

    "go.mongodb.org/mongo-driver/bson"
    "go.mongodb.org/mongo-driver/mongo"
    "go.mongodb.org/mongo-driver/mongo/options"
)

// This is a user defined method to close resources.
// This method closes the mongoDB connection and cancels the context.
func close(client *mongo.Client, ctx context.Context, cancel context.CancelFunc) {
    defer cancel()
    defer func() {
        if err := client.Disconnect(ctx); err != nil {
            panic(err)
        }
    }()
}

// This is a user defined method that returns
// a mongo.Client, context.Context,
// context.CancelFunc and error.
// mongo.Client will be used for further database
// operations. context.Context will be used to set
// deadlines for the process. context.CancelFunc will
// be used to cancel the context and the resources
// associated with it.
func connect(uri string) (*mongo.Client, context.Context, context.CancelFunc, error) {
    ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
    client, err := mongo.Connect(ctx, options.Client().ApplyURI(uri))
    return client, ctx, cancel, err
}

// query is a user defined method used to query MongoDB
// that accepts a mongo.Client, a context, a database name,
// a collection name, a query and a field.
// The database name and collection name are of type
// string. query is of type interface.
// field is of type interface, which limits
// the fields being returned.
// The query method returns a cursor and an error.
func query(client *mongo.Client, ctx context.Context, dataBase, col string, query, field interface{}) (result *mongo.Cursor, err error) {
    // select the database and collection.
    collection := client.Database(dataBase).Collection(col)

    // collection has a method Find
    // that returns a mongo.Cursor
    // based on the query and field.
    result, err = collection.Find(ctx, query, options.Find().SetProjection(field))
    return
}

func main() {
    // Get the Client, Context, CancelFunc and err from the connect method.
    client, ctx, cancel, err := connect("mongodb://localhost:27017")
    if err != nil {
        panic(err)
    }

    // Free the resources when the main function returns.
    defer close(client, ctx, cancel)

    // create a filter and an option of type interface
    // that store bson objects.
    var filter, option interface{}

    // filter matches the document
    // whose _id equals 3434.
    filter = bson.D{
        {"_id", bson.D{{"$eq", 3434}}},
    }

    // option removes the _id field from all returned documents.
    option = bson.D{{"_id", 0}}

    // call the query method with the client, context,
    // database name, collection name, filter and option.
    // This method returns a mongo.Cursor and an error, if any.
    cursor, err := query(client, ctx, "YourDataBaseName", "YourCollectionName", filter, option)
    // handle the errors.
    if err != nil {
        panic(err)
    }

    var results []bson.D

    // get bson objects from the cursor;
    // returns an error, if any.
    if err := cursor.All(ctx, &results); err != nil {
        // handle the error
        panic(err)
    }

    // printing the result of the query.
    fmt.Println("Query Result")
    for _, doc := range results {
        fmt.Println(doc)
    }
}