Scala script wait for mongo to complete task - mongodb

I'm writing a simple Scala-based script which is supposed to insert some data into a Mongo collection. The problem is that the script exits before Mongo finishes its task. What is the idiomatic/best approach to deal with the problem, considering the following script:
#!/usr/bin/env scalas
/***
scalaVersion := "2.12.2"
libraryDependencies ++= {
Seq(
"org.mongodb.scala" %% "mongo-scala-driver" % "2.1.0"
)
}
*/
import org.mongodb.scala._
// Connects to the local MongoDB instance and selects the "dev" database.
val mongoClient: MongoClient = MongoClient("mongodb://localhost")
val database: MongoDatabase = mongoClient.getDatabase("dev")
// Sample document with a nested "info" sub-document.
val doc: Document = Document("name" -> "MongoDB", "type" -> "database",
"count" -> 1, "info" -> Document("x" -> 203, "y" -> 102))
val collection: MongoCollection[Document] = database.getCollection("test")
// Observer that only logs the three insert callbacks; it does not signal
// completion to the rest of the script in any way.
val subscription = new Observer[Completed] {
override def onNext(result: Completed): Unit = println("Inserted")
override def onError(e: Throwable): Unit = println("Failed"+e.toString)
override def onComplete(): Unit = println("Completed")
}
// This is the last statement of the script: nothing after it waits for the
// callbacks above, which is the situation the question describes (the script
// exits before the insert finishes unless a sleep is appended).
collection.insertOne(doc).subscribe(subscription)
The script above produces the following error when executed:
com.mongodb.MongoInterruptedException: Interrupted acquiring a permit to retrieve an item from the pool
However, if I add Thread.sleep(3000) in the end it completes just fine.

I recommend using a Promise object to signal completion of the asynchronous job.
http://www.scala-lang.org/api/2.12.1/scala/concurrent/Promise.html
The program then exits once the asynchronous job has finished or the timeout has elapsed.
import scala.concurrent.{Await, Promise}
import scala.concurrent.duration._

// Completed by the observer's terminal callbacks so that the main thread has
// something to block on: true = insert completed, false = insert failed.
val promise = Promise[Boolean]()

val subscription = new Observer[Completed] {
  override def onNext(result: Completed): Unit = println("Inserted")

  override def onError(e: Throwable): Unit = {
    println("Failed"+e.toString)
    // trySuccess instead of success: never throws if the promise is already completed.
    promise.trySuccess(false)
  }

  override def onComplete(): Unit = {
    println("Completed")
    promise.trySuccess(true)
  }
}
collection.insertOne(doc).subscribe(subscription)

// Block the script until one of the terminal callbacks has fired, or fail
// with a TimeoutException after 10 seconds.
val future = promise.future
Await.result(future, 10.seconds)
//after completion, the program would exit.

Related

Scala mongodb transaction: How to rollback?

There is an example of Scala mongodb transaction:
https://github.com/mongodb/mongo-scala-driver/blob/r2.4.0/driver/src/it/scala/org/mongodb/scala/DocumentationTransactionsExampleSpec.scala
But it's not clear how to rollback the transaction in case of failure.
Here is the code I copied from official example but modified a bit to make the transaction fail in the second insertion (inserting 2 documents with same ids), but problem is that the first document is persisted, and I need the WHOLE transaction to be rolled back.
import org.mongodb.scala._
import scala.concurrent.Await
import scala.concurrent.duration.Duration
// Transaction demo from the question: two inserts with the same _id are issued
// inside one client session, followed by a commit with retry handling. The
// question is how to roll the whole transaction back when the second insert fails.
object Application extends App {
val mongoClient: MongoClient = MongoClient("mongodb://localhost:27018")
val database = mongoClient.getDatabase("hr")
// Not referenced anywhere else in the visible code.
val employeesCollection = database.getCollection("employees")
// Implicit functions that execute the Observable and return the results
val waitDuration = Duration(5, "seconds")
// Adds a blocking execute() to any Observable: waits up to waitDuration for all results.
implicit class ObservableExecutor[T](observable: Observable[T]) {
def execute(): Seq[T] = Await.result(observable.toFuture(), waitDuration)
}
// Same as above for SingleObservable: waits up to waitDuration for the single result.
implicit class SingleObservableExecutor[T](observable: SingleObservable[T]) {
def execute(): T = Await.result(observable.toFuture(), waitDuration)
}
// Runs the whole transaction pipeline and blocks on its result, then sleeps
// for another three seconds before the App body ends.
updateEmployeeInfoWithRetry(mongoClient).execute()
Thread.sleep(3000)
/// -------------------------
// Starts a transaction on the session delivered by `observable`, fires two
// inserts into "events", and passes the session on.
// Both inserts are subscribed independently ("fire and forget"): their
// outcome is not connected to the value returned from the map, and the
// session is returned immediately after subscribing.
// NOTE(review): because of that, a failure of the second insert presumably
// never reaches the commit/retry stages below - confirm.
def updateEmployeeInfo(database: MongoDatabase, observable: SingleObservable[ClientSession]): SingleObservable[ClientSession] = {
observable.map(clientSession => {
val eventsCollection = database.getCollection("events")
// Snapshot read concern and majority write concern for the transaction.
val transactionOptions = TransactionOptions.builder().readConcern(ReadConcern.SNAPSHOT).writeConcern(WriteConcern.MAJORITY).build()
clientSession.startTransaction(transactionOptions)
eventsCollection.insertOne(clientSession, Document("_id" -> "123", "employee" -> 3, "status" -> Document("new" -> "Inactive", "old" -> "Active")))
.subscribe((res: Completed) => println(res))
// THIS SHOULD FAIL, SINCE THERE IS ALREADY DOCUMENT WITH ID = 123, but PREVIOUS OPERATION SHOULD BE ALSO ROLLED BACK.
// I COULD NOT FIND THE WAY HOW TO ROLLBACK WHOLE TRANSACTION IF ONE OF OPERATIONS FAILED
eventsCollection.insertOne(clientSession, Document("_id" -> "123", "employee" -> 3, "status" -> Document("new" -> "Inactive", "old" -> "Active")))
.subscribe((res: Completed) => println(res))
// I'VE TRIED VARIOUS THINGS (INCLUDING CODE BELOW)
// .subscribe(new Observer[Completed] {
// override def onNext(result: Completed): Unit = println("onNext")
//
// override def onError(e: Throwable): Unit = clientSession.abortTransaction()
//
// override def onComplete(): Unit = println("complete")
// })
clientSession
})
}
// Wraps a commit observable so that a MongoException carrying the
// UNKNOWN_TRANSACTION_COMMIT_RESULT label triggers another attempt with the
// same observable; any other exception is logged and rethrown.
def commitAndRetry(observable: SingleObservable[Completed]): SingleObservable[Completed] = {
observable.recoverWith({
case e: MongoException if e.hasErrorLabel(MongoException.UNKNOWN_TRANSACTION_COMMIT_RESULT_LABEL) => {
println("UnknownTransactionCommitResult, retrying commit operation ...")
commitAndRetry(observable)
}
case e: Exception => {
println(s"Exception during commit ...: $e")
throw e
}
})
}
// Re-runs the given observable when it fails with a MongoException carrying
// the TRANSIENT_TRANSACTION_ERROR label. The log message mentions aborting,
// but no abortTransaction() call is made in this function.
def runTransactionAndRetry(observable: SingleObservable[Completed]): SingleObservable[Completed] = {
observable.recoverWith({
case e: MongoException if e.hasErrorLabel(MongoException.TRANSIENT_TRANSACTION_ERROR_LABEL) => {
println("TransientTransactionError, aborting transaction and retrying ...")
runTransactionAndRetry(observable)
}
})
}
// Assembles the pipeline: start session -> updateEmployeeInfo -> commit
// (with commit retry) -> transient-error retry around the whole thing.
def updateEmployeeInfoWithRetry(client: MongoClient): SingleObservable[Completed] = {
val database = client.getDatabase("hr")
val updateEmployeeInfoObservable: Observable[ClientSession] = updateEmployeeInfo(database, client.startSession())
// The commit is issued as soon as updateEmployeeInfo emits the session.
val commitTransactionObservable: SingleObservable[Completed] =
updateEmployeeInfoObservable.flatMap(clientSession => clientSession.commitTransaction())
val commitAndRetryObservable: SingleObservable[Completed] = commitAndRetry(commitTransactionObservable)
runTransactionAndRetry(commitAndRetryObservable)
}
}
How to rollback the whole transaction if any operation failed?
From the source code of the Scala driver at https://github.com/mongodb/mongo-scala-driver/blob/r2.6.0/driver/src/main/scala/org/mongodb/scala/ClientSessionImplicits.scala
It appears that there is an abortTransaction() method defined along with commitTransaction().
In another note, currently a single replica set transaction in MongoDB 4.0 will be automatically aborted if it's not committed within 60 seconds (configurable). In the MongoDB Multi-Document ACID Transactions blog post:
By default, MongoDB will automatically abort any multi-document transaction that runs for more than 60 seconds. Note that if write volumes to the server are low, you have the flexibility to tune your transactions for a longer execution time.

Mongo Scala Driver - Can't insert in the database

I'm practicing on a project that needs a database connection; I'm using the Play Framework combined with Scala and MongoDB.
I'm also using Mongo-scala-driver and following the documentation.
I wrote the exact same code:
println("start")
val mongoClient: MongoClient = MongoClient("mongodb://localhost:27017/Sandbox")
val database: MongoDatabase = mongoClient.getDatabase("test")
val collection: MongoCollection[Document] = database.getCollection("test")
val doc: Document = Document("_id" -> 0, "name" -> "MongoDB", "type" -> "database", "count" -> 1, "info" -> Document("x" -> 203, "y" -> 102))
// Subscribes a logging observer to the insert.
collection.insertOne(doc).subscribe(new Observer[Completed] {
// This override only prints; it never calls subscription.request(...).
// NOTE(review): presumably that means no demand is ever signalled, so the
// insert is never executed - confirm against the driver's Observer docs.
override def onSubscribe(subscription: Subscription): Unit = println("Subscribed")
override def onNext(result: Completed): Unit = println("Inserted")
override def onError(e: Throwable): Unit = println("Failed")
override def onComplete(): Unit = println("Completed")
})
// The client is closed right after subscribing; nothing waits for the
// callbacks above before this line runs.
mongoClient.close()
println("end")
Nothing is inserted into the database and the only result i get from the log is this:
start
Subscribed
end
I've been looking on stackoverflow for similar subject but everything I found didn't work for me.
You are inserting the document in asynchronous mode.
Therefore you must define the three callback functions onNext, onError and onComplete.
But you don't give the insertion time to execute.
Try adding a delay before closing the connection. For example, simply add
Thread.sleep(1000)
before
mongoClient.close()
And you do not need to redefine onSubscribe().
Unless you want to control demand manually while iterating over the documents returned by your requests, you do not need to override onSubscribe(). The default definition of onSubscribe() is perfectly usable for simple requests; in your case you do not need to override it.
The following code works:
println("start")
val mongoClient: MongoClient = MongoClient("mongodb://DB01-MongoDB:27017/Sandbox")
val database: MongoDatabase = mongoClient.getDatabase("test")
val collection: MongoCollection[Document] = database.getCollection("test")
val doc: Document = Document("_id" -> 0,
"name" -> "MongoDB",
"type" -> "database",
"count" -> 1,
"info" -> Document("x" -> 203, "y" -> 102))
// The observer implements only the three result callbacks; onSubscribe is
// left at its default.
collection
.insertOne(doc)
.subscribe(new Observer[Completed] {
override def onNext(result: Completed): Unit = println("Inserted")
override def onError(e: Throwable): Unit = println("Failed")
override def onComplete(): Unit = println("Completed")
})
// Fixed one-second pause to give the asynchronous insert time to run before
// the client is closed; the pause is not tied to the insert actually finishing.
Thread.sleep(1000)
mongoClient.close()
println("end")
}
The problem was the Observer, I imported it from org.mongodb.async.client but the good one was org.mongodb.scala.
Hope this helps someone else.
The above solution may work, but you pay a 1-second delay every time you insert (or make any other call). Another solution is to make use of the callback:
val insertObservable = collection.insertOne(doc)
// Close the client from the terminal callbacks, so it stays open exactly as
// long as the insert is in flight and is released on failure as well.
insertObservable.subscribe(new Observer[Completed] {
  // onNext, onError and onComplete are all abstract on Observer, so each of
  // them has to be implemented even when there is nothing to do.
  override def onNext(result: Completed): Unit = ()

  override def onError(e: Throwable): Unit = {
    println(s"Insert failed: $e")
    mongoClient.close()
  }

  override def onComplete(): Unit = mongoClient.close()
})
Once the insert has completed, the connection is closed right away, without wasting 1 second.

No server chosen by WritableServerSelector from cluster description when inserting a document

I'm following the MongoDB Scala Drive Quick Tour guide and trying to insert a document. But I keeping seeing the following message whenever I do so
INFO: No server chosen by WritableServerSelector from cluster
description ClusterDescription{type=UNKNOWN, connectionMode=SINGLE,
serverDescriptions=[ServerDescription{address=ds155695.mlab.com:55695,
type=UNKNOWN, state=CONNECTING}]}. Waiting for 30000 ms before timing
out
Here is what my code looks like
// Connection string with the credentials masked out.
// NOTE(review): as posted there is no '@' between the masked password and the
// host name - presumably lost while redacting; confirm against the real URL.
val url: String = "mongodb://heroku_#######:##############ds155695.mlab.com:55695/heroku_#########"
val mongoClient: MongoClient = MongoClient(url)
val db: MongoDatabase = mongoClient.getDatabase("heroku_#####")
val collection: MongoCollection[Document] = db.getCollection("omens")
println(collection)
val doc: Document = Document("_id" -> 3434, "name" -> "xxxxxx", "type" -> "yyyyyy")
val observable: Observable[Completed] = collection.insertOne(doc)
// Subscribes a logging observer; nothing after this statement waits for the
// callbacks to fire.
observable.subscribe(new Observer[Completed] {
override def onNext(result: Completed): Unit = println("Inserted")
override def onError(e: Throwable): Unit = println("Failed")
override def onComplete(): Unit = println("Completed")
})
If I change my code to use Futures, I get the same message. However, if I use Await to explicitly wait for a time, it works. The following is defined in Helpers.scala, as suggested by the quick tour.
/**
 * Blocking convenience helpers on top of an `Observable[C]`.
 *
 * Implementors supply the observable and a function that renders one result
 * as text; every helper waits at most 10 seconds for the observable.
 */
trait ImplicitObservable[C] {
  val observable: Observable[C]
  val converter: (C) => String

  /** Waits for the observable to finish and returns everything it emitted. */
  def results(): Seq[C] = {
    val pending = observable.toFuture()
    Await.result(pending, Duration(10, TimeUnit.SECONDS))
  }

  /** Waits for and returns the head of the observable. */
  def headResult() = {
    val pending = observable.head()
    Await.result(pending, Duration(10, TimeUnit.SECONDS))
  }

  /** Prints `initial` (when non-empty, without a newline), then one line per result. */
  def printResults(initial: String = ""): Unit = {
    if (initial.nonEmpty) print(initial)
    for (res <- results()) println(converter(res))
  }

  /** Prints `initial` immediately followed by the rendered head result, on one line. */
  def printHeadResult(initial: String = ""): Unit = {
    val rendered = converter(headResult())
    println(s"${initial}${rendered}")
  }
}
And if I do the following, it'll work and insert the document.
val result = collection.insertOne(doc).results()
But I find that a little suboptimal and want to use Futures or Observable. Can someone point what I'm doing wrong?
Here is the full stack trace
INFO: Exception in monitor thread while connecting to server ds155695.mlab.com:55695
com.mongodb.MongoInterruptedException: Opening the AsynchronousSocketChannelStream failed
at com.mongodb.connection.FutureAsyncCompletionHandler.get(FutureAsyncCompletionHandler.java:59)
at com.mongodb.connection.FutureAsyncCompletionHandler.getOpen(FutureAsyncCompletionHandler.java:44)
at com.mongodb.connection.AsynchronousSocketChannelStream.open(AsynchronousSocketChannelStream.java:62)
at com.mongodb.connection.InternalStreamConnection.open(InternalStreamConnection.java:115)
at com.mongodb.connection.DefaultServerMonitor$ServerMonitorRunnable.run(DefaultServerMonitor.java:113)
at java.lang.Thread.run(Thread.java:745)
Caused by: java.lang.InterruptedException
at java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireSharedInterruptibly(AbstractQueuedSynchronizer.java:1302)
at java.util.concurrent.CountDownLatch.await(CountDownLatch.java:231)
at com.mongodb.connection.FutureAsyncCompletionHandler.get(FutureAsyncCompletionHandler.java:57)

Scala error: org.bson.codecs.configuration.CodecConfigurationException: Can't find a codec for class scala.Some

I am trying to get count of mongo query result, but I am getting error
org.bson.codecs.configuration.CodecConfigurationException: Can't find a codec for class scala.Some. Can somebody help?
This is my code:
// Streams every document of "movies" and, for each one that has a movieId,
// starts a count query on "ratings". Blocks until the movie stream has
// completed, then closes the client.
def fetchData() = {
val mongoClient = MongoClient("mongodb://127.0.0.1")
val database = mongoClient.getDatabase("assignment")
val movieCollection = database.getCollection("movies")
val ratingCollection = database.getCollection("ratings")
// Released only from onComplete of the movie stream (see below).
val latch1 = new CountDownLatch(1)
movieCollection.find().subscribe(new Observer[Document] {
// Logs the failure but does not count the latch down, so latch1.await()
// below keeps blocking if the find fails.
override def onError(e: Throwable): Unit = {
println("Error while fetching data")
e.printStackTrace()
}
override def onComplete(): Unit = {
latch1.countDown()
println("Completed fetching data")
}
override def onNext(movie: Document): Unit = {
// movie.get(...) is passed to the filter as returned, i.e. still wrapped.
// The thread attributes the "Can't find a codec for class scala.Some"
// error to exactly this call.
if (movie.get("movieId") != null) {
ratingCollection.count(equal("movieId", movie.get("movieId"))).subscribe(new Observer[Long] {
override def onError(e: Throwable): Unit = println(s"onError: $e")
override def onNext(result: Long): Unit = { println(s"In count result : $result") }
override def onComplete(): Unit = println("onComplete")
})
}
}
})
// Waits for the movie stream only; the per-movie count subscriptions are
// not awaited before the client is closed.
latch1.await()
mongoClient.close()
}
I am using mongo 3.2.12 and scala -driver:
<dependency>
<groupId>org.mongodb.scala</groupId>
<artifactId>mongo-scala-driver_2.11</artifactId>
<version>2.1.0</version>
</dependency>
Use the code in this answer, and then add that codec to your codec registry. First, add
import org.bson.codecs.configuration.CodecRegistries.fromCodecs
You might already have other imports from that package already; for example, if you're using both providers, registries and codecs:
import org.bson.codecs.configuration.CodecRegistries.{fromRegistries, fromProviders, fromCodecs}
Just make sure you have everything you need imported.
Then:
// Builds a registry that lists the custom codec before the driver defaults.
// SomeCodec is the class from the answer referenced in the text; it is not
// defined in this snippet.
val codecRegistry = fromRegistries(/* ..., */ fromCodecs(new SomeCodec()), DEFAULT_CODEC_REGISTRY)
val mongoClient = MongoClient("mongodb://127.0.0.1")
// The registry is attached to the database handle returned here.
val database = mongoClient.getDatabase("assignment").withCodecRegistry(codecRegistry)
This answer is a little bit old, after losing many hours solving the same issue I write an update to it
Using Macros it's much easier now:
// NOTE(review): CodecProvider, CodecRegistry, fromRegistries and fromProviders
// are used below but no import for them is shown - presumably they come from
// org.bson.codecs.configuration; confirm. `Movie` and `mongo` are defined
// outside this snippet.
import org.mongodb.scala.bson.codecs._
// Macro-generated codec provider for the Movie case class.
// NOTE(review): the "IgnoreNone" variant presumably omits None fields when
// encoding - confirm against the driver's Macros documentation.
val movieCodecProvider: CodecProvider = Macros.createCodecProviderIgnoreNone[Movie]()
// The generated provider is listed before the driver defaults.
val codecRegistry: CodecRegistry = fromRegistries(fromProviders(movieCodecProvider), DEFAULT_CODEC_REGISTRY)
// Typed collection obtained from a database handle that carries the registry.
val movieCollection: MongoCollection[Movie] = mongo.database.withCodecRegistry(codecRegistry).getCollection("movie_collection")
pay attention when you write "manual" query (i.e. query in which you are not parsing an entire Movie object, like an update) you have to handle the Some field like a plain object
so to set it to None you do
movieCollection.updateOne(
equal("_id", movie._id),
unset("foo")
)
to set it to Some
movieCollection.updateOne(
equal("_id", movie._id),
set("foo","some_value")
)
Please make sure all fields are transformed into Strings. Especially enums, where you want the field to be inserted as <your-enum>.map(_.toString).
The code that causes the exception is this
ratingCollection.count(equal("movieId", movie.get("movieId")))
Specifically movie.get(...) which has return type Option[BsonValue]. You cannot query collections with Option[T] values. Since you already checked against null, you could change the code to movie.get("movieId").get but the scala approach would be to utilize pattern matching, something akin to this.
// Starts a ratings count for one movie document.
// movie.get("movieId") yields an Option; it is unwrapped here so that the
// filter receives the BSON value itself rather than a scala.Some, for which
// the driver has no codec.
override def onNext(movie: Document): Unit = {
  movie.get("movieId") match {
    // Accept the id whatever its BSON type is (int32, int64, string, ...).
    case Some(movieId) =>
      ratingCollection.count(equal("movieId", movieId)).subscribe(new Observer[Long] {
        override def onError(e: Throwable): Unit = println(s"onError: $e")
        override def onNext(result: Long): Unit = { println(s"In count result : $result") }
        override def onComplete(): Unit = println("onComplete")
      })
    // The document has no movieId field: nothing to count.
    case None =>
      println("invalid id None")
  }
}
The underlying issue is how the mongo scala driver handles Option[T] monads. It's not well documented. One of the answers already provided to this question already shows how to solve this issue with querying case classes like Foo(bar: Option[BsonValue]) but be aware that it fails for other case classes such as Foo(bar: Seq[Option[BsonValue]]).
As mentioned in the answer I refer to, the createCodecProviderIgnoreNone and related codec providers only applies to full document queries, like insert, findReplace etc. When doing field operation queries you have to unpack the Option yourself. I prefer to do this using pattern matching such as shown in my example.
This works for me using the versions below:
scalaVersion := "2.13.1"
sbt.version = 1.3.8
import org.mongodb.scala.bson.ObjectId
/** A person record; `_id` is the ObjectId carried with it. */
case class Person(_id: ObjectId, firstName: String, lastName: String)

/** Companion adding a constructor that does not require an explicit id. */
object Person {
  /** Creates a Person from the two names, generating a new ObjectId for `_id`. */
  def apply(firstName: String, lastName: String): Person = {
    val freshId = new ObjectId()
    new Person(freshId, firstName, lastName)
  }
}
import models.Person
import org.mongodb.scala.{Completed, MongoClient, MongoCollection, MongoDatabase, Observer}
import org.mongodb.scala.bson.codecs.Macros._
import org.mongodb.scala.bson.codecs.DEFAULT_CODEC_REGISTRY
import org.bson.codecs.configuration.CodecRegistries.{fromRegistries, fromProviders}
/**
 * Inserts a single [[Person]] into the "people" collection of "mydb" and
 * closes the client once the insert has finished.
 */
object PersonMain extends App {
  import scala.concurrent.{Await, Future, Promise}
  import scala.concurrent.duration._

  // Person codec (derived through the Macros import) ahead of the driver defaults.
  val codecRegistry = fromRegistries(fromProviders(classOf[Person]), DEFAULT_CODEC_REGISTRY)
  val mongoClient: MongoClient = MongoClient("mongodb://localhost")
  val database: MongoDatabase = mongoClient.getDatabase("mydb").withCodecRegistry(codecRegistry)
  val collection: MongoCollection[Person] = database.getCollection("people")

  /**
   * Inserts `doc` asynchronously and logs the outcome.
   *
   * @param doc the person to store
   * @return a future completed when the insert has terminated: successfully
   *         on completion, failed with the driver's error otherwise
   */
  def addDocument(doc: Person): Future[Unit] = {
    val finished = Promise[Unit]()
    collection.insertOne(doc)
      .subscribe(new Observer[Completed] {
        override def onNext(result: Completed): Unit = println(s"Inserted $doc")
        override def onError(e: Throwable): Unit = {
          println(s"Failed $e")
          finished.tryFailure(e)
        }
        override def onComplete(): Unit = {
          println(s"Completed inserting $doc")
          finished.trySuccess(())
        }
      })
    finished.future
  }

  // The insert runs asynchronously: wait for it (at most 10 seconds) before
  // closing the client, otherwise the client is shut down while the operation
  // is still in flight. Await.ready does not rethrow an insert failure, which
  // onError has already logged; the client is closed in every case.
  try Await.ready(addDocument(Person("name", "surname")), 10.seconds)
  finally mongoClient.close()
}

Kafka tests failing intermittently if not starting/stopping kafka each time

I'm trying to run some integration tests for a data stream using an embedded kafka cluster. When executing all the tests in a different environment than my local, the tests are failing due to some internal state that's not removed properly.
I can get all the tests running on the non-local environment when I start/stop the Kafka cluster before/after each test, but I only want to start and stop the cluster once, at the beginning and at the end of the execution of my suite of tests.
I tried to remove the local streams state but that didn't seem to work:
override protected def afterEach(): Unit = KStreamTestUtils.purgeLocalStreamsState(properties)
Is there a way to get my suite of tests running without having to start/stop the cluster each time?
Right below there are the relevant classes.
// Integration spec for TweetStreamProcessor against an embedded Kafka cluster
// that is started once for the whole suite and shared by both scenarios.
// publishEventsToKafka, upTweetEvent and delTweetEvent are used below but are
// not defined in the visible part of the class.
class TweetStreamProcessorSpec extends FeatureSpec
with MockFactory with GivenWhenThen with Eventually with BeforeAndAfterEach with BeforeAndAfterAll {
val CLUSTER: EmbeddedKafkaCluster = new EmbeddedKafkaCluster
val TEST_TOPIC: String = "test_topic"
// Left empty here; createProducer below declares its own local `properties`.
val properties = new Properties()
// Starts the cluster and creates the single test topic once, before any scenario.
override def beforeAll(): Unit = {
CLUSTER.start()
CLUSTER.createTopic(TEST_TOPIC, 1, 1)
}
override def afterAll(): Unit = CLUSTER.stop()
// if uncommenting these lines tests works
// override def afterEach(): Unit = CLUSTER.stop()
// override protected def beforeEach(): Unit = CLUSTER.start()
// Builds a producer pointed at the embedded cluster and its schema registry.
// NOTE(review): a Scala Map is handed to the KafkaProducer constructor -
// presumably a conversion to a Java map is in scope; confirm.
def createProducer: KafkaProducer[String, TweetEvent] = {
val properties = Map(
KEY_SERIALIZER_CLASS_CONFIG -> classOf[StringSerializer].getName,
VALUE_SERIALIZER_CLASS_CONFIG -> classOf[ReflectAvroSerializer[TweetEvent]].getName,
BOOTSTRAP_SERVERS_CONFIG -> CLUSTER.bootstrapServers(),
SCHEMA_REGISTRY_URL_CONFIG -> CLUSTER.schemaRegistryUrlForcedToLocalhost()
)
new KafkaProducer[String, TweetEvent](properties)
}
// Consumer configuration for the processor under test: the inline settings
// take precedence and fall back to the default loaded config, and only the
// "akka.kafka" subtree is passed on. Every call yields the same topic name
// and the same fixed client id.
def kafkaConsumerSettings: KafkaConfig = {
val bootstrapServers = CLUSTER.bootstrapServers()
val schemaRegistryUrl = CLUSTER.schemaRegistryUrlForcedToLocalhost()
val zookeeper = CLUSTER.zookeeperConnect()
KafkaConfig(
ConfigFactory.parseString(
s"""
akka.kafka.bootstrap.servers = "$bootstrapServers"
akka.kafka.schema.registry.url = "$schemaRegistryUrl"
akka.kafka.zookeeper.servers = "$zookeeper"
akka.kafka.topic-name = "$TEST_TOPIC"
akka.kafka.consumer.kafka-clients.key.deserializer = org.apache.kafka.common.serialization.StringDeserializer
akka.kafka.consumer.kafka-clients.value.deserializer = ${classOf[ReflectAvroDeserializer[TweetEvent]].getName}
akka.kafka.consumer.kafka-clients.client.id = client1
akka.kafka.consumer.wakeup-timeout=20s
akka.kafka.consumer.max-wakeups=10
""").withFallback(ConfigFactory.load()).getConfig("akka.kafka")
)
}
// Both scenarios publish to the same TEST_TOPIC on the shared cluster and
// each builds a fresh processor with a fresh mock logger.
// NOTE(review): events published by one scenario stay on the topic for the
// next one - presumably relevant to the intermittent failures; confirm how
// the consumer group and offsets are configured.
feature("Logging tweet data from kafka topic") {
scenario("log id and payload when consuming a update tweet event") {
publishEventsToKafka(List(upTweetEvent))
val logger = Mockito.mock(classOf[Logger])
val pipeline = new TweetStreamProcessor(kafkaConsumerSettings, logger)
pipeline.start
// Retries the verification for up to five seconds: exactly one matching info call.
eventually(timeout(Span(5, Seconds))) {
Mockito.verify(logger, Mockito.times(1)).info(s"updating tweet uuid=${upTweetEvent.getUuid}, payload=${upTweetEvent.getPayload}")
}
// The future returned by stop is not awaited.
pipeline.stop
}
scenario("log id when consuming a delete tweet event") {
publishEventsToKafka(List(delTweetEvent))
val logger = Mockito.mock(classOf[Logger])
val pipeline = new TweetStreamProcessor(kafkaConsumerSettings, logger)
pipeline.start
eventually(timeout(Span(5, Seconds))) {
Mockito.verify(logger, Mockito.times(1)).info(s"deleting tweet uuid=${delTweetEvent.getUuid}")
}
// The future returned by stop is not awaited.
pipeline.stop
}
}
}
/**
 * Consumes tweet events from the configured Kafka topic, logs each one and
 * commits the consumed offsets in batches.
 *
 * NOTE(review): as posted, the class lists itself (`with TweetStreamProcessor`)
 * among its parents - presumably a renaming artifact of the original trait;
 * confirm the intended parent.
 *
 * @param kafkaConfig source of the topic name (and, presumably, the consumer settings)
 * @param logger      logger handed in by the caller
 */
class TweetStreamProcessor(kafkaConfig: KafkaConfig, logger: Logger)
  extends Lifecycle with TweetStreamProcessor with Logging {
  // Handle on the running stream; stays null until start has been called.
  private var control: Control = _
  private val valueDeserializer: Option[Deserializer[TweetEvent]] = None
  // ...

  /** Committable source subscribed to the configured topic. */
  def tweetsSource(implicit mat: Materializer): Source[CommittableMessage[String, TweetEvent], Control] =
    Consumer.committableSource(tweetConsumerSettings, Subscriptions.topics(kafkaConfig.topicName))

  /**
   * Materializes the stream and keeps its Control for a later stop.
   *
   * @return an already-completed future; it signals that the stream has been
   *         started, not that it has finished
   */
  override def start: Future[Unit] = {
    control = tweetsSource(materializer)
      // Log one event at a time and pass its offset downstream.
      .mapAsync(1) { msg =>
        logTweetEvent(msg.record.value())
          .map(_ => msg.committableOffset)
      }
      // Fold pending offsets (at most 20) into a single batch to commit.
      .batch(max = 20, first => CommittableOffsetBatch.empty.updated(first)) { (batch, elem) =>
        batch.updated(elem)
      }
      .mapAsync(3)(_.commitScaladsl())
      .to(Sink.ignore)
      .run()
    // Explicit unit value instead of relying on argument adaptation.
    Future.successful(())
  }

  /**
   * Shuts the running stream down.
   *
   * @return a future completed once the shutdown has finished; completes
   *         immediately when the stream was never started
   */
  override def stop: Future[Unit] =
    Option(control) match {
      // `()` is the unit value; `Unit` would be the companion object.
      case Some(running) => running.shutdown().map(_ => ())
      // stop before start: nothing to shut down.
      case None => Future.successful(())
    }
}
Any help with this would be much appreciated. Thanks in advance.