I am working with IntelliJ IDEA, in a Scala worksheet. I want to create an encoder for a Scala case class. From various posts on the internet I found the suggestion to use Encoders.product, but it has never worked for me.
The following code
import org.apache.spark.sql.*
val spark: SparkSession =
  SparkSession
    .builder()
    .appName("test")
    .master("local")
    .getOrCreate()
import scala3encoders.given
case class classa(i: Int, j: Int, s: String)
val enc = Encoders.product[classa]
keeps throwing the error:
-- Error: ----------------------------------------------------------------------
1 |val enc = Encoders.product[classa]
| ^
| No TypeTag available for classa
1 error found
Does anyone know what's going on there?
The content of the build.sbt file is:
scalaVersion := "3.1.3"
scalacOptions ++= Seq("-language:implicitConversions", "-deprecation")
libraryDependencies ++= Seq(
excludes(("org.apache.spark" %% "spark-core" % "3.2.0").cross(CrossVersion.for3Use2_13)),
excludes(("org.apache.spark" %% "spark-sql" % "3.2.0").cross(CrossVersion.for3Use2_13)),
excludes("io.github.vincenzobaz" %% "spark-scala3" % "0.1.3"),
"org.scalameta" %% "munit" % "0.7.26" % Test
)
//netty-all replaces all these excludes
def excludes(m: ModuleID): ModuleID =
m.exclude("io.netty", "netty-common").
exclude("io.netty", "netty-handler").
exclude("io.netty", "netty-transport").
exclude("io.netty", "netty-buffer").
exclude("io.netty", "netty-codec").
exclude("io.netty", "netty-resolver").
exclude("io.netty", "netty-transport-native-epoll").
exclude("io.netty", "netty-transport-native-unix-common").
exclude("javax.xml.bind", "jaxb-api").
exclude("jakarta.xml.bind", "jaxb-api").
exclude("javax.activation", "activation").
exclude("jakarta.annotation", "jakarta.annotation-api").
exclude("javax.annotation", "javax.annotation-api")
// Without forking, ctrl-c doesn't actually fully stop Spark
run / fork := true
Test / fork := true
Encoders.product[classa] is a Scala 2 thing: the method accepts an implicit TypeTag, and there are no TypeTags in Scala 3. In Scala 3 the library maintainers propose to work in the following way:
https://github.com/vincenzobaz/spark-scala3/blob/main/examples/src/main/scala/sql/StarWars.scala
package sql

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.{Dataset, DataFrame, SparkSession}
import org.apache.spark.sql.functions._
import org.apache.spark.sql._

object StarWars extends App:
  val spark = SparkSession.builder().master("local").getOrCreate
  import spark.implicits.localSeqToDatasetHolder
  import scala3encoders.given

  extension [T: Encoder] (seq: Seq[T])
    def toDS: Dataset[T] =
      localSeqToDatasetHolder(seq).toDS

  case class Friends(name: String, friends: String)
  val df: Dataset[Friends] = Seq(
    ("Yoda", "Obi-Wan Kenobi"),
    ("Anakin Skywalker", "Sheev Palpatine"),
    ("Luke Skywalker", "Han Solo, Leia Skywalker"),
    ("Leia Skywalker", "Obi-Wan Kenobi"),
    ("Sheev Palpatine", "Anakin Skywalker"),
    ("Han Solo", "Leia Skywalker, Luke Skywalker, Obi-Wan Kenobi, Chewbacca"),
    ("Obi-Wan Kenobi", "Yoda, Qui-Gon Jinn"),
    ("R2-D2", "C-3PO"),
    ("C-3PO", "R2-D2"),
    ("Darth Maul", "Sheev Palpatine"),
    ("Chewbacca", "Han Solo"),
    ("Lando Calrissian", "Han Solo"),
    ("Jabba", "Boba Fett")
  ).toDS.map((n, f) => Friends(n, f))
  val friends = df.as[Friends]
  friends.show()

  case class FriendsMissing(who: String, friends: Option[String])
  val dsMissing: Dataset[FriendsMissing] = Seq(
    ("Yoda", Some("Obi-Wan Kenobi")),
    ("Anakin Skywalker", Some("Sheev Palpatine")),
    ("Luke Skywalker", Option.empty[String]),
    ("Leia Skywalker", Some("Obi-Wan Kenobi")),
    ("Sheev Palpatine", Some("Anakin Skywalker")),
    ("Han Solo", Some("Leia Skywalker, Luke Skywalker, Obi-Wan Kenobi"))
  ).toDS
    .map((a, b) => FriendsMissing(a, b))
  dsMissing.show()

  case class Character(
    name: String,
    height: Int,
    weight: Option[Int],
    eyecolor: Option[String],
    haircolor: Option[String],
    jedi: String,
    species: String
  )
  val characters: Dataset[Character] = spark.sqlContext
    .read
    .option("header", "true")
    .option("delimiter", ";")
    .option("inferSchema", "true")
    .csv("StarWars.csv")
    .as[Character]
  characters.show()

  val sw_df = characters.join(friends, Seq("name"))
  sw_df.show()

  case class SW(
    name: String,
    height: Int,
    weight: Option[Int],
    eyecolor: Option[String],
    haircolor: Option[String],
    jedi: String,
    species: String,
    friends: String
  )
  val sw_ds = sw_df.as[SW]
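Applied to the classa from the question, this approach sidesteps Encoders.product entirely: with import scala3encoders.given in scope, an Encoder is derived at compile time and can simply be summoned. A minimal sketch (assuming the spark-scala3 dependency from the build above):
import org.apache.spark.sql.Encoder
import scala3encoders.given

case class classa(i: Int, j: Int, s: String)

// No TypeTag involved: scala3encoders derives the Encoder at compile time
val enc = summon[Encoder[classa]]
println(enc.schema) // i and j as non-nullable IntegerType, s as nullable StringType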
So if you really need Encoders.product[classa], compile this part of your code with Scala 2:
src/main/scala/App.scala
// this is Scala 3
object App {
  def main(args: Array[String]): Unit = {
    println(App1.schema)
    // Seq(StructField(i,IntegerType,false), StructField(j,IntegerType,false), StructField(s,StringType,true))
  }
}
scala2/src/main/scala/App1.scala
import org.apache.spark.sql._
// this is Scala 2
object App1 {
  val schema = Encoders.product[classa].schema
}
common/src/main/scala/classa.scala
case class classa(i: Int, j: Int, s: String)
build.sbt
lazy val sparkCore = "org.apache.spark" %% "spark-core" % "3.2.0"
lazy val sparkSql = "org.apache.spark" %% "spark-sql" % "3.2.0"
lazy val scala3V = "3.1.3"
lazy val scala2V = "2.13.8"
lazy val root = project
  .in(file("."))
  .settings(
    scalaVersion := scala3V,
    scalacOptions ++= Seq("-language:implicitConversions", "-deprecation"),
    libraryDependencies ++= Seq(
      excludes(sparkCore.cross(CrossVersion.for3Use2_13)),
      excludes(sparkSql.cross(CrossVersion.for3Use2_13)),
      excludes("io.github.vincenzobaz" %% "spark-scala3" % "0.1.3"),
      "org.scalameta" %% "munit" % "0.7.26" % Test
    )
  )
  .dependsOn(scala2, common)

lazy val scala2 = project
  .settings(
    scalaVersion := scala2V,
    libraryDependencies ++= Seq(
      sparkCore,
      sparkSql
    )
  )
  .dependsOn(common)

lazy val common = project
  .settings(
    scalaVersion := scala3V,
    crossScalaVersions := Seq(scala2V, scala3V)
  )
//netty-all replaces all these excludes
def excludes(m: ModuleID): ModuleID =
m.exclude("io.netty", "netty-common").
exclude("io.netty", "netty-handler").
exclude("io.netty", "netty-transport").
exclude("io.netty", "netty-buffer").
exclude("io.netty", "netty-codec").
exclude("io.netty", "netty-resolver").
exclude("io.netty", "netty-transport-native-epoll").
exclude("io.netty", "netty-transport-native-unix-common").
exclude("javax.xml.bind", "jaxb-api").
exclude("jakarta.xml.bind", "jaxb-api").
exclude("javax.activation", "activation").
exclude("jakarta.annotation", "jakarta.annotation-api").
exclude("javax.annotation", "javax.annotation-api")
// Without forking, ctrl-c doesn't actually fully stop Spark
run / fork := true
Test / fork := true
Alternatively, in Scala 3 you can compute the TypeTag via Scala 2 runtime compilation (a reflective Toolbox): How to compile and execute scala code at run-time in Scala3?
Scala 2 macros don't work in Scala 3, so we can't use runtime.currentMirror or q"...", but universe.runtimeMirror and tb.parse are ordinary runtime reflection, and it turns out they still work in Scala 3.
// this is Scala 3
import org.apache.spark.sql.*
import scala.tools.reflect.ToolBox
import scala.reflect.runtime.universe
import scala.reflect.runtime.universe.*
import mypackage.classa
val rm = universe.runtimeMirror(getClass.getClassLoader)
val tb = rm.mkToolBox()
val typeTag = tb.eval(tb.parse(
  "scala.reflect.runtime.universe.typeTag[mypackage.classa]"
)).asInstanceOf[TypeTag[classa]]
Encoders.product[classa](typeTag).schema
// Seq(StructField(i,IntegerType,false), StructField(j,IntegerType,false), StructField(s,StringType,true))
build.sbt
lazy val sparkCore = "org.apache.spark" %% "spark-core" % "3.2.0"
lazy val sparkSql = "org.apache.spark" %% "spark-sql" % "3.2.0"
lazy val scala3V = "3.1.3"
lazy val scala2V = "2.13.8"
lazy val root = project
  .in(file("."))
  .settings(
    scalaVersion := scala3V,
    scalacOptions ++= Seq(
      "-language:implicitConversions",
      "-deprecation"
    ),
    libraryDependencies ++= Seq(
      excludes(sparkCore.cross(CrossVersion.for3Use2_13)),
      excludes(sparkSql.cross(CrossVersion.for3Use2_13)),
      excludes("io.github.vincenzobaz" %% "spark-scala3" % "0.1.3"),
      "org.scalameta" %% "munit" % "0.7.26" % Test,
      scalaOrganization.value % "scala-reflect" % scala2V,
      scalaOrganization.value % "scala-compiler" % scala2V,
    ),
  )
def excludes(m: ModuleID): ModuleID =
m.exclude("io.netty", "netty-common").
exclude("io.netty", "netty-handler").
exclude("io.netty", "netty-transport").
exclude("io.netty", "netty-buffer").
exclude("io.netty", "netty-codec").
exclude("io.netty", "netty-resolver").
exclude("io.netty", "netty-transport-native-epoll").
exclude("io.netty", "netty-transport-native-unix-common").
exclude("javax.xml.bind", "jaxb-api").
exclude("jakarta.xml.bind", "jaxb-api").
exclude("javax.activation", "activation").
exclude("jakarta.annotation", "jakarta.annotation-api").
exclude("javax.annotation", "javax.annotation-api")
// Without forking, ctrl-c doesn't actually fully stop Spark
run / fork := true
Test / fork := true
One more option is to create the type tag manually:
import scala.reflect.runtime.universe.*
import org.apache.spark.sql.*
val rm = runtimeMirror(getClass.getClassLoader)
val tpe: Type = internal.typeRef(
  internal.typeRef(NoType, rm.staticPackage("mypackage"), Nil),
  rm.staticClass("mypackage.classa"),
  Nil
)
val ttg: TypeTag[_] = createTypeTag(rm, tpe)
Encoders.product[classa](ttg.asInstanceOf[TypeTag[classa]]).schema
// Seq(StructField(i,IntegerType,false), StructField(j,IntegerType,false), StructField(s,StringType,true))
package mypackage
case class classa(i: Int, j: Int, s: String)
import scala.reflect.api
inline def createTypeTag(
  mirror: api.Mirror[_ <: api.Universe with Singleton],
  tpe: mirror.universe.Type
): mirror.universe.TypeTag[_] = {
  mirror.universe.TypeTag.apply(
    mirror.asInstanceOf[api.Mirror[mirror.universe.type]],
    new api.TypeCreator {
      override def apply[U <: api.Universe with Singleton](m: api.Mirror[U]): m.universe.Type = {
        tpe.asInstanceOf[m.universe.Type]
      }
    }
  )
}
scalaVersion := "3.1.3"
libraryDependencies ++= Seq(
scalaOrganization.value % "scala-reflect" % "2.13.8",
"org.apache.spark" %% "spark-core" % "3.2.0" cross CrossVersion.for3Use2_13 exclude("org.scala-lang.modules", "scala-xml_2.13"),
"org.apache.spark" %% "spark-sql" % "3.2.0" cross CrossVersion.for3Use2_13 exclude("org.scala-lang.modules", "scala-xml_2.13"),
)
The inline modifier is there to make the type tag serializable/deserializable.
In scala 2.12, why none of the TypeTag created in runtime is serializable?
How to create a TypeTag manually? (answer)
Get a TypeTag from a Type?
In Scala, how to create a TypeTag from a type that is serializable?
What causes ClassCastException when serializing TypeTags?
Get TypeTag[A] from Class[A]
Using the Scala Play library, I'm attempting to parse the string:
var str = "{\"payload\": \"[{\\\"test\\\":\\\"123\\\",\\\"tester\\\":\\\"456\\\"}," +
"{\\\"test1\\\":\\\"1234\\\",\\\"tester2\\\":\\\"4567\\\"}]\"}";
into a list of Payload classes using the code below:
import play.api.libs.json._
object TestParse extends App {
  case class Payload(test: String, tester: String)

  object Payload {
    implicit val jsonFormat: Format[Payload] = Json.format[Payload]
  }

  var str = "{\"payload\": \"[{\\\"test\\\":\\\"123\\\",\\\"tester\\\":\\\"456\\\"}," +
    "{\\\"test1\\\":\\\"1234\\\",\\\"tester2\\\":\\\"4567\\\"}]\"}";

  println((Json.parse(str) \ "payload").as[List[Payload]])
}
build.sbt:
name := "akka-streams"
version := "0.1"
scalaVersion := "2.12.8"
lazy val akkaVersion = "2.5.19"
lazy val scalaTestVersion = "3.0.5"
libraryDependencies ++= Seq(
"com.typesafe.akka" %% "akka-stream" % akkaVersion,
"com.typesafe.akka" %% "akka-stream-testkit" % akkaVersion,
"com.typesafe.akka" %% "akka-testkit" % akkaVersion,
"org.scalatest" %% "scalatest" % scalaTestVersion
)
// https://mvnrepository.com/artifact/com.typesafe.play/play-json
libraryDependencies += "com.typesafe.play" %% "play-json" % "2.10.0-RC6"
It fails with the exception:
Exception in thread "main" play.api.libs.json.JsResultException: JsResultException(errors:List((,List(JsonValidationError(List("" is not an object),WrappedArray())))))
Is the case class structure incorrect?
I've updated the code to:
import play.api.libs.json._
object TestParse extends App {
  import TestParse.Payload.jsonFormat

  object Payload {
    implicit val jsonFormat: Format[RootInterface] = Json.format[RootInterface]
  }

  case class Payload(
    test: Option[String],
    tester: Option[String]
  )

  case class RootInterface(
    payload: List[Payload]
  )

  val str = """{"payload": [{"test":"123","tester":"456"},{"test1":"1234","tester2":"4567"}]}"""
  println(Json.parse(str).as[RootInterface])
}
which returns the error:
No instance of play.api.libs.json.Format is available for scala.collection.immutable.List[TestParse.Payload] in the implicit scope (Hint: if declared in the same file, make sure it's declared before)
implicit val jsonFormat: Format[RootInterface] = Json.format[RootInterface]
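The hint in that error message is the relevant part: Json.format[RootInterface] needs an implicit Format[Payload] to already be declared when its own macro expands. A minimal sketch of the updated snippet with the formats reordered (hedged, not verified against this exact build):
import play.api.libs.json._

object TestParse extends App {
  case class Payload(test: Option[String], tester: Option[String])
  case class RootInterface(payload: List[Payload])

  // Format[Payload] is declared before the Format that depends on it
  implicit val payloadFormat: Format[Payload] = Json.format[Payload]
  implicit val rootFormat: Format[RootInterface] = Json.format[RootInterface]

  val str = """{"payload": [{"test":"123","tester":"456"},{"test1":"1234","tester2":"4567"}]}"""
  println(Json.parse(str).as[RootInterface])
  // RootInterface(List(Payload(Some(123),Some(456)), Payload(None,None)))
}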
The workaround below performs the task, but there are cleaner solutions:
import akka.actor.ActorSystem
import akka.stream.scaladsl.{Flow, Sink, Source}
import org.scalatest.Assertions._
import spray.json.{JsObject, JsonParser}
import scala.concurrent.Await
import scala.concurrent.duration.DurationInt
object TestStream extends App {
  implicit val actorSystem = ActorSystem()

  val mapperFlow = Flow[JsObject].map(x => {
    x.fields.get("payload").get.toString()
      .replace("{", "")
      .replace("}", "")
      .replace("[", "")
      .replace("]", "")
      .replace("\"", "")
      .replace("\\", "")
      .split(":").map(m => m.split(","))
      .toList
      .flatten
      .grouped(4)
      .map(m => Test(m(1), m(3).toDouble))
      .toList
  })

  val str = """{"payload": [{"test":"123","tester":"456"},{"test":"1234","tester":"4567"}]}"""

  case class Test(test: String, tester: Double)

  val graph = Source.repeat(JsonParser(str).asJsObject())
    .take(3)
    .via(mapperFlow)
    .mapConcat(identity)
    .runWith(Sink.seq)

  val result = Await.result(graph, 3.seconds)
  println(result)

  assert(result.length == 6)
  assert(result(0).test == "123")
  assert(result(0).tester == 456)
  assert(result(1).test == "1234")
  assert(result(1).tester == 4567)
  assert(result(2).test == "123")
  assert(result(2).tester == 456)
  assert(result(3).test == "1234")
  assert(result(3).tester == 4567)
}
Alternative, idiomatic Scala answers are welcome.
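One play-json-only alternative, assuming the payload really arrives as a JSON-encoded string (as in the original str), is to parse twice: read "payload" as a String, then parse that string as the array. A sketch:
import play.api.libs.json._

case class Payload(test: Option[String], tester: Option[String])
object Payload {
  implicit val format: Format[Payload] = Json.format[Payload]
}

val str = "{\"payload\": \"[{\\\"test\\\":\\\"123\\\",\\\"tester\\\":\\\"456\\\"}," +
  "{\\\"test1\\\":\\\"1234\\\",\\\"tester2\\\":\\\"4567\\\"}]\"}"

// "payload" maps to a JSON string, not an array, so it has to be parsed a second time
val inner: String = (Json.parse(str) \ "payload").as[String]
val payloads: List[Payload] = Json.parse(inner).as[List[Payload]]
// List(Payload(Some(123),Some(456)), Payload(None,None))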
Below is a Scala test of a websocket:
import java.util.function.Consumer
import play.shaded.ahc.org.asynchttpclient.AsyncHttpClient
import play.api.inject.guice.GuiceApplicationBuilder
import play.api.test.{Helpers, TestServer, WsTestClient}
import scala.compat.java8.FutureConverters
import scala.concurrent.Await
import scala.concurrent.duration._
import org.scalatestplus.play._
class SocketTest extends PlaySpec with ScalaFutures {

  "HomeController" should {

    "reject a websocket flow if the origin is set incorrectly" in WsTestClient.withClient { client =>
      // Pick a non standard port that will fail the (somewhat contrived) origin check...
      lazy val port: Int = 31337
      val app = new GuiceApplicationBuilder().build()
      Helpers.running(TestServer(port, app)) {
        val myPublicAddress = s"localhost:$port"
        val serverURL = s"ws://$myPublicAddress/ws"

        val asyncHttpClient: AsyncHttpClient = client.underlying[AsyncHttpClient]
        val webSocketClient = new WebSocketClient(asyncHttpClient)
        try {
          val origin = "ws://example.com/ws"
          val consumer: Consumer[String] = new Consumer[String] {
            override def accept(message: String): Unit = println(message)
          }
          val listener = new WebSocketClient.LoggingListener(consumer)
          val completionStage = webSocketClient.call(serverURL, origin, listener)
          val f = FutureConverters.toScala(completionStage)
          Await.result(f, atMost = 1000.millis)
          listener.getThrowable mustBe a[IllegalStateException]
        } catch {
          case e: IllegalStateException =>
            e mustBe an[IllegalStateException]
          case e: java.util.concurrent.ExecutionException =>
            val foo = e.getCause
            foo mustBe an[IllegalStateException]
        }
      }
    }
  }
}
But compilation fails on the line import org.scalatestplus.play._ with the error:
Cannot resolve symbol scalatestplus
Following https://www.playframework.com/documentation/2.8.x/ScalaTestingWithScalaTest, I have added scalatest and play to the build:
build.sbt:
name := "testproject"
version := "1.0"
lazy val `testproject` = (project in file(".")).enablePlugins(PlayScala)
resolvers += "scalaz-bintray" at "https://dl.bintray.com/scalaz/releases"
resolvers += "Akka Snapshot Repository" at "https://repo.akka.io/snapshots/"
scalaVersion := "2.12.2"
libraryDependencies ++= Seq( jdbc , ehcache , ws , guice , specs2 % Test)
// https://mvnrepository.com/artifact/com.typesafe.scala-logging/scala-logging
libraryDependencies += "com.typesafe.scala-logging" %% "scala-logging" % "3.9.2"
libraryDependencies ++= Seq(
"org.scalatestplus.play" %% "scalatestplus-play" % "3.0.0" % "test"
)
unmanagedResourceDirectories in Test <+= baseDirectory ( _ /"target/web/public/test" )
I've tried rebuilding the project and module using IntelliJ's "Build" option, and the "Build" option offered when I right-click on build.sbt, but the import is still not found.
Running sbt dist from the IntelliJ "sbt shell", then File -> "Invalidate Caches" with a restart of IntelliJ, seems to fix the issue.
[Screenshot: Invalidate Caches dialog]
I'm using slick-pg, which adds support (via implicits) for the List and DateTime types in Slick.
Unfortunately I cannot use List[DateTime]: Slick does not understand the composition of those types, although I've checked that both work correctly on their own (for example List[Int] and DateTime).
Is there a way to easily compose those two implicits?
1. Try to add:
implicit def dateTimeList =
  MappedColumnType.base[List[DateTime], List[Timestamp]](
    _.map(dt => new Timestamp(dt.getMillis)),
    _.map(ts => new DateTime(ts.getTime))
  )
Just in case, here is the whole code that compiles:
import java.sql.Timestamp
import org.joda.time.DateTime
import slick.jdbc.PostgresProfile.api._
import slick.lifted.ProvenShape
import slick.basic.Capability
import slick.jdbc.JdbcCapabilities
import com.github.tototoshi.slick.PostgresJodaSupport._
import com.github.tminglei.slickpg._
object App {

  trait MyPostgresProfile extends ExPostgresProfile
      with PgArraySupport
      with PgDate2Support
      with PgRangeSupport
      with PgHStoreSupport
      // with PgPlayJsonSupport
      with PgSearchSupport
      // with PgPostGISSupport
      with PgNetSupport
      with PgLTreeSupport {

    def pgjson = "jsonb" // jsonb support is in postgres 9.4.0 onward; for 9.3.x use "json"

    // Add back `capabilities.insertOrUpdate` to enable native `upsert` support; for postgres 9.5+
    override protected def computeCapabilities: Set[Capability] =
      super.computeCapabilities + /*JdbcProfile.capabilities.insertOrUpdate*/ JdbcCapabilities.insertOrUpdate

    override val api = MyAPI

    object MyAPI extends API with ArrayImplicits
        with DateTimeImplicits
        // with JsonImplicits
        with NetImplicits
        with LTreeImplicits
        with RangeImplicits
        with HStoreImplicits
        with SearchImplicits
        with SearchAssistants {
      implicit val strListTypeMapper = new SimpleArrayJdbcType[String]("text").to(_.toList)
      // implicit val playJsonArrayTypeMapper =
      //   new AdvancedArrayJdbcType[JsValue](pgjson,
      //     (s) => utils.SimpleArrayUtils.fromString[JsValue](Json.parse(_))(s).orNull,
      //     (v) => utils.SimpleArrayUtils.mkString[JsValue](_.toString())(v)
      //   ).to(_.toList)
    }
  }

  object MyPostgresProfile extends MyPostgresProfile

  import MyPostgresProfile.api._

  // This can be used instead of slick-joda-mapper library
  // implicit def dateTime =
  //   MappedColumnType.base[DateTime, Timestamp](
  //     dt => new Timestamp(dt.getMillis),
  //     ts => new DateTime(ts.getTime)
  //   )

  implicit def dateTimeList =
    MappedColumnType.base[List[DateTime], List[Timestamp]](
      _.map(dt => new Timestamp(dt.getMillis)),
      _.map(ts => new DateTime(ts.getTime))
    )

  case class Record(id: Int, name: String, friends: List[Int], registered: DateTime, visits: List[DateTime])

  class RecordTable(tag: Tag) extends Table[Record](tag, Some("public"), "records") {
    def id: Rep[Int] = column[Int]("id", O.PrimaryKey, O.AutoInc)
    def name: Rep[String] = column[String]("name")
    def friends: Rep[List[Int]] = column[List[Int]]("friends")
    def registered: Rep[DateTime] = column[DateTime]("registered")
    def visits: Rep[List[DateTime]] = column[List[DateTime]]("visits")

    def * : ProvenShape[Record] = (id, name, friends, registered, visits) <> (Record.tupled, Record.unapply)
  }

  val records: TableQuery[RecordTable] = TableQuery[RecordTable]
}
build.sbt
name := "slickdemo"
version := "0.1"
scalaVersion := "2.12.3"
libraryDependencies += "com.typesafe.slick" %% "slick" % "3.2.1"
libraryDependencies += "org.slf4j" % "slf4j-nop" % "1.7.25"
libraryDependencies += "com.typesafe.slick" %% "slick-hikaricp" % "3.2.1"
libraryDependencies += "org.postgresql" % "postgresql" % "42.1.4"
libraryDependencies += "com.github.tminglei" %% "slick-pg" % "0.15.3"
libraryDependencies += "joda-time" % "joda-time" % "2.9.9"
libraryDependencies += "org.joda" % "joda-convert" % "1.9.2"
libraryDependencies += "com.github.tototoshi" % "slick-joda-mapper_2.12" % "2.3.0"
Based on this answer and the documentation.
2. Alternatively, you can add
implicit val dateTimeArrayTypeMapper =
  new AdvancedArrayJdbcType[DateTime]("timestamp",
    (s) => utils.SimpleArrayUtils.fromString[DateTime](DateTime.parse)(s).orNull,
    (v) => utils.SimpleArrayUtils.mkString[DateTime](_.toString)(v)
  ).to(_.toList)
after strListTypeMapper and playJsonArrayTypeMapper.
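With either mapper in scope, the visits column behaves like any other column. A rough usage sketch, assuming the Record, RecordTable and records definitions from the code above plus a configured db: Database (the db value is hypothetical here):
val now = org.joda.time.DateTime.now()

// AutoInc id is omitted on insert; List[DateTime] goes through the mapper above
val insert = records += Record(0, "alice", List(1, 2), now, List(now, now.minusDays(1)))
val readVisits = records.filter(_.name === "alice").map(_.visits).result

// db.run(insert)      // inserts the row
// db.run(readVisits)  // yields a Seq[List[DateTime]]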