Tag: scala

Scala for Java developers – decompiling Scala classes to Java bytecode

Classes in Scala (decompiled to Java bytecode with :javap to see the generated methods)

me@MacBook:~$ scala
Welcome to Scala 2.11.12 (Java HotSpot(TM) 64-Bit Server VM, Java 1.8.0_192).
Type in expressions for evaluation. Or try :help.

scala> class Person(name: String, age: Int) {
     | println("In default constructor") 
     | override def toString = "Person(name=" + name + ",age=" + age + ")"
     | }
defined class Person

scala> new Person("Bob",18)
In default constructor
res0: Person = Person(name=Bob,age=18)

scala> :javap -p Person
Compiled from "<console>"
public class Person {
  private final java.lang.String name;
  private final int age;
  public java.lang.String toString();
  public Person(java.lang.String, int);
}
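
Note that without val or var in front of the constructor parameters, the generated fields are private and have no accessor methods, so they cannot be read from outside the class. A minimal sketch (the Demo object name is made up here):

class Person(name: String, age: Int) {
  override def toString = "Person(name=" + name + ",age=" + age + ")"
}

object Demo {
  def main(args: Array[String]): Unit = {
    val p = new Person("Bob", 18)
    println(p)         // prints Person(name=Bob,age=18) via toString
    // println(p.name) // does not compile: value name is not a member of Person
  }
}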


scala> :paste
// Entering paste mode (ctrl-D to finish)

class Person(name: String, age: Int) {
  override def toString = "Person(name=" + name + ",age=" + age + ")"
}

object Person {
  def apply(name: String, age: Int) = {   // create factory method(s) using apply method(s)
    new Person(name,age)
  }
}

// Exiting paste mode, now interpreting.

defined class Person
defined object Person   // companion object

scala> new Person("Alice",19)
res0: Person = Person(name=Alice,age=19)

scala> Person.apply("Alice", 19)         // access companion object methods by object-name.method-name
res1: Person = Person(name=Alice,age=19)

scala> Person("Alice", 19)               // can even skip typing apply method
res2: Person = Person(name=Alice,age=19)

scala> :javap -p Person$
Compiled from "<console>"
public class Person$ {
  public static final Person$ MODULE$;
  public static {};
  public Person apply(java.lang.String, int);
  public Person$();
}

scala> class Person(val name: String, val age: Int) {
     | println("In default constructor") 
     | override def toString = "Person(name=" + name + ",age=" + age + ")"
     | }
defined class Person

scala> :javap -p Person
Compiled from "<console>"
public class Person {
  private final java.lang.String name;
  private final int age;
  public java.lang.String name();
  public int age();
  public java.lang.String toString();
  public Person(java.lang.String, int);
}

scala> new Person("Bob",18)
In default constructor
res1: Person = Person(name=Bob,age=18)

scala> res1.name
res2: String = Bob

scala> res1.name()
<console>:13: error: not enough arguments for method apply: (index: Int)Char in class StringOps.
Unspecified value parameter index.
       res1.name()
                ^
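
The error happens because name is compiled as a parameterless method returning a String, so the extra () is taken as an apply call on that String (StringOps.apply(index: Int)). A small sketch of the forms that do work (class P and object NameCallDemo are made-up names):

class P(val name: String)

object NameCallDemo {
  def main(args: Array[String]): Unit = {
    val p = new P("Bob")
    println(p.name)          // Bob – parameterless accessor, called without parentheses
    println(p.name.apply(0)) // B – explicit StringOps.apply(index) on the returned String
    // println(p.name())     // does not compile: apply(index: Int) is missing its argument
  }
}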

scala> class Person(var name: String, var age: Int) {
     | override def toString = "Person(name=" + name + ",age=" + age + ")"
     | }
defined class Person

scala> :javap -p Person
Compiled from "<console>"
public class Person {
  private java.lang.String name;
  private int age;
  public java.lang.String name();
  public void name_$eq(java.lang.String);
  public int age();
  public void age_$eq(int);
  public java.lang.String toString();
  public Person(java.lang.String, int);
}

scala> new Person("Bob",18)
res4: Person = Person(name=Bob,age=18)

scala> res4.name_=("Bobby")

scala> res4
res6: Person = Person(name=Bobby,age=18)

scala> res4.name = "Bobby2"
res4.name: String = Bobby2

scala> res4
res7: Person = Person(name=Bobby2,age=18)

scala> case class Person(var name: String, var age: Int) // var to add setter methods
defined class Person

scala> new Person("Alice",17)
res0: Person = Person(Alice,17) // case class has a default toString listing the fields (instead of className@hashCode)

scala> Person("Bobby",18)  // automatically added companion object with apply method
res1: Person = Person(Bobby,18)

scala> :javap -p Person
Compiled from "<console>"
public class Person implements scala.Product,scala.Serializable {
  private java.lang.String name;
  private int age;
  public java.lang.String name();
  public void name_$eq(java.lang.String);  // var to add setter methods
  public int age();
  public void age_$eq(int);
  public Person copy(java.lang.String, int);
  public java.lang.String copy$default$1();
  public int copy$default$2();
  public java.lang.String productPrefix();
  public int productArity();
  public java.lang.Object productElement(int);
  public scala.collection.Iterator productIterator();
  public boolean canEqual(java.lang.Object);
  public int hashCode();
  public java.lang.String toString();
  public boolean equals(java.lang.Object);
  public Person(java.lang.String, int);
}
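
The generated copy, equals and hashCode can be exercised directly; a short sketch (CaseClassDemo is a made-up name):

case class Person(name: String, age: Int)

object CaseClassDemo {
  def main(args: Array[String]): Unit = {
    val p = Person("Alice", 17)
    val older = p.copy(age = 18)      // name falls back to copy$default$1
    println(older)                    // Person(Alice,18)
    println(p == Person("Alice", 17)) // true – structural equals/hashCode
    println(p == older)               // false
  }
}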

scala> :javap -p scala.Serializable
Compiled from "Serializable.scala"
public interface scala.Serializable extends java.io.Serializable {
}

scala> :javap -p java.io.Serializable
Compiled from "Serializable.java"
public interface java.io.Serializable {
}

scala> :javap -p scala.Product
Compiled from "Product.scala"
public interface scala.Product extends scala.Equals {
  public abstract java.lang.Object productElement(int);
  public abstract int productArity();
  public abstract scala.collection.Iterator productIterator();
  public abstract java.lang.String productPrefix();
}
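
The scala.Product methods implemented by a case class give generic access to its fields; a quick sketch (ProductDemo is a made-up name):

case class Person(name: String, age: Int)

object ProductDemo {
  def main(args: Array[String]): Unit = {
    val p = Person("Alice", 17)
    println(p.productPrefix)           // Person
    println(p.productArity)            // 2
    println(p.productElement(0))       // Alice
    p.productIterator.foreach(println) // Alice, then 17
  }
}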

scala> :javap -p Person$  // automatically added companion object with apply method
Compiled from "<console>"
public class Person$ extends scala.runtime.AbstractFunction2 implements scala.Serializable {
  public static final Person$ MODULE$;
  public static {};
  public final java.lang.String toString();
  public Person apply(java.lang.String, int);
  public scala.Option<scala.Tuple2> unapply(Person);
  private java.lang.Object readResolve();
  public java.lang.Object apply(java.lang.Object, java.lang.Object);
  public Person$();
}

scala> case class Person(name: String, age: Int)  // no var before args - no setters generated
defined class Person

scala> new Person("Alice",17)
res0: Person = Person(Alice,17)

scala> Person("Bobby",18)
res1: Person = Person(Bobby,18)

scala> :javap -p Person
Compiled from "<console>"
public class Person implements scala.Product,scala.Serializable {
  private final java.lang.String name;
  private final int age;
  public java.lang.String name();
  public int age();
  public Person copy(java.lang.String, int);
  public java.lang.String copy$default$1();
  public int copy$default$2();
  public java.lang.String productPrefix();
  public int productArity();
  public java.lang.Object productElement(int);
  public scala.collection.Iterator productIterator();
  public boolean canEqual(java.lang.Object);
  public int hashCode();
  public java.lang.String toString();
  public boolean equals(java.lang.Object);
  public Person(java.lang.String, int);
}

scala> :javap -p Person$
Compiled from "<console>"
public class Person$ extends scala.runtime.AbstractFunction2 implements scala.Serializable {
  public static final Person$ MODULE$;
  public static {};
  public final java.lang.String toString();
  public Person apply(java.lang.String, int);
  public scala.Option<scala.Tuple2> unapply(Person);
  private java.lang.Object readResolve();
  public java.lang.Object apply(java.lang.Object, java.lang.Object);
  public Person$();
}
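
The unapply method generated in the companion object is what makes pattern matching on a case class work; a sketch (UnapplyDemo is a made-up name):

case class Person(name: String, age: Int)

object UnapplyDemo {
  def main(args: Array[String]): Unit = {
    val p = Person("Bobby", 18)
    p match { // pattern matching is compiled against Person.unapply: Option[(String, Int)]
      case Person(name, age) => println(name + " is " + age)
    }
    println(Person.unapply(p)) // Some((Bobby,18))
  }
}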

scala> case class Person(name: String, age: Int) { // add a constructor body and override toString in a case class
     | println("In my constructor")
     | override def toString = name + age
     | }
defined class Person

scala> new Person("a",1)
In my constructor
res4: Person = a1

 


Apache Spark RDD / DataFrame re-calculation

package com.bawi.spark

import java.io.File
import java.nio.file.{Files, Paths}

import org.apache.commons.io.FileUtils
import org.apache.spark.SparkConf
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{DataFrame, SparkSession}

object TestRecalculation {
  def main(args: Array[String]): Unit = {

    val output = if (args.length > 0) args(0) else "numbers"
    
    val sparkConf = new SparkConf().setAppName(getClass.getName)
    if (isLocal()) {
      sparkConf.setMaster("local[*]")
      val file = new File(output)
      if (file.exists())
        FileUtils.deleteDirectory(file)
    }

    val sparkSession = SparkSession.builder().config(sparkConf).getOrCreate()
    val sparkContext = sparkSession.sparkContext

    val rdd: RDD[Int] = sparkContext.parallelize(Seq(1, 2, 3))
    val rdd2: RDD[Int] = rdd.map(n => {
      println(s"processing: $n")
      n
    })
    import sparkSession.implicits._
    val df: DataFrame = rdd2.toDF("numbers")

    df.write.json(output)

    df.show() // re-calculates

    sparkSession.close()
  }

  def isMac(): Boolean = {
    System.getProperty("os.name").contains("Mac") // last expression is the return value
  }

  def isUbuntu(): Boolean = {
    if (System.getProperty("os.name").contains("Linux")){
      val path = Paths.get ("/etc/os-release")
      if (path.toFile.exists()) {
        new String(Files.readAllBytes(path)).toLowerCase.contains("ubuntu") // lowercase to match the lowercased content
      } else false
    } else false
  }

  def isLocal(): Boolean = {
    isMac() || isUbuntu()
  }
}

Output:

19/01/04 13:55:20 INFO SparkContext: Running Spark version 2.3.2
19/01/04 13:55:21 INFO Utils: Successfully started service 'SparkUI' on port 4040.
19/01/04 13:55:21 INFO SparkUI: Bound SparkUI to 127.0.0.1, and started at http://localhost:4040
19/01/04 13:55:21 INFO Executor: Starting executor ID driver on host localhost
19/01/04 13:55:24 INFO SparkContext: Starting job: json at TestRecalculation.scala:25
19/01/04 13:55:24 INFO Executor: Running task 2.0 in stage 0.0 (TID 2)
19/01/04 13:55:24 INFO Executor: Running task 4.0 in stage 0.0 (TID 4)
19/01/04 13:55:24 INFO Executor: Running task 1.0 in stage 0.0 (TID 1)
19/01/04 13:55:24 INFO Executor: Running task 3.0 in stage 0.0 (TID 3)
19/01/04 13:55:24 INFO Executor: Running task 5.0 in stage 0.0 (TID 5)
19/01/04 13:55:24 INFO Executor: Running task 0.0 in stage 0.0 (TID 0)
19/01/04 13:55:24 INFO Executor: Running task 6.0 in stage 0.0 (TID 6)
19/01/04 13:55:24 INFO Executor: Running task 7.0 in stage 0.0 (TID 7)
processing: 2
processing: 3
processing: 1
19/01/04 13:55:25 INFO SparkContext: Starting job: show at TestRecalculation.scala:27
19/01/04 13:55:25 INFO Executor: Running task 0.0 in stage 1.0 (TID 8)
19/01/04 13:55:25 INFO Executor: Running task 0.0 in stage 2.0 (TID 9)
19/01/04 13:55:25 INFO Executor: Running task 1.0 in stage 2.0 (TID 10)
19/01/04 13:55:25 INFO Executor: Running task 2.0 in stage 2.0 (TID 11)
19/01/04 13:55:25 INFO Executor: Running task 3.0 in stage 2.0 (TID 12)
processing: 1
19/01/04 13:55:25 INFO SparkContext: Starting job: show at TestRecalculation.scala:27
19/01/04 13:55:25 INFO Executor: Running task 1.0 in stage 3.0 (TID 14)
19/01/04 13:55:25 INFO Executor: Running task 0.0 in stage 3.0 (TID 13)
19/01/04 13:55:25 INFO Executor: Running task 2.0 in stage 3.0 (TID 15)
processing: 3
processing: 2
+-------+
|numbers|
+-------+
|      1|
|      2|
|      3|
+-------+
19/01/04 13:55:25 INFO SparkUI: Stopped Spark web UI at http://localhost:4040
19/01/04 13:55:25 INFO SparkContext: Successfully stopped SparkContext
Process finished with exit code 0

Note that the numbers were processed twice: DataFrames are evaluated lazily, so each action (the json write and the show) re-runs the whole lineage, including the map, because nothing is cached.
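
One way to confirm the double evaluation is to count how many times the map function runs with a LongAccumulator; a minimal sketch assuming a local SparkSession (CountExecutions is a made-up name):

import org.apache.spark.sql.SparkSession

object CountExecutions {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("count-executions").getOrCreate()
    import spark.implicits._
    val counter = spark.sparkContext.longAccumulator("map-executions")
    val df = spark.sparkContext.parallelize(Seq(1, 2, 3))
      .map { n => counter.add(1); n } // count every execution of the map function
      .toDF("numbers")
    df.count() // first action
    df.show()  // second action re-runs the map
    println(s"map executed ${counter.value} times") // expected 6 without cache()
    spark.close()
  }
}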

 
Now with a cached DataFrame (df.cache() called before df.write):

    df.cache()

Output:

19/01/04 14:22:37 INFO SparkContext: Running Spark version 2.3.2
19/01/04 14:22:41 INFO SparkContext: Starting job: json at TestRecalculation.scala:37
19/01/04 14:22:41 INFO Executor: Running task 6.0 in stage 0.0 (TID 6)
19/01/04 14:22:41 INFO Executor: Running task 1.0 in stage 0.0 (TID 1)
19/01/04 14:22:41 INFO Executor: Running task 0.0 in stage 0.0 (TID 0)
19/01/04 14:22:41 INFO Executor: Running task 5.0 in stage 0.0 (TID 5)
19/01/04 14:22:41 INFO Executor: Running task 7.0 in stage 0.0 (TID 7)
19/01/04 14:22:41 INFO Executor: Running task 3.0 in stage 0.0 (TID 3)
19/01/04 14:22:41 INFO Executor: Running task 4.0 in stage 0.0 (TID 4)
19/01/04 14:22:41 INFO Executor: Running task 2.0 in stage 0.0 (TID 2)
19/01/04 14:22:42 INFO MemoryStore: Block rdd_4_0 stored as values in memory (estimated size 16.0 B, free 2004.4 MB)
19/01/04 14:22:42 INFO MemoryStore: Block rdd_4_3 stored as values in memory (estimated size 16.0 B, free 2004.4 MB)
19/01/04 14:22:42 INFO BlockManagerInfo: Added rdd_4_0 in memory on localhost:63790 (size: 16.0 B, free: 2004.5 MB)
19/01/04 14:22:42 INFO MemoryStore: Block rdd_4_6 stored as values in memory (estimated size 16.0 B, free 2004.4 MB)
19/01/04 14:22:42 INFO MemoryStore: Block rdd_4_1 stored as values in memory (estimated size 16.0 B, free 2004.4 MB)
19/01/04 14:22:42 INFO BlockManagerInfo: Added rdd_4_3 in memory on localhost:63790 (size: 16.0 B, free: 2004.5 MB)
19/01/04 14:22:42 INFO BlockManagerInfo: Added rdd_4_6 in memory on localhost:63790 (size: 16.0 B, free: 2004.5 MB)
19/01/04 14:22:42 INFO MemoryStore: Block rdd_4_4 stored as values in memory (estimated size 16.0 B, free 2004.4 MB)
19/01/04 14:22:42 INFO BlockManagerInfo: Added rdd_4_1 in memory on localhost:63790 (size: 16.0 B, free: 2004.5 MB)
19/01/04 14:22:42 INFO BlockManagerInfo: Added rdd_4_4 in memory on localhost:63790 (size: 16.0 B, free: 2004.5 MB)
processing: 3
processing: 1
processing: 2
19/01/04 14:22:42 INFO MemoryStore: Block rdd_4_7 stored as values in memory (estimated size 232.0 B, free 2004.4 MB)
19/01/04 14:22:42 INFO MemoryStore: Block rdd_4_5 stored as values in memory (estimated size 232.0 B, free 2004.4 MB)
19/01/04 14:22:42 INFO BlockManagerInfo: Added rdd_4_7 in memory on localhost:63790 (size: 232.0 B, free: 2004.5 MB)
19/01/04 14:22:42 INFO BlockManagerInfo: Added rdd_4_5 in memory on localhost:63790 (size: 232.0 B, free: 2004.5 MB)
19/01/04 14:22:42 INFO SparkContext: Starting job: show at TestRecalculation.scala:39
19/01/04 14:22:42 INFO MemoryStore: Block broadcast_1 stored as values in memory (estimated size 18.0 KB, free 2004.4 MB)
19/01/04 14:22:42 INFO MemoryStore: Block broadcast_1_piece0 stored as bytes in memory (estimated size 7.5 KB, free 2004.4 MB)
19/01/04 14:22:42 INFO BlockManagerInfo: Added broadcast_1_piece0 in memory on localhost:63790 (size: 7.5 KB, free: 2004.5 MB)
19/01/04 14:22:42 INFO Executor: Running task 0.0 in stage 1.0 (TID 8)
19/01/04 14:22:42 INFO BlockManager: Found block rdd_4_0 locally
19/01/04 14:22:42 INFO SparkContext: Starting job: show at TestRecalculation.scala:39
19/01/04 14:22:42 INFO MemoryStore: Block broadcast_2 stored as values in memory (estimated size 18.0 KB, free 2004.4 MB)
19/01/04 14:22:42 INFO MemoryStore: Block broadcast_2_piece0 stored as bytes in memory (estimated size 7.5 KB, free 2004.4 MB)
19/01/04 14:22:42 INFO BlockManagerInfo: Added broadcast_2_piece0 in memory on localhost:63790 (size: 7.5 KB, free: 2004.5 MB)
19/01/04 14:22:42 INFO Executor: Running task 0.0 in stage 2.0 (TID 9)
19/01/04 14:22:42 INFO Executor: Running task 2.0 in stage 2.0 (TID 11)
19/01/04 14:22:42 INFO Executor: Running task 1.0 in stage 2.0 (TID 10)
19/01/04 14:22:42 INFO Executor: Running task 3.0 in stage 2.0 (TID 12)
19/01/04 14:22:42 INFO BlockManager: Found block rdd_4_3 locally
19/01/04 14:22:42 INFO BlockManager: Found block rdd_4_4 locally
19/01/04 14:22:42 INFO BlockManager: Found block rdd_4_2 locally
19/01/04 14:22:42 INFO BlockManager: Found block rdd_4_1 locally
19/01/04 14:22:42 INFO SparkContext: Starting job: show at TestRecalculation.scala:39
19/01/04 14:22:42 INFO MemoryStore: Block broadcast_3 stored as values in memory (estimated size 18.0 KB, free 2004.3 MB)
19/01/04 14:22:42 INFO MemoryStore: Block broadcast_3_piece0 stored as bytes in memory (estimated size 7.5 KB, free 2004.3 MB)
19/01/04 14:22:42 INFO BlockManagerInfo: Added broadcast_3_piece0 in memory on localhost:63790 (size: 7.5 KB, free: 2004.5 MB)
19/01/04 14:22:42 INFO Executor: Running task 0.0 in stage 3.0 (TID 13)
19/01/04 14:22:42 INFO Executor: Running task 1.0 in stage 3.0 (TID 14)
19/01/04 14:22:42 INFO Executor: Running task 2.0 in stage 3.0 (TID 15)
19/01/04 14:22:42 INFO BlockManager: Found block rdd_4_5 locally
19/01/04 14:22:42 INFO BlockManager: Found block rdd_4_7 locally
19/01/04 14:22:42 INFO BlockManager: Found block rdd_4_6 locally
+-------+
|numbers|
+-------+
|      1|
|      2|
|      3|
+-------+
19/01/04 14:22:42 INFO SparkUI: Stopped Spark web UI at http://localhost:4040
19/01/04 14:22:42 INFO SparkContext: Successfully stopped SparkContext
19/01/04 14:22:42 INFO ShutdownHookManager: Shutdown hook called
Process finished with exit code 0

Note: the numbers were processed only once; the show jobs reused the cached blocks (Found block rdd_4_* locally) instead of recomputing the map.
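
For reference, cache() on a DataFrame is shorthand for persist() with the default storage level (MEMORY_AND_DISK in Spark 2.x), so the cached variant could equivalently be written with an explicit storage level and released afterwards (a sketch reusing df and output from TestRecalculation above):

    import org.apache.spark.storage.StorageLevel

    df.persist(StorageLevel.MEMORY_AND_DISK) // what df.cache() does for a DataFrame in Spark 2.x
    df.write.json(output)
    df.show()      // served from the cached blocks, no re-calculation
    df.unpersist() // release the cached blocks once no longer needed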

Scala for Java developers


class MyClass {
  var myname: String = _ // initialized to the type's default value (null for String); var allows reassignment
  var myseq: Seq[Any] = Nil // empty seq
  def mytrim: String = myname.trim
  
}
  
case class Person(age: Int, name: String)
  
object PersonImplicits {
  implicit class PersonHelper(person: Person) {
    def describe: String = s"Person[${person.age},${person.name}]"
  }
}
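
// Note (hand-written approximation, not compiler output): with PersonImplicits._ in scope,
// calling describe on a Person makes the compiler wrap the value in PersonHelper, roughly:
//   Person(18, "me").describe  ==>  new PersonImplicits.PersonHelper(Person(18, "me")).describe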
  
  
object MyClass {
  def main(args: Array[String]): Unit = {
 
    println(sum(1, 10, i => i)) // 55: pass a function literal directly
 
    println(sum(1, 10, square)) // 385: pass a method that squares its argument
 
    def square(i: Int): Int = i * i // a local method can be defined after the point where it is referenced
  
    import java.util
  
    val javaList: util.List[String] = new util.ArrayList[String] // use Java's java.util.List and ArrayList
    javaList.add("a")
  
    import scala.collection.mutable
    import scala.collection.JavaConverters._
    val scalaSeq: mutable.Seq[String] = javaList.asScala // convert the Java list to a Scala mutable Seq
  
    // alternative ways to println elements
    scalaSeq.foreach(s => println(s))
    scalaSeq.foreach(println(_))
    scalaSeq.foreach(println _)
    scalaSeq.foreach(println)
  
    val list: List[String] = scalaSeq.toList // convert from seq to list
    list.foreach(println) // print the converted list
  
    val seq: Seq[String] = Seq("a", "b", "c") // Scala's Seq is a trait, roughly the counterpart of Java's List interface
    seq.foreach(println)
  
    val immutableList = List(1, 2 ,3) // scala list is immutable and is an abstract class (that is extended by Nil == empty list)
    immutableList.map(10 * _).foreach(println)
  
    val ints: mutable.ListBuffer[Int] = mutable.ListBuffer(11, 22, 33)
    ints += 44 // mutable ListBuffer allows adding elements via +=, Seq does not
  
    val immutableMap: Map[String, Int] = Map("a" -> 1, "b" -> 2)
    println("map:" + immutableMap("a")) // get value by key = "a"
  
    val newImmutableMap: Map[String, Int] = immutableMap + ("c" -> 3) // create a new map based on existing and new entry
    newImmutableMap.foreach(e => println(e._1 + ":" + e._2))
  
    val mutableMap = mutable.Map("aa" -> 10)
    mutableMap += "bb" -> 20
    mutableMap.foreach{ case(k, v) => println(k + ":" + v) }
  
    printOptional(Some("a")) // value=a
    printOptional(None)      // value=empty
  
    println(divide(12,4,3))  // result 1
    println(divide(b=4,a=12,c=3)) // same result 1: alternatively with named params and different order
  
    println(divide(12,4)) // result 3: the third param has a default value, so it can be omitted unless you want to override it
  
    val clazz = new MyClass
    clazz.myname = " aaa bbb "
    println(s"'${clazz.mytrim}'")
    clazz.myseq = Seq(12)
  
    matchcase(0)
    matchcase(1)
  
    import PersonImplicits._
    println(Person(18, "me").describe) // implicitly add describe method
  }
  
  def printOptional (optionalValue: Option[String]): Unit = {
    val value = if (optionalValue.isDefined) optionalValue.get else "empty"
    println("value=" + value)
  }
  
  def divide (a: Int, b: Int, c: Int = 1): Int = a / b / c    // value of last statement is the method return value
  
  def matchcase (n: Int): Unit = {
    n match {
      case -1 | 0 => println("non positive")
      case other => println("positive " + other)
    }
  }
 
  def sum(lb: Int, ub: Int, fun: Int => Int) = { // define a method (before referencing it) accepting function as 3rd arg
    var sum = 0 // declare mutable variable (val is immutable)
    for (el <- lb to ub) { // iterate by 1 from lb to ub
      sum += fun(el)
    }
    sum
  }
}
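
For comparison, printOptional above can be written more idiomatically with Option.getOrElse or pattern matching; a short sketch (OptionDemo is a made-up name):

object OptionDemo {
  def main(args: Array[String]): Unit = {
    def printOptional(optionalValue: Option[String]): Unit =
      println("value=" + optionalValue.getOrElse("empty")) // getOrElse instead of isDefined/get

    printOptional(Some("a")) // value=a
    printOptional(None)      // value=empty

    Some("a") match {        // pattern matching on Option
      case Some(v) => println("got " + v)
      case None    => println("empty")
    }
  }
}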