一颗蔬菜

我虽是只猫却也常常思考

实践

  • API编程
import org.apache.spark.sql.{DataFrame, SparkSession}

/**
 * Walk-through of the untyped DataFrame API: loads a JSON file into a
 * [[DataFrame]] and prints the result of a handful of basic operations
 * (show, printSchema, select, filter, groupBy) to stdout.
 *
 * The expected output shown in the comments corresponds to the sample
 * `people.json` file (Michael / Andy / Justin).
 */
object DataFrameAPIApp {

  /** Input file used when no path is supplied on the command line. */
  private val DefaultInputPath: String =
    "file:///C:\\Users\\gosur\\IdeaProjects\\wordCount\\data\\people.json"

  /**
   * Entry point.
   *
   * @param args optional; if present, `args(0)` is the path/URI of the JSON
   *             file to read. With no arguments the original hard-coded
   *             path is used.
   */
  def main(args: Array[String]): Unit = {
    val spark: SparkSession = SparkSession.builder().master("local").getOrCreate()

    // Everything that can fail runs inside try so the session is always
    // stopped, even when the file is missing or a query throws.
    try {
      val inputPath: String = args.headOption.getOrElse(DefaultInputPath)
      val df: DataFrame = spark.read.json(inputPath)

      // Displays the content of the DataFrame to stdout
      df.show()

      // +----+-------+
      // | age|   name|
      // +----+-------+
      // |null|Michael|
      // |  30|   Andy|
      // |  19| Justin|
      // +----+-------+

      // Print the schema in a tree format
      df.printSchema()
      // root
      // |-- age: long (nullable = true)
      // |-- name: string (nullable = true)

      // Select only the "name" column
      df.select("name").show()

      // This import is needed to use the $-notation
      import spark.implicits._
      df.select($"name").show()
      // +-------+
      // |   name|
      // +-------+
      // |Michael|
      // |   Andy|
      // | Justin|
      // +-------+

      // Select everybody, but increment the age by 20
      df.select($"name", $"age" + 20).show()
      //   +-------+----------+
      //   |   name|(age + 20)|
      //   +-------+----------+
      //   |Michael|      null|
      //   |   Andy|        50|
      //   | Justin|        39|
      //   +-------+----------+

      // Select people older than 20. The same predicate is written twice:
      // once as a Column expression and once as a SQL expression string.
      // Each call prints the table below.
      df.filter($"age" > 20).show()
      df.filter("age > 20").show()
      //    +---+----+
      //    |age|name|
      //    +---+----+
      //    | 30|Andy|
      //    +---+----+

      // Count people by age
      df.groupBy("age").count().show()
      //    +----+-----+
      //    | age|count|
      //    +----+-----+
      //    |  19|    1|
      //    |null|    1|
      //    |  30|    1|
      //    +----+-----+
    } finally {
      spark.stop()
    }
  }

}
  • 使用SQL的方式操作
import org.apache.spark.sql.{DataFrame, SparkSession}

/**
 * Walk-through of querying a DataFrame with SQL: loads a JSON file,
 * registers it as the temporary view `people`, runs `SELECT * FROM people`
 * through [[SparkSession]] and prints the result to stdout.
 */
object DataFrameAPIApp {

  /** Input file used when no path is supplied on the command line. */
  private val DefaultInputPath: String =
    "file:///C:\\Users\\gosur\\IdeaProjects\\wordCount\\data\\people.json"

  /**
   * Entry point.
   *
   * @param args optional; if present, `args(0)` is the path/URI of the JSON
   *             file to read. With no arguments the original hard-coded
   *             path is used.
   */
  def main(args: Array[String]): Unit = {
    // Neither reference is ever reassigned, so both are vals.
    val spark: SparkSession = SparkSession.builder().master("local").getOrCreate()

    // Stop the session in finally so a failed read/query does not leave it running.
    try {
      val inputPath: String = args.headOption.getOrElse(DefaultInputPath)
      val df: DataFrame = spark.read.json(inputPath)

      // Register the DataFrame as a SQL temporary view
      df.createOrReplaceTempView("people")
      val sqlDF = spark.sql("SELECT * FROM people")
      sqlDF.show()
      // +----+-------+
      // | age|   name|
      // +----+-------+
      // |null|Michael|
      // |  30|   Andy|
      // |  19| Justin|
      // +----+-------+
    } finally {
      spark.stop()
    }
  }
}

版权声明:本文为原创文章,版权归 一颗蔬菜 所有,转载请联系博主获得授权!
本文地址:https://www.suwenjin.com/index.php/archives/255/

发表评论

正在加载 Emoji