Straight to the code:
package com.suning.scdc.hspark.goods.test
import scala.collection.Seq
import scala.collection.mutable.LinkedList
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.elasticsearch.spark.sparkContextFunctions
import org.slf4j.LoggerFactory
import org.elasticsearch.spark.rdd.EsSpark
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.SaveMode
/**
 * Example job showing two ways to move data between Spark and Elasticsearch:
 *
 *  - [[esRdd]]: the RDD API (`SparkContext.esRDD` to read,
 *    `EsSpark.saveToEsWithMeta` to write with explicit document ids)
 *  - [[dfEs]]:  the DataFrame API (read/write through the
 *    `org.elasticsearch.spark.sql` data source)
 *
 * ES connection settings (`es.nodes`, `es.port`) are supplied via SparkConf
 * in `main()` and picked up by the elasticsearch-spark connector.
 */
object OrderES {

  val logger = LoggerFactory.getLogger(OrderES.getClass)

  // Assigned exactly once in main(); kept as a public field for
  // compatibility with existing callers.
  var sc: SparkContext = null

  def main(args: Array[String]): Unit = {
    // The elasticsearch-spark connector reads its connection settings
    // from the Spark configuration.
    val conf = new SparkConf()
      .set("es.nodes", "10.37.154.82,10.37.154.83,10.37.154.84")
      .set("cluster.name", "elasticsearch")
      .set("es.port", "9200")
    sc = new SparkContext(conf)
    dfEs(sc)
    //esRdd(sc)
  }

  /**
   * RDD round trip: query the `snprime_login/login` index for a single
   * member, project each hit into a new document, and write the result to
   * `bmps/order`, reusing the original ES document id as the target id.
   *
   * @param sc the active SparkContext (must carry the es.* settings)
   */
  def esRdd(sc: SparkContext): Unit = {
    // Match only documents whose memberId equals this value.
    val query = """{"query":{"match":{"memberId": "7013894650"}}}"""
    // esRDD yields (documentId, sourceFields) pairs.
    val hits = sc.esRDD("snprime_login/login", query)
    val docs = hits.map { case (docId, fields) =>
      // Keep the id and project just the fields we need into a fresh
      // document. (The original code also looped over `fields` binding
      // unused locals — dead code, removed.)
      val doc = scala.collection.immutable.Map(
        "orderNo" -> fields("memberId").toString,
        "loginTm" -> fields("loginTime").toString,
        "year" -> "1994")
      (docId, doc)
    }
    // saveToEsWithMeta uses the pair's first element as the ES document id.
    EsSpark.saveToEsWithMeta(docs, "bmps/order")
  }

  /**
   * DataFrame round trip: load `snprime_login/login` through the
   * elasticsearch-spark SQL data source, select two columns, and write them
   * to `bmps/order` keyed by `memberId`.
   *
   * NOTE: `SparkSession.builder().getOrCreate()` reuses the SparkContext
   * created in `main()`; the `sc` parameter is kept for interface
   * compatibility.
   *
   * @param sc the active SparkContext (unused directly; see note above)
   */
  def dfEs(sc: SparkContext): Unit = {
    val spark = SparkSession
      .builder()
      .appName("sql test")
      .master("local") // normally supplied via spark-submit; hard-coded for this demo
      .getOrCreate()

    val readDf = spark.read
      .format("org.elasticsearch.spark.sql")
      .load("snprime_login/login")
      .select("memberId", "loginTime")
    readDf.show()

    // es.mapping.id makes ES use the memberId column as the document _id,
    // so re-running the job upserts instead of duplicating documents.
    val esOptions = Map("es.mapping.id" -> "memberId")
    readDf.write
      .format("org.elasticsearch.spark.sql")
      .options(esOptions)
      .save("bmps/order")
    // To append instead of erroring on an existing index:
    //readDf.write.mode(SaveMode.Append).format("org.elasticsearch.spark.sql").save("bmps/order")
  }
}
Topic: operating on Elasticsearch with Spark DataFrames.
(Blog footer: Like / Bookmark)