Spark自定义Hive的UDF/UDAF函数

Post author:xfxia
Post published:2023年8月31日
Post category:其他

自定义UDF函数

package com.ws.spark
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.hive.HiveContext
import org.apache.spark.{SparkConf, SparkContext}

/**
 * Demo entry point that registers a Spark SQL UDF and applies it to a Hive table.
 *
 * The UDF is registered under the name "myAdd"; note that, despite the name,
 * the registered function multiplies its integer argument by 100.
 */
object MyUDF {

  /**
   * Creates a Spark context with master "local", registers the "myAdd" UDF on a
   * HiveContext, evaluates it over the `age` column of `ws.t_hive`, and prints
   * up to 4 rows of the result.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val sparkConf: SparkConf = new SparkConf().setAppName("RangeTest").setMaster("local")
    val sparkContext: SparkContext = new SparkContext(sparkConf)

    try {
      val hiveContext: HiveContext = new HiveContext(sparkContext)

      // Register a UDF that can be called from SQL as myAdd(<int>).
      hiveContext.udf.register("myAdd", (x: Int) => x * 100)

      val dataFrame: DataFrame = hiveContext.sql("select myAdd(age) from ws.t_hive")
      // Show 4 result rows.
      dataFrame.show(4)
    } finally {
      // Always stop the context, even when registration or the query throws,
      // so a failed run does not leave the Spark context running.
      sparkContext.stop()
    }
  }
}

自定义UDAF函数

package com.ws.spark

import org.apache.spark.sql.expressions.{MutableAggregationBuffer, UserDefinedAggregateFunction}
import org.apache.spark.sql.hive.HiveContext
import org.apache.spark.sq

原文链接：https://blog.csdn.net/bb23417274/article/details/87976136

你可能也喜欢