spark|spark UDAF根据某列去重求和 distinct sum

Spark UDAF:根据某列去重后求和(distinct sum)
例子:

package spark.groupzb

import org.apache.log4j.{Level, Logger}
import org.apache.spark.sql.types._
import org.apache.spark.sql.{Row, SparkSession}

/**
 * Created by roy on 2020-03-20.
 *
 * Demo driver: builds a small (barnd, orderid, price) table in which the same
 * order appears on several rows, then computes the per-brand total price with
 * every order id counted once, in two ways:
 *   1. with the custom aggregate `orderDistinctSumUDAF(orderid, price)`;
 *   2. with plain SQL, by de-duplicating in a sub-query and summing outside.
 */
object OrderDistinctSumTest {
  // Runs when the object is initialised: keep the console readable.
  Logger.getRootLogger.setLevel(Level.WARN)

  def main(args: Array[String]): Unit = {
    val data = Seq(
      Row("barnd1", "a", 300.0),
      Row("barnd1", "a", 300.0),
      Row("barnd1", "b", 200.0),
      Row("barnd2", "c", 200.0),
      Row("barnd2", "c", 200.0),
      Row("barnd3", "c", 200.0),
      Row("barnd3", "c", 200.0)
      //Row("a", "400", Array(Row("a", "name_a")))
    )

    val schema = new StructType()
      .add("barnd", StringType)
      .add("orderid", StringType)
      .add("price", DoubleType)
    schema.printTreeString()

    val spark = SparkSession.builder().master("local[*]").getOrCreate()
    try {
      val df = spark.createDataFrame(spark.sparkContext.parallelize(data), schema)
      df.show()
      df.createOrReplaceTempView("tmp_tab")

      // Plain sum for comparison (counts duplicated orders several times):
      //spark.sql(
      //  """
      //    |select barnd,sum(price) sum_price
      //    |from tmp_tab group by barnd
      //  """.stripMargin).show()

      //println("orderDistinctSumUDAF")
      val orderDistinctSumUDAF = new OrderDistinctSumUDAF
      spark.udf.register("orderDistinctSumUDAF", orderDistinctSumUDAF)

      // Per brand: row count, number of distinct orders, and the amount with
      // each order counted once.
      spark.sql(
        """
          |select barnd,count(1) ct,count(distinct orderid) order_num,orderDistinctSumUDAF(orderid,price) sum_price
          |from tmp_tab
          |group by barnd
        """.stripMargin).show()
      //+------+---+---------+---------+
      //| barnd| ct|order_num|sum_price|
      //+------+---+---------+---------+
      //|barnd3|  2|        1|    200.0|
      //|barnd2|  2|        1|    200.0|
      //|barnd1|  3|        2|    500.0|
      //+------+---+---------+---------+

      // Without the UDAF the same figure needs a second aggregation level:
      // de-duplicate (barnd, orderid, price) first, then sum per brand.
      spark.sql(
        """
          |select barnd,sum(price) prices
          |from (
          |select barnd,orderid,price,count(1)
          |from tmp_tab group by barnd,orderid,price
          |) t1 group by barnd
        """.stripMargin).show()
      //+------+------+
      //| barnd|prices|
      //+------+------+
      //|barnd3| 200.0|
      //|barnd2| 200.0|
      //|barnd1| 500.0|
      //+------+------+

      //println("partition by barnd,orderid ")
      //spark.sql(
      //  """
      //    |select barnd,first_value(orderid) over(PARTITION BY orderid) lowid
      //    |from tmp_tab
      //  """.stripMargin).show()
    } finally {
      // Release the local Spark context even if one of the queries fails.
      spark.stop()
    }
  }
}

UDAF:
package spark.groupzb

import org.apache.spark.sql.Row
import org.apache.spark.sql.expressions.{MutableAggregationBuffer, UserDefinedAggregateFunction}
import org.apache.spark.sql.types._

/**
 * Created by roy 2020-03-18.
 *
 * Aggregate function `f(orderid: String, value: Double): Double` that sums
 * `value` while counting every distinct `orderid` only once per group.
 *
 * The aggregation buffer is a single map `orderid -> value`. Keeping the value
 * per order id (instead of a running total plus a list of ids) is what makes
 * the result independent of how rows are split across partitions:
 *   - `update` ignores an order id that is already in the map;
 *   - `merge` unions two partial maps key by key, so partial buffers that
 *     overlap on some ids but not others are still combined correctly;
 *   - `evaluate` sums the map's values.
 *
 * Rows whose order id or value is null are skipped. If the same order id
 * occurs with different values, one of them is kept; which one depends on the
 * order in which Spark feeds and merges rows.
 */
class OrderDistinctSumUDAF extends UserDefinedAggregateFunction {

  /** Input columns: the de-duplication key and the amount to sum. */
  override def inputSchema: StructType =
    new StructType()
      //.add("barnd", StringType)
      .add("orderid", StringType)
      .add("sum_v", DoubleType)

  /** Buffer layout: one column holding the map `orderid -> value`. */
  override def bufferSchema: StructType =
    new StructType()
      .add("orders", MapType(StringType, DoubleType, valueContainsNull = false))

  /** The aggregate returns a double. */
  override def dataType: DataType = DoubleType

  override def deterministic: Boolean = true

  /** Starts every group with no order seen yet. */
  override def initialize(buffer: MutableAggregationBuffer): Unit =
    buffer(0) = Map.empty[String, Double]

  /**
   * Folds one input row into the buffer.
   *
   * @param buffer partial aggregation state for the current group
   * @param input  row shaped like [[inputSchema]]
   */
  override def update(buffer: MutableAggregationBuffer, input: Row): Unit =
    // Null keys cannot be stored in a Spark map and a null amount adds nothing.
    if (!input.isNullAt(0) && !input.isNullAt(1)) {
      val orderId = input.getString(0)
      val seen = buffer.getMap[String, Double](0)
      // Only the first occurrence of an order id contributes its value.
      if (!seen.contains(orderId)) {
        buffer(0) = seen + (orderId -> input.getDouble(1))
      }
    }

  /**
   * Combines two partial states into `buffer1`.
   *
   * @param buffer1 state that receives the merged result
   * @param buffer2 other partial state, shaped like [[bufferSchema]]
   */
  override def merge(buffer1: MutableAggregationBuffer, buffer2: Row): Unit = {
    val kept = buffer1.getMap[String, Double](0)
    val incoming = buffer2.getMap[String, Double](0)
    // Right-hand entries win in `++`, so ids already in buffer1 keep their value.
    buffer1(0) = incoming ++ kept
  }

  /** Final result: the sum over all distinct order ids (0.0 for an empty buffer). */
  override def evaluate(buffer: Row): Any = {
    //buffer.getMap[String, Double](0).size
    buffer.getMap[String, Double](0).values.sum
  }

  // Not used by the aggregate itself; kept so existing references still compile.
  case class ObjetValus(orderId: String, sumV: Double)
}

输出:
spark|spark UDAF根据某列去重求合 distinct sum
文章图片

"C:\Program Files\Java\jdk1.8.0_181\bin\java.exe" "-javaagent:D:\Program Files\JetBrains\IntelliJ IDEA Community Edition 2018.2\lib\idea_rt.jar=64860:D:\Program Files\JetBrains\IntelliJ IDEA Community Edition 2018.2\bin" -Dfile.encoding=UTF-8 -classpath "C:\Program Files\Java\jdk1.8.0_181\jre\lib\charsets.jar; C:\Program Files\Java\jdk1.8.0_181\jre\lib\deploy.jar; C:\Program Files\Java\jdk1.8.0_181\jre\lib\ext\access-bridge-64.jar; C:\Program Files\Java\jdk1.8.0_181\jre\lib\ext\cldrdata.jar; C:\Program Files\Java\jdk1.8.0_181\jre\lib\ext\dnsns.jar; C:\Program Files\Java\jdk1.8.0_181\jre\lib\ext\jaccess.jar; C:\Program Files\Java\jdk1.8.0_181\jre\lib\ext\jfxrt.jar; C:\Program Files\Java\jdk1.8.0_181\jre\lib\ext\localedata.jar; C:\Program Files\Java\jdk1.8.0_181\jre\lib\ext\nashorn.jar; C:\Program Files\Java\jdk1.8.0_181\jre\lib\ext\sunec.jar; C:\Program Files\Java\jdk1.8.0_181\jre\lib\ext\sunjce_provider.jar; C:\Program Files\Java\jdk1.8.0_181\jre\lib\ext\sunmscapi.jar; C:\Program Files\Java\jdk1.8.0_181\jre\lib\ext\sunpkcs11.jar; C:\Program Files\Java\jdk1.8.0_181\jre\lib\ext\zipfs.jar; C:\Program Files\Java\jdk1.8.0_181\jre\lib\javaws.jar; C:\Program Files\Java\jdk1.8.0_181\jre\lib\jce.jar; C:\Program Files\Java\jdk1.8.0_181\jre\lib\jfr.jar; C:\Program Files\Java\jdk1.8.0_181\jre\lib\jfxswt.jar; C:\Program Files\Java\jdk1.8.0_181\jre\lib\jsse.jar; C:\Program Files\Java\jdk1.8.0_181\jre\lib\management-agent.jar; C:\Program Files\Java\jdk1.8.0_181\jre\lib\plugin.jar; C:\Program Files\Java\jdk1.8.0_181\jre\lib\resources.jar; C:\Program Files\Java\jdk1.8.0_181\jre\lib\rt.jar; E:\pro\dev_pro\paas-data-customization\data-custom-jde\target\test-classes; E:\pro\dev_pro\paas-data-customization\data-custom-jde\target\classes; E:\pro\dev_pro\paas-data-customization\data-custom-core\target\classes; D:\tools\mavenRepository\org\apache\spark\spark-core_2.11\2.4.4\spark-core_2.11-2.4.4.jar; D:\tools\mavenRepository\com\thoughtworks\paranamer\paranamer\2.8\paranamer-2.8.jar; 
D:\tools\mavenRepository\org\apache\avro\avro-mapred\1.8.2\avro-mapred-1.8.2-hadoop2.jar; D:\tools\mavenRepository\org\apache\avro\avro-ipc\1.8.2\avro-ipc-1.8.2.jar; D:\tools\mavenRepository\com\twitter\chill_2.11\0.9.3\chill_2.11-0.9.3.jar; D:\tools\mavenRepository\com\esotericsoftware\kryo-shaded\4.0.2\kryo-shaded-4.0.2.jar; D:\tools\mavenRepository\com\esotericsoftware\minlog\1.3.0\minlog-1.3.0.jar; D:\tools\mavenRepository\org\objenesis\objenesis\2.5.1\objenesis-2.5.1.jar; D:\tools\mavenRepository\com\twitter\chill-java\0.9.3\chill-java-0.9.3.jar; D:\tools\mavenRepository\org\apache\xbean\xbean-asm6-shaded\4.8\xbean-asm6-shaded-4.8.jar; D:\tools\mavenRepository\org\apache\spark\spark-launcher_2.11\2.4.4\spark-launcher_2.11-2.4.4.jar; D:\tools\mavenRepository\org\apache\spark\spark-kvstore_2.11\2.4.4\spark-kvstore_2.11-2.4.4.jar; D:\tools\mavenRepository\com\fasterxml\jackson\core\jackson-annotations\2.6.7\jackson-annotations-2.6.7.jar; D:\tools\mavenRepository\org\apache\spark\spark-network-common_2.11\2.4.4\spark-network-common_2.11-2.4.4.jar; D:\tools\mavenRepository\org\apache\spark\spark-network-shuffle_2.11\2.4.4\spark-network-shuffle_2.11-2.4.4.jar; D:\tools\mavenRepository\org\apache\spark\spark-unsafe_2.11\2.4.4\spark-unsafe_2.11-2.4.4.jar; D:\tools\mavenRepository\javax\activation\activation\1.1.1\activation-1.1.1.jar; D:\tools\mavenRepository\org\apache\curator\curator-recipes\2.6.0\curator-recipes-2.6.0.jar; D:\tools\mavenRepository\javax\servlet\javax.servlet-api\3.1.0\javax.servlet-api-3.1.0.jar; D:\tools\mavenRepository\org\apache\commons\commons-lang3\3.5\commons-lang3-3.5.jar; D:\tools\mavenRepository\org\apache\commons\commons-math3\3.4.1\commons-math3-3.4.1.jar; D:\tools\mavenRepository\com\google\code\findbugs\jsr305\1.3.9\jsr305-1.3.9.jar; D:\tools\mavenRepository\org\slf4j\jul-to-slf4j\1.7.16\jul-to-slf4j-1.7.16.jar; D:\tools\mavenRepository\org\slf4j\jcl-over-slf4j\1.7.16\jcl-over-slf4j-1.7.16.jar; 
D:\tools\mavenRepository\com\ning\compress-lzf\1.0.3\compress-lzf-1.0.3.jar; D:\tools\mavenRepository\org\xerial\snappy\snappy-java\1.1.7.3\snappy-java-1.1.7.3.jar; D:\tools\mavenRepository\org\lz4\lz4-java\1.4.0\lz4-java-1.4.0.jar; D:\tools\mavenRepository\com\github\luben\zstd-jni\1.3.2-2\zstd-jni-1.3.2-2.jar; D:\tools\mavenRepository\org\roaringbitmap\RoaringBitmap\0.7.45\RoaringBitmap-0.7.45.jar; D:\tools\mavenRepository\org\roaringbitmap\shims\0.7.45\shims-0.7.45.jar; D:\tools\mavenRepository\commons-net\commons-net\3.1\commons-net-3.1.jar; D:\tools\mavenRepository\org\json4s\json4s-jackson_2.11\3.5.3\json4s-jackson_2.11-3.5.3.jar; D:\tools\mavenRepository\org\json4s\json4s-core_2.11\3.5.3\json4s-core_2.11-3.5.3.jar; D:\tools\mavenRepository\org\json4s\json4s-ast_2.11\3.5.3\json4s-ast_2.11-3.5.3.jar; D:\tools\mavenRepository\org\json4s\json4s-scalap_2.11\3.5.3\json4s-scalap_2.11-3.5.3.jar; D:\tools\mavenRepository\org\glassfish\jersey\core\jersey-client\2.22.2\jersey-client-2.22.2.jar; D:\tools\mavenRepository\javax\ws\rs\javax.ws.rs-api\2.0.1\javax.ws.rs-api-2.0.1.jar; D:\tools\mavenRepository\org\glassfish\hk2\hk2-api\2.4.0-b34\hk2-api-2.4.0-b34.jar; D:\tools\mavenRepository\org\glassfish\hk2\hk2-utils\2.4.0-b34\hk2-utils-2.4.0-b34.jar; D:\tools\mavenRepository\org\glassfish\hk2\external\aopalliance-repackaged\2.4.0-b34\aopalliance-repackaged-2.4.0-b34.jar; D:\tools\mavenRepository\org\glassfish\hk2\external\javax.inject\2.4.0-b34\javax.inject-2.4.0-b34.jar; D:\tools\mavenRepository\org\glassfish\hk2\hk2-locator\2.4.0-b34\hk2-locator-2.4.0-b34.jar; D:\tools\mavenRepository\org\javassist\javassist\3.18.1-GA\javassist-3.18.1-GA.jar; D:\tools\mavenRepository\org\glassfish\jersey\core\jersey-common\2.22.2\jersey-common-2.22.2.jar; D:\tools\mavenRepository\javax\annotation\javax.annotation-api\1.2\javax.annotation-api-1.2.jar; D:\tools\mavenRepository\org\glassfish\jersey\bundles\repackaged\jersey-guava\2.22.2\jersey-guava-2.22.2.jar; 
D:\tools\mavenRepository\org\glassfish\hk2\osgi-resource-locator\1.0.1\osgi-resource-locator-1.0.1.jar; D:\tools\mavenRepository\org\glassfish\jersey\core\jersey-server\2.22.2\jersey-server-2.22.2.jar; D:\tools\mavenRepository\org\glassfish\jersey\media\jersey-media-jaxb\2.22.2\jersey-media-jaxb-2.22.2.jar; D:\tools\mavenRepository\javax\validation\validation-api\1.1.0.Final\validation-api-1.1.0.Final.jar; D:\tools\mavenRepository\org\glassfish\jersey\containers\jersey-container-servlet\2.22.2\jersey-container-servlet-2.22.2.jar; D:\tools\mavenRepository\org\glassfish\jersey\containers\jersey-container-servlet-core\2.22.2\jersey-container-servlet-core-2.22.2.jar; D:\tools\mavenRepository\io\netty\netty\3.9.9.Final\netty-3.9.9.Final.jar; D:\tools\mavenRepository\com\clearspring\analytics\stream\2.7.0\stream-2.7.0.jar; D:\tools\mavenRepository\io\dropwizard\metrics\metrics-core\3.1.5\metrics-core-3.1.5.jar; D:\tools\mavenRepository\io\dropwizard\metrics\metrics-jvm\3.1.5\metrics-jvm-3.1.5.jar; D:\tools\mavenRepository\io\dropwizard\metrics\metrics-json\3.1.5\metrics-json-3.1.5.jar; D:\tools\mavenRepository\io\dropwizard\metrics\metrics-graphite\3.1.5\metrics-graphite-3.1.5.jar; D:\tools\mavenRepository\com\fasterxml\jackson\core\jackson-databind\2.6.7.1\jackson-databind-2.6.7.1.jar; D:\tools\mavenRepository\com\fasterxml\jackson\module\jackson-module-scala_2.11\2.6.7.1\jackson-module-scala_2.11-2.6.7.1.jar; D:\tools\mavenRepository\org\scala-lang\scala-reflect\2.11.8\scala-reflect-2.11.8.jar; D:\tools\mavenRepository\com\fasterxml\jackson\module\jackson-module-paranamer\2.7.9\jackson-module-paranamer-2.7.9.jar; D:\tools\mavenRepository\org\apache\ivy\ivy\2.4.0\ivy-2.4.0.jar; D:\tools\mavenRepository\oro\oro\2.0.8\oro-2.0.8.jar; D:\tools\mavenRepository\net\razorvine\pyrolite\4.13\pyrolite-4.13.jar; D:\tools\mavenRepository\net\sf\py4j\py4j\0.10.7\py4j-0.10.7.jar; D:\tools\mavenRepository\org\apache\spark\spark-tags_2.11\2.4.4\spark-tags_2.11-2.4.4.jar; 
D:\tools\mavenRepository\org\apache\commons\commons-crypto\1.0.0\commons-crypto-1.0.0.jar; D:\tools\mavenRepository\org\spark-project\spark\unused\1.0.0\unused-1.0.0.jar; D:\tools\mavenRepository\org\apache\spark\spark-sql_2.11\2.4.4\spark-sql_2.11-2.4.4.jar; D:\tools\mavenRepository\com\univocity\univocity-parsers\2.7.3\univocity-parsers-2.7.3.jar; D:\tools\mavenRepository\org\apache\spark\spark-sketch_2.11\2.4.4\spark-sketch_2.11-2.4.4.jar; D:\tools\mavenRepository\org\apache\spark\spark-catalyst_2.11\2.4.4\spark-catalyst_2.11-2.4.4.jar; D:\tools\mavenRepository\org\scala-lang\modules\scala-parser-combinators_2.11\1.1.0\scala-parser-combinators_2.11-1.1.0.jar; D:\tools\mavenRepository\org\codehaus\janino\janino\3.0.9\janino-3.0.9.jar; D:\tools\mavenRepository\org\codehaus\janino\commons-compiler\3.0.9\commons-compiler-3.0.9.jar; D:\tools\mavenRepository\org\apache\orc\orc-core\1.5.5\orc-core-1.5.5-nohive.jar; D:\tools\mavenRepository\org\apache\orc\orc-shims\1.5.5\orc-shims-1.5.5.jar; D:\tools\mavenRepository\io\airlift\aircompressor\0.10\aircompressor-0.10.jar; D:\tools\mavenRepository\org\apache\orc\orc-mapreduce\1.5.5\orc-mapreduce-1.5.5-nohive.jar; D:\tools\mavenRepository\org\apache\parquet\parquet-column\1.10.1\parquet-column-1.10.1.jar; D:\tools\mavenRepository\org\apache\parquet\parquet-common\1.10.1\parquet-common-1.10.1.jar; D:\tools\mavenRepository\org\apache\parquet\parquet-encoding\1.10.1\parquet-encoding-1.10.1.jar; D:\tools\mavenRepository\org\apache\parquet\parquet-hadoop\1.10.1\parquet-hadoop-1.10.1.jar; D:\tools\mavenRepository\org\apache\parquet\parquet-format\2.4.0\parquet-format-2.4.0.jar; D:\tools\mavenRepository\org\apache\parquet\parquet-jackson\1.10.1\parquet-jackson-1.10.1.jar; D:\tools\mavenRepository\org\apache\arrow\arrow-vector\0.10.0\arrow-vector-0.10.0.jar; D:\tools\mavenRepository\org\apache\arrow\arrow-format\0.10.0\arrow-format-0.10.0.jar; D:\tools\mavenRepository\org\apache\arrow\arrow-memory\0.10.0\arrow-memory-0.10.0.jar; 
D:\tools\mavenRepository\com\carrotsearch\hppc\0.7.2\hppc-0.7.2.jar; D:\tools\mavenRepository\com\vlkan\flatbuffers\1.2.0-3f79e055\flatbuffers-1.2.0-3f79e055.jar; D:\tools\mavenRepository\org\apache\spark\spark-streaming-kafka-0-10_2.11\2.4.4\spark-streaming-kafka-0-10_2.11-2.4.4.jar; D:\tools\mavenRepository\org\apache\kafka\kafka-clients\2.0.0\kafka-clients-2.0.0.jar; D:\tools\mavenRepository\org\mongodb\spark\mongo-spark-connector_2.11\2.3.3\mongo-spark-connector_2.11-2.3.3.jar; D:\tools\mavenRepository\org\mongodb\mongo-java-driver\3.11.0-rc0\mongo-java-driver-3.11.0-rc0.jar; D:\tools\mavenRepository\org\apache\spark\spark-hive_2.11\2.4.4\spark-hive_2.11-2.4.4.jar; D:\tools\mavenRepository\com\twitter\parquet-hadoop-bundle\1.6.0\parquet-hadoop-bundle-1.6.0.jar; D:\tools\mavenRepository\org\spark-project\hive\hive-metastore\1.2.1.spark2\hive-metastore-1.2.1.spark2.jar; D:\tools\mavenRepository\com\jolbox\bonecp\0.8.0.RELEASE\bonecp-0.8.0.RELEASE.jar; D:\tools\mavenRepository\org\datanucleus\datanucleus-api-jdo\3.2.6\datanucleus-api-jdo-3.2.6.jar; D:\tools\mavenRepository\org\datanucleus\datanucleus-rdbms\3.2.9\datanucleus-rdbms-3.2.9.jar; D:\tools\mavenRepository\commons-pool\commons-pool\1.5.4\commons-pool-1.5.4.jar; D:\tools\mavenRepository\commons-dbcp\commons-dbcp\1.4\commons-dbcp-1.4.jar; D:\tools\mavenRepository\javax\jdo\jdo-api\3.0.1\jdo-api-3.0.1.jar; D:\tools\mavenRepository\javax\transaction\jta\1.1\jta-1.1.jar; D:\tools\mavenRepository\org\apache\calcite\calcite-avatica\1.2.0-incubating\calcite-avatica-1.2.0-incubating.jar; D:\tools\mavenRepository\org\apache\calcite\calcite-core\1.2.0-incubating\calcite-core-1.2.0-incubating.jar; D:\tools\mavenRepository\org\apache\calcite\calcite-linq4j\1.2.0-incubating\calcite-linq4j-1.2.0-incubating.jar; D:\tools\mavenRepository\net\hydromatic\eigenbase-properties\1.1.5\eigenbase-properties-1.1.5.jar; D:\tools\mavenRepository\joda-time\joda-time\2.9.3\joda-time-2.9.3.jar; 
D:\tools\mavenRepository\org\jodd\jodd-core\3.5.2\jodd-core-3.5.2.jar; D:\tools\mavenRepository\org\datanucleus\datanucleus-core\3.2.10\datanucleus-core-3.2.10.jar; D:\tools\mavenRepository\org\apache\thrift\libthrift\0.9.3\libthrift-0.9.3.jar; D:\tools\mavenRepository\org\apache\thrift\libfb303\0.9.3\libfb303-0.9.3.jar; D:\tools\mavenRepository\org\apache\derby\derby\10.12.1.1\derby-10.12.1.1.jar; D:\tools\mavenRepository\mysql\mysql-connector-java\5.1.47\mysql-connector-java-5.1.47.jar; D:\tools\mavenRepository\org\slf4j\slf4j-api\1.7.25\slf4j-api-1.7.25.jar; D:\tools\mavenRepository\io\circe\circe-parser_2.11\0.12.0-M3\circe-parser_2.11-0.12.0-M3.jar; D:\tools\mavenRepository\io\circe\circe-jawn_2.11\0.12.0-M3\circe-jawn_2.11-0.12.0-M3.jar; D:\tools\mavenRepository\org\typelevel\jawn-parser_2.11\0.14.2\jawn-parser_2.11-0.14.2.jar; D:\tools\mavenRepository\io\circe\circe-core_2.11\0.12.0-M3\circe-core_2.11-0.12.0-M3.jar; D:\tools\mavenRepository\io\circe\circe-numbers_2.11\0.12.0-M3\circe-numbers_2.11-0.12.0-M3.jar; D:\tools\mavenRepository\org\typelevel\cats-core_2.11\2.0.0-M4\cats-core_2.11-2.0.0-M4.jar; D:\tools\mavenRepository\org\typelevel\cats-macros_2.11\2.0.0-M4\cats-macros_2.11-2.0.0-M4.jar; D:\tools\mavenRepository\org\typelevel\cats-kernel_2.11\2.0.0-M4\cats-kernel_2.11-2.0.0-M4.jar; D:\tools\mavenRepository\org\typelevel\machinist_2.11\0.6.8\machinist_2.11-0.6.8.jar; D:\tools\mavenRepository\com\crealytics\spark-excel_2.11\0.12.2\spark-excel_2.11-0.12.2.jar; D:\tools\mavenRepository\org\apache\poi\poi\4.1.0\poi-4.1.0.jar; D:\tools\mavenRepository\org\apache\commons\commons-collections4\4.3\commons-collections4-4.3.jar; D:\tools\mavenRepository\org\apache\poi\poi-ooxml\4.1.0\poi-ooxml-4.1.0.jar; D:\tools\mavenRepository\org\apache\poi\poi-ooxml-schemas\4.1.0\poi-ooxml-schemas-4.1.0.jar; D:\tools\mavenRepository\org\apache\xmlbeans\xmlbeans\3.1.0\xmlbeans-3.1.0.jar; D:\tools\mavenRepository\com\github\virtuald\curvesapi\1.06\curvesapi-1.06.jar; 
D:\tools\mavenRepository\com\norbitltd\spoiwo_2.11\1.6.0\spoiwo_2.11-1.6.0.jar; D:\tools\mavenRepository\org\scala-lang\modules\scala-xml_2.11\1.2.0\scala-xml_2.11-1.2.0.jar; D:\tools\mavenRepository\org\joda\joda-convert\2.0.1\joda-convert-2.0.1.jar; D:\tools\mavenRepository\org\apache\commons\commons-compress\1.18\commons-compress-1.18.jar; D:\tools\mavenRepository\com\fasterxml\jackson\core\jackson-core\2.8.8\jackson-core-2.8.8.jar; D:\tools\mavenRepository\com\monitorjbl\xlsx-streamer\2.1.0\xlsx-streamer-2.1.0.jar; D:\tools\mavenRepository\com\rackspace\apache\xerces2-xsd11\2.11.1\xerces2-xsd11-2.11.1.jar; D:\tools\mavenRepository\com\rackspace\eclipse\webtools\sourceediting\org.eclipse.wst.xml.xpath2.processor\2.1.100\org.eclipse.wst.xml.xpath2.processor-2.1.100.jar; D:\tools\mavenRepository\edu\princeton\cup\java-cup\10k\java-cup-10k.jar; D:\tools\mavenRepository\xml-resolver\xml-resolver\1.2\xml-resolver-1.2.jar; D:\tools\mavenRepository\xml-apis\xml-apis\1.4.01\xml-apis-1.4.01.jar; D:\tools\mavenRepository\junit\junit\4.12\junit-4.12.jar; D:\tools\mavenRepository\org\hamcrest\hamcrest-core\1.3\hamcrest-core-1.3.jar; D:\tools\mavenRepository\org\apache\hbase\hbase-client\1.4.10\hbase-client-1.4.10.jar; D:\tools\mavenRepository\org\apache\hbase\hbase-annotations\1.4.10\hbase-annotations-1.4.10.jar; D:\tools\mavenRepository\commons-codec\commons-codec\1.9\commons-codec-1.9.jar; D:\tools\mavenRepository\commons-io\commons-io\2.4\commons-io-2.4.jar; D:\tools\mavenRepository\commons-lang\commons-lang\2.6\commons-lang-2.6.jar; D:\tools\mavenRepository\commons-logging\commons-logging\1.2\commons-logging-1.2.jar; D:\tools\mavenRepository\com\google\guava\guava\12.0.1\guava-12.0.1.jar; D:\tools\mavenRepository\com\google\protobuf\protobuf-java\2.5.0\protobuf-java-2.5.0.jar; D:\tools\mavenRepository\org\apache\zookeeper\zookeeper\3.4.10\zookeeper-3.4.10.jar; D:\tools\mavenRepository\org\apache\htrace\htrace-core\3.1.0-incubating\htrace-core-3.1.0-incubating.jar; 
D:\tools\mavenRepository\org\codehaus\jackson\jackson-mapper-asl\1.9.13\jackson-mapper-asl-1.9.13.jar; D:\tools\mavenRepository\org\jruby\jcodings\jcodings\1.0.8\jcodings-1.0.8.jar; D:\tools\mavenRepository\org\jruby\joni\joni\2.1.2\joni-2.1.2.jar; D:\tools\mavenRepository\com\yammer\metrics\metrics-core\2.2.0\metrics-core-2.2.0.jar; D:\tools\mavenRepository\org\apache\hadoop\hadoop-auth\2.7.4\hadoop-auth-2.7.4.jar; D:\tools\mavenRepository\org\apache\directory\server\apacheds-kerberos-codec\2.0.0-M15\apacheds-kerberos-codec-2.0.0-M15.jar; D:\tools\mavenRepository\org\apache\directory\server\apacheds-i18n\2.0.0-M15\apacheds-i18n-2.0.0-M15.jar; D:\tools\mavenRepository\org\apache\directory\api\api-asn1-api\1.0.0-M20\api-asn1-api-1.0.0-M20.jar; D:\tools\mavenRepository\org\apache\directory\api\api-util\1.0.0-M20\api-util-1.0.0-M20.jar; D:\tools\mavenRepository\org\apache\curator\curator-framework\2.7.1\curator-framework-2.7.1.jar; D:\tools\mavenRepository\org\apache\hadoop\hadoop-common\2.7.4\hadoop-common-2.7.4.jar; D:\tools\mavenRepository\org\apache\hadoop\hadoop-annotations\2.7.4\hadoop-annotations-2.7.4.jar; C:\Program Files\Java\jdk1.8.0_181\lib\tools.jar; D:\tools\mavenRepository\xmlenc\xmlenc\0.52\xmlenc-0.52.jar; D:\tools\mavenRepository\commons-configuration\commons-configuration\1.6\commons-configuration-1.6.jar; D:\tools\mavenRepository\commons-digester\commons-digester\1.8\commons-digester-1.8.jar; D:\tools\mavenRepository\commons-beanutils\commons-beanutils\1.7.0\commons-beanutils-1.7.0.jar; D:\tools\mavenRepository\commons-beanutils\commons-beanutils-core\1.8.0\commons-beanutils-core-1.8.0.jar; D:\tools\mavenRepository\com\google\code\gson\gson\2.2.4\gson-2.2.4.jar; D:\tools\mavenRepository\com\jcraft\jsch\0.1.54\jsch-0.1.54.jar; D:\tools\mavenRepository\org\apache\curator\curator-client\2.7.1\curator-client-2.7.1.jar; D:\tools\mavenRepository\org\apache\hadoop\hadoop-mapreduce-client-core\2.7.4\hadoop-mapreduce-client-core-2.7.4.jar; 
D:\tools\mavenRepository\org\apache\hadoop\hadoop-yarn-common\2.7.4\hadoop-yarn-common-2.7.4.jar; D:\tools\mavenRepository\javax\xml\bind\jaxb-api\2.2.2\jaxb-api-2.2.2.jar; D:\tools\mavenRepository\javax\xml\stream\stax-api\1.0-2\stax-api-1.0-2.jar; D:\tools\mavenRepository\org\apache\hbase\hbase-common\1.4.10\hbase-common-1.4.10.jar; D:\tools\mavenRepository\commons-collections\commons-collections\3.2.2\commons-collections-3.2.2.jar; D:\tools\mavenRepository\org\mortbay\jetty\jetty-util\6.1.26\jetty-util-6.1.26.jar; D:\tools\mavenRepository\org\apache\avro\avro\1.7.7\avro-1.7.7.jar; D:\tools\mavenRepository\com\github\stephenc\findbugs\findbugs-annotations\1.3.9-1\findbugs-annotations-1.3.9-1.jar; D:\tools\mavenRepository\log4j\log4j\1.2.17\log4j-1.2.17.jar; D:\tools\mavenRepository\org\apache\hbase\hbase-protocol\1.4.10\hbase-protocol-1.4.10.jar; D:\tools\mavenRepository\org\apache\hbase\hbase-server\1.4.10\hbase-server-1.4.10.jar; D:\tools\mavenRepository\org\apache\hbase\hbase-procedure\1.4.10\hbase-procedure-1.4.10.jar; D:\tools\mavenRepository\org\apache\hbase\hbase-common\1.4.10\hbase-common-1.4.10-tests.jar; D:\tools\mavenRepository\org\apache\hbase\hbase-prefix-tree\1.4.10\hbase-prefix-tree-1.4.10.jar; D:\tools\mavenRepository\org\apache\hbase\hbase-metrics-api\1.4.10\hbase-metrics-api-1.4.10.jar; D:\tools\mavenRepository\org\apache\hbase\hbase-metrics\1.4.10\hbase-metrics-1.4.10.jar; D:\tools\mavenRepository\commons-httpclient\commons-httpclient\3.1\commons-httpclient-3.1.jar; D:\tools\mavenRepository\org\apache\hbase\hbase-hadoop-compat\1.4.10\hbase-hadoop-compat-1.4.10.jar; D:\tools\mavenRepository\org\apache\hbase\hbase-hadoop2-compat\1.4.10\hbase-hadoop2-compat-1.4.10.jar; D:\tools\mavenRepository\com\sun\jersey\jersey-core\1.9\jersey-core-1.9.jar; D:\tools\mavenRepository\com\sun\jersey\jersey-server\1.9\jersey-server-1.9.jar; D:\tools\mavenRepository\asm\asm\3.1\asm-3.1.jar; D:\tools\mavenRepository\commons-cli\commons-cli\1.2\commons-cli-1.2.jar; 
D:\tools\mavenRepository\org\apache\commons\commons-math\2.2\commons-math-2.2.jar; D:\tools\mavenRepository\org\mortbay\jetty\jetty\6.1.26\jetty-6.1.26.jar; D:\tools\mavenRepository\org\mortbay\jetty\jetty-sslengine\6.1.26\jetty-sslengine-6.1.26.jar; D:\tools\mavenRepository\org\mortbay\jetty\jsp-2.1\6.1.14\jsp-2.1-6.1.14.jar; D:\tools\mavenRepository\org\mortbay\jetty\jsp-api-2.1\6.1.14\jsp-api-2.1-6.1.14.jar; D:\tools\mavenRepository\org\mortbay\jetty\servlet-api-2.5\6.1.14\servlet-api-2.5-6.1.14.jar; D:\tools\mavenRepository\org\codehaus\jackson\jackson-core-asl\1.9.13\jackson-core-asl-1.9.13.jar; D:\tools\mavenRepository\org\codehaus\jackson\jackson-jaxrs\1.9.13\jackson-jaxrs-1.9.13.jar; D:\tools\mavenRepository\tomcat\jasper-compiler\5.5.23\jasper-compiler-5.5.23.jar; D:\tools\mavenRepository\tomcat\jasper-runtime\5.5.23\jasper-runtime-5.5.23.jar; D:\tools\mavenRepository\commons-el\commons-el\1.0\commons-el-1.0.jar; D:\tools\mavenRepository\org\jamon\jamon-runtime\2.4.1\jamon-runtime-2.4.1.jar; D:\tools\mavenRepository\com\lmax\disruptor\3.3.0\disruptor-3.3.0.jar; D:\tools\mavenRepository\org\apache\httpcomponents\httpclient\4.5.2\httpclient-4.5.2.jar; D:\tools\mavenRepository\org\apache\httpcomponents\httpcore\4.4.4\httpcore-4.4.4.jar; D:\tools\mavenRepository\org\apache\hadoop\hadoop-client\2.7.4\hadoop-client-2.7.4.jar; D:\tools\mavenRepository\org\apache\hadoop\hadoop-mapreduce-client-app\2.7.4\hadoop-mapreduce-client-app-2.7.4.jar; D:\tools\mavenRepository\org\apache\hadoop\hadoop-mapreduce-client-common\2.7.4\hadoop-mapreduce-client-common-2.7.4.jar; D:\tools\mavenRepository\org\apache\hadoop\hadoop-yarn-client\2.7.4\hadoop-yarn-client-2.7.4.jar; D:\tools\mavenRepository\org\apache\hadoop\hadoop-yarn-server-common\2.7.4\hadoop-yarn-server-common-2.7.4.jar; D:\tools\mavenRepository\org\apache\hadoop\hadoop-mapreduce-client-shuffle\2.7.4\hadoop-mapreduce-client-shuffle-2.7.4.jar; 
D:\tools\mavenRepository\org\apache\hadoop\hadoop-yarn-api\2.7.4\hadoop-yarn-api-2.7.4.jar; D:\tools\mavenRepository\org\apache\hadoop\hadoop-mapreduce-client-jobclient\2.7.4\hadoop-mapreduce-client-jobclient-2.7.4.jar; D:\tools\mavenRepository\org\apache\hadoop\hadoop-hdfs\2.7.4\hadoop-hdfs-2.7.4.jar; D:\tools\mavenRepository\commons-daemon\commons-daemon\1.0.13\commons-daemon-1.0.13.jar; D:\tools\mavenRepository\org\fusesource\leveldbjni\leveldbjni-all\1.8\leveldbjni-all-1.8.jar; D:\tools\mavenRepository\org\spark-project\hive\hive-hbase-handler\1.2.1.spark2\hive-hbase-handler-1.2.1.spark2.jar; D:\tools\mavenRepository\org\spark-project\hive\hive-exec\1.2.1.spark2\hive-exec-1.2.1.spark2.jar; D:\tools\mavenRepository\org\spark-project\hive\hive-ant\1.2.1.spark2\hive-ant-1.2.1.spark2.jar; D:\tools\mavenRepository\org\apache\velocity\velocity\1.5\velocity-1.5.jar; D:\tools\mavenRepository\org\spark-project\hive\hive-shims\1.2.1.spark2\hive-shims-1.2.1.spark2.jar; D:\tools\mavenRepository\org\spark-project\hive\shims\hive-shims-common\1.2.1.spark2\hive-shims-common-1.2.1.spark2.jar; D:\tools\mavenRepository\org\spark-project\hive\shims\hive-shims-0.20S\1.2.1.spark2\hive-shims-0.20S-1.2.1.spark2.jar; D:\tools\mavenRepository\org\spark-project\hive\shims\hive-shims-0.23\1.2.1.spark2\hive-shims-0.23-1.2.1.spark2.jar; D:\tools\mavenRepository\org\apache\hadoop\hadoop-yarn-server-resourcemanager\2.6.0\hadoop-yarn-server-resourcemanager-2.6.0.jar; D:\tools\mavenRepository\com\google\inject\extensions\guice-servlet\3.0\guice-servlet-3.0.jar; D:\tools\mavenRepository\com\google\inject\guice\3.0\guice-3.0.jar; D:\tools\mavenRepository\javax\inject\javax.inject\1\javax.inject-1.jar; D:\tools\mavenRepository\aopalliance\aopalliance\1.0\aopalliance-1.0.jar; D:\tools\mavenRepository\com\sun\jersey\jersey-json\1.9\jersey-json-1.9.jar; D:\tools\mavenRepository\com\sun\xml\bind\jaxb-impl\2.2.3-1\jaxb-impl-2.2.3-1.jar; 
D:\tools\mavenRepository\org\codehaus\jackson\jackson-xc\1.8.3\jackson-xc-1.8.3.jar; D:\tools\mavenRepository\com\sun\jersey\contribs\jersey-guice\1.9\jersey-guice-1.9.jar; D:\tools\mavenRepository\org\codehaus\jettison\jettison\1.1\jettison-1.1.jar; D:\tools\mavenRepository\com\sun\jersey\jersey-client\1.9\jersey-client-1.9.jar; D:\tools\mavenRepository\org\apache\hadoop\hadoop-yarn-server-applicationhistoryservice\2.6.0\hadoop-yarn-server-applicationhistoryservice-2.6.0.jar; D:\tools\mavenRepository\org\apache\hadoop\hadoop-yarn-server-web-proxy\2.6.0\hadoop-yarn-server-web-proxy-2.6.0.jar; D:\tools\mavenRepository\org\spark-project\hive\shims\hive-shims-scheduler\1.2.1.spark2\hive-shims-scheduler-1.2.1.spark2.jar; D:\tools\mavenRepository\org\spark-project\hive\spark-client\1.2.1.spark2\spark-client-1.2.1.spark2.jar; D:\tools\mavenRepository\com\esotericsoftware\kryo\kryo\2.21\kryo-2.21.jar; D:\tools\mavenRepository\com\esotericsoftware\reflectasm\reflectasm\1.07\reflectasm-1.07-shaded.jar; D:\tools\mavenRepository\org\ow2\asm\asm\4.0\asm-4.0.jar; D:\tools\mavenRepository\com\esotericsoftware\minlog\minlog\1.2\minlog-1.2.jar; D:\tools\mavenRepository\org\spark-project\hive\hive-common\1.2.1.spark2\hive-common-1.2.1.spark2.jar; D:\tools\mavenRepository\org\apache\spark\spark-core_2.10\1.3.1\spark-core_2.10-1.3.1.jar; D:\tools\mavenRepository\com\twitter\chill_2.10\0.5.0\chill_2.10-0.5.0.jar; D:\tools\mavenRepository\org\apache\spark\spark-network-common_2.10\1.3.1\spark-network-common_2.10-1.3.1.jar; D:\tools\mavenRepository\org\apache\spark\spark-network-shuffle_2.10\1.3.1\spark-network-shuffle_2.10-1.3.1.jar; D:\tools\mavenRepository\net\java\dev\jets3t\jets3t\0.7.1\jets3t-0.7.1.jar; D:\tools\mavenRepository\org\eclipse\jetty\orbit\javax.servlet\3.0.0.v201112011016\javax.servlet-3.0.0.v201112011016.jar; D:\tools\mavenRepository\net\jpountz\lz4\lz4\1.2.0\lz4-1.2.0.jar; 
D:\tools\mavenRepository\org\spark-project\akka\akka-remote_2.10\2.3.4-spark\akka-remote_2.10-2.3.4-spark.jar; D:\tools\mavenRepository\org\spark-project\akka\akka-actor_2.10\2.3.4-spark\akka-actor_2.10-2.3.4-spark.jar; D:\tools\mavenRepository\com\typesafe\config\1.2.1\config-1.2.1.jar; D:\tools\mavenRepository\org\spark-project\protobuf\protobuf-java\2.5.0-spark\protobuf-java-2.5.0-spark.jar; D:\tools\mavenRepository\org\uncommons\maths\uncommons-maths\1.2.2a\uncommons-maths-1.2.2a.jar; D:\tools\mavenRepository\org\spark-project\akka\akka-slf4j_2.10\2.3.4-spark\akka-slf4j_2.10-2.3.4-spark.jar; D:\tools\mavenRepository\org\json4s\json4s-jackson_2.10\3.2.10\json4s-jackson_2.10-3.2.10.jar; D:\tools\mavenRepository\org\json4s\json4s-core_2.10\3.2.10\json4s-core_2.10-3.2.10.jar; D:\tools\mavenRepository\org\json4s\json4s-ast_2.10\3.2.10\json4s-ast_2.10-3.2.10.jar; D:\tools\mavenRepository\org\scala-lang\scalap\2.10.0\scalap-2.10.0.jar; D:\tools\mavenRepository\org\scala-lang\scala-compiler\2.10.0\scala-compiler-2.10.0.jar; D:\tools\mavenRepository\org\apache\mesos\mesos\0.21.0\mesos-0.21.0-shaded-protobuf.jar; D:\tools\mavenRepository\com\fasterxml\jackson\module\jackson-module-scala_2.10\2.4.4\jackson-module-scala_2.10-2.4.4.jar; D:\tools\mavenRepository\org\tachyonproject\tachyon-client\0.5.0\tachyon-client-0.5.0.jar; D:\tools\mavenRepository\org\tachyonproject\tachyon\0.5.0\tachyon-0.5.0.jar; D:\tools\mavenRepository\org\spark-project\pyrolite\2.0.1\pyrolite-2.0.1.jar; D:\tools\mavenRepository\javolution\javolution\5.5.1\javolution-5.5.1.jar; D:\tools\mavenRepository\log4j\apache-log4j-extras\1.2.17\apache-log4j-extras-1.2.17.jar; D:\tools\mavenRepository\org\antlr\antlr-runtime\3.4\antlr-runtime-3.4.jar; D:\tools\mavenRepository\org\antlr\stringtemplate\3.2.1\stringtemplate-3.2.1.jar; D:\tools\mavenRepository\antlr\antlr\2.7.7\antlr-2.7.7.jar; D:\tools\mavenRepository\org\antlr\ST4\4.0.4\ST4-4.0.4.jar; 
D:\tools\mavenRepository\org\apache\ant\ant\1.9.1\ant-1.9.1.jar; D:\tools\mavenRepository\org\apache\ant\ant-launcher\1.9.1\ant-launcher-1.9.1.jar; D:\tools\mavenRepository\org\codehaus\groovy\groovy-all\2.1.6\groovy-all-2.1.6.jar; D:\tools\mavenRepository\com\googlecode\javaewah\JavaEWAH\0.3.2\JavaEWAH-0.3.2.jar; D:\tools\mavenRepository\org\iq80\snappy\snappy\0.2\snappy-0.2.jar; D:\tools\mavenRepository\org\json\json\20090211\json-20090211.jar; D:\tools\mavenRepository\stax\stax-api\1.0.1\stax-api-1.0.1.jar; D:\tools\mavenRepository\net\sf\opencsv\opencsv\2.3\opencsv-2.3.jar; D:\tools\mavenRepository\jline\jline\2.12\jline-2.12.jar; D:\tools\mavenRepository\org\slf4j\slf4j-log4j12\1.7.5\slf4j-log4j12-1.7.5.jar; D:\tools\mavenRepository\io\netty\netty-all\4.1.18.Final\netty-all-4.1.18.Final.jar; D:\tools\mavenRepository\io\delta\delta-core_2.11\0.4.0\delta-core_2.11-0.4.0.jar; D:\tools\mavenRepository\org\scala-lang\scala-library\2.11.12\scala-library-2.11.12.jar; D:\tools\mavenRepository\org\antlr\antlr4\4.7\antlr4-4.7.jar; D:\tools\mavenRepository\org\abego\treelayout\org.abego.treelayout.core\1.0.3\org.abego.treelayout.core-1.0.3.jar; D:\tools\mavenRepository\org\glassfish\javax.json\1.0.4\javax.json-1.0.4.jar; D:\tools\mavenRepository\com\ibm\icu\icu4j\58.2\icu4j-58.2.jar; D:\tools\mavenRepository\org\antlr\antlr4-runtime\4.7\antlr4-runtime-4.7.jar" spark.groupzb.OrderDistinctSumTest root |-- barnd: string (nullable = true) |-- orderid: string (nullable = true) |-- price: double (nullable = true)20/03/20 00:07:03 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... 
using builtin-java classes where applicable +------+-------+-----+ | barnd|orderid|price| +------+-------+-----+ |barnd1|a|300.0| |barnd1|a|300.0| |barnd1|b|200.0| |barnd2|c|200.0| |barnd2|c|200.0| |barnd3|c|200.0| |barnd3|c|200.0| +------+-------+-----++------+---+---------+---------+ | barnd| ct|order_num|sum_price| +------+---+---------+---------+ |barnd3|2|1|200.0| |barnd2|2|1|200.0| |barnd1|3|2|500.0| +------+---+---------+---------+

【spark|spark UDAF根据某列去重求和 distinct sum】

    推荐阅读