spark+java1.8+lamda wordCount 实例,并且实现按单词出现的次数的倒序排序


【spark+java1.8+lamda wordCount 实例,并且实现按单词出现的次数的倒序排序】

package com.jiangzeyun.spark.core;

import java.util.Arrays;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

import scala.Tuple2;

/**
 * WordCount — a first Spark program: counts the occurrences of each
 * space-separated word in a text file and prints the words sorted by
 * occurrence count in descending order, one {@code word=>count} per line.
 *
 * @author jiangzeyun
 */
public class WordCount {
    public static void main(String[] args) {
        // Local mode: run the job inside this JVM with a single worker thread.
        SparkConf conf = new SparkConf().setAppName("WordCount").setMaster("local");
        // JavaSparkContext is Closeable; try-with-resources guarantees shutdown
        // even on exception, replacing the explicit stop() and the
        // @SuppressWarnings("resource") of the original.
        try (JavaSparkContext sc = new JavaSparkContext(conf)) {
            JavaRDD<String> lines = sc.textFile("/usr/local/hadoop/README.txt");

            // Split each line into words on single spaces.
            JavaRDD<String> words =
                    lines.flatMap(line -> Arrays.asList(line.split(" ")).iterator());

            // Pair each word with an initial count of 1, then sum counts per word.
            JavaPairRDD<String, Integer> pairs =
                    words.mapToPair(word -> new Tuple2<>(word, 1));
            JavaPairRDD<String, Integer> counts = pairs.reduceByKey((a, b) -> a + b);

            // To sort by frequency (descending): swap to (count, word), sort the
            // keys with ascending=false, then swap back to (word, count).
            JavaPairRDD<Integer, String> byCount =
                    counts.mapToPair(Tuple2::swap).sortByKey(false);
            JavaPairRDD<String, Integer> result = byCount.mapToPair(Tuple2::swap);

            result.foreach(t -> System.out.println(t._1() + "=>" + t._2()));
        }
    }
}



    推荐阅读