Hadoop 系统入门+核心精讲

download:Hadoop 系统入门+核心精讲 package com.zzger.model;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.CountDownLatch;
import com.zzger.module.queue.UrlQueue;
import com.zzger.util.HttpUtils;
import com.zzger.util.RegexUtils;
public class WebSite {

/** * 站点url */ private String url; /** * 需求匍匐的url队列 */ private UrlQueue urls = new UrlQueue<>(); /** * 已匍匐过的页面url */ private List exitUrls = Collections.synchronizedList(new ArrayList<>()); private static final int TOTAL_THREADS = 12; private final CountDownLatch mStartSignal = new CountDownLatch(1); private final CountDownLatch mDoneSignal = new CountDownLatch(TOTAL_THREADS); public WebSite(String url){ this.url = url; urls.offer(url); //把网站首页参加需求匍匐的队列中 }public void guangDu(){ new Thread(new Runnable() { @Override public void run() { paxing(HttpUtils.httpGet(url)); } }).start(); }public void paxing(String html){ if(html.lastIndexOf("下一页

")<0) return ;
String strList = html.substring(html.indexOf("
  • "), html.lastIndexOf("下一页

  • "));
    String url = RegexUtils.RegexString(" list = page.ybhqSection().getSections(); for(Section section : list){ new Thread(new Runnable() { @Override public void run() { mStartSignal.countDown(); // 计数减一为0,工作线程真正启动详细操作 try { mStartSignal.await(); // 阻塞,等候mStartSignal计数为0运转后面的代码 // 一切的工作线程都在等候同一个启动的命令 } catch (InterruptedException e) { e.printStackTrace(); } DuanZi duanzi = section.select().getModel(); System.out.println(duanzi.getTitle()); mDoneSignal.countDown(); // 完成以后计数减一 } } ).start(); } try { mDoneSignal.await(); // 等候一切工作线程完毕 } catch (InterruptedException e) { e.printStackTrace(); } dxcPx(); //线程任务执行完后,再次获取url队列停止任务 } public static void main(String[] args) { WebSite web = new WebSite("http://duanziwang.com"); web.guangDu(); for(int i = 0; i<10; i++){ new Thread(new Runnable() { @Override public void run() { web.dxcPx(); } }).start(); }}

    【Hadoop 系统入门+核心精讲】}

      推荐阅读