java爬虫获取图片

获取每个页面图片链接地址
package com.wxq.pachong;
import com.alibaba.fastjson.JSON;
import java.util.ArrayList;
import java.util.List;
/**
* @title:
* @description:
* @author:
* @date:2019/3/11 15:09
**/
public class JianDanHtmlParserimplements Runnable{
private Stringhtml;
private int page;
public JianDanHtmlParser(String html,int page) {
this.html = html;
this.page = page;
}
@Override
public void run() {
System.out.println("==========第"+page+"页============");
List list =new ArrayList();
html =html.substring(html.indexOf("list"));
String[] images =html.split("li>");
for (String image : images) {
String[] ss = image.split("br");
for (String s : ss) {
if (s.indexOf("java爬虫获取图片 list.add(s.substring(i, s.indexOf("\"", i +1)));
}catch (Exception e) {
System.out.println(s);
}
}
}
}
for(String imageUrl : list){
System.out.println("图片链接:"+ JSON.toJSONString(imageUrl));
if(imageUrl.indexOf("uploads")>0){
new Thread(new JianDanImageCreator(imageUrl,page)).start();
}
}
}
}


将图片链接下载到本地
package com.wxq.pachong;
import java.io.File;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URL;
/**
 * Downloads one image URL into the local storage directory.
 *
 * <p>The target file is named {@code <page>--<last path segment of the URL>}. Instances
 * are run on their own threads, so the shared image counter is only updated through a
 * synchronized helper.
 *
 * @date 2019/3/11 15:09
 */
public class JianDanImageCreator implements Runnable {

    /** Number of images saved so far, shared by all instances. */
    private static int count = 0;

    // Storage directory; change as needed.
    private static final String basePath = "E:/jiandan";

    private final String imageUrl;
    private final int page;

    /**
     * @param imageUrl absolute URL of the image to download
     * @param page     page number the image was found on, used as a file-name prefix
     */
    public JianDanImageCreator(String imageUrl, int page) {
        this.imageUrl = imageUrl;
        this.page = page;
    }

    /**
     * Creates the storage directory if needed and copies the remote image into it.
     * Failures are reported on stderr and do not propagate.
     */
    @Override
    public void run() {
        File dir = new File(basePath);
        if (!dir.exists()) {
            dir.mkdirs();
            System.out.println("图片存放于" + basePath + "目录下");
        }
        String imageName = imageUrl.substring(imageUrl.lastIndexOf("/") + 1);
        File file = new File(basePath + "/" + page + "--" + imageName);
        // The input is opened first so a failed connection does not leave an empty file;
        // both streams are closed on every path.
        try (InputStream is = new URL(imageUrl).openStream();
             OutputStream os = new FileOutputStream(file)) {
            byte[] buff = new byte[1024];
            int readed;
            while ((readed = is.read(buff)) != -1) {
                // Write only the bytes actually read in this round.
                os.write(buff, 0, readed);
            }
        } catch (Exception e) {
            e.printStackTrace();
            return;
        }
        System.out.println("第" + nextCount() + "张妹子:" + file.getAbsolutePath());
    }

    /** Returns the current counter value and then increments it, atomically. */
    private static synchronized int nextCount() {
        return count++;
    }
}


爬虫执行方法


package com.wxq.pachong;
import org.apache.http.client.config.CookieSpecs;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import java.io.InputStream;
/**
 * Entry point of the crawler: walks the listing pages from {@code page} down to 1,
 * fetches each one over HTTP and hands the HTML to a {@link JianDanHtmlParser} thread.
 *
 * @date 2019/3/11 15:07
 */
public class SimpleSpider {

    // Page number the crawl starts from.
    private static final int page = 1264;

    /**
     * Runs the crawl. A failure on one page is printed and the loop moves on to the next
     * page; an interrupt stops the crawl.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // HttpClient timeout configuration
        RequestConfig globalConfig = RequestConfig.custom()
                .setCookieSpec(CookieSpecs.STANDARD)
                .setConnectionRequestTimeout(6000)
                .setConnectTimeout(6000)
                .build();
        System.out.println("5秒后开始抓取煎蛋妹子图……");
        // The client is closed when the crawl ends, whether normally or by interrupt.
        try (CloseableHttpClient httpClient =
                     HttpClients.custom().setDefaultRequestConfig(globalConfig).build()) {
            for (int i = page; i > 0; i--) {
                // Build the GET request.
                // NOTE(review): the URL is i followed by "1.html" (e.g. 12641.html) — confirm
                // this is the intended page-numbering scheme.
                HttpGet httpGet = new HttpGet("http://www.jf258.com/nvsheng/" + i + "1.html");
                httpGet.addHeader("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.152 Safari/537.36");
                httpGet.addHeader("Cookie", "_gat=1; nsfw-click-load=off; gif-click-load=on; _ga=GA1.2.1861846600.1423061484");
                try {
                    // Throttle: wait before every request.
                    Thread.sleep(5000);
                    // Send the request; the response is released on every path.
                    try (CloseableHttpResponse response = httpClient.execute(httpGet)) {
                        if (response.getEntity() == null) {
                            // No body to parse for this page.
                            continue;
                        }
                        InputStream in = response.getEntity().getContent();
                        String html = Utils.convertStreamToString(in);
                        // Parse the page content on its own thread.
                        new Thread(new JianDanHtmlParser(html, i)).start();
                    }
                } catch (InterruptedException e) {
                    // Restore the interrupt flag and stop crawling.
                    Thread.currentThread().interrupt();
                    return;
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        } catch (java.io.IOException e) {
            // Raised only when closing the client fails.
            e.printStackTrace();
        }
    }
}


工具类


package com.wxq.pachong;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
/**
 * Stream helpers used by the crawler.
 *
 * @date 2019/3/11 15:20
 */
public class Utils {

    /**
     * Reads the whole stream as text using the platform default charset.
     *
     * @param is stream to read; closed before returning. May be {@code null}.
     * @return the stream content with every line terminated by {@code '\n'}, or an empty
     *         string when {@code is} is {@code null}
     */
    public static String convertStreamToString(InputStream is) {
        return convertStreamToString(is, java.nio.charset.Charset.defaultCharset());
    }

    /**
     * Reads the whole stream as text using the given charset.
     *
     * <p>Line terminators are normalised to {@code '\n'} and one is appended after the
     * last line. If reading fails, the stack trace is printed and the text read so far
     * is returned.
     *
     * @param is      stream to read; closed before returning. May be {@code null}.
     * @param charset charset used to decode the bytes
     * @return the decoded content, or an empty string when {@code is} is {@code null}
     */
    public static String convertStreamToString(InputStream is, java.nio.charset.Charset charset) {
        StringBuilder sb = new StringBuilder();
        if (is == null) {
            return sb.toString();
        }
        // Closing the reader also closes the underlying stream.
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(is, charset))) {
            String line;
            while ((line = reader.readLine()) != null) {
                sb.append(line).append('\n');
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return sb.toString();
    }
}

    推荐阅读