Java使用HttpClient请求某些网页时,可能需要先登录或维护cookie,才能获取到我们想要的数据。本文介绍使用HttpClient执行请求时设置cookie和维护cookie的方法及示例代码。

1、添加定义cookie

1) 创建一个cookie存储并在该存储中设置示例cookie

// Build the sample cookie first: name "JSESSIONID", value "1234",
// scoped to the ".github.com" domain and the root path.
BasicClientCookie sessionCookie = new BasicClientCookie("JSESSIONID", "1234");
sessionCookie.setPath("/");
sessionCookie.setDomain(".github.com");

// Then create the cookie store and register the cookie in it.
BasicCookieStore cookieStore = new BasicCookieStore();
cookieStore.addCookie(sessionCookie);

2) HttpClient 4.3之前版本

/**
 * Attaches a cookie store to a {@code DefaultHttpClient} (the pre-4.3 API) via
 * {@code setCookieStore}, issues a GET request and expects an HTTP 200 status.
 *
 * <p>The result is assigned to {@code response}, which is declared outside this snippet.
 */
@Test
public void givenUsingDeprecatedApi_whenSettingCookiesOnTheHttpClient_thenCorrect()
  throws ClientProtocolException, IOException {
    // Sample cookie scoped to ".github.com" and the root path.
    final BasicClientCookie sessionCookie = new BasicClientCookie("JSESSIONID", "1234");
    sessionCookie.setPath("/");
    sessionCookie.setDomain(".github.com");

    final BasicCookieStore store = new BasicCookieStore();
    store.addCookie(sessionCookie);

    // Pre-4.3 style: the store is set directly on the client instance.
    final DefaultHttpClient httpClient = new DefaultHttpClient();
    httpClient.setCookieStore(store);

    response = httpClient.execute(new HttpGet("http://www.github.com"));

    final int statusCode = response.getStatusLine().getStatusCode();
    assertThat(statusCode, equalTo(200));
}

3) HttpClient 4.3及之后版本

/**
 * Builds a client through {@code HttpClientBuilder} with a default cookie store (the 4.3+ API),
 * issues a GET request and expects an HTTP 200 status.
 *
 * <p>The result is assigned to {@code response}, which is declared outside this snippet.
 */
@Test
public void whenSettingCookiesOnTheHttpClient_thenCookieSentCorrectly()
  throws ClientProtocolException, IOException {
    // Sample cookie scoped to ".github.com" and the root path.
    final BasicClientCookie sessionCookie = new BasicClientCookie("JSESSIONID", "1234");
    sessionCookie.setPath("/");
    sessionCookie.setDomain(".github.com");

    final BasicCookieStore store = new BasicCookieStore();
    store.addCookie(sessionCookie);

    // 4.3+ style: the store is supplied to the builder as the client's default.
    final HttpClientBuilder builder = HttpClientBuilder.create();
    builder.setDefaultCookieStore(store);
    final HttpClient httpClient = builder.build();

    response = httpClient.execute(new HttpGet("http://www.github.com"));

    final int statusCode = response.getStatusLine().getStatusCode();
    assertThat(statusCode, equalTo(200));
}

4) 根据请求设置Cookie

如果不能在整个HttpClient上设置cookie,则可以使用HttpContext类为单个请求单独配置cookie:

/**
 * Supplies the cookie store per request through an {@code HttpContext} attribute instead of
 * configuring it on the client, then issues a GET request and expects an HTTP 200 status.
 *
 * <p>{@code instance} and {@code response} are declared outside this snippet.
 */
@Test
public void whenSettingCookiesOnTheRequest_thenCookieSentCorrectly()
  throws ClientProtocolException, IOException {
    // Sample cookie scoped to ".github.com" and the root path.
    final BasicClientCookie sessionCookie = new BasicClientCookie("JSESSIONID", "1234");
    sessionCookie.setPath("/");
    sessionCookie.setDomain(".github.com");

    final BasicCookieStore store = new BasicCookieStore();
    store.addCookie(sessionCookie);

    // The client itself is built without any cookie configuration.
    instance = HttpClientBuilder.create().build();

    // The store travels with the request via the context attribute.
    // Before 4.3 the attribute key was ClientContext.COOKIE_STORE.
    final HttpContext requestContext = new BasicHttpContext();
    requestContext.setAttribute(HttpClientContext.COOKIE_STORE, store);

    final HttpGet get = new HttpGet("http://www.github.com");
    response = instance.execute(get, requestContext);

    final int statusCode = response.getStatusLine().getStatusCode();
    assertThat(statusCode, equalTo(200));
}

5) 在底层请求上设置Cookie(直接设置Cookie请求头)

/**
 * Sets the cookie as a raw {@code Cookie} header on the request itself, issues the GET request
 * and expects an HTTP 200 status.
 *
 * <p>{@code instance} and {@code response} are declared outside this snippet.
 */
@Test
public void whenSettingCookiesOnARequest_thenCorrect()
  throws ClientProtocolException, IOException {
    final HttpGet get = new HttpGet("http://www.github.com");
    // No cookie store involved: the header value is written by hand.
    get.setHeader("Cookie", "JSESSIONID=1234");

    instance = HttpClientBuilder.create().build();
    response = instance.execute(get);

    final int statusCode = response.getStatusLine().getStatusCode();
    assertThat(statusCode, equalTo(200));
}

2、维护保存响应的cookie

通过httpClientContext.getCookieStore()可以读取响应中的cookie信息并保存起来;下次请求之前,可以使用HttpClientBuilder.create().setDefaultCookieStore()方法设置之前保存的cookie信息,这样执行请求时就会携带这些cookie,具体代码如下:

package org.hfutec.crawler.main;
import com.google.common.collect.Lists;
import org.apache.http.Header;
import org.apache.http.HttpHeaders;
import org.apache.http.NameValuePair;
import org.apache.http.client.CookieStore;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpUriRequest;
import org.apache.http.client.methods.RequestBuilder;
import org.apache.http.client.protocol.HttpClientContext;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.message.BasicHeader;
import org.apache.http.message.BasicNameValuePair;
import java.io.*;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.List;
/**
 * Demonstrates how to capture the cookies produced by a (login) request, persist them to a
 * local file through Java serialization, and reuse them for a later request issued by a
 * newly built {@link HttpClient}.
 */
public class RequestWithCookie {
  /**
   * Runs the demo: performs a request with login parameters, saves the resulting cookie store
   * to the file {@code "cookie"}, reads it back, and issues the same request again with a new
   * client that is configured with the restored cookies.
   *
   * @param args unused
   * @throws URISyntaxException if the request URI cannot be built
   * @throws IOException if a request fails or the cookie file cannot be written or read
   * @throws ClassNotFoundException if the serialized cookie store class cannot be resolved
   */
  public static void main(String[] args) throws URISyntaxException, IOException, ClassNotFoundException {
    // Address to request
    String url = "http://www.example.com";
    // Request parameters
    List<NameValuePair> loginNV = new ArrayList<>();
    loginNV.add(new BasicNameValuePair("userName", "test"));
    loginNV.add(new BasicNameValuePair("passWord", "test"));
    // Build the request URI including the parameters
    URI uri = new URIBuilder(url).addParameters(loginNV).build();
    // Create a context object; it is read after the request to obtain the cookies
    HttpClientContext httpClientContext = HttpClientContext.create();
    // Build the custom default headers
    List<Header> headerList = Lists.newArrayList();
    headerList.add(new BasicHeader(HttpHeaders.ACCEPT, "text/html,application/xhtml+xml,application/xml;q=0.9," +
            "image/webp,image/apng,*/*;q=0.8"));
    headerList.add(new BasicHeader(HttpHeaders.USER_AGENT, "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " +
            "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36"));
    headerList.add(new BasicHeader(HttpHeaders.ACCEPT_ENCODING, "gzip, deflate"));
    headerList.add(new BasicHeader(HttpHeaders.CACHE_CONTROL, "max-age=0"));
    headerList.add(new BasicHeader(HttpHeaders.CONNECTION, "keep-alive"));
    headerList.add(new BasicHeader(HttpHeaders.ACCEPT_LANGUAGE, "zh-CN,zh;q=0.8,en;q=0.6,zh-TW;q=0.4,ja;q=0.2," +
            "de;q=0.2"));
    // Build the customized HttpClient
    HttpClient httpClient = HttpClients.custom().setDefaultHeaders(headerList).build();
    // Build the request object
    HttpUriRequest httpUriRequest = RequestBuilder.get().setUri(uri).build();
    // Execute the request with the context so the cookies can be read from it afterwards.
    // NOTE(review): the returned response is neither consumed nor closed here; real code
    // should release it.
    httpClient.execute(httpUriRequest, httpClientContext);
    // Cookies collected during the request; they now carry the login information
    CookieStore cookieStore = httpClientContext.getCookieStore();
    // Persist the cookie store to a local file so it can be read back and reused later
    saveCookieStore(cookieStore, "cookie");
    // To request with the saved cookies, first read them back
    CookieStore cookieStore1 = readCookieStore("cookie");
    // Build an HttpClient that uses the restored cookies by default
    HttpClient newHttpClient = HttpClientBuilder.create().setDefaultCookieStore(cookieStore1).build();
    // Request with the new client using a fresh context. The previous context already holds
    // the first client's cookie store, and a store set on the context takes precedence over
    // the client's default store, so reusing it would bypass the restored cookies.
    newHttpClient.execute(httpUriRequest, HttpClientContext.create());

  }

  /**
   * Serializes the given cookie store to a local file so it can be reused later.
   *
   * @param cookieStore the cookie store to persist
   * @param savePath path of the file to write
   * @throws IOException if the file cannot be opened or written
   */
  private static void saveCookieStore( CookieStore cookieStore, String savePath ) throws IOException {
    // try-with-resources closes both streams even when writeObject throws
    try (FileOutputStream fs = new FileOutputStream(savePath);
         ObjectOutputStream os = new ObjectOutputStream(fs)) {
      os.writeObject(cookieStore);
    }
  }

  /**
   * Reads a cookie store previously written by {@link #saveCookieStore(CookieStore, String)}.
   *
   * @param savePath path of the serialized cookie file to read
   * @return the deserialized cookie store
   * @throws IOException if the file cannot be opened or read
   * @throws ClassNotFoundException if the serialized class cannot be resolved
   */
  private static CookieStore readCookieStore( String savePath ) throws IOException, ClassNotFoundException {
    // NOTE(review): native Java deserialization must only be used on files this program wrote
    // itself; never point this at untrusted data.
    try (FileInputStream fs = new FileInputStream(savePath);
         ObjectInputStream ois = new ObjectInputStream(fs)) {
      return (CookieStore) ois.readObject();
    }
  }
}

相关文档:Java使用HttpClient、HttpURLConnection、Request执行Get和Post请求


推荐文档