【已解决】Java中模拟登陆百度期间通过HttpClient无法获得所希望的返回的html和cookie

【背景】

折腾:

【教程】模拟登陆百度之Java代码版

期间,用如下代码:

    /**
     * Sends an HTTP request to {@code url} and returns the raw response.
     *
     * <p>A POST carrying a URL-encoded form body is sent when {@code postDict}
     * is non-null; otherwise a GET is sent. The request is executed with the
     * cookie store {@code gCurCookieStore} attached to its context, with the
     * user agent {@code gUserAgent}, and with redirect handling switched on.
     *
     * @param url        target URL
     * @param headerDict extra request headers as name/value pairs; may be null
     * @param postDict   form fields to POST; null selects a GET request
     * @param timeout    NOTE(review): accepted but not applied to the request;
     *                   the intended unit is not visible here, so it is left
     *                   unused rather than guessed at - confirm and wire up
     * @return the response, or null when executing the request failed (the
     *         stack trace is printed in that case)
     */
    public HttpResponse getUrlResponse(
            String url,
            List<NameValuePair> headerDict,
            List<NameValuePair> postDict,
            int timeout
            )
    {
        HttpResponse response = null;
        HttpUriRequest request;
        // NOTE(review): the client is not shut down here, presumably because
        // the caller still reads the entity of the returned response - confirm.
        DefaultHttpClient httpClient = new DefaultHttpClient();

        // Per-request parameters: user agent plus automatic redirect handling.
        BasicHttpParams headerParams = new BasicHttpParams();
        headerParams.setParameter(CoreProtocolPNames.USER_AGENT, gUserAgent);
        headerParams.setParameter(ClientPNames.HANDLE_REDIRECTS, Boolean.TRUE);

        if (postDict != null)
        {
            HttpPost postReq = new HttpPost(url);
            try {
                // Encode explicitly as UTF-8 instead of relying on the library
                // default, so non-ASCII form values survive the round trip.
                postReq.setEntity(new UrlEncodedFormEntity(postDict, "UTF-8"));
            }
            catch (java.io.UnsupportedEncodingException e) {
                // Best effort, as before: report it and send the POST without a body.
                e.printStackTrace();
            }
            request = postReq;
        }
        else
        {
            request = new HttpGet(url);
        }

        request.setParams(headerParams);

        // Apply the caller supplied headers; previously this argument was ignored.
        if (headerDict != null)
        {
            for (NameValuePair header : headerDict)
            {
                request.setHeader(header.getName(), header.getValue());
            }
        }

        try {
            // Bind the shared cookie store so cookies set by this response are
            // recorded in it and sent along with later requests.
            HttpContext localContext = new BasicHttpContext();
            localContext.setAttribute(ClientContext.COOKIE_STORE, gCurCookieStore);
            response = httpClient.execute(request, localContext);

            // Example of the store after fetching http://www.baidu.com/
            // (formatted output):
            //   [name: BAIDUID][domain: .baidu.com][path: /][expiry: Thu Sep 17 09:43:44 CST 2043]
            //   [name: BDSVRTM][value: 2][domain: www.baidu.com][path: /][expiry: null]
            //   [name: H_PS_PSSID][domain: .baidu.com][path: /][expiry: null]
        } catch (ClientProtocolException cpe) {
            cpe.printStackTrace();
        } catch (IOException ioe) {
            ioe.printStackTrace();
        }

        return response;
    }

和:

/**
 * [File]
 * EmulateLoginBaidu.java
 * 
 * [Function]
 * Use Java code to emulate login baidu
 * 
 * 【教程】模拟登陆百度之Java代码版
 * http://www.crifan.com/emulate_login_baidu_use_java_code
 * 
 * [Version]
 * v1.0
 * 2013-09-16
 * 
 * [Note]
 * 1. need add apache http lib:
 * 【已解决】Eclipse的java代码出错:The import org.apache cannot be resolved
 * http://www.crifan.com/java_eclipse_the_import_org_apache_cannot_be_resolved/
 * 2.need crifanLib.java
 * http://code.google.com/p/crifanlib/source/browse/trunk/java/crifanLib.java
 * 
 * [History]
 * [v1.0]
 * 1. initial version. 
 */

import java.io.IOException;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.GregorianCalendar;
import java.util.Hashtable;
import java.util.List;
import java.util.Map;
import java.util.Scanner;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.http.HttpResponse;
import org.apache.http.NameValuePair;
import org.apache.http.cookie.Cookie;
import org.apache.http.impl.cookie.BasicClientCookie;
import org.apache.http.impl.cookie.BasicClientCookie2;
import org.apache.http.message.BasicNameValuePair;

//import crifanLib;

/**
 * Console demo that emulates the Baidu web login flow through crifanLib:
 * fetch the home page to obtain the BAIDUID cookie, fetch the login token,
 * POST the login form, then check which cookies ended up in the cookie store.
 *
 * @author CLi
 */
public class EmulateLoginBaidu {
    /** Extracts the token from {@code bdPass.api.params.login_token='...';}. */
    private static final Pattern LOGIN_TOKEN_PATTERN =
            Pattern.compile("bdPass\\.api\\.params\\.login_token='(?<tokenVal>\\w+)';");

    /** Cookies that must all be present for the login to count as successful. */
    private static final String[] LOGIN_COOKIE_NAMES = {"BDUSS", "PTOKEN", "STOKEN", "SAVEUSERID"};

    static crifanLib crl;

    /**
     * Program entry point: creates the helper library and runs the login demo.
     *
     * @param args command line arguments (unused)
     */
    public static void main(String[] args) {
        crl = new crifanLib();

        EmulateLoginBaiduUsingJava();
    }

    /**
     * Runs the three login steps and prints the outcome of each to stdout.
     * Username and password are read interactively from stdin.
     */
    public static void EmulateLoginBaiduUsingJava()
    {
        // step1: open the home page so that the cookie BAIDUID gets set
        String strBaiduUrl = "http://www.baidu.com/";
        crl.getUrlResponse(strBaiduUrl);

        List<Cookie> cookieList = crl.getCurCookieStore().getCookies();
        crl.dbgPrintCookies(cookieList, strBaiduUrl);

        boolean bGotCookieBaiduid = false;
        for (Cookie ck : cookieList)
        {
            if (ck.getName().equals("BAIDUID"))
            {
                bGotCookieBaiduid = true;
            }
        }
        if (bGotCookieBaiduid)
        {
            System.out.println("正确:已找到cookie BAIDUID");
        }
        else
        {
            System.out.println("错误:没有找到cookie BAIDUID !");
        }

        // step2: fetch the login API page and extract the login token
        String strTokenValue = "";
        boolean bExtractTokenValueOK = false;
        if (bGotCookieBaiduid)
        {
            String getapiUrl = "https://passport.baidu.com/v2/api/?getapi&class=login&tpl=mn&tangram=true";
            String getApiRespHtml = crl.getUrlRespHtml(getapiUrl);

            // e.g. bdPass.api.params.login_token='3cf421493884e0fe9080593d05f4744f';
            // find() rather than matches(): the token is only a fragment of the page
            Matcher tokenValMatcher = LOGIN_TOKEN_PATTERN.matcher(getApiRespHtml);
            if (tokenValMatcher.find())
            {
                strTokenValue = tokenValMatcher.group("tokenVal");
                System.out.println("正确:找到 bdPass.api.params.login_token=" + strTokenValue);

                bExtractTokenValueOK = true;
            }
            else
            {
                System.out.println("错误:没找到bdPass.api.params.login_token !");
            }
        }

        // step3: post the login form and verify the returned cookies
        if (bGotCookieBaiduid && bExtractTokenValueOK)
        {
            // Workaround: H_PS_PSSID arrives without an expiry date, e.g.
            // [name: H_PS_PSSID][domain: .baidu.com][path: /][expiry: null]
            // so give it a date far in the future: 2043-10-17, local midnight.
            Date newExpiryDate = new GregorianCalendar(2043, Calendar.OCTOBER, 17).getTime();

            List<Cookie> curCookieList = crl.getCurCookieList();
            for (Cookie ck : curCookieList)
            {
                // Only BasicClientCookie exposes the setter; leave any other type untouched.
                if (ck.getName().equalsIgnoreCase("H_PS_PSSID") && (ck instanceof BasicClientCookie))
                {
                    ((BasicClientCookie) ck).setExpiryDate(newExpiryDate);
                }
            }
            crl.setCurCookieList(curCookieList);

            String staticPageUrl = "http://www.baidu.com/cache/user/html/jump.html";

            List<NameValuePair> postDict = new ArrayList<NameValuePair>();
            postDict.add(new BasicNameValuePair("charset", "utf-8"));
            postDict.add(new BasicNameValuePair("token", strTokenValue));
            postDict.add(new BasicNameValuePair("isPhone", "false"));
            postDict.add(new BasicNameValuePair("index", "0"));
            postDict.add(new BasicNameValuePair("staticpage", staticPageUrl));
            postDict.add(new BasicNameValuePair("loginType", "1"));
            postDict.add(new BasicNameValuePair("tpl", "mn"));
            postDict.add(new BasicNameValuePair("callback", "parent.bdPass.api.login._postCallback"));

            // The Scanner is left open on purpose: closing it would also close System.in.
            Scanner inputReader = new Scanner(System.in);
            System.out.println("Please Enter Your:" );
            System.out.println("Baidu Username:" );
            String strBaiduUsername = inputReader.nextLine();
            System.out.println("You Entered Username=" + strBaiduUsername);
            System.out.println("Baidu Password:" );
            String strBaiduPassword = inputReader.nextLine();
            // The password is deliberately not echoed back to the console.

            // NOTE(review): strBaiduUsername / strBaiduPassword are read above but
            // never added to postDict, so this request carries no credentials.
            // This listing is the failing version that the surrounding write-up
            // debugs, so the omission is kept here on purpose.

            postDict.add(new BasicNameValuePair("verifycode", ""));

            postDict.add(new BasicNameValuePair("mem_pass", "on"));

            String baiduMainLoginUrl = "https://passport.baidu.com/v2/api/?login";
            String loginBaiduRespHtml = crl.getUrlRespHtml(baiduMainLoginUrl, null, postDict);

            // Login counts as successful only if every expected cookie is now stored.
            curCookieList = crl.getCurCookieList();
            boolean bLoginBaiduOk = true;
            for (String strCookieName : LOGIN_COOKIE_NAMES)
            {
                bLoginBaiduOk = bLoginBaiduOk && containsCookie(curCookieList, strCookieName);
            }

            if (bLoginBaiduOk)
            {
                System.out.println("成功模拟登陆百度首页!" );
            }
            else
            {
                System.out.println("模拟登陆百度首页 失败!");
                System.out.println("所返回的HTML源码为:" + loginBaiduRespHtml);
            }
        }
    }

    /** Returns true when {@code cookies} holds a cookie named {@code name}, ignoring case. */
    private static boolean containsCookie(List<Cookie> cookies, String name)
    {
        for (Cookie ck : cookies)
        {
            if (name.equalsIgnoreCase(ck.getName()))
            {
                return true;
            }
        }
        return false;
    }
}

始终还是无法获得对应的返回的html和cookie,从而无法正常模拟登陆。

【折腾过程】

1.很是怀疑,是不是其中的自动跳转,没有工作。

即:

headerParams.setParameter(ClientPNames.HANDLE_REDIRECTS, Boolean.TRUE);

没有起到对应的自动跳转的效果,即:

正常,自动跳转后的结果,应该是:

{Connection: keep-alive
Pragma: public
Content-Length: 515
Cache-Control: public
Content-Type: text/html
Date: Tue, 17 Sep 2013 06:20:24 GMT
Expires: 0
ETag: w/"iIuDLhaYSmAaLXqM5VeFg8Iy2wvCnATm:1379398824"
Last-Modified: Tue, 17 Sep 2013 06:20:24 6SepGMT
P3P: CP=" OTI DSP COR IVA OUR IND COM "
Set-Cookie: BDUSS=43TERMeExpdWVQaUtVLVYxZG4tU0JZSHpiWTJBTVVFVmhqc1QydElqbW9nVjlTQVFBQUFBJCQAAAAAAAAAAAEAAAAZP0gCYWdhaW5pbnB1dDMAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAKj0N1Ko9DdSLX; expires=Sat, 04-Dec-2021 06:20:24 GMT; path=/; domain=baidu.com; httponly,PTOKEN=deleted; expires=Mon, 17-Sep-2012 06:20:23 GMT; path=/; domain=baidu.com; httponly,PTOKEN=a530e5fed0d27f1600c5be851accbedb; expires=Sat, 04-Dec-2021 06:20:24 GMT; path=/; domain=passport.baidu.com; httponly,STOKEN=964d5d78f925418579c09f050b76354b; expires=Sat, 04-Dec-2021 06:20:24 GMT; path=/; domain=passport.baidu.com; httponly,SAVEUSERID=deleted; expires=Mon, 17-Sep-2012 06:20:23 GMT; path=/; domain=passport.baidu.com; httponly,USERNAMETYPE=1; expires=Sat, 04-Dec-2021 06:20:24 GMT; path=/; domain=passport.baidu.com; httponly
Server: 
Vary: Accept-Encoding
Content-Encoding: 

}

而现在,返回的结果,是没有跳转时,所返回的结果:

{Transfer-Encoding: chunked
Connection: keep-alive
Pragma: public
Vary: Accept-Encoding
Content-Encoding: 
Cache-Control: public
Content-Type: text/html
Date: Tue, 17 Sep 2013 08:02:32 GMT
Expires: 0
ETag: w/"TL3qSUIdbJn8y4PtWqjPr1EFB8W22Q1g:1379404952"
Last-Modified: Tue, 17 Sep 2013 08:02:32 8SepGMT
P3P: CP=" OTI DSP COR IVA OUR IND COM "
Server: 

}

2.参考:

How auto redirect in HttpClient (java, apache)

说是:

HttpClient 3.x不支持自动跳转

HttpClient 4.x 就支持了。

3.我这里的HttpClient,是之前:

【已解决】Eclipse的java代码出错:The import org.apache cannot be resolved

中加的,去看了下,的确已经是4.2.5,是支持的了。

4.看到:

Interface ClientPNames

已不推荐再使用ClientPNames了,改推荐:

Class RequestConfig

但是好像是4.3的,我此处,用的是4.2的库,应该是不支持的。

5.我此处调试的时候,其实可以看到对应的设置的:

{http.useragent=Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; InfoPath.3; .NET4.0C; .NET4.0E, http.protocol.handle-redirects=true}

但是,实际的返回结果,貌似是没有自动跳转的。

6.再继续调试,经过:

request.setParams(headerParams);

是可以将对应的上面的header信息,设置给request的:

parameters contains the auto redirects true

7.后来继续调试,发现好像不是自动跳转的问题。

貌似是cookie的问题。

正常的话,C#中,访问:

https://passport.baidu.com/v2/api/?login

之前的cookie是:

cookie BAIDUID value before login

cookie H_PS_PSSID value before login

cookie HOSUPPORT value before login

cookie BDSVRTM value before login

而此处,调试看到的cookie却是:

[
    [version: 0][name: BAIDUID][value: 8EFD622E00F965E706C2F4CC3A392AF9:FG=1][domain: .baidu.com][path: /][expiry: Thu Sep 17 16:38:11 CST 2043],
    [version: 0][name: BDSVRTM][value: 1][domain: www.baidu.com][path: /][expiry: null],
    [version: 0][name: HOSUPPORT][value: 1][domain: passport.baidu.com][path: /][expiry: Sat Dec 04 16:38:12 CST 2021],
    [version: 0][name: H_PS_PSSID][value: 3361_3381_1447_2976][domain: .baidu.com][path: /][expiry: Sat Oct 17 00:00:00 CST 2043]
]

很明显,其中的:

  • BDSVRTM:
    • domain不对
    • expiry不对
  • H_PS_PSSID:
    • 已经经过上述手动加代码,修复了不对的expiry

所以,去手动添加代码修正这些cookie:

        	// Patch the stored cookies before the login request:
        	//  - H_PS_PSSID: give it an expiry date
        	//  - BDSVRTM: widen the domain to .baidu.com and give it an expiry date
        	curCookieList = crl.getCurCookieList();
        	for(Cookie ck : curCookieList)
        	{
        		String cookieName = ck.getName();
        		boolean isHPsPssid = cookieName.equalsIgnoreCase("H_PS_PSSID");
        		boolean isBdsvrtm = cookieName.equalsIgnoreCase("BDSVRTM");

        		// Only BasicClientCookie exposes the setters; any other cookie
        		// type is left untouched instead of failing on the cast.
        		if((isHPsPssid || isBdsvrtm) && (ck instanceof BasicClientCookie))
        		{
        			BasicClientCookie editableCookie = (BasicClientCookie) ck;
        			if(isBdsvrtm)
        			{
        				editableCookie.setDomain(".baidu.com");
        			}
        			editableCookie.setExpiryDate(newExpiryDate);
        		}
        	}

        	crl.setCurCookieList(curCookieList);

修改后为:

[
    [version: 0][name: BAIDUID][value: B3CCB8F5D6DA04D26A3CF718FDE8A753:FG=1][domain: .baidu.com][path: /][expiry: Thu Sep 17 16:57:38 CST 2043],
    [version: 0][name: BDSVRTM][value: 1][domain: .baidu.com][path: /][expiry: Sat Oct 17 00:00:00 CST 2043],
    [version: 0][name: HOSUPPORT][value: 1][domain: passport.baidu.com][path: /][expiry: Sat Dec 04 16:57:39 CST 2021],
    [version: 0][name: H_PS_PSSID][value: 3359_3381_1422_2975_2981_3092][domain: .baidu.com][path: /][expiry: Sat Oct 17 00:00:00 CST 2043]
]

看看结果如何。

结果问题依旧。

8.但是,注意到此处的输出的html是:

<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
</head>
<body>


<script type="text/javascript">

var url = encodeURI('http://www.baidu.com/cache/user/html/jump.html?callback=parent.bdPass.api.login._postCallback&index=0&codestring=captchaservice33303435645673725939414876674e4464472b634173726b77736e62576b674e6f7071306c7074722f39346645676b58634b496e52492b5849394e77744d75554648724931322b7378416873447777766f6853372f4a6e6f572f5555707047635035626746797679386a364269555979625874444a67347a686e454f567171755a79507658386e424d6533754e7a59516b654334507a78492b695a664d784c765945426e584d525764305150666837645a4265324f434e334b796a704c43464966654b527551506e5a737541494d5751526c38566176556f304634307a61334c30376a4a5452756a68614e6f5775785849474c66634a565a67497a3337454a48524c336b564d526a686d506c49413558462b39446933653142627746474338&username=&phonenumber=&mail=&tpl=mn&u=https%3A%2F%2Fpassport.baidu.com%2F&needToModifyPassword=&gotourl=&auth=&error=257');
//parent.callback(url)
window.location.replace(url);

</script>
</body>
</html>

看起来,貌似又像是正常返回的html。

所以,真的再次去C#项目中,设置autoredirect为false,看看其返回的结果如何。

结果是:

C#中,如果也是设置autoredirect为false的话,那么返回的内容,也是:

<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
</head>
<body>


<script type="text/javascript">

var url = encodeURI('http://www.baidu.com/cache/user/html/jump.html?callback=parent.bdPass.api.login._postCallback&index=0&codestring=captchaservice64346332423033614479666e5a4c6f336a67534864484939683445457846644676452f4734437858723973515236505979696c4d7a726a385447706a6b7349497055316c426e4a577335553945432b3452433361374b325a475a6863396d496e7866477a5771524a4a734457795147442f3445704d533071747a6633386e70543651785663725837515344316b4a74423473614447676653547a7461596b6c726c75334a624d38556a3679706f4d48567875736565624b32584f796d476e646f363179486b426831767745443946397635725250516e305552417432663566486a6176787675757070636a4d53452b3575796a434434456d4b6b4a56305837594b6c41725965636974794a59494c3848423858413771325542657a6a393464326a5973&username=xxx&phonenumber=&mail=&tpl=mn&u=https%3A%2F%2Fpassport.baidu.com%2F&needToModifyPassword=&gotourl=&auth=&error=257');
//parent.callback(url)
window.location.replace(url);

</script>
</body>
</html>

但是,注意到,此处C#中,获得的html中,是有:

username=xxx

的,而java中返回的html中是没有的。

9.然后想到,会不会是由于connection不是keep-alive而导致的?

所以,去确认java中的connection,的确是keep-alive

参考:

Tomcat, HTTP Keep-Alive and Java’s HttpsUrlConnection

【整理】关于Java中的httpClient中可以传入的参数

而加入了:

headerParams.setParameter(CoreConnectionPNames.SO_KEEPALIVE, Boolean.TRUE);

试试效果。

结果问题依旧,返回的html中,还是没有username:

<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
</head>
<body>


<script type="text/javascript">

var url = encodeURI('http://www.baidu.com/cache/user/html/jump.html?callback=parent.bdPass.api.login._postCallback&index=0&codestring=captchaservice6330386433734b6a45483661695042706663673772305a65396d2b6261636753473545314b41795932745136774576595746576a465557376a2f513332523230596f675a68434b63553738734337645567664643304878665839306e56696d74335535415637716c437379376b2f496f3451626e492f4d50684e4f616d59316b346131476e6163366f6945764d7a476a7976455543614954533066307a59333555646f6a46594f616f3078324b42585870515866496f5473676b3632566e4d36732b47682b494342314837702f572f38635757636f6a4a6e7845564a6f6c68416377596b362b725a2b314742475562435a4844315a376f4b75555942416f7239716f4b6a5762514d516b7a453749484d4766426e3044424f7462615169494d35&username=&phonenumber=&mail=&tpl=mn&u=https%3A%2F%2Fpassport.baidu.com%2F&needToModifyPassword=&gotourl=&auth=&error=257');
//parent.callback(url)
window.location.replace(url);

</script>
</body>
</html>

另外,参考:

What is HTTP Persistent Connections?

其实已经说是默认是true了。

10.另外,注意到,之前C#正常的时候,cookie中的:

BDSVRTM的值是2

而此处的java的

BDSVRTM的值,始终是1

所以,要去研究一下,原先的C#中,什么时候得到的BDSVRTM是2的。

调试C#项目,确定了:

autoredirect为false的时候,用某个账户(3),也是可以正常模拟登陆的;

且BDSVRTM值也是1,且返回的html也是上述的html,但是其中是包含username的。

所以,再去java中看看。

结果java中,还是返回html中不带username的。

还是无法获得对应的cookie。

11.然后的然后,去检查自己代码,看看post data中是否少传了什么参数,结果发现一个惊天大问题:

之前只注意去java中从控制台中获得用户名和密码,结果竟然忘了将用户名和密码传入到post data中。。。。

你妹的,这个也太疏忽了。。。

所以,去加上,变成:

        	String staticPageUrl = "http://www.baidu.com/cache/user/html/jump.html";

        	// Form fields of the login POST, in the order they are sent.
        	// ppui_logintime, codestring, u and safeflg were considered in
        	// earlier drafts but are not sent.
        	List<NameValuePair> postDict = new ArrayList<NameValuePair>();
        	postDict.add(new BasicNameValuePair("charset", "utf-8"));
        	postDict.add(new BasicNameValuePair("token", strTokenValue));
        	postDict.add(new BasicNameValuePair("isPhone", "false"));
        	postDict.add(new BasicNameValuePair("index", "0"));
        	postDict.add(new BasicNameValuePair("staticpage", staticPageUrl));
        	postDict.add(new BasicNameValuePair("loginType", "1"));
        	postDict.add(new BasicNameValuePair("tpl", "mn"));
        	postDict.add(new BasicNameValuePair("callback", "parent.bdPass.api.login._postCallback"));

        	// The Scanner is left open on purpose: closing it would also close System.in.
        	Scanner inputReader = new Scanner(System.in);
        	System.out.println("Please Enter Your:" );
        	System.out.println("Baidu Username:" );
        	String strBaiduUsername = inputReader.nextLine();
        	System.out.println("You Entered Username=" + strBaiduUsername);
        	System.out.println("Baidu Password:" );
        	String strBaiduPassword = inputReader.nextLine();
        	// The password is deliberately not echoed back to the console.

        	// The credentials must be part of the form, otherwise the login cannot succeed.
        	postDict.add(new BasicNameValuePair("username", strBaiduUsername));
        	postDict.add(new BasicNameValuePair("password", strBaiduPassword));

        	postDict.add(new BasicNameValuePair("verifycode", ""));

        	postDict.add(new BasicNameValuePair("mem_pass", "on"));

        	String baiduMainLoginUrl = "https://passport.baidu.com/v2/api/?login";
        	String loginBaiduRespHtml = crl.getUrlRespHtml(baiduMainLoginUrl, null, postDict);

然后再去试试:

就可以了:

也可以返回cookie了:

[
    [version: 0][name: BAIDUID][value: 2804E22D17A174919725D664400B91F5:FG=1][domain: .baidu.com][path: /][expiry: Thu Sep 17 17:39:10 CST 2043],
    [version: 0][name: BDSVRTM][value: 2][domain: .baidu.com][path: /][expiry: Sat Oct 17 00:00:00 CST 2043],
    [version: 0][name: BDUSS][value: hVT3RTT2pJcE45azE2R3dxbmVMVXBPaXBnQmlzLVhWNEkyZU5kaEVIaFhzRjlTQVFBQUFBJCQAAAAAAAAAAAEAAAAZP0gCYWdhaW5pbnB1dDMAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAFcjOFJXIzhSb0][domain: baidu.com][path: /][expiry: Sat Dec 04 17:39:35 CST 2021],
    [version: 0][name: HOSUPPORT][value: 1][domain: passport.baidu.com][path: /][expiry: Sat Dec 04 17:39:11 CST 2021],
    [version: 0][name: H_PS_PSSID][value: 3361_2776_1466_2976_2980_3092_3109][domain: .baidu.com][path: /][expiry: Sat Oct 17 00:00:00 CST 2043],
    [version: 0][name: PTOKEN][value: 7de481b4f02704457006d4a457041ceb][domain: passport.baidu.com][path: /][expiry: Sat Dec 04 17:39:35 CST 2021],
    [version: 0][name: STOKEN][value: e35d5d2ab6ad6300cbf92a47ddc90337][domain: passport.baidu.com][path: /][expiry: Sat Dec 04 17:39:35 CST 2021],
    [version: 0][name: USERNAMETYPE][value: 1][domain: passport.baidu.com][path: /][expiry: Sat Dec 04 17:39:35 CST 2021]
]

返回的html中,也带username了:

<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
</head>
<body>


<script type="text/javascript">

var url = encodeURI('http://www.baidu.com/cache/user/html/jump.html?hao123Param=aFZUM1JUVDJwSmNFNDVhekUyUjNkeGJtVk1WWEJQYVhCblFtbHpMVmhXTkVreVpVNWthRVZJYUZoelJqbFRRVkZCUVVGQkpDUUFBQUFBQUFBQUFBRUFBQUFaUDBnQ1lXZGhhVzVwYm5CMWRETUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFGY2pPRkpYSXpoU2Iw&callback=parent.bdPass.api.login._postCallback&index=0&codestring=&username=xxx&phonenumber=&mail=&tpl=mn&u=https%3A%2F%2Fpassport.baidu.com%2F&needToModifyPassword=0&gotourl=&auth=&error=0');
//parent.callback(url)
window.location.replace(url);

</script>
</body>
</html>

12.然后,此处,其实又出现一点点小意外:

java中,返回的cookie中,没有:SAVEUSERID

而C#中,返回的cookie中是有:SAVEUSERID的。

然后去看看获得的response,结果是,其中的cookie中,是有SAVEUSERID的:

Set-Cookie: BDUSS=stVkRscHpnak5yVXJLcFpUVENqczF1Q3NhbmltV2Rwc2V-bmp6Q3JzT2ZzVjlTQVFBQUFBJCQAAAAAAAAAAAEAAAAZP0gCYWdhaW5pbnB1dDMAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAJ8kOFKfJDhSaG; expires=Sat, 04-Dec-2021 09:45:04 GMT; path=/; domain=baidu.com; httponly,
Set-Cookie: PTOKEN=deleted; expires=Mon, 17-Sep-2012 09:45:03 GMT; path=/; domain=baidu.com; httponly,
Set-Cookie: PTOKEN=d75d78bd32c23c5cdb9c27b2f484ea9c; expires=Sat, 04-Dec-2021 09:45:04 GMT; path=/; domain=passport.baidu.com; httponly,
Set-Cookie: STOKEN=75ba9addf1f921c25a64dd7f7d278d40; expires=Sat, 04-Dec-2021 09:45:04 GMT; path=/; domain=passport.baidu.com; httponly,
Set-Cookie: SAVEUSERID=deleted; expires=Mon, 17-Sep-2012 09:45:03 GMT; path=/; domain=passport.baidu.com; httponly,
Set-Cookie: USERNAMETYPE=1; expires=Sat, 04-Dec-2021 09:45:04 GMT; path=/; domain=passport.baidu.com; httponly

但是很明显,SAVEUSERID是deleted,

所以被java中的代码解析后,就被丢掉了。

所以,此处,java中,获得的CookieStore,是没有SAVEUSERID的。其实就是正常的现象了。

所以,java中,判断模拟登陆百度成功后的cookie的判断,就需要修改一下,去掉针对于SAVEUSERID的判断,变成:

            // Cookies that must all be present after a successful login.
            // SAVEUSERID is not required: the login response sets it to
            // "deleted" with an expiry date in the past, so it never shows up
            // in the cookie store.
            String[] requiredCookieNames = {"BDUSS", "PTOKEN", "STOKEN"};

            curCookieList = crl.getCurCookieList();
            boolean bAllCookiesFound = true;
            for (String strCookieName : requiredCookieNames)
            {
                boolean bFoundCurCookie = false;
                for (Cookie ck : curCookieList)
                {
                    if (strCookieName.equalsIgnoreCase(ck.getName()))
                    {
                        bFoundCurCookie = true;
                        break;
                    }
                }
                bAllCookiesFound = bAllCookiesFound && bFoundCurCookie;
            }

            bLoginBaiduOk = bAllCookiesFound;

            if (bLoginBaiduOk)
            {
                System.out.println("成功模拟登陆百度首页!" );
            }
            else
            {
                System.out.println("模拟登陆百度首页 失败!");
                System.out.println("所返回的HTML源码为:" + loginBaiduRespHtml);
            }

即可。

成功模拟登陆百度后的输出是:

Cookies for http://www.baidu.com/
[version: 0][name: BAIDUID][value: EB16BC3D9EB401EAD360D7F49C635382:FG=1][domain: .baidu.com][path: /][expiry: Thu Sep 17 17:49:06 CST 2043]
[version: 0][name: BDSVRTM][value: 2][domain: www.baidu.com][path: /][expiry: null]
[version: 0][name: H_PS_PSSID][value: 3360_3380_1466_2976][domain: .baidu.com][path: /][expiry: null]
正确:已找到cookie BAIDUID
正确:找到 bdPass.api.params.login_token=7744b9c37d759c47709e44b98527af0c
Please Enter Your:
Baidu Username:
xxx
Baidu Password:
yyy
成功模拟登陆百度首页!

如图:

can found cookie and token and emulate login baidu ok

随后,再稍微整理一下代码去,即可。

 

【总结】

此处,java中模拟登陆百度访问login的url,没有获得所希望的返回的html和cookie,原因是:

自己粗心大意,导致忘了发送用户名和密码

所以必然登陆失败。

解决办法:

(1)传递对应的username和password,即可成功返回所希望的cookie;

(2)此处java中,将:

Set-Cookie: SAVEUSERID=deleted; expires=Mon, 17-Sep-2012 09:45:03 GMT; path=/; domain=passport.baidu.com; httponly,

解析后,丢掉了。

所以,java中,判断模拟登陆百度是否成功的检测cookie时,就不去判断对应的SAVEUSERID,只判断:BDUSS,PTOKEN,STOKEN即可。

(3)完整的,成功的,模拟登陆百度的代码,自己去看:

【教程】模拟登陆百度之Java代码版



发表评论

电子邮件地址不会被公开。 必填项已用*标注

无觅相关文章插件,快速提升流量