
我正在用node.js编写一个应用程序,它需要向外部网站发出http请求,并需要包括登录和管理这些网站的cookie的功能,以便会话id始终存在于任何后续请求头中。
在Java中执行类似的任务时,使用java.net.CookieHandler & java.net.CookieManager,加上java.net.HttpURLConnection来进行请求是很直接的(如果有用的话,我可以提供这方面的示例代码,但现在不想把这个线程搞得太乱,因为重点应该放在node.js的实现上):每次进行请求时,cookie都会按照预期的那样,根据Set-Cookie响应头正确更新和维护。
对于node.js应用,我试图使用restler(^3.2.2)来处理http(s)请求,并使用cookie-manager(^0.0.19)来管理cookie。这似乎需要在发送每个请求时在请求头中手动设置cookie,并在请求完成时根据响应头更新cookie。登录请求的示例代码如下:
// Libraries: lodash (collection helpers), restler (HTTP client),
// cookie-manager (cookie store keyed by host).
var _ = require('lodash');
var restler = require('restler');
var CM = require('cookie-manager');
var cm = new CM();

var url = 'https://' + host1 + '/page';

// Returns the part of a Set-Cookie header value that precedes the first ';'.
function leadingCookiePair(setCookieValue){
    return setCookieValue.split(';')[0];
}

// Cookie header value for host1, as prepared by the cookie store.
var host1CookieHeader = cm.prepare( host1 );

// Request options; postVars is defined elsewhere for the request.
restlerOptions = {
    headers : {'Cookie': host1CookieHeader},
    followredirects: true,
    timeout: 5000,
    multipart: false,
    data: postVars
};

// The success/timeout/error callbacks are defined elsewhere for each request.
restler.post(url, restlerOptions)
    .on('complete', function(data, res){
        var setCookieHeaders = res.headers["set-cookie"];
        if (setCookieHeaders != null){
            // Reduce every Set-Cookie value to its leading pair, join the
            // pairs with ';' and store the resulting string for host1.
            var joinedPairs = _.map(setCookieHeaders, leadingCookiePair, "").join(";");
            cm.store(host1, joinedPairs);
        }
        successcallback(data, res);
    })
    .on("timeout", function(){
        timeoutcallback();
    })
    .on("error", function(err){
        errorcallback(err);
    });
我面临的问题是重定向:有时第三方网站的登录页面会重定向到一个新的主机/子域等。此时应该发生的情况是:后续的GET请求应该发往新的主机,并且应该为重定向到的主机管理一套新的cookie。最后的重定向应该返回到原来的主机,并且原来的cookie应该仍在使用。这个过程的请求头示例如下:
Req1 headers:
POST https://host1/page HTTP/1.1
Host: host1
Cookie: host1-cookie0=val0 //Cookie already present for host
Req1 response:
HTTP/1.1 302 Found
Set-cookie: host1-cookie1=val1
Set-cookie: host1-cookie2=val2
Location: https://host2/page
Req2 headers:
GET https://host2/page HTTP/1.1
Host: host2
<no cookie> //No cookie set yet for new host
Req2 response:
HTTP/1.1 302 Found
Set-cookie: host2-cookie1=val3
Set-cookie: host2-cookie2=val4
Location: https://host1/result
Req3 headers:
GET https://host1/result HTTP/1.1
Host: host1
Cookie: host1-cookie0=val0; host1-cookie1=val1; host1-cookie2=val2; //Cookies from Req1 response appended for host1
Req3 response:
HTTP/1.1 200 OK
Set-cookie: host1-cookie3=val5
Set-cookie: host1-cookie4=val6
Req4 headers:
GET https://host1/newpage HTTP/1.1
Host: host1
Cookie: host1-cookie0=val0; host1-cookie1=val1; host1-cookie2=val2; host1-cookie3=val5; host1-cookie4=val6 //All cookies set as expected for host1
我看到三个问题:
1) 重定向之后的后续请求使用的是POST,而不是GET。
2) 在最初的请求头中设置的cookie会被用于所有后续请求,无论后续主机是否发生变化,也不管重定向响应头中设置了哪些cookie(cookie似乎只有在收到200状态的响应时才会被保存)。
3) 我在上面使用的cookie设置代码本应遍历所有 "Set-cookie" 头,并保存每个头字符串的第一部分;然而,它似乎只对遇到的第一个 "Set-Cookie" 头生效。示例如下:
Req1 headers:
POST https://host1/page HTTP/1.1
Host: host1
Cookie: host1-cookie0=val0 //Cookie already present for host
Req1 response:
HTTP/1.1 302 Found
Set-cookie: host1-cookie1=val1
Set-cookie: host1-cookie2=val2
Location: https://host2/page
Req2 headers:
POST https://host2/page HTTP/1.1 //This should be GET not POST!
Host: host2
Cookie: host1-cookie0=val0 //This should not be set!
Req2 response:
HTTP/1.1 302 Found
Set-cookie: host2-cookie1=val3
Set-cookie: host2-cookie2=val4
Location: https://host1/result
Req3 headers:
POST https://host1/result HTTP/1.1 //This should be GET not POST!
Host: host1
Cookie: host1-cookie0=val0 //Req1 response cookies not set!
Req3 response:
HTTP/1.1 200 OK
Set-cookie: host1-cookie3=val5
Set-cookie: host1-cookie4=val6
Req4 headers:
GET https://host1/newpage HTTP/1.1
Host: host1
Cookie: host1-cookie0=val0; host1-cookie3=val5; //Only first cookie from Req3 response is appended
这是 restler/cookie-manager 库的局限性,还是需要更聪明的方法(例如,不使用自动重定向,用新 cookie 的 GET 方式手动发送后续请求)?虽然正在构建的应用程序被限制在 node.js 中运行,但对使用的库没有限制,所以如果切换到其他 http/cookie 管理库是明智的,我愿意这样做。
投票
1)为了防止POST之后自动跟随重定向,我不得不在初始请求选项中添加 "followRedirects: false",并在响应代码是[301,302,303]之一时手动重新提交GET请求。
2)由于重定向是手动进行的,所以我能够在每次请求时根据新的域名手动设置cookie。
3) 不需要从每个 "Set-cookie "头中提取值,并将它们附加到一个单一的字符串中--cookie-manager会自动做到这一点。
新的工作代码(结合了上述1,2,3的修正)。
// Libraries: lodash, restler (HTTP client), cookie-manager (cookie store keyed by host).
var _ = require('lodash'),
    restler = require('restler'),
    CM = require('cookie-manager'),
    cm = new CM();

var url = 'https://' + host1 + '/page';

// Declared with var so the options object is not leaked as an implicit global.
var restlerOptions = {
    //Set the cookie for host1 in the request header
    headers : {'Cookie': cm.prepare( host1 )},
    // Redirects are followed by hand (see the 'complete' handler) so that each
    // hop is a GET carrying the cookies of the host it is sent to.
    followRedirects: false,
    timeout: 5000,
    multipart: false,
    //post vars defined elsewhere for the request
    data: postVars
};

//Various callback functions defined elsewhere for each request
restler.post(url, restlerOptions).on('complete', function(data, res){
    // Hand the raw Set-Cookie header(s) to the cookie store for host1;
    // skipped when the response sets no cookies.
    var setCookie = res.headers["set-cookie"];
    if (setCookie != null){
        cm.store(host1, setCookie);
    }

    if ([301,302,303].indexOf(res.statusCode) === -1){
        successcallback(data);
        return;
    }

    var location = res.headers["location"];
    if (!location){
        errorcallback(new Error('Redirect response from ' + url + ' has no Location header'));
        return;
    }
    // Location may be relative; resolve it against the URL that was just requested.
    redirectCallback(new URL(location, url).href, successcallback, errorcallback, timeoutcallback);
}).on("timeout", function(){
    timeoutcallback();
}).on("error", function(err){
    errorcallback(err);
});
/**
 * Follows a redirect by hand: issues a GET to `url` with the cookies stored
 * for that URL's host, stores any cookies the response sets for that host,
 * and repeats for as long as the response status is 301, 302 or 303.
 *
 * Relies on `restler`, `cm` (the cookie store) and `getHostFromUrl` being in
 * scope; getHostFromUrl is defined elsewhere and strips the page/query
 * parameters from the URL (cookie-manager does not seem to do this itself).
 *
 * @param {string} url - Absolute URL to request.
 * @param {function} successcallback - Called with the response data of the
 *   first non-redirect response.
 * @param {function} errorcallback - Called with the error on a request error,
 *   or when a redirect response carries no Location header.
 * @param {function} timeoutcallback - Called with no arguments on timeout.
 */
function redirectCallback(url,successcallback,errorcallback,timeoutcallback){
    var host = getHostFromUrl(url);
    var options = {
        //Set the cookie for new host in the request header
        headers : {'Cookie': cm.prepare( host )},
        followRedirects: false,
        timeout: 5000
    };

    // Use the options built above for this hop, not the options of the
    // initial POST (which carry the first host's cookie and the post data).
    restler.get(url, options).on('complete', function(data, res){
        var setCookie = res.headers["set-cookie"];
        if (setCookie != null){
            cm.store(host, setCookie);
        }

        if ([301,302,303].indexOf(res.statusCode) === -1){
            successcallback(data);
            return;
        }

        var location = res.headers["location"];
        if (!location){
            errorcallback(new Error('Redirect response from ' + url + ' has no Location header'));
            return;
        }
        // Location may be relative; resolve it against the URL that was just requested.
        redirectCallback(new URL(location, url).href, successcallback, errorcallback, timeoutcallback);
    }).on("timeout", function(){
        timeoutcallback();
    }).on("error", function(err){
        errorcallback(err);
    });
}