You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
577 lines
16 KiB
577 lines
16 KiB
3 years ago
|
|
||
|
// HtCookieMemJar.cc
|
||
|
//
|
||
|
// HtCookieMemJar: This class stores/retrieves cookies.
|
||
|
//
|
||
|
// by Robert La Ferla. Started 12/9/2000.
|
||
|
// Reviewed by G.Bartolini - since 24 Feb 2001
|
||
|
//
|
||
|
////////////////////////////////////////////////////////////
|
||
|
//
|
||
|
// The HtCookieMemJar class stores/retrieves cookies
|
||
|
// directly into memory. It is derived from HtCookieJar class.
|
||
|
//
|
||
|
// See "PERSISTENT CLIENT STATE HTTP COOKIES" Specification
|
||
|
// at http://www.netscape.com/newsref/std/cookie_spec.html
|
||
|
// Modified according to RFC2109 (max age and version attributes)
|
||
|
//
|
||
|
///////
|
||
|
//
|
||
|
// Part of the ht://Dig package <http://www.htdig.org/>
|
||
|
// Part of the ht://Check package <http://htcheck.sourceforge.net/>
|
||
|
// Copyright (c) 2001-2004 The ht://Dig Group
|
||
|
// For copyright details, see the file COPYING in your distribution
|
||
|
// or the GNU Library General Public License (LGPL) version 2 or later
|
||
|
// <http://www.gnu.org/copyleft/lgpl.html>
|
||
|
//
|
||
|
// $Id: HtCookieMemJar.cc,v 1.10 2004/05/28 13:15:23 lha Exp $
|
||
|
//
|
||
|
|
||
|
#ifdef HAVE_CONFIG_H
|
||
|
#include "htconfig.h"
|
||
|
#endif
|
||
|
|
||
|
#include "HtCookieMemJar.h"
|
||
|
#include "HtCookie.h"
|
||
|
#include "List.h"
|
||
|
#include "Dictionary.h"
|
||
|
#include <stdlib.h>
|
||
|
#include <ctype.h>
|
||
|
|
||
|
#ifdef HAVE_STD
|
||
|
#include <iostream>
|
||
|
#ifdef HAVE_NAMESPACES
|
||
|
using namespace std;
|
||
|
#endif
|
||
|
#else
|
||
|
#include <iostream.h>
|
||
|
#endif /* HAVE_STD */
|
||
|
|
||
|
// Constructor
|
||
|
HtCookieMemJar::HtCookieMemJar()
|
||
|
: _key(0), _list(0), _idx(0)
|
||
|
{
|
||
|
cookieDict = new Dictionary();
|
||
|
cookieDict->Start_Get(); // reset the iterator
|
||
|
}
|
||
|
|
||
|
// Copy constructor
|
||
|
HtCookieMemJar::HtCookieMemJar(const HtCookieMemJar& rhs)
|
||
|
: _key(0), _list(0), _idx(0)
|
||
|
{
|
||
|
|
||
|
if (rhs.cookieDict)
|
||
|
{
|
||
|
// Let's perform a deep copy of the 'jar'
|
||
|
cookieDict = new Dictionary();
|
||
|
rhs.cookieDict->Start_Get();
|
||
|
|
||
|
// Let's walk the domains
|
||
|
while (char* d = rhs.cookieDict->Get_Next())
|
||
|
{
|
||
|
List* l = new List();
|
||
|
cookieDict->Add(d, l); // add that domain
|
||
|
|
||
|
// Let's walk the cookies for that domain
|
||
|
if (List* rhsl = (List*) rhs.cookieDict->Find(d))
|
||
|
{
|
||
|
|
||
|
rhsl->Start_Get();
|
||
|
|
||
|
while (HtCookie* cookie = ((HtCookie *)rhsl->Get_Next()))
|
||
|
{
|
||
|
HtCookie* new_cookie = new HtCookie(*cookie);
|
||
|
l->Add((Object *)new_cookie); // add this cookie
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
else
|
||
|
cookieDict = new Dictionary();
|
||
|
|
||
|
cookieDict->Start_Get(); // reset the iterator
|
||
|
}
|
||
|
|
||
|
// Destructor
|
||
|
HtCookieMemJar::~HtCookieMemJar()
|
||
|
{
|
||
|
if (debug>4)
|
||
|
printDebug();
|
||
|
|
||
|
if (cookieDict)
|
||
|
delete cookieDict;
|
||
|
}
|
||
|
|
||
|
// Add a cookie to the Jar
|
||
|
int HtCookieMemJar::AddCookie(const String &CookieString, const URL &url)
|
||
|
{
|
||
|
|
||
|
// Builds a new Cookie object
|
||
|
HtCookie *Cookie = new HtCookie(CookieString, url.get());
|
||
|
|
||
|
// Interface to the insert method
|
||
|
// If the cookie has not been added, we'd better delete it
|
||
|
if (!AddCookieForHost (Cookie, url.host()))
|
||
|
delete Cookie;
|
||
|
|
||
|
return true;
|
||
|
|
||
|
}
|
||
|
|
||
|
|
||
|
// Add a cookie to a host
|
||
|
int HtCookieMemJar::AddCookieForHost(HtCookie *cookie, String HostName)
|
||
|
{
|
||
|
|
||
|
List *list; // pointer to the Cookie list of an exact host
|
||
|
HtCookie *theCookie;
|
||
|
bool inList = false;
|
||
|
|
||
|
/////////////////////////////////////////////////////////////
|
||
|
// That's an abstract from the Netscape Cookies specification
|
||
|
/////////////////////////////////////////////////////////////
|
||
|
//
|
||
|
// When searching the cookie list for valid cookies,
|
||
|
// a comparison of the domain attributes of the cookie
|
||
|
// is made with the Internet domain name of the host from which the URL
|
||
|
// will be fetched. If there is a tail match, then the cookie
|
||
|
// will go through path matching to see if it should be sent.
|
||
|
//
|
||
|
// "Tail matching" means that domain attribute is matched against
|
||
|
// the tail of the fully qualified domain name of the host.
|
||
|
// A domain attribute of "acme.com" would match host names "anvil.acme.com"
|
||
|
// as well as "shipping.crate.acme.com".
|
||
|
//
|
||
|
// Only hosts within the specified domain can set a cookie
|
||
|
// for a domain and domains must have at least two (2)
|
||
|
// or three (3) periods in them to prevent domains of
|
||
|
// the form: ".com", ".edu", and "va.us".
|
||
|
//
|
||
|
// Any domain that fails within one of the seven special top level domains
|
||
|
// listed below only require two periods.
|
||
|
// Any other domain requires at least three.
|
||
|
//
|
||
|
// The seven special top level domains are:
|
||
|
// "COM", "EDU", "NET", "ORG", "GOV", "MIL", and "INT".
|
||
|
//
|
||
|
// The default value of domain is the host name of the
|
||
|
// server which generated the cookie response.
|
||
|
//
|
||
|
/////////////////////////////////////////////////////////////
|
||
|
|
||
|
|
||
|
// Let's get the domain of the cookie
|
||
|
String Domain(cookie->GetDomain());
|
||
|
|
||
|
// Lowercase the HostName
|
||
|
HostName.lowercase();
|
||
|
|
||
|
if (!Domain.length())
|
||
|
Domain = HostName;
|
||
|
else
|
||
|
{
|
||
|
Domain.lowercase(); // lowercase the domain
|
||
|
|
||
|
// The cookie's domain must have a minimum number of periods
|
||
|
// inside, as stated by the abstract cited above
|
||
|
int minimum_periods = GetDomainMinNumberOfPeriods(Domain);
|
||
|
|
||
|
if (!minimum_periods)
|
||
|
{
|
||
|
if (debug > 2)
|
||
|
cout << "Cookie - Invalid domain "
|
||
|
<< "(minimum number of periods): " << Domain << endl;
|
||
|
|
||
|
cookie->SetIsDomainValid(false);
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
// Let's see if the domain is now valid
|
||
|
const char* s = Domain.get();
|
||
|
const char* r = s + strlen(s) - 1; // go to the last char
|
||
|
int num_periods = 1; // at minimum is one
|
||
|
|
||
|
while (r > s && *r)
|
||
|
{
|
||
|
if (*r == '.' && *(r+1) && *(r+1) != '.')
|
||
|
++num_periods; // when a 'dot' is found increment
|
||
|
// the number of periods
|
||
|
--r;
|
||
|
}
|
||
|
|
||
|
if (num_periods >= minimum_periods) // here is a so-far valid domain
|
||
|
{
|
||
|
while (*r && *r == '.')
|
||
|
++r; // goes beyond the first dot
|
||
|
|
||
|
if (r>s)
|
||
|
Domain.set((char*) r); // Set the new 'shorter' domain
|
||
|
|
||
|
|
||
|
if (HostName.indexOf(Domain.get()) != -1)
|
||
|
{
|
||
|
if (debug > 2)
|
||
|
cout << "Cookie - valid domain: "
|
||
|
<< Domain << endl;
|
||
|
}
|
||
|
else if (HostName.length() == 0)
|
||
|
{
|
||
|
if (debug > 2)
|
||
|
cout << "Imported cookie - valid domain: "
|
||
|
<< Domain << endl;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
cookie->SetIsDomainValid(false);
|
||
|
if (debug > 2)
|
||
|
cout << "Cookie - Invalid domain "
|
||
|
<< "(host not within the specified domain): " << Domain << endl;
|
||
|
}
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
cookie->SetIsDomainValid(false);
|
||
|
if (debug > 2)
|
||
|
cout << "Cookie - Invalid domain "
|
||
|
<< "(minimum number of periods): " << Domain << endl;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (! cookie->getIsDomainValid()) // Not a valid domain
|
||
|
Domain = HostName; // Set the default
|
||
|
|
||
|
// Is the host in the dictionary?
|
||
|
if (cookieDict->Exists(Domain) == 0)
|
||
|
{
|
||
|
// No, add a list instance
|
||
|
list = new List();
|
||
|
cookieDict->Add(Domain, list);
|
||
|
}
|
||
|
else list = (List *)cookieDict->Find(Domain);
|
||
|
|
||
|
// Is cookie already in list?
|
||
|
list->Start_Get();
|
||
|
|
||
|
// Let's start looking for it
|
||
|
// The match is made on the name and the path
|
||
|
|
||
|
if (debug > 5)
|
||
|
cout << "- Let's go searching for the cookie '"
|
||
|
<< cookie->GetName() << "' in the list" << endl;
|
||
|
|
||
|
while (!inList && (theCookie = (HtCookie *)list->Get_Next()))
|
||
|
{
|
||
|
if ( (theCookie->GetName().compare(cookie->GetName()) == 0 )
|
||
|
&& ( theCookie->GetPath().compare(cookie->GetPath()) == 0 ))
|
||
|
{
|
||
|
// The cookie has been found
|
||
|
inList = true;
|
||
|
|
||
|
// Let's update the expiration datetime
|
||
|
if (debug > 5)
|
||
|
cout << " - Found: Update cookie expire time." << endl;
|
||
|
|
||
|
theCookie->SetExpires(cookie->GetExpires());
|
||
|
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Well ... the cookie wasn't in the list. Until now! ;-)
|
||
|
// Let's go add it!
|
||
|
if (inList == false)
|
||
|
{
|
||
|
if (debug > 5)
|
||
|
cout << " - Not Found: let's go add it." << endl;
|
||
|
|
||
|
list->Add((Object *)cookie);
|
||
|
}
|
||
|
|
||
|
return !inList;
|
||
|
}
|
||
|
|
||
|
|
||
|
// Retrieve all cookies that are valid for a domain
|
||
|
List * HtCookieMemJar::cookiesForDomain(const String &DomainName)
|
||
|
{
|
||
|
List * list;
|
||
|
|
||
|
list = (List *)cookieDict->Find(DomainName);
|
||
|
return list;
|
||
|
}
|
||
|
|
||
|
|
||
|
|
||
|
int HtCookieMemJar::SetHTTPRequest_CookiesString(const URL &_url,
|
||
|
String &RequestString)
|
||
|
{
|
||
|
|
||
|
// Let's split the URL domain and get all of the subdomains.
|
||
|
// For instance:
|
||
|
// - bar.com
|
||
|
// - foo.bar.com
|
||
|
// - www.foo.bar.com
|
||
|
|
||
|
String Domain(_url.host());
|
||
|
Domain.lowercase();
|
||
|
|
||
|
int minimum_periods = GetDomainMinNumberOfPeriods(Domain);
|
||
|
|
||
|
if (debug > 3)
|
||
|
cout << "Looking for cookies - Domain: "
|
||
|
<< Domain
|
||
|
<< " (Minimum periods: " << minimum_periods << ")" << endl;
|
||
|
|
||
|
// Let's get the subdomains, starting from the end
|
||
|
const char* s = Domain.get();
|
||
|
const char* r = s + strlen(s) - 1; // go to the last char
|
||
|
int num_periods = 1; // at minimum is one
|
||
|
|
||
|
while (r > s && *r)
|
||
|
{
|
||
|
if (*r == '.' && *(r+1) && *(r+1) != '.')
|
||
|
{
|
||
|
++num_periods; // when a 'dot' is found increment
|
||
|
// the number of periods
|
||
|
|
||
|
if (num_periods > minimum_periods) // here is a so-far valid domain
|
||
|
{
|
||
|
const String SubDomain(r+1);
|
||
|
if (debug > 3)
|
||
|
cout << "Trying to find cookies for subdomain: "
|
||
|
<< SubDomain << endl;
|
||
|
|
||
|
if (cookieDict->Exists(SubDomain))
|
||
|
WriteDomainCookiesString(_url, SubDomain, RequestString);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
--r;
|
||
|
}
|
||
|
|
||
|
if (num_periods >= minimum_periods
|
||
|
&& cookieDict->Exists(Domain))
|
||
|
// Let's send cookies for this domain to the Web server ...
|
||
|
WriteDomainCookiesString(_url, Domain, RequestString);
|
||
|
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
|
||
|
|
||
|
/////////////////////////////////////////////////////////////
|
||
|
// That's an abstract from the Netscape Cookies specification
|
||
|
/////////////////////////////////////////////////////////////
|
||
|
//
|
||
|
//
|
||
|
// When requesting a URL from an HTTP server, the browser will match
|
||
|
// the URL against all cookies and if any of them match,
|
||
|
// a line containing the name/value pairs of all matching cookies
|
||
|
// will be included in the HTTP request.
|
||
|
//
|
||
|
// Here is the format of that line:
|
||
|
// Cookie: NAME1=OPAQUE_STRING1; NAME2=OPAQUE_STRING2 ...
|
||
|
//
|
||
|
// This method writes on a string (RequestString) the headers
|
||
|
// for cookies settings as defined by Netscape standard
|
||
|
//
|
||
|
/////////////////////////////////////////////////////////////
|
||
|
|
||
|
int HtCookieMemJar::WriteDomainCookiesString(const URL &_url,
|
||
|
const String &Domain, String &RequestString)
|
||
|
{
|
||
|
|
||
|
// Cookie support. We need a list of cookies and a cookie object
|
||
|
List *cookieList;
|
||
|
HtCookie *cookie;
|
||
|
const HtDateTime now; // Instant time, used for checking
|
||
|
// cookies expiration time
|
||
|
|
||
|
// Let's find all the valid cookies depending on the specified domain
|
||
|
cookieList = cookiesForDomain(Domain);
|
||
|
|
||
|
if (cookieList)
|
||
|
{
|
||
|
// Let's store the number of cookies eventually sent
|
||
|
int NumCookies = 0;
|
||
|
|
||
|
if (debug > 5)
|
||
|
cout << "Found a cookie list for: '" << Domain << "'" << endl;
|
||
|
|
||
|
// Let's crawl the list for getting the 'path' matching ones
|
||
|
cookieList->Start_Get();
|
||
|
|
||
|
while ((cookie = (HtCookie *)cookieList->Get_Next()))
|
||
|
{
|
||
|
const String cookiePath = cookie->GetPath();
|
||
|
const String urlPath = _url.path();
|
||
|
|
||
|
//
|
||
|
// Let's see if the cookie has expired
|
||
|
// by checking the Expires value of it
|
||
|
// If it's not empty and the datetime
|
||
|
// is before now.
|
||
|
//
|
||
|
// Another way of determining whether a
|
||
|
// cookie is expired is checking the
|
||
|
// max_age property that is to say:
|
||
|
// (now - issuetime <= maxage).
|
||
|
//
|
||
|
const bool expired =
|
||
|
(cookie->GetExpires() && (*(cookie->GetExpires()) < now)) // Expires
|
||
|
|| (HtDateTime::GetDiff(now, cookie->GetIssueTime())
|
||
|
<= cookie->GetMaxAge()); // Max-age
|
||
|
|
||
|
if (debug > 5)
|
||
|
cout << "Trying to match paths and expiration time: "
|
||
|
<< urlPath << " in " << cookiePath;
|
||
|
|
||
|
// Is the path matching
|
||
|
if (!expired && !strncmp(cookiePath, urlPath, cookiePath.length()))
|
||
|
{
|
||
|
|
||
|
if (debug > 5)
|
||
|
cout << " (passed)" << endl;
|
||
|
|
||
|
++NumCookies;
|
||
|
|
||
|
// Write the string by passing the cookie to the superclass' method
|
||
|
WriteCookieHTTPRequest(*cookie, RequestString, NumCookies);
|
||
|
|
||
|
}
|
||
|
else if (debug > 5) cout << " (discarded)" << endl;
|
||
|
|
||
|
}
|
||
|
|
||
|
// Have we sent one cookie at least?
|
||
|
if (NumCookies > 0)
|
||
|
RequestString <<"\r\n";
|
||
|
|
||
|
}
|
||
|
|
||
|
// That's the end of function
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
|
||
|
// Debug info
|
||
|
void HtCookieMemJar::printDebug()
|
||
|
{
|
||
|
char * key;
|
||
|
|
||
|
cookieDict->Start_Get();
|
||
|
|
||
|
cout << "Summary of the cookies stored so far" << endl;
|
||
|
|
||
|
while ((key = cookieDict->Get_Next()))
|
||
|
{
|
||
|
List * list;
|
||
|
HtCookie * cookie;
|
||
|
|
||
|
cout << " - View cookies for: '" << key << "'" << endl;
|
||
|
list = (List *)cookieDict->Find(key);
|
||
|
list->Start_Get();
|
||
|
|
||
|
while ((cookie = (HtCookie *)list->Get_Next()))
|
||
|
cookie->printDebug();
|
||
|
}
|
||
|
}
|
||
|
|
||
|
|
||
|
///////
|
||
|
// Show the summary of the stored cookies
|
||
|
///////
|
||
|
|
||
|
ostream &HtCookieMemJar::ShowSummary(ostream &out)
|
||
|
{
|
||
|
|
||
|
char * key;
|
||
|
int num_cookies = 0; // Global number of cookies
|
||
|
int num_server = 0; // Number of servers with cookies
|
||
|
|
||
|
cookieDict->Start_Get();
|
||
|
|
||
|
out << endl << "Summary of the cookies" << endl;
|
||
|
out << "======================" << endl;
|
||
|
|
||
|
while ((key = cookieDict->Get_Next()))
|
||
|
{
|
||
|
List * list;
|
||
|
HtCookie * cookie;
|
||
|
int num_cookies_server = 0;
|
||
|
|
||
|
++num_server; // Number of servers with cookies
|
||
|
|
||
|
out << " Host: '" << key << "'" << endl;
|
||
|
list = (List *)cookieDict->Find(key);
|
||
|
list->Start_Get();
|
||
|
|
||
|
while ((cookie = (HtCookie *)list->Get_Next()))
|
||
|
{
|
||
|
++num_cookies_server;
|
||
|
cookie->printDebug();
|
||
|
}
|
||
|
|
||
|
out << " Number of cookies: " << num_cookies_server << endl << endl;
|
||
|
|
||
|
// Global number of cookies
|
||
|
num_cookies += num_cookies_server;
|
||
|
}
|
||
|
|
||
|
out << "Total number of cookies: " << num_cookies << endl;
|
||
|
out << "Servers with cookies: " << num_server << endl << endl;
|
||
|
|
||
|
return out;
|
||
|
|
||
|
}
|
||
|
|
||
|
|
||
|
// Get the next cookie. It is a bit tricky, but for now it is good
|
||
|
const HtCookie* HtCookieMemJar::NextCookie()
|
||
|
{
|
||
|
if (!cookieDict)
|
||
|
return 0;
|
||
|
|
||
|
if (!_idx && (_key = cookieDict->Get_Next())
|
||
|
&& (_list = (List *)cookieDict->Find(_key)))
|
||
|
_list->Start_Get(); // the first time we position at the beginning
|
||
|
|
||
|
++_idx;
|
||
|
|
||
|
if (!_key)
|
||
|
return 0; // ends
|
||
|
|
||
|
if (!_list)
|
||
|
return 0; // ends
|
||
|
|
||
|
#ifdef _MSC_VER /* _WIN32 */
|
||
|
const HtCookie *cookie = ((const HtCookie*)_list->Get_Next()); // Cookie object
|
||
|
#else
|
||
|
const HtCookie* cookie( (const HtCookie*)(_list->Get_Next()) ); // Cookie object
|
||
|
#endif
|
||
|
|
||
|
if (cookie)
|
||
|
return cookie;
|
||
|
else
|
||
|
{
|
||
|
// Non ci sono cookie per l'host. Si passa a quello seguente
|
||
|
if ((_key = cookieDict->Get_Next()) &&
|
||
|
(_list = (List *)cookieDict->Find(_key)))
|
||
|
{
|
||
|
_list->Start_Get();
|
||
|
if ((cookie = (const HtCookie*)_list->Get_Next()))
|
||
|
return cookie;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
// Reset the iterator
|
||
|
void HtCookieMemJar::ResetIterator()
|
||
|
{
|
||
|
cookieDict->Start_Get();
|
||
|
_idx = 0;
|
||
|
}
|
||
|
|