You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

577 lines
16 KiB

// HtCookieMemJar.cc
//
// HtCookieMemJar: This class stores/retrieves cookies.
//
// by Robert La Ferla. Started 12/9/2000.
// Reviewed by G.Bartolini - since 24 Feb 2001
//
////////////////////////////////////////////////////////////
//
// The HtCookieMemJar class stores/retrieves cookies
// directly into memory. It is derived from HtCookieJar class.
//
// See "PERSISTENT CLIENT STATE HTTP COOKIES" Specification
// at http://www.netscape.com/newsref/std/cookie_spec.html
// Modified according to RFC2109 (max age and version attributes)
//
///////
//
// Part of the ht://Dig package <http://www.htdig.org/>
// Part of the ht://Check package <http://htcheck.sourceforge.net/>
// Copyright (c) 2001-2004 The ht://Dig Group
// For copyright details, see the file COPYING in your distribution
// or the GNU Library General Public License (LGPL) version 2 or later
// <http://www.gnu.org/copyleft/lgpl.html>
//
// $Id: HtCookieMemJar.cc,v 1.10 2004/05/28 13:15:23 lha Exp $
//
#ifdef HAVE_CONFIG_H
#include "htconfig.h"
#endif
#include "HtCookieMemJar.h"
#include "HtCookie.h"
#include "List.h"
#include "Dictionary.h"
#include <stdlib.h>
#include <ctype.h>
#ifdef HAVE_STD
#include <iostream>
#ifdef HAVE_NAMESPACES
using namespace std;
#endif
#else
#include <iostream.h>
#endif /* HAVE_STD */
// Constructor
// Builds an empty jar: the iteration state (_key, _list, _idx) is cleared
// and a fresh, empty dictionary is installed with its cursor rewound.
HtCookieMemJar::HtCookieMemJar()
    : _key(0), _list(0), _idx(0)
{
    Dictionary* emptyJar = new Dictionary();
    emptyJar->Start_Get();      // rewind the dictionary cursor
    cookieDict = emptyJar;
}
// Copy constructor
// Deep-copies another jar: every domain of 'rhs' gets its own new List in
// this jar, filled with copy-constructed HtCookie objects. When 'rhs' has no
// dictionary the new jar is simply empty. Iteration state starts cleared.
// Note: walking 'rhs' moves the cursor of rhs' dictionary and of its lists.
HtCookieMemJar::HtCookieMemJar(const HtCookieMemJar& rhs)
    : _key(0), _list(0), _idx(0)
{
    cookieDict = new Dictionary();

    if (rhs.cookieDict)
    {
        rhs.cookieDict->Start_Get();

        // One pass per domain stored in the source jar
        for (char* domain = rhs.cookieDict->Get_Next();
             domain;
             domain = rhs.cookieDict->Get_Next())
        {
            // The domain is registered even if no cookie list is found for it
            List* copies = new List();
            cookieDict->Add(domain, copies);

            List* originals = (List*) rhs.cookieDict->Find(domain);
            if (!originals)
                continue;

            // Duplicate each cookie of that domain
            originals->Start_Get();
            for (HtCookie* source = (HtCookie *) originals->Get_Next();
                 source;
                 source = (HtCookie *) originals->Get_Next())
            {
                copies->Add((Object *) new HtCookie(*source));
            }
        }
    }

    cookieDict->Start_Get();    // rewind the cursor of the new dictionary
}
// Destructor
// Dumps the jar content when the debug level is above 4, then releases
// the dictionary.
HtCookieMemJar::~HtCookieMemJar()
{
    if (debug > 4)
        printDebug();

    delete cookieDict;          // deleting a null pointer is a no-op
}
// Add a cookie to the Jar
int HtCookieMemJar::AddCookie(const String &CookieString, const URL &url)
{
// Builds a new Cookie object
HtCookie *Cookie = new HtCookie(CookieString, url.get());
// Interface to the insert method
// If the cookie has not been added, we'd better delete it
if (!AddCookieForHost (Cookie, url.host()))
delete Cookie;
return true;
}
// Add a cookie to a host
// Tells whether 'host' lies within 'domain': the two names are equal, or
// 'host' ends with 'domain' and the matching tail starts right after a '.'.
// A plain substring search is not enough: "acme.com.evil.org" contains
// "acme.com" but does not belong to it.
static bool HostTailMatchesDomain(const char* host, int hostLen,
                                  const char* domain, int domainLen)
{
    if (!host || !domain || domainLen <= 0 || hostLen < domainLen)
        return false;

    const char* tail = host + (hostLen - domainLen);
    if (strcmp(tail, domain) != 0)
        return false;

    return tail == host || *(tail - 1) == '.';
}

// Stores 'cookie' in the jar, on behalf of the host 'HostName'.
//
// The cookie is filed under its own domain attribute when that attribute
// is acceptable (enough periods, and HostName lies within it); otherwise
// the cookie is flagged with SetIsDomainValid(false) and filed under
// HostName. An empty HostName is treated as an imported cookie and skips
// the "host within domain" check.
//
// Returns non-zero when 'cookie' has been appended to the jar (the jar
// then keeps the pointer). Returns zero when a cookie with the same name
// and path was already present: only the expiration of the stored cookie
// is refreshed and 'cookie' itself is not kept.
int HtCookieMemJar::AddCookieForHost(HtCookie *cookie, String HostName)
{
    List *list;            // pointer to the Cookie list of an exact host
    HtCookie *theCookie;
    bool inList = false;

    /////////////////////////////////////////////////////////////
    // This is an excerpt from the Netscape Cookies specification
    /////////////////////////////////////////////////////////////
    //
    // When searching the cookie list for valid cookies,
    // a comparison of the domain attributes of the cookie
    // is made with the Internet domain name of the host from which the URL
    // will be fetched. If there is a tail match, then the cookie
    // will go through path matching to see if it should be sent.
    //
    // "Tail matching" means that domain attribute is matched against
    // the tail of the fully qualified domain name of the host.
    // A domain attribute of "acme.com" would match host names "anvil.acme.com"
    // as well as "shipping.crate.acme.com".
    //
    // Only hosts within the specified domain can set a cookie
    // for a domain and domains must have at least two (2)
    // or three (3) periods in them to prevent domains of
    // the form: ".com", ".edu", and "va.us".
    //
    // Any domain that falls within one of the seven special top level domains
    // listed below only requires two periods.
    // Any other domain requires at least three.
    //
    // The seven special top level domains are:
    // "COM", "EDU", "NET", "ORG", "GOV", "MIL", and "INT".
    //
    // The default value of domain is the host name of the
    // server which generated the cookie response.
    //
    /////////////////////////////////////////////////////////////

    // Let's get the domain of the cookie
    String Domain(cookie->GetDomain());

    // Lowercase the HostName
    HostName.lowercase();

    if (!Domain.length())
        Domain = HostName;      // no domain attribute: default to the host
    else
    {
        Domain.lowercase();     // lowercase the domain

        // The cookie's domain must have a minimum number of periods
        // inside, as stated by the excerpt cited above
        int minimum_periods = GetDomainMinNumberOfPeriods(Domain);

        if (!minimum_periods)
        {
            if (debug > 2)
                cout << "Cookie - Invalid domain "
                     << "(minimum number of periods): " << Domain << endl;
            cookie->SetIsDomainValid(false);
        }
        else
        {
            // Count the periods, walking backwards from the last char.
            // The count starts at one, and a dot is only counted when it
            // is followed by a character that is not another dot.
            const char* s = Domain.get();
            const char* r = s + strlen(s) - 1;
            int num_periods = 1;

            while (r > s && *r)
            {
                if (*r == '.' && *(r+1) && *(r+1) != '.')
                    ++num_periods;
                --r;
            }

            if (num_periods >= minimum_periods) // here is a so-far valid domain
            {
                // Skip any leading dot
                while (*r && *r == '.')
                    ++r;

                if (r > s)
                {
                    // 'r' points inside Domain's own buffer: copy the tail
                    // out first rather than rely on String::set() coping
                    // with a source that aliases its destination.
                    const String trimmed(r);
                    Domain = trimmed;
                }

                // Only a host within the domain may set a cookie for it
                // (tail match on a label boundary, not a substring match).
                if (HostTailMatchesDomain(HostName.get(), HostName.length(),
                                          Domain.get(), Domain.length()))
                {
                    if (debug > 2)
                        cout << "Cookie - valid domain: "
                             << Domain << endl;
                }
                else if (HostName.length() == 0)
                {
                    if (debug > 2)
                        cout << "Imported cookie - valid domain: "
                             << Domain << endl;
                }
                else
                {
                    cookie->SetIsDomainValid(false);
                    if (debug > 2)
                        cout << "Cookie - Invalid domain "
                             << "(host not within the specified domain): " << Domain << endl;
                }
            }
            else
            {
                cookie->SetIsDomainValid(false);
                if (debug > 2)
                    cout << "Cookie - Invalid domain "
                         << "(minimum number of periods): " << Domain << endl;
            }
        }
    }

    if (! cookie->getIsDomainValid()) // Not a valid domain
        Domain = HostName;            // Set the default

    // Is the host in the dictionary?
    if (cookieDict->Exists(Domain) == 0)
    {
        // No, add a list instance
        list = new List();
        cookieDict->Add(Domain, list);
    }
    else list = (List *)cookieDict->Find(Domain);

    // Is cookie already in list?
    // The match is made on the name and the path
    list->Start_Get();

    if (debug > 5)
        cout << "- Let's go searching for the cookie '"
             << cookie->GetName() << "' in the list" << endl;

    while (!inList && (theCookie = (HtCookie *)list->Get_Next()))
    {
        if ( (theCookie->GetName().compare(cookie->GetName()) == 0 )
             && ( theCookie->GetPath().compare(cookie->GetPath()) == 0 ))
        {
            // The cookie has been found
            inList = true;

            // Only the expiration datetime of the stored cookie is updated
            if (debug > 5)
                cout << " - Found: Update cookie expire time." << endl;

            theCookie->SetExpires(cookie->GetExpires());
        }
    }

    // The cookie wasn't in the list: add it
    if (inList == false)
    {
        if (debug > 5)
            cout << " - Not Found: let's go add it." << endl;

        list->Add((Object *)cookie);
    }

    return !inList;
}
// Retrieve all cookies that are valid for a domain
// Looks up 'DomainName' in the dictionary and returns whatever is stored
// under that exact key, viewed as a List. The result of Find() is passed
// through unchanged, so callers must check it before use.
List * HtCookieMemJar::cookiesForDomain(const String &DomainName)
{
    return (List *)cookieDict->Find(DomainName);
}
// Appends to 'RequestString' the cookies stored for the host of '_url'
// and for each of its parent domains that has enough periods.
//
// The host name is scanned from right to left; for "www.foo.bar.com" the
// candidates are tried in this order:
//  - bar.com
//  - foo.bar.com
//  - www.foo.bar.com   (the full host, after the scan)
//
// Always returns true.
int HtCookieMemJar::SetHTTPRequest_CookiesString(const URL &_url,
    String &RequestString)
{
    String Domain(_url.host());
    Domain.lowercase();

    const int required_periods = GetDomainMinNumberOfPeriods(Domain);

    if (debug > 3)
        cout << "Looking for cookies - Domain: "
             << Domain
             << " (Minimum periods: " << required_periods << ")" << endl;

    const char* const first = Domain.get();
    int periods_seen = 1;       // the count starts at one

    // Walk backwards from the last character; the first one is not examined
    for (const char* p = first + strlen(first) - 1; p > first && *p; --p)
    {
        // Only a dot followed by a character other than a dot counts
        if (*p != '.' || !*(p+1) || *(p+1) == '.')
            continue;

        ++periods_seen;

        // Too few periods so far: what follows is not a usable domain yet
        if (periods_seen <= required_periods)
            continue;

        const String SubDomain(p+1);

        if (debug > 3)
            cout << "Trying to find cookies for subdomain: "
                 << SubDomain << endl;

        if (cookieDict->Exists(SubDomain))
            WriteDomainCookiesString(_url, SubDomain, RequestString);
    }

    // Finally, the cookies filed under the full host name
    if (periods_seen >= required_periods && cookieDict->Exists(Domain))
        WriteDomainCookiesString(_url, Domain, RequestString);

    return true;
}
/////////////////////////////////////////////////////////////
// This is an excerpt from the Netscape Cookies specification
/////////////////////////////////////////////////////////////
//
//
// When requesting a URL from an HTTP server, the browser will match
// the URL against all cookies and if any of them match,
// a line containing the name/value pairs of all matching cookies
// will be included in the HTTP request.
//
// Here is the format of that line:
// Cookie: NAME1=OPAQUE_STRING1; NAME2=OPAQUE_STRING2 ...
//
// This method appends to a string (RequestString) the cookie
// headers, in the format defined by the Netscape standard
//
/////////////////////////////////////////////////////////////
int HtCookieMemJar::WriteDomainCookiesString(const URL &_url,
const String &Domain, String &RequestString)
{
// Cookie support. We need a list of cookies and a cookie object
List *cookieList;
HtCookie *cookie;
const HtDateTime now; // Instant time, used for checking
// cookies expiration time
// Let's find all the valid cookies depending on the specified domain
cookieList = cookiesForDomain(Domain);
if (cookieList)
{
// Let's store the number of cookies eventually sent
int NumCookies = 0;
if (debug > 5)
cout << "Found a cookie list for: '" << Domain << "'" << endl;
// Let's crawl the list for getting the 'path' matching ones
cookieList->Start_Get();
while ((cookie = (HtCookie *)cookieList->Get_Next()))
{
const String cookiePath = cookie->GetPath();
const String urlPath = _url.path();
//
// Let's see if the cookie has expired
// by checking the Expires value of it
// If it's not empty and the datetime
// is before now.
//
// Another way of determining whether a
// cookie is expired is checking the
// max_age property that is to say:
// (now - issuetime <= maxage).
//
const bool expired =
(cookie->GetExpires() && (*(cookie->GetExpires()) < now)) // Expires
|| (HtDateTime::GetDiff(now, cookie->GetIssueTime())
<= cookie->GetMaxAge()); // Max-age
if (debug > 5)
cout << "Trying to match paths and expiration time: "
<< urlPath << " in " << cookiePath;
// Is the path matching
if (!expired && !strncmp(cookiePath, urlPath, cookiePath.length()))
{
if (debug > 5)
cout << " (passed)" << endl;
++NumCookies;
// Write the string by passing the cookie to the superclass' method
WriteCookieHTTPRequest(*cookie, RequestString, NumCookies);
}
else if (debug > 5) cout << " (discarded)" << endl;
}
// Have we sent one cookie at least?
if (NumCookies > 0)
RequestString <<"\r\n";
}
// That's the end of function
return true;
}
// Debug info
void HtCookieMemJar::printDebug()
{
char * key;
cookieDict->Start_Get();
cout << "Summary of the cookies stored so far" << endl;
while ((key = cookieDict->Get_Next()))
{
List * list;
HtCookie * cookie;
cout << " - View cookies for: '" << key << "'" << endl;
list = (List *)cookieDict->Find(key);
list->Start_Get();
while ((cookie = (HtCookie *)list->Get_Next()))
cookie->printDebug();
}
}
///////
// Show the summary of the stored cookies
///////
// Writes to 'out' a report of the jar: one section per dictionary key
// with the number of cookies filed under it, followed by the overall
// totals. Returns 'out'.
// The details of each cookie are produced by HtCookie::printDebug(); where
// that output goes is decided by that method, not by 'out'.
ostream &HtCookieMemJar::ShowSummary(ostream &out)
{
    int total_cookies = 0;      // cookies in the whole jar
    int total_hosts = 0;        // keys (hosts) met in the dictionary

    cookieDict->Start_Get();

    out << endl << "Summary of the cookies" << endl;
    out << "======================" << endl;

    for (char* host = cookieDict->Get_Next();
         host;
         host = cookieDict->Get_Next())
    {
        ++total_hosts;

        out << " Host: '" << host << "'" << endl;

        List* hostCookies = (List *)cookieDict->Find(host);
        hostCookies->Start_Get();

        // Count the cookies of this host while printing them
        int host_cookies = 0;
        for (HtCookie* entry = (HtCookie *)hostCookies->Get_Next();
             entry;
             entry = (HtCookie *)hostCookies->Get_Next())
        {
            ++host_cookies;
            entry->printDebug();
        }

        out << " Number of cookies: " << host_cookies << endl << endl;

        total_cookies += host_cookies;
    }

    out << "Total number of cookies: " << total_cookies << endl;
    out << "Servers with cookies: " << total_hosts << endl << endl;

    return out;
}
// Get the next cookie. It is a bit tricky, but for now it is good
// Returns the next cookie of the jar, or 0 when there are no more.
//
// The iteration runs over the dictionary keys and, for each key, over its
// cookie list; the position is kept in _key, _list and _idx and relies on
// the cursors of the dictionary and of the lists. The first call after the
// cursor has been rewound (_idx == 0) picks up the first key.
//
// A host whose list holds no cookies is skipped, so the iteration
// only ends when the dictionary is exhausted (or when no list can be
// found for a key).
const HtCookie* HtCookieMemJar::NextCookie()
{
    if (!cookieDict)
        return 0;

    if (!_idx)
    {
        // First call: position at the beginning of the first host's list
        _key = cookieDict->Get_Next();
        if (_key && (_list = (List *)cookieDict->Find(_key)))
            _list->Start_Get();
    }

    ++_idx;

    while (_key && _list)
    {
        const HtCookie* cookie = (const HtCookie*)_list->Get_Next();
        if (cookie)
            return cookie;

        // No more cookies for this host: move on to the following one
        _key = cookieDict->Get_Next();
        if (!_key)
            break;              // no more hosts

        _list = (List *)cookieDict->Find(_key);
        if (_list)
            _list->Start_Get();
    }

    return 0;                   // ends
}
// Reset the iterator
// Restarts the NextCookie() iteration: the call counter is cleared, so
// that the next call picks up the first key again, and the dictionary
// cursor is rewound.
void HtCookieMemJar::ResetIterator()
{
    _idx = 0;
    cookieDict->Start_Get();
}