You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1085 lines
27 KiB
1085 lines
27 KiB
3 years ago
|
//
|
||
|
// HtHTTP.cc
|
||
|
//
|
||
|
// HtHTTP: Interface classes for HTTP messaging
|
||
|
//
|
||
|
// Including:
|
||
|
// - Generic class
|
||
|
// - Response message class
|
||
|
//
|
||
|
// Part of the ht://Dig package <http://www.htdig.org/>
|
||
|
// Copyright (c) 1995-2004 The ht://Dig Group
|
||
|
// For copyright details, see the file COPYING in your distribution
|
||
|
// or the GNU Library General Public License (LGPL) version 2 or later
|
||
|
// <http://www.gnu.org/copyleft/lgpl.html>
|
||
|
//
|
||
|
// $Id: HtHTTP.cc,v 1.27 2004/05/28 13:15:23 lha Exp $
|
||
|
//
|
||
|
|
||
|
#ifdef HAVE_CONFIG_H
|
||
|
#include "htconfig.h"
|
||
|
#endif /* HAVE_CONFIG_H */
|
||
|
|
||
|
#include "lib.h"
|
||
|
#include "Transport.h"
|
||
|
#include "HtHTTP.h"
|
||
|
|
||
|
#include <signal.h>
|
||
|
#include <sys/types.h>
|
||
|
#include <ctype.h>
|
||
|
#include <stdio.h> // for sscanf
|
||
|
|
||
|
// for setw()
|
||
|
#ifdef HAVE_STD
|
||
|
#include <iomanip>
|
||
|
#ifdef HAVE_NAMESPACES
|
||
|
using namespace std;
|
||
|
#endif
|
||
|
#else
|
||
|
#include <iomanip.h>
|
||
|
#endif /* HAVE_STD */
|
||
|
|
||
|
// Signal handler pointer type used by this file. The first branch is
// hard-wired on; the SIG_PF alternative is kept for reference only.
#if 1
typedef void (*SIGNAL_HANDLER) (...);
#else
typedef SIG_PF SIGNAL_HANDLER;
#endif
|
||
|
|
||
|
// User Agent
|
||
|
String HtHTTP::_user_agent = 0;
|
||
|
|
||
|
// Stats information
|
||
|
int HtHTTP::_tot_seconds = 0;
|
||
|
int HtHTTP::_tot_requests = 0;
|
||
|
int HtHTTP::_tot_bytes = 0;
|
||
|
|
||
|
// flag that manage the option of 'HEAD' before 'GET'
|
||
|
bool HtHTTP::_head_before_get = true;
|
||
|
|
||
|
// Handler of the CanParse function
|
||
|
|
||
|
int (* HtHTTP::CanBeParsed) (char *) = 0;
|
||
|
|
||
|
// Cookies jar
|
||
|
HtCookieJar *HtHTTP::_cookie_jar = 0; // Set to 0 by default
|
||
|
|
||
|
///////
|
||
|
// HtHTTP_Response class
|
||
|
//
|
||
|
// Response message sent by the remote HTTP server
|
||
|
///////
|
||
|
|
||
|
|
||
|
// Construction
|
||
|
|
||
|
// Builds an empty response: every HTTP-specific header string member
// is constructed from 0.
HtHTTP_Response::HtHTTP_Response()
   : _version(0),
     _transfer_encoding(0),
     _server(0),
     _hdrconnection(0),
     _content_language(0)
{
}
|
||
|
|
||
|
|
||
|
// Destruction
|
||
|
|
||
|
// Nothing to release explicitly here.
HtHTTP_Response::~HtHTTP_Response()
{
}
|
||
|
|
||
|
|
||
|
void HtHTTP_Response::Reset()
|
||
|
{
|
||
|
|
||
|
// Call the base class method in order to reset
|
||
|
// the base class attributes
|
||
|
|
||
|
Transport_Response::Reset();
|
||
|
|
||
|
// Initialize the version, transfer-encoding, location and server strings
|
||
|
_version.trunc();
|
||
|
_transfer_encoding.trunc();
|
||
|
_hdrconnection.trunc();
|
||
|
_server.trunc();
|
||
|
_content_language.trunc();
|
||
|
|
||
|
}
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
///////
|
||
|
// HtHTTP generic class
|
||
|
//
|
||
|
//
|
||
|
///////
|
||
|
|
||
|
|
||
|
// Construction
|
||
|
|
||
|
// Builds an HTTP transport on top of the given connection.
// Defaults: GET method, persistent connections allowed (but not yet
// known to be possible), cookies sent.
HtHTTP::HtHTTP(Connection& connection)
   : Transport(&connection),
     _Method(Method_GET),                    // default request method
     _bytes_read(0),
     _accept_language(0),
     _persistent_connection_allowed(true),
     _persistent_connection_possible(false),
     _send_cookies(true)
{
}
|
||
|
|
||
|
// Destruction
|
||
|
|
||
|
// Nothing to release explicitly here.
HtHTTP::~HtHTTP()
{
}
|
||
|
|
||
|
|
||
|
///////
|
||
|
// Manages the requesting process
|
||
|
///////
|
||
|
|
||
|
// Drives the whole retrieval of the current URL.
//
// When the HEAD-before-GET option is on and the configured method is GET,
// a HEAD request is issued first and the GET follows only if the HEAD
// reported Document_ok. If the outcome is Document_no_header and
// persistent connections are allowed, the connection is closed and the
// request is issued one more time.
//
// Returns the status of the last request performed.
Transport::DocStatus HtHTTP::Request()
{
   DocStatus status = Document_ok;

   // Optional HEAD probe before the real GET
   const bool probe_with_head = HeadBeforeGet() && _Method == Method_GET;

   if (probe_with_head)
   {
      if (debug > 3)
         cout << " Making a HEAD call before the GET" << endl;

      _Method = Method_HEAD;
      status = HTTPRequest();
      _Method = Method_GET;       // restore the method for the real request
   }

   // The real request: skipped when the HEAD probe did not report OK
   if (status == Document_ok)
      status = HTTPRequest();

   // Nothing else to do unless the header could not be read on a
   // connection that may have been reused
   if (status != Document_no_header || !isPersistentConnectionAllowed())
      return status;

   // With HTTP/1.1 persistent connections the header parsing sometimes
   // fails because the stream of the previous response has not been
   // completely consumed. The persistent connection benefit is given up
   // for this document: close the connection and ask for it again.
   CloseConnection();

   if (debug > 0)
      cout << "! Impossible to get the HTTP header line." << endl
           << " Connection closed. Try to get it again." << endl;

   return HTTPRequest();
}
|
||
|
|
||
|
|
||
|
///////
|
||
|
// Sends an HTTP/1.1 request
|
||
|
///////
|
||
|
|
||
|
// Performs one single HTTP request (with the method stored in _Method)
// for the current URL and stores the answer in _response.
//
// Steps: reset the response, open (or reuse) the connection, write the
// request, parse the response header, optionally read the body, and
// finally decide whether the connection can be kept open.
//
// Returns the document status; every exit goes through FinishRequest()
// so that the statistics are updated.
Transport::DocStatus HtHTTP::HTTPRequest()
{
   bool ShouldTheBodyBeRead = true;

   // Default body reader; replaced below for chunked responses
   SetBodyReadingController(&HtHTTP::ReadBody);

   // Reset the response
   _response.Reset();

   // Flush the connection
   FlushConnection();

   _bytes_read = 0;

   if (debug > 4)
      cout << "Try to get through to host "
           << _url.host() << " (port " << _url.port() << ")" << endl;

   ConnectionStatus result;

   // Timeout, number of retries and wait time of the connection
   AssignConnectionTimeOut();
   AssignConnectionRetries();
   AssignConnectionWaitTime();

   // Start the timer
   _start_time.SettoNow();

   result = EstablishConnection();

   if (result != Connection_ok && result != Connection_already_up)
   {
      // Every failure closes the connection and maps to a document status
      switch (result)
      {
         // Open failed
         case Connection_open_failed:
            if (debug > 1)
               cout << "Unable to open the connection with host: "
                    << _url.host() << " (port " << _url.port() << ")" << endl;
            CloseConnection();
            return FinishRequest(Document_no_connection);

         // Server not reached
         case Connection_no_server:
            if (debug > 1)
               cout << "Unable to find the host: "
                    << _url.host() << " (port " << _url.port() << ")" << endl;
            CloseConnection();
            return FinishRequest(Document_no_host);

         // Port not reached
         case Connection_no_port:
            if (debug > 1)
               cout << "Unable to connect with the port " << _url.port()
                    << " of the host: " << _url.host() << endl;
            CloseConnection();
            return FinishRequest(Document_no_port);

         // Connection failed
         case Connection_failed:
            if (debug > 1)
               cout << "Unable to establish the connection with host: "
                    << _url.host() << " (port " << _url.port() << ")" << endl;
            CloseConnection();
            return FinishRequest(Document_no_connection);

         // Other reason
         default:
            if (debug > 1)
               cout << "connection failed with unexpected result: result = "
                    << (int)result << ", "
                    << _url.host() << " (port " << _url.port() << ")" << endl;
            CloseConnection();
            return FinishRequest(Document_other_error);
      }
   }

   // Visual comments about the result of the connection
   if (debug > 5)
      switch (result)
      {
         case Connection_already_up:
            cout << "Taking advantage of persistent connections" << endl;
            break;
         case Connection_ok:
            cout << "New connection open successfully" << endl;
            break;
         default:
            cout << "Unexptected value: " << (int)result << endl;
            break;
      }

   String command;

   switch (_Method)
   {
      case Method_GET:
         command = "GET ";
         break;
      case Method_HEAD:
         command = "HEAD ";
         ShouldTheBodyBeRead = false;    // a HEAD answer carries no body
         break;
   }

   // Complete the request command (request line and headers)
   SetRequestCommand(command);

   if (debug > 6)
      cout << "Request\n" << command;

   // Writes the command
   ConnectionWrite(command);

   // Parse the header
   if (ParseHeader() == -1)       // Connection down
   {
      // The connection probably fell down !?!
      if (debug > 4)
         cout << setw(5) << Transport::GetTotOpen() << " - "
              << "Connection fell down ... let's close it" << endl;

      CloseConnection();    // Let's close the connection which is down now

      // Return that the connection has fallen down during the request
      return FinishRequest(Document_connection_down);
   }

   if (_response._status_code == -1)
   {
      // Unable to retrieve the status line
      if (debug > 4)
         cout << "Unable to retrieve or parse the status line" << endl;

      return FinishRequest(Document_no_header);
   }

   if (debug > 3)
   {
      cout << "Retrieving document " << _url.path() << " on host: "
           << _url.host() << ":" << _url.port() << endl;

      cout << "Http version : " << _response._version << endl;
      // Fixed: this line used to print the HTTP version a second time
      cout << "Server : " << _response._server << endl;
      cout << "Status Code : " << _response._status_code << endl;
      cout << "Reason : " << _response._reason_phrase << endl;

      if (_response.GetAccessTime())
         cout << "Access Time : " << _response.GetAccessTime()->GetRFC1123() << endl;

      if (_response.GetModificationTime())
         cout << "Modification Time : " << _response.GetModificationTime()->GetRFC1123() << endl;

      cout << "Content-type : " << _response.GetContentType() << endl;

      if (_response._transfer_encoding.length())
         cout << "Transfer-encoding : " << _response._transfer_encoding << endl;

      if (_response._content_language.length())
         cout << "Content-Language : " << _response._content_language << endl;

      if (_response._hdrconnection.length())
         cout << "Connection : " << _response._hdrconnection << endl;
   }

   // Check if persistent connection are possible
   CheckPersistentConnection(_response);

   if (debug > 4)
      cout << "Persistent connection: "
           << (_persistent_connection_possible ? "would be accepted" : "not accepted")
           << endl;

   // A plain local is enough here: the value is assigned before any use
   // and never needs to survive the call.
   Transport::DocStatus DocumentStatus = GetDocumentStatus(_response);

   // We read the body only if the document has been found
   if (DocumentStatus != Document_ok)
   {
      ShouldTheBodyBeRead = false;
   }

   // For now a chunked response MUST BE retrieved
   if (mystrncasecmp ((char*)_response._transfer_encoding, "chunked", 7) == 0)
   {
      // Change the controller of the body reading
      SetBodyReadingController(&HtHTTP::ReadChunkedBody);
   }

   // The body is read only when "ShouldTheBodyBeRead" is still true
   // (not a HEAD request and the document status is Document_ok)
   if (ShouldTheBodyBeRead)
   {
      if (debug > 4)
         cout << "Reading the body of the response" << endl;

      // We use a int (HtHTTP::*)() function pointer
      if ( (this->*_readbody)() == -1 )
      {
         // The connection probably fell down !?!
         if (debug > 4)
            cout << setw(5) << Transport::GetTotOpen() << " - "
                 << "Connection fell down ... let's close it" << endl;

         CloseConnection();    // Let's close the connection which is down now

         // Return that the connection has fallen down during the request
         return FinishRequest(Document_connection_down);
      }

      if (debug > 6)
         cout << "Contents:" << endl << _response.GetContents();

      // Check if the stream returned by the server has not been completely read
      if (_response._document_length != _response._content_length &&
          _response._document_length == _max_document_size)
      {
         // Max document size reached
         if (debug > 4)
            cout << "Max document size (" << GetRequestMaxDocumentSize()
                 << ") reached ";

         if (isPersistentConnectionUp())
         {
            // Only have to close persistent connection when we didn't read
            // all the input. For now, we always read all chunked input...
            if (mystrncasecmp ((char*)_response._transfer_encoding, "chunked", 7) != 0)
            {
               if (debug > 4)
                  cout << "- connection closed. ";

               CloseConnection();
            }
         }

         if (debug > 4)
            cout << endl;
      }

      // Make sure our content-length makes sense, if none given...
      if (_response._content_length < _response._document_length)
         _response._content_length = _response._document_length;
   }
   else if (debug > 4)
      cout << "Body not retrieved" << endl;

   // Close the connection (if there's no persistent connection)
   if (! isPersistentConnectionUp())
   {
      if (debug > 4)
         cout << setw(5) << Transport::GetTotOpen() << " - "
              << "Connection closed (No persistent connection)" << endl;

      CloseConnection();
   }
   else
   {
      // Persistent connection is active

      // If the document is not parsable and we asked for it with a 'GET'
      // method, the stream's not been completely read.
      if (DocumentStatus == Document_not_parsable && _Method == Method_GET)
      {
         // We have to close the connection.
         if (debug > 4)
            cout << "Connection must be closed (stream not completely read)"
                 << endl;

         CloseConnection();
      }
      else if (debug > 4)
         cout << "Connection stays up ... (Persistent connection)" << endl;
   }

   // Check the doc_status and return a value
   return FinishRequest(DocumentStatus);
}
|
||
|
|
||
|
|
||
|
|
||
|
// Opens the connection (or reuses the one already open), assigns the
// remote host and port when the connection is new, then connects.
//
// Returns Connection_ok for a new connection, Connection_already_up
// when Connect() reports -1, or the status naming the step that failed.
HtHTTP::ConnectionStatus HtHTTP::EstablishConnection()
{
   // 0: failure, -1: already open, 1: newly opened
   const int opened = OpenConnection();

   if (opened == 0)
      return Connection_open_failed;

   if (debug > 4)
   {
      cout << setw(5) << Transport::GetTotOpen() << " - ";
      cout << (opened == -1 ? "Connection already open. No need to re-open."
                            : "Open of the connection ok")
           << endl;
   }

   // Host and port only have to be assigned to a brand new connection
   if (opened == 1)
   {
      if (!AssignConnectionServer())
         return Connection_no_server;

      if (debug > 4)
         cout << "\tAssigned the remote host " << _url.host() << endl;

      if (!AssignConnectionPort())
         return Connection_no_port;

      if (debug > 4)
         cout << "\tAssigned the port " << _url.port() << endl;
   }

   // Connect
   switch (Connect())
   {
      case 0:
         return Connection_failed;
      case -1:
         return Connection_already_up;   // Persistent
      default:
         return Connection_ok;           // New connection
   }
}
|
||
|
|
||
|
|
||
|
|
||
|
// Set the string of the HTTP message request
|
||
|
|
||
|
// Completes the HTTP request message.
//
// cmd: on entry holds the method followed by a blank (e.g. "GET ");
//      on exit holds the whole request: request line, headers and the
//      empty line that closes the header section.
void HtHTTP::SetRequestCommand(String &cmd)
{
   // Request line: the path alone, or the whole URL when using a proxy
   if (!_useproxy)
      cmd << _url.path() << " HTTP/1.1\r\n";
   else
      cmd << _url.get() << " HTTP/1.1\r\n";

   // "Virtual" host the document is asked to; the port is appended only
   // when it is set and differs from the default one
   cmd << "Host: " << _url.host();

   if (_url.port() != 0 && _url.port() != _url.DefaultPort())
   {
      cmd << ":" << _url.port();
   }

   cmd << "\r\n";

   // User Agent
   if (_user_agent.length())
   {
      cmd << "User-Agent: " << _user_agent << "\r\n";
   }

   // Referer
   if (_referer.get().length())
   {
      cmd << "Referer: " << _referer.get() << "\r\n";
   }

   // Accept-Language
   if (_accept_language.length())
   {
      cmd << "Accept-language: " << _accept_language << "\r\n";
   }

   // Authentication
   if (_credentials.length())
   {
      cmd << "Authorization: Basic " << _credentials << "\r\n";
   }

   // Proxy Authentication
   if (_useproxy && _proxy_credentials.length())
   {
      cmd << "Proxy-Authorization: Basic " << _proxy_credentials << "\r\n";
   }

   // Accept-Encoding: as long as gzip and compress are not handled, an
   // empty header is sent; per the HTTP/1.1 standard it should tell the
   // server that only the 'identity' encoding is accepted
   cmd << "Accept-Encoding: \r\n";

   // A date has been passed: ask the server for the document only if
   // its copy is newer than the one we already own
   if (_modification_time && *_modification_time > 0)
   {
      _modification_time->ToGMTime();
      cmd << "If-Modified-Since: " << _modification_time->GetRFC1123() << "\r\n";
   }

   // Cookies: the jar writes the valid ones straight into the request
   // string, as a list of headers
   if (_send_cookies && _cookie_jar)
   {
      _cookie_jar->SetHTTPRequest_CookiesString(_url, cmd);
   }

   // Empty line closing the header section
   cmd << "\r\n";
}
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
//*****************************************************************************
|
||
|
// int HtHTTP::ParseHeader()
|
||
|
// Parse the header of the document
|
||
|
//
|
||
|
int HtHTTP::ParseHeader()
|
||
|
{
|
||
|
String line = 0;
|
||
|
int inHeader = 1;
|
||
|
|
||
|
if (_response._modification_time)
|
||
|
{
|
||
|
delete _response._modification_time;
|
||
|
_response._modification_time=0;
|
||
|
}
|
||
|
while (inHeader)
|
||
|
{
|
||
|
|
||
|
line.trunc();
|
||
|
|
||
|
if(! _connection->Read_Line(line, "\n"))
|
||
|
return -1; // Connection down
|
||
|
|
||
|
_bytes_read+=line.length();
|
||
|
line.chop('\r');
|
||
|
|
||
|
if (line.length() == 0)
|
||
|
inHeader = 0;
|
||
|
else
|
||
|
{
|
||
|
// Found a not-empty line
|
||
|
|
||
|
if (debug > 2)
|
||
|
cout << "Header line: " << line << endl;
|
||
|
|
||
|
// Status - Line check
|
||
|
char *token = line.get();
|
||
|
|
||
|
while (*token && !isspace(*token) && *token != ':')
|
||
|
++token;
|
||
|
|
||
|
while (*token && (isspace(*token) || *token == ':'))
|
||
|
++token;
|
||
|
|
||
|
if(!strncmp((char*)line, "HTTP/", 5))
|
||
|
{
|
||
|
// Here is the status-line
|
||
|
|
||
|
// store the HTTP version returned by the server
|
||
|
_response._version = strtok(line, " ");
|
||
|
|
||
|
// Store the status code
|
||
|
_response._status_code = atoi(strtok(0, " "));
|
||
|
|
||
|
// Store the reason phrase
|
||
|
_response._reason_phrase = strtok(0, "\n");
|
||
|
|
||
|
}
|
||
|
else if( ! mystrncasecmp((char*)line, "server:", 7))
|
||
|
{
|
||
|
// Server info
|
||
|
|
||
|
// Set the server info
|
||
|
token = strtok(token, "\n\t");
|
||
|
|
||
|
if (token && *token)
|
||
|
_response._server = token;
|
||
|
|
||
|
}
|
||
|
else if( ! mystrncasecmp((char*)line, "last-modified:", 14))
|
||
|
{
|
||
|
// Modification date sent by the server
|
||
|
|
||
|
// Set the response modification time
|
||
|
token = strtok(token, "\n\t");
|
||
|
|
||
|
if (token && *token)
|
||
|
_response._modification_time = NewDate(token);
|
||
|
|
||
|
}
|
||
|
else if( ! mystrncasecmp((char*)line, "date:", 5))
|
||
|
{
|
||
|
// Access date time sent by the server
|
||
|
|
||
|
// Set the response access time
|
||
|
token = strtok(token, "\n\t");
|
||
|
|
||
|
if (token && *token)
|
||
|
_response._access_time = NewDate(token);
|
||
|
|
||
|
}
|
||
|
else if( ! mystrncasecmp((char*)line, "content-type:", 13))
|
||
|
{
|
||
|
// Content - type
|
||
|
|
||
|
token = strtok(token, "\n\t");
|
||
|
|
||
|
if (token && *token)
|
||
|
_response._content_type = token;
|
||
|
|
||
|
}
|
||
|
else if( ! mystrncasecmp((char*)line, "content-length:", 15))
|
||
|
{
|
||
|
// Content - length
|
||
|
|
||
|
token = strtok(token, "\n\t");
|
||
|
|
||
|
if (token && *token)
|
||
|
_response._content_length = atoi(token);
|
||
|
|
||
|
}
|
||
|
else if( ! mystrncasecmp((char*)line, "transfer-encoding:", 18))
|
||
|
{
|
||
|
// Transfer-encoding
|
||
|
|
||
|
token = strtok(token, "\n\t");
|
||
|
|
||
|
if (token && *token)
|
||
|
_response._transfer_encoding = token;
|
||
|
|
||
|
}
|
||
|
else if( ! mystrncasecmp((char*)line, "location:", 9))
|
||
|
{
|
||
|
// Found a location directive - redirect in act
|
||
|
|
||
|
token = strtok(token, "\n\t");
|
||
|
|
||
|
if (token && *token)
|
||
|
_response._location = token;
|
||
|
|
||
|
}
|
||
|
else if( ! mystrncasecmp((char*)line, "connection:", 11))
|
||
|
{
|
||
|
// Ooops ... found a Connection clause
|
||
|
|
||
|
token = strtok(token, "\n\t");
|
||
|
|
||
|
if (token && *token)
|
||
|
_response._hdrconnection = token;
|
||
|
|
||
|
}
|
||
|
else if( ! mystrncasecmp((char*)line, "content-language:", 17))
|
||
|
{
|
||
|
// Found a content-language directive
|
||
|
|
||
|
token = strtok(token, "\n\t");
|
||
|
|
||
|
if (token && *token)
|
||
|
_response._content_language = token;
|
||
|
|
||
|
}
|
||
|
else if( ! mystrncasecmp((char*)line, "set-cookie:", 11))
|
||
|
{
|
||
|
// Found a cookie
|
||
|
|
||
|
// Are cookies enabled?
|
||
|
if (_send_cookies && _cookie_jar)
|
||
|
{
|
||
|
token = strtok(token, "\n\t");
|
||
|
|
||
|
if (token && *token)
|
||
|
{
|
||
|
// Insert the cookie into the jar
|
||
|
_cookie_jar->AddCookie(token, _url);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
// Discarded
|
||
|
|
||
|
if (debug > 3)
|
||
|
cout << "Discarded header line: " << line << endl;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (_response._modification_time == 0)
|
||
|
{
|
||
|
if (debug > 3)
|
||
|
cout << "No modification time returned: assuming now" << endl;
|
||
|
|
||
|
//Set the modification time
|
||
|
_response._modification_time = new HtDateTime;
|
||
|
_response._modification_time->ToGMTime(); // Set to GM time
|
||
|
|
||
|
}
|
||
|
|
||
|
return 1;
|
||
|
|
||
|
}
|
||
|
|
||
|
|
||
|
// Check for a document to be parsable
|
||
|
// It all depends on the content-type directive returned by the server
|
||
|
|
||
|
bool HtHTTP::isParsable(const char *content_type)
|
||
|
{
|
||
|
|
||
|
// Here I can decide what kind of document I can parse
|
||
|
// depending on the value of Transport:_default_parser_content_type
|
||
|
// and the rest are determined by the external_parser settings
|
||
|
|
||
|
if( ! mystrncasecmp (_default_parser_content_type.get(), content_type,
|
||
|
_default_parser_content_type.length()) )
|
||
|
return true;
|
||
|
|
||
|
// External function that checks if a document is parsable or not.
|
||
|
// CanBeParsed should point to a function that returns an int value,
|
||
|
// given a char * containing the content-type.
|
||
|
|
||
|
if (CanBeParsed && (*CanBeParsed)( (char *) content_type) )
|
||
|
return true;
|
||
|
|
||
|
return false;
|
||
|
|
||
|
}
|
||
|
|
||
|
|
||
|
// Check for a possible persistent connection
|
||
|
// on the return message's HTTP version basis
|
||
|
|
||
|
// Sets _persistent_connection_possible from the response: true only when
// the version starts with "HTTP/1.1" and the Connection header value
// does not start with "close" (both compared case-insensitively).
void HtHTTP::CheckPersistentConnection(HtHTTP_Response &response)
{
   const char *version = response.GetVersion();

   // Anything but HTTP/1.1 rules persistent connections out
   if (mystrncasecmp ("HTTP/1.1", version, 8) != 0)
   {
      _persistent_connection_possible = false;
      return;
   }

   const char *connection = response.GetConnectionInfo();

   // Possible unless the server wants to close
   _persistent_connection_possible =
      (mystrncasecmp ("close", connection, 5) != 0);
}
|
||
|
|
||
|
|
||
|
// Closes the accounting of a request: stops the timer and adds the
// elapsed seconds, one request and the bytes read to the class totals.
//
// ds: status of the request, returned unchanged.
HtHTTP::DocStatus HtHTTP::FinishRequest (HtHTTP::DocStatus ds)
{
   // Stop the timer
   _end_time.SettoNow();

   // Seconds needed by this request
   const int elapsed = HtDateTime::GetDiff(_end_time, _start_time);

   // Update the totals
   _tot_seconds  += elapsed;
   _tot_bytes    += _bytes_read;
   ++_tot_requests;

   if (debug > 2)
      cout << "Request time: " << elapsed << " secs" << endl;

   return ds;
}
|
||
|
|
||
|
|
||
|
// Maps the HTTP status code of the response to a document status:
//   200          Document_ok, or Document_not_parsable when the
//                content-type is not parsable
//   201..299     Document_ok
//   304          Document_not_changed
//   other 3xx    Document_redirect (300 itself excluded)
//   401          Document_not_authorized
//   anything else Document_not_found
HtHTTP::DocStatus HtHTTP::GetDocumentStatus(HtHTTP_Response &r)
{
   const int code = r.GetStatusCode();

   if (code == 200)
   {
      // OK: but is it parsable?
      if (isParsable ((const char*)r.GetContentType()))
         return Document_ok;

      return Document_not_parsable;
   }

   if (code > 200 && code < 300)
      return Document_ok;               // Successful 2xx

   if (code == 304)
      return Document_not_changed;      // Not modified

   if (code > 300 && code < 400)
      return Document_redirect;         // Redirection 3xx

   if (code == 401)
      return Document_not_authorized;   // Unauthorized

   return Document_not_found;
}
|
||
|
|
||
|
void HtHTTP::SetCredentials (const String& s)
|
||
|
{
|
||
|
Transport::SetHTTPBasicAccessAuthorizationString(_credentials, s);
|
||
|
}
|
||
|
|
||
|
|
||
|
void HtHTTP::SetProxyCredentials (const String& s)
|
||
|
{
|
||
|
Transport::SetHTTPBasicAccessAuthorizationString(_proxy_credentials, s);
|
||
|
}
|
||
|
|
||
|
int HtHTTP::ReadBody()
|
||
|
{
|
||
|
|
||
|
_response._contents = 0; // Initialize the string
|
||
|
|
||
|
char docBuffer[8192];
|
||
|
int bytesRead = 0;
|
||
|
int bytesToGo = _response._content_length;
|
||
|
|
||
|
if (bytesToGo < 0 || bytesToGo > _max_document_size)
|
||
|
bytesToGo = _max_document_size;
|
||
|
|
||
|
while (bytesToGo > 0)
|
||
|
{
|
||
|
int len = bytesToGo< (int)sizeof(docBuffer) ? bytesToGo : (int)sizeof(docBuffer);
|
||
|
bytesRead = _connection->Read(docBuffer, len);
|
||
|
if (bytesRead <= 0)
|
||
|
break;
|
||
|
|
||
|
_response._contents.append(docBuffer, bytesRead);
|
||
|
|
||
|
bytesToGo -= bytesRead;
|
||
|
|
||
|
_bytes_read+=bytesRead;
|
||
|
|
||
|
}
|
||
|
|
||
|
// Set document length
|
||
|
_response._document_length = _response._contents.length();
|
||
|
|
||
|
return bytesRead;
|
||
|
|
||
|
}
|
||
|
|
||
|
|
||
|
int HtHTTP::ReadChunkedBody()
|
||
|
{
|
||
|
// Chunked Transfer decoding
|
||
|
// as shown in the RFC2616 (HTTP/1.1) - 19.4.6
|
||
|
|
||
|
#define BSIZE 8192
|
||
|
|
||
|
int length = 0; // initialize the length
|
||
|
unsigned int chunk_size;
|
||
|
String ChunkHeader = 0;
|
||
|
char buffer[BSIZE+1];
|
||
|
int chunk, rsize;
|
||
|
|
||
|
_response._contents.trunc(); // Initialize the string
|
||
|
|
||
|
// Read chunk-size and CRLF
|
||
|
if (!_connection->Read_Line(ChunkHeader, "\r\n"))
|
||
|
return -1;
|
||
|
|
||
|
sscanf ((char *)ChunkHeader, "%x", &chunk_size);
|
||
|
|
||
|
if (debug>4)
|
||
|
cout << "Initial chunk-size: " << chunk_size << endl;
|
||
|
|
||
|
while (chunk_size > 0)
|
||
|
{
|
||
|
chunk = chunk_size;
|
||
|
|
||
|
do {
|
||
|
if (chunk > BSIZE) {
|
||
|
rsize = BSIZE;
|
||
|
if (debug>4)
|
||
|
cout << "Read chunk partial: left=" << chunk << endl;
|
||
|
} else {
|
||
|
rsize = chunk;
|
||
|
}
|
||
|
chunk -= rsize;
|
||
|
|
||
|
// Read Chunk data
|
||
|
if (_connection->Read(buffer, rsize) == -1)
|
||
|
return -1;
|
||
|
|
||
|
length+=rsize;
|
||
|
|
||
|
// Append the chunk-data to the contents of the response
|
||
|
// ... but not more than _max_document_size...
|
||
|
if (rsize > _max_document_size-_response._contents.length())
|
||
|
rsize = _max_document_size-_response._contents.length();
|
||
|
buffer[rsize] = 0;
|
||
|
_response._contents.append(buffer, rsize);
|
||
|
|
||
|
} while (chunk);
|
||
|
|
||
|
// if (_connection->Read(buffer, chunk_size) == -1)
|
||
|
// return -1;
|
||
|
|
||
|
// Read CRLF - to be ignored
|
||
|
if (!_connection->Read_Line(ChunkHeader, "\r\n"))
|
||
|
return -1;
|
||
|
|
||
|
// Read chunk-size and CRLF
|
||
|
if (!_connection->Read_Line(ChunkHeader, "\r\n"))
|
||
|
return -1;
|
||
|
|
||
|
sscanf ((char *)ChunkHeader, "%x", &chunk_size);
|
||
|
|
||
|
if (debug>4)
|
||
|
cout << "Chunk-size: " << chunk_size << endl;
|
||
|
}
|
||
|
|
||
|
ChunkHeader = 0;
|
||
|
|
||
|
// Ignoring next part of the body - the TRAILER
|
||
|
// (it contains further headers - not implemented)
|
||
|
|
||
|
// Set content length
|
||
|
_response._content_length = length;
|
||
|
|
||
|
// Set document length
|
||
|
_response._document_length = _response._contents.length();
|
||
|
|
||
|
return length;
|
||
|
|
||
|
}
|
||
|
|
||
|
|
||
|
///////
|
||
|
// Show the statistics
|
||
|
///////
|
||
|
|
||
|
// Writes the statistics of the base class followed by the HTTP ones
// (requests, KBytes requested, average request time, average speed)
// to the given stream, which is returned.
ostream &HtHTTP::ShowStatistics (ostream &out)
{
   // Base class statistics first
   Transport::ShowStatistics(out);

   const double kbytes_requested = (double)GetTotBytes()/1024;

   out << " HTTP Requests : " << GetTotRequests() << endl;

   out << " HTTP KBytes requested : " << kbytes_requested << endl;

   out << " HTTP Average request time : " << GetAverageRequestTime();
   out << " secs" << endl;

   out << " HTTP Average speed : " << GetAverageSpeed()/1024;
   out << " KBytes/secs" << endl;

   return out;
}
|