2456 lines
62 KiB
C
2456 lines
62 KiB
C
|
/*
|
||
|
skipfish - high-performance, single-process asynchronous HTTP client
|
||
|
--------------------------------------------------------------------
|
||
|
|
||
|
Author: Michal Zalewski <lcamtuf@google.com>
|
||
|
|
||
|
Copyright 2009, 2010 by Google Inc. All Rights Reserved.
|
||
|
|
||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||
|
you may not use this file except in compliance with the License.
|
||
|
You may obtain a copy of the License at
|
||
|
|
||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||
|
|
||
|
Unless required by applicable law or agreed to in writing, software
|
||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||
|
See the License for the specific language governing permissions and
|
||
|
limitations under the License.
|
||
|
|
||
|
*/
|
||
|
|
||
|
#include <stdio.h>
|
||
|
#include <stdlib.h>
|
||
|
#include <ctype.h>
|
||
|
#include <string.h>
|
||
|
#include <netdb.h>
|
||
|
#include <sys/socket.h>
|
||
|
#include <netinet/in.h>
|
||
|
#include <arpa/inet.h>
|
||
|
#include <sys/poll.h>
|
||
|
#include <fcntl.h>
|
||
|
#include <sys/time.h>
|
||
|
#include <time.h>
|
||
|
|
||
|
#include <openssl/ssl.h>
|
||
|
#include <openssl/err.h>
|
||
|
#include <idna.h>
|
||
|
#include <zlib.h>
|
||
|
|
||
|
#include "types.h"
|
||
|
#include "alloc-inl.h"
|
||
|
#include "string-inl.h"
|
||
|
#include "database.h"
|
||
|
|
||
|
#include "http_client.h"
|
||
|
|
||
|
/* Assorted exported settings: */
|
||
|
|
||
|
u32 max_connections = MAX_CONNECTIONS,
|
||
|
max_conn_host = MAX_CONN_HOST,
|
||
|
max_requests = MAX_REQUESTS,
|
||
|
max_fail = MAX_FAIL,
|
||
|
idle_tmout = IDLE_TMOUT,
|
||
|
resp_tmout = RESP_TMOUT,
|
||
|
rw_tmout = RW_TMOUT,
|
||
|
size_limit = SIZE_LIMIT;
|
||
|
|
||
|
u8 browser_type = BROWSER_FAST;
|
||
|
u8 auth_type = AUTH_NONE;
|
||
|
|
||
|
struct param_array global_http_par;
|
||
|
|
||
|
/* Counters: */
|
||
|
|
||
|
u32 req_errors_net,
|
||
|
req_errors_http,
|
||
|
req_errors_cur,
|
||
|
req_count,
|
||
|
req_dropped,
|
||
|
queue_cur,
|
||
|
conn_cur,
|
||
|
conn_count,
|
||
|
conn_idle_tmout,
|
||
|
conn_busy_tmout,
|
||
|
conn_failed,
|
||
|
req_retried,
|
||
|
url_scope;
|
||
|
|
||
|
u64 bytes_sent,
|
||
|
bytes_recv,
|
||
|
bytes_deflated,
|
||
|
bytes_inflated;
|
||
|
|
||
|
u8 *auth_user,
|
||
|
*auth_pass;
|
||
|
|
||
|
u8 ignore_cookies;
|
||
|
|
||
|
/* Internal globals for queue management: */
|
||
|
|
||
|
static struct queue_entry* queue;
|
||
|
static struct conn_entry* conn;
|
||
|
static struct dns_entry* dns;
|
||
|
|
||
|
#ifdef QUEUE_FILO
|
||
|
static struct queue_entry* q_tail;
|
||
|
#endif /* QUEUE_FILO */
|
||
|
|
||
|
static u8 tear_down_idle;
|
||
|
|
||
|
|
||
|
/* Extracts parameter value from param_array. Name is matched if
|
||
|
non-NULL. Returns pointer to value data, not a duplicate string;
|
||
|
NULL if no match found. */
|
||
|
|
||
|
u8* get_value(u8 type, u8* name, u32 offset,
|
||
|
struct param_array* par) {
|
||
|
|
||
|
u32 i, coff = 0;
|
||
|
|
||
|
for (i=0;i<par->c;i++) {
|
||
|
if (type != par->t[i]) continue;
|
||
|
if (name && strcasecmp((char*)par->n[i], (char*)name)) continue;
|
||
|
if (offset != coff) { coff++; continue; }
|
||
|
return par->v[i];
|
||
|
}
|
||
|
|
||
|
return NULL;
|
||
|
|
||
|
}
|
||
|
|
||
|
|
||
|
/* Inserts or overwrites parameter value in param_array. If offset
|
||
|
== -1, will append parameter to list. Duplicates strings,
|
||
|
name and val can be NULL. */
|
||
|
|
||
|
void set_value(u8 type, u8* name, u8* val,
|
||
|
s32 offset, struct param_array* par) {
|
||
|
|
||
|
u32 i, coff = 0, matched = -1;
|
||
|
|
||
|
/* If offset specified, try to find an entry to replace. */
|
||
|
|
||
|
if (offset >= 0)
|
||
|
for (i=0;i<par->c;i++) {
|
||
|
if (type != par->t[i]) continue;
|
||
|
if (name && strcasecmp((char*)par->n[i], (char*)name)) continue;
|
||
|
if (offset != coff) { coff++; continue; }
|
||
|
matched = i;
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
if (matched == -1) {
|
||
|
|
||
|
/* No offset or no match - append to the end of list. */
|
||
|
|
||
|
par->t = ck_realloc(par->t, (par->c + 1) * sizeof(u8));
|
||
|
par->n = ck_realloc(par->n, (par->c + 1) * sizeof(u8*));
|
||
|
par->v = ck_realloc(par->v, (par->c + 1) * sizeof(u8*));
|
||
|
par->t[par->c] = type;
|
||
|
par->n[par->c] = ck_strdup(name);
|
||
|
par->v[par->c] = ck_strdup(val);
|
||
|
par->c++;
|
||
|
|
||
|
} else {
|
||
|
|
||
|
/* Matched - replace name & value. */
|
||
|
|
||
|
ck_free(par->n[matched]);
|
||
|
ck_free(par->v[matched]);
|
||
|
par->n[matched] = ck_strdup(name);
|
||
|
par->v[matched] = ck_strdup(val);
|
||
|
|
||
|
}
|
||
|
|
||
|
}
|
||
|
|
||
|
|
||
|
/* Convert a fully-qualified or relative URL string to a proper http_request
|
||
|
representation. Returns 0 on success, 1 on format error. */
|
||
|
|
||
|
u8 parse_url(u8* url, struct http_request* req, struct http_request* ref) {
|
||
|
|
||
|
u8* cur = url;
|
||
|
u32 maybe_proto = strcspn((char*)url, ":/?#@");
|
||
|
u8 has_host = 0, add_slash = 1;
|
||
|
|
||
|
if (strlen((char*)url) > MAX_URL_LEN) return 1;
|
||
|
req->orig_url = ck_strdup(url);
|
||
|
|
||
|
/* Interpret, skip protocol string if the URL seems to be fully-qualified;
|
||
|
otherwise, copy from referring URL. We could be stricter here, as
|
||
|
browsers bail out on seemingly invalid chars in proto names, but... */
|
||
|
|
||
|
if (maybe_proto && url[maybe_proto] == ':') {
|
||
|
|
||
|
if (!strncasecmp((char*)url, "http:", 5)) {
|
||
|
req->proto = PROTO_HTTP;
|
||
|
cur += 5;
|
||
|
} else if (!strncasecmp((char*)url, "https:", 6)) {
|
||
|
req->proto = PROTO_HTTPS;
|
||
|
cur += 6;
|
||
|
} else return 1;
|
||
|
|
||
|
} else {
|
||
|
|
||
|
if (!ref || !ref->proto) return 1;
|
||
|
req->proto = ref->proto;
|
||
|
|
||
|
}
|
||
|
|
||
|
/* Interpret, skip //[login[:pass@](\[ipv4\]|\[ipv6\]|host)[:port] part of the
|
||
|
URL, if present. Note that "http:blarg" is a valid relative URL to most
|
||
|
browsers, and "//example.com/blarg" is a valid non-FQDN absolute one.
|
||
|
We need to mimick this, which complicates the code a bit. */
|
||
|
|
||
|
if (cur[0] == '/' && cur[1] == '/') {
|
||
|
|
||
|
u32 path_st;
|
||
|
u8 *at_sign, *host, *x;
|
||
|
u8 has_utf = 0;
|
||
|
|
||
|
cur += 2;
|
||
|
|
||
|
/* Detect, skip login[:pass]@; we only use cmdline-supplied credentials or
|
||
|
wordlists into account. Be sure to report any embedded auth, though. */
|
||
|
|
||
|
at_sign = (u8*)strchr((char*)cur, '@');
|
||
|
path_st = strcspn((char*)cur, ":/?#");
|
||
|
|
||
|
if (at_sign && path_st > (at_sign - cur)) {
|
||
|
cur = at_sign + 1;
|
||
|
if (!req->pivot) return 1;
|
||
|
problem(PROB_URL_AUTH, ref, 0, url, req->pivot, 0);
|
||
|
}
|
||
|
|
||
|
/* No support for IPv6 or [ip] notation for now, so let's just refuse to
|
||
|
parse the URL. Also, refuse excessively long domain names for sanity. */
|
||
|
|
||
|
if (*cur == '[') return 1;
|
||
|
if (path_st > MAX_DNS_LEN) return 1;
|
||
|
|
||
|
x = host = ck_memdup(cur, path_st + 1);
|
||
|
host[path_st] = 0;
|
||
|
|
||
|
/* Scan, normalize extracted host name. */
|
||
|
|
||
|
while (*x) {
|
||
|
|
||
|
switch (*x) {
|
||
|
|
||
|
case 'A' ... 'Z':
|
||
|
*x = tolower(*x);
|
||
|
break;
|
||
|
|
||
|
case 'a' ... 'z':
|
||
|
case '0' ... '9':
|
||
|
case '.':
|
||
|
case '-':
|
||
|
case '_':
|
||
|
break;
|
||
|
|
||
|
case 0x80 ... 0xff:
|
||
|
has_utf = 1;
|
||
|
break;
|
||
|
|
||
|
default:
|
||
|
/* Uh-oh, invalid characters in a host name - abandon ship. */
|
||
|
return 1;
|
||
|
|
||
|
}
|
||
|
|
||
|
x++;
|
||
|
|
||
|
}
|
||
|
|
||
|
/* Host names that contained high bits need to be converted to Punycode
|
||
|
in order to resolve properly. */
|
||
|
|
||
|
if (has_utf) {
|
||
|
|
||
|
char* output = 0;
|
||
|
|
||
|
if (idna_to_ascii_8z((char*)host, &output, 0) != IDNA_SUCCESS ||
|
||
|
strlen(output) > MAX_DNS_LEN) {
|
||
|
ck_free(output);
|
||
|
return 1;
|
||
|
}
|
||
|
|
||
|
ck_free(host);
|
||
|
host = (u8*)output;
|
||
|
|
||
|
}
|
||
|
|
||
|
req->host = host;
|
||
|
cur += path_st;
|
||
|
|
||
|
/* All right, moving on: if host name is followed by :, let's try to
|
||
|
parse and validate port number; otherwise, assume 80 / 443, depending
|
||
|
on protocol. */
|
||
|
|
||
|
if (*cur == ':') {
|
||
|
|
||
|
u32 digit_cnt = strspn((char*)++cur, "0123456789");
|
||
|
u32 port = atoi((char*)cur);
|
||
|
if (!digit_cnt || (cur[digit_cnt] && !strchr("/?#", cur[digit_cnt])))
|
||
|
return 1;
|
||
|
req->port = port;
|
||
|
cur += digit_cnt;
|
||
|
|
||
|
} else {
|
||
|
|
||
|
if (req->proto == PROTO_HTTPS) req->port = 443; else req->port = 80;
|
||
|
|
||
|
}
|
||
|
|
||
|
has_host = 1;
|
||
|
|
||
|
} else {
|
||
|
|
||
|
/* No host name found - copy from referring request instead. */
|
||
|
|
||
|
if (!ref || !ref->host) return 1;
|
||
|
|
||
|
req->host = ck_strdup(ref->host);
|
||
|
req->addr = ref->addr;
|
||
|
req->port = ref->port;
|
||
|
|
||
|
}
|
||
|
|
||
|
if (!*cur || *cur == '#') {
|
||
|
u32 i;
|
||
|
|
||
|
/* No-op path. If the URL does not specify host (e.g., #foo), copy
|
||
|
everything from referring request, call it a day. Otherwise
|
||
|
(e.g., http://example.com#foo), let tokenize_path() run to
|
||
|
add NULL-"" entry to the list. */
|
||
|
|
||
|
if (!has_host) {
|
||
|
for (i=0;i<ref->par.c;i++)
|
||
|
if (PATH_SUBTYPE(ref->par.t[i]) || QUERY_SUBTYPE(ref->par.t[i]))
|
||
|
set_value(ref->par.t[i], ref->par.n[i], ref->par.v[i], -1, &req->par);
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
}
|
||
|
|
||
|
if (!has_host && *cur == '?') {
|
||
|
u32 i;
|
||
|
|
||
|
/* URL begins with ? and does not specify host (e.g., ?foo=bar). Copy all
|
||
|
path segments, but no query, then fall through to parse the query
|
||
|
string. */
|
||
|
|
||
|
for (i=0;i<ref->par.c;i++)
|
||
|
if (PATH_SUBTYPE(ref->par.t[i]))
|
||
|
set_value(ref->par.t[i], ref->par.n[i], ref->par.v[i], -1, &req->par);
|
||
|
|
||
|
/* In this case, we do not want tokenize_path() to tinker with the path
|
||
|
in any way. */
|
||
|
|
||
|
add_slash = 0;
|
||
|
|
||
|
} else if (!has_host && *cur != '/') {
|
||
|
|
||
|
/* The URL does not begin with / or ?, and does not specify host (e.g.,
|
||
|
foo/bar?baz). Copy path from referrer, but drop the last "proper"
|
||
|
path segment and everything that follows it. This mimicks browser
|
||
|
behavior (for URLs ending with /, it just drops the final NULL-""
|
||
|
pair). */
|
||
|
|
||
|
u32 i;
|
||
|
u32 path_cnt = 0, path_cur = 0;
|
||
|
|
||
|
for (i=0;i<ref->par.c;i++)
|
||
|
if (ref->par.t[i] == PARAM_PATH) path_cnt++;
|
||
|
|
||
|
for (i=0;i<ref->par.c;i++) {
|
||
|
if (ref->par.t[i] == PARAM_PATH) path_cur++;
|
||
|
if (path_cur < path_cnt && PATH_SUBTYPE(ref->par.t[i]))
|
||
|
set_value(ref->par.t[i], ref->par.n[i], ref->par.v[i], -1, &req->par);
|
||
|
}
|
||
|
|
||
|
}
|
||
|
|
||
|
/* Tokenize the remaining path on top of what we parsed / copied over. */
|
||
|
|
||
|
tokenize_path(cur, req, add_slash);
|
||
|
return 0;
|
||
|
|
||
|
}
|
||
|
|
||
|
|
||
|
/* URL-decodes a string. 'Plus' parameter governs the behavior on +
|
||
|
signs (as they have a special meaning only in query params, not in path). */
|
||
|
|
||
|
u8* url_decode_token(u8* str, u32 len, u8 plus) {
|
||
|
u8 *ret = ck_alloc(len + 1);
|
||
|
u8 *src = str, *dst = ret;
|
||
|
char *hex_str = "0123456789abcdef";
|
||
|
|
||
|
while (len--) {
|
||
|
u8 c = *(src++);
|
||
|
char *f, *s;
|
||
|
|
||
|
if (plus && c == '+') c = ' ';
|
||
|
|
||
|
if (c == '%' && len >= 2 &&
|
||
|
(f = strchr(hex_str, tolower(src[0]))) &&
|
||
|
(s = strchr(hex_str, tolower(src[1])))) {
|
||
|
c = ((f - hex_str) << 4) | (s - hex_str);
|
||
|
src += 2; len -= 2;
|
||
|
}
|
||
|
|
||
|
/* We can't handle NUL-terminators gracefully when deserializing request
|
||
|
parameters, because param_array values are NUL-terminated themselves.
|
||
|
Let's encode \0 as \xFF instead, and hope nobody notices. */
|
||
|
|
||
|
if (!c) c = 0xff;
|
||
|
|
||
|
*(dst++) = c;
|
||
|
|
||
|
}
|
||
|
|
||
|
*(dst++) = 0;
|
||
|
|
||
|
ret = ck_realloc(ret, dst - ret);
|
||
|
|
||
|
return ret;
|
||
|
}
|
||
|
|
||
|
|
||
|
/* URL-encodes a string according to custom rules. The assumption here is that
|
||
|
the data is already tokenized as "special" boundaries such as ?, =, &, /,
|
||
|
;, !, $, and , so these characters must always be escaped if present in
|
||
|
tokens. We otherwise let pretty much everything else go through, as it
|
||
|
may help with the exploitation of certain vulnerabilities. */
|
||
|
|
||
|
u8* url_encode_token(u8* str, u32 len) {
|
||
|
|
||
|
u8 *ret = ck_alloc(len * 3 + 1);
|
||
|
u8 *src = str, *dst = ret;
|
||
|
|
||
|
while (len--) {
|
||
|
u8 c = *(src++);
|
||
|
|
||
|
if (c <= 0x20 || c >= 0x80 || strchr("#%&=/+;,!$?", c)) {
|
||
|
if (c == 0xFF) c = 0;
|
||
|
sprintf((char*)dst, "%%%02X", c);
|
||
|
dst += 3;
|
||
|
} else *(dst++) = c;
|
||
|
|
||
|
}
|
||
|
|
||
|
*(dst++) = 0;
|
||
|
|
||
|
ret = ck_realloc(ret, dst - ret);
|
||
|
|
||
|
return ret;
|
||
|
|
||
|
}
|
||
|
|
||
|
|
||
|
/* Split path at known "special" character boundaries, URL decode values,
|
||
|
then put them in the provided http_request struct. */
|
||
|
|
||
|
void tokenize_path(u8* str, struct http_request* req, u8 add_slash) {
|
||
|
|
||
|
u8* cur;
|
||
|
u8 know_dir = 0;
|
||
|
|
||
|
while (*str == '/') str++;
|
||
|
cur = str;
|
||
|
|
||
|
/* Parse path elements first. */
|
||
|
|
||
|
while (*cur && !strchr("?#", *cur)) {
|
||
|
|
||
|
u32 next_seg, next_eq;
|
||
|
|
||
|
u8 *name = NULL, *value = NULL;
|
||
|
u8 first_el = (str == cur);
|
||
|
|
||
|
if (first_el || *cur == '/') {
|
||
|
|
||
|
/* Optimize out //, /\0, /./, and /.\0. They do indicate
|
||
|
we are looking at a directory, so mark this. */
|
||
|
|
||
|
if (!first_el && (cur[1] == '/' || !cur[1])) {
|
||
|
cur++;
|
||
|
know_dir = 1;
|
||
|
continue;
|
||
|
}
|
||
|
|
||
|
if (cur[0 + !first_el] == '.' && (cur[1 + !first_el] == '/' ||
|
||
|
!cur[1 + !first_el])) {
|
||
|
cur += 1 + !first_el;
|
||
|
know_dir = 1;
|
||
|
continue;
|
||
|
}
|
||
|
|
||
|
/* If we encountered /../ or /..\0, remove everything up to and
|
||
|
including the last "true" path element. It's also indicative
|
||
|
of a directory, by the way. */
|
||
|
|
||
|
if (cur[0 + !first_el] == '.' && cur[1 + !first_el] == '.' &&
|
||
|
(cur[2 + !first_el] == '/' || !cur[2 + !first_el])) {
|
||
|
|
||
|
u32 i, last_p = req->par.c;
|
||
|
|
||
|
for (i=0;i<req->par.c;i++)
|
||
|
if (req->par.t[i] == PARAM_PATH) last_p = i;
|
||
|
|
||
|
for (i=last_p;i<req->par.c;i++) {
|
||
|
req->par.t[i] = PARAM_NONE;
|
||
|
}
|
||
|
|
||
|
cur += 2 + !first_el;
|
||
|
know_dir = 1;
|
||
|
continue;
|
||
|
|
||
|
}
|
||
|
|
||
|
}
|
||
|
|
||
|
/* If we're here, we have an actual item to add; cur points to
|
||
|
the string if it's the first element, or to field separator
|
||
|
if one of the subsequent ones. */
|
||
|
|
||
|
next_seg = strcspn((char*)cur + 1, "/;,!$?#") + 1,
|
||
|
next_eq = strcspn((char*)cur + 1, "=/;,!$?#") + 1;
|
||
|
know_dir = 0;
|
||
|
|
||
|
if (next_eq < next_seg) {
|
||
|
name = url_decode_token(cur + !first_el, next_eq - !first_el, 0);
|
||
|
value = url_decode_token(cur + next_eq + 1, next_seg - next_eq - 1, 0);
|
||
|
} else {
|
||
|
value = url_decode_token(cur + !first_el, next_seg - !first_el, 0);
|
||
|
}
|
||
|
|
||
|
switch (first_el ? '/' : *cur) {
|
||
|
|
||
|
case ';': set_value(PARAM_PATH_S, name, value, -1, &req->par); break;
|
||
|
case ',': set_value(PARAM_PATH_C, name, value, -1, &req->par); break;
|
||
|
case '!': set_value(PARAM_PATH_E, name, value, -1, &req->par); break;
|
||
|
case '$': set_value(PARAM_PATH_D, name, value, -1, &req->par); break;
|
||
|
default: set_value(PARAM_PATH, name, value, -1, &req->par);
|
||
|
|
||
|
}
|
||
|
|
||
|
ck_free(name);
|
||
|
ck_free(value);
|
||
|
|
||
|
cur += next_seg;
|
||
|
|
||
|
}
|
||
|
|
||
|
/* If the last segment was /, /./, or /../, *or* if we never added
|
||
|
anything to the path to begin with, we want to store a NULL-""
|
||
|
entry to denote it's a directory. */
|
||
|
|
||
|
if (know_dir || (add_slash && (!*str || strchr("?#", *str))))
|
||
|
set_value(PARAM_PATH, NULL, (u8*)"", -1, &req->par);
|
||
|
|
||
|
/* Deal with regular query parameters now. This is much simpler,
|
||
|
obviously. */
|
||
|
|
||
|
while (*cur && !strchr("#", *cur)) {
|
||
|
|
||
|
u32 next_seg = strcspn((char*)cur + 1, "#&;,!$") + 1;
|
||
|
u32 next_eq = strcspn((char*)cur + 1, "=#&;,!$") + 1;
|
||
|
u8 *name = NULL, *value = NULL;
|
||
|
|
||
|
/* foo=bar syntax... */
|
||
|
|
||
|
if (next_eq < next_seg) {
|
||
|
name = url_decode_token(cur + 1, next_eq - 1, 1);
|
||
|
value = url_decode_token(cur + next_eq + 1, next_seg - next_eq - 1, 1);
|
||
|
} else {
|
||
|
value = url_decode_token(cur + 1, next_seg - 1, 1);
|
||
|
}
|
||
|
|
||
|
switch (*cur) {
|
||
|
|
||
|
case ';': set_value(PARAM_QUERY_S, name, value, -1, &req->par); break;
|
||
|
case ',': set_value(PARAM_QUERY_C, name, value, -1, &req->par); break;
|
||
|
case '!': set_value(PARAM_QUERY_E, name, value, -1, &req->par); break;
|
||
|
case '$': set_value(PARAM_QUERY_D, name, value, -1, &req->par); break;
|
||
|
default: set_value(PARAM_QUERY, name, value, -1, &req->par);
|
||
|
|
||
|
}
|
||
|
|
||
|
ck_free(name);
|
||
|
ck_free(value);
|
||
|
|
||
|
cur += next_seg;
|
||
|
|
||
|
}
|
||
|
|
||
|
}
|
||
|
|
||
|
|
||
|
/* Reconstructs URI from http_request data. Includes protocol and host
|
||
|
if with_host is non-zero. */
|
||
|
|
||
|
u8* serialize_path(struct http_request* req, u8 with_host, u8 with_post) {
|
||
|
u32 i, cur_pos;
|
||
|
u8 got_search = 0;
|
||
|
u8* ret;
|
||
|
|
||
|
NEW_STR(ret, cur_pos);
|
||
|
|
||
|
#define ASD(_p3) ADD_STR_DATA(ret, cur_pos, _p3)
|
||
|
|
||
|
/* For human-readable uses... */
|
||
|
|
||
|
if (with_host) {
|
||
|
ASD("http");
|
||
|
if (req->proto == PROTO_HTTPS) ASD("s");
|
||
|
ASD("://");
|
||
|
ASD(req->host);
|
||
|
|
||
|
if ((req->proto == PROTO_HTTP && req->port != 80) ||
|
||
|
(req->proto == PROTO_HTTPS && req->port != 443)) {
|
||
|
u8 port[7];
|
||
|
sprintf((char*)port, ":%u", req->port);
|
||
|
ASD(port);
|
||
|
}
|
||
|
|
||
|
}
|
||
|
|
||
|
/* First print path... */
|
||
|
|
||
|
for (i=0;i<req->par.c;i++)
|
||
|
if (PATH_SUBTYPE(req->par.t[i])) {
|
||
|
|
||
|
switch (req->par.t[i]) {
|
||
|
|
||
|
case PARAM_PATH_S: ASD(";"); break;
|
||
|
case PARAM_PATH_C: ASD(","); break;
|
||
|
case PARAM_PATH_E: ASD("!"); break;
|
||
|
case PARAM_PATH_D: ASD("$"); break;
|
||
|
default: ASD("/");
|
||
|
|
||
|
}
|
||
|
|
||
|
if (req->par.n[i]) {
|
||
|
u32 len = strlen((char*)req->par.n[i]);
|
||
|
u8* str = url_encode_token(req->par.n[i], len);
|
||
|
ASD(str); ASD("=");
|
||
|
ck_free(str);
|
||
|
}
|
||
|
if (req->par.v[i]) {
|
||
|
u32 len = strlen((char*)req->par.v[i]);
|
||
|
u8* str = url_encode_token(req->par.v[i], len);
|
||
|
ASD(str);
|
||
|
ck_free(str);
|
||
|
}
|
||
|
|
||
|
}
|
||
|
|
||
|
/* Then actual parameters. */
|
||
|
|
||
|
for (i=0;i<req->par.c;i++)
|
||
|
if (QUERY_SUBTYPE(req->par.t[i])) {
|
||
|
|
||
|
if (!got_search) {
|
||
|
ASD("?");
|
||
|
got_search = 1;
|
||
|
} else switch (req->par.t[i]) {
|
||
|
|
||
|
case PARAM_QUERY_S: ASD(";"); break;
|
||
|
case PARAM_QUERY_C: ASD(","); break;
|
||
|
case PARAM_QUERY_E: ASD("!"); break;
|
||
|
case PARAM_QUERY_D: ASD("$"); break;
|
||
|
default: ASD("&");
|
||
|
|
||
|
}
|
||
|
|
||
|
if (req->par.n[i]) {
|
||
|
u32 len = strlen((char*)req->par.n[i]);
|
||
|
u8* str = url_encode_token(req->par.n[i], len);
|
||
|
ASD(str); ASD("=");
|
||
|
ck_free(str);
|
||
|
}
|
||
|
if (req->par.v[i]) {
|
||
|
u32 len = strlen((char*)req->par.v[i]);
|
||
|
u8* str = url_encode_token(req->par.v[i], len);
|
||
|
ASD(str);
|
||
|
ck_free(str);
|
||
|
}
|
||
|
|
||
|
}
|
||
|
|
||
|
got_search = 0;
|
||
|
|
||
|
if (with_post)
|
||
|
for (i=0;i<req->par.c;i++)
|
||
|
if (POST_SUBTYPE(req->par.t[i])) {
|
||
|
|
||
|
if (!got_search) {
|
||
|
ASD(" POST: ");
|
||
|
got_search = 1;
|
||
|
} else ASD("&");
|
||
|
|
||
|
if (req->par.n[i]) {
|
||
|
u32 len = strlen((char*)req->par.n[i]);
|
||
|
u8* str = url_encode_token(req->par.n[i], len);
|
||
|
ASD(str); ASD("=");
|
||
|
ck_free(str);
|
||
|
}
|
||
|
if (req->par.v[i]) {
|
||
|
u32 len = strlen((char*)req->par.v[i]);
|
||
|
u8* str = url_encode_token(req->par.v[i], len);
|
||
|
ASD(str);
|
||
|
ck_free(str);
|
||
|
}
|
||
|
|
||
|
}
|
||
|
|
||
|
#undef ASD
|
||
|
|
||
|
TRIM_STR(ret, cur_pos);
|
||
|
return ret;
|
||
|
|
||
|
}
|
||
|
|
||
|
|
||
|
/* Looks up IP for a particular host, returns data in network order.
|
||
|
Uses standard resolver, so it is slow and blocking, but we only
|
||
|
expect to call it a couple of times during a typical assessment.
|
||
|
There are some good async DNS libraries to consider in the long run. */
|
||
|
|
||
|
u32 maybe_lookup_host(u8* name) {
|
||
|
struct hostent* h;
|
||
|
struct dns_entry *d = dns, *prev = NULL;
|
||
|
u32 ret_addr = 0;
|
||
|
struct in_addr in;
|
||
|
|
||
|
/* Don't bother resolving raw IP addresses, naturally. */
|
||
|
|
||
|
if (inet_aton((char*)name, &in))
|
||
|
return (u32)in.s_addr;
|
||
|
|
||
|
while (d) {
|
||
|
if (!strcasecmp((char*)name, (char*)d->name)) return d->addr;
|
||
|
prev = d;
|
||
|
d = d->next;
|
||
|
}
|
||
|
|
||
|
h = gethostbyname((char*)name);
|
||
|
|
||
|
/* If lookup fails with a transient error, be nice - try again. */
|
||
|
|
||
|
if (!h && h_errno == TRY_AGAIN) h = gethostbyname((char*)name);
|
||
|
|
||
|
if (h) {
|
||
|
|
||
|
u32 i = 0;
|
||
|
|
||
|
/* For each address associated with the host, see if we have any
|
||
|
other hosts that resolved to that same IP. If yes, return
|
||
|
that address; otherwise, just return first. This is for HTTP
|
||
|
performance and bookkeeping reasons. */
|
||
|
|
||
|
while (h->h_addr_list[i]) {
|
||
|
d = dns;
|
||
|
while (d) {
|
||
|
if (d->addr == *(u32*)h->h_addr_list[i]) {
|
||
|
ret_addr = d->addr;
|
||
|
goto dns_got_name;
|
||
|
}
|
||
|
d = d->next;
|
||
|
}
|
||
|
i++;
|
||
|
}
|
||
|
|
||
|
ret_addr = *(u32*)h->h_addr_list[0];
|
||
|
|
||
|
}
|
||
|
|
||
|
dns_got_name:
|
||
|
|
||
|
if (!prev) d = dns = ck_alloc(sizeof(struct dns_entry));
|
||
|
else d = prev->next = ck_alloc(sizeof(struct dns_entry));
|
||
|
|
||
|
d->name = ck_strdup(name);
|
||
|
d->addr = ret_addr;
|
||
|
|
||
|
return ret_addr;
|
||
|
|
||
|
}
|
||
|
|
||
|
|
||
|
/* Creates an ad hoc DNS cache entry, to override NS lookups. */
|
||
|
|
||
|
void fake_host(u8* name, u32 addr) {
|
||
|
struct dns_entry *d = dns, *prev = 0;
|
||
|
|
||
|
while (d && d->next) { prev = d ; d = d->next;}
|
||
|
|
||
|
if (!dns) d = dns = ck_alloc(sizeof(struct dns_entry));
|
||
|
else d = prev->next = ck_alloc(sizeof(struct dns_entry));
|
||
|
|
||
|
d->name = ck_strdup(name);
|
||
|
d->addr = addr;
|
||
|
|
||
|
}
|
||
|
|
||
|
|
||
|
/* Prepares a serialized HTTP buffer to be sent over the network. */
|
||
|
|
||
|
u8* build_request_data(struct http_request* req) {
|
||
|
|
||
|
u8 *ret_buf, *ck_buf, *pay_buf, *path;
|
||
|
u32 ret_pos, ck_pos, pay_pos, i;
|
||
|
u8 req_type = PARAM_NONE;
|
||
|
|
||
|
if (req->proto == PROTO_NONE)
|
||
|
FATAL("uninitialized http_request");
|
||
|
|
||
|
NEW_STR(ret_buf, ret_pos);
|
||
|
|
||
|
path = serialize_path(req, 0, 0);
|
||
|
|
||
|
#define ASD(_p3) ADD_STR_DATA(ret_buf, ret_pos, _p3)
|
||
|
|
||
|
if (req->method) ASD(req->method); else ASD((u8*)"GET");
|
||
|
ASD(" ");
|
||
|
ASD(path);
|
||
|
ASD(" HTTP/1.1\r\n");
|
||
|
ck_free(path);
|
||
|
|
||
|
ASD("Host: ");
|
||
|
ASD(req->host);
|
||
|
|
||
|
if ((req->proto == PROTO_HTTP && req->port != 80) ||
|
||
|
(req->proto == PROTO_HTTPS && req->port != 443)) {
|
||
|
char port[7];
|
||
|
sprintf((char*)port, ":%u", req->port);
|
||
|
ASD(port);
|
||
|
}
|
||
|
|
||
|
ASD("\r\n");
|
||
|
|
||
|
/* Insert generic browser headers first. */
|
||
|
|
||
|
if (browser_type == BROWSER_FAST) {
|
||
|
|
||
|
ASD("Accept-Encoding: gzip\r\n");
|
||
|
ASD("Connection: keep-alive\r\n");
|
||
|
ASD("User-Agent: Mozilla/5.0 SF/" VERSION "\r\n");
|
||
|
|
||
|
/* Some servers will reject to gzip responses unless "Mozilla/..."
|
||
|
is seen in User-Agent. Bleh. */
|
||
|
|
||
|
} else if (browser_type == BROWSER_FFOX) {
|
||
|
|
||
|
if (!GET_HDR((u8*)"User-Agent", &req->par))
|
||
|
ASD("User-Agent: Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; "
|
||
|
"rv:1.9.1.2) Gecko/20090729 Firefox/3.5.2 SF/" VERSION "\r\n");
|
||
|
|
||
|
ASD("Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;"
|
||
|
"q=0.8\r\n");
|
||
|
|
||
|
if (!GET_HDR((u8*)"Accept-Language", &req->par))
|
||
|
ASD("Accept-Language: en-us,en\r\n");
|
||
|
|
||
|
ASD("Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7\r\n");
|
||
|
ASD("Keep-Alive: 300\r\n");
|
||
|
ASD("Connction: keep-alive\r\n");
|
||
|
|
||
|
} else /* MSIE */ {
|
||
|
|
||
|
ASD("Accept: */*\r\n");
|
||
|
|
||
|
if (!GET_HDR((u8*)"Accept-Language", &req->par))
|
||
|
ASD("Accept-Language: en,en-US;q=0.5\r\n");
|
||
|
|
||
|
if (!GET_HDR((u8*)"User-Agent", &req->par))
|
||
|
ASD("User-Agent: Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; "
|
||
|
"Trident/4.0; .NET CLR 1.1.4322; InfoPath.1; .NET CLR "
|
||
|
"2.0.50727; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729; SF/"
|
||
|
VERSION ")\r\n");
|
||
|
|
||
|
ASD("Accept-Encoding: gzip, deflate\r\n");
|
||
|
ASD("Connection: Keep-Alive\r\n");
|
||
|
|
||
|
}
|
||
|
|
||
|
|
||
|
/* Request a limited range up front to minimize unwanted traffic. */
|
||
|
|
||
|
if (size_limit) {
|
||
|
u8 limit[32];
|
||
|
sprintf((char*)limit, "Range: bytes=0-%u\r\n", size_limit - 1);
|
||
|
ASD(limit);
|
||
|
}
|
||
|
|
||
|
/* Include a dummy "Referer" header, to avoid certain XSRF checks. */
|
||
|
|
||
|
if (!GET_HDR((u8*)"Referer", &req->par)) {
|
||
|
ASD("Referer: http");
|
||
|
if (req->proto == PROTO_HTTPS) ASD("s");
|
||
|
ASD("://");
|
||
|
ASD(req->host);
|
||
|
ASD("/\r\n");
|
||
|
}
|
||
|
|
||
|
/* Take care of HTTP authentication next. */
|
||
|
|
||
|
if (auth_type == AUTH_BASIC) {
|
||
|
u8* lp = ck_alloc(strlen((char*)auth_user) + strlen((char*)auth_pass) + 2);
|
||
|
u8* lpb64;
|
||
|
|
||
|
sprintf((char*)lp, "%s:%s", auth_user, auth_pass);
|
||
|
|
||
|
lpb64 = b64_encode(lp, strlen((char*)lp));
|
||
|
|
||
|
ASD("Authorization: basic ");
|
||
|
ASD(lpb64);
|
||
|
ASD("\r\n");
|
||
|
|
||
|
ck_free(lpb64);
|
||
|
ck_free(lp);
|
||
|
|
||
|
}
|
||
|
|
||
|
/* Append any other requested headers and cookies. */
|
||
|
|
||
|
NEW_STR(ck_buf, ck_pos);
|
||
|
|
||
|
for (i=0;i<req->par.c;i++) {
|
||
|
if (req->par.t[i] == PARAM_HEADER) {
|
||
|
ASD(req->par.n[i]);
|
||
|
ASD(": ");
|
||
|
ASD(req->par.v[i]);
|
||
|
ASD("\r\n");
|
||
|
} else if (req->par.t[i] == PARAM_COOKIE) {
|
||
|
if (ck_pos) ADD_STR_DATA(ck_buf, ck_pos, ";");
|
||
|
ADD_STR_DATA(ck_buf, ck_pos, req->par.n[i]);
|
||
|
ADD_STR_DATA(ck_buf, ck_pos, "=");
|
||
|
ADD_STR_DATA(ck_buf, ck_pos, req->par.v[i]);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/* Also include extra globals, if any (but avoid dupes). */
|
||
|
|
||
|
for (i=0;i<global_http_par.c;i++) {
|
||
|
if (global_http_par.t[i] == PARAM_HEADER &&
|
||
|
!GET_HDR(global_http_par.n[i], &req->par)) {
|
||
|
ASD(global_http_par.n[i]);
|
||
|
ASD(": ");
|
||
|
ASD(global_http_par.v[i]);
|
||
|
ASD("\r\n");
|
||
|
} else if (global_http_par.t[i] == PARAM_COOKIE &&
|
||
|
!GET_CK(global_http_par.n[i], &req->par)) {
|
||
|
if (ck_pos) ADD_STR_DATA(ck_buf, ck_pos, ";");
|
||
|
ADD_STR_DATA(ck_buf, ck_pos, global_http_par.n[i]);
|
||
|
ADD_STR_DATA(ck_buf, ck_pos, "=");
|
||
|
ADD_STR_DATA(ck_buf, ck_pos, global_http_par.v[i]);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (ck_pos) {
|
||
|
ASD("Cookie: ");
|
||
|
ASD(ck_buf);
|
||
|
ASD("\r\n");
|
||
|
}
|
||
|
|
||
|
ck_free(ck_buf);
|
||
|
|
||
|
/* Now, let's serialize the payload, if necessary. */
|
||
|
|
||
|
for (i=0;i<req->par.c;i++) {
|
||
|
switch (req->par.t[i]) {
|
||
|
case PARAM_POST_F:
|
||
|
case PARAM_POST_O:
|
||
|
req_type = req->par.t[i];
|
||
|
break;
|
||
|
case PARAM_POST:
|
||
|
if (req_type == PARAM_NONE) req_type = PARAM_POST;
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
NEW_STR(pay_buf, pay_pos);
|
||
|
|
||
|
if (req_type == PARAM_POST) {
|
||
|
|
||
|
/* The default case: application/x-www-form-urlencoded. */
|
||
|
|
||
|
for (i=0;i<req->par.c;i++)
|
||
|
if (req->par.t[i] == PARAM_POST) {
|
||
|
if (pay_pos) ADD_STR_DATA(pay_buf, pay_pos, "&");
|
||
|
if (req->par.n[i]) {
|
||
|
u32 len = strlen((char*)req->par.n[i]);
|
||
|
u8* str = url_encode_token(req->par.n[i], len);
|
||
|
ADD_STR_DATA(pay_buf, pay_pos, str);
|
||
|
ADD_STR_DATA(pay_buf, pay_pos, "=");
|
||
|
ck_free(str);
|
||
|
}
|
||
|
if (req->par.v[i]) {
|
||
|
u32 len = strlen((char*)req->par.v[i]);
|
||
|
u8* str = url_encode_token(req->par.v[i], len);
|
||
|
ADD_STR_DATA(pay_buf, pay_pos, str);
|
||
|
ck_free(str);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
ASD("Content-Type: application/x-www-form-urlencoded\r\n");
|
||
|
|
||
|
} else if (req_type == PARAM_POST_O) {
|
||
|
|
||
|
/* Opaque, non-escaped data of some sort. */
|
||
|
|
||
|
for (i=0;i<req->par.c;i++)
|
||
|
if (req->par.t[i] == PARAM_POST_O && req->par.v[i])
|
||
|
ADD_STR_DATA(pay_buf, pay_pos, req->par.v[i]);
|
||
|
|
||
|
ASD("Content-Type: text/plain\r\n");
|
||
|
|
||
|
} else if (req_type == PARAM_POST_F) {
|
||
|
u8 bound[20];
|
||
|
|
||
|
/* MIME envelopes: multipart/form-data */
|
||
|
|
||
|
sprintf((char*)bound, "sf%u", R(1000000));
|
||
|
|
||
|
for (i=0;i<req->par.c;i++)
|
||
|
if (req->par.t[i] == PARAM_POST || req->par.t[i] == PARAM_POST_F) {
|
||
|
|
||
|
ADD_STR_DATA(pay_buf, pay_pos, "--");
|
||
|
ADD_STR_DATA(pay_buf, pay_pos, bound);
|
||
|
ADD_STR_DATA(pay_buf, pay_pos, "\r\n"
|
||
|
"Content-Disposition: form-data; name=\"");
|
||
|
if (req->par.n[i])
|
||
|
ADD_STR_DATA(pay_buf, pay_pos, req->par.n[i]);
|
||
|
|
||
|
if (req->par.t[i] == PARAM_POST_F) {
|
||
|
u8 tmp[64];
|
||
|
sprintf((char*)tmp, "\"; filename=\"sfish%u." DUMMY_EXT "\"\r\n"
|
||
|
"Content-Type: " DUMMY_MIME "\r\n\r\n", R(16));
|
||
|
ADD_STR_DATA(pay_buf, pay_pos, tmp);
|
||
|
ADD_STR_DATA(pay_buf, pay_pos, new_xss_tag((u8*)DUMMY_FILE));
|
||
|
register_xss_tag(req);
|
||
|
} else {
|
||
|
ADD_STR_DATA(pay_buf, pay_pos, "\"\r\n\r\n");
|
||
|
if (req->par.v[i])
|
||
|
ADD_STR_DATA(pay_buf, pay_pos, req->par.v[i]);
|
||
|
}
|
||
|
|
||
|
ADD_STR_DATA(pay_buf, pay_pos, "\r\n");
|
||
|
}
|
||
|
|
||
|
ADD_STR_DATA(pay_buf, pay_pos, "--");
|
||
|
ADD_STR_DATA(pay_buf, pay_pos, bound);
|
||
|
ADD_STR_DATA(pay_buf, pay_pos, "--\r\n");
|
||
|
|
||
|
ASD("Content-Type: multipart/form-data; boundary=");
|
||
|
ASD(bound);
|
||
|
ASD("\r\n");
|
||
|
|
||
|
} else if (req_type == 0) ASD("\r\n");
|
||
|
|
||
|
/* Finalize HTTP payload... */
|
||
|
|
||
|
for (i=0;i<pay_pos;i++)
|
||
|
if (pay_buf[i] == 0xff) pay_buf[i] = 0x00;
|
||
|
|
||
|
TRIM_STR(pay_buf, pay_pos);
|
||
|
|
||
|
if (pay_pos) {
|
||
|
u8 cl[40];
|
||
|
sprintf((char*)cl, "Content-Length: %u\r\n\r\n", pay_pos);
|
||
|
ASD(cl);
|
||
|
ASD(pay_buf);
|
||
|
}
|
||
|
|
||
|
ck_free(pay_buf);
|
||
|
|
||
|
#undef ASD
|
||
|
|
||
|
/* Phew! */
|
||
|
|
||
|
TRIM_STR(ret_buf, ret_pos);
|
||
|
return ret_buf;
|
||
|
|
||
|
}
|
||
|
|
||
|
|
||
|
/* Internal helper for parsing lines for parse_response(), etc. */
|
||
|
|
||
|
static u8* grab_line(u8* data, u32* cur_pos, u32 data_len) {
|
||
|
u8 *cur_ptr = data + *cur_pos,
|
||
|
*start_ptr = cur_ptr,
|
||
|
*end_ptr = data + data_len,
|
||
|
*ret;
|
||
|
|
||
|
if (start_ptr == end_ptr) return 0;
|
||
|
while (cur_ptr < end_ptr && *cur_ptr != '\n') cur_ptr++;
|
||
|
if (cur_ptr != end_ptr) cur_ptr++;
|
||
|
|
||
|
*cur_pos += cur_ptr - start_ptr;
|
||
|
|
||
|
while (cur_ptr > start_ptr && strchr("\r\n", *(cur_ptr-1))) cur_ptr--;
|
||
|
|
||
|
ret = ck_alloc(cur_ptr - start_ptr + 1);
|
||
|
memcpy(ret, start_ptr, cur_ptr - start_ptr);
|
||
|
ret[cur_ptr - start_ptr] = 0;
|
||
|
|
||
|
return ret;
|
||
|
|
||
|
}
|
||
|
|
||
|
|
||
|
/* Builds response fingerprint data. These fingerprints are used to
|
||
|
find "roughly comparable" pages based on their word length
|
||
|
distributions (divided into FP_SIZE buckets). */
|
||
|
|
||
|
void fprint_response(struct http_response* res) {
|
||
|
u32 i, c_len = 0, in_space = 0;
|
||
|
|
||
|
res->sig.code = res->code;
|
||
|
|
||
|
for (i=0;i<res->pay_len;i++)
|
||
|
|
||
|
if (res->payload[i] <= 0x20 || strchr("<>'\"", res->payload[i])) {
|
||
|
if (!in_space) {
|
||
|
in_space = 1;
|
||
|
if (c_len <= FP_MAX_LEN)
|
||
|
res->sig.data[c_len % FP_SIZE]++;
|
||
|
c_len = 0;
|
||
|
} else c_len++;
|
||
|
} else {
|
||
|
if (in_space) {
|
||
|
in_space = 0;
|
||
|
if (c_len <= FP_MAX_LEN)
|
||
|
res->sig.data[c_len % FP_SIZE]++;
|
||
|
c_len = 0;
|
||
|
} else c_len++;
|
||
|
}
|
||
|
|
||
|
res->sig.data[c_len % FP_SIZE]++;
|
||
|
|
||
|
}
|
||
|
|
||
|
|
||
|
/* Parses a network buffer containing raw HTTP response received over the
|
||
|
network ('more' == the socket is still available for reading). Returns 0
|
||
|
if response parses OK, 1 if more data should be read from the socket,
|
||
|
2 if the response seems invalid, 3 if response OK but connection must be
|
||
|
closed. */
|
||
|
|
||
|
u8 parse_response(struct http_request* req, struct http_response* res,
|
||
|
u8* data, u32 data_len, u8 more) {
|
||
|
u8* cur_line = 0;
|
||
|
s32 pay_len = -1;
|
||
|
u32 cur_data_off = 0,
|
||
|
total_chunk = 0,
|
||
|
http_ver;
|
||
|
u8 chunked = 0, compressed = 0, must_close = 0;
|
||
|
|
||
|
if (res->code)
|
||
|
FATAL("struct http_response reused! Original code '%u'.", res->code);
|
||
|
|
||
|
#define NEXT_LINE() do { \
|
||
|
if (cur_line) ck_free(cur_line); \
|
||
|
cur_line = grab_line(data, &cur_data_off, data_len); \
|
||
|
} while (0)
|
||
|
|
||
|
/* First, let's do a superficial request completeness check. Be
|
||
|
prepared for a premature end at any point. */
|
||
|
|
||
|
NEXT_LINE(); /* HTTP/1.x xxx ... */
|
||
|
|
||
|
if (!cur_line) return more ? 1 : 2;
|
||
|
|
||
|
if (strlen((char*)cur_line) < 7 && more) {
|
||
|
ck_free(cur_line);
|
||
|
return 1;
|
||
|
}
|
||
|
|
||
|
if (strncmp((char*)cur_line, "HTTP/1.", 7)) {
|
||
|
ck_free(cur_line);
|
||
|
return 2;
|
||
|
}
|
||
|
|
||
|
/* Scan headers for Content-Length, Transfer-Encoding, etc. */
|
||
|
|
||
|
while (1) {
|
||
|
|
||
|
NEXT_LINE(); /* Next header or empty line. */
|
||
|
|
||
|
/* If headers end prematurely, and more data might arrive, ask for
|
||
|
it; otherwise, just assume end of headers and continue. */
|
||
|
|
||
|
if (!cur_line) {
|
||
|
if (more) return 1;
|
||
|
res->warn |= WARN_PARTIAL;
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
/* Empty line indicates the beginning of a payload. */
|
||
|
|
||
|
if (!cur_line[0]) break;
|
||
|
|
||
|
if (!strncasecmp((char*)cur_line, "Content-Length:", 15)) {
|
||
|
|
||
|
/* The value in Content-Length header would be useful for seeing if we
|
||
|
have all the requested data already. Reject invalid values to avoid
|
||
|
integer overflows, etc, though. */
|
||
|
|
||
|
if (sscanf((char*)cur_line + 15, "%d", &pay_len) == 1) {
|
||
|
if (pay_len < 0 || pay_len > 1000000000 /* 1 GB */) {
|
||
|
ck_free(cur_line);
|
||
|
return 2;
|
||
|
}
|
||
|
} else pay_len = -1;
|
||
|
|
||
|
} else if (!strncasecmp((char*)cur_line, "Transfer-Encoding:", 18)) {
|
||
|
|
||
|
/* Transfer-Encoding: chunked must be accounted for to properly
|
||
|
determine if we received all the data when Content-Length not found. */
|
||
|
|
||
|
u8* x = cur_line + 18;
|
||
|
|
||
|
while (isspace(*x)) x++;
|
||
|
if (!strcasecmp((char*)x, "chunked")) chunked = 1;
|
||
|
|
||
|
} else if (!strncasecmp((char*)cur_line, "Content-Encoding:", 17)) {
|
||
|
|
||
|
/* Content-Encoding is good to know, too. */
|
||
|
|
||
|
u8* x = cur_line + 17;
|
||
|
|
||
|
while (isspace(*x)) x++;
|
||
|
|
||
|
if (!strcasecmp((char*)x, "deflate") || !strcasecmp((char*)x, "gzip"))
|
||
|
compressed = 1;
|
||
|
|
||
|
} else if (!strncasecmp((char*)cur_line, "Connection:", 11)) {
|
||
|
|
||
|
u8* x = cur_line + 11;
|
||
|
|
||
|
while (isspace(*x)) x++;
|
||
|
|
||
|
if (!strcasecmp((char*)x, "close")) must_close = 1;
|
||
|
|
||
|
|
||
|
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/* We are now at the beginning of the payload. Firstly, how about decoding
|
||
|
'chunked' to see if we received a complete 0-byte terminator chunk
|
||
|
already? */
|
||
|
|
||
|
if (chunked) {
|
||
|
while (1) {
|
||
|
u32 chunk_len;
|
||
|
|
||
|
NEXT_LINE(); /* Should be chunk size, hex. */
|
||
|
|
||
|
if (!cur_line || sscanf((char*)cur_line, "%x", &chunk_len) != 1) {
|
||
|
if (more) { ck_free(cur_line); return 1; }
|
||
|
res->warn |= WARN_PARTIAL;
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
if (chunk_len > 1000000000 || total_chunk > 1000000000 /* 1 GB */) {
|
||
|
ck_free(cur_line);
|
||
|
return 2;
|
||
|
}
|
||
|
|
||
|
/* See if we actually enough buffer to skip the chunk. Bail out if
|
||
|
not and more data might be coming; otherwise, adjust chunk size
|
||
|
accordingly. */
|
||
|
|
||
|
if (cur_data_off + chunk_len > data_len) {
|
||
|
|
||
|
if (more) { ck_free(cur_line); return 1; }
|
||
|
chunk_len = data_len - cur_data_off;
|
||
|
total_chunk += chunk_len;
|
||
|
|
||
|
res->warn |= WARN_PARTIAL;
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
total_chunk += chunk_len;
|
||
|
|
||
|
cur_data_off += chunk_len;
|
||
|
NEXT_LINE();
|
||
|
|
||
|
/* No newline? */
|
||
|
if (!cur_line) {
|
||
|
if (more) return 1;
|
||
|
res->warn |= WARN_PARTIAL;
|
||
|
}
|
||
|
|
||
|
/* All right, so that was the last, complete 0-size chunk?
|
||
|
Exit the loop if so. */
|
||
|
|
||
|
if (!chunk_len) break;
|
||
|
|
||
|
}
|
||
|
|
||
|
if (cur_data_off != data_len) res->warn |= WARN_TRAIL;
|
||
|
|
||
|
} else if (pay_len == -1 && more) {
|
||
|
|
||
|
/* If in a mode other than 'chunked', and C-L not received, but more
|
||
|
data might be available - try to request it. */
|
||
|
|
||
|
ck_free(cur_line);
|
||
|
return 1;
|
||
|
|
||
|
} else if (pay_len != 1) {
|
||
|
|
||
|
if (cur_data_off + pay_len > data_len) {
|
||
|
|
||
|
/* If C-L seen, but not nough data in the buffer, try to request more
|
||
|
if possible, otherwise tag the response as partial. */
|
||
|
|
||
|
if (more) { ck_free(cur_line); return 1; }
|
||
|
res->warn |= WARN_PARTIAL;
|
||
|
|
||
|
} else if (cur_data_off + pay_len < data_len) res->warn |= WARN_TRAIL;
|
||
|
|
||
|
}
|
||
|
|
||
|
/* Rewind, then properly parse HTTP headers, parsing cookies. */
|
||
|
|
||
|
cur_data_off = 0;
|
||
|
|
||
|
NEXT_LINE();
|
||
|
|
||
|
if (strlen((char*)cur_line) < 13 ||
|
||
|
sscanf((char*)cur_line, "HTTP/1.%u %u ", &http_ver, &res->code) != 2 ||
|
||
|
res->code < 100 || res->code > 999) {
|
||
|
ck_free(cur_line);
|
||
|
return 2;
|
||
|
}
|
||
|
|
||
|
/* Some servers, when presented with 'Range' header, will return 200 on
|
||
|
some queries for a particular resource, and 206 on other queries (e.g.,
|
||
|
with query string), despite returning exactly as much data. As an
|
||
|
ugly workaround... */
|
||
|
|
||
|
if (res->code == 206) res->code = 200;
|
||
|
|
||
|
if (http_ver == 0) must_close = 1;
|
||
|
|
||
|
res->msg = ck_strdup(cur_line + 13);
|
||
|
|
||
|
while (1) {
|
||
|
u8* val;
|
||
|
|
||
|
NEXT_LINE(); /* Next header or empty line. */
|
||
|
|
||
|
if (!cur_line) return 2;
|
||
|
if (!cur_line[0]) break;
|
||
|
|
||
|
/* Split field name and value */
|
||
|
|
||
|
val = (u8*) strchr((char*)cur_line, ':');
|
||
|
if (!val) { ck_free(cur_line); return 2; }
|
||
|
*val = 0;
|
||
|
while (isspace(*(++val)));
|
||
|
|
||
|
if (!strcasecmp((char*)cur_line, "Set-Cookie") ||
|
||
|
!strcasecmp((char*)cur_line, "Set-Cookie2")) {
|
||
|
|
||
|
/* We could bother with a proper tokenizer here, but contrary to "teh
|
||
|
standards", browsers generally don't accept multiple cookies in
|
||
|
Set-Cookie headers, handle quoted-string encoding inconsistently,
|
||
|
etc. So let's just grab the first value naively and move on. */
|
||
|
|
||
|
u8* cval;
|
||
|
u8* orig_val;
|
||
|
|
||
|
cval = (u8*) strchr((char*)val, ';');
|
||
|
if (cval) *cval = 0;
|
||
|
cval = (u8*) strchr((char*)val, '=');
|
||
|
if (cval) { *cval = 0; cval++; }
|
||
|
|
||
|
/* If proper value not found, use NULL name and put whatever was
|
||
|
found in the value field. */
|
||
|
|
||
|
if (!cval) { cval = val; val = 0; }
|
||
|
|
||
|
SET_CK(val, cval, &res->hdr);
|
||
|
|
||
|
if (val) {
|
||
|
|
||
|
/* New or drastically changed cookies are noteworthy. */
|
||
|
|
||
|
orig_val = GET_CK(val, &global_http_par);
|
||
|
|
||
|
if (!orig_val || (strlen((char*)orig_val) != strlen((char*)cval) &&
|
||
|
strncmp((char*)cval, (char*)orig_val, 3))) {
|
||
|
res->cookies_set = 1;
|
||
|
problem(PROB_NEW_COOKIE, req, res, val, req->pivot, 0);
|
||
|
}
|
||
|
|
||
|
/* Set cookie globally, but ignore obvious attempts to delete
|
||
|
existing ones. */
|
||
|
|
||
|
if (!ignore_cookies && cval[0])
|
||
|
SET_CK(val, cval, &global_http_par);
|
||
|
|
||
|
}
|
||
|
|
||
|
} else SET_HDR(cur_line, val, &res->hdr);
|
||
|
|
||
|
/* Content-Type is worth mining for MIME, charset data at this point. */
|
||
|
|
||
|
if (!strcasecmp((char*)cur_line, "Content-Type")) {
|
||
|
|
||
|
if (res->header_mime) {
|
||
|
|
||
|
/* Duplicate Content-Type. Fetch previous value, if different,
|
||
|
complain. */
|
||
|
|
||
|
u8* tmp = GET_HDR((u8*)"Content-Type", &res->hdr);
|
||
|
if (strcasecmp((char*)tmp, (char*)val)) res->warn |= WARN_CFL_HDR;
|
||
|
|
||
|
} else {
|
||
|
u8 *tmp = (u8*)strchr((char*)val, ';'), *cset;
|
||
|
|
||
|
if (tmp) {
|
||
|
*tmp = 0;
|
||
|
if ((cset = (u8*)strchr((char*)tmp + 1, '=')))
|
||
|
res->header_charset = ck_strdup(cset + 1);
|
||
|
}
|
||
|
|
||
|
res->header_mime = ck_strdup(val);
|
||
|
if (tmp) *tmp = ';';
|
||
|
}
|
||
|
|
||
|
}
|
||
|
|
||
|
}
|
||
|
|
||
|
/* At the beginning of the payload again! */
|
||
|
|
||
|
if (!chunked) {
|
||
|
|
||
|
/* Identity. Ignore actual C-L data, use just as much as we collected. */
|
||
|
|
||
|
res->pay_len = data_len - cur_data_off;
|
||
|
res->payload = ck_alloc(res->pay_len + 1);
|
||
|
res->payload[res->pay_len] = 0; /* NUL-terminate for safer parsing. */
|
||
|
|
||
|
memcpy(res->payload, data + cur_data_off, res->pay_len);
|
||
|
|
||
|
} else {
|
||
|
|
||
|
u32 chunk_off = 0;
|
||
|
|
||
|
/* Chunked - we should have the authoritative length of chunk
|
||
|
contents in total_chunk already, and the overall structure
|
||
|
validated, so let's just reparse quickly. */
|
||
|
|
||
|
res->pay_len = total_chunk;
|
||
|
res->payload = ck_alloc(total_chunk + 1);
|
||
|
res->payload[res->pay_len] = 0;
|
||
|
|
||
|
while (1) {
|
||
|
u32 chunk_len;
|
||
|
|
||
|
NEXT_LINE();
|
||
|
|
||
|
if (!cur_line || sscanf((char*)cur_line, "%x", &chunk_len) != 1) break;
|
||
|
|
||
|
if (cur_data_off + chunk_len > data_len)
|
||
|
chunk_len = data_len - cur_data_off;
|
||
|
|
||
|
memcpy(res->payload + chunk_off, data + cur_data_off, chunk_len);
|
||
|
|
||
|
chunk_off += chunk_len;
|
||
|
cur_data_off += chunk_len;
|
||
|
|
||
|
NEXT_LINE();
|
||
|
|
||
|
if (!chunk_len) break;
|
||
|
}
|
||
|
|
||
|
}
|
||
|
|
||
|
ck_free(cur_line);
|
||
|
|
||
|
if (compressed) {
|
||
|
|
||
|
u8* tmp_buf;
|
||
|
|
||
|
/* Deflate or gzip - zlib can handle both the same way. We lazily allocate
|
||
|
a SIZE_LIMIT output buffer, then truncate it if necessary. */
|
||
|
|
||
|
z_stream d;
|
||
|
s32 err;
|
||
|
|
||
|
tmp_buf = ck_alloc(SIZE_LIMIT + 1);
|
||
|
|
||
|
d.zalloc = 0;
|
||
|
d.zfree = 0;
|
||
|
d.opaque = 0;
|
||
|
d.next_in = res->payload;
|
||
|
d.avail_in = res->pay_len;
|
||
|
d.next_out = tmp_buf;
|
||
|
d.avail_out = SIZE_LIMIT;
|
||
|
|
||
|
/* Say hello to third-party vulnerabilities! */
|
||
|
|
||
|
if (inflateInit2(&d, 32 + 15) != Z_OK) {
|
||
|
inflateEnd(&d);
|
||
|
ck_free(tmp_buf);
|
||
|
return 2;
|
||
|
}
|
||
|
|
||
|
err = inflate(&d, Z_FINISH);
|
||
|
inflateEnd(&d);
|
||
|
|
||
|
if (err != Z_BUF_ERROR && err != Z_OK && err != Z_STREAM_END) {
|
||
|
ck_free(tmp_buf);
|
||
|
return 2;
|
||
|
}
|
||
|
|
||
|
ck_free(res->payload);
|
||
|
|
||
|
bytes_deflated += res->pay_len;
|
||
|
|
||
|
res->pay_len = SIZE_LIMIT - d.avail_out;
|
||
|
res->payload = ck_realloc(tmp_buf, res->pay_len + 1);
|
||
|
res->payload[res->pay_len] = 0;
|
||
|
|
||
|
|
||
|
bytes_inflated += res->pay_len;
|
||
|
|
||
|
}
|
||
|
|
||
|
#undef NEXT_LINE
|
||
|
|
||
|
fprint_response(res);
|
||
|
|
||
|
return must_close ? 3 : 0;
|
||
|
}
|
||
|
|
||
|
|
||
|
/* Performs a deep free() of struct http_request */
|
||
|
|
||
|
void destroy_request(struct http_request* req) {
|
||
|
u32 i;
|
||
|
|
||
|
for (i=0;i<req->par.c;i++) {
|
||
|
ck_free(req->par.n[i]);
|
||
|
ck_free(req->par.v[i]);
|
||
|
}
|
||
|
|
||
|
ck_free(req->par.t);
|
||
|
ck_free(req->par.n);
|
||
|
ck_free(req->par.v);
|
||
|
|
||
|
ck_free(req->method);
|
||
|
ck_free(req->host);
|
||
|
ck_free(req->orig_url);
|
||
|
ck_free(req);
|
||
|
|
||
|
}
|
||
|
|
||
|
|
||
|
/* Performs a deep free() of struct http_response */
|
||
|
|
||
|
void destroy_response(struct http_response* res) {
|
||
|
u32 i;
|
||
|
|
||
|
for (i=0;i<res->hdr.c;i++) {
|
||
|
ck_free(res->hdr.n[i]);
|
||
|
ck_free(res->hdr.v[i]);
|
||
|
}
|
||
|
|
||
|
ck_free(res->hdr.t);
|
||
|
ck_free(res->hdr.n);
|
||
|
ck_free(res->hdr.v);
|
||
|
|
||
|
ck_free(res->meta_charset);
|
||
|
ck_free(res->header_charset);
|
||
|
ck_free(res->header_mime);
|
||
|
|
||
|
ck_free(res->msg);
|
||
|
ck_free(res->payload);
|
||
|
ck_free(res);
|
||
|
|
||
|
}
|
||
|
|
||
|
|
||
|
/* Performs a deep free(), unlinking of struct queue_entry, and the
|
||
|
underlying request / response pair. */
|
||
|
|
||
|
static void destroy_unlink_queue(struct queue_entry* q, u8 keep) {
|
||
|
if (!keep) {
|
||
|
if (q->req) destroy_request(q->req);
|
||
|
if (q->res) destroy_response(q->res);
|
||
|
}
|
||
|
if (!q->prev) queue = q->next; else q->prev->next = q->next;
|
||
|
#ifdef QUEUE_FILO
|
||
|
if (!q->next) q_tail = q->prev;
|
||
|
#endif /* QUEUE_FILO */
|
||
|
if (q->next) q->next->prev = q->prev;
|
||
|
ck_free(q);
|
||
|
queue_cur--;
|
||
|
}
|
||
|
|
||
|
|
||
|
/* Performs a deep free(), unlinking, network shutdown for struct
|
||
|
conn_entry, as well as the underlying queue entry, request
|
||
|
and response structs. */
|
||
|
|
||
|
static void destroy_unlink_conn(struct conn_entry* c, u8 keep) {
|
||
|
if (c->q) destroy_unlink_queue(c->q, keep);
|
||
|
if (!c->prev) conn = c->next; else c->prev->next = c->next;
|
||
|
if (c->next) c->next->prev = c->prev;
|
||
|
if (c->srv_ssl) SSL_free(c->srv_ssl);
|
||
|
if (c->srv_ctx) SSL_CTX_free(c->srv_ctx);
|
||
|
ck_free(c->write_buf);
|
||
|
ck_free(c->read_buf);
|
||
|
close(c->fd);
|
||
|
ck_free(c);
|
||
|
conn_cur--;
|
||
|
}
|
||
|
|
||
|
|
||
|
/* Performs struct conn_entry for reuse following a clean shutdown. */
|
||
|
|
||
|
static void reuse_conn(struct conn_entry* c, u8 keep) {
|
||
|
if (c->q) destroy_unlink_queue(c->q, keep);
|
||
|
c->q = 0;
|
||
|
ck_free(c->read_buf);
|
||
|
ck_free(c->write_buf);
|
||
|
c->read_buf = c->write_buf = NULL;
|
||
|
c->read_len = c->write_len = c->write_off = 0;
|
||
|
c->SSL_rd_w_wr = c->SSL_wr_w_rd = 0;
|
||
|
}
|
||
|
|
||
|
|
||
|
/* Schedules a new asynchronous request (does not make a copy of the
|
||
|
original http_request struct, may deallocate it immediately or
|
||
|
later on); req->callback() will be invoked when the request is
|
||
|
completed (or fails - maybe right away). */
|
||
|
|
||
|
void async_request(struct http_request* req) {
|
||
|
struct queue_entry *qe;
|
||
|
struct http_response *res;
|
||
|
|
||
|
if (req->proto == PROTO_NONE || !req->callback)
|
||
|
FATAL("uninitialized http_request");
|
||
|
|
||
|
res = ck_alloc(sizeof(struct http_response));
|
||
|
|
||
|
req->addr = maybe_lookup_host(req->host);
|
||
|
|
||
|
/* Don't try to issue extra requests if max_fail
|
||
|
consecutive failures exceeded; but still try to
|
||
|
wrap up the (partial) scan. */
|
||
|
|
||
|
if (req_errors_cur > max_fail) {
|
||
|
DEBUG("!!! Too many subsequent request failures!\n");
|
||
|
res->state = STATE_SUPPRESS;
|
||
|
if (!req->callback(req, res)) {
|
||
|
destroy_request(req);
|
||
|
destroy_response(res);
|
||
|
}
|
||
|
req_dropped++;
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
/* DNS errors mean instant fail. */
|
||
|
|
||
|
if (!req->addr) {
|
||
|
DEBUG("!!! DNS error!\n");
|
||
|
res->state = STATE_DNSERR;
|
||
|
if (!req->callback(req, res)) {
|
||
|
destroy_request(req);
|
||
|
destroy_response(res);
|
||
|
}
|
||
|
req_errors_net++;
|
||
|
conn_count++;
|
||
|
conn_failed++;
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
/* Enforce user limits. */
|
||
|
|
||
|
if (req_count > max_requests) {
|
||
|
DEBUG("!!! Total request limit exceeded!\n");
|
||
|
res->state = STATE_SUPPRESS;
|
||
|
if (!req->callback(req, res)) {
|
||
|
destroy_request(req);
|
||
|
destroy_response(res);
|
||
|
}
|
||
|
req_dropped++;
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
/* OK, looks like we're good to go. Insert the request
|
||
|
into the the queue. */
|
||
|
|
||
|
#ifdef QUEUE_FILO
|
||
|
|
||
|
qe = q_tail;
|
||
|
q_tail = ck_alloc(sizeof(struct queue_entry));
|
||
|
q_tail->req = req;
|
||
|
q_tail->res = res;
|
||
|
q_tail->prev = qe;
|
||
|
|
||
|
if (q_tail->prev) q_tail->prev->next = q_tail;
|
||
|
|
||
|
if (!queue) queue = q_tail;
|
||
|
|
||
|
#else
|
||
|
|
||
|
qe = queue;
|
||
|
|
||
|
queue = ck_alloc(sizeof(struct queue_entry));
|
||
|
queue->req = req;
|
||
|
queue->res = res;
|
||
|
queue->next = qe;
|
||
|
|
||
|
if (queue->next) queue->next->prev = queue;
|
||
|
|
||
|
#endif /* ^QUEUE_FILO */
|
||
|
|
||
|
queue_cur++;
|
||
|
req_count++;
|
||
|
|
||
|
}
|
||
|
|
||
|
|
||
|
/* Check SSL properties, raise security alerts if necessary. We do not perform
|
||
|
a very thorough validation - we do not check for valid root CAs, bad ciphers,
|
||
|
SSLv2 support, etc - as these are covered well by network-level security
|
||
|
assessment tools anyway.
|
||
|
|
||
|
We might eventually want to check aliases or support TLS SNI. */
|
||
|
|
||
|
static void check_ssl(struct conn_entry* c) {
|
||
|
X509 *p;
|
||
|
|
||
|
p = SSL_get_peer_certificate(c->srv_ssl);
|
||
|
|
||
|
if (p) {
|
||
|
u32 cur_time = time(0);
|
||
|
char *issuer, *host, *req_host;
|
||
|
|
||
|
/* Check for certificate expiration... */
|
||
|
|
||
|
if (ASN1_UTCTIME_cmp_time_t(p->cert_info->validity->notBefore, cur_time)
|
||
|
!= -1 ||
|
||
|
ASN1_UTCTIME_cmp_time_t(p->cert_info->validity->notAfter, cur_time)
|
||
|
!= 1)
|
||
|
problem(PROB_SSL_CERT_DATE, c->q->req, 0, 0,
|
||
|
host_pivot(c->q->req->pivot), 0);
|
||
|
|
||
|
/* Check for self-signed certs or no issuer data. */
|
||
|
|
||
|
issuer = X509_NAME_oneline(p->cert_info->issuer,NULL,0);
|
||
|
|
||
|
if (!issuer || !p->name || !strcmp(issuer, p->name))
|
||
|
problem(PROB_SSL_SELF_CERT, c->q->req, 0, (u8*)issuer,
|
||
|
host_pivot(c->q->req->pivot), 0);
|
||
|
else
|
||
|
problem(PROB_SSL_CERT, c->q->req, 0, (u8*)issuer,
|
||
|
host_pivot(c->q->req->pivot), 0);
|
||
|
|
||
|
free(issuer);
|
||
|
|
||
|
/* Extract CN= from certificate name, compare to destination host. */
|
||
|
|
||
|
host = strrchr(p->name, '=');
|
||
|
req_host = (char*)c->q->req->host;
|
||
|
|
||
|
if (host) {
|
||
|
host++;
|
||
|
if (host[0] == '*' && host[1] == '.') {
|
||
|
host++;
|
||
|
if (strlen(req_host) > strlen(host))
|
||
|
req_host += strlen(req_host) - strlen(host);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (!host || strcasecmp(host, req_host))
|
||
|
problem(PROB_SSL_BAD_HOST, c->q->req, 0, (u8*)host,
|
||
|
host_pivot(c->q->req->pivot), 0);
|
||
|
|
||
|
X509_free(p);
|
||
|
|
||
|
} else problem(PROB_SSL_NO_CERT, c->q->req, 0, 0,
|
||
|
host_pivot(c->q->req->pivot), 0);
|
||
|
|
||
|
c->ssl_checked = 1;
|
||
|
}
|
||
|
|
||
|
|
||
|
/* Associates a queue entry with an existing connection (if 'use_c' is
|
||
|
non-NULL), or creates a new connection to host (if 'use_c' NULL). */
|
||
|
|
||
|
static void conn_associate(struct conn_entry* use_c, struct queue_entry* q) {
|
||
|
struct conn_entry* c;
|
||
|
|
||
|
if (use_c) {
|
||
|
|
||
|
c = use_c;
|
||
|
c->reused = 1;
|
||
|
|
||
|
} else {
|
||
|
|
||
|
struct sockaddr_in sin;
|
||
|
|
||
|
/* OK, we need to create a new connection list entry and connect
|
||
|
it to a target host. */
|
||
|
|
||
|
c = ck_alloc(sizeof(struct conn_entry));
|
||
|
|
||
|
conn_count++;
|
||
|
|
||
|
c->proto = q->req->proto;
|
||
|
c->addr = q->req->addr;
|
||
|
c->port = q->req->port;
|
||
|
|
||
|
c->fd = socket(PF_INET, SOCK_STREAM, 0);
|
||
|
|
||
|
if (c->fd < 0) {
|
||
|
|
||
|
connect_error:
|
||
|
|
||
|
if (c->fd >=0) close(c->fd);
|
||
|
q->res->state = STATE_LOCALERR;
|
||
|
destroy_unlink_queue(q, q->req->callback(q->req, q->res));
|
||
|
req_errors_net++;
|
||
|
req_errors_cur++;
|
||
|
|
||
|
ck_free(c);
|
||
|
conn_failed++;
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
sin.sin_family = PF_INET;
|
||
|
sin.sin_port = htons(c->port);
|
||
|
|
||
|
memcpy(&sin.sin_addr, &q->req->addr, 4);
|
||
|
|
||
|
fcntl(c->fd, F_SETFL, O_NONBLOCK);
|
||
|
|
||
|
if (connect(c->fd, (struct sockaddr*) &sin, sizeof(struct sockaddr_in)) &&
|
||
|
(errno != EINPROGRESS)) goto connect_error;
|
||
|
|
||
|
/* HTTPS also requires SSL state to be initialized at this point. */
|
||
|
|
||
|
if (c->proto == PROTO_HTTPS) {
|
||
|
|
||
|
c->srv_ctx = SSL_CTX_new(SSLv23_client_method());
|
||
|
|
||
|
if (!c->srv_ctx) goto connect_error;
|
||
|
|
||
|
SSL_CTX_set_mode(c->srv_ctx, SSL_MODE_ENABLE_PARTIAL_WRITE |
|
||
|
SSL_MODE_ACCEPT_MOVING_WRITE_BUFFER);
|
||
|
|
||
|
c->srv_ssl = SSL_new(c->srv_ctx);
|
||
|
|
||
|
if (!c->srv_ssl) {
|
||
|
SSL_CTX_free(c->srv_ctx);
|
||
|
goto connect_error;
|
||
|
}
|
||
|
|
||
|
SSL_set_fd(c->srv_ssl, c->fd);
|
||
|
SSL_set_connect_state(c->srv_ssl);
|
||
|
|
||
|
}
|
||
|
|
||
|
/* Make it official. */
|
||
|
|
||
|
c->next = conn;
|
||
|
conn = c;
|
||
|
if (c->next) c->next->prev = c;
|
||
|
|
||
|
conn_cur++;
|
||
|
|
||
|
}
|
||
|
|
||
|
c->q = q;
|
||
|
q->c = c;
|
||
|
|
||
|
q->res->state = STATE_CONNECT;
|
||
|
c->req_start = c->last_rw = time(0);
|
||
|
c->write_buf = build_request_data(q->req);
|
||
|
c->write_len = strlen((char*)c->write_buf);
|
||
|
|
||
|
}
|
||
|
|
||
|
|
||
|
/* Processes the queue. Returns the number of queue entries remaining,
|
||
|
0 if none. Will do a blocking select() to wait for socket state changes
|
||
|
(or timeouts) if no data available to process. This is the main
|
||
|
routine for the scanning loop. */
|
||
|
|
||
|
u32 next_from_queue(void) {
|
||
|
|
||
|
u32 cur_time = time(0);
|
||
|
|
||
|
if (conn_cur) {
|
||
|
static struct pollfd* p;
|
||
|
struct conn_entry* c = conn;
|
||
|
u32 i = 0;
|
||
|
|
||
|
/* First, go through all connections, handle connects, SSL handshakes, data
|
||
|
reads and writes, and exceptions. */
|
||
|
|
||
|
if (p) free(p);
|
||
|
p = __DFL_ck_alloc(sizeof(struct pollfd) * conn_cur);
|
||
|
|
||
|
while (c) {
|
||
|
p[i].fd = c->fd;
|
||
|
p[i].events = POLLIN | POLLERR | POLLHUP;
|
||
|
if (c->write_len - c->write_off || c->SSL_rd_w_wr)
|
||
|
p[i].events |= POLLOUT;
|
||
|
c = c->next;
|
||
|
i++;
|
||
|
}
|
||
|
|
||
|
poll(p, conn_cur, 100);
|
||
|
|
||
|
c = conn;
|
||
|
|
||
|
for (i=0;i<conn_cur;i++) {
|
||
|
|
||
|
struct conn_entry* next = c->next;
|
||
|
|
||
|
/* Connection closed: see if we have any pending data to write. If yes,
|
||
|
fail. If not, try parse_response() to see if we have all the data.
|
||
|
Clean up. */
|
||
|
|
||
|
if (p[i].revents & (POLLERR|POLLHUP)) {
|
||
|
|
||
|
u8 keep;
|
||
|
|
||
|
network_error:
|
||
|
|
||
|
keep = 0;
|
||
|
|
||
|
/* Retry requests that were sent on old keep-alive connections
|
||
|
and failed instantly with no data read; might be just that
|
||
|
the server got bored. */
|
||
|
|
||
|
if (c->q && c->reused && !c->read_len) {
|
||
|
|
||
|
c->q->res->state = STATE_NOTINIT;
|
||
|
c->q->c = 0;
|
||
|
c->q = 0;
|
||
|
|
||
|
req_retried++;
|
||
|
|
||
|
} else if (c->q) {
|
||
|
|
||
|
if (c->write_len - c->write_off || !c->read_len) {
|
||
|
c->q->res->state = STATE_CONNERR;
|
||
|
keep = c->q->req->callback(c->q->req, c->q->res);
|
||
|
req_errors_net++;
|
||
|
req_errors_cur++;
|
||
|
} else {
|
||
|
if (parse_response(c->q->req, c->q->res, c->read_buf,
|
||
|
c->read_len, 0) != 2) {
|
||
|
c->q->res->state = STATE_OK;
|
||
|
keep = c->q->req->callback(c->q->req, c->q->res);
|
||
|
if (req_errors_cur <= max_fail)
|
||
|
req_errors_cur = 0;
|
||
|
} else {
|
||
|
c->q->res->state = STATE_CONNERR;
|
||
|
keep = c->q->req->callback(c->q->req, c->q->res);
|
||
|
req_errors_net++;
|
||
|
req_errors_cur++;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
}
|
||
|
|
||
|
destroy_unlink_conn(c, keep);
|
||
|
|
||
|
} else
|
||
|
|
||
|
/* Incoming data (when SSL_write() did not request a read) or
|
||
|
continuation of SSL_read() possible (if SSL_read() wanted to write).
|
||
|
Process data, call parse_response() to see if w have all we wanted.
|
||
|
Update event timers. */
|
||
|
|
||
|
if (((p[i].revents & POLLIN) && !c->SSL_wr_w_rd) ||
|
||
|
((p[i].revents & POLLOUT) && c->SSL_rd_w_wr)) {
|
||
|
|
||
|
if (c->q) {
|
||
|
s32 read_res;
|
||
|
u8 p_ret;
|
||
|
|
||
|
c->read_buf = ck_realloc(c->read_buf, c->read_len + READ_CHUNK + 1);
|
||
|
|
||
|
if (c->proto == PROTO_HTTPS) {
|
||
|
s32 ssl_err;
|
||
|
|
||
|
c->SSL_rd_w_wr = 0;
|
||
|
|
||
|
read_res = SSL_read(c->srv_ssl, c->read_buf + c->read_len,
|
||
|
READ_CHUNK);
|
||
|
|
||
|
if (!read_res) goto network_error;
|
||
|
|
||
|
if (read_res < 0) {
|
||
|
ssl_err = SSL_get_error(c->srv_ssl, read_res);
|
||
|
if (ssl_err == SSL_ERROR_WANT_WRITE) c->SSL_rd_w_wr = 1;
|
||
|
else if (ssl_err != SSL_ERROR_WANT_READ) goto network_error;
|
||
|
read_res = 0;
|
||
|
}
|
||
|
|
||
|
} else {
|
||
|
read_res = read(c->fd, c->read_buf + c->read_len, READ_CHUNK);
|
||
|
if (read_res <= 0) goto network_error;
|
||
|
}
|
||
|
|
||
|
bytes_recv += read_res;
|
||
|
|
||
|
c->read_len += read_res;
|
||
|
c->read_buf = ck_realloc(c->read_buf, c->read_len + 1);
|
||
|
|
||
|
c->read_buf[c->read_len] = 0; /* NUL-terminate for sanity. */
|
||
|
|
||
|
/* We force final parse_response() if response length exceeded
|
||
|
size_limit by more than 4 kB. The assumption here is that
|
||
|
it is less expensive to redo the connection than it is
|
||
|
to continue receiving an unknown amount of extra data. */
|
||
|
|
||
|
p_ret = parse_response(c->q->req, c->q->res, c->read_buf, c->read_len,
|
||
|
(c->read_len > (size_limit + READ_CHUNK)) ? 0 : 1);
|
||
|
|
||
|
if (!p_ret || p_ret == 3) {
|
||
|
|
||
|
u8 keep;
|
||
|
|
||
|
c->q->res->state = STATE_OK;
|
||
|
keep = c->q->req->callback(c->q->req, c->q->res);
|
||
|
|
||
|
/* If we got all data without hitting the limit, and if
|
||
|
"Connection: close" is not indicated, we might want
|
||
|
to keep the connection for future use. */
|
||
|
|
||
|
if (c->read_len > (size_limit + READ_CHUNK) || p_ret)
|
||
|
destroy_unlink_conn(c, keep); else reuse_conn(c, keep);
|
||
|
|
||
|
if (req_errors_cur <= max_fail)
|
||
|
req_errors_cur = 0;
|
||
|
|
||
|
} else if (p_ret == 2) {
|
||
|
c->q->res->state = STATE_RESPERR;
|
||
|
destroy_unlink_conn(c, c->q->req->callback(c->q->req, c->q->res));
|
||
|
req_errors_http++;
|
||
|
req_errors_cur++;
|
||
|
} else {
|
||
|
c->last_rw = cur_time;
|
||
|
c->q->res->state = STATE_RECEIVE;
|
||
|
}
|
||
|
|
||
|
} else destroy_unlink_conn(c, 0); /* Unsolicited response! */
|
||
|
|
||
|
} else
|
||
|
|
||
|
/* Write possible (if SSL_read() did not request a write), or
|
||
|
continuation of SSL_write() possible (if SSL_write() wanted to
|
||
|
read). Send data, update timers, etc. */
|
||
|
|
||
|
if (((p[i].revents & POLLOUT) && !c->SSL_rd_w_wr) ||
|
||
|
((p[i].revents & POLLIN) && c->SSL_wr_w_rd)) {
|
||
|
|
||
|
if (c->write_len - c->write_off) {
|
||
|
s32 write_res;
|
||
|
|
||
|
if (c->proto == PROTO_HTTPS) {
|
||
|
s32 ssl_err;
|
||
|
|
||
|
c->SSL_wr_w_rd = 0;
|
||
|
|
||
|
write_res = SSL_write(c->srv_ssl, c->write_buf + c->write_off,
|
||
|
c->write_len - c->write_off);
|
||
|
|
||
|
if (!write_res) goto network_error;
|
||
|
|
||
|
if (write_res < 0) {
|
||
|
ssl_err = SSL_get_error(c->srv_ssl, write_res);
|
||
|
if (ssl_err == SSL_ERROR_WANT_READ) c->SSL_wr_w_rd = 1;
|
||
|
else if (ssl_err != SSL_ERROR_WANT_WRITE) goto network_error;
|
||
|
write_res = 0;
|
||
|
} else if (!c->ssl_checked) check_ssl(c);
|
||
|
|
||
|
} else {
|
||
|
write_res = write(c->fd, c->write_buf + c->write_off,
|
||
|
c->write_len - c->write_off);
|
||
|
if (write_res <= 0) goto network_error;
|
||
|
}
|
||
|
|
||
|
bytes_sent += write_res;
|
||
|
|
||
|
c->write_off += write_res;
|
||
|
|
||
|
c->q->res->state = STATE_SEND;
|
||
|
|
||
|
c->last_rw = cur_time;
|
||
|
|
||
|
}
|
||
|
|
||
|
} else
|
||
|
|
||
|
/* Nothing happened. Check timeouts, kill stale connections.
|
||
|
Active (c->q) connections get checked for total and last I/O
|
||
|
timeouts. Non-active connctions must just not exceed
|
||
|
idle_tmout. */
|
||
|
|
||
|
if (!p[i].revents) {
|
||
|
|
||
|
u8 keep = 0;
|
||
|
|
||
|
if ((c->q && (cur_time - c->last_rw > rw_tmout ||
|
||
|
cur_time - c->req_start > resp_tmout)) ||
|
||
|
(!c->q && (cur_time - c->last_rw > idle_tmout)) ||
|
||
|
(!c->q && tear_down_idle)) {
|
||
|
|
||
|
if (c->q) {
|
||
|
c->q->res->state = STATE_CONNERR;
|
||
|
keep = c->q->req->callback(c->q->req, c->q->res);
|
||
|
req_errors_net++;
|
||
|
req_errors_cur++;
|
||
|
conn_busy_tmout++;
|
||
|
} else {
|
||
|
conn_idle_tmout++;
|
||
|
tear_down_idle = 0;
|
||
|
}
|
||
|
|
||
|
destroy_unlink_conn(c, keep);
|
||
|
|
||
|
}
|
||
|
|
||
|
}
|
||
|
|
||
|
c = next;
|
||
|
|
||
|
}
|
||
|
|
||
|
}
|
||
|
|
||
|
/* OK, connection-handling affairs taken care of! Next, let's go through all
|
||
|
queue entries NOT currently associated with a connection, and try to
|
||
|
pair them up with something. */
|
||
|
|
||
|
if (queue_cur) {
|
||
|
struct queue_entry *q = queue;
|
||
|
|
||
|
while (q) {
|
||
|
struct queue_entry* next = q->next;
|
||
|
u32 to_host = 0;
|
||
|
|
||
|
if (!q->c) {
|
||
|
|
||
|
struct conn_entry* c = conn;
|
||
|
|
||
|
/* Let's try to find a matching, idle connection first. */
|
||
|
|
||
|
while (c) {
|
||
|
struct conn_entry* cnext = c->next;
|
||
|
|
||
|
if (c->addr == q->req->addr && (++to_host) &&
|
||
|
c->port == q->req->port &&
|
||
|
c->proto == q->req->proto && !c->q) {
|
||
|
conn_associate(c, q);
|
||
|
goto next_q_entry;
|
||
|
}
|
||
|
|
||
|
c = cnext;
|
||
|
}
|
||
|
|
||
|
/* No match. If we are out of slots, request some other idle
|
||
|
connection to be nuked soon. */
|
||
|
|
||
|
if (to_host < max_conn_host && conn_cur < max_connections) {
|
||
|
conn_associate(0, q);
|
||
|
goto next_q_entry;
|
||
|
} else tear_down_idle = 1;
|
||
|
|
||
|
}
|
||
|
|
||
|
next_q_entry:
|
||
|
|
||
|
q = next;
|
||
|
|
||
|
}
|
||
|
|
||
|
}
|
||
|
|
||
|
return queue_cur;
|
||
|
}
|
||
|
|
||
|
|
||
|
/* Helper function for request / response dumpers: */
|
||
|
static void dump_params(struct param_array* par) {
|
||
|
u32 i;
|
||
|
|
||
|
for (i=0;i<par->c;i++) {
|
||
|
|
||
|
switch (par->t[i]) {
|
||
|
case PARAM_NONE: SAY(" <<<<"); break;
|
||
|
case PARAM_PATH: SAY(" PATH"); break;
|
||
|
case PARAM_PATH_S: SAY(" PT_S"); break;
|
||
|
case PARAM_PATH_C: SAY(" PT_C"); break;
|
||
|
case PARAM_PATH_E: SAY(" PT_E"); break;
|
||
|
case PARAM_PATH_D: SAY(" PT_D"); break;
|
||
|
case PARAM_QUERY: SAY(" QUER"); break;
|
||
|
case PARAM_QUERY_S: SAY(" QR_S"); break;
|
||
|
case PARAM_QUERY_C: SAY(" QR_C"); break;
|
||
|
case PARAM_QUERY_E: SAY(" QR_E"); break;
|
||
|
case PARAM_QUERY_D: SAY(" QR_D"); break;
|
||
|
case PARAM_POST: SAY(" POST"); break;
|
||
|
case PARAM_POST_F: SAY(" FILE"); break;
|
||
|
case PARAM_POST_O: SAY(" OPAQ"); break;
|
||
|
case PARAM_HEADER: SAY(" head"); break;
|
||
|
case PARAM_COOKIE: SAY(" cook"); break;
|
||
|
default: SAY(" ????");
|
||
|
}
|
||
|
|
||
|
SAY(":%-20s = '%s'\n",
|
||
|
par->n[i] ? par->n[i] : (u8*)"-",
|
||
|
par->v[i] ? par->v[i] : (u8*)"-");
|
||
|
|
||
|
}
|
||
|
}
|
||
|
|
||
|
|
||
|
/* Creates a working copy of a request. If all is 0, does not copy
|
||
|
path, query parameters, or POST data (but still copies headers). */
|
||
|
|
||
|
struct http_request* req_copy(struct http_request* req, struct pivot_desc* pv,
|
||
|
u8 all) {
|
||
|
struct http_request* ret;
|
||
|
u32 i;
|
||
|
|
||
|
if (!req) return NULL;
|
||
|
|
||
|
ret = ck_alloc(sizeof(struct http_request));
|
||
|
|
||
|
ret->proto = req->proto;
|
||
|
|
||
|
if (all)
|
||
|
ret->method = ck_strdup(req->method);
|
||
|
else
|
||
|
ret->method = ck_strdup((u8*)"GET");
|
||
|
|
||
|
ret->host = ck_strdup(req->host);
|
||
|
ret->addr = req->addr;
|
||
|
ret->port = req->port;
|
||
|
ret->pivot = pv;
|
||
|
ret->user_val = req->user_val;
|
||
|
|
||
|
/* Copy all the requested data. */
|
||
|
|
||
|
for (i=0;i<req->par.c;i++)
|
||
|
if (all || HEADER_SUBTYPE(req->par.t[i]))
|
||
|
set_value(req->par.t[i], req->par.n[i], req->par.v[i], -1,
|
||
|
&ret->par);
|
||
|
|
||
|
memcpy(&ret->same_sig, &req->same_sig, sizeof(struct http_sig));
|
||
|
|
||
|
return ret;
|
||
|
|
||
|
}
|
||
|
|
||
|
|
||
|
/* Creates a copy of a response. */
|
||
|
|
||
|
struct http_response* res_copy(struct http_response* res) {
|
||
|
struct http_response* ret;
|
||
|
u32 i;
|
||
|
|
||
|
if (!res) return NULL;
|
||
|
|
||
|
ret = ck_alloc(sizeof(struct http_response));
|
||
|
|
||
|
ret->state = res->state;
|
||
|
ret->code = res->code;
|
||
|
ret->msg = res->msg ? ck_strdup(res->msg) : NULL;
|
||
|
ret->warn = res->warn;
|
||
|
|
||
|
for (i=0;i<res->hdr.c;i++)
|
||
|
set_value(res->hdr.t[i], res->hdr.n[i], res->hdr.v[i], -1, &ret->hdr);
|
||
|
|
||
|
ret->pay_len = res->pay_len;
|
||
|
|
||
|
if (res->pay_len) {
|
||
|
ret->payload = ck_alloc(res->pay_len);
|
||
|
memcpy(ret->payload, res->payload, res->pay_len);
|
||
|
}
|
||
|
|
||
|
memcpy(&ret->sig, &res->sig, sizeof(struct http_sig));
|
||
|
|
||
|
ret->sniff_mime_id = res->sniff_mime_id;
|
||
|
ret->decl_mime_id = res->decl_mime_id;
|
||
|
ret->doc_type = res->doc_type;
|
||
|
ret->css_type = res->css_type;
|
||
|
ret->js_type = res->js_type;
|
||
|
ret->json_safe = res->json_safe;
|
||
|
ret->stuff_checked = res->stuff_checked;
|
||
|
ret->scraped = res->scraped;
|
||
|
|
||
|
if (res->meta_charset)
|
||
|
ret->meta_charset = ck_strdup(res->meta_charset);
|
||
|
|
||
|
if (res->header_charset)
|
||
|
ret->header_charset = ck_strdup(res->header_charset);
|
||
|
|
||
|
if (res->header_mime)
|
||
|
ret->header_mime = ck_strdup(res->header_mime);
|
||
|
|
||
|
ret->sniffed_mime = res->sniffed_mime;
|
||
|
|
||
|
return ret;
|
||
|
|
||
|
}
|
||
|
|
||
|
|
||
|
/* Dumps HTTP request data, for diagnostic purposes: */
|
||
|
|
||
|
void dump_http_request(struct http_request* r) {
|
||
|
|
||
|
u8 *new_url, *tmp;
|
||
|
|
||
|
SAY("\n== HTTP REQUEST %p ==\n\nBasic values:\n", r);
|
||
|
|
||
|
SAY(" Proto = %u\n", r->proto);
|
||
|
SAY(" Method = %s\n", r->method ? r->method : (u8*)"(GET)");
|
||
|
SAY(" Host = %s\n", r->host);
|
||
|
SAY(" Addr = %u.%u.%u.%u\n", ((u8*)&r->addr)[0], ((u8*)&r->addr)[1],
|
||
|
((u8*)&r->addr)[2], ((u8*)&r->addr)[3]);
|
||
|
SAY(" Port = %d\n", r->port);
|
||
|
SAY(" Xrefs = pivot %p, handler %p, user %d\n", r->pivot,
|
||
|
r->callback, r->user_val);
|
||
|
|
||
|
new_url = serialize_path(r, 1, 0);
|
||
|
|
||
|
SAY("\nURLs:\n Original = %s\n"
|
||
|
" Synthetic = %s\n", r->orig_url ? r->orig_url : (u8*)"[none]",
|
||
|
new_url);
|
||
|
|
||
|
ck_free(new_url);
|
||
|
|
||
|
SAY("\nParameter array:\n");
|
||
|
|
||
|
dump_params(&r->par);
|
||
|
|
||
|
SAY("\nRaw request data:\n\n");
|
||
|
|
||
|
tmp = build_request_data(r);
|
||
|
SAY("%s\n",tmp);
|
||
|
ck_free(tmp);
|
||
|
|
||
|
SAY("\n== END OF REQUEST ==\n");
|
||
|
|
||
|
}
|
||
|
|
||
|
|
||
|
/* Dumps HTTP response data, likewise: */
|
||
|
|
||
|
void dump_http_response(struct http_response* r) {
|
||
|
|
||
|
SAY("\n== HTTP RESPONSE %p ==\n\nBasic values:\n", r);
|
||
|
|
||
|
SAY(" State = %u\n", r->state);
|
||
|
SAY(" Response = %u ('%s')\n", r->code, r->msg);
|
||
|
SAY(" Flags = %08x\n", r->warn);
|
||
|
SAY(" Data len = %u\n", r->pay_len);
|
||
|
|
||
|
SAY("\nParameter array:\n");
|
||
|
|
||
|
dump_params(&r->hdr);
|
||
|
|
||
|
if (r->payload) SAY("\nPayload data (%u):\n\n%s\n", r->pay_len, r->payload);
|
||
|
|
||
|
SAY("\n== END OF RESPONSE ==\n");
|
||
|
|
||
|
}
|
||
|
|
||
|
/* Destroys http state information, for memory profiling. */
|
||
|
|
||
|
void destroy_http() {
|
||
|
u32 i;
|
||
|
struct dns_entry* cur;
|
||
|
|
||
|
for (i=0;i<global_http_par.c;i++) {
|
||
|
ck_free(global_http_par.n[i]);
|
||
|
ck_free(global_http_par.v[i]);
|
||
|
}
|
||
|
|
||
|
ck_free(global_http_par.t);
|
||
|
ck_free(global_http_par.n);
|
||
|
ck_free(global_http_par.v);
|
||
|
|
||
|
while (conn) destroy_unlink_conn(conn,0 );
|
||
|
while (queue) destroy_unlink_queue(queue,0 );
|
||
|
|
||
|
cur = dns;
|
||
|
|
||
|
while (cur) {
|
||
|
struct dns_entry* next = cur->next;
|
||
|
ck_free(cur->name);
|
||
|
ck_free(cur);
|
||
|
cur = next;
|
||
|
}
|
||
|
|
||
|
}
|
||
|
|
||
|
|
||
|
/* Shows some pretty statistics. */
|
||
|
|
||
|
void http_stats(u64 st_time) {
|
||
|
u64 en_time;
|
||
|
struct timeval tv;
|
||
|
|
||
|
gettimeofday(&tv, NULL);
|
||
|
en_time = tv.tv_sec * 1000 + tv.tv_usec / 1000;
|
||
|
|
||
|
SAY("Scan statistics\n"
|
||
|
"---------------\n\n"
|
||
|
cGRA " Scan time : " cNOR "%u:%02u:%02u.%04u\n"
|
||
|
cGRA " HTTP requests : " cNOR "%u sent (%.02f/s), %.02f kB in, "
|
||
|
"%.02f kB out (%.02f kB/s) \n"
|
||
|
cGRA " Compression : " cNOR "%.02f kB in, %.02f kB out "
|
||
|
"(%.02f%% gain) \n"
|
||
|
cGRA " HTTP exceptions : " cNOR "%u net errors, %u proto errors, "
|
||
|
"%u retried, %u drops\n"
|
||
|
cGRA " TCP connections : " cNOR "%u total (%.02f req/conn) \n"
|
||
|
cGRA " TCP exceptions : " cNOR "%u failures, %u timeouts, %u purged\n"
|
||
|
cGRA " External links : " cNOR "%u skipped\n"
|
||
|
cGRA " Reqs pending : " cNOR "%u \n",
|
||
|
|
||
|
/* hrs */ (u32)((en_time - st_time) / 1000 / 60 / 60),
|
||
|
/* min */ (u32)((en_time - st_time) / 1000 / 60) % 60,
|
||
|
/* sec */ (u32)((en_time - st_time) / 1000) % 60,
|
||
|
/* ms */ (u32)((en_time - st_time) % 1000),
|
||
|
|
||
|
req_count - queue_cur,
|
||
|
(float) (req_count - queue_cur / 1.15) * 1000 / (en_time - st_time + 1),
|
||
|
(float) bytes_recv / 1024, (float) bytes_sent / 1024,
|
||
|
(float) (bytes_recv + bytes_sent) / 1.024 / (en_time - st_time + 1),
|
||
|
|
||
|
(float) bytes_deflated / 1024, (float) bytes_inflated / 1024,
|
||
|
((float) bytes_inflated - bytes_deflated) / (bytes_inflated +
|
||
|
bytes_deflated + 1) * 100,
|
||
|
|
||
|
req_errors_net, req_errors_http, req_retried, req_dropped,
|
||
|
|
||
|
conn_count, (float) req_count / conn_count,
|
||
|
conn_failed, conn_busy_tmout, conn_idle_tmout,
|
||
|
url_scope, queue_cur);
|
||
|
}
|