skipfish/http_client.h

419 lines
14 KiB
C

/*
skipfish - high-performance, single-process asynchronous HTTP client
--------------------------------------------------------------------
Author: Michal Zalewski <lcamtuf@google.com>
Copyright 2009, 2010 by Google Inc. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef _HAVE_HTTP_CLIENT_H
#define _HAVE_HTTP_CLIENT_H
#include <openssl/ssl.h>
#include "config.h"
#include "types.h"
#include "alloc-inl.h"
#include "string-inl.h"
/* Generic type-name-value array, used for HTTP headers, etc: */
struct param_array {
u8* t; /* Type */
u8** n; /* Name */
u8** v; /* Value */
u32 c; /* Count */
};
/* Flags for http_request protocol: */
#define PROTO_NONE 0 /* Illegal value */
#define PROTO_HTTP 1 /* Plain-text HTTP */
#define PROTO_HTTPS 2 /* TLS/SSL wrapper */
/* Flags for http_request parameter list entries: */
#define PARAM_NONE 0 /* Empty parameter slot */
#define PARAM_PATH 10 /* Path or parametrized path */
#define PARAM_PATH_S 11 /* - Semicolon element */
#define PARAM_PATH_C 12 /* - Comma element */
#define PARAM_PATH_E 13 /* - Exclamation mark element */
#define PARAM_PATH_D 14 /* - Dollar sign element */
#define PATH_SUBTYPE(_x) ((_x) >= PARAM_PATH && (_x) < PARAM_QUERY)
#define PARAM_QUERY 20 /* Query parameter */
#define PARAM_QUERY_S 21 /* - Semicolon element */
#define PARAM_QUERY_C 22 /* - Comma element */
#define PARAM_QUERY_E 23 /* - Exclamation mark element */
#define PARAM_QUERY_D 24 /* - Dollar sign element */
#define QUERY_SUBTYPE(_x) ((_x) >= PARAM_QUERY && (_x) < PARAM_POST)
#define PARAM_POST 50 /* Post parameter */
#define PARAM_POST_F 51 /* - File field */
#define PARAM_POST_O 52 /* - Non-standard (e.g., JSON) */
#define POST_SUBTYPE(_x) ((_x) >= PARAM_POST && (_x) < PARAM_HEADER)
#define PARAM_HEADER 100 /* Generic HTTP header */
#define PARAM_COOKIE 101 /* - HTTP cookie */
#define HEADER_SUBTYPE(_x) ((_x) >= PARAM_HEADER)
struct http_response;
struct queue_entry;
/* HTTP response signature. */
struct http_sig {
u32 code; /* HTTP response code */
u32 data[FP_SIZE]; /* Response fingerprint data */
};
/* HTTP request descriptor: */
struct http_request {
u8 proto; /* Protocol (PROTO_*) */
u8* method; /* HTTP method (GET, POST, ...) */
u8* host; /* Host name */
u32 addr; /* Resolved IP address */
u16 port; /* Port number to connect to */
u8* orig_url; /* Copy of the original URL */
struct param_array par; /* Parameters, headers, cookies */
struct pivot_desc *pivot; /* Pivot descriptor */
u32 user_val; /* Can be used freely */
u8 (*callback)(struct http_request*, struct http_response*);
/* Callback to invoke when done */
struct http_sig same_sig; /* Used by secondary ext fuzz. */
};
/* Flags for http_response completion state: */
#define STATE_NOTINIT 0 /* Request not sent */
#define STATE_CONNECT 1 /* Connecting... */
#define STATE_SEND 2 /* Sending request */
#define STATE_RECEIVE 3 /* Waiting for response */
#define STATE_OK 100 /* Proper fetch */
#define STATE_DNSERR 101 /* DNS error */
#define STATE_LOCALERR 102 /* Socket or routing error */
#define STATE_CONNERR 103 /* Connection failed */
#define STATE_RESPERR 104 /* Response not valid */
#define STATE_SUPPRESS 200 /* Dropped (limits / errors) */
/* Flags for http_response warnings: */
#define WARN_NONE 0 /* No warnings */
#define WARN_PARTIAL 1 /* Incomplete read */
#define WARN_TRAIL 2 /* Trailing request garbage */
#define WARN_CFL_HDR 4 /* Conflicting headers */
/* HTTP response descriptor: */
struct http_response {
u32 state; /* HTTP convo state (STATE_*) */
u32 code; /* HTTP response code */
u8* msg; /* HTTP response message */
u32 warn; /* Warning flags */
u8 cookies_set; /* Sets cookies? */
struct param_array hdr; /* Server header, cookie list */
u32 pay_len; /* Response payload length */
u8* payload; /* Response payload data */
struct http_sig sig; /* Response signature data */
/* Various information populated by content checks: */
u8 sniff_mime_id; /* Sniffed MIME (MIME_*) */
u8 decl_mime_id; /* Declared MIME (MIME_*) */
u8* meta_charset; /* META tag charset value */
u8* header_charset; /* Content-Type charset value */
u8* header_mime; /* Content-Type MIME type */
u8* sniffed_mime; /* Detected MIME type (ref) */
/* Everything below is of interest to scrape_response() only: */
u8 doc_type; /* 0 - tbd, 1 - bin, 2 - ascii */
u8 css_type; /* 0 - tbd, 1 - other, 2 - css */
u8 js_type; /* 0 - tbd, 1 - other, 2 - js */
u8 json_safe; /* 0 - no, 1 - yes */
u8 stuff_checked; /* check_stuff() called? */
u8 scraped; /* scrape_response() called? */
};
/* Open keep-alive connection descriptor: */
struct conn_entry {
s32 fd; /* The actual file descriptor */
u8 proto; /* Protocol (PROTO_*) */
u32 addr; /* Destination IP */
u32 port; /* Destination port */
u8 reused; /* Used for earier requests? */
u32 req_start; /* Unix time: request start */
u32 last_rw; /* Unix time: last read / write */
SSL_CTX *srv_ctx; /* SSL context */
SSL *srv_ssl;
u8 SSL_rd_w_wr; /* SSL_read() wants to write? */
u8 SSL_wr_w_rd; /* SSL_write() wants to read? */
u8 ssl_checked; /* SSL state checked? */
u8* read_buf; /* Current read buffer */
u32 read_len;
u8* write_buf; /* Pending write buffer */
u32 write_off; /* Current write offset */
u32 write_len;
struct queue_entry* q; /* Current queue entry */
struct conn_entry* prev; /* Previous connection entry */
struct conn_entry* next; /* Next connection entry */
};
/* Request queue descriptor: */
struct queue_entry {
struct http_request* req; /* Request descriptor */
struct http_response* res; /* Response descriptor */
struct conn_entry* c; /* Connection currently used */
struct queue_entry* prev; /* Previous queue entry */
struct queue_entry* next; /* Next queue entry */
};
/* DNS cache item: */
struct dns_entry {
u8* name; /* Name requested */
u32 addr; /* IP address (0 = bad host) */
struct dns_entry* next; /* Next cache entry */
};
/* Simplified macros to manipulate param_arrays: */
#define ADD(_ar,_t,_n,_v) do { \
u32 _cur = (_ar)->c++; \
(_ar)->t = ck_realloc((_ar)->t, (_ar)->c); \
(_ar)->n = ck_realloc((_ar)->n, (_ar)->c * sizeof(u8*)); \
(_ar)->v = ck_realloc((_ar)->v, (_ar)->c * sizeof(u8*)); \
(_ar)->t[cur] = _t; \
(_ar)->n[cur] = (_n) ? ck_strdup(_n) : 0; \
(_ar)->v[cur] = (_v) ? ck_strdup(_v) : 0; \
} while (0)
#define FREE(_ar) do { \
while ((_ar)->c--) { \
free((_ar)->n[(_ar)->c]); \
free((_ar)->v[(_ar)->c]); \
} \
free((_ar)->t); \
free((_ar)->n); \
free((_ar)->v); \
} while (0)
/* Extracts parameter value from param_array. Name is matched if
non-NULL. Returns pointer to value data, not a duplicate string;
NULL if no match found. */
u8* get_value(u8 type, u8* name, u32 offset, struct param_array* par);
/* Inserts or overwrites parameter value in param_array. If offset
== -1, will append parameter to list. Duplicates strings,
name and val can be NULL. */
void set_value(u8 type, u8* name, u8* val, s32 offset, struct param_array* par);
/* Simplified macros for value table access: */
#define GET_HDR(_name, _p) get_value(PARAM_HEADER, _name, 0, _p)
#define SET_HDR(_name, _val, _p) set_value(PARAM_HEADER, _name, _val, -1, _p)
#define GET_CK(_name, _p) get_value(PARAM_COOKIE, _name, 0, _p)
#define SET_CK(_name, _val, _p) set_value(PARAM_COOKIE, _name, _val, 0, _p)
void tokenize_path(u8* str, struct http_request* req, u8 add_slash);
/* Convert a fully-qualified or relative URL string to a proper http_request
representation. Returns 0 on success, 1 on format error. */
u8 parse_url(u8* url, struct http_request* req, struct http_request* ref);
/* URL-decodes a string. 'Plus' parameter governs the behavior on +
signs (as they have a special meaning only in query params, not in path). */
u8* url_decode_token(u8* str, u32 len, u8 plus);
/* URL-encodes a string according to custom rules. The assumption here is that
the data is already tokenized as "special" boundaries such as ?, =, &, /,
;, so these characters must always be escaped if present in tokens. We
otherwise let pretty much everything else go through, as it may help with
the exploitation of certain vulnerabilities. */
u8* url_encode_token(u8* str, u32 len);
/* Reconstructs URI from http_request data. Includes protocol and host
if with_host is non-zero. */
u8* serialize_path(struct http_request* req, u8 with_host, u8 with_post);
/* Looks up IP for a particular host, returns data in network order.
Uses standard resolver, so it is slow and blocking, but we only
expect to call it a couple of times. */
u32 maybe_lookup_host(u8* name);
/* Creates an ad hoc DNS cache entry, to override NS lookups. */
void fake_host(u8* name, u32 addr);
/* Schedules a new asynchronous request; req->callback() will be invoked when
the request is completed. */
void async_request(struct http_request* req);
/* Prepares a serialized HTTP buffer to be sent over the network. */
u8* build_request_data(struct http_request* req);
/* Parses a network buffer containing raw HTTP response received over the
network ('more' == the socket is still available for reading). Returns 0
if response parses OK, 1 if more data should be read from the socket,
2 if the response seems invalid. */
u8 parse_response(struct http_request* req, struct http_response* res, u8* data,
u32 data_len, u8 more);
/* Processes the queue. Returns the number of queue entries remaining,
0 if none. Will do a blocking select() to wait for socket state changes
(or timeouts) if no data available to process. This is the main
routine for the scanning loop. */
u32 next_from_queue(void);
/* Dumps HTTP request stats, for debugging purposes: */
void dump_http_request(struct http_request* r);
/* Dumps HTTP response stats, for debugging purposes: */
void dump_http_response(struct http_response* r);
/* Fingerprints a response: */
void fprint_response(struct http_response* res);
/* Performs a deep free() of sturct http_request */
void destroy_request(struct http_request* req);
/* Performs a deep free() of sturct http_response */
void destroy_response(struct http_response* res);
/* Creates a working copy of a request. If all is 0, does not copy
path, query parameters, or POST data (but still copies headers). */
struct http_request* req_copy(struct http_request* req, struct pivot_desc* pv,
u8 all);
/* Creates a copy of a response. */
struct http_response* res_copy(struct http_response* res);
/* Various settings and counters exported to other modules: */
extern u32 max_connections,
max_conn_host,
max_requests,
max_fail,
idle_tmout,
resp_tmout,
rw_tmout,
size_limit,
req_errors_net,
req_errors_http,
req_errors_cur,
req_count,
req_dropped,
req_retried,
url_scope,
conn_count,
conn_idle_tmout,
conn_busy_tmout,
conn_failed,
queue_cur;
extern u64 bytes_sent,
bytes_recv,
bytes_deflated,
bytes_inflated;
extern u8 ignore_cookies;
/* Flags for browser type: */
#define BROWSER_FAST 0 /* Minimimal HTTP headers */
#define BROWSER_MSIE 1 /* Try to mimic MSIE */
#define BROWSER_FFOX 2 /* Try to mimic Firefox */
extern u8 browser_type;
/* Flags for authentication type: */
#define AUTH_NONE 0 /* No authentication */
#define AUTH_BASIC 1 /* 'Basic' HTTP auth */
extern u8 auth_type;
extern u8 *auth_user,
*auth_pass;
/* Global HTTP cookies, extra headers: */
extern struct param_array global_http_par;
/* Destroys http state information, for memory profiling. */
void destroy_http();
/* Shows some pretty statistics. */
void http_stats(u64 st_time);
#endif /* !_HAVE_HTTP_CLIENT_H */