1.53b-1.54b: Improved loop detector and JSON discriminator

- Improved loop detector on mappings that only look at the last path segment.
- Slight improvement to JSON discriminator.
This commit is contained in:
Steve Pinkham 2010-08-09 10:49:43 -04:00
parent c4ad54fe2f
commit 701f665ab9
7 changed files with 114 additions and 15 deletions

View File

@ -1,3 +1,13 @@
Version 1.54b:
--------------
- Improved loop detector on mappings that only look at the last path segment.
Version 1.53b:
--------------
- Slight improvement to JSON discriminator.
Version 1.52b:
--------------

View File

@ -20,7 +20,7 @@
#
PROGNAME = skipfish
VERSION = 1.52b
VERSION = 1.54b
OBJFILES = http_client.c database.c crawler.c analysis.c report.c
INCFILES = alloc-inl.h string-inl.h debug.h types.h http_client.h \

View File

@ -125,6 +125,7 @@ static void test_add_link(u8* str, struct http_request* ref,
if (!strncasecmp((char*)str, "skipfish:", 10) ||
!strncasecmp((char*)str, "//skipfish.invalid/", 20) ||
inl_strcasestr(str, (u8*) "/" BOGUS_FILE) ||
!strncasecmp((char*)str, "http://skipfish.invalid/", 25)) return;
/* Don't add links that look like they came from JS code with fragmented HTML
@ -646,9 +647,11 @@ void scrape_response(struct http_request* req, struct http_response* res) {
res->scraped = 1;
/* Do not scrape pages that are identical to their parent. */
/* Do not scrape pages that are identical to their parent, or are parented
by suspicious locations. */
if (RPAR(req)->res && same_page(&res->sig, &RPAR(req)->res->sig)) {
if (RPAR(req)->res && (same_page(&res->sig, &RPAR(req)->res->sig) ||
RPAR(req)->bad_parent)) {
DEBUG("* Not extracting links because page looks the same as parent.\n");
return;
}
@ -1536,11 +1539,18 @@ void content_checks(struct http_request* req, struct http_response* res) {
if (is_javascript(res) || is_css(res)) check_js_xss(req, res, res->payload);
/* Responses that do not contain the term "function" are much more likely to
be dynamic JSON than just static scripts. Let's try to highlight these. */
/* Responses that do not contain the term "function", "if", "for", "while", etc,
are much more likely to be dynamic JSON than just static scripts. Let's
try to highlight these. */
if (is_javascript(res) && !res->json_safe &&
(!req->method || !strcmp((char*)req->method, "GET")) &&
!strstr((char*)res->payload, "if (") &&
!strstr((char*)res->payload, "if(") &&
!strstr((char*)res->payload, "for (") &&
!strstr((char*)res->payload, "for(") &&
!strstr((char*)res->payload, "while (") &&
!strstr((char*)res->payload, "while(") &&
!strstr((char*)res->payload, "function ") &&
!strstr((char*)res->payload, "function("))
problem(PROB_JS_XSSI, req, res, NULL, req->pivot, 0);

View File

@ -262,10 +262,11 @@ var issue_desc= {
"20101": "Resource fetch failed",
"20102": "Limits exceeded, fetch suppressed",
"20201": "Behavior checks failed",
"20202": "IPS filtering enabled",
"20203": "IPS filtering disabled again",
"20204": "Response varies randomly, skipping checks",
"20201": "Directory behavior checks failed (no brute force)",
"20202": "Parent behavior checks failed (no brute force)",
"20203": "IPS filtering enabled",
"20204": "IPS filtering disabled again",
"20205": "Response varies randomly, skipping checks",
"20301": "Node should be a directory, detection error?",
"30101": "HTTP credentials seen in URLs",

View File

@ -201,6 +201,7 @@ static void destroy_misc_data(struct pivot_desc* pv,
*/
static u8 dir_404_callback(struct http_request*, struct http_response*);
static u8 dir_parent_callback(struct http_request*, struct http_response*);
static u8 dir_ips_callback(struct http_request*, struct http_response*);
static void inject_init(struct pivot_desc*);
static void inject_init2(struct pivot_desc*);
@ -1442,7 +1443,7 @@ static void end_injection_checks(struct pivot_desc* pv) {
if (url_allowed(pv->req) && !pv->res_varies) {
if (pv->r404_cnt) {
if (pv->r404_cnt && !pv->bad_parent) {
pv->state = PSTATE_CHILD_DICT;
pv->cur_key = 0;
crawl_dir_dict_init(pv);
@ -2247,6 +2248,7 @@ static u8 dir_404_callback(struct http_request* req,
struct http_request* n;
u32 i;
s32 ppval = -1, pval = -1, val = -1;
DEBUG_CALLBACK(req, res);
@ -2441,10 +2443,82 @@ bad_404:
404 signatures largely eliminates the need for BH_COUNT identical probes
to confirm sane behavior here. */
/* The next probe is checking if /foo/current_path/ returns the same
response as /bar/current_path/. If yes, then the directory probably
should not be fuzzed. */
req->pivot->state = PSTATE_PARENT_CHECK;
n = req_copy(RPREQ(req), req->pivot, 1);
n->callback = dir_parent_callback;
n->user_val = 0;
/* Last path element is /; previous path element is current dir name;
previous previous element is parent dir name. Find and replace it. */
for (i=0;i<n->par.c;i++) {
if (PATH_SUBTYPE(n->par.t[i])) {
ppval = pval;
pval = val;
val = i;
}
}
if (ppval != -1 && req->pivot->r404_cnt) {
ck_free(n->par.v[ppval]);
n->par.v[ppval] = ck_strdup((u8*)BOGUS_FILE);
async_request(n);
} else {
/* Top-level dir - nothing to replace. Do a dummy call to
dir_parent_callback() to proceed directly to IPS checks. */
n->user_val = 1;
dir_parent_callback(n, res);
destroy_request(n);
}
return 0;
}
/* STAGE 3: Called to verify that changing parent path element has an effect, once. */
static u8 dir_parent_callback(struct http_request* req,
struct http_response* res) {
struct http_request* n;
DEBUG_CALLBACK(req, res);
if (req->user_val || req->pivot->r404_skip) {
DEBUG("* Check not carried out (non-existent / bad parent).\n");
goto schedule_next;
}
if (FETCH_FAIL(res)) {
handle_error(req, res, (u8*)"during parent checks", 0);
goto schedule_next;
}
if (same_page(&res->sig, &RPRES(req)->sig)) {
problem(PROB_PARENT_FAIL, req, res, 0, req->pivot, 0);
DEBUG("* Parent may be bogus, skipping.\n");
req->pivot->bad_parent = 1;
} else {
DEBUG("* Parent behaves OK.\n");
}
/* Regardless of the outcome, let's schedule a final IPS check. Theoretically,
a single request would be fine; but some servers, such as gws, tend
to respond to /?foo very differently than to /. */
schedule_next:
req->pivot->state = PSTATE_IPS_CHECK;
n = req_copy(RPREQ(req), req->pivot, 1);
@ -2464,7 +2538,7 @@ bad_404:
}
/* STAGE 3: Called on IPS check, twice. */
/* STAGE 4: Called on IPS check, twice. */
static u8 dir_ips_callback(struct http_request* req,
struct http_response* res) {

View File

@ -1213,6 +1213,7 @@ void dump_pivots(struct pivot_desc* cur, u8 nest) {
case PSTATE_FETCH: SAY("PSTATE_FETCH\n"); break;
case PSTATE_TYPE_CHECK: SAY("PSTATE_TYPE_CHECK\n"); break;
case PSTATE_404_CHECK: SAY("PSTATE_404_CHECK\n"); break;
case PSTATE_PARENT_CHECK: SAY("PSTATE_PARENT_CHECK\n"); break;
case PSTATE_IPS_CHECK: SAY("PSTATE_IPS_CHECK\n"); break;
case PSTATE_CHILD_INJECT: SAY("PSTATE_CHILD_INJECT\n"); break;
case PSTATE_CHILD_DICT: SAY("PSTATE_CHILD_DICT\n"); break;

View File

@ -55,7 +55,8 @@
#define PSTATE_TYPE_CHECK 20 /* Type check (unknown only) */
#define PSTATE_404_CHECK 22 /* 404 check (dir only) */
#define PSTATE_IPS_CHECK 25 /* IPS filtering check */
#define PSTATE_PARENT_CHECK 24 /* Parent check (dir only) */
#define PSTATE_IPS_CHECK 26 /* IPS filtering check */
/* For directories only (injecting children nodes): */
@ -102,6 +103,7 @@ struct pivot_desc {
struct http_response* res; /* HTTP response seen */
u8 res_varies; /* Response varies? */
u8 bad_parent; /* Parent check failed (looks bogus)? */
/* Fuzzer and probe state data: */
@ -220,9 +222,10 @@ u8 is_c_sens(struct pivot_desc* pv);
#define PROB_LIMITS 20102 /* Crawl limits exceeded. */
#define PROB_404_FAIL 20201 /* Behavior probe failed. */
#define PROB_IPS_FILTER 20202 /* IPS behavior detected. */
#define PROB_IPS_FILTER_OFF 20203 /* IPS no longer active. */
#define PROB_VARIES 20204 /* Response varies. */
#define PROB_PARENT_FAIL 20202 /* Parent behavior problem */
#define PROB_IPS_FILTER 20203 /* IPS behavior detected. */
#define PROB_IPS_FILTER_OFF 20204 /* IPS no longer active. */
#define PROB_VARIES 20205 /* Response varies. */
#define PROB_NOT_DIR 20301 /* Node should be a dir. */