1.40b: Command-line option not to descend into 5xx directories.
This commit is contained in:
parent
15c43e8675
commit
2d658f5126
10
ChangeLog
10
ChangeLog
|
@ -1,3 +1,13 @@
|
|||
Version 1.40b:
|
||||
--------------
|
||||
|
||||
- Command-line option not to descend into 5xx directories.
|
||||
|
||||
Version 1.39b:
|
||||
--------------
|
||||
|
||||
- Option to override 'Range' header from the command line.
|
||||
|
||||
Version 1.38b:
|
||||
--------------
|
||||
|
||||
|
|
|
@ -34,7 +34,7 @@ u8 no_parse, /* Disable HTML link detection */
|
|||
warn_mixed, /* Warn on mixed content */
|
||||
log_ext_urls, /* Log all external URLs */
|
||||
no_forms, /* Do not submit forms */
|
||||
relaxed_mime, /* Relax about cset / mime */
|
||||
relaxed_mime, /* Relax cset / mime checks */
|
||||
pedantic_cache; /* Match HTTP/1.0 and HTTP/1.1 */
|
||||
|
||||
/* Form autofill hints: */
|
||||
|
@ -1760,7 +1760,7 @@ binary_checks:
|
|||
if ((tmp = GET_HDR((u8*)"Content-Disposition", &res->hdr)) &&
|
||||
inl_strcasestr(tmp, (u8*)"attachment")) return;
|
||||
|
||||
if (!relaxed_mime) {
|
||||
if (relaxed_mime) {
|
||||
|
||||
/* CHECK 5A: Renderable documents that are not CSS or static JS are of
|
||||
particular interest when it comes to MIME / charset mistakes. */
|
||||
|
|
|
@ -31,7 +31,7 @@ extern u8 no_parse, /* Disable HTML link detection */
|
|||
warn_mixed, /* Warn on mixed content */
|
||||
log_ext_urls, /* Log all external URLs */
|
||||
no_forms, /* Do not submit forms */
|
||||
relaxed_mime, /* Relax about cset / mime */
|
||||
relaxed_mime, /* Relax cset / mime checks */
|
||||
pedantic_cache; /* Match HTTP/1.0 and HTTP/1.1 */
|
||||
|
||||
/* Helper macros to group various useful checks: */
|
||||
|
|
2
config.h
2
config.h
|
@ -23,7 +23,7 @@
|
|||
#ifndef _HAVE_CONFIG_H
|
||||
#define _HAVE_CONFIG_H
|
||||
|
||||
#define VERSION "1.38b"
|
||||
#define VERSION "1.40b"
|
||||
|
||||
#define USE_COLOR 1 /* Use terminal colors */
|
||||
|
||||
|
|
13
crawler.c
13
crawler.c
|
@ -34,6 +34,7 @@
|
|||
|
||||
u32 crawl_prob = 100; /* Crawl probability (1-100%) */
|
||||
u8 no_fuzz_ext; /* Don't fuzz extensions for dirs */
|
||||
u8 no_500_dir; /* Don't assume dirs on 500 */
|
||||
|
||||
/*
|
||||
|
||||
|
@ -2799,7 +2800,7 @@ static u8 unknown_check_callback(struct http_request* req,
|
|||
for (i=0;i<par->r404_cnt;i++)
|
||||
if (same_page(&res->sig, &par->r404[i])) break;
|
||||
|
||||
if ((!par && res->code == 404) || (par && i != par->r404_cnt) ||
|
||||
if ((!par && res->code == 404) || (par && i != par->r404_cnt) ||
|
||||
(RPRES(req)->code < 300 && res->code >= 300 && RPRES(req)->pay_len)) {
|
||||
|
||||
req->pivot->type = PIVOT_FILE;
|
||||
|
@ -2808,6 +2809,16 @@ static u8 unknown_check_callback(struct http_request* req,
|
|||
|
||||
assume_dir:
|
||||
|
||||
/* If any of the responses is 5xx, and the user asked for 5xx to
|
||||
be treated specially to work around quirky frameworks,
|
||||
assume file right away. */
|
||||
|
||||
if (no_500_dir && (res->code >= 500 || RPRES(req)->code >= 500)) {
|
||||
DEBUG("Feels like a directory, but assuming file pivot as per -Z flag.\n");
|
||||
req->pivot->type = PIVOT_FILE;
|
||||
goto schedule_next;
|
||||
}
|
||||
|
||||
req->pivot->type = PIVOT_DIR;
|
||||
|
||||
/* Replace original request, response with new data. */
|
||||
|
|
|
@ -30,6 +30,7 @@ extern u32 crawl_prob; /* Crawl probability (1-100%) */
|
|||
extern u8 no_parse, /* Disable HTML link detection */
|
||||
warn_mixed, /* Warn on mixed content? */
|
||||
no_fuzz_ext, /* Don't fuzz ext in dirs? */
|
||||
no_500_dir, /* Don't assume dirs on 500 */
|
||||
log_ext_urls; /* Log external URLs? */
|
||||
|
||||
/* Provisional debugging callback. */
|
||||
|
|
|
@ -908,9 +908,9 @@ u8* build_request_data(struct http_request* req) {
|
|||
|
||||
/* Request a limited range up front to minimize unwanted traffic.
|
||||
Note that some Oracle servers apparently fail on certain ranged
|
||||
requests; maybe do something smarter to detect this? */
|
||||
requests, so allowing -H override seems like a good idea. */
|
||||
|
||||
{
|
||||
if (!GET_HDR((u8*)"Range", &global_http_par)) {
|
||||
u8 limit[32];
|
||||
sprintf((char*)limit, "Range: bytes=0-%u\r\n", size_limit - 1);
|
||||
ASD(limit);
|
||||
|
|
|
@ -80,13 +80,14 @@ void usage(char* argv0) {
|
|||
" -K string - do not fuzz parameters named 'string'\n"
|
||||
" -D domain - crawl cross-site links to another domain\n"
|
||||
" -B domain - trust, but do not crawl, another domain\n"
|
||||
" -Z - do not descend into 5xx locations\n"
|
||||
" -O - do not submit any forms\n"
|
||||
" -P - do not parse HTML, etc, to find new links\n\n"
|
||||
|
||||
"Reporting options:\n\n"
|
||||
|
||||
" -o dir - write output to specified directory (required)\n"
|
||||
" -J - be less noisy about MIME / charset mismatches\n"
|
||||
" -J - be less picky about MIME / charset mismatches\n"
|
||||
" -M - log warnings about mixed content\n"
|
||||
" -E - log all HTTP/1.0 / HTTP/1.1 caching intent mismatches\n"
|
||||
" -U - log all external URLs and e-mails seen\n"
|
||||
|
@ -165,7 +166,7 @@ int main(int argc, char** argv) {
|
|||
SAY("skipfish version " VERSION " by <lcamtuf@google.com>\n");
|
||||
|
||||
while ((opt = getopt(argc, argv,
|
||||
"+A:F:C:H:b:Nd:c:r:p:I:X:S:D:PJOYQMUEK:W:LVT:G:R:B:q:g:m:f:t:w:i:s:o:hu")) > 0)
|
||||
"+A:F:C:H:b:Nd:c:r:p:I:X:S:D:PJOYQMZUEK:W:LVT:G:R:B:q:g:m:f:t:w:i:s:o:hu")) > 0)
|
||||
|
||||
switch (opt) {
|
||||
|
||||
|
@ -388,6 +389,10 @@ int main(int argc, char** argv) {
|
|||
be_quiet = 1;
|
||||
break;
|
||||
|
||||
case 'Z':
|
||||
no_500_dir = 1;
|
||||
break;
|
||||
|
||||
default:
|
||||
usage(argv[0]);
|
||||
|
||||
|
|
Loading…
Reference in New Issue