Version 2.00b: Many improvements

- Minor bug fix to path parsing to avoid problems with /.$foo/,
- Improved PHP error detection (courtesy of Niels Heinen),
- Improved dictionary logic (courtesy of Niels Heinen) and new documentation of the same,
- Improved support for file.ext keywords in the dictionary,
- Fixed missing content_checks() in unknown_check_callback() (courtesy of Niels Heinen),
- Fixed an oversight in dictionary case sensitivity,
- Improved pivots.txt data,
- Support for supplementary read-only dictionaries (-W +dict),
- Change to directory detection to work around a certain sneaky server behavior.
- TODO: Revise dictionaries!!!
Steve Pinkham 2011-08-09 16:04:52 -04:00
parent b199943c9d
commit 6b2d33edca
17 changed files with 630 additions and 326 deletions

View File

@ -1,3 +1,29 @@
Version 2.00b:
--------------
- Minor bug fix to path parsing to avoid problems with /.$foo/,
- Improved PHP error detection (courtesy of Niels Heinen),
- Improved dictionary logic (courtesy of Niels Heinen) and new
documentation of the same,
- Improved support for file.ext keywords in the dictionary,
- Fixed missing content_checks() in unknown_check_callback()
(courtesy of Niels Heinen),
- Fixed an oversight in dictionary case sensitivity,
- Improved pivots.txt data,
- Support for supplementary read-only dictionaries (-W +dict),
- Change to directory detection to work around a certain sneaky
server behavior.
- TODO: Revise dictionaries!!!
Version 1.94b:
--------------
@ -9,7 +35,7 @@ Version 1.94b:
Version 1.93b:
--------------
- Major fix to URL XSS detection logic.
- Major fix to URL XSS detection logic (courtesy of Niels Heinen).
Version 1.92b:
--------------

View File

@ -20,7 +20,7 @@
#
PROGNAME = skipfish
VERSION = 1.94b
VERSION = 2.00b
OBJFILES = http_client.c database.c crawler.c analysis.c report.c
INCFILES = alloc-inl.h string-inl.h debug.h types.h http_client.h \

README
View File

@ -429,7 +429,7 @@ $ ./skipfish -MEU -C "AuthCookie=value" -X /logout.aspx -o output_dir \
http://www.example.com/
Five-connection crawl, but no brute-force; pretending to be MSIE and
trusting example.com content):
trusting example.com content:
$ ./skipfish -m 5 -LV -W /dev/null -o output_dir -b ie -B example.com \
http://www.example.com/
@ -506,7 +506,7 @@ know:
a #define directive in config.h. Adding support for HTTPS proxying is
more complicated, and still in the works.
* Scan resume option.
* Scan resume option, better runtime info.
* Option to limit document sampling or save samples directly to disk.
@ -514,7 +514,11 @@ know:
* Config file support.
* A database for banner / version checks?
* Scheduling and management web UI.
* QPS throttling and maximum scan time limit.
* A database for banner / version checks or other configurable rules?
-------------------------------------
9. Oy! Something went horribly wrong!

View File

@ -930,7 +930,7 @@ add_link:
i = 0;
while ((ext = wordlist_get_extension(i++))) {
while ((ext = wordlist_get_extension(i++, 0))) {
u32 ext_len = strlen((char*)ext);
if (clean_len > ext_len + 2 &&
@ -2280,13 +2280,34 @@ static void check_for_stuff(struct http_request* req,
return;
}
if (strstr((char*)res->payload, "<b>Fatal error</b>:") ||
strstr((char*)res->payload, "<b>Parse error</b>:") ||
strstr((char*)res->payload, "</b> on line <b>")) {
problem(PROB_ERROR_POI, req, res, (u8*)"PHP error", req->pivot, 0);
if ((tmp = (u8*)strstr((char*)res->payload, " on line "))) {
u32 off = 512;
while (tmp - 1 > res->payload && !strchr("\r\n", tmp[-1])
&& off--) tmp--;
if (off && (!prefix(tmp, "Warning: ") || !prefix(tmp, "Notice: ") ||
!prefix(tmp, "Fatal error: ") || !prefix(tmp, "Parse error: ") ||
!prefix(tmp, "Deprecated: ") ||
!prefix(tmp, "Strict Standards: ") ||
!prefix(tmp, "Catchable fatal error: "))) {
problem(PROB_ERROR_POI, req, res, (u8*)"PHP error (text)", req->pivot, 0);
return;
}
if (off && !prefix(tmp, "<b>") && (!prefix(tmp + 3, "Warning</b>: ") ||
!prefix(tmp + 3, "Notice</b>: ") ||
!prefix(tmp + 3, "Fatal error</b>: ") ||
!prefix(tmp + 3, "Parse error</b>: ") ||
!prefix(tmp + 3, "Deprecated</b>: ") ||
!prefix(tmp + 3, "Strict Standards</b>: ") ||
!prefix(tmp + 3, "Catchable fatal error</b>: "))) {
problem(PROB_ERROR_POI, req, res, (u8*)"PHP error (HTML)", req->pivot, 0);
return;
}
}
if (strstr((char*)res->payload, "<b>Warning</b>: MySQL: ") ||
strstr((char*)res->payload, "java.sql.SQLException") ||
strstr((char*)res->payload, "You have an error in your SQL syntax; ")) {
@ -2326,12 +2347,26 @@ static void check_for_stuff(struct http_request* req,
if (strstr((char*)sniffbuf, "<cross-domain-policy>")) {
problem(PROB_FILE_POI, req, res, (u8*)
"Flash cross-domain policy", req->pivot, 0);
/*
if (strstr((char*)res->payload, "domain=\"*\""))
problem(PROB_CROSS_WILD, req, res, (u8*)
"Cross-domain policy with wildcard rules", req->pivot, 0);
*/
return;
}
if (strstr((char*)sniffbuf, "<access-policy>")) {
problem(PROB_FILE_POI, req, res, (u8*)"Silverlight cross-domain policy",
req->pivot, 0);
/*
if (strstr((char*)res->payload, "uri=\"*\""))
problem(PROB_CROSS_WILD, req, res, (u8*)
"Cross-domain policy with wildcard rules", req->pivot, 0);
*/
return;
}

View File

@ -29,7 +29,7 @@
/* Define this to enable experimental HTTP proxy support, through the -J
option in the command line. This mode will not work as expected for
HTTPS requests at this point. */
HTTPS requests at this time - sorry. */
// #define PROXY_SUPPORT 1

View File

@ -354,7 +354,7 @@ static void secondary_ext_init(struct pivot_desc* pv, struct http_request* req,
i = 0;
while ((ex = wordlist_get_extension(i))) {
while ((ex = wordlist_get_extension(i, 0))) {
u8* tmp = ck_alloc(strlen((char*)base_name) + strlen((char*)ex) + 2);
u32 c;
@ -382,6 +382,7 @@ static void secondary_ext_init(struct pivot_desc* pv, struct http_request* req,
n->par.v[tpar] = tmp;
n->user_val = 1;
n->with_ext = 1;
memcpy(&n->same_sig, &res->sig, sizeof(struct http_sig));
@ -1814,6 +1815,7 @@ static void crawl_par_dict_init(struct pivot_desc* pv) {
struct http_request* n;
u8 *kw, *ex;
u32 i, c;
u8 specific;
/* Too many requests still pending, or already done? */
@ -1832,7 +1834,7 @@ restart_dict:
i = 0;
kw = (pv->pdic_guess ? wordlist_get_guess : wordlist_get_word)
(pv->pdic_cur_key);
(pv->pdic_cur_key, &specific);
if (!kw) {
@ -1878,10 +1880,11 @@ restart_dict:
/* Schedule probes for all extensions for the current word, but
only if the original parameter contained '.' somewhere,
and only if string is not on the try list. */
and only if string is not on the try list. Special handling
for specific keywords with '.' inside. */
if (strchr((char*)TPAR(pv->req), '.'))
while (!no_fuzz_ext && (ex = wordlist_get_extension(i))) {
if (!no_fuzz_ext && strchr((char*)TPAR(pv->req), '.'))
while ((ex = wordlist_get_extension(i, specific))) {
u8* tmp = ck_alloc(strlen((char*)kw) + strlen((char*)ex) + 2);
@ -1901,6 +1904,7 @@ restart_dict:
ck_free(TPAR(n));
TPAR(n) = tmp;
n->callback = par_dict_callback;
n->with_ext = 1;
pv->pdic_pending++;
in_dict_init = 1;
async_request(n);
@ -2333,6 +2337,7 @@ static u8 dir_404_callback(struct http_request* req,
}
memcpy(&req->pivot->r404[i], &res->sig, sizeof(struct http_sig));
req->pivot->r404_cnt++;
/* Is this a new signature not seen on parent? Notify if so,
@ -2379,7 +2384,7 @@ schedule_next:
/* Aaand schedule all the remaining probes. */
while ((nk = wordlist_get_extension(cur_ext++))) {
while ((nk = wordlist_get_extension(cur_ext++, 0))) {
u8* tmp = ck_alloc(strlen(BOGUS_FILE) + strlen((char*)nk) + 2);
n = req_copy(RPREQ(req), req->pivot, 1);
@ -2388,6 +2393,7 @@ schedule_next:
replace_slash(n, tmp);
ck_free(tmp);
n->callback = dir_404_callback;
n->with_ext = 1;
n->user_val = 1;
/* r404_pending is at least 1 to begin with, so this is safe
@ -2655,6 +2661,7 @@ static void crawl_dir_dict_init(struct pivot_desc* pv) {
struct http_request* n;
u8 *kw, *ex;
u32 i, c;
u8 specific;
/* Too many requests still pending, or already moved on to
parametric tests? */
@ -2682,7 +2689,8 @@ static void crawl_dir_dict_init(struct pivot_desc* pv) {
restart_dict:
kw = (pv->guess ? wordlist_get_guess : wordlist_get_word)(pv->cur_key);
kw = (pv->guess ? wordlist_get_guess : wordlist_get_word)
(pv->cur_key, &specific);
if (!kw) {
@ -2739,11 +2747,13 @@ restart_dict:
}
/* Schedule probes for all extensions for the current word,
likewise. */
likewise. Make an exception for specific keywords that
already contain a period. */
i = 0;
while (!no_fuzz_ext && (ex = wordlist_get_extension(i))) {
if (!no_fuzz_ext)
while ((ex = wordlist_get_extension(i, specific))) {
u8* tmp = ck_alloc(strlen((char*)kw) + strlen((char*)ex) + 2);
@ -2761,6 +2771,7 @@ restart_dict:
n = req_copy(pv->req, pv, 1);
replace_slash(n, tmp);
n->callback = dir_dict_callback;
n->with_ext = 1;
pv->pending++;
in_dict_init = 1;
async_request(n);
@ -2917,6 +2928,7 @@ u8 fetch_unknown_callback(struct http_request* req, struct http_response* res) {
n = req_copy(req, req->pivot, 1);
set_value(PARAM_PATH, NULL, (u8*)"", -1, &n->par);
n->callback = unknown_check_callback;
n->with_ext = req->with_ext;
async_request(n);
/* This is the initial callback, keep the response. */
@ -2974,13 +2986,34 @@ static u8 unknown_check_callback(struct http_request* req,
}
if (par)
if (par) {
for (i=0;i<par->r404_cnt;i++)
if (same_page(&res->sig, &par->r404[i])) break;
/* Do not use extension-originating signatures for settling non-extension
cases. */
if (i && !req->with_ext) i = par->r404_cnt;
}
if ((!par && res->code == 404) || (par && i != par->r404_cnt) ||
(RPRES(req)->code < 300 && res->code >= 300 && RPRES(req)->pay_len)) {
DEBUG("REASON X\n");
if (par) DEBUG("same_404 = %d\n", i != par->r404_cnt);
DEBUG("par = %p\n", par);
if (par) DEBUG("par->r404_cnt = %d\n", par->r404_cnt);
DEBUG("res->code = %d\n", res->code);
DEBUG("parent code = %d\n", RPRES(req)->code);
DEBUG("parent len = %d\n", RPRES(req)->pay_len);
// (!par && res->code == 404) || - NO
// (par && i != par->r404_cnt) || - YES
// (RPRES(req)->code < 300 && res->code >= 300 && RPRES(req)->pay_len))
req->pivot->type = PIVOT_FILE;
} else {
@ -2999,6 +3032,11 @@ assume_dir:
req->pivot->type = PIVOT_DIR;
/* Perform content checks before discarding the old payload. */
if (!same_page(&RPRES(req)->sig, &res->sig))
content_checks(RPREQ(req), RPRES(req));
/* Replace original request, response with new data. */
destroy_request(RPREQ(req));

View File

@ -57,11 +57,17 @@ u32 max_depth = MAX_DEPTH,
u8 dont_add_words; /* No auto dictionary building */
#define KW_SPECIFIC 0
#define KW_GENERIC 1
#define KW_GEN_AUTO 2
struct kw_entry {
u8* word; /* Keyword itself */
u32 hit_cnt; /* Number of confirmed sightings */
u8 is_ext; /* Is an extension? */
u8 hit_already; /* Had its hit count bumped up? */
u8 read_only; /* Read-only dictionary? */
u8 class; /* KW_* */
u32 total_age; /* Total age (in scan cycles) */
u32 last_age; /* Age since last hit */
};
@ -71,11 +77,19 @@ static struct kw_entry*
static u32 keyword_cnt[WORD_HASH]; /* Per-bucket keyword counts */
static u8 **extension, /* Extension list */
**guess; /* Keyword candidate list */
struct ext_entry {
u32 bucket;
u32 index;
};
static struct ext_entry *extension, /* Extension list */
*sp_extension;
static u8 **guess; /* Keyword candidate list */
u32 guess_cnt, /* Number of keyword candidates */
extension_cnt, /* Number of extensions */
sp_extension_cnt, /* Number of specific extensions */
keyword_total_cnt, /* Current keyword count */
keyword_orig_cnt; /* At-boot keyword count */
@ -818,7 +832,7 @@ static inline u32 hash_word(u8* str) {
/* Adds a new keyword candidate to the global "guess" list. This
list is always case-insensitive. */
list is case-sensitive. */
void wordlist_add_guess(u8* text) {
u32 target, i, kh;
@ -830,7 +844,7 @@ void wordlist_add_guess(u8* text) {
if (!text || !text[0] || strlen((char*)text) > MAX_WORD) return;
for (i=0;i<guess_cnt;i++)
if (!strcasecmp((char*)text, (char*)guess[i])) return;
if (!strcmp((char*)text, (char*)guess[i])) return;
kh = hash_word(text);
@ -853,10 +867,10 @@ void wordlist_add_guess(u8* text) {
/* Adds a single, sanitized keyword to the list, or increases its hit count.
Keyword list is case-insensitive - first capitalization wins. */
Keyword list is case-sensitive. */
static void wordlist_confirm_single(u8* text, u8 is_ext, u32 add_hits,
u32 total_age, u32 last_age) {
static void wordlist_confirm_single(u8* text, u8 is_ext, u8 class, u8 read_only,
u32 add_hits, u32 total_age, u32 last_age) {
u32 kh, i;
if (!text || !text[0] || strlen((char*)text) > MAX_WORD) return;
@ -866,7 +880,7 @@ static void wordlist_confirm_single(u8* text, u8 is_ext, u32 add_hits,
kh = hash_word(text);
for (i=0;i<keyword_cnt[kh];i++)
if (!strcasecmp((char*)text, (char*)keyword[kh][i].word)) {
if (!strcmp((char*)text, (char*)keyword[kh][i].word)) {
/* Known! Increase hit count, and if this is now
tagged as an extension, add to extension list. */
@ -875,13 +889,19 @@ static void wordlist_confirm_single(u8* text, u8 is_ext, u32 add_hits,
keyword[kh][i].hit_cnt += add_hits;
keyword[kh][i].hit_already = 1;
keyword[kh][i].last_age = 0;
if (!keyword[kh][i].read_only && read_only)
keyword[kh][i].read_only = 1;
}
if (!keyword[kh][i].is_ext && is_ext) {
keyword[kh][i].is_ext = 1;
extension = ck_realloc(extension, (extension_cnt + 1) * sizeof(u8*));
extension[extension_cnt++] = keyword[kh][i].word;
extension = ck_realloc(extension, (extension_cnt + 1) *
sizeof(struct ext_entry));
extension[extension_cnt].bucket = kh;
extension[extension_cnt++].index = i;
}
return;
@ -896,6 +916,8 @@ static void wordlist_confirm_single(u8* text, u8 is_ext, u32 add_hits,
keyword[kh][i].word = ck_strdup(text);
keyword[kh][i].is_ext = is_ext;
keyword[kh][i].class = class;
keyword[kh][i].read_only = read_only;
keyword[kh][i].hit_cnt = add_hits;
keyword[kh][i].total_age = total_age;
keyword[kh][i].last_age = last_age;
@ -906,8 +928,21 @@ static void wordlist_confirm_single(u8* text, u8 is_ext, u32 add_hits,
if (!total_age) keyword[kh][i].hit_already = 1;
if (is_ext) {
extension = ck_realloc(extension, (extension_cnt + 1) * sizeof(u8*));
extension[extension_cnt++] = keyword[kh][i].word;
extension = ck_realloc(extension, (extension_cnt + 1) *
sizeof(struct ext_entry));
extension[extension_cnt].bucket = kh;
extension[extension_cnt++].index = i;
if (class == KW_SPECIFIC) {
sp_extension = ck_realloc(sp_extension, (sp_extension_cnt + 1) *
sizeof(struct ext_entry));
sp_extension[sp_extension_cnt].bucket = kh;
sp_extension[sp_extension_cnt++].index = i;
}
}
}
@ -946,6 +981,18 @@ void wordlist_confirm_word(u8* text) {
}
}
/* If the format is foo.bar, check if the entire string is a known keyword.
If yes, don't try to look up and add individual components. */
if (ppos != -1) {
u32 kh = hash_word(text);
for (i=0;i<keyword_cnt[kh];i++)
if (!strcasecmp((char*)text, (char*)keyword[kh][i].word)) return;
}
/* Too many dots? Tokenize class paths and domains as individual keywords,
still. */
@ -972,22 +1019,22 @@ void wordlist_confirm_word(u8* text) {
if (tlen == 1 || tlen - ppos > 12) return;
if (ppos && ppos != tlen - 1 && !isdigit(text[ppos] + 1)) {
wordlist_confirm_single(text + ppos + 1, 1, 1, 0, 0);
wordlist_confirm_single(text + ppos + 1, 1, KW_GEN_AUTO, 0, 1, 0, 0);
text[ppos] = 0;
wordlist_confirm_single(text, 0, 1, 0, 0);
wordlist_confirm_single(text, 0, KW_GEN_AUTO, 0, 1, 0, 0);
text[ppos] = '.';
return;
}
}
wordlist_confirm_single(text, 0, 1, 0, 0);
wordlist_confirm_single(text, 0, KW_GEN_AUTO, 0, 1, 0, 0);
}
/* Returns wordlist item at a specified offset (NULL if no more available). */
u8* wordlist_get_word(u32 offset) {
u8* wordlist_get_word(u32 offset, u8* specific) {
u32 cur_off = 0, kh;
for (kh=0;kh<WORD_HASH;kh++) {
@ -997,32 +1044,42 @@ u8* wordlist_get_word(u32 offset) {
if (kh == WORD_HASH) return NULL;
*specific = (keyword[kh][offset - cur_off].is_ext == 0 &&
keyword[kh][offset - cur_off].class == KW_SPECIFIC);
return keyword[kh][offset - cur_off].word;
}
/* Returns keyword candidate at a specified offset (or NULL). */
u8* wordlist_get_guess(u32 offset) {
u8* wordlist_get_guess(u32 offset, u8* specific) {
if (offset >= guess_cnt) return NULL;
*specific = 0;
return guess[offset];
}
/* Returns extension at a specified offset (or NULL). */
u8* wordlist_get_extension(u32 offset) {
u8* wordlist_get_extension(u32 offset, u8 specific) {
if (!specific) {
if (offset >= extension_cnt) return NULL;
return extension[offset];
return keyword[extension[offset].bucket][extension[offset].index].word;
}
if (offset >= sp_extension_cnt) return NULL;
return keyword[sp_extension[offset].bucket][sp_extension[offset].index].word;
}
/* Loads keywords from file. */
void load_keywords(u8* fname, u32 purge_age) {
void load_keywords(u8* fname, u8 read_only, u32 purge_age) {
FILE* in;
u32 hits, total_age, last_age, lines = 0;
u8 type;
u8 type[3];
s32 fields;
u8 kword[MAX_WORD + 1];
char fmt[32];
@ -1036,19 +1093,28 @@ void load_keywords(u8* fname, u32 purge_age) {
return;
}
sprintf(fmt, "%%c %%u %%u %%u %%%u[^\x01-\x1f]", MAX_WORD);
sprintf(fmt, "%%2s %%u %%u %%u %%%u[^\x01-\x1f]", MAX_WORD);
while ((fields = fscanf(in, fmt, &type, &hits, &total_age, &last_age, kword))
while ((fields = fscanf(in, fmt, type, &hits, &total_age, &last_age, kword))
== 5) {
u8 class = KW_GEN_AUTO;
if (type[0] != 'e' && type[0] != 'w')
FATAL("Wordlist '%s': bad keyword type in line %u.\n", fname, lines + 1);
if (type[1] == 's') class = KW_SPECIFIC; else
if (type[1] == 'g') class = KW_GENERIC;
if (!purge_age || last_age < purge_age)
wordlist_confirm_single(kword, (type == 'e'), hits,
wordlist_confirm_single(kword, (type[0] == 'e'), class, read_only, hits,
total_age + 1, last_age + 1);
lines++;
fgetc(in); /* sink \n */
}
if (fields != -1 && fields != 5)
FATAL("Wordlist '%s': syntax error in line %u.\n", fname, lines + 1);
FATAL("Wordlist '%s': syntax error in line %u.\n", fname, lines);
if (!lines)
WARN("Wordlist '%s' contained no valid entries.", fname);
@ -1110,11 +1176,21 @@ void save_keywords(u8* fname) {
}
for (kh=0;kh<WORD_HASH;kh++)
for (i=0;i<keyword_cnt[kh];i++)
fprintf(out,"%c %u %u %u %s\n", keyword[kh][i].is_ext ? 'e' : 'w',
for (i=0;i<keyword_cnt[kh];i++) {
u8 class = '?';
if (keyword[kh][i].read_only) continue;
if (keyword[kh][i].class == KW_SPECIFIC) class = 's'; else
if (keyword[kh][i].class == KW_GENERIC) class = 'g';
fprintf(out,"%c%c %u %u %u %s\n", keyword[kh][i].is_ext ? 'e' : 'w',
class,
keyword[kh][i].hit_cnt, keyword[kh][i].total_age,
keyword[kh][i].last_age, keyword[kh][i].word);
}
SAY(cLGN "[+] " cNOR "Wordlist '%s' updated (%u new words added).\n",
fname, keyword_total_cnt - keyword_orig_cnt);
@ -1409,8 +1485,9 @@ void destroy_database() {
ck_free(keyword[kh]);
}
/* Extensions just referenced keyword[][].word entries. */
/* Extensions just referenced keyword[][] entries. */
ck_free(extension);
ck_free(sp_extension);
for (i=0;i<guess_cnt;i++) ck_free(guess[i]);
ck_free(guess);

View File

@ -375,19 +375,19 @@ void wordlist_confirm_word(u8* text);
/* Returns wordlist item at a specified offset (NULL if no more available). */
u8* wordlist_get_word(u32 offset);
u8* wordlist_get_word(u32 offset, u8* specific);
/* Returns keyword candidate at a specified offset (or NULL). */
u8* wordlist_get_guess(u32 offset);
u8* wordlist_get_guess(u32 offset, u8* specific);
/* Returns extension at a specified offset (or NULL). */
u8* wordlist_get_extension(u32 offset);
u8* wordlist_get_extension(u32 offset, u8 specific);
/* Loads keywords from file. */
void load_keywords(u8* fname, u32 purge_age);
void load_keywords(u8* fname, u8 read_only, u32 purge_age);
/* Saves all keywords to a file. */

View File

@ -3,11 +3,75 @@ This directory contains four alternative, hand-picked Skipfish dictionaries.
PLEASE READ THIS FILE CAREFULLY BEFORE PICKING ONE. This is *critical* to
getting good results in your work.
----------------
Dictionary modes
----------------
------------------------
Key command-line options
------------------------
The basic modes you should be aware of (in order of request cost):
The dictionary to be used by the tool can be specified with the -W option,
and must conform to the format outlined at the end of this document. If you
omit -W in the command line, 'skipfish.wl' is assumed. This file does not
exist by default. That part is by design: THE SCANNER WILL MODIFY THE
SUPPLIED FILE UNLESS SPECIFICALLY INSTRUCTED NOT TO.
That's because the scanner automatically learns new keywords and extensions
based on any links discovered during the scan, and on random sampling of
site contents. The information is consequently stored in the dictionary
for future reuse, along with other bookkeeping information useful for
determining which keywords perform well, and which ones don't.
All this means that it is very important to maintain a separate dictionary
for every separate set of unrelated target sites. Otherwise, undesirable
interference will occur.
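For example, before scanning a new group of related targets, you could seed
a fresh per-target dictionary from one of the supplied files and point -W at
the copy (the file names below are purely illustrative):

cp dictionaries/minimal.wl example-com.wl
./skipfish -W example-com.wl -o output_dir http://www.example.com/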
With this out of the way, let's quickly review the options that may be used
to fine-tune various aspects of dictionary handling:
-L - do not automatically learn new keywords based on site content.
This option should normally not be used in most scanning
modes; if supplied, the scanner will not be able to discover
and leverage technology-specific terms and file extensions
unique to the architecture of the targeted site.
-G num - change jar size for keyword candidates.
Up to <num> candidates are randomly selected from site
content, and periodically retried during brute-force checks;
when one of them results in a unique non-404 response, it is
promoted to the dictionary proper. Unsuccessful candidates are
gradually replaced with new picks, and then discarded at the
end of the scan. The default jar size is 256.
-V - prevent the scanner from updating the dictionary file.
Normally, the primary read-write dictionary specified with the
-W option is updated at the end of the scan to add any newly
discovered keywords, and to update keyword usage stats. Using
this option eliminates this step.
-R num - purge all dictionary entries that had no non-404 hits for
the last <num> scans.
This option prevents dictionary creep in repeated assessments,
but needs to be used with care: it will permanently nuke a
part of the dictionary!
-Y - inhibit full ${filename}.${extension} brute-force.
In this mode, the scanner will only brute-force one component
at a time, trying all possible keywords without any extension,
and then trying to append extensions to any otherwise discovered
content.
This greatly improves scan times, but reduces coverage. Scan modes
2 and 3 shown in the next section make use of this flag.
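As an illustration, several of these switches can be combined in a single
run; the specific values and paths below are made up for the example:

./skipfish -W target_specific.wl -G 512 -R 10 -Y -o output_dir \
  http://www.example.com/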
--------------
Scanning modes
--------------
The basic dictionary-dependent modes you should be aware of (in order of the
associated request cost):
1) Orderly crawl with no DirBuster-like brute-force at all. In this mode, the
scanner will not discover non-linked resources such as /admin,
@ -20,14 +84,15 @@ The basic modes you should be aware of (in order of request cost):
2) Orderly scan with minimal extension brute-force. In this mode, the scanner
will not discover resources such as /admin, but will discover cases such as
/index.php.old:
/index.php.old (once index.php itself is spotted during an orderly crawl):
cp dictionaries/extensions-only.wl dictionary.wl
./skipfish -W dictionary.wl -Y [...other options...]
This method is only slightly more request-intensive than #1, and therefore,
generally recommended in cases where time is of essence. The cost is about
100 requests per fuzzed location.
is a marginally better alternative in cases where time is of essence. It's
still not recommended for most uses. The cost is about 100 requests per
fuzzed location.
3) Directory OR extension brute-force only. In this mode, the scanner will only
try fuzzing the file name, or the extension, at any given time - but will
@ -61,39 +126,27 @@ The basic modes you should be aware of (in order of request cost):
reasonably responsive servers; but it may be prohibitively expensive
when dealing with very large or very slow sites.
As should be obvious, the -W option points to a dictionary to be used; the
scanner updates the file based on scan results, so please always make a
target-specific copy - do not use the master file directly, or it may be
polluted with keywords not relevant to other targets.
----------------------------------
Using separate master dictionaries
----------------------------------
Additional options supported by the aforementioned modes:
A recently introduced feature allows you to load any number of read-only
supplementary dictionaries in addition to the "main" read-write one (-W
dictionary.wl).
-L - do not automatically learn new keywords based on site content.
This option should not be normally used in most scanning
modes; *not* using it significantly improves the coverage of
minimal.wl.
This is a convenient way to isolate (and be able to continually update) your
customized top-level wordlist, whilst still acquiring site-specific data in
a separate file. The following syntax may be used to accomplish this:
-G num - specifies jar size for keyword candidates selected from the
content; up to <num> candidates are kept and tried during
brute-force checks; when one of them results in a unique
non-404 response, it is promoted to the dictionary proper.
./skipfish -W initially_empty_site_specific_dict.wl -W +supplementary_dict1.wl \
-W +supplementary_dict2.wl [...other options...]
-V - prevents the scanner from updating the dictionary file with
newly discovered keywords and keyword usage stats (i.e., all
new findings are discarded on exit).
Only the main dictionary will be modified as a result of the scan, and only
newly discovered site-specific keywords will be appended there.
-Y - inhibits full ${filename}.${extension} brute-force: the scanner
will only brute-force one component at a time. This greatly
improves scan times, but reduces coverage. Modes 2 and 3
shown above make use of this flag.
-R num - purges all dictionary entries that had no non-404 hits for
the last <num> scans. Prevents dictionary creep in repeated
assessments, but use with care!
-----------------------------
More about dictionary design:
-----------------------------
----------------------------
More about dictionary design
----------------------------
Each dictionary may consist of a number of extensions, and a number of
"regular" keywords. Extensions are considered just a special subset of the
@ -103,29 +156,74 @@ You can create custom dictionaries, conforming to this format:
type hits total_age last_age keyword
...where 'type' is either 'e' or 'w' (extension or wordlist); 'hits' is the
total number of times this keyword resulted in a non-404 hit in all previous
scans; 'total_age' is the number of scan cycles this word is in the dictionary;
'last_age' is the number of scan cycles since the last 'hit'; and 'keyword' is
the actual keyword.
...where 'type' is either 'e' or 'w' (extension or keyword), followed by a
qualifier (explained below); 'hits' is the total number of times this keyword
resulted in a non-404 hit in all previous scans; 'total_age' is the number of scan
cycles this word has been in the dictionary; 'last_age' is the number of scan cycles
since the last 'hit'; and 'keyword' is the actual keyword.
Do not duplicate extensions as keywords - if you already have 'html' as an 'e'
entry, there is no need to also create a 'w' one.
Qualifiers alter the meaning of an entry in the following way:
There must be no empty or malformed lines, comments in the wordlist file.
Extension keywords must have no leading dot (e.g., 'exe', not '.exe'), and all
keywords should be NOT url-encoded (e.g., 'Program Files', not
'Program%20Files'). No keyword should exceed 64 characters.
wg - generic keyword that is not associated with any specific server-side
technology. Examples include 'backup', 'accounting', or 'logs'. These
will be indiscriminately combined with every known extension (e.g.,
'backup.php') during the fuzzing process.
If you omit -W in the command line, 'skipfish.wl' is assumed. This file does
not exist by default; this is by design.
ws - technology-specific keyword that is unlikely to have a random
extension; for example, with 'cgi-bin', testing for 'cgi-bin.php' is
usually a waste of time. Keywords tagged this way will be combined only
with a small set of technology-agnostic extensions - e.g., 'cgi-bin.old'.
The scanner will automatically learn new keywords and extensions based on any
links discovered during the scan; and will also analyze pages and extract
words to use as keyword candidates.
NOTE: Technology-specific keywords that, in the real world, are always
paired with a single, specific extension should be combined with that
extension in the 'ws' entry itself, rather than accommodated with
'wg' rules. For example, 'MANIFEST.MF' is OK.
Tread carefully; poor wordlists are one of the reasons why some web security
scanners perform worse than expected. You will almost always be better off
narrowing down or selectively extending the supplied set (and possibly
contributing back your changes upstream!), than importing a giant wordlist
scored elsewhere.
eg - generic extension that is not specific to any well-defined technology,
or may pop up in administrator- or developer-created auxiliary content.
Examples include 'bak', 'old', 'txt', or 'log'.
es - technology-specific extension, such as 'php' or 'cgi', that is
unlikely to spontaneously accompany random 'ws' keywords.
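Putting the qualifiers together, entries in a hand-written wordlist could
look like this (the keywords are the examples used above; hit counts and
ages are placeholders):

wg 1 1 1 backup
ws 1 1 1 cgi-bin
eg 1 1 1 bak
es 1 1 1 php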
Skipfish leverages this distinction by only trying the following brute-force
combinations:
/some/path/wg_keyword ('index')
/some/path/ws_keyword ('cgi-bin')
/some/path/wg_extension ('old')
/some/path/ws_extension ('php')
/some/path/wg_keyword.wg_extension ('index.old')
/some/path/wg_keyword.ws_extension ('index.php')
/some/path/ws_keyword.ws_extension ('cgi-bin.old')
To decide between 'wg' and 'ws', consider if you are likely to ever encounter
files such as ${this_word}.php or ${this_word}.class. If not, tag the keyword
as 'ws'.
Similarly, to decide between 'eg' and 'es', think about the possibility of
encountering cgi-bin.${this_ext} or formmail.${this_ext}. If it seems unlikely,
choose 'es'.
For your convenience, all legacy keywords and extensions, as well as any entries
detected automatically, will be stored in the dictionary with a '?' qualifier.
This is equivalent to 'g', and is meant to assist the user in reviewing and
triaging any automatically acquired dictionary data.
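For instance, an automatically learned or legacy extension might end up
recorded as 'e? 3 2 0 core' (the counts here are illustrative).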
Other notes about dictionaries:
- Do not duplicate extensions as keywords - if you already have 'html' as an
'e' entry, there is no need to also create a 'w' one.
- There must be no empty or malformed lines, or comments, in the wordlist
file. Extension keywords must have no leading dot (e.g., 'exe', not '.exe'),
and all keywords should NOT be url-encoded (e.g., 'Program Files', not
'Program%20Files'). No keyword should exceed 64 characters.
- Tread carefully; poor wordlists are one of the reasons why some web security
scanners perform worse than expected. You will almost always be better off
narrowing down or selectively extending the supplied set (and possibly
contributing back your changes upstream!), than importing a giant wordlist
scored elsewhere.

View File

@ -16,6 +16,7 @@ e 1 1 1 class
e 1 1 1 cnf
e 1 1 1 conf
e 1 1 1 config
e 1 1 1 core
e 1 1 1 cpp
e 1 1 1 cs
e 1 1 1 csproj
@ -587,7 +588,6 @@ w 1 1 1 cookies
w 1 1 1 copies
w 1 1 1 copy
w 1 1 1 copyright
w 1 1 1 core
w 1 1 1 corp
w 1 1 1 corpo
w 1 1 1 corporate

View File

@ -16,6 +16,7 @@ e 1 1 1 class
e 1 1 1 cnf
e 1 1 1 conf
e 1 1 1 config
e 1 1 1 core
e 1 1 1 cpp
e 1 1 1 cs
e 1 1 1 csproj

View File

@ -11,6 +11,7 @@ e 1 1 1 class
e 1 1 1 cnf
e 1 1 1 conf
e 1 1 1 config
e 1 1 1 core
e 1 1 1 cpp
e 1 1 1 csproj
e 1 1 1 csv
@ -556,7 +557,6 @@ w 1 1 1 cookies
w 1 1 1 copies
w 1 1 1 copy
w 1 1 1 copyright
w 1 1 1 core
w 1 1 1 corp
w 1 1 1 corpo
w 1 1 1 corporate

View File

@ -565,6 +565,22 @@ void tokenize_path(u8* str, struct http_request* req, u8 add_slash) {
value = url_decode_token(cur + !first_el, next_seg - !first_el, 0);
}
/* If the extracted segment is just '.' or '..', but is followed by
something else than '/', skip one separator. */
if (!name && cur[next_seg] && cur[next_seg] != '/' &&
(!strcmp((char*)value, ".") || !strcmp((char*)value, ".."))) {
next_seg = strcspn((char*)cur + next_seg + 1, "/;,!$?#") + next_seg + 1,
ck_free(name);
ck_free(value);
value = url_decode_token(cur + !first_el, next_seg - !first_el, 0);
}
switch (first_el ? '/' : *cur) {
case ';': set_value(PARAM_PATH_S, name, value, -1, &req->par); break;

View File

@ -97,12 +97,12 @@ struct http_request {
u16 port; /* Port number to connect to */
u8* orig_url; /* Copy of the original URL */
struct param_array par; /* Parameters, headers, cookies */
struct pivot_desc *pivot; /* Pivot descriptor */
u32 user_val; /* Can be used freely */
u8 with_ext; /* Extension-based probe? */
u8 (*callback)(struct http_request*, struct http_response*);
/* Callback to invoke when done */

View File

@ -303,7 +303,7 @@ static void compute_counts(struct pivot_desc* pv) {
/* Helper to JS-escape data. Static buffer, will be destroyed on
subsequent calls. */
static inline u8* js_escape(u8* str) {
static inline u8* js_escape(u8* str, u8 sp) {
u32 len;
static u8* ret;
u8* opos;
@ -316,7 +316,7 @@ static inline u8* js_escape(u8* str) {
opos = ret = __DFL_ck_alloc(len * 4 + 1);
while (len--) {
if (*str > 0x1f && *str < 0x80 && !strchr("<>\\'\"", *str)) {
if (*str > (sp ? 0x20 : 0x1f) && *str < 0x80 && !strchr("<>\\'\"", *str)) {
*(opos++) = *(str++);
} else {
sprintf((char*)opos, "\\x%02x", *(str++));
@ -343,7 +343,7 @@ static void output_scan_info(u64 scan_time, u32 seed) {
if (!f) PFATAL("Cannot open 'summary.js'");
fprintf(f, "var sf_version = '%s';\n", VERSION);
fprintf(f, "var scan_date = '%s';\n", js_escape(ct));
fprintf(f, "var scan_date = '%s';\n", js_escape(ct, 0));
fprintf(f, "var scan_seed = '0x%08x';\n", seed);
fprintf(f, "var scan_ms = %llu;\n", (long long)scan_time);
@ -370,12 +370,12 @@ static void describe_res(FILE* f, struct http_response* res) {
case STATE_OK:
fprintf(f, "'fetched': true, 'code': %u, 'len': %u, 'decl_mime': '%s', ",
res->code, res->pay_len,
js_escape(res->header_mime));
js_escape(res->header_mime, 0));
fprintf(f, "'sniff_mime': '%s', 'cset': '%s'",
res->sniffed_mime ? res->sniffed_mime : (u8*)"[none]",
js_escape(res->header_charset ? res->header_charset
: res->meta_charset));
: res->meta_charset, 0));
break;
case STATE_DNSERR:
@ -514,18 +514,18 @@ static void output_crawl_tree(struct pivot_desc* pv) {
fprintf(f, " { 'dupe': %s, 'type': %u, 'name': '%s%s",
pv->child[i]->dupe ? "true" : "false",
pv->child[i]->type, js_escape(pv->child[i]->name),
pv->child[i]->type, js_escape(pv->child[i]->name, 0),
(pv->child[i]->fuzz_par == -1 || pv->child[i]->type == PIVOT_VALUE)
? (u8*)"" : (u8*)"=");
fprintf(f, "%s', 'dir': '%s', 'linked': %d, ",
(pv->child[i]->fuzz_par == -1 || pv->child[i]->type == PIVOT_VALUE)
? (u8*)"" :
js_escape(pv->child[i]->req->par.v[pv->child[i]->fuzz_par]),
js_escape(pv->child[i]->req->par.v[pv->child[i]->fuzz_par], 0),
tmp, pv->child[i]->linked);
p = serialize_path(pv->child[i]->req, 1, 1);
fprintf(f, "'url': '%s', ", js_escape(p));
fprintf(f, "'url': '%s', ", js_escape(p, 0));
ck_free(p);
describe_res(f, pv->child[i]->res);
@ -557,7 +557,7 @@ static void output_crawl_tree(struct pivot_desc* pv) {
fprintf(f, " { 'severity': %u, 'type': %u, 'extra': '%s', ",
PSEV(pv->issue[i].type) - 1, pv->issue[i].type,
pv->issue[i].extra ? js_escape(pv->issue[i].extra) : (u8*)"");
pv->issue[i].extra ? js_escape(pv->issue[i].extra, 0) : (u8*)"");
describe_res(f, pv->issue[i].res);
@ -658,7 +658,7 @@ static void output_summary_views() {
save_req_res(m_samp[i].req[c], m_samp[i].res[c], 0);
if (chdir("..")) PFATAL("chdir unexpectedly fails!");
fprintf(f, " { 'url': '%s', 'dir': '%s/%s', 'linked': %d, 'len': %d"
" }%s\n", js_escape(p), tmp, tmp2,
" }%s\n", js_escape(p, 0), tmp, tmp2,
m_samp[i].req[c]->pivot->linked, m_samp[i].res[c]->pay_len,
(c == use_samp - 1) ? " ]" : ",");
ck_free(p);
@ -693,9 +693,9 @@ static void output_summary_views() {
if (chdir((char*)tmp2)) PFATAL("chdir unexpectedly fails!");
save_req_res(i_samp[i].i[c]->req, i_samp[i].i[c]->res, 0);
if (chdir("..")) PFATAL("chdir unexpectedly fails!");
fprintf(f, " { 'url': '%s', ", js_escape(p));
fprintf(f, " { 'url': '%s', ", js_escape(p, 0));
fprintf(f, "'extra': '%s', 'dir': '%s/%s' }%s\n",
i_samp[i].i[c]->extra ? js_escape(i_samp[i].i[c]->extra) :
i_samp[i].i[c]->extra ? js_escape(i_samp[i].i[c]->extra, 0) :
(u8*)"", tmp, tmp2,
(c == use_samp - 1) ? " ]" : ",");
ck_free(p);
@ -763,10 +763,12 @@ static void save_pivots(FILE* f, struct pivot_desc* cur) {
u8* url = serialize_path(cur->req, 1, 1);
fprintf(f, "%s %s ", cur->req->method ? cur->req->method : (u8*)"GET",
js_escape(url));
js_escape(url, 0));
ck_free(url);
fprintf(f, "name=%s ", js_escape(cur->name, 1));
switch (cur->type) {
case PIVOT_SERV: fprintf(f, "type=serv "); break;
case PIVOT_DIR: fprintf(f, "type=dir "); break;
@ -785,7 +787,8 @@ static void save_pivots(FILE* f, struct pivot_desc* cur) {
}
if (cur->res)
fprintf(f, "dup=%u %scode=%u len=%u notes=%u\n", cur->dupe,
fprintf(f, "dup=%u %s%scode=%u len=%u notes=%u\n", cur->dupe,
cur->bogus_par ? "bogus " : "",
cur->missing ? "returns_404 " : "",
cur->res->code, cur->res->pay_len, cur->issue_cnt);
else

View File

@ -83,10 +83,6 @@ do not parse HTML and other documents to find new links
.B \-o dir
write output to specified directory (required)
.TP
.B \-J
be less noisy about MIME / charset mismatches on probably
static content
.TP
.B \-M
log warnings about mixed content or non-SSL password forms
.TP
@ -147,6 +143,9 @@ timeout on idle HTTP connections (default: 10 s)
.TP
.B \-s s_limit
response size limit (default: 200000 B)
.TP
.B \-e
do not keep binary responses for reporting
.TP
.B \-h, \-\-help

View File

@ -239,7 +239,7 @@ int main(int argc, char** argv) {
u32 loop_cnt = 0, purge_age = 0, seed;
u8 dont_save_words = 0, show_once = 0, be_quiet = 0, display_mode = 0,
has_fake = 0;
u8 *wordlist = (u8*)DEF_WORDLIST, *output_dir = NULL;
u8 *wordlist = NULL, *output_dir = NULL;
struct termios term;
struct timeval tv;
@ -421,7 +421,12 @@ int main(int argc, char** argv) {
break;
case 'W':
if (optarg[0] == '+') load_keywords((u8*)optarg + 1, 1, 0);
else {
if (wordlist)
FATAL("Only one -W parameter permitted (unless '+' used).");
wordlist = (u8*)optarg;
}
break;
case 'b':
@ -526,7 +531,9 @@ int main(int argc, char** argv) {
if (max_connections < max_conn_host)
max_connections = max_conn_host;
load_keywords((u8*)wordlist, purge_age);
if (!wordlist) wordlist = (u8*)DEF_WORDLIST;
load_keywords(wordlist, 0, purge_age);
/* Schedule all URLs in the command line for scanning. */