/* skipfish - content analysis --------------------------- Author: Michal Zalewski Copyright 2009, 2010 by Google Inc. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #define _VIA_ANALYSIS_C #include "debug.h" #include "config.h" #include "types.h" #include "http_client.h" #include "database.h" #include "crawler.h" #include "analysis.h" u8 no_parse, /* Disable HTML link detection */ warn_mixed, /* Warn on mixed content */ log_ext_urls, /* Log all external URLs */ no_forms, /* Do not submit forms */ relaxed_mime, /* Relax about cset / mime */ pedantic_cache; /* Match HTTP/1.0 and HTTP/1.1 */ /* Form autofill hints: */ static u8** addl_form_name; static u8** addl_form_value; static u32 addl_form_cnt; /* Runs some rudimentary checks on top-level pivot HTTP responses. */ void pivot_header_checks(struct http_request* req, struct http_response* res) { u32 i; u8 *par_hdr, *cur_hdr; DEBUG_CALLBACK(req, res); /* Server: change. */ cur_hdr = GET_HDR((u8*)"Server", &res->hdr); if (!RPAR(req)->res) par_hdr = NULL; else par_hdr = GET_HDR((u8*)"Server", &RPAR(req)->res->hdr); if (!cur_hdr) cur_hdr = (u8*)"[none]"; if (!par_hdr) par_hdr = (u8*)"[none]"; if (strcmp((char*)cur_hdr, (char*)par_hdr)) problem(PROB_SERVER_CHANGE, req, res, cur_hdr, req->pivot, 0); /* Via: appears or disappears. */ cur_hdr = GET_HDR((u8*)"Via", &res->hdr); if (!RPAR(req)->res) par_hdr = NULL; else par_hdr = GET_HDR((u8*)"Via", &RPAR(req)->res->hdr); if (cur_hdr != par_hdr) problem(PROB_VIA_CHANGE, req, res, cur_hdr ? cur_hdr : (u8*)"[none]", req->pivot, 0); /* New X-* header appears. */ for (i=0;ihdr.c;i++) { if (strncasecmp((char*)res->hdr.n[i], "X-", 2)) continue; if (!RPAR(req)->res) par_hdr = NULL; else par_hdr = GET_HDR(res->hdr.n[i], &RPAR(req)->res->hdr); if (!par_hdr) problem(PROB_X_CHANGE, req, res, res->hdr.n[i], req->pivot,0); } /* Old X-* header disappears. */ if (RPAR(req)->res) for (i=0;ires->hdr.c;i++) { if (strncasecmp((char*)RPAR(req)->res->hdr.n[i], "X-", 2)) continue; cur_hdr = GET_HDR(RPAR(req)->res->hdr.n[i], &res->hdr); if (!cur_hdr) problem(PROB_X_CHANGE, req, res, RPAR(req)->res->hdr.n[i], req->pivot, 0); } } /* Helper for scrape_response(). Tries to add a previously extracted link, also checks for cross-site and mixed content issues and similar woes. Subres is: 1 - redirect; 2 - IMG; 3 - IFRAME, EMBED, OBJECT, APPLET; 4 - SCRIPT, LINK REL=STYLESHEET; 0 - everything else. */ static void test_add_link(u8* str, struct http_request* ref, struct http_response* res, u8 subres, u8 sure) { struct http_request* n; DEBUG_CALLBACK(ref,res); DEBUG("* Alleged URL = '%s' [%u]\n", str, subres); /* Don't add injected links. */ if (!strncasecmp((char*)str, "skipfish:", 10) || !strncasecmp((char*)str, "//skipfish.invalid/", 20) || !strncasecmp((char*)str, "http://skipfish.invalid/", 25)) return; /* Don't add links that look like they came from JS code with fragmented HTML snippets, etc. */ if (!sure && (strchr("()\"' +,^", *str) || (*str == '/' && strchr("()\"' +,^", str[1])))) return; if ((str[0] == '\'' || str[0] == '"') && (str[1] == '+' || str[1] == ' ')) return; if (!strncasecmp((char*)str, "mailto:", 7)) { if (log_ext_urls) { u8* qmark = (u8*)strchr((char*)str, '?'); if (qmark) *qmark = 0; problem(PROB_MAIL_ADDR, ref, res, str + 7, host_pivot(ref->pivot),0); if (qmark) *qmark = '?'; } return; } n = ck_alloc(sizeof(struct http_request)); n->pivot = ref->pivot; if (!parse_url(str, n, ref)) { if (R(100) < crawl_prob) maybe_add_pivot(n, NULL, sure ? 2 : 1); /* Link to a third-party site? */ if (!url_allowed_host(n) && !url_trusted_host(n)) switch (subres) { case 0: if (log_ext_urls) problem(PROB_EXT_LINK, ref, res, str, host_pivot(ref->pivot), 0); break; case 1: if (log_ext_urls) problem(PROB_EXT_REDIR, ref, res, str, ref->pivot, 0); break; case 2: case 3: problem(PROB_EXT_OBJ, ref, res, str, ref->pivot, 0); break; case 4: problem(PROB_EXT_SUB, ref, res, str, ref->pivot, 0); break; } /* Mixed content? We don't care about or redirectors here, though. */ if (ref->proto == PROTO_HTTPS && n->proto == PROTO_HTTP && subres > 2 && warn_mixed) problem((subres == 4) ? PROB_MIXED_SUB : PROB_MIXED_OBJ, ref, res, str, ref->pivot, 0); } else if (!ref->proto) { /* Parser didn't recognize the protocol. If it's a hierarchical URL (foo://), log it. */ u8* x = str; while (isalnum(*x)) x++; if (str != x && *x == ':' && x[1] == '/') problem(PROB_UNKNOWN_PROTO, ref, res, str, ref->pivot, 0); } destroy_request(n); } /* Another scrape_response() helper - decodes HTML escaping, maybe also JS escaping, from URLs. Returns a dynamically allocated copy. */ static u8* html_decode_param(u8* url, u8 also_js) { u32 len = strlen((char*)url); u8* ret = ck_alloc(len + 1); u32 i, pos = 0; /* If directed to do so, decode \x, \u, and \char sequences first. */ if (also_js) { for (i=0;i 0xff) act_val = '?'; ret[pos++] = act_val; } else ret[pos++] = url[i]; } ret[pos] = 0; url = ret; len = pos; pos = 0; ret = ck_alloc(len + 1); } /* Next, do old-school HTML decoding. There are many other named entities, of course, but the odds of them appearing in URLs without %-encoding are negligible. */ for (i=0;i 0xff) act_val = '?'; ret[pos++] = act_val; i += strcspn((char*)url + i, ";"); continue; } /* Fall through and output the sequence as-is. */ } } else if (url[i] == '\r' || url[i] == '\n') continue; ret[pos++] = url[i]; } ret[pos] = 0; if (also_js) ck_free(url); return ret; } /* Macro to test for tag names */ #define ISTAG(_val, _tag) \ (!strncasecmp((char*)(_val), _tag, strlen((char*)_tag)) && \ isspace(_val[strlen((char*)_tag)])) /* Macro to find and move past parameter name (saves result in _store, NULL if not found). Buffer needs to be NUL-terminated at nearest >. */ #define FIND_AND_MOVE(_store, _val, _param) { \ (_store) = inl_strcasestr((u8*)_val, (u8*)_param); \ if (_store) { \ if (!isspace((_store)[-1])) (_store) = NULL; \ else (_store) += strlen((char*)_param); \ } \ } while (0) /* Macro to extract parameter value, handling quotes. */ #define EXTRACT_ALLOC_VAL(_store, _val) do { \ u32 _val_len; \ if (*(_val) == '\'') _val_len = strcspn((char*)++(_val), "'"); else \ if (*(_val) == '"') _val_len = strcspn((char*)++(_val), "\""); else \ _val_len = strcspn((char*)(_val), "> \t\r\n"); \ (_store) = ck_memdup((_val), (_val_len) + 1); \ (_store)[(_val_len)] = 0; \ } while (0) /* Adds a new item to the form hint system. */ void add_form_hint(u8* name, u8* value) { addl_form_name = ck_realloc(addl_form_name, (addl_form_cnt + 1) * sizeof(u8*)); addl_form_value = ck_realloc(addl_form_value, (addl_form_cnt + 1) * sizeof(u8*)); addl_form_name[addl_form_cnt] = name; addl_form_value[addl_form_cnt] = value; addl_form_cnt++; } /* Helper for collect_form_data() - comes up with a fitting value for a checkbox. Returns a static buffer. */ static u8* make_up_form_value(u8* name, struct http_request* req, struct http_response* res) { u32 i; for (i=0;ipivot), 0); return (u8*)form_suggestion[i][1]; } /* Helper for collect_form_data() - checks for a probable anti-XSRF token values. */ static u8 maybe_xsrf(u8* token) { u8* tmp; u32 digit_cnt = 0, upper_cnt = 0, slash_cnt = 0;; static u8 tm_prefix[8]; if (!tm_prefix[0]) sprintf((char*)tm_prefix, "%lu", time(0) / 100000); /* Unix time is not a valid token. */ if (!strncasecmp((char*)token, (char*)tm_prefix, strlen((char*)tm_prefix))) return 0; tmp = token; while (*tmp && (isdigit(*tmp) || strchr("abcdef", tolower(*tmp)))) { if (isdigit(*tmp)) digit_cnt++; tmp++; } /* Looks like base 10 or 16... */ if (!*tmp) { u32 len = tmp - token; if (len >= XSRF_B16_MIN && len <= XSRF_B16_MAX && digit_cnt >= XSRF_B16_NUM) return 1; return 0; } digit_cnt = 0; tmp = token; while (*tmp && (isalnum(*tmp) || strchr("=+/", *tmp))) { if (isdigit(*tmp)) digit_cnt++; if (isupper(*tmp)) upper_cnt++; if (*tmp == '/') slash_cnt++; tmp++; } /* Looks like base 32 or 64... */ if (!*tmp) { u32 len = tmp - token; if (len >= XSRF_B64_MIN && len <= XSRF_B64_MAX && ((digit_cnt >= XSRF_B64_NUM && upper_cnt >= XSRF_B64_CASE) || digit_cnt >= XSRF_B64_NUM2) && slash_cnt <= XSRF_B64_SLASH) return 1; return 0; } /* Looks like... not a numerical token at all. */ return 0; } /* Another helper for scrape_response(): examines all tags up until , then adds them as parameters to current request. */ static void collect_form_data(struct http_request* req, struct http_request* orig_req, struct http_response* orig_res, u8* cur_str, u8 is_post) { u8 has_xsrf = 0, pass_form = 0; u32 tag_cnt = 0; DEBUG("* collect_form_data() entered\n"); do { u8* tag_end; if (*cur_str == '<' && (tag_end = (u8*)strchr((char*)cur_str + 1, '>'))) { cur_str++; *tag_end = 0; if (!strncasecmp((char*)cur_str, "/form", 5)) { *tag_end = '>'; goto final_checks; } if (ISTAG(cur_str, "input") || ISTAG(cur_str, "textarea") || ISTAG(cur_str, "select")) { u8 *tag_name, *tag_value, *tag_type, *clean_name = NULL, *clean_value = NULL; FIND_AND_MOVE(tag_name, cur_str, "name="); FIND_AND_MOVE(tag_value, cur_str, "value="); FIND_AND_MOVE(tag_type, cur_str, "type="); if (!tag_name) goto next_tag; EXTRACT_ALLOC_VAL(tag_name, tag_name); clean_name = html_decode_param(tag_name, 0); ck_free(tag_name); tag_name = 0; if (tag_value) { EXTRACT_ALLOC_VAL(tag_value, tag_value); clean_value = html_decode_param(tag_value, 0); ck_free(tag_value); tag_value = 0; } if (tag_type) EXTRACT_ALLOC_VAL(tag_type, tag_type); else tag_type = ck_strdup((u8*)"text"); tag_cnt++; if (!strcasecmp((char*)tag_type, "file")) { if (!is_post) { ck_free(req->method); req->method = ck_strdup((u8*)"POST"); is_post = 1; } set_value(PARAM_POST_F, clean_name, clean_value ? clean_value : (u8*)"", 0, &req->par); } else if (!strcasecmp((char*)tag_type, "reset")) { /* Do nothing - do not store. */ tag_cnt--; } else if (!strcasecmp((char*)tag_type, "button") || !strcasecmp((char*)tag_type, "submit")) { set_value(is_post ? PARAM_POST : PARAM_QUERY, clean_name, clean_value ? clean_value : (u8*)"", 0, &req->par); } else if (!strcasecmp((char*)tag_type, "checkbox")) { /* Turn checkboxes on. */ set_value(is_post ? PARAM_POST : PARAM_QUERY, clean_name, (u8*)"on", 0, &req->par); } else { u8* use_value = clean_value; /* Don't second-guess hidden fields. */ if (strcasecmp((char*)tag_type, "hidden") && (!use_value || !use_value[0])) { use_value = make_up_form_value(clean_name, orig_req, orig_res); } else { if (!use_value) use_value = (u8*)""; } /* Radio buttons are rolled back into a single parameter because we always replace offset 0 for given clean_name. */ set_value(is_post ? PARAM_POST : PARAM_QUERY, clean_name, use_value, 0, &req->par); if (!strcasecmp((char*)tag_type, "hidden") && maybe_xsrf(use_value)) has_xsrf = 1; } if (inl_strcasestr(tag_name, (u8*) "passw")) pass_form = 1; ck_free(tag_name); ck_free(tag_type); ck_free(tag_value); ck_free(clean_name); ck_free(clean_value); } next_tag: *tag_end = '>'; } else tag_end = cur_str; /* Skip to next tag. */ cur_str = (u8*)strchr((char*)tag_end + 1, '<'); } while (cur_str); final_checks: if (pass_form) { problem(PROB_PASS_FORM, req, orig_res, NULL, req->pivot, 0); } else { if (tag_cnt && !has_xsrf) problem(PROB_VULN_FORM, req, orig_res, NULL, req->pivot, 0); else problem(PROB_FORM, req, orig_res, NULL, req->pivot, 0); } } /* Helper for scrape_response() and content_checks: is the file mostly ASCII? */ static u8 is_mostly_ascii(struct http_response* res) { u32 i, total, printable = 0; if (res->doc_type) return (res->doc_type == 2); total = (res->pay_len > 128) ? 128 : res->pay_len; if (!total) { res->doc_type = 2; return 1; } for (i=0;ipayload[i] >= 0x20 && res->payload[i] <= 0x7f) || (res->payload[i] && strchr("\r\n", res->payload[i]))) printable++; if (printable * 100 / total < 90) { DEBUG("* looks like binary data (print = %u, total = %u)\n", printable, total); res->doc_type = 1; return 1; } DEBUG("* looks like text file (print = %u, total = %u)\n", printable, total); res->doc_type = 2; return 1; } /* Analyzes response headers (Location, etc), body to extract new links, keyword guesses. This code is designed to be simple and fast, but it does not even try to understand the intricacies of HTML or whatever the response might be wrapped in. */ void scrape_response(struct http_request* req, struct http_response* res) { struct http_request *base = NULL; u8* cur_str; u32 i; DEBUG_CALLBACK(req, res); if (no_parse || res->scraped) return; res->scraped = 1; /* Handle Location, Refresh headers first. */ if ((cur_str = GET_HDR((u8*)"Location", &res->hdr))) test_add_link(cur_str, req, res, 1, 1); if ((cur_str = GET_HDR((u8*)"Refresh", &res->hdr)) && (cur_str = (u8*)strchr((char*)cur_str, '='))) test_add_link(cur_str + 1, req, res, 1, 1); if (!res->payload || !is_mostly_ascii(res)) return; cur_str = res->payload; /* PASS 1: Do a simplified check to what looks like proper, known HTML parameters bearing URLs. Note that payload is conveniently NUL-terminated. */ do { u8 *tag_end; if (*cur_str == '<' && (tag_end = (u8*)strchr((char*)cur_str + 1, '>'))) { u32 link_type = 0; u8 set_base = 0, parse_form = 0; u8 *dirty_url = NULL, *clean_url = NULL, *meta_url = NULL; cur_str++; *tag_end = 0; /* Several tags we need to handle specially, either because they denote a particularly interesting content type (marked in link_type, see test_add_link()), or because they use a non-standard parameter for URL data. */ if (ISTAG(cur_str, "meta")) { link_type = 1; FIND_AND_MOVE(dirty_url, cur_str, "content="); if (dirty_url) { EXTRACT_ALLOC_VAL(meta_url, dirty_url); dirty_url = inl_strcasestr(meta_url, (u8*)"URL="); if (dirty_url) dirty_url += 4; } } else if (ISTAG(cur_str, "img")) { link_type = 2; FIND_AND_MOVE(dirty_url, cur_str, "src="); } else if (ISTAG(cur_str, "object") || ISTAG(cur_str, "embed") || ISTAG(cur_str, "applet") || ISTAG(cur_str, "iframe")) { link_type = 3; FIND_AND_MOVE(dirty_url, cur_str, "src="); if (!dirty_url) FIND_AND_MOVE(dirty_url, cur_str, "codebase="); } else if (ISTAG(cur_str, "param") && inl_strcasestr(cur_str, (u8*)"movie")) { link_type = 3; FIND_AND_MOVE(dirty_url, cur_str, "value="); } else if (ISTAG(cur_str, "script")) { link_type = 4; FIND_AND_MOVE(dirty_url, cur_str, "src="); } else if (ISTAG(cur_str, "link") && inl_strcasestr(cur_str, (u8*)"stylesheet")) { link_type = 4; FIND_AND_MOVE(dirty_url, cur_str, "href="); } else if (ISTAG(cur_str, "base")) { set_base = 1; FIND_AND_MOVE(dirty_url, cur_str, "href="); } else if (ISTAG(cur_str, "form")) { u8* method; parse_form = 1; FIND_AND_MOVE(dirty_url, cur_str, "action="); /* See if we need to POST this form or not. */ FIND_AND_MOVE(method, cur_str, "method="); if (method && *method) { if (strchr("\"'", *method)) method++; if (tolower(method[0]) == 'p') parse_form = 2; } } else { /* All other tags - other types, , - are handled in a generic way. */ FIND_AND_MOVE(dirty_url, cur_str, "href="); if (!dirty_url) FIND_AND_MOVE(dirty_url, cur_str, "src="); } /* If we found no URL to speak of, we're done. */ if (!dirty_url) { ck_free(meta_url); goto next_tag; } /* De-quotify and decode the value. */ EXTRACT_ALLOC_VAL(dirty_url, dirty_url); clean_url = html_decode_param(dirty_url, 0); ck_free(dirty_url); ck_free(meta_url); if (!*clean_url) goto next_tag; test_add_link(clean_url, base ? base : req, res, link_type, 1); /* If we are dealing with a tag, we need to create a new dummy request to use as a referrer. */ if (set_base) { struct http_request* n = ck_alloc(sizeof(struct http_request)); n->pivot = req->pivot; if (!parse_url(clean_url, n, base ? base : req)) base = n; } else if (parse_form) { /*
handling... */ struct http_request* n = ck_alloc(sizeof(struct http_request)); n->pivot = req->pivot; if (parse_form == 2) { ck_free(n->method); n->method = ck_strdup((u8*)"POST"); } /* Don't collect form fields, etc, if target is not within the scope anyway. */ DEBUG("* Found form: target %s method %s\n", clean_url, n->method); if (!parse_url(clean_url, n, base ? base : req) && url_allowed(n) && R(100) < crawl_prob && !no_forms) { collect_form_data(n, req, res, tag_end + 1, (parse_form == 2)); maybe_add_pivot(n, NULL, 2); } destroy_request(n); } next_tag: *tag_end = '>'; if (clean_url) ck_free(clean_url); } else tag_end = cur_str; /* Skip to next tag. */ cur_str = (u8*)strchr((char*)tag_end + 1, '<'); } while (cur_str); cur_str = res->payload; /* PASS 2: Extract links from non-HTML body, JS, etc; add keywords. */ do { u32 clean_len, alpha_cnt = 0, lower_cnt = 0, lead = 0, seg_len; u8 *ext, *token, *clean_url, *tmp, *pos_at; u8 last = 0, saved; /* Skip leading whitespaces, terminators. */ seg_len = strspn((char*)cur_str, " \t\r\n<>\"'"); cur_str += seg_len; /* If there's a = character preceeded only by alnums or underscores, skip this chunk (to handle something=http://www.example.com/ neatly) */ tmp = cur_str; while (*tmp && (isalnum(*tmp) || *tmp == '_')) tmp++; if (*tmp == '=') cur_str = tmp + 1; if (!*cur_str) break; seg_len = strcspn((char*)cur_str + 1, " \t\r\n<>\"'") + 1; /* Extract the segment, decoding JS and HTML on the go. */ saved = cur_str[seg_len]; cur_str[seg_len] = 0; clean_url = html_decode_param(cur_str, 1); cur_str[seg_len] = saved; tmp = clean_url; /* We want the entire extracted segment to consist only of nice characters we would expect in a URL. If not, panic. */ while (*tmp) { if (!isalnum(*tmp) && !isspace(*tmp) && !strchr("_-.:@/?&=#%;$!+~()[]{}\\|^*", *tmp)) goto url_done; tmp++; } clean_len = tmp - clean_url; /* Strip trailing characters that are unlikely to appear in valid URLs anyway, and could be a part of some message. */ while (clean_len && strchr(".,:?!-$&", clean_url[clean_len-1])) clean_len--; clean_url[clean_len] = 0; /* URL CHECK 1: Things that start with ./ or ../ are obviously URLs. We do not make assumptins about syntax such as /foo/, though, as it could very well be a regex in a JS block. */ if (!strncmp((char*)clean_url, "./", 2) || !strncmp((char*)clean_url, "../", 3)) { add_link: test_add_link(clean_url, base ? base : req, res, 0, 0); goto url_done; } /* URL CHECK 2: Things that start with :// are quite clearly URLs. */ while (clean_url[lead] && (isalnum(clean_url[lead]))) lead++; if (lead && !strncmp((char*)clean_url + lead, "://", 3) && clean_url[lead + 3]) goto add_link; /* URL CHECK 3: If the result ends with ., and contains a slash anywhere, assume URL (without that slash check, we would get duped by 'domain.com'. */ if (strchr((char*)clean_url, '/')) { i = 0; while ((ext = wordlist_get_extension(i++))) { u32 ext_len = strlen((char*)ext); if (clean_len > ext_len + 2 && !strncasecmp((char*)clean_url + clean_len - ext_len, (char*)ext, ext_len) && clean_url[clean_len - ext_len - 1] == '.') goto add_link; } } if (!(pos_at = (u8*)strchr((char*)clean_url, '@'))) { /* URL CHECK 4: ?= syntax is strongly indicative of an URL (only if not e-mail). */ u8 *pos_qmark = (u8*)strchr((char*)clean_url, '?'), *pos_eq = (u8*)strchr((char*)clean_url, '='), *pos_amp = (u8*)strchr((char*)clean_url, '&'); if (pos_qmark && pos_eq && pos_qmark + 1 < pos_eq && pos_eq[1] && (!pos_amp || pos_amp > pos_eq) && pos_eq[1] != '=' && !strchr((char*)clean_url, '(') && !strchr((char*)clean_url, '[') && (u8*)strchr((char*)clean_url, ':') < pos_eq) goto add_link; } else if (log_ext_urls) { /* EMAIL CHECK: If the string uses a limited set of characters, starts with alpha, ahs at least one period after @, and both @ and the period are immediately followed by alpha - assume e-mail. */ u8 *pos_dot, *pos_qmark = (u8*)strchr((char*)clean_url, '?'); if (pos_qmark && pos_qmark > pos_at) *pos_qmark = 0; lead = 0; while (clean_url[lead] && (isalnum(clean_url[lead]) || strchr("._-+@", clean_url[lead]))) lead++; pos_dot = (u8*)strchr((char*)pos_at + 1, '.'); if (!clean_url[lead] && pos_at && pos_dot && isalpha(clean_url[0]) && isalpha(pos_at[1]) && isalpha(pos_dot[1])) { problem(PROB_MAIL_ADDR, req, res, clean_url, host_pivot(req->pivot), 0); goto url_done; } } /* LAST CHANCE: Try to detect base64; if the segment does not look like base64, add each segment to try_list. */ tmp = clean_url; while (*tmp) { if (isalpha(*tmp)) { alpha_cnt++; if (islower(*tmp)) lower_cnt++; } tmp++; } if (alpha_cnt > 20 && (lower_cnt * 100 / alpha_cnt) > 35 && (lower_cnt * 100 / alpha_cnt) < 65) goto url_done; token = clean_url; do { while (*token && !isalnum(*token)) token++; tmp = token; while (*tmp && isalnum(*tmp)) tmp++; if (!*tmp) last = 1; *tmp = 0; if (R(100) < GUESS_PROB) wordlist_add_guess(token); token = tmp + 1; } while (!last); url_done: ck_free(clean_url); cur_str += seg_len; } while (*cur_str); if (base) destroy_request(base); /* Phew! */ } /* Returns 1 if document looks like standalone CSS. */ static u8 is_css(struct http_response* res) { u8* text = res->payload; u8 first = 0, last = 0; if (res->css_type) return (res->css_type == 2); if (!text || !is_mostly_ascii(res)) return 0; do { /* Skip whitespaces... */ while (isspace(*text)) text++; /* Skip HTML, CSS comments. */ if (!strncmp((char*)text, "