1.43b: Reduce the likelihood of crawl loops

- Improvement to reduce the likelihood of crawl loops: do not
    extract links if current page identical to parent.
This commit is contained in:
Steve Pinkham 2010-06-29 10:08:21 -04:00
parent d0ce4e0db9
commit 98ffe73aba
5 changed files with 18 additions and 5 deletions

View File

@ -1,3 +1,9 @@
Version 1.43b:
--------------
- Improvement to reduce the likelihood of crawl loops: do not
extract links if current page identical to parent.
Version 1.42b:
--------------

View File

@ -646,6 +646,13 @@ void scrape_response(struct http_request* req, struct http_response* res) {
res->scraped = 1;
/* Do not scrape pages that are identical to their parent. */
if (RPAR(req)->res && same_page(&res->sig, &RPAR(req)->res->sig)) {
DEBUG("* Not extracting links because page looks the same as parent.\n");
return;
}
/* Handle Location, Refresh headers first. */
if ((cur_str = GET_HDR((u8*)"Location", &res->hdr)))

View File

@ -23,7 +23,7 @@
#ifndef _HAVE_CONFIG_H
#define _HAVE_CONFIG_H
#define VERSION "1.42b"
#define VERSION "1.43b"
#define USE_COLOR 1 /* Use terminal colors */

View File

@ -2445,7 +2445,7 @@ void http_stats(u64 st_time) {
struct timeval tv;
gettimeofday(&tv, NULL);
en_time = tv.tv_sec * 1000 + tv.tv_usec / 1000;
en_time = tv.tv_sec * 1000L + tv.tv_usec / 1000L;
SAY(cLBL "Scan statistics:\n\n"
cGRA " Scan time : " cNOR "%u:%02u:%02u.%04u\n"

View File

@ -447,7 +447,7 @@ int main(int argc, char** argv) {
fcntl(0, F_SETFL, O_NONBLOCK);
gettimeofday(&tv, NULL);
st_time = tv.tv_sec * 1000 + tv.tv_usec / 1000;
st_time = tv.tv_sec * 1000L + tv.tv_usec / 1000L;
if (!be_quiet) SAY("\x1b[H\x1b[J");
else SAY(cLGN "[*] " cBRI "Scan in progress, please stay tuned...\n");
@ -456,7 +456,7 @@ int main(int argc, char** argv) {
u8 keybuf[8];
if (be_quiet || ((loop_cnt++ % 20) && !show_once)) continue;
if (be_quiet || ((loop_cnt++ % 100) && !show_once)) continue;
if (clear_screen) {
SAY("\x1b[H\x1b[2J");
@ -487,7 +487,7 @@ int main(int argc, char** argv) {
}
gettimeofday(&tv, NULL);
en_time = tv.tv_sec * 1000 + tv.tv_usec / 1000;
en_time = tv.tv_sec * 1000L + tv.tv_usec / 1000L;
SAY("\n");