commit fcf0650b5ee378f7baf9cfddb71504f8316184fc Author: Steve Pinkham Date: Sat Mar 20 11:46:08 2010 -0400 Version 1.00b as released diff --git a/COPYING b/COPYING new file mode 100644 index 0000000..d645695 --- /dev/null +++ b/COPYING @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. 
Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative 
Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..c897eda --- /dev/null +++ b/Makefile @@ -0,0 +1,54 @@ +# +# skipfish - Makefile +# ------------------- +# +# Author: Michal Zalewski +# +# Copyright 2009, 2010 by Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +PROGNAME = skipfish + +OBJFILES = http_client.c database.c crawler.c analysis.c report.c +INCFILES = alloc-inl.h string-inl.h debug.h types.h http_client.h \ + database.h crawler.h analysis.h config.h report.h + +CFLAGS_GEN = -Wall -funsigned-char -g -ggdb +CFLAGS_DBG = $(CFLAGS_GEN) -DLOG_STDERR=1 -DDEBUG_ALLOCATOR=1 +CFLAGS_OPT = $(CFLAGS_GEN) -O3 -Wno-format +LDFLAGS = -lcrypto -lssl -lidn -lz + +all: $(PROGNAME) + +$(PROGNAME): $(PROGNAME).c $(OBJFILES) $(INCFILES) + $(CC) $(PROGNAME).c -o $(PROGNAME) $(CFLAGS_OPT) $(OBJFILES) $(LDFLAGS) + @echo + @echo "NOTE: See dictionaries/README-FIRST to pick a dictionary for the tool." + @echo + +debug: $(PROGNAME).c $(OBJFILES) $(INCFILES) + $(CC) $(PROGNAME).c -o $(PROGNAME) $(CFLAGS_DBG) $(OBJFILES) $(LDFLAGS) + +clean: + rm -f $(PROGNAME) *.exe *.o *~ a.out core core.[1-9][0-9]* *.stackdump \ + LOG same_test + rm -rf tmpdir + +same_test: same_test.c $(OBJFILES) $(INCFILES) + $(CC) same_test.c -o same_test $(CFLAGS_DBG) $(OBJFILES) $(LDFLAGS) + +publish: clean + cd ..; tar cfvz ~/www/skipfish.tgz skipfish + chmod 644 ~/www/skipfish.tgz diff --git a/README b/README new file mode 100644 index 0000000..a01040e --- /dev/null +++ b/README @@ -0,0 +1,484 @@ +=========================================== +skipfish - web application security scanner +=========================================== + + http://code.google.com/p/skipfish/ + + * Written and maintained by Michal Zalewski . + * Copyright 2009, 2010 Google Inc, rights reserved. + * Released under terms and conditions of the Apache License, version 2.0. + +-------------------- +1. What is skipfish? +-------------------- + +Skipfish is an active web application security reconnaissance tool. It prepares +an interactive sitemap for the targeted site by carrying out a recursive crawl +and dictionary-based probes. The resulting map is then annotated with the +output from a number of active (but hopefully non-disruptive) security checks. 
+The final report generated by the tool is meant to serve as a foundation for +professional web application security assessments. +Why should I bother with this particular tool? + +A number of commercial and open source tools with analogous functionality is +readily available (e.g., Nikto, Nessus); stick to the one that suits you best. +That said, skipfish tries to address some of the common problems associated +with web security scanners. Specific advantages include: + + * High performance: 500+ requests per second against responsive Internet + targets, 2000+ requests per second on LAN / MAN networks, and 7000+ requests + against local instances has been observed, with a very modest CPU, network, + and memory footprint. This can be attributed to: + + - Multiplexing single-thread, fully asynchronous network I/O and data + processing model that eliminates memory management, scheduling, and IPC + inefficiencies present in some multi-threaded clients. + + - Advanced HTTP/1.1 features such as range requests, content + compression, and keep-alive connections, as well as forced response size + limiting, to keep network-level overhead in check. + + - Smart response caching and advanced server behavior heuristics are + used to minimize unnecessary traffic. + + - Performance-oriented, pure C implementation, including a custom + HTTP stack. + + * Ease of use: skipfish is highly adaptive and reliable. The scanner + features: + + - Heuristic recognition of obscure path- and query-based parameter + handling schemes. + + - Graceful handling of multi-framework sites where certain paths obey + a completely different semantics, or are subject to different filtering + rules. + + - Automatic wordlist construction based on site content analysis. + + - Probabilistic scanning features to allow periodic, time-bound + assessments of arbitrarily complex sites. 
+ + * Well-designed security checks: the tool is meant to provide accurate and + meaningful results: + + - Three-step differential probes are preferred to signature checks + for detecting vulnerabilities. + + - Ratproxy-style logic is used to spot subtle security problems: + cross-site request forgery, cross-site script inclusion, mixed content, + issues MIME- and charset mismatches, incorrect caching directive, etc. + + - Bundled security checks are designed to handle tricky scenarios: + stored XSS (path, parameters, headers), blind SQL or XML injection, or + blind shell injection. + + - Report post-processing drastically reduces the noise caused by any + remaining false positives or server gimmicks by identifying repetitive + patterns. + +That said, skipfish is not a silver bullet, and may be unsuitable for certain +purposes. For example, it does not satisfy most of the requirements outlined in +WASC Web Application Security Scanner Evaluation Criteria (some of them on +purpose, some out of necessity); and unlike most other projects of this type, +it does not come with an extensive database of known vulnerabilities for +banner-type checks. + +----------------------------------------------------- +2. Most curious! What specific tests are implemented? +----------------------------------------------------- + +A rough list of the security checks offered by the tool is outlined below. + + * High risk flaws (potentially leading to system compromise): + + - Server-side SQL injection (including blind vectors, numerical + parameters). + - Explicit SQL-like syntax in GET or POST parameters. + - Server-side shell command injection (including blind vectors). + - Server-side XML / XPath injection (including blind vectors). + - Format string vulnerabilities. + - Integer overflow vulnerabilities. + + * Medium risk flaws (potentially leading to data compromise): + + - Stored and reflected XSS vectors in document body (minimal JS XSS + support present). 
+ - Stored and reflected XSS vectors via HTTP redirects. + - Stored and reflected XSS vectors via HTTP header splitting. + - Directory traversal (including constrained vectors). + - Assorted file POIs (server-side sources, configs, etc). + - Attacker-supplied script and CSS inclusion vectors (stored and + reflected). + - External untrusted script and CSS inclusion vectors. + - Mixed content problems on script and CSS resources (optional). + - Incorrect or missing MIME types on renderables. + - Generic MIME types on renderables. + - Incorrect or missing charsets on renderables. + - Conflicting MIME / charset info on renderables. + - Bad caching directives on cookie setting responses. + + * Low risk issues (limited impact or low specificity): + + - Directory listing bypass vectors. + - Redirection to attacker-supplied URLs (stored and reflected). + - Attacker-supplied embedded content (stored and reflected). + - External untrusted embedded content. + - Mixed content on non-scriptable subresources (optional). + - HTTP credentials in URLs. + - Expired or not-yet-valid SSL certificates. + - HTML forms with no XSRF protection. + - Self-signed SSL certificates. + - SSL certificate host name mismatches. + - Bad caching directives on less sensitive content. + + * Internal warnings: + + - Failed resource fetch attempts. + - Exceeded crawl limits. + - Failed 404 behavior checks. + - IPS filtering detected. + - Unexpected response variations. + - Seemingly misclassified crawl nodes. + + * Non-specific informational entries: + + - General SSL certificate information. + - Significantly changing HTTP cookies. + - Changing Server, Via, or X-... headers. + - New 404 signatures. + - Resources that cannot be accessed. + - Resources requiring HTTP authentication. + - Broken links. + - Server errors. + - All external links not classified otherwise (optional). + - All external e-mails (optional). + - All external URL redirectors (optional). + - Links to unknown protocols. 
+ - Form fields that could not be autocompleted. + - All HTML forms detected. + - Password entry forms (for external brute-force). + - Numerical file names (for external brute-force). + - User-supplied links otherwise rendered on a page. + - Incorrect or missing MIME type on less significant content. + - Generic MIME type on less significant content. + - Incorrect or missing charset on less significant content. + - Conflicting MIME / charset information on less significant content. + - OGNL-like parameter passing conventions. + +Along with a list of identified issues, skipfish also provides summary +overviews of document types and issue types found; and an interactive sitemap, +with nodes discovered through brute-force denoted in a distinctive way. + +----------------------------------------------------------- +3. All right, I want to try it out. What do I need to know? +----------------------------------------------------------- + +First and foremost, please do not be evil. Use skipfish only against services +you own, or have a permission to test. + +Keep in mind that all types of security testing can be disruptive. Although the +scanner is designed not to carry out disruptive malicious attacks, it may +accidentally interfere with the operations of the site. You must accept the +risk, and plan accordingly. Run the scanner against test instances where +feasible, and be prepared to deal with the consequences if things go wrong. + +Also note that the tool is meant to be used by security professionals, and is +experimental in nature. It may return false positives or miss obvious security +problems - and even when it operates perfectly, it is simply not meant to be a +point-and-click application. Do not rely on its output at face value. +How to run the scanner? + +To compile it, simply unpack the archive and try make. Chances are, you will +need to install libidn first. + +Next, you need to copy the desired dictionary file from dictionaries/ to +skipfish.wl. 
Please read dictionaries/README-FIRST carefully to make the right +choice. This step has a profound impact on the quality of scan results later on. + +Once you have the dictionary selected, you can try: + +$ ./skipfish -o output_dir http://www.example.com/some/starting/path.txt + +Note that you can provide more than one starting URL if so desired; all of them +will be crawled. + +In the example above, skipfish will scan the entire www.example.com (including +services on other ports, if linked to from the main page), and write a report +to output_dir/index.html. You can then view this report with your favorite +browser (JavaScript must be enabled). The index.html file is static; actual +results are stored as a hierarchy of JSON files, suitable for machine +processing if needs be. + +Some sites may require authentication; for simple HTTP credentials, you can try: + +$ ./skipfish -A user:pass ...other parameters... + +Alternatively, if the site relies on HTTP cookies instead, log in in your +browser or using a simple curl script, and then provide skipfish with a session +cookie: + +$ ./skipfish -C name=val ...other parameters... + +Other session cookies may be passed the same way, one per each -C option. + +Certain URLs on the site may log out your session; you can combat this in two +ways: by using the -N option, which causes the scanner to reject attempts to +set or delete cookies; or with the -X parameter, which prevents matching URLs +from being fetched: + +$ ./skipfish -X /logout/logout.aspx ...other parameters... + +The -X option is also useful for speeding up your scans by excluding /icons/, +/doc/, /manuals/, and other standard, mundane locations along these lines. 
In +general, you can use -X, plus -I (only spider URLs matching a substring) and -S +(ignore links on pages where a substring appears in response body) to limit the +scope of a scan any way you like - including restricting it only to a specific +protocol and port: + +$ ./skipfish -I http://example.com:1234/ ...other parameters... + +Another useful scoping option is -D - allowing you to specify additional hosts +or domains to consider in-scope for the test. By default, all hosts appearing +in the command-line URLs are added to the list - but you can use -D to broaden +these rules, for example: + +$ ./skipfish -D test2.example.com -o output-dir http://test1.example.com/ + +...or, for a domain wildcard match, use: + +$ ./skipfish -D .example.com -o output-dir http://test1.example.com/ + +In some cases, you do not want to actually crawl a third-party domain, but you +trust the owner of that domain enough not to worry about cross-domain content +inclusion from that location. To suppress warnings, you can use the -B option, +for example: + +$ ./skipfish -B .google-analytics.com -B .googleapis.com ...other parameters... + +By default, skipfish sends minimalistic HTTP headers to reduce the amount of +data exchanged over the wire; some sites examine User-Agent strings or header +ordering to reject unsupported clients, however. In such a case, you can use -b +ie or -b ffox to mimic one of the two popular browsers. + +When it comes to customizing your HTTP requests, you can also use the -H option +to insert any additional, non-standard headers; or -F to define a custom +mapping between a host and an IP (bypassing the resolver). The latter feature +is particularly useful for not-yet-launched or legacy services. + +Some sites may be too big to scan in a reasonable timeframe. If the site +features well-defined tarpits - for example, 100,000 nearly identical user +profiles as a part of a social network - these specific locations can be +excluded with -X or -S. 
In other cases, you may need to resort to other +settings: -d limits crawl depth to a specified number of subdirectories; -c +limits the number of children per directory; and -r limits the total number of +requests to send in a scan. + +An interesting option is available for repeated assessments: -p. By specifying +a percentage between 1 and 100%, it is possible to tell the crawler to follow +fewer than 100% of all links, and try fewer than 100% of all dictionary +entries. This - naturally - limits the completeness of a scan, but unlike most +other settings, it does so in a balanced, non-deterministic manner. It is +extremely useful when you are setting up time-bound, but periodic assessments +of your infrastructure. Another related option is -q, which sets the initial +random seed for the crawler to a specified value. This can be used to exactly +reproduce a previous scan to compare results. Randomness is relied upon most +heavily in the -p mode, but also for making a couple of other scan management +decisions elsewhere. + +Some particularly complex (or broken) services may involve a very high number +of identical or nearly identical pages. Although these occurrences are by +default grayed out in the report, they still use up some screen estate and take +a while to process on JavaScript level. In such extreme cases, you may use the +-Q option to suppress reporting of duplicate nodes altogether, before the +report is written. This may give you a less comprehensive understanding of how +the site is organized, but has no impact on test coverage. + +In certain quick assessments, you might also have no interest in paying any +particular attention to the desired functionality of the site - hoping to +explore non-linked secrets only. In such a case, you may specify -P to inhibit +all HTML parsing. This limits the coverage and takes away the ability for the +scanner to learn new keywords by looking at the HTML, but speeds up the test +dramatically. 
Another similarly crippling option that reduces the risk of +persistent effects of a scan is -O, which inhibits all form parsing and +submission steps. + +By default, skipfish complains loudly about all MIME or character set +mismatches on renderable documents, and classifies many of them as "medium +risk"; this is because, if any user-controlled content is returned, the +situation could lead to cross-site scripting attacks in certain browsers. On +some poorly designed and maintained sites, this may contribute too much noise; +if so, you may use -J to mark these issues as "low risk" unless the scanner can +explicitly see its own user input being echoed back on the resulting page. +This may miss many subtle attack vectors, though. + +Some sites that handle sensitive user data care about SSL - and about getting +it right. Skipfish may optionally assist you in figuring out problematic mixed +content scenarios - use the -M option to enable this. The scanner will complain +about situations such as http:// scripts being loaded on https:// pages - but +will disregard non-risk scenarios such as images. + +Likewise, certain pedantic sites may care about cases where caching is +restricted on HTTP/1.1 level, but no explicit HTTP/1.0 caching directive is +given. Specifying -E in the command-line causes skipfish to log all such +cases carefully. + +Lastly, in some assessments that involve self-contained sites without extensive +user content, the auditor may care about any external e-mails or HTTP links +seen, even if they have no immediate security impact. Use the -U option to have +these logged. + +Dictionary management is a special topic, and - as mentioned - is covered in +more detail in dictionaries/README-FIRST. Please read that file before +proceeding. 
Some of the relevant options include -W to specify a custom +wordlist, -L to suppress auto-learning, -V to suppress dictionary updates, -G +to limit the keyword guess jar size, -R to drop old dictionary entries, and -Y +to inhibit expensive $keyword.$extension fuzzing. + +Skipfish also features a form auto-completion mechanism in order to maximize +scan coverage. The values should be non-malicious, as they are not meant to +implement security checks - but rather, to get past input validation logic. You +can define additional rules, or override existing ones, with the -T option (-T +form_field_name=field_value, e.g. -T login=test123 -T password=test321 - +although note that -C and -A are a much better method of logging in). + +There is also a handful of performance-related options. Use -g to set the +maximum number of connections to maintain, globally, to all targets (it is +sensible to keep this under 50 or so to avoid overwhelming the TCP/IP stack on +your system or on the nearby NAT / firewall devices); and -m to set the per-IP +limit (experiment a bit: 2-4 is usually good for localhost, 4-8 for local +networks, 10-20 for external targets, 30+ for really lagged or non-keep-alive +hosts). You can also use -w to set the I/O timeout (i.e., skipfish will wait +only so long for an individual read or write), and -t to set the total request +timeout, to account for really slow or really fast sites. + +Lastly, -f controls the maximum number of consecutive HTTP errors you are +willing to see before aborting the scan; and -s sets the maximum length of a +response to fetch and parse (longer responses will be truncated). + +-------------------------------- +4. But seriously, how to run it? 
+-------------------------------- + +A standard, authenticated scan of a well-designed and self-contained site +(warns about all external links, e-mails, mixed content, and caching header +issues): + +$ ./skipfish -MEU -C "AuthCookie=value" -X /logout.aspx -o output_dir \ + http://www.example.com/ + +Five-connection crawl, but no brute-force; pretending to be MSIE and caring +less about ambiguous MIME or character set mismatches: + +$ ./skipfish -m 5 -LVJ -W /dev/null -o output_dir -b ie http://www.example.com/ + +Brute force only (no HTML link extraction), trusting links within example.com +and timing out after 5 seconds: + +$ ./skipfish -B .example.com -O -o output_dir -t 5 http://www.example.com/ + +For a short list of all command-line options, try ./skipfish -h. + +---------------------------------------------------- +5. How to interpret and address the issues reported? +---------------------------------------------------- + +Most of the problems reported by skipfish should be self-explanatory, assuming you +have a good grasp of the fundamentals of web security. If you need a quick +refresher on some of the more complicated topics, such as MIME sniffing, you +may enjoy our comprehensive Browser Security Handbook as a starting point: + + http://code.google.com/p/browsersec/ + +If you still need assistance, there are several organizations that put a +considerable effort into documenting and explaining many of the common web +security threats, and advising the public on how to address them. I encourage +you to refer to the materials published by OWASP and Web Application Security +Consortium, amongst others: + + * http://www.owasp.org/index.php/Category:Principle + * http://www.owasp.org/index.php/Category:OWASP_Guide_Project + * http://www.webappsec.org/projects/articles/ + +Although I am happy to diagnose problems with the scanner itself, I regrettably +cannot offer any assistance with the inner workings of third-party web +applications. 
+ +--------------------------------------- +6. Known limitations / feature wishlist +--------------------------------------- + +Below is a list of features currently missing in skipfish. If you wish to +improve the tool by contributing code in one of these areas, please let me know: + + * Buffer overflow checks: after careful consideration, I suspect there is + no reliable way to test for buffer overflows remotely. Much like the actual + fault condition we are looking for, proper buffer size checks may also + result in uncaught exceptions, 500 messages, etc. I would love to be proved + wrong, though. + + * Fully-fledged JavaScript XSS detection: several rudimentary checks are + present in the code, but there is no proper script engine to evaluate + expressions and DOM access built in. + + * Variable length encoding character consumption / injection bugs: these + problems seem to be largely addressed on browser level at this point, so + they were much lower priority at the time of this writing. + + * Security checks and link extraction for third-party, plugin-based content + (Flash, Java, PDF, etc). + + * Password brute-force and numerical filename brute-force probes. + + * Search engine integration (vhosts, starting paths). + + * VIEWSTATE decoding. + + * NTLM and digest authentication. + + * Proxy support: somewhat incompatible with performance control features + currently employed by skipfish; but in the long run, should be provided as + a last-resort option. + + * Scan resume option. + + * Standalone installation (make install) support. + + * Config file support. + +------------------------------------- +7. Oy! Something went horribly wrong! +------------------------------------- + +There is no web crawler so good that there wouldn't be a web framework to one +day set it on fire. 
If you encounter what appears to be bad behavior (e.g., a +scan that takes forever and generates too many requests, completely bogus nodes +in scan output, or outright crashes), please recompile the scanner with: + +$ make clean debug + +...and re-run it this way: + +$ ./skipfish [...previous options...] 2>logfile.txt + +You can then inspect logfile.txt to get an idea what went wrong; if it looks +like a scanner problem, please scrub any sensitive information from the log +file and send it to the author. + +If the scanner crashed, please recompile it as indicated above, and then type: + +$ ulimit -c unlimited +$ ./skipfish [...previous options...] 2>logfile.txt +$ gdb --batch -ex back ./skipfish core + +...and be sure to send the author the output of that last command as well. + +----------------------- +8. Credits and feedback +----------------------- + +Skipfish is made possible thanks to the contributions of, and valuable feedback +from, Google's information security engineering team. + +If you have any bug reports, questions, suggestions, or concerns regarding the +application, the author can be reached at lcamtuf@google.com. diff --git a/alloc-inl.h b/alloc-inl.h new file mode 100644 index 0000000..3b90fb7 --- /dev/null +++ b/alloc-inl.h @@ -0,0 +1,294 @@ +/* + skipfish - error-checking, memory-zeroing alloc routines + -------------------------------------------------------- + + Note: when DEBUG_ALLOCATOR is set, a horribly slow but pedantic + allocation tracker is used. Don't enable this in production. + + Author: Michal Zalewski + + Copyright 2009, 2010 by Google Inc. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + */ + +#ifndef _HAVE_ALLOC_INL_H +#define _HAVE_ALLOC_INL_H + +#include + +#ifdef __APPLE__ +#include +#else +#include +#endif /* __APPLE__ */ + +#include + +#include "config.h" +#include "types.h" +#include "debug.h" + +#define ALLOC_CHECK_SIZE(_s) do { \ + if ((_s) > MAX_ALLOC) \ + FATAL("bad alloc request: %u bytes", (_s)); \ + } while (0) + +#define ALLOC_CHECK_RESULT(_r,_s) do { \ + if (!(_r)) \ + FATAL("out of memory: can't allocate %u bytes", (_s)); \ + } while (0) + +#ifdef __APPLE__ +#define malloc_usable_size malloc_size +#endif /* __APPLE__ */ + +static inline void* __DFL_ck_alloc(u32 size) { + void* ret; + u32 usable; + + if (!size) return NULL; + + ALLOC_CHECK_SIZE(size); + ret = malloc(size); + ALLOC_CHECK_RESULT(ret, size); + + usable = malloc_usable_size(ret); + memset(ret, 0, usable); + + return ret; +} + + +static inline void* __DFL_ck_realloc(void* orig, u32 size) { + void* ret; + u32 old_usable = 0, + new_usable; + + if (!size) { + free(orig); + return NULL; + } + + if (orig) old_usable = malloc_usable_size(orig); + + ALLOC_CHECK_SIZE(size); + ret = realloc(orig, size); + ALLOC_CHECK_RESULT(ret, size); + + new_usable = malloc_usable_size(ret); + + if (new_usable > old_usable) + memset(ret + old_usable, 0, new_usable - old_usable); + + return ret; +} + + +static inline void* __DFL_ck_strdup(u8* str) { + void* ret; + u32 size; + u32 usable; + + if (!str) return NULL; + + size = strlen((char*)str) + 1; + + ALLOC_CHECK_SIZE(size); + ret = malloc(size); + ALLOC_CHECK_RESULT(ret, size); + + usable = malloc_usable_size(ret); + + memcpy(ret, str, 
size); + + if (usable > size) + memset(ret + size, 0, usable - size); + + return ret; +} + +static inline void* __DFL_ck_memdup(u8* mem, u32 size) { + void* ret; + u32 usable; + + if (!mem || !size) return NULL; + + ALLOC_CHECK_SIZE(size); + ret = malloc(size); + ALLOC_CHECK_RESULT(ret, size); + + usable = malloc_usable_size(ret); + + memcpy(ret, mem, size); + + if (usable > size) + memset(ret + size, 0, usable - size); + + return ret; +} + + +#ifndef DEBUG_ALLOCATOR + +/* Non-debugging mode - straightforward aliasing. */ + +#define ck_alloc __DFL_ck_alloc +#define ck_realloc __DFL_ck_realloc +#define ck_strdup __DFL_ck_strdup +#define ck_memdup __DFL_ck_memdup +#define ck_free free + +#else + +/* Debugging mode - include additional structures and support code. */ + +#define ALLOC_BUCKETS 1024 + +struct __AD_trk_obj { + void *ptr; + char *file, *func; + u32 line; +}; + + +extern struct __AD_trk_obj* __AD_trk[ALLOC_BUCKETS]; +extern u32 __AD_trk_cnt[ALLOC_BUCKETS]; + +#define __AD_H(_ptr) (((((u32)(long)(_ptr)) >> 16) ^ ((u32)(long)(_ptr))) % \ + ALLOC_BUCKETS) + +/* Adds a new entry to the list of allocated objects. */ + +static inline void __AD_alloc_buf(void* ptr, const char* file, const char* func, + u32 line) { + u32 i, b; + + if (!ptr) return; + + b = __AD_H(ptr); + + for (i=0;i<__AD_trk_cnt[b];i++) + if (!__AD_trk[b][i].ptr) { + __AD_trk[b][i].ptr = ptr; + __AD_trk[b][i].file = (char*)file; + __AD_trk[b][i].func = (char*)func; + __AD_trk[b][i].line = line; + return; + } + + __AD_trk[b] = __DFL_ck_realloc(__AD_trk[b], + (__AD_trk_cnt[b] + 1) * sizeof(struct __AD_trk_obj)); + + __AD_trk[b][__AD_trk_cnt[b]].ptr = ptr; + __AD_trk[b][__AD_trk_cnt[b]].file = (char*)file; + __AD_trk[b][__AD_trk_cnt[b]].func = (char*)func; + __AD_trk[b][__AD_trk_cnt[b]].line = line; + __AD_trk_cnt[b]++; + +} + + +/* Removes entry from the list of allocated objects. 
*/ + +static inline void __AD_free_buf(void* ptr, const char* file, const char* func, + u32 line) { + u32 i, b; + + if (!ptr) return; + + b = __AD_H(ptr); + + for (i=0;i<__AD_trk_cnt[b];i++) + if (__AD_trk[b][i].ptr == ptr) { + __AD_trk[b][i].ptr = 0; + return; + } + + WARN("ALLOC: Attempt to free non-allocated memory in %s (%s:%u)", + func, file, line); + +} + + +/* Does a final report on all non-deallocated objects. */ + +static inline void __AD_report(void) { + u32 i, b; + + fflush(0); + + for (b=0;b + + Copyright 2009, 2010 by Google Inc. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + */ + +#define _VIA_ANALYSIS_C + +#include "debug.h" +#include "config.h" +#include "types.h" +#include "http_client.h" +#include "database.h" +#include "crawler.h" +#include "analysis.h" + +u8 no_parse, /* Disable HTML link detection */ + warn_mixed, /* Warn on mixed content */ + log_ext_urls, /* Log all external URLs */ + no_forms, /* Do not submit forms */ + relaxed_mime, /* Relax about cset / mime */ + pedantic_cache; /* Match HTTP/1.0 and HTTP/1.1 */ + +/* Form autofill hints: */ + +static u8** addl_form_name; +static u8** addl_form_value; +static u32 addl_form_cnt; + + +/* Runs some rudimentary checks on top-level pivot HTTP responses. */ + +void pivot_header_checks(struct http_request* req, + struct http_response* res) { + + u32 i; + u8 *par_hdr, *cur_hdr; + + DEBUG_CALLBACK(req, res); + + /* Server: change. 
*/ + + cur_hdr = GET_HDR((u8*)"Server", &res->hdr); + if (!RPAR(req)->res) par_hdr = NULL; + else par_hdr = GET_HDR((u8*)"Server", &RPAR(req)->res->hdr); + + if (!cur_hdr) cur_hdr = (u8*)"[none]"; + if (!par_hdr) par_hdr = (u8*)"[none]"; + + if (strcmp((char*)cur_hdr, (char*)par_hdr)) + problem(PROB_SERVER_CHANGE, req, res, cur_hdr, req->pivot, 0); + + /* Via: appears or disappears. */ + + cur_hdr = GET_HDR((u8*)"Via", &res->hdr); + if (!RPAR(req)->res) par_hdr = NULL; + else par_hdr = GET_HDR((u8*)"Via", &RPAR(req)->res->hdr); + + if (cur_hdr != par_hdr) + problem(PROB_VIA_CHANGE, req, res, cur_hdr ? cur_hdr : (u8*)"[none]", + req->pivot, 0); + + /* New X-* header appears. */ + + for (i=0;ihdr.c;i++) { + + if (strncasecmp((char*)res->hdr.n[i], "X-", 2)) continue; + + if (!RPAR(req)->res) par_hdr = NULL; + else par_hdr = GET_HDR(res->hdr.n[i], &RPAR(req)->res->hdr); + + if (!par_hdr) + problem(PROB_X_CHANGE, req, res, res->hdr.n[i], req->pivot,0); + + } + + /* Old X-* header disappears. */ + + if (RPAR(req)->res) + for (i=0;ires->hdr.c;i++) { + + if (strncasecmp((char*)RPAR(req)->res->hdr.n[i], "X-", 2)) continue; + + cur_hdr = GET_HDR(RPAR(req)->res->hdr.n[i], &res->hdr); + + if (!cur_hdr) + problem(PROB_X_CHANGE, req, res, RPAR(req)->res->hdr.n[i], req->pivot, 0); + + } + +} + + +/* Helper for scrape_response(). Tries to add a previously extracted link, + also checks for cross-site and mixed content issues and similar woes. + Subres is: 1 - redirect; 2 - IMG; 3 - IFRAME, EMBED, OBJECT, APPLET; + 4 - SCRIPT, LINK REL=STYLESHEET; 0 - everything else. */ + +static void test_add_link(u8* str, struct http_request* ref, + struct http_response* res, u8 subres, u8 sure) { + struct http_request* n; + + DEBUG_CALLBACK(ref,res); + DEBUG("* Alleged URL = '%s' [%u]\n", str, subres); + + /* Don't add injected links. 
*/ + + if (!strncasecmp((char*)str, "skipfish:", 10) || + !strncasecmp((char*)str, "//skipfish.invalid/", 20) || + !strncasecmp((char*)str, "http://skipfish.invalid/", 25)) return; + + /* Don't add links that look like they came from JS code with fragmented HTML + snippets, etc. */ + + if (!sure && (strchr("()\"' +,^", *str) || + (*str == '/' && strchr("()\"' +,^", str[1])))) return; + + if ((str[0] == '\'' || str[0] == '"') && (str[1] == '+' || str[1] == ' ')) + return; + + if (!strncasecmp((char*)str, "mailto:", 7)) { + + if (log_ext_urls) { + u8* qmark = (u8*)strchr((char*)str, '?'); + if (qmark) *qmark = 0; + problem(PROB_MAIL_ADDR, ref, res, str + 7, host_pivot(ref->pivot),0); + if (qmark) *qmark = '?'; + } + + return; + } + + n = ck_alloc(sizeof(struct http_request)); + + n->pivot = ref->pivot; + + if (!parse_url(str, n, ref)) { + + if (R(100) < crawl_prob) maybe_add_pivot(n, NULL, sure ? 2 : 1); + + /* Link to a third-party site? */ + + if (!url_allowed_host(n) && !url_trusted_host(n)) + switch (subres) { + + case 0: + if (log_ext_urls) + problem(PROB_EXT_LINK, ref, res, str, host_pivot(ref->pivot), 0); + break; + + case 1: + if (log_ext_urls) + problem(PROB_EXT_REDIR, ref, res, str, ref->pivot, 0); + break; + + case 2: + case 3: + problem(PROB_EXT_OBJ, ref, res, str, ref->pivot, 0); + break; + + case 4: + problem(PROB_EXT_SUB, ref, res, str, ref->pivot, 0); + break; + + } + + /* Mixed content? We don't care about or redirectors + here, though. */ + + if (ref->proto == PROTO_HTTPS && n->proto == PROTO_HTTP && + subres > 2 && warn_mixed) + problem((subres == 4) ? PROB_MIXED_SUB : PROB_MIXED_OBJ, + ref, res, str, ref->pivot, 0); + + } else if (!ref->proto) { + + /* Parser didn't recognize the protocol. If it's a + hierarchical URL (foo://), log it. 
*/ + + u8* x = str; + + while (isalnum(*x)) x++; + + if (str != x && *x == ':' && x[1] == '/') + problem(PROB_UNKNOWN_PROTO, ref, res, str, ref->pivot, 0); + + } + + destroy_request(n); +} + + +/* Another scrape_response() helper - decodes HTML escaping, + maybe also JS escaping, from URLs. Returns a dynamically + allocated copy. */ + +static u8* html_decode_param(u8* url, u8 also_js) { + u32 len = strlen((char*)url); + u8* ret = ck_alloc(len + 1); + u32 i, pos = 0; + + /* If directed to do so, decode \x, \u, and \char sequences + first. */ + + if (also_js) { + + for (i=0;i 0xff) act_val = '?'; + + ret[pos++] = act_val; + + } else ret[pos++] = url[i]; + + } + + ret[pos] = 0; + url = ret; + len = pos; + pos = 0; + ret = ck_alloc(len + 1); + + } + + /* Next, do old-school HTML decoding. There are many other named + entities, of course, but the odds of them appearing in URLs + without %-encoding are negligible. */ + + for (i=0;i 0xff) act_val = '?'; + ret[pos++] = act_val; + i += strcspn((char*)url + i, ";"); + continue; + } + + /* Fall through and output the sequence as-is. */ + + } + + } else if (url[i] == '\r' || url[i] == '\n') continue; + + ret[pos++] = url[i]; + + } + + ret[pos] = 0; + if (also_js) ck_free(url); + + return ret; + +} + + +/* Macro to test for tag names */ + +#define ISTAG(_val, _tag) \ + (!strncasecmp((char*)(_val), _tag, strlen((char*)_tag)) && \ + isspace(_val[strlen((char*)_tag)])) + +/* Macro to find and move past parameter name (saves result in + _store, NULL if not found). Buffer needs to be NUL-terminated + at nearest >. */ + +#define FIND_AND_MOVE(_store, _val, _param) { \ + (_store) = inl_strcasestr((u8*)_val, (u8*)_param); \ + if (_store) { \ + if (!isspace((_store)[-1])) (_store) = NULL; \ + else (_store) += strlen((char*)_param); \ + } \ + } while (0) + +/* Macro to extract parameter value, handling quotes. 
*/ + +#define EXTRACT_ALLOC_VAL(_store, _val) do { \ + u32 _val_len; \ + if (*(_val) == '\'') _val_len = strcspn((char*)++(_val), "'"); else \ + if (*(_val) == '"') _val_len = strcspn((char*)++(_val), "\""); else \ + _val_len = strcspn((char*)(_val), "> \t\r\n"); \ + (_store) = ck_memdup((_val), (_val_len) + 1); \ + (_store)[(_val_len)] = 0; \ + } while (0) + + +/* Adds a new item to the form hint system. */ + +void add_form_hint(u8* name, u8* value) { + addl_form_name = ck_realloc(addl_form_name, + (addl_form_cnt + 1) * sizeof(u8*)); + + addl_form_value = ck_realloc(addl_form_value, + (addl_form_cnt + 1) * sizeof(u8*)); + + addl_form_name[addl_form_cnt] = name; + addl_form_value[addl_form_cnt] = value; + addl_form_cnt++; + +} + + +/* Helper for collect_form_data() - comes up with a fitting value for + a checkbox. Returns a static buffer. */ + +static u8* make_up_form_value(u8* name, struct http_request* req, + struct http_response* res) { + u32 i; + + for (i=0;ipivot), 0); + + return (u8*)form_suggestion[i][1]; + +} + + +/* Helper for collect_form_data() - checks for a probable anti-XSRF token + values. */ + +static u8 maybe_xsrf(u8* token) { + u8* tmp; + u32 digit_cnt = 0, upper_cnt = 0, slash_cnt = 0;; + static u8 tm_prefix[8]; + + if (!tm_prefix[0]) + sprintf((char*)tm_prefix, "%lu", time(0) / 100000); + + /* Unix time is not a valid token. */ + + if (!strncasecmp((char*)token, (char*)tm_prefix, strlen((char*)tm_prefix))) + return 0; + + tmp = token; + while (*tmp && (isdigit(*tmp) || strchr("abcdef", tolower(*tmp)))) { + if (isdigit(*tmp)) digit_cnt++; + tmp++; + } + + /* Looks like base 10 or 16... 
*/ + + if (!*tmp) { + u32 len = tmp - token; + if (len >= XSRF_B16_MIN && len <= XSRF_B16_MAX && digit_cnt >= XSRF_B16_NUM) + return 1; + return 0; + } + + digit_cnt = 0; + tmp = token; + while (*tmp && (isalnum(*tmp) || strchr("=+/", *tmp))) { + if (isdigit(*tmp)) digit_cnt++; + if (isupper(*tmp)) upper_cnt++; + if (*tmp == '/') slash_cnt++; + tmp++; + } + + /* Looks like base 32 or 64... */ + + if (!*tmp) { + u32 len = tmp - token; + if (len >= XSRF_B64_MIN && len <= XSRF_B64_MAX && ((digit_cnt >= + XSRF_B64_NUM && upper_cnt >= XSRF_B64_CASE) || digit_cnt >= + XSRF_B64_NUM2) && slash_cnt <= XSRF_B64_SLASH) return 1; + return 0; + } + + /* Looks like... not a numerical token at all. */ + + return 0; + +} + + +/* Another helper for scrape_response(): examines all tags + up until , then adds them as parameters to current request. */ + +static void collect_form_data(struct http_request* req, + struct http_request* orig_req, + struct http_response* orig_res, + u8* cur_str, u8 is_post) { + + u8 has_xsrf = 0, pass_form = 0; + u32 tag_cnt = 0; + + DEBUG("* collect_form_data() entered\n"); + + do { + + u8* tag_end; + + if (*cur_str == '<' && (tag_end = (u8*)strchr((char*)cur_str + 1, '>'))) { + + cur_str++; + *tag_end = 0; + + if (!strncasecmp((char*)cur_str, "/form", 5)) { + *tag_end = '>'; + goto final_checks; + } + + if (ISTAG(cur_str, "input") || ISTAG(cur_str, "textarea") || + ISTAG(cur_str, "select")) { + + u8 *tag_name, *tag_value, *tag_type, *clean_name = NULL, + *clean_value = NULL; + + FIND_AND_MOVE(tag_name, cur_str, "name="); + FIND_AND_MOVE(tag_value, cur_str, "value="); + FIND_AND_MOVE(tag_type, cur_str, "type="); + + if (!tag_name) goto next_tag; + + EXTRACT_ALLOC_VAL(tag_name, tag_name); + clean_name = html_decode_param(tag_name, 0); + ck_free(tag_name); + tag_name = 0; + + if (tag_value) { + EXTRACT_ALLOC_VAL(tag_value, tag_value); + clean_value = html_decode_param(tag_value, 0); + ck_free(tag_value); + tag_value = 0; + } + + if (tag_type) + 
EXTRACT_ALLOC_VAL(tag_type, tag_type); + else tag_type = ck_strdup((u8*)"text"); + + tag_cnt++; + + if (!strcasecmp((char*)tag_type, "file")) { + + if (!is_post) { + ck_free(req->method); + req->method = ck_strdup((u8*)"POST"); + is_post = 1; + } + + set_value(PARAM_POST_F, clean_name, clean_value ? + clean_value : (u8*)"", 0, &req->par); + + } else if (!strcasecmp((char*)tag_type, "reset")) { + + /* Do nothing - do not store. */ + tag_cnt--; + + } else if (!strcasecmp((char*)tag_type, "button") || + !strcasecmp((char*)tag_type, "submit")) { + + set_value(is_post ? PARAM_POST : PARAM_QUERY, clean_name, + clean_value ? clean_value : (u8*)"", 0, &req->par); + + } else if (!strcasecmp((char*)tag_type, "checkbox")) { + + /* Turn checkboxes on. */ + + set_value(is_post ? PARAM_POST : PARAM_QUERY, clean_name, + (u8*)"on", 0, &req->par); + + } else { + + u8* use_value = clean_value; + + /* Don't second-guess hidden fields. */ + + if (strcasecmp((char*)tag_type, "hidden") && + (!use_value || !use_value[0])) { + use_value = make_up_form_value(clean_name, orig_req, orig_res); + } else { + if (!use_value) use_value = (u8*)""; + } + + /* Radio buttons are rolled back into a single parameter + because we always replace offset 0 for given clean_name. */ + + set_value(is_post ? PARAM_POST : PARAM_QUERY, + clean_name, use_value, 0, &req->par); + + if (!strcasecmp((char*)tag_type, "hidden") && + maybe_xsrf(use_value)) has_xsrf = 1; + + } + + if (inl_strcasestr(tag_name, (u8*) "passw")) pass_form = 1; + + ck_free(tag_name); + ck_free(tag_type); + ck_free(tag_value); + ck_free(clean_name); + ck_free(clean_value); + + } + +next_tag: + + *tag_end = '>'; + + } else tag_end = cur_str; + + /* Skip to next tag. 
*/ + + cur_str = (u8*)strchr((char*)tag_end + 1, '<'); + + } while (cur_str); + +final_checks: + + if (pass_form) { + problem(PROB_PASS_FORM, req, orig_res, NULL, req->pivot, 0); + } else { + if (tag_cnt && !has_xsrf) + problem(PROB_VULN_FORM, req, orig_res, NULL, req->pivot, 0); + else + problem(PROB_FORM, req, orig_res, NULL, req->pivot, 0); + } + +} + + +/* Helper for scrape_response() and content_checks: is the + file mostly ASCII? */ + +static u8 is_mostly_ascii(struct http_response* res) { + u32 i, total, printable = 0; + + if (res->doc_type) return (res->doc_type == 2); + + total = (res->pay_len > 128) ? 128 : res->pay_len; + + if (!total) { res->doc_type = 2; return 1; } + + for (i=0;ipayload[i] >= 0x20 && res->payload[i] <= 0x7f) + || (res->payload[i] && strchr("\r\n", res->payload[i]))) + printable++; + + if (printable * 100 / total < 90) { + DEBUG("* looks like binary data (print = %u, total = %u)\n", + printable, total); + res->doc_type = 1; + return 1; + } + + DEBUG("* looks like text file (print = %u, total = %u)\n", + printable, total); + + res->doc_type = 2; + return 1; + +} + +/* Analyzes response headers (Location, etc), body to extract new links, + keyword guesses. This code is designed to be simple and fast, but it + does not even try to understand the intricacies of HTML or whatever + the response might be wrapped in. */ + +void scrape_response(struct http_request* req, struct http_response* res) { + + struct http_request *base = NULL; + u8* cur_str; + u32 i; + + DEBUG_CALLBACK(req, res); + + if (no_parse || res->scraped) return; + + res->scraped = 1; + + /* Handle Location, Refresh headers first. 
*/ + + if ((cur_str = GET_HDR((u8*)"Location", &res->hdr))) + test_add_link(cur_str, req, res, 1, 1); + + if ((cur_str = GET_HDR((u8*)"Refresh", &res->hdr)) && + (cur_str = (u8*)strchr((char*)cur_str, '='))) + test_add_link(cur_str + 1, req, res, 1, 1); + + if (!res->payload || !is_mostly_ascii(res)) return; + + cur_str = res->payload; + + /* PASS 1: Do a simplified check to what looks like proper, + known HTML parameters bearing URLs. Note that payload is + conveniently NUL-terminated. */ + + do { + + u8 *tag_end; + + if (*cur_str == '<' && (tag_end = (u8*)strchr((char*)cur_str + 1, '>'))) { + + u32 link_type = 0; + u8 set_base = 0, parse_form = 0; + u8 *dirty_url = NULL, *clean_url = NULL, *meta_url = NULL; + + cur_str++; + *tag_end = 0; + + /* Several tags we need to handle specially, either because they + denote a particularly interesting content type (marked in + link_type, see test_add_link()), or because they use a + non-standard parameter for URL data. */ + + if (ISTAG(cur_str, "meta")) { + + link_type = 1; + FIND_AND_MOVE(dirty_url, cur_str, "content="); + + if (dirty_url) { + EXTRACT_ALLOC_VAL(meta_url, dirty_url); + dirty_url = inl_strcasestr(meta_url, (u8*)"URL="); + if (dirty_url) dirty_url += 4; + } + + } else if (ISTAG(cur_str, "img")) { + + link_type = 2; + FIND_AND_MOVE(dirty_url, cur_str, "src="); + + } else if (ISTAG(cur_str, "object") || ISTAG(cur_str, "embed") || + ISTAG(cur_str, "applet") || ISTAG(cur_str, "iframe")) { + + link_type = 3; + FIND_AND_MOVE(dirty_url, cur_str, "src="); + if (!dirty_url) FIND_AND_MOVE(dirty_url, cur_str, "codebase="); + + } else if (ISTAG(cur_str, "param") && inl_strcasestr(cur_str, + (u8*)"movie")) { + + link_type = 3; + FIND_AND_MOVE(dirty_url, cur_str, "value="); + + } else if (ISTAG(cur_str, "script")) { + + link_type = 4; + FIND_AND_MOVE(dirty_url, cur_str, "src="); + + } else if (ISTAG(cur_str, "link") && inl_strcasestr(cur_str, + (u8*)"stylesheet")) { + + link_type = 4; + FIND_AND_MOVE(dirty_url, cur_str, 
"href="); + + } else if (ISTAG(cur_str, "base")) { + + set_base = 1; + FIND_AND_MOVE(dirty_url, cur_str, "href="); + + } else if (ISTAG(cur_str, "form")) { + + u8* method; + parse_form = 1; + FIND_AND_MOVE(dirty_url, cur_str, "action="); + + /* See if we need to POST this form or not. */ + + FIND_AND_MOVE(method, cur_str, "method="); + + if (method && *method) { + if (strchr("\"'", *method)) method++; + if (tolower(method[0]) == 'p') parse_form = 2; + } + + } else { + + /* All other tags - other types, , - + are handled in a generic way. */ + + FIND_AND_MOVE(dirty_url, cur_str, "href="); + if (!dirty_url) FIND_AND_MOVE(dirty_url, cur_str, "src="); + + } + + /* If we found no URL to speak of, we're done. */ + + if (!dirty_url) { + ck_free(meta_url); + goto next_tag; + } + + /* De-quotify and decode the value. */ + + EXTRACT_ALLOC_VAL(dirty_url, dirty_url); + clean_url = html_decode_param(dirty_url, 0); + ck_free(dirty_url); + ck_free(meta_url); + + if (!*clean_url) goto next_tag; + + test_add_link(clean_url, base ? base : req, res, link_type, 1); + + /* If we are dealing with a tag, we need to create + a new dummy request to use as a referrer. */ + + if (set_base) { + + struct http_request* n = ck_alloc(sizeof(struct http_request)); + n->pivot = req->pivot; + if (!parse_url(clean_url, n, base ? base : req)) base = n; + + } else if (parse_form) { + + /*
handling... */ + + struct http_request* n = ck_alloc(sizeof(struct http_request)); + n->pivot = req->pivot; + + if (parse_form == 2) { + ck_free(n->method); + n->method = ck_strdup((u8*)"POST"); + } + + /* Don't collect form fields, etc, if target is not within the + scope anyway. */ + + DEBUG("* Found form: target %s method %s\n", clean_url, n->method); + + if (!parse_url(clean_url, n, base ? base : req) && url_allowed(n) && + R(100) < crawl_prob && !no_forms) { + collect_form_data(n, req, res, tag_end + 1, (parse_form == 2)); + maybe_add_pivot(n, NULL, 2); + } + + destroy_request(n); + + } + +next_tag: + + *tag_end = '>'; + + if (clean_url) ck_free(clean_url); + + } else tag_end = cur_str; + + /* Skip to next tag. */ + + cur_str = (u8*)strchr((char*)tag_end + 1, '<'); + + } while (cur_str); + + cur_str = res->payload; + + /* PASS 2: Extract links from non-HTML body, JS, etc; add keywords. */ + + do { + + u32 clean_len, alpha_cnt = 0, lower_cnt = 0, lead = 0, seg_len; + u8 *ext, *token, *clean_url, *tmp, *pos_at; + u8 last = 0, saved; + + /* Skip leading whitespaces, terminators. */ + + seg_len = strspn((char*)cur_str, " \t\r\n<>\"'"); + cur_str += seg_len; + + /* If there's a = character preceeded only by alnums or underscores, + skip this chunk (to handle something=http://www.example.com/ neatly) */ + + tmp = cur_str; + while (*tmp && (isalnum(*tmp) || *tmp == '_')) tmp++; + if (*tmp == '=') cur_str = tmp + 1; + + if (!*cur_str) break; + seg_len = strcspn((char*)cur_str + 1, " \t\r\n<>\"'") + 1; + + /* Extract the segment, decoding JS and HTML on the go. */ + + saved = cur_str[seg_len]; + cur_str[seg_len] = 0; + clean_url = html_decode_param(cur_str, 1); + cur_str[seg_len] = saved; + + tmp = clean_url; + + /* We want the entire extracted segment to consist only of nice + characters we would expect in a URL. If not, panic. 
*/ + + while (*tmp) { + if (!isalnum(*tmp) && !isspace(*tmp) && + !strchr("_-.:@/?&=#%;$!+~()[]{}\\|^*", *tmp)) goto url_done; + tmp++; + } + + clean_len = tmp - clean_url; + + /* Strip trailing characters that are unlikely to appear in valid URLs + anyway, and could be a part of some message. */ + + while (clean_len && + strchr(".,:?!-$&", clean_url[clean_len-1])) clean_len--; + + clean_url[clean_len] = 0; + + /* URL CHECK 1: Things that start with ./ or ../ are obviously URLs. + We do not make assumptins about syntax such as /foo/, though, as + it could very well be a regex in a JS block. */ + + if (!strncmp((char*)clean_url, "./", 2) || !strncmp((char*)clean_url, + "../", 3)) { +add_link: + test_add_link(clean_url, base ? base : req, res, 0, 0); + goto url_done; + } + + /* URL CHECK 2: Things that start with :// are quite + clearly URLs. */ + + while (clean_url[lead] && (isalnum(clean_url[lead]))) lead++; + + if (lead && !strncmp((char*)clean_url + lead, "://", 3) && + clean_url[lead + 3]) goto add_link; + + /* URL CHECK 3: If the result ends with ., + and contains a slash anywhere, assume URL (without that + slash check, we would get duped by 'domain.com'. */ + + if (strchr((char*)clean_url, '/')) { + + i = 0; + + while ((ext = wordlist_get_extension(i++))) { + u32 ext_len = strlen((char*)ext); + + if (clean_len > ext_len + 2 && + !strncasecmp((char*)clean_url + clean_len - ext_len, + (char*)ext, ext_len) && + clean_url[clean_len - ext_len - 1] == '.') goto add_link; + + } + + } + + if (!(pos_at = (u8*)strchr((char*)clean_url, '@'))) { + + /* URL CHECK 4: ?= syntax is strongly indicative of + an URL (only if not e-mail). 
*/ + + u8 *pos_qmark = (u8*)strchr((char*)clean_url, '?'), + *pos_eq = (u8*)strchr((char*)clean_url, '='), + *pos_amp = (u8*)strchr((char*)clean_url, '&'); + + if (pos_qmark && pos_eq && pos_qmark + 1 < pos_eq && + pos_eq[1] && (!pos_amp || pos_amp > pos_eq) && + pos_eq[1] != '=' && !strchr((char*)clean_url, '(') && + !strchr((char*)clean_url, '[') && + (u8*)strchr((char*)clean_url, ':') < pos_eq) + goto add_link; + + } else if (log_ext_urls) { + + /* EMAIL CHECK: If the string uses a limited set of characters, + starts with alpha, ahs at least one period after @, and both + @ and the period are immediately followed by alpha - assume + e-mail. */ + + u8 *pos_dot, + *pos_qmark = (u8*)strchr((char*)clean_url, '?'); + + if (pos_qmark && pos_qmark > pos_at) *pos_qmark = 0; + + lead = 0; + + while (clean_url[lead] && (isalnum(clean_url[lead]) || + strchr("._-+@", clean_url[lead]))) lead++; + + pos_dot = (u8*)strchr((char*)pos_at + 1, '.'); + + if (!clean_url[lead] && pos_at && pos_dot && isalpha(clean_url[0]) && + isalpha(pos_at[1]) && isalpha(pos_dot[1])) { + problem(PROB_MAIL_ADDR, req, res, clean_url, host_pivot(req->pivot), 0); + goto url_done; + } + + } + + /* LAST CHANCE: Try to detect base64; if the segment does not look like + base64, add each segment to try_list. */ + + tmp = clean_url; + + while (*tmp) { + if (isalpha(*tmp)) { + alpha_cnt++; + if (islower(*tmp)) lower_cnt++; + } + tmp++; + } + + if (alpha_cnt > 20 && (lower_cnt * 100 / alpha_cnt) > 35 && + (lower_cnt * 100 / alpha_cnt) < 65) goto url_done; + + token = clean_url; + + do { + while (*token && !isalnum(*token)) token++; + tmp = token; + while (*tmp && isalnum(*tmp)) tmp++; + if (!*tmp) last = 1; + *tmp = 0; + if (R(100) < GUESS_PROB) wordlist_add_guess(token); + token = tmp + 1; + } while (!last); + +url_done: + + ck_free(clean_url); + + cur_str += seg_len; + + } while (*cur_str); + + if (base) destroy_request(base); + + /* Phew! */ + +} + + +/* Returns 1 if document looks like standalone CSS. 
*/ + +static u8 is_css(struct http_response* res) { + u8* text = res->payload; + u8 first = 0, last = 0; + + if (res->css_type) return (res->css_type == 2); + if (!text || !is_mostly_ascii(res)) return 0; + + do { + + /* Skip whitespaces... */ + + while (isspace(*text)) text++; + + /* Skip HTML, CSS comments. */ + + if (!strncmp((char*)text, " +Skipfish - scan results browser + + + + + + + + + + + + + + +
+
HTTP trace - click this bar or hit ESC to close
+ +
+
+ + + + + + +
Scanner version:Scan date:
Random seed:Total time:
+ +
+ +

Crawl results - click to expand:

+
+
+ +

Document type overview - click to expand:

+
+
+ +

Issue type overview - click to expand:

+
+
+

+NOTE: 100 samples maximum per issue or document type. + diff --git a/assets/mime_entry.png b/assets/mime_entry.png new file mode 100644 index 0000000..12eb14d Binary files /dev/null and b/assets/mime_entry.png differ diff --git a/assets/n_children.png b/assets/n_children.png new file mode 100644 index 0000000..4be7913 Binary files /dev/null and b/assets/n_children.png differ diff --git a/assets/n_clone.png b/assets/n_clone.png new file mode 100644 index 0000000..38939fb Binary files /dev/null and b/assets/n_clone.png differ diff --git a/assets/n_collapsed.png b/assets/n_collapsed.png new file mode 100644 index 0000000..ab776bc Binary files /dev/null and b/assets/n_collapsed.png differ diff --git a/assets/n_expanded.png b/assets/n_expanded.png new file mode 100644 index 0000000..ea1ead3 Binary files /dev/null and b/assets/n_expanded.png differ diff --git a/assets/n_failed.png b/assets/n_failed.png new file mode 100644 index 0000000..07c3b1f Binary files /dev/null and b/assets/n_failed.png differ diff --git a/assets/n_maybe_missing.png b/assets/n_maybe_missing.png new file mode 100644 index 0000000..3f53a38 Binary files /dev/null and b/assets/n_maybe_missing.png differ diff --git a/assets/n_missing.png b/assets/n_missing.png new file mode 100644 index 0000000..84cce41 Binary files /dev/null and b/assets/n_missing.png differ diff --git a/assets/n_unlinked.png b/assets/n_unlinked.png new file mode 100644 index 0000000..659234c Binary files /dev/null and b/assets/n_unlinked.png differ diff --git a/assets/p_dir.png b/assets/p_dir.png new file mode 100644 index 0000000..e6d20ee Binary files /dev/null and b/assets/p_dir.png differ diff --git a/assets/p_file.png b/assets/p_file.png new file mode 100644 index 0000000..6d3f7e5 Binary files /dev/null and b/assets/p_file.png differ diff --git a/assets/p_param.png b/assets/p_param.png new file mode 100644 index 0000000..df958fa Binary files /dev/null and b/assets/p_param.png differ diff --git a/assets/p_pinfo.png 
b/assets/p_pinfo.png new file mode 100644 index 0000000..59d6879 Binary files /dev/null and b/assets/p_pinfo.png differ diff --git a/assets/p_serv.png b/assets/p_serv.png new file mode 100644 index 0000000..b3cf7a0 Binary files /dev/null and b/assets/p_serv.png differ diff --git a/assets/p_unknown.png b/assets/p_unknown.png new file mode 100644 index 0000000..ab6b05e Binary files /dev/null and b/assets/p_unknown.png differ diff --git a/assets/p_value.png b/assets/p_value.png new file mode 100644 index 0000000..cbbd4a6 Binary files /dev/null and b/assets/p_value.png differ diff --git a/assets/sf_name.png b/assets/sf_name.png new file mode 100644 index 0000000..2b43202 Binary files /dev/null and b/assets/sf_name.png differ diff --git a/config.h b/config.h new file mode 100644 index 0000000..4873c67 --- /dev/null +++ b/config.h @@ -0,0 +1,242 @@ +/* + skipfish - configurable settings + -------------------------------- + + Author: Michal Zalewski + + Copyright 2009, 2010 by Google Inc. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ + */ + +#ifndef _HAVE_CONFIG_H +#define _HAVE_CONFIG_H + +#define VERSION "1.00b" + +#define USE_COLOR 1 /* Use terminal colors */ + +/* Various default settings for HTTP client (cmdline override): */ + +#define MAX_CONNECTIONS 50 /* Simultaneous connection cap */ +#define MAX_CONN_HOST 10 /* Per-host connction cap */ +#define MAX_REQUESTS 1e8 /* Total request count cap */ +#define MAX_FAIL 100 /* Max consecutive failed requests */ +#define RW_TMOUT 10 /* Individual network R/W timeout */ +#define RESP_TMOUT 20 /* Total request time limit */ +#define IDLE_TMOUT 10 /* Connection tear down threshold */ +#define SIZE_LIMIT 200000 /* Response size cap */ +#define MAX_GUESSES 256 /* Guess-based wordlist size limit */ + +/* HTTP client constants: */ + +#define MAX_URL_LEN 1024 /* Maximum length of an URL */ +#define MAX_DNS_LEN 255 /* Maximum length of a host name */ +#define READ_CHUNK 4096 /* Read buffer size */ + +/* Define this to use FILO, rather than FIFO, scheduling for new requests. + FILO ensures a more uniform distribution of requests when fuzzing multiple + directories at once, but may reduce the odds of spotting some stored + XSSes, and increase memory usage a bit. */ + +// #define QUEUE_FILO 1 + +/* Dummy file to upload to the server where possible. */ + +#define DUMMY_EXT "gif" +#define DUMMY_FILE "GIF89a,\x01" +#define DUMMY_MIME "image/gif" + +/* Allocator settings: */ + +#define MAX_ALLOC 0x50000000 /* Refuse larger allocations. 
*/ + +/* Configurable settings for crawl database (cmdline override): */ + +#define MAX_DEPTH 16 /* Maximum crawl tree depth */ +#define MAX_CHILDREN 1024 /* Maximum children per tree node */ + +#define DEF_WORDLIST "skipfish.wl" /* Default wordlist file */ + +/* Crawl / analysis constants: */ + +#define MAX_WORD 64 /* Maximum wordlist item length */ +#define GUESS_PROB 50 /* Guess word addition probability */ +#define WORD_HASH 256 /* Hash table for wordlists */ +#define SNIFF_LEN 1024 /* MIME sniffing buffer size */ +#define MAX_SAMPLES 1024 /* Max issue / MIME samples */ + +/* Page fingerprinting constants: */ + +#define FP_SIZE 10 /* Page fingerprint size */ +#define FP_MAX_LEN 15 /* Maximum word length to count */ +#define FP_T_REL 5 /* Relative matching tolerance (%) */ +#define FP_T_ABS 6 /* Absolute matching tolerance */ +#define FP_B_FAIL 3 /* Max number of failed buckets */ + +#define BH_CHECKS 15 /* Page verification check count */ + +/* Crawler / probe constants: */ + +#define BOGUS_FILE "sfi9876" /* Name that should not exist */ +#define MAX_404 4 /* Maximum number of 404 sigs */ +#define PAR_MAX_DIGITS 6 /* Max digits in a fuzzable int */ +#define PAR_INT_FUZZ 100 /* Fuzz by + / - this much */ + +#ifdef QUEUE_FILO +#define DICT_BATCH 200 /* Brute-force queue block */ +#else +#define DICT_BATCH 1000 /* Brute-force queue block */ +#endif /* ^QUEUE_FILO */ + +/* Single query for IPS detection - Evil Query of Doom (tm). */ + +#define IPS_TEST \ + "?_test1=c:\\windows\\system32\\cmd.exe" \ + "&_test2=/etc/passwd" \ + "&_test3=|/bin/sh" \ + "&_test4=(SELECT * FROM nonexistent) --" \ + "&_test5=>/no/such/file" \ + "&_test6=" \ + "&_test7=javascript:alert(1)" + +/* A benign query with a similar character set to compare with EQoD. 
*/ + +#define IPS_SAFE \ + "?_test1=ccddeeeimmnossstwwxy.:\\\\\\" \ + "&_test2=acdepsstw//" \ + "&_test3=bhins//" \ + "&_test4=CEEFLMORSTeeinnnosttx--*" \ + "&_test5=cefhilnosu///" \ + "&_test6=acceiilpprrrssttt1)(" \ + "&_test7=aaaceijlprrsttv1):(" + +/* XSRF token detector settings: */ + +#define XSRF_B16_MIN 8 /* Minimum base10/16 token length */ +#define XSRF_B16_MAX 45 /* Maximum base10/16 token length */ +#define XSRF_B16_NUM 2 /* ...minimum digit count */ +#define XSRF_B64_MIN 6 /* Minimum base32/64 token length */ +#define XSRF_B64_MAX 32 /* Maximum base32/64 token length */ +#define XSRF_B64_NUM 1 /* ...minimum digit count && */ +#define XSRF_B64_CASE 2 /* ...minimum uppercase count */ +#define XSRF_B64_NUM2 3 /* ...digit count override */ +#define XSRF_B64_SLASH 2 /* ...maximum slash count */ + +#ifdef _VIA_DATABASE_C + +/* Domains we always trust (identical to -B options). These entries do not + generate cross-domain content inclusion warnings. NULL-terminated. */ + +static const char* always_trust_domains[] = { + ".google-analytics.com", + ".googleapis.com", + ".googleadservices.com", + ".googlesyndication.com", + "www.w3.org", + 0 +}; + +#endif /* _VIA_DATABASE_C */ + +#ifdef _VIA_ANALYSIS_C + +/* NULL-terminated list of JSON-like response prefixes we consider to + be sufficiently safe against cross-site script inclusion (courtesy + ratproxy). */ + +static const char* json_safe[] = { + "while(1);", /* Parser looping */ + "while (1);", /* ... */ + "while(true);", /* ... */ + "while (true);", /* ... */ + "&&&", /* Parser breaking */ + "//OK[", /* Line commenting */ + "{\"", /* Serialized object */ + "{{\"", /* Serialized object */ + "throw 1; <", /* Magical combo */ + ")]}'", /* Recommended magic */ + 0 +}; + +/* NULL-terminated list of known valid charsets. Charsets not on the list are + considered dangerous (as they may trigger charset sniffing). 
+ + Note that many common misspellings, such as "utf8", are not valid and NOT + RECOGNIZED by browsers, leading to content sniffing. Do not add them here. + + Also note that SF does not support encoding not compatible with US ASCII + transport (e.g., UTF-16, UTF-32). Lastly, variable-length encodings + other than utf-8 may have character consumption issues that are not + tested for at this point. */ + +static const char* valid_charsets[] = { + "utf-8", /* Valid 8-bit safe Unicode */ + "iso8859-1", /* Western Europe */ + "iso8859-2", /* Central Europe */ + "iso8859-15", /* New flavor of ISO8859-1 */ + "iso8859-16", /* New flavor of ISO8859-2 */ + "iso-8859-1", /* Browser-supported misspellings */ + "iso-8859-2", /* - */ + "iso-8859-15", /* - */ + "iso-8859-16", /* - */ + "windows-1252", /* Microsoft's Western Europe */ + "windows-1250", /* Microsoft's Central Europe */ + "us-ascii", /* Old school but generally safe */ + "koi8-r", /* 8-bit and US ASCII compatible */ + 0 +}; + + +/* Default form auto-fill rules - used to pair up form fields with fun + values! Do not attempt security attacks here, though - this is to maximize + crawl coverage, not to exploit anything. The last item must have a name + of NULL, and the value will be used as a default option when no other + matches found. 
*/ + +static const char* form_suggestion[][2] = { + + { "phone" , "6505550100" }, /* Reserved */ + { "zip" , "94043" }, + { "first" , "John" }, + { "last" , "Smith" }, + { "name" , "Smith" }, + { "mail" , "skipfish@example.com" }, + { "street" , "1600 Amphitheatre Pkwy" }, + { "city" , "Mountain View" }, + { "state" , "CA" }, + { "country" , "US" }, + { "language" , "en" }, + { "company" , "ACME" }, + { "search" , "skipfish" }, + { "login" , "skipfish" }, + { "user" , "skipfish" }, + { "pass" , "skipfish" }, + { "year" , "2010" }, + { "card" , "4111111111111111" }, /* Reserved */ + { "code" , "000" }, + { "cvv" , "000" }, + { "expir" , "1212" }, + { "ssn" , "987654320" }, /* Reserved */ + { "url" , "http://example.com/?sfish_form_test" }, + { "site" , "http://example.com/?sfish_form_test" }, + { "domain" , "example.com" }, + { "search" , "a" }, + { NULL , "1" } + +}; + +#endif /* _VIA_ANALYSIS_C */ + +#endif /* ! _HAVE_CONFIG_H */ diff --git a/crawler.c b/crawler.c new file mode 100644 index 0000000..b2219a5 --- /dev/null +++ b/crawler.c @@ -0,0 +1,2776 @@ +/* + skipfish - crawler state machine + -------------------------------- + + Includes dictionary and security injection logic. + + Author: Michal Zalewski + + Copyright 2010 by Google Inc. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ + */ + +#define _VIA_CRAWLER_C + +#include "debug.h" +#include "config.h" +#include "types.h" +#include "http_client.h" +#include "database.h" +#include "crawler.h" +#include "analysis.h" + +u32 crawl_prob = 100; /* Crawl probability (1-100%) */ +u8 no_fuzz_ext; /* Don't fuzz extensions for dirs */ + +/* + + ************************* + **** GENERAL HELPERS **** + ************************* + + Assorted functions used by all the crawl callbacks for manipulating + requests, parsing responses, etc. + + */ + + +/* Classifies a response, with a special handling of "unavailable" and + "gateway timeout" codes. */ + +#define FETCH_FAIL(_res) ((_res)->state != STATE_OK || (_res)->code == 503 || \ + (_res)->code == 504) + +/* Dumps request, response (for debugging only). */ + +u8 show_response(struct http_request* req, struct http_response* res) { + + dump_http_request(req); + + if (FETCH_FAIL(res)) { + SAY("^^^ REQUEST SHOWN ABOVE CAUSED ERROR: %d ^^^\n", res->state); + return 0; + } + + dump_http_response(res); + + return 0; /* Do not keep req/res */ + +} + + +/* Strips trailing / from a directory request, optionally replaces it with + a new value. */ + +static void replace_slash(struct http_request* req, u8* new_val) { + u32 i; + + for (i=0;ipar.c;i++) + if (req->par.t[i] == PARAM_PATH && !req->par.n[i] && !req->par.v[i][0]) { + if (new_val) { + ck_free(req->par.v[i]); + req->par.v[i] = ck_strdup(new_val); + } else req->par.t[i] = PARAM_NONE; + return; + } + + /* Could not find a slash segment - create a new segment instead. */ + + set_value(PARAM_PATH, 0, new_val, -1, &req->par); + +} + + +/* Releases children for crawling (called once parent node had 404, IPS + probes done, etc). Note that non-directories might have locked + children too. 
*/ + +static void unlock_children(struct pivot_desc* pv) { + u32 i; + + DEBUG_HELPER(pv); + + for (i=0;ichild_cnt;i++) + if (pv->child[i]->state == PSTATE_PENDING) { + + pv->child[i]->state = PSTATE_FETCH; + + if (!pv->child[i]->res) async_request(pv->child[i]->req); + else switch (pv->child[i]->type) { + + case PIVOT_DIR: fetch_dir_callback(pv->req, pv->res); break; + case PIVOT_PARAM: + case PIVOT_FILE: fetch_file_callback(pv->req, pv->res); break; + case PIVOT_UNKNOWN: fetch_unknown_callback(pv->req, pv->res); break; + default: FATAL("Unknown pivot type '%u'", pv->type); + + } + + } + +} + + +/* Handles response error for callbacks in a generalized manner. If 'stop' is + 1, marks the entire pivot as busted, unlocks children. */ + +static void handle_error(struct http_request* req, struct http_response* res, + u8* desc, u8 stop) { + + DEBUG_CALLBACK(req, res); + + if (res->state == STATE_SUPPRESS) { + problem(PROB_LIMITS, req, res, (u8*)"Too many previous fetch failures", + req->pivot, 0); + } else { + problem(PROB_FETCH_FAIL, req, res, desc, req->pivot, 0); + } + + if (stop) { + req->pivot->state = PSTATE_DONE; + unlock_children(req->pivot); + } + +} + + +/* Finds nearest "real" directory parent, so that we can consult it for 404 + signatures, etc. Return NULL if dir found, but signature-less. */ + +static struct pivot_desc* dir_parent(struct pivot_desc* pv) { + struct pivot_desc* ret; + + ret = pv->parent; + + while (ret && ret->type != PIVOT_DIR && ret->type != PIVOT_SERV) + ret = ret->parent; + + if (ret && !ret->r404_cnt) return NULL; + return ret; +} + + +/* Deletes any cached requests and responses stored by injection probes. 
*/ + +static void destroy_misc_data(struct pivot_desc* pv, + struct http_request* self) { + u32 i; + + for (i=0;i<10;i++) { + + if (pv->misc_req[i] != self) { + + if (pv->misc_req[i]) + destroy_request(pv->misc_req[i]); + + if (pv->misc_res[i]) + destroy_response(pv->misc_res[i]); + + } + + pv->misc_req[i] = NULL; + pv->misc_res[i] = NULL; + + } + + pv->misc_cnt = 0; + +} + + + +/* + + *************************************** + **** ASSORTED FORWARD DECLARATIONS **** + *************************************** + + */ + +static u8 dir_404_callback(struct http_request*, struct http_response*); +static u8 dir_ips_callback(struct http_request*, struct http_response*); +static void inject_init(struct pivot_desc*); +static void crawl_dir_dict_init(struct pivot_desc*); +static u8 dir_dict_callback(struct http_request*, struct http_response*); +static u8 inject_check0_callback(struct http_request*, struct http_response*); +static u8 inject_check1_callback(struct http_request*, struct http_response*); +static u8 inject_check2_callback(struct http_request*, struct http_response*); +static u8 inject_check3_callback(struct http_request*, struct http_response*); +static u8 inject_check4_callback(struct http_request*, struct http_response*); +static u8 inject_check5_callback(struct http_request*, struct http_response*); +static u8 inject_check6_callback(struct http_request*, struct http_response*); +static u8 inject_check7_callback(struct http_request*, struct http_response*); +static u8 inject_check8_callback(struct http_request*, struct http_response*); +static u8 inject_check9_callback(struct http_request*, struct http_response*); +static void crawl_par_numerical_init(struct pivot_desc*); +static u8 par_check_callback(struct http_request*, struct http_response*); +static u8 unknown_check_callback(struct http_request*, struct http_response*); +static u8 par_numerical_callback(struct http_request*, struct http_response*); +static u8 par_dict_callback(struct http_request*, struct 
http_response*); +static u8 par_trylist_callback(struct http_request*, struct http_response*); +static void crawl_par_dict_init(struct pivot_desc*); +static void crawl_parametric_init(struct pivot_desc*); +static void end_injection_checks(struct pivot_desc*); +static u8 par_ognl_callback(struct http_request*, struct http_response*); + + +/* + + ******************************** + **** CASE-SENSITIVITY CHECK **** + ******************************** + + */ + +static u8 check_case_callback(struct http_request* req, + struct http_response* res) { + + DEBUG_CALLBACK(req, res); + + if (FETCH_FAIL(res)) { + RPAR(req)->c_checked = 0; + return 0; + } + + if (!same_page(&res->sig, &RPRES(req)->sig)) + RPAR(req)->csens = 1; + + return 0; + +} + + +static void check_case(struct pivot_desc* pv) { + u32 i, len; + s32 last = -1; + struct http_request* n; + + if (pv->parent->c_checked) return; + + DEBUG_HELPER(pv); + + for (i=0;ireq->par.c;i++) + if (PATH_SUBTYPE(pv->req->par.t[i]) && pv->req->par.v[i][0]) last = i; + + if (last < 0) return; + + len = strlen((char*)pv->req->par.v[last]); + + for (i=0;ireq->par.v[last][i])) break; + + if (i == len) return; + + pv->parent->c_checked = 1; + + n = req_copy(pv->req, pv, 1); + n->callback = check_case_callback; + + /* Change case. */ + + n->par.v[last][i] = islower(n->par.v[last][i]) ? toupper(n->par.v[last][i]) : + tolower(n->par.v[last][i]); + + DEBUG("* candidate parameter: %s -> %s\n", pv->req->par.v[last], + n->par.v[last]); + + async_request(n); + +} + + +/* + + ************************************ + **** SECONDARY EXTENSION PROBES **** + ************************************ + + For each new entry discovered through brute-force that already bears an + extension, we should also try appending a secondary extension. This is to + spot things such as foo.php.old, .inc, .gz, etc. 
+ + */ + + +/* Schedules secondary extension tests, if warranted; is_param set to 1 + if this is a parametric node, 0 if the last path segment needs to be + checked. */ + +static void secondary_ext_init(struct pivot_desc* pv, struct http_request* req, + struct http_response* res, u8 is_param) { + + u8 *base_name, *fpos, *lpos, *ex; + s32 tpar = -1, i = 0, spar = -1; + + DEBUG_HELPER(req->pivot); + DEBUG_HELPER(pv); + + if (is_param) { + + tpar = pv->fuzz_par; + + } else { + + /* Find last path segment other than NULL-''. */ + for (i=0;ipar.c;i++) + if (PATH_SUBTYPE(req->par.t[i])) { + if ((req->par.t[i] == PARAM_PATH && + !req->par.n[i] && !req->par.v[i][0])) spar = i; else tpar = i; + } + + } + + if (tpar < 0) return; + + base_name = req->par.v[tpar]; + + /* Reject parameters with no '.' (unless in no_fuzz_ext mode), + with too many '.'s, or '.' in an odd location. */ + + fpos = (u8*)strchr((char*)base_name, '.'); + + if (!no_fuzz_ext || fpos) + if (!fpos || fpos == base_name || !fpos[1]) return; + + lpos = (u8*)strrchr((char*)base_name, '.'); + + if (fpos != lpos) return; + + i = 0; + + while ((ex = wordlist_get_extension(i))) { + u8* tmp = ck_alloc(strlen((char*)base_name) + strlen((char*)ex) + 2); + u32 c; + + sprintf((char*)tmp, "%s.%s", base_name, ex); + + /* Matching child? If yes, don't bother. */ + + for (c=0;cchild_cnt;c++) + if (!((is_c_sens(pv) ? strcmp : strcasecmp)((char*)tmp, + (char*)pv->child[c]->name))) break; + + /* Matching current node? */ + + if (pv->fuzz_par != -1 && + !((is_c_sens(pv) ? strcmp : strcasecmp)((char*)tmp, + (char*)pv->req->par.v[pv->fuzz_par]))) c = ~pv->child_cnt; + + if (c == pv->child_cnt) { + struct http_request* n = req_copy(req, pv, 1); + + /* Remove trailing slash if present. */ + if (spar >= 0) n->par.t[spar] = PARAM_NONE; + + ck_free(n->par.v[tpar]); + n->par.v[tpar] = tmp; + + n->user_val = 1; + + memcpy(&n->same_sig, &res->sig, sizeof(struct http_sig)); + + n->callback = is_param ? 
par_dict_callback : dir_dict_callback; + /* Both handlers recognize user_val == 1 as a special indicator. */ + async_request(n); + + } else ck_free(tmp); + + i++; + } + +} + + +/* + + ************************************ + **** SECURITY INJECTION TESTING **** + ************************************ + + Generic attack vector injection tests for directories, parameters, etc. + + */ + +/* Internal helper macros: */ + +#define TPAR(_req) ((_req)->par.v[(_req)->pivot->fuzz_par]) + +#define SET_VECTOR(_state, _req, _str) do { \ + if (_state == PSTATE_CHILD_INJECT) { \ + replace_slash((_req), (u8*)_str); \ + } else { \ + ck_free(TPAR(_req)); \ + TPAR(_req) = ck_strdup((u8*)_str); \ + } \ + } while (0) + +#define APPEND_VECTOR(_state, _req, _str) do { \ + if (_state == PSTATE_CHILD_INJECT) { \ + replace_slash((_req), (u8*)_str); \ + } else { \ + u8* _n = ck_alloc(strlen((char*)TPAR(_req)) + strlen((char*)_str) + 1); \ + sprintf((char*)_n, "%s%s", TPAR(_req), _str); \ + ck_free(TPAR(_req)); \ + TPAR(_req) = _n; \ + } \ + } while (0) + + +/* Common initialization of security injection attacks. */ + +static void inject_init(struct pivot_desc* pv) { + struct http_request* n; + u32 i; + + /* pv->state may change after async_request() calls in + insta-fail mode, so we should cache accordingly. */ + + DEBUG_HELPER(pv); + + /* CHECK 0: See if the response is stable. If it fluctuates + randomly, we probably need to skip injection tests. */ + + pv->misc_cnt = BH_CHECKS; + + for (i=0;ireq, pv, 1); + n->callback = inject_check0_callback; + n->user_val = i; + async_request(n); + } + +} + + +/* CALLBACK FOR CHECK 0: Confirms that the location is behaving + reasonably. 
*/ + +static u8 inject_check0_callback(struct http_request* req, + struct http_response* res) { + struct http_request* n; + u32 orig_state = req->pivot->state; + u8* tmp = NULL; + + DEBUG_CALLBACK(req, res); + + if (FETCH_FAIL(res)) { + handle_error(req, res, (u8*)"during page variability checks", 0); + } else { + if (!same_page(&RPRES(req)->sig, &res->sig)) { + req->pivot->res_varies = 1; + problem(PROB_VARIES, req, res, 0, req->pivot, 0); + } + } + + if ((--req->pivot->misc_cnt)) return 0; + + /* If response fluctuates, do not perform any injection checks at all. */ + + if (req->pivot->res_varies) { + end_injection_checks(req->pivot); + return 0; + } + + /* CHECK 1: Directory listing - 4 requests. The logic here is a bit + different for parametric targets (which are easy to examine with + a ./ trick) and directories (which require a more complex + comparison). */ + + req->pivot->misc_cnt = 0; + + n = req_copy(req->pivot->req, req->pivot, 1); + + if (orig_state == PSTATE_CHILD_INJECT) { + replace_slash(n, (u8*)"."); + set_value(PARAM_PATH, NULL, (u8*)"", -1, &n->par); + } else { + tmp = ck_alloc(strlen((char*)TPAR(n)) + 5); + sprintf((char*)tmp, ".../%s", TPAR(n)); + ck_free(TPAR(n)); + TPAR(n) = ck_strdup(tmp); + req->pivot->i_skip_add = 6; + } + + n->callback = inject_check1_callback; + n->user_val = 0; + async_request(n); + + n = req_copy(req->pivot->req, req->pivot, 1); + + if (orig_state == PSTATE_CHILD_INJECT) { + replace_slash(n, (u8*)".sf"); + set_value(PARAM_PATH, NULL, (u8*)"", -1, &n->par); + } else { + ck_free(TPAR(n)); + TPAR(n) = ck_strdup(tmp + 2); + } + + n->callback = inject_check1_callback; + n->user_val = 1; + async_request(n); + + n = req_copy(req->pivot->req, req->pivot, 1); + + if (orig_state == PSTATE_CHILD_INJECT) { + replace_slash(n, (u8*)"\\.\\"); + } else { + tmp[3] = '\\'; + ck_free(TPAR(n)); + TPAR(n) = ck_strdup(tmp); + } + + n->callback = inject_check1_callback; + n->user_val = 2; + async_request(n); + + n = req_copy(req->pivot->req, 
req->pivot, 1); + + if (orig_state == PSTATE_CHILD_INJECT) { + replace_slash(n, (u8*)"\\.sf\\"); + } else { + ck_free(TPAR(n)); + TPAR(n) = ck_strdup(tmp + 2); + ck_free(tmp); + } + + n->callback = inject_check1_callback; + n->user_val = 3; + async_request(n); + + return 0; + +} + + +/* CALLBACK FOR CHECK 1: Sees if we managed to list a directory, or find + a traversal vector. Called four times, parallelized. */ + +static u8 inject_check1_callback(struct http_request* req, + struct http_response* res) { + struct http_request* n; + u32 orig_state = req->pivot->state; + + DEBUG_CALLBACK(req, res); + + if (req->pivot->i_skip[0 + req->pivot->i_skip_add]) return 0; + + if (FETCH_FAIL(res)) { + handle_error(req, res, (u8*)"during directory listing / traversal attacks", 0); + req->pivot->i_skip[0 + req->pivot->i_skip_add] = 1; + goto schedule_next; + } + + req->pivot->misc_req[req->user_val] = req; + req->pivot->misc_res[req->user_val] = res; + if ((++req->pivot->misc_cnt) != 4) return 1; + + /* Got all responses. For directories, this is: + + pivot = / + misc[0] = /./ + misc[1] = /.sf/ + misc[2] = \.\ + misc[3] = \.sf\ + + Here, if pivot != misc[0], and misc[0] != misc[1], we probably + managed to list a hidden dir. The same test is carried out for + misc[2] and misc[3]. + + For parameters, this is: + + misc[0] = .../known_val + misc[1] = ./known_val + misc[2] = ...\known_val + misc[3] = .\known_val + + Here, the test is simpler: if misc[1] != misc[0], or misc[3] != + misc[2], we probably have a bug. + + */ + + if (orig_state == PSTATE_CHILD_INJECT) { + + if (!same_page(&MRES(0)->sig, &RPRES(req)->sig) && + !same_page(&MRES(0)->sig, &MRES(1)->sig)) { + problem(PROB_DIR_LIST, MREQ(0), MRES(0), + (u8*)"unique response for /./", + req->pivot, 0); + + /* Use pivot's request, rather than MREQ(0), for link scraping; + MREQ(0) contains an "illegal" manually constructed path. 
*/ + + RESP_CHECKS(RPREQ(req), MRES(0)); + } + + if (!same_page(&MRES(2)->sig, &RPRES(req)->sig) && + !same_page(&MRES(2)->sig, &MRES(3)->sig)) { + problem(PROB_DIR_LIST, MREQ(2), MRES(2), + (u8*)"unique response for \\.\\", + req->pivot, 0); + RESP_CHECKS(MREQ(2), MRES(2)); + } + + } else { + + if (!same_page(&MRES(0)->sig, &MRES(1)->sig)) { + problem(PROB_DIR_TRAVERSAL, MREQ(1), MRES(1), + (u8*)"responses for ./val and .../val look different", + req->pivot, 0); + RESP_CHECKS(MREQ(0), MRES(0)); + } + + if (!same_page(&MRES(2)->sig, &MRES(3)->sig)) { + problem(PROB_DIR_TRAVERSAL, MREQ(3), MRES(3), + (u8*)"responses for .\\val and ...\\val look different", + req->pivot, 0); + RESP_CHECKS(MREQ(2), MRES(2)); + } + + } + +schedule_next: + + destroy_misc_data(req->pivot, req); + + /* CHECK 2: Backend XML injection - 2 requests. */ + + n = req_copy(RPREQ(req), req->pivot, 1); + SET_VECTOR(orig_state, n, "sfish>'>\">"); + n->callback = inject_check2_callback; + n->user_val = 0; + async_request(n); + + n = req_copy(RPREQ(req), req->pivot, 1); + SET_VECTOR(orig_state, n, "sfish>'>\">"); + n->callback = inject_check2_callback; + n->user_val = 1; + async_request(n); + + return 0; + +} + + +/* CALLBACK FOR CHECK 2: Examines the response for XML injection. Called twice, + parallelized. 
*/ + +static u8 inject_check2_callback(struct http_request* req, + struct http_response* res) { + struct http_request* n; + u32 orig_state = req->pivot->state; + + DEBUG_CALLBACK(req, res); + + if (req->pivot->i_skip[1 + req->pivot->i_skip_add]) return 0; + + if (FETCH_FAIL(res)) { + handle_error(req, res, (u8*)"during backend XML injection attacks", 0); + req->pivot->i_skip[1 + req->pivot->i_skip_add] = 1; + goto schedule_next; + } + + req->pivot->misc_req[req->user_val] = req; + req->pivot->misc_res[req->user_val] = res; + if ((++req->pivot->misc_cnt) != 2) return 1; + + /* Got all responses: + + misc[0] = valid XML + misc[1] = bad XML + + If misc[0] != misc[1], we probably have XML injection on backend side. */ + + if (!same_page(&MRES(0)->sig, &MRES(1)->sig)) { + problem(PROB_XML_INJECT, MREQ(0), MRES(0), + (u8*)"responses for and look different", + req->pivot, 0); + RESP_CHECKS(MREQ(1), MRES(1)); + } + +schedule_next: + + destroy_misc_data(req->pivot, req); + + /* CHECK 3: Shell command injection - 9 requests. 
*/ + + n = req_copy(RPREQ(req), req->pivot, 1); + APPEND_VECTOR(orig_state, n, "`true`"); + n->callback = inject_check3_callback; + n->user_val = 0; + async_request(n); + + n = req_copy(RPREQ(req), req->pivot, 1); + APPEND_VECTOR(orig_state, n, "`false`"); + n->callback = inject_check3_callback; + n->user_val = 1; + async_request(n); + + n = req_copy(RPREQ(req), req->pivot, 1); + APPEND_VECTOR(orig_state, n, "`uname`"); + n->callback = inject_check3_callback; + n->user_val = 2; + async_request(n); + + n = req_copy(RPREQ(req), req->pivot, 1); + APPEND_VECTOR(orig_state, n, "\"`true`\""); + n->callback = inject_check3_callback; + n->user_val = 3; + async_request(n); + + n = req_copy(RPREQ(req), req->pivot, 1); + APPEND_VECTOR(orig_state, n, "\"`false`\""); + n->callback = inject_check3_callback; + n->user_val = 4; + async_request(n); + + n = req_copy(RPREQ(req), req->pivot, 1); + APPEND_VECTOR(orig_state, n, "\"`uname`\""); + n->callback = inject_check3_callback; + n->user_val = 5; + async_request(n); + + n = req_copy(RPREQ(req), req->pivot, 1); + APPEND_VECTOR(orig_state, n, "'`true`'"); + n->callback = inject_check3_callback; + n->user_val = 6; + async_request(n); + + n = req_copy(RPREQ(req), req->pivot, 1); + APPEND_VECTOR(orig_state, n, "'`false`'"); + n->callback = inject_check3_callback; + n->user_val = 7; + async_request(n); + + n = req_copy(RPREQ(req), req->pivot, 1); + APPEND_VECTOR(orig_state, n, "'`uname`'"); + n->callback = inject_check3_callback; + n->user_val = 8; + async_request(n); + + return 0; + +} + + +/* CALLBACK FOR CHECK 3: Looks for shell injection patterns. Called several + times, parallelized. 
*/ + +static u8 inject_check3_callback(struct http_request* req, + struct http_response* res) { + struct http_request* n; + u32 orig_state = req->pivot->state; + + DEBUG_CALLBACK(req, res); + + if (req->pivot->i_skip[2 + req->pivot->i_skip_add]) return 0; + + if (FETCH_FAIL(res)) { + handle_error(req, res, (u8*)"during path-based shell injection attacks", 0); + req->pivot->i_skip[2 + req->pivot->i_skip_add] = 1; + goto schedule_next; + } + + req->pivot->misc_req[req->user_val] = req; + req->pivot->misc_res[req->user_val] = res; + if ((++req->pivot->misc_cnt) != 9) return 1; + + /* Got all responses: + + misc[0] = `true` + misc[1] = `false` + misc[2] = `uname` + misc[3] = "`true`" + misc[4] = "`false`" + misc[5] = "`uname`" + misc[6] = '`true`' + misc[7] = "`false`" + misc[8] = '`uname`' + + If misc[0] == misc[1], but misc[0] != misc[2], we probably have shell + injection. Ditto for the remaining triplets. We use the `false` case + to avoid errors on search fields, etc. */ + + if (same_page(&MRES(0)->sig, &MRES(1)->sig) && + !same_page(&MRES(0)->sig, &MRES(2)->sig)) { + problem(PROB_SH_INJECT, MREQ(0), MRES(0), + (u8*)"responses to `true` and `false` different than to `uname`", + req->pivot, 0); + RESP_CHECKS(MREQ(2), MRES(2)); + } + + if (same_page(&MRES(3)->sig, &MRES(4)->sig) && + !same_page(&MRES(3)->sig, &MRES(5)->sig)) { + problem(PROB_SH_INJECT, MREQ(3), MRES(3), + (u8*)"responses to `true` and `false` different than to `uname`", + req->pivot, 0); + RESP_CHECKS(MREQ(5), MRES(5)); + } + + if (same_page(&MRES(6)->sig, &MRES(7)->sig) && + !same_page(&MRES(6)->sig, &MRES(8)->sig)) { + problem(PROB_SH_INJECT, MREQ(6), MRES(6), + (u8*)"responses to `true` and `false` different than to `uname`", + req->pivot, 0); + RESP_CHECKS(MREQ(8), MRES(8)); + } + +schedule_next: + + destroy_misc_data(req->pivot, req); + + /* CHECK 4: Cross-site scripting - two requests (also test common + "special" error pages). 
*/ + + n = req_copy(RPREQ(req), req->pivot, 1); + APPEND_VECTOR(orig_state, n, new_xss_tag(NULL)); + set_value(PARAM_HEADER, (u8*)"Referer", new_xss_tag(NULL), 0, &n->par); + register_xss_tag(n); + n->callback = inject_check4_callback; + n->user_val = 0; + async_request(n); + + n = req_copy(RPREQ(req), req->pivot, 1); + SET_VECTOR(orig_state, n, new_xss_tag((u8*)".htaccess.aspx")); + register_xss_tag(n); + n->callback = inject_check4_callback; + n->user_val = 1; + async_request(n); + + return 0; + +} + + +/* CALLBACK FOR CHECK 4: Checks for XSS. Called twice. */ + +static u8 inject_check4_callback(struct http_request* req, + struct http_response* res) { + struct http_request* n; + u32 orig_state = req->pivot->state; + + DEBUG_CALLBACK(req, res); + + /* Note that this is not a differential check, so we can let + 503, 504 codes slide. */ + + if (res->state != STATE_OK) { + handle_error(req, res, (u8*)"during cross-site scripting attacks", 0); + goto schedule_next; + } + + /* Content checks do automatic HTML parsing and XSS detection. + scrape_page() is generally not advisable here. */ + + content_checks(req, res); + + /* CHECK 5: URL redirection - 3 requests */ + +schedule_next: + + if (req->user_val) return 0; + + n = req_copy(RPREQ(req), req->pivot, 1); + SET_VECTOR(orig_state, n, "http://skipfish.invalid/;?"); + n->callback = inject_check5_callback; + n->user_val = 0; + async_request(n); + + n = req_copy(RPREQ(req), req->pivot, 1); + SET_VECTOR(orig_state, n, "//skipfish.invalid/;?"); + n->callback = inject_check5_callback; + n->user_val = 1; + async_request(n); + + n = req_copy(RPREQ(req), req->pivot, 1); + SET_VECTOR(orig_state, n, "skipfish://invalid/;?"); + n->callback = inject_check5_callback; + n->user_val = 2; + async_request(n); + + return 0; + +} + + +/* CALLBACK FOR CHECK 5: Checks for URL redirection or XSS problems. Called + several times, paralallelized, can work on individual responses. 
*/ + +static u8 inject_check5_callback(struct http_request* req, + struct http_response* res) { + struct http_request* n; + u8* val; + u32 orig_state = req->pivot->state; + + DEBUG_CALLBACK(req, res); + + /* Likewise, not a differential check. */ + + if (res->state != STATE_OK) { + handle_error(req, res, (u8*)"during URL injection attacks", 0); + goto schedule_next; + } + + /* Check Location, Refresh headers. */ + + val = GET_HDR((u8*)"Location", &res->hdr); + + if (val) { + + if (!strncasecmp((char*)val, "http://skipfish.invalid/", 25) || + !strncasecmp((char*)val, "//skipfish.invalid/", 21)) + problem(PROB_URL_REDIR, req, res, (u8*)"injected URL in 'Location' header", + req->pivot, 0); + + if (!strncasecmp((char*)val, "skipfish://", 12)) + problem(PROB_URL_XSS, req, res, (u8*)"injected URL in 'Location' header", + req->pivot, 0); + + } + + val = GET_HDR((u8*)"Refresh", &res->hdr); + + if (val && (val = (u8*)strchr((char*)val, '=')) && val++) { + u8 semi_safe = 0; + + if (*val == '\'' || *val == '"') { val++; semi_safe++; } + + if (!strncasecmp((char*)val, "http://skipfish.invalid/", 25) || + !strncasecmp((char*)val, "//skipfish.invalid/", 20)) + problem(PROB_URL_REDIR, req, res, (u8*)"injected URL in 'Refresh' header", + req->pivot, 0); + + /* Unescaped semicolon in Refresh headers is unsafe with MSIE6. */ + + if (!strncasecmp((char*)val, "skipfish://", 12) || + (!semi_safe && strchr((char*)val, ';'))) + problem(PROB_URL_XSS, req, res, (u8*)"injected URL in 'Refresh' header", + req->pivot, 0); + + } + + /* META tags and JS will be checked by content_checks(). We're not + calling scrape_page(), because we don't want to accumulate bogus, + injected links. 
*/ + + content_checks(req, res); + +schedule_next: + + if (req->user_val != 2) return 0; + + /* CHECK 6: header splitting - 2 requests */ + + n = req_copy(RPREQ(req), req->pivot, 1); + APPEND_VECTOR(orig_state, n, "bogus\nSkipfish-Inject:bogus"); + n->callback = inject_check6_callback; + n->user_val = 0; + async_request(n); + + n = req_copy(RPREQ(req), req->pivot, 1); + APPEND_VECTOR(orig_state, n, "bogus\rSkipfish-Inject:bogus"); + n->callback = inject_check6_callback; + n->user_val = 1; + async_request(n); + + return 0; + +} + + +/* CALLBACK FOR CHECK 6: A simple test for request splitting. Called + twice, parallelized, can work on individual responses. */ + +static u8 inject_check6_callback(struct http_request* req, + struct http_response* res) { + u8 is_num = 0; + struct http_request* n; + u32 orig_state = req->pivot->state; + + DEBUG_CALLBACK(req, res); + + /* Not differential. */ + + if (res->state != STATE_OK) { + handle_error(req, res, (u8*)"during header injection attacks", 0); + goto schedule_next; + } + + /* Check headers - that's all! 
*/ + + if (GET_HDR((u8*)"Skipfish-Inject", &res->hdr)) + problem(PROB_HTTP_INJECT, req, res, + (u8*)"successfully injected 'Skipfish-Inject' header into response", + req->pivot, 0); + +schedule_next: + + if (req->user_val != 1) return 0; + + /* CHECK 7: SQL injection - 6 requests */ + + if (orig_state != PSTATE_CHILD_INJECT) { + u8* pstr = TPAR(RPREQ(req)); + u32 c = strspn((char*)pstr, "01234567890.+-"); + if (!pstr[c]) is_num = 1; + } + + n = req_copy(RPREQ(req), req->pivot, 1); + if (!is_num) SET_VECTOR(orig_state, n, "9-8"); + else APPEND_VECTOR(orig_state, n, "-0"); + n->callback = inject_check7_callback; + n->user_val = 0; + async_request(n); + + n = req_copy(RPREQ(req), req->pivot, 1); + if (!is_num) SET_VECTOR(orig_state, n, "8-7"); + else APPEND_VECTOR(orig_state, n, "-0-0"); + n->callback = inject_check7_callback; + n->user_val = 1; + async_request(n); + + n = req_copy(RPREQ(req), req->pivot, 1); + if (!is_num) SET_VECTOR(orig_state, n, "9-1"); + else APPEND_VECTOR(orig_state, n, "-0-9"); + n->callback = inject_check7_callback; + n->user_val = 2; + async_request(n); + + n = req_copy(RPREQ(req), req->pivot, 1); + APPEND_VECTOR(orig_state, n, "\\\'\\\""); + set_value(PARAM_HEADER, (u8*)"User-Agent", (u8*)"sfish\\\'\\\"", 0, &n->par); + set_value(PARAM_HEADER, (u8*)"Referer", (u8*)"sfish\\\'\\\"", 0, &n->par); + set_value(PARAM_HEADER, (u8*)"Accept-Language", (u8*)"sfish\\\'\\\",en", 0, + &n->par); + n->callback = inject_check7_callback; + n->user_val = 3; + async_request(n); + + n = req_copy(RPREQ(req), req->pivot, 1); + APPEND_VECTOR(orig_state, n, "\'\""); + set_value(PARAM_HEADER, (u8*)"User-Agent", (u8*)"sfish\'\"", 0, &n->par); + set_value(PARAM_HEADER, (u8*)"Referer", (u8*)"sfish\'\"", 0, &n->par); + set_value(PARAM_HEADER, (u8*)"Accept-Language", (u8*)"sfish\'\",en", 0, + &n->par); + n->callback = inject_check7_callback; + n->user_val = 4; + async_request(n); + + n = req_copy(RPREQ(req), req->pivot, 1); + APPEND_VECTOR(orig_state, n, "\\\\\'\\\\\""); 
+ set_value(PARAM_HEADER, (u8*)"User-Agent", (u8*)"sfish\\\\\'\\\\\"", 0, &n->par); + set_value(PARAM_HEADER, (u8*)"Referer", (u8*)"sfish\\\\\'\\\\\"", 0, &n->par); + set_value(PARAM_HEADER, (u8*)"Accept-Language", (u8*)"sfish\\\\\'\\\\\",en", 0, + &n->par); + n->callback = inject_check7_callback; + n->user_val = 5; + async_request(n); + + /* TODO: We should probably also attempt cookie injection here. */ + + return 0; + +} + + +/* CALLBACK FOR CHECK 7: See if we have any indication of SQL injection. */ + +static u8 inject_check7_callback(struct http_request* req, + struct http_response* res) { + struct http_request* n; + u32 orig_state = req->pivot->state; + DEBUG_CALLBACK(req, res); + + if (req->pivot->i_skip[3 + req->pivot->i_skip_add]) return 0; + + if (FETCH_FAIL(res)) { + handle_error(req, res, (u8*)"during SQL injection attacks", 0); + req->pivot->i_skip[3 + req->pivot->i_skip_add] = 1; + goto schedule_next; + } + + req->pivot->misc_req[req->user_val] = req; + req->pivot->misc_res[req->user_val] = res; + if ((++req->pivot->misc_cnt) != 6) return 1; + + /* Got all data: + + misc[0] = 9-8 (or orig-0) + misc[1] = 8-7 (or orig-0-0) + misc[2] = 9-1 (or orig-0-9) + misc[3] = [orig]\'\" + misc[4] = [orig]'" + misc[5] = [orig]\\'\\" + + If misc[0] == misc[1], but misc[0] != misc[2], probable (numeric) SQL + injection. If misc[3] != misc[4] and misc[4] != misc[5], + probable text SQL injection. 
+ + */ + + if (same_page(&MRES(0)->sig, &MRES(1)->sig) && + !same_page(&MRES(0)->sig, &MRES(2)->sig)) { + problem(PROB_SQL_INJECT, MREQ(0), MRES(0), + (u8*)"response suggests arithmetic evaluation on server side", + req->pivot, 0); + RESP_CHECKS(MREQ(0), MRES(0)); + RESP_CHECKS(MREQ(2), MRES(2)); + } + + if (!same_page(&MRES(3)->sig, &MRES(4)->sig) && + !same_page(&MRES(3)->sig, &MRES(5)->sig)) { + problem(PROB_SQL_INJECT, MREQ(4), MRES(4), + (u8*)"response to '\" different than to \\'\\\"", req->pivot, 0); + RESP_CHECKS(MREQ(3), MRES(3)); + RESP_CHECKS(MREQ(4), MRES(4)); + } + +schedule_next: + + destroy_misc_data(req->pivot, req); + + /* CHECK 8: format string attacks - 2 requests. */ + + n = req_copy(RPREQ(req), req->pivot, 1); + SET_VECTOR(orig_state, n, "sfish%dn%dn%dn%dn%dn%dn%dn%dn"); + n->callback = inject_check8_callback; + n->user_val = 0; + async_request(n); + + n = req_copy(RPREQ(req), req->pivot, 1); + SET_VECTOR(orig_state, n, "sfish%nd%nd%nd%nd%nd%nd%nd%nd"); + n->callback = inject_check8_callback; + n->user_val = 1; + async_request(n); + + return 0; +} + + +/* Check for format string bugs. */ + +static u8 inject_check8_callback(struct http_request* req, + struct http_response* res) { + struct http_request* n; + u32 orig_state = req->pivot->state; + DEBUG_CALLBACK(req, res); + + if (req->pivot->i_skip[4 + req->pivot->i_skip_add]) return 0; + + if (FETCH_FAIL(res)) { + handle_error(req, res, (u8*)"during format string attacks", 0); + req->pivot->i_skip[4 + req->pivot->i_skip_add] = 1; + goto schedule_next; + } + + req->pivot->misc_req[req->user_val] = req; + req->pivot->misc_res[req->user_val] = res; + if ((++req->pivot->misc_cnt) != 2) return 1; + + /* Got all data: + + misc[0] = %dn... (harmless) + misc[1] = %nd... (crashy) + + If misc[0] != misc[1], probable format string vuln. + + */ + + if (!same_page(&MRES(0)->sig, &MRES(1)->sig)) { + problem(PROB_FMT_STRING, MREQ(1), MRES(1), + (u8*)"response to %dn%dn%dn... 
different than to %nd%nd%nd...", + req->pivot, 0); + RESP_CHECKS(MREQ(1), MRES(1)); + } + +schedule_next: + + destroy_misc_data(req->pivot, req); + + /* CHECK 9: integer overflow bugs - 9 requests. */ + + n = req_copy(RPREQ(req), req->pivot, 1); + SET_VECTOR(orig_state, n, "-0000012345"); + n->callback = inject_check9_callback; + n->user_val = 0; + async_request(n); + + n = req_copy(RPREQ(req), req->pivot, 1); + SET_VECTOR(orig_state, n, "-2147483649"); + n->callback = inject_check9_callback; + n->user_val = 1; + async_request(n); + + n = req_copy(RPREQ(req), req->pivot, 1); + SET_VECTOR(orig_state, n, "-2147483648"); + n->callback = inject_check9_callback; + n->user_val = 2; + async_request(n); + + n = req_copy(RPREQ(req), req->pivot, 1); + SET_VECTOR(orig_state, n, "0000012345"); + n->callback = inject_check9_callback; + n->user_val = 3; + async_request(n); + + n = req_copy(RPREQ(req), req->pivot, 1); + SET_VECTOR(orig_state, n, "2147483647"); + n->callback = inject_check9_callback; + n->user_val = 4; + async_request(n); + + n = req_copy(RPREQ(req), req->pivot, 1); + SET_VECTOR(orig_state, n, "2147483648"); + n->callback = inject_check9_callback; + n->user_val = 5; + async_request(n); + + n = req_copy(RPREQ(req), req->pivot, 1); + SET_VECTOR(orig_state, n, "4294967295"); + n->callback = inject_check9_callback; + n->user_val = 6; + async_request(n); + + n = req_copy(RPREQ(req), req->pivot, 1); + SET_VECTOR(orig_state, n, "4294967296"); + n->callback = inject_check9_callback; + n->user_val = 7; + async_request(n); + + n = req_copy(RPREQ(req), req->pivot, 1); + SET_VECTOR(orig_state, n, "0000023456"); + n->callback = inject_check9_callback; + n->user_val = 8; + async_request(n); + + return 0; +} + + +/* Check for format string bugs, then wrap up the injection + phase.. 
*/ + +static u8 inject_check9_callback(struct http_request* req, + struct http_response* res) { + + DEBUG_CALLBACK(req, res); + + if (req->pivot->i_skip[5 + req->pivot->i_skip_add]) return 0; + + if (FETCH_FAIL(res)) { + handle_error(req, res, (u8*)"during integer overflow attacks", 0); + req->pivot->i_skip[5 + req->pivot->i_skip_add] = 1; + goto schedule_next; + } + + req->pivot->misc_req[req->user_val] = req; + req->pivot->misc_res[req->user_val] = res; + if ((++req->pivot->misc_cnt) != 9) return 1; + + /* Got all data: + + misc[0] = -12345 (baseline) + misc[1] = -(2^31-1) + misc[2] = -2^31 + misc[3] = 12345 (baseline) + misc[4] = 2^31-1 + misc[5] = 2^31 + misc[6] = 2^32-1 + misc[7] = 2^32 + misc[8] = 23456 (validation) + + If misc[3] != misc[8], skip tests - we're likely dealing with a + search field instead. + + If misc[0] != misc[1] or misc[2], probable integer overflow; + ditto for 3 vs 4, 5, 6, 7. + + */ + + if (!same_page(&MRES(3)->sig, &MRES(8)->sig)) + goto schedule_next; + + if (!same_page(&MRES(0)->sig, &MRES(1)->sig)) { + problem(PROB_INT_OVER, MREQ(1), MRES(1), + (u8*)"response to -(2^31-1) different than to -12345", + req->pivot, 0); + RESP_CHECKS(MREQ(1), MRES(1)); + } + + if (!same_page(&MRES(0)->sig, &MRES(2)->sig)) { + problem(PROB_INT_OVER, MREQ(2), MRES(2), + (u8*)"response to -2^31 different than to -12345", + req->pivot, 0); + RESP_CHECKS(MREQ(2), MRES(2)); + } + + if (!same_page(&MRES(3)->sig, &MRES(4)->sig)) { + problem(PROB_INT_OVER, MREQ(4), MRES(4), + (u8*)"response to 2^31-1 different than to 12345", + req->pivot, 0); + RESP_CHECKS(MREQ(4), MRES(4)); + } + + if (!same_page(&MRES(3)->sig, &MRES(5)->sig)) { + problem(PROB_INT_OVER, MREQ(5), MRES(5), + (u8*)"response to 2^31 different than to 12345", + req->pivot, 0); + RESP_CHECKS(MREQ(5), MRES(5)); + } + + if (!same_page(&MRES(3)->sig, &MRES(6)->sig)) { + problem(PROB_INT_OVER, MREQ(6), MRES(6), + (u8*)"response to 2^32-1 different than to 12345", + req->pivot, 0); + RESP_CHECKS(MREQ(6), 
MRES(6)); + } + + if (!same_page(&MRES(3)->sig, &MRES(7)->sig)) { + problem(PROB_INT_OVER, MREQ(7), MRES(7), + (u8*)"response to 2^32 different than to 12345", + req->pivot, 0); + RESP_CHECKS(MREQ(7), MRES(7)); + } + +schedule_next: + + destroy_misc_data(req->pivot, req); + end_injection_checks(req->pivot); + + return 0; + +} + + +/* Ends injection checks, proceeds with brute-force attacks, etc. */ + +static void end_injection_checks(struct pivot_desc* pv) { + + if (pv->state == PSTATE_CHILD_INJECT) { + + /* Do not proceed with parametric tests if pivot is not + in scope (but got added as a parent of an in-scope + node), or 404 checks went wrong. */ + + if (url_allowed(pv->req)) { + + if (pv->r404_cnt) { + pv->state = PSTATE_CHILD_DICT; + pv->cur_key = 0; + crawl_dir_dict_init(pv); + } else { + crawl_parametric_init(pv); + } + + } else { + + pv->state = PSTATE_DONE; + return; + + } + + } else { + + if (pv->bogus_par) { + pv->state = PSTATE_DONE; + } else { + crawl_par_numerical_init(pv); + } + + } + +} + + + +/* + + ***************************** + * GENERIC PARAMETRIC CHECKS * + ***************************** + + Tests specific to parametric nodes, such as foo=bar (query and + POST parameters, directories, etc). + + */ + +/* Initializes initial parametric testing probe. It may get called on + pivots with no specific parameters to fuzz, in which case, we want to + proceed to PSTATE_DONE. */ + +static void crawl_parametric_init(struct pivot_desc* pv) { + struct http_request* n; + u32 i; + + if (pv->fuzz_par < 0 || !url_allowed(pv->req)) { + pv->state = PSTATE_DONE; + return; + } + + DEBUG_HELPER(pv); + + pv->state = PSTATE_PAR_CHECK; + + /* TEST 1: parameter behavior. 
*/ + + pv->ck_pending += BH_CHECKS; + + for (i=0;ireq, pv, 1); + ck_free(TPAR(n)); + TPAR(n) = ck_strdup((u8*)BOGUS_FILE); + n->callback = par_check_callback; + n->user_val = i; + async_request(n); + } + +} + + +/* CALLBACK FOR TEST 1: Checks if the parameter causes a significant + change on the resulting page (suggesting it should be brute-forced, + not just injection-tested). */ + +static u8 par_check_callback(struct http_request* req, + struct http_response* res) { + + struct http_request* n; + u8* tmp; + + DEBUG_CALLBACK(req, res); + + if (FETCH_FAIL(res)) { + handle_error(req, res, (u8*)"during parameter behavior tests", 0); + goto schedule_next; + } + + if (same_page(&res->sig, &RPRES(req)->sig)) { + DEBUG("* Parameter seems to have no effect.\n"); + req->pivot->bogus_par = 1; + goto schedule_next; + } + + DEBUG("* Parameter seems to have some effect:\n"); + debug_same_page(&res->sig, &RPRES(req)->sig); + + if (req->pivot->bogus_par) { + DEBUG("* We already classified it as having no effect, whoops.\n"); + req->pivot->res_varies = 1; + problem(PROB_VARIES, req, res, 0, req->pivot, 0); + goto schedule_next; + } + + /* If we do not have a signature yet, record it. Otherwise, make sure + it did not change. */ + + if (!req->pivot->r404_cnt) { + + DEBUG("* New signature, recorded.\n"); + memcpy(&req->pivot->r404[0], &res->sig, sizeof(struct http_sig)); + req->pivot->r404_cnt = 1; + + } else { + + if (!same_page(&res->sig, &req->pivot->r404[0])) { + DEBUG("* Signature does not match previous responses, whoops.\n"); + req->pivot->res_varies = 1; + problem(PROB_VARIES, req, res, 0, req->pivot, 0); + goto schedule_next; + } + + } + +schedule_next: + + if ((--req->pivot->ck_pending)) return 0; + + /* All probes failed? Assume bogus parameter, what else to do... */ + + if (!req->pivot->r404_cnt) + req->pivot->bogus_par = 1; + + /* If the parameter has an effect, schedule OGNL checks. 
*/ + + if (!req->pivot->bogus_par && !req->pivot->res_varies && + req->par.n[req->pivot->fuzz_par]) { + + n = req_copy(req->pivot->req, req->pivot, 1); + tmp = ck_alloc(strlen((char*)n->par.n[req->pivot->fuzz_par]) + 8); + sprintf((char*)tmp, "[0]['%s']", n->par.n[req->pivot->fuzz_par]); + ck_free(n->par.n[req->pivot->fuzz_par]); + n->par.n[req->pivot->fuzz_par] = tmp; + n->callback = par_ognl_callback; + n->user_val = 0; + async_request(n); + + n = req_copy(req->pivot->req, req->pivot, 1); + ck_free(n->par.n[req->pivot->fuzz_par]); + n->par.n[req->pivot->fuzz_par] = ck_strdup((u8*)"[0]['sfish']"); + n->callback = par_ognl_callback; + n->user_val = 1; + async_request(n); + + } + + /* Injection attacks should be carried out even if we think this + parameter has no visible effect; but injection checks will not proceed + to dictionary fuzzing if bogus_par or res_varies is set. */ + + req->pivot->state = PSTATE_PAR_INJECT; + inject_init(req->pivot); + + return 0; + +} + + +/* Said OGNL check... */ + +static u8 par_ognl_callback(struct http_request* req, + struct http_response* res) { + + DEBUG_CALLBACK(req, res); + + if (FETCH_FAIL(res)) { + handle_error(req, res, (u8*)"during OGNL tests", 0); + return 0; + } + + /* First response is meant to give the same result. Second + is meant to give a different one. */ + + if (req->user_val == 0) { + if (same_page(&req->pivot->res->sig, &res->sig)) + req->pivot->ognl_check++; + } else { + if (!same_page(&req->pivot->res->sig, &res->sig)) + req->pivot->ognl_check++; + } + + if (req->pivot->ognl_check == 2) + problem(PROB_OGNL, req, res, + (u8*)"response to [0]['name']=... identical to name=...", + req->pivot, 0); + + return 0; + +} + + +/* STAGE 2: Tries numerical brute-force (if any reasonably sized + integer is actually found in the name). 
*/

static void crawl_par_numerical_init(struct pivot_desc* pv) {
  u8  *val = TPAR(pv->req), *out, fmt[16];
  u32 i, dig, tail;
  s32 val_i, range_st, range_en;
  u8  zero_padded = 0;

  DEBUG_HELPER(pv);

  if (pv->child_cnt >= max_children) goto schedule_next;

  /* Skip to the first digit, then to first non-digit. */

  i = 0;
  while (val[i] && !isdigit(val[i])) i++;
  if (!val[i]) goto schedule_next;

  dig = i;
  while (val[i] && isdigit(val[i])) i++;
  tail = i;

  /* Too many digits is a no-go. */

  if (tail - dig > PAR_MAX_DIGITS) goto schedule_next;

  if (val[dig] == '0' && tail - dig > 1) zero_padded = 1;

  val_i = atoi((char*)val + dig);
  range_st = val_i - PAR_INT_FUZZ;
  range_en = val_i + PAR_INT_FUZZ;
  if (range_st < 0) range_st = 0;

  /* Format string that reproduces the prefix, the (possibly zero-padded)
     number, and the suffix of the original value. */

  if (zero_padded) sprintf((char*)fmt, "%%.%us%%0%uu%%s", dig, tail - dig);
  else sprintf((char*)fmt, "%%.%us%%%uu%%s", dig, tail - dig);

  out = ck_alloc(strlen((char*)val) + 16);

  /* Let's roll! */

  pv->state = PSTATE_PAR_NUMBER;

  pv->num_pending = range_en - range_st + 1;

  for (i=range_st;i<=range_en;i++) {
    struct http_request* n;

    /* Skip the original value - and drop its slot from num_pending,
       otherwise the counter never reaches zero and the pivot would
       stall in PSTATE_PAR_NUMBER forever. */

    if (i == (u32)val_i) { pv->num_pending--; continue; }

    sprintf((char*)out, (char*)fmt, val, i, val + tail);

    n = req_copy(pv->req, pv, 1);
    ck_free(TPAR(n));
    TPAR(n) = ck_strdup((u8*)out);
    n->callback = par_numerical_callback;
    async_request(n);

  }

  ck_free(out);

  if (!pv->num_pending) goto schedule_next;
  return;

schedule_next:

  pv->state = PSTATE_PAR_DICT;
  crawl_par_dict_init(pv);

  /* Pew pew! */

}


/* CALLBACK FOR STAGE 2: Examines the output of numerical brute-force,
   creates PIVOT_VALUE nodes if the response looks different from pivot,
   nearby 404 sigs.
*/ + +static u8 par_numerical_callback(struct http_request* req, + struct http_response* res) { + struct pivot_desc *par, *n = NULL, *orig_pv = req->pivot; + u32 i; + + DEBUG_CALLBACK(req, res); + + if (FETCH_FAIL(res)) { + handle_error(req, res, (u8*)"during numerical brute-force tests", 0); + goto schedule_next; + } + + /* Looks like parent, or like its 404 signature? */ + + if (same_page(&res->sig, &req->pivot->r404[0]) || + same_page(&res->sig, &req->pivot->res->sig)) + goto schedule_next; + + par = dir_parent(req->pivot); + + /* Check with parent if sigs available, but if not - no biggie. */ + + if (par) + for (i=0;ir404_cnt;i++) + if (same_page(&res->sig, &par->r404[i])) goto schedule_next; + + /* Matching child? If yes, don't bother. */ + + for (i=0;ipivot->child_cnt;i++) + if (req->pivot->child[i]->type == PIVOT_VALUE && + !((is_c_sens(req->pivot) ? strcmp : strcasecmp)((char*)TPAR(req), + (char*)req->pivot->child[i]->name))) goto schedule_next; + + if (req->pivot->child_cnt >= max_children) goto schedule_next; + + /* Hmm, looks like we're onto something. Let's manually create a dummy + pivot and attach it to current node, without any activity planned. + Attach any response notes to that pivot. */ + + n = ck_alloc(sizeof(struct pivot_desc)); + + n->type = PIVOT_VALUE; + n->state = PSTATE_DONE; + n->name = ck_strdup(TPAR(req)); + n->req = req; + n->res = res; + n->fuzz_par = req->pivot->fuzz_par; + n->parent = req->pivot; + + DEBUG("--- New pivot (value): %s ---\n", n->name); + + req->pivot->child = ck_realloc(req->pivot->child, (req->pivot->child_cnt + 1) + * sizeof(struct pivot_desc*)); + + req->pivot->child[req->pivot->child_cnt++] = n; + + req->pivot = n; + + RESP_CHECKS(req, res); + + secondary_ext_init(orig_pv, req, res, 1); + +schedule_next: + + if (!(--(orig_pv->num_pending))) { + orig_pv->state = PSTATE_PAR_DICT; + crawl_par_dict_init(orig_pv); + } + + /* Copied over to pivot. */ + return n ? 
1 : 0; + +} + + +/* STAGE 3: Tries dictionary brute-force. This is fairly similar to the + directory dictionary version, but with additional try_list logic, etc. */ + +static void crawl_par_dict_init(struct pivot_desc* pv) { + static u8 in_dict_init; + struct http_request* n; + u8 *kw, *ex; + u32 i, c; + + /* Too many requests still pending, or already done? */ + + if (in_dict_init || pv->pdic_pending > DICT_BATCH || + pv->state != PSTATE_PAR_DICT) return; + + DEBUG_HELPER(pv); + +restart_dict: + + if (pv->child_cnt >= max_children) { + crawl_par_trylist_init(pv); + return; + } + + i = 0; + + kw = (pv->pdic_guess ? wordlist_get_guess : wordlist_get_word) + (pv->pdic_cur_key); + + if (!kw) { + + /* No more keywords. Move to guesswords if not there already, or + advance to try list otherwise. */ + + if (pv->pdic_guess) { crawl_par_trylist_init(pv); return; } + + pv->pdic_guess = 1; + pv->pdic_cur_key = 0; + goto restart_dict; + + } + + /* Use crawl_prob/100 dictionary entries. */ + + if (R(100) < crawl_prob) { + + /* Schedule extension-less probe, if the keyword is not + on the child list. */ + + for (c=0;cchild_cnt;c++) + if (pv->type == PIVOT_VALUE && + !((is_c_sens(pv) ? strcmp : strcasecmp)((char*)kw, + (char*)pv->child[c]->name))) break; + + /* ...and does not match the node itself. */ + + if (pv->fuzz_par != -1 && + !((is_c_sens(pv) ? strcmp : strcasecmp)((char*)kw, + (char*)pv->req->par.v[pv->fuzz_par]))) c = ~pv->child_cnt; + + if (c == pv->child_cnt) { + n = req_copy(pv->req, pv, 1); + ck_free(TPAR(n)); + TPAR(n) = ck_strdup(kw); + n->callback = par_dict_callback; + pv->pdic_pending++; + in_dict_init = 1; + async_request(n); + in_dict_init = 0; + } + + /* Schedule probes for all extensions for the current word, but + only if the original parameter contained '.' somewhere, + and only if string is not on the try list. 
*/ + + if (strchr((char*)TPAR(pv->req), '.')) + while (!no_fuzz_ext && (ex = wordlist_get_extension(i))) { + + u8* tmp = ck_alloc(strlen((char*)kw) + strlen((char*)ex) + 2); + + sprintf((char*)tmp, "%s.%s", kw, ex); + + for (c=0;cchild_cnt;c++) + if (pv->type == PIVOT_VALUE && + !((is_c_sens(pv) ? strcmp : strcasecmp)((char*)tmp, + (char*)pv->child[c]->name))) break; + + if (pv->fuzz_par != -1 && + !((is_c_sens(pv) ? strcmp : strcasecmp)((char*)tmp, + (char*)pv->req->par.v[pv->fuzz_par]))) c = ~pv->child_cnt; + + if (c == pv->child_cnt) { + n = req_copy(pv->req, pv, 1); + ck_free(TPAR(n)); + TPAR(n) = tmp; + n->callback = par_dict_callback; + pv->pdic_pending++; + in_dict_init = 1; + async_request(n); + in_dict_init = 0; + } else ck_free(tmp); + + i++; + } + + } + + pv->pdic_cur_key++; + + if (pv->pdic_pending < DICT_BATCH) goto restart_dict; + +} + + +/* CALLBACK FOR STAGE 3: Examines the output of directory brute-force. */ + +static u8 par_dict_callback(struct http_request* req, + struct http_response* res) { + struct pivot_desc *par, *n = NULL, *orig_pv = req->pivot; + u8 keep = 0; + u32 i; + + DEBUG_CALLBACK(req, res); + + if (!req->user_val) + req->pivot->pdic_pending--; + + if (FETCH_FAIL(res)) { + handle_error(req, res, (u8*)"during parameter brute-force tests", 0); + goto schedule_next; + } + + /* Same as parent or parent's 404? Don't bother. */ + + if (same_page(&res->sig, &req->pivot->r404[0]) || + same_page(&res->sig, &RPRES(req)->sig)) goto schedule_next; + + par = dir_parent(req->pivot); + + if (par) + for (i=0;ir404_cnt;i++) + if (same_page(&res->sig, &par->r404[i])) goto schedule_next; + + /* Matching child? If yes, don't bother. */ + + for (i=0;ipivot->child_cnt;i++) + if (req->pivot->child[i]->type == PIVOT_VALUE && + !((is_c_sens(req->pivot) ? 
strcmp : strcasecmp)((char*)TPAR(req), + (char*)req->pivot->child[i]->name))) goto schedule_next; + + if (req->pivot->child_cnt >= max_children) goto schedule_next; + + n = ck_alloc(sizeof(struct pivot_desc)); + + n->type = PIVOT_VALUE; + n->state = PSTATE_DONE; + n->name = ck_strdup(TPAR(req)); + n->req = req; + n->res = res; + n->fuzz_par = req->pivot->fuzz_par; + n->parent = req->pivot; + + DEBUG("--- New pivot (value): %s ---\n", n->name); + + req->pivot->child = ck_realloc(req->pivot->child, (req->pivot->child_cnt + 1) + * sizeof(struct pivot_desc*)); + + req->pivot->child[req->pivot->child_cnt++] = n; + req->pivot = n; + + keep = 1; + + RESP_CHECKS(req, res); + + if (!req->user_val) + secondary_ext_init(orig_pv, req, res, 1); + +schedule_next: + + if (!req->user_val) + crawl_par_dict_init(orig_pv); + + return keep; + +} + + +/* STAGE 4: Handles try list (this may be called again after request is + completed, when new entries are added to the try list). */ + +void crawl_par_trylist_init(struct pivot_desc* pv) { + u32 i; + + /* If the parameter does not seem to be doing anything, there is + no point in going through the try list if restarted. */ + + if (pv->fuzz_par == -1 || pv->bogus_par || pv->res_varies + || pv->child_cnt >= max_children) { + pv->state = PSTATE_DONE; + return; + } else + pv->state = PSTATE_PAR_TRYLIST; + + DEBUG_HELPER(pv); + + pv->try_pending += (pv->try_cnt - pv->try_cur); + + for (i=pv->try_cur;itry_cnt;i++) { + u32 c; + + /* If we already have a child by this name, don't poke it again. */ + + for (c=0;cchild_cnt;c++) + if (!((is_c_sens(pv) ? strcmp : strcasecmp)((char*)pv->try_list[i], + (char*)pv->child[c]->name))) break; + + /* Matching current node? Ditto. */ + + if (pv->fuzz_par != -1 && + !((is_c_sens(pv) ? 
strcmp : strcasecmp)((char*)pv->try_list[i], + (char*)pv->req->par.v[pv->fuzz_par]))) continue; + + if (c == pv->child_cnt && R(100) < crawl_prob) { + struct http_request* n; + n = req_copy(pv->req, pv, 1); + ck_free(TPAR(n)); + TPAR(n) = ck_strdup(pv->try_list[i]); + n->callback = par_trylist_callback; + async_request(n); + } else + if (!pv->child[c]->linked) pv->child[c]->linked = 1; + + } + + pv->try_cur = i; + + if (!pv->try_pending) { + pv->state = PSTATE_DONE; + return; + } + +} + + +/* CALLBACK FOR STAGE 4: Examines the output of try list fetches. */ + +static u8 par_trylist_callback(struct http_request* req, + struct http_response* res) { + struct pivot_desc *par, *n = NULL; + struct pivot_desc* orig_pv = req->pivot; + u32 i; + + DEBUG_CALLBACK(req, res); + + if (FETCH_FAIL(res)) { + handle_error(req, res, (u8*)"during try list fetches", 0); + goto schedule_next; + } + + /* Same as parent or parent's 404? Don't bother. */ + + if (same_page(&res->sig, &req->pivot->r404[0]) || + same_page(&res->sig, &RPRES(req)->sig)) goto schedule_next; + + par = dir_parent(req->pivot); + + if (par) + for (i=0;ir404_cnt;i++) + if (same_page(&res->sig, &par->r404[i])) goto schedule_next; + + /* Name matching known child? If yes, don't bother. */ + + for (i=0;ipivot->child_cnt;i++) + if (req->pivot->child[i]->type == PIVOT_VALUE && + !((is_c_sens(req->pivot) ? 
strcmp : strcasecmp)((char*)TPAR(req), + (char*)req->pivot->child[i]->name))) goto schedule_next; + + if (req->pivot->child_cnt >= max_children) goto schedule_next; + + n = ck_alloc(sizeof(struct pivot_desc)); + + n->type = PIVOT_VALUE; + n->state = PSTATE_DONE; + n->name = ck_strdup(TPAR(req)); + n->req = req; + n->res = res; + n->fuzz_par = req->pivot->fuzz_par; + n->parent = req->pivot; + + DEBUG("--- New pivot (value): %s ---\n", n->name); + + req->pivot->child = ck_realloc(req->pivot->child, (req->pivot->child_cnt + 1) + * sizeof(struct pivot_desc*)); + + req->pivot->child[req->pivot->child_cnt++] = n; + req->pivot = n; + + RESP_CHECKS(req, res); + + secondary_ext_init(orig_pv, req, res, 1); + +schedule_next: + + if (!(--(orig_pv->try_pending))) + orig_pv->state = PSTATE_DONE; + + /* Copied over to pivot. */ + return n ? 1 : 0; + +} + + +/* + + *************************** + **** PIVOT_FILE CHECKS **** + *************************** + + Used on confirmed file or parameter type pivots. + + */ + +/* Initial callback for content fetch. Nothing interesting here, spare for + basic sanity checks. */ + +u8 fetch_file_callback(struct http_request* req, struct http_response* res) { + u32 i = 0; + struct pivot_desc* par; + + RPRES(req) = res; + DEBUG_CALLBACK(req, res); + + if (FETCH_FAIL(res)) { + handle_error(req, res, (u8*)"during initial file fetch", 1); + return 1; + } + + /* Matches parent's 404? */ + + par = dir_parent(req->pivot); + + if (par) + for (i=0;ir404_cnt;i++) + if (same_page(&res->sig, &par->r404[i])) break; + + /* If no signatures on parents, fall back to a basic 404 check, it's + the least we could do. */ + + if ((!par && res->code == 404) || (par && i != par->r404_cnt)) { + + req->pivot->missing = 1; + + } else { + + if (res->code > 400) + problem(PROB_NO_ACCESS, req, res, NULL, req->pivot, 0); + + /* Do not bother with checks on files or params if + content identical to parent. 
*/ + + if (!RPAR(req)->res || !same_page(&res->sig, &RPAR(req)->res->sig)) { + RESP_CHECKS(req, res); + if (par && req->pivot->type != PIVOT_PARAM) + secondary_ext_init(par, req, res, 0); + } + + if (req->pivot->type == PIVOT_FILE) + check_case(req->pivot); + + } + + unlock_children(req->pivot); + crawl_parametric_init(req->pivot); + + /* This is the initial callback, keep the response. */ + return 1; + +} + + +/* + + ******************** + * PIVOT_DIR CHECKS * + ******************** + + These checks are called on all pivot points determined to correspond to + real directories. + + */ + + +/* STAGE 1: Handles initial fetch of a directory. Called once. */ + +u8 fetch_dir_callback(struct http_request* req, struct http_response* res) { + struct http_request* n; + struct pivot_desc* par; + RPRES(req) = res; + + DEBUG_CALLBACK(req, res); + + /* Error at this point means we should give up on other probes in this + directory. */ + + if (FETCH_FAIL(res)) { + handle_error(req, res, (u8*)"during initial directory fetch", 1); + return 1; + } + + if (req->pivot->type == PIVOT_SERV) + PIVOT_CHECKS(req, res); + + /* The next step is checking 404 responses for all extensions (starting + with an empty one), which would also determine if the directory exists + at all, etc. We make an exception for server pivot, though, which is + presumed to be a directory (so we do PIVOT_CHECKS right away). */ + + req->pivot->state = PSTATE_404_CHECK; + n = req_copy(req, req->pivot, 1); + replace_slash(n, (u8*)BOGUS_FILE); + + n->user_val = 0; + n->callback = dir_404_callback; + req->pivot->r404_pending++; + + async_request(n); + + par = dir_parent(req->pivot); + if (par) secondary_ext_init(par, req, res, 0); + + /* Header, response belong to pivot - keep. */ + return 1; +} + + +/* STAGE 2: Called on 404 checks, sequentially for each response. First + called once, with user_val = 0, for no extension; when called + multiple times to gather signatures. 
If not enough or too many + signatures found, the directory is deemed to be fubar. */ + +static u8 dir_404_callback(struct http_request* req, + struct http_response* res) { + + struct http_request* n; + u32 i; + + DEBUG_CALLBACK(req, res); + + if (req->pivot->r404_skip) goto schedule_next; + + if (FETCH_FAIL(res)) { + handle_error(req, res, (u8*)"during 404 response checks", 0); + goto schedule_next; + } + + /* If the first 404 probe returned something that looks like the + "root" page for the currently tested directory, panic. But don't + do that check on server pivots. */ + + if (!req->user_val && req->pivot->type != PIVOT_SERV && RPRES(req) && + same_page(&res->sig, &RPRES(req)->sig)) { + DEBUG("* First 404 probe identical with parent!\n"); + goto schedule_next; + } else if (!req->user_val) { + DEBUG("* First 404 probe differs from parent (%d)\n", + RPRES(req) ? RPRES(req)->code : 0); + } + + /* Check if this is a new signature. */ + + for (i=0;ipivot->r404_cnt;i++) + if (same_page(&res->sig, &req->pivot->r404[i])) break; + + if (i == req->pivot->r404_cnt) { + struct pivot_desc* par; + + DEBUG("* New signature found (%u).\n", req->pivot->r404_cnt); + + /* Need to add a new one. Make sure we're not over the limit. */ + + if (req->pivot->r404_cnt >= MAX_404) { + + req->pivot->r404_skip = 1; + + problem(PROB_404_FAIL, RPREQ(req), RPRES(req), + (u8*)"too many 404 signatures found", req->pivot, 0); + + goto schedule_next; + + } + + memcpy(&req->pivot->r404[i], &res->sig, sizeof(struct http_sig)); + req->pivot->r404_cnt++; + + /* Is this a new signature not seen on parent? Notify if so, + and check it thoroughly. */ + + par = dir_parent(req->pivot); + + if (par) { + + for (i=0;ir404_cnt;i++) + if (same_page(&res->sig, &par->r404[i])) break; + + } + + if (!par || i == par->r404_cnt) { + problem(PROB_NEW_404, req, res, NULL, req->pivot, 1); + RESP_CHECKS(req, res); + } + + } + +schedule_next: + + /* First probe OK? 
*/ + + if (!req->user_val) { + u8* nk; + u32 cur_ext = 0; + + /* First probe should already yield a 404 signature. */ + + if (!req->pivot->r404_cnt) { + DEBUG("* First probe failed to yield a signature.\n"); + goto bad_404; + } + + DEBUG("* First probe yielded a valid signature.\n"); + + /* At this point, we can be reasonably sure the response is + meaningful. */ + + PIVOT_CHECKS(req->pivot->req, req->pivot->res); + check_case(req->pivot); + + /* Aaand schedule all the remaining probes. */ + + while ((nk = wordlist_get_extension(cur_ext++))) { + u8* tmp = ck_alloc(strlen(BOGUS_FILE) + strlen((char*)nk) + 2); + + n = req_copy(RPREQ(req), req->pivot, 1); + + sprintf((char*)tmp, "%s.%s", BOGUS_FILE, nk); + replace_slash(n, tmp); + ck_free(tmp); + n->callback = dir_404_callback; + n->user_val = 1; + + /* r404_pending is at least 1 to begin with, so this is safe + even if async_request() has a synchronous effect. */ + + req->pivot->r404_pending++; + async_request(n); + + } + + /* Also issue 404 probe for "lpt9", as "con", "prn", "nul", "lpt#", + etc, are handled in a really annoying way by IIS. */ + + n = req_copy(RPREQ(req), req->pivot, 1); + replace_slash(n, (u8*)"lpt9"); + n->callback = dir_404_callback; + n->user_val = 1; + req->pivot->r404_pending++; + async_request(n); + + /* ...and for ~user, since this sometimes has a custom response, too. */ + + n = req_copy(RPREQ(req), req->pivot, 1); + replace_slash(n, (u8*)"~" BOGUS_FILE); + n->callback = dir_404_callback; + n->user_val = 1; + req->pivot->r404_pending++; + async_request(n); + + /* Lastly, make sure that directory 404 is on file. */ + + n = req_copy(RPREQ(req), req->pivot, 1); + replace_slash(n, (u8*)BOGUS_FILE); + set_value(PARAM_PATH, 0, (u8*)"", -1, &n->par); + n->callback = dir_404_callback; + n->user_val = 1; + req->pivot->r404_pending++; + async_request(n); + + } + + if (--(req->pivot->r404_pending)) return 0; + + /* If we're here, all probes completed, and we had no major errors. 
+ If no signatures gathered, try to offer useful advice. */ + +bad_404: + + if (!req->pivot->r404_cnt || req->pivot->r404_skip) { + + DEBUG("* 404 detection failed.\n"); + + if (RPRES(req)->code == 404) { + + req->pivot->missing = 1; + + } else if (RPRES(req)->code >= 400) { + + problem(PROB_NO_ACCESS, RPREQ(req), RPRES(req), NULL, req->pivot, 0); + + /* Additional check for 401, 500 codes, as we're not calling + content_checks() otherwise. */ + + if (RPRES(req)->code == 401) + problem(PROB_AUTH_REQ, RPREQ(req), RPRES(req), NULL, req->pivot, 0); + else if (RPRES(req)->code >= 500) + problem(PROB_SERV_ERR, RPREQ(req), RPRES(req), NULL, req->pivot, 0); + + } else { + + if (req->pivot->type != PIVOT_SERV) { + req->pivot->type = PIVOT_PATHINFO; + replace_slash(req->pivot->req, NULL); + } else + problem(PROB_404_FAIL, RPREQ(req), RPRES(req), + (u8*)"no distinctive 404 behavior detected", req->pivot, 0); + } + + req->pivot->r404_cnt = 0; + + /* We can still try parsing the response, if it differs from parent + in any way... */ + + if (!RPAR(req)->res || !same_page(&RPRES(req)->sig, &RPAR(req)->res->sig)) + PIVOT_CHECKS(req->pivot->req, req->pivot->res); + + } else DEBUG("* 404 detection successful.\n"); + + /* Note that per-extension 404 probes coupled with a limit on the number of + 404 signatures largely eliminates the need for BH_COUNT identical probes + to confirm sane behavior here. */ + + /* Regardless of the outcome, let's schedule a final IPS check. Theoretically, + a single request would be fine; but some servers, such as gws, tend + to respond to /?foo very differently than to /. 
*/ + + req->pivot->state = PSTATE_IPS_CHECK; + + n = req_copy(RPREQ(req), req->pivot, 1); + tokenize_path((u8*)IPS_TEST, n, 0); + n->callback = dir_ips_callback; + n->user_val = 0; + async_request(n); + + n = req_copy(RPREQ(req), req->pivot, 1); + tokenize_path((u8*)IPS_SAFE, n, 0); + n->callback = dir_ips_callback; + n->user_val = 1; + async_request(n); + + return 0; + +} + + +/* STAGE 3: Called on IPS check, twice. */ + +static u8 dir_ips_callback(struct http_request* req, + struct http_response* res) { + struct pivot_desc* par; + + DEBUG_CALLBACK(req, res); + + if (req->pivot->i_skip[4]) return 0; + + if (req->user_val == 1 && FETCH_FAIL(res)) { + handle_error(req, res, (u8*)"during IPS tests", 0); + req->pivot->i_skip[4] = 1; + goto schedule_next; + } + + req->pivot->misc_req[req->user_val] = req; + req->pivot->misc_res[req->user_val] = res; + if ((++req->pivot->misc_cnt) != 2) return 1; + + par = dir_parent(req->pivot); + + if (!par || !par->uses_ips) { + + if (MRES(0)->state != STATE_OK) + problem(PROB_IPS_FILTER, MREQ(0), MRES(0), + (u8*)"request timed out (could also be a flaky server)", + req->pivot, 0); + else if (!same_page(&MRES(0)->sig, &MRES(1)->sig)) + problem(PROB_IPS_FILTER, MREQ(0), MRES(0), NULL, req->pivot, 0); + + } else { + + if (MRES(0)->state == STATE_OK && same_page(&MRES(0)->sig, &MRES(1)->sig)) + problem(PROB_IPS_FILTER_OFF, MREQ(0), MRES(0), NULL, req->pivot, 0); + + } + +schedule_next: + + destroy_misc_data(req->pivot, req); + + /* Schedule injection attacks. */ + + unlock_children(req->pivot); + + req->pivot->state = PSTATE_CHILD_INJECT; + inject_init(req->pivot); + + return 0; +} + + +/* STAGE 5: Start / update directory brute-force. */ + +static void crawl_dir_dict_init(struct pivot_desc* pv) { + static u8 in_dict_init; + struct http_request* n; + u8 *kw, *ex; + u32 i, c; + + /* Too many requests still pending, or already moved on to + parametric tests? 
*/ + + if (in_dict_init || pv->pending > DICT_BATCH || pv->state != PSTATE_CHILD_DICT) + return; + + if (pv->child_cnt >= max_children) { + crawl_parametric_init(pv); + return; + } + + if (pv->no_fuzz) { + if (pv->no_fuzz == 1) + problem(PROB_LIMITS, pv->req, pv->res, + (u8*)"Recursion limit reached, not fuzzing", pv, 0); + else + problem(PROB_LIMITS, pv->req, pv->res, + (u8*)"Directory out of scope, not fuzzing", pv, 0); + crawl_parametric_init(pv); + return; + } + + DEBUG_HELPER(pv); + +restart_dict: + + kw = (pv->guess ? wordlist_get_guess : wordlist_get_word)(pv->cur_key); + + if (!kw) { + + /* No more keywords. Move to guesswords if not there already, or + advance to parametric tests otherwise. */ + + if (pv->guess) { crawl_parametric_init(pv); return; } + + pv->guess = 1; + pv->cur_key = 0; + goto restart_dict; + + } + + /* Only schedule crawl_prob% dictionary entries. */ + + if (R(100) < crawl_prob) { + + /* Schedule extension-less probe, unless the name is already + on child list. */ + + for (c=0;cchild_cnt;c++) + if (!((is_c_sens(pv) ? strcmp : strcasecmp)((char*)kw, + (char*)pv->child[c]->name))) break; + + /* Matching current node? */ + + if (pv->fuzz_par != -1 && + !((is_c_sens(pv) ? strcmp : strcasecmp)((char*)kw, + (char*)pv->req->par.v[pv->fuzz_par]))) c = ~pv->child_cnt; + + if (c == pv->child_cnt) { + n = req_copy(pv->req, pv, 1); + replace_slash(n, kw); + n->callback = dir_dict_callback; + pv->pending++; + in_dict_init = 1; + async_request(n); + in_dict_init = 0; + + /* Some web frameworks respond with 404 to /foo, but + something else to /foo/. Let's try to account for these, too, + to the extend possible. */ + + n = req_copy(pv->req, pv, 1); + replace_slash(n, kw); + set_value(PARAM_PATH, NULL, (u8*)"", -1, &n->par); + n->callback = dir_dict_callback; + pv->pending++; + in_dict_init = 1; + async_request(n); + in_dict_init = 0; + + } + + /* Schedule probes for all extensions for the current word, + likewise. 
*/ + + i = 0; + + while (!no_fuzz_ext && (ex = wordlist_get_extension(i))) { + + u8* tmp = ck_alloc(strlen((char*)kw) + strlen((char*)ex) + 2); + + sprintf((char*)tmp, "%s.%s", kw, ex); + + for (c=0;cchild_cnt;c++) + if (!((is_c_sens(pv) ? strcmp : strcasecmp)((char*)tmp, + (char*)pv->child[c]->name))) break; + + if (pv->fuzz_par != -1 && + !((is_c_sens(pv) ? strcmp : strcasecmp)((char*)tmp, + (char*)pv->req->par.v[pv->fuzz_par]))) c = pv->child_cnt; + + if (c == pv->child_cnt) { + n = req_copy(pv->req, pv, 1); + replace_slash(n, tmp); + n->callback = dir_dict_callback; + pv->pending++; + in_dict_init = 1; + async_request(n); + in_dict_init = 0; + } + + ck_free(tmp); + + i++; + } + + } + + pv->cur_key++; + + /* This scheduled extension_cnt + 1 requests - which, depending on + settings, may be anywhere from 1 to 200 or so. Grab more keywords + until we have a decent number scheduled, to improve parallelism. */ + + if (pv->pending < DICT_BATCH) goto restart_dict; + +} + + +/* CALLBACK FOR STAGE 5: Checks for a hit, schedules some more. */ + +static u8 dir_dict_callback(struct http_request* req, + struct http_response* res) { + u32 i; + u8* lp = NULL; + + DEBUG_CALLBACK(req, res); + + if (FETCH_FAIL(res)) { + handle_error(req, res, (u8*)"during path-based dictionary probes", 0); + } else { + + /* Check if 404 */ + + if (!req->pivot->r404_cnt) + DEBUG("Bad pivot with no sigs! Pivot name = '%s'\n", + req->pivot->name); + + if (res->code == 403) + for (i=0;ipar.c;i++) + if (req->par.t[i] == PARAM_PATH && req->par.v[i][0]) + lp = req->par.v[i]; + + for (i=0;ipivot->r404_cnt;i++) + if (same_page(&res->sig, &req->pivot->r404[i])) break; + + /* Special case for secondary extension fuzzing - skip secondary + extensions that seemingly return the same document. */ + + if (req->user_val && same_page(&res->sig, &req->same_sig)) + i = ~req->pivot->r404_cnt; + + /* Do not add 403 responses to .ht* requests - workaround for + Apache filtering to keep reports clean. 
*/ + + if (lp && !strncmp((char*)lp,".ht",3)) + i = ~req->pivot->r404_cnt; + + /* If not 404, do response, and does not look like + parent's original file signature, add pivot. */ + + if (i == req->pivot->r404_cnt) maybe_add_pivot(req, res, 0); + + } + + /* Try replenishing the queue. */ + + if (!req->user_val) { + req->pivot->pending--; + crawl_dir_dict_init(req->pivot); + } + + return 0; + +} + + +/* + + ************************ + * PIVOT_UNKNOWN CHECKS * + ************************ + + Callbacks used on resources of unknown type. Proceed to parametric checks + if something goes wrong, or file / dir checks if detection successful. + + */ + +/* STAGE 1: callback on the original request. */ + +u8 fetch_unknown_callback(struct http_request* req, struct http_response* res) { + u32 i = 0 /* bad gcc */; + struct pivot_desc* par; + struct http_request* n; + + RPRES(req) = res; + DEBUG_CALLBACK(req, res); + + if (FETCH_FAIL(res)) { + handle_error(req, res, (u8*)"during initial resource fetch", 1); + return 1; + } + + /* Matches parent's 404? */ + + par = dir_parent(req->pivot); + + if (par) + for (i=0;ir404_cnt;i++) + if (same_page(&res->sig, &par->r404[i])) break; + + /* Again, 404 is the least we could do. */ + + if ((!par && res->code == 404) || (par && i != par->r404_cnt)) { + + req->pivot->missing = 1; + unlock_children(req->pivot); + crawl_parametric_init(req->pivot); + return 1; + + } + + /* If the response looks like parent's original unknown_callback() + response, assume file. This is a workaround for some really + quirky architectures. */ + + if (par && res->pay_len && res->code == 200 && + same_page(&par->unk_sig, &res->sig)) { + + req->pivot->type = PIVOT_FILE; + return fetch_file_callback(req, res); + + } + + /* Schedule a request to settle the type of this pivot point. 
*/ + + n = req_copy(req, req->pivot, 1); + set_value(PARAM_PATH, NULL, (u8*)"", -1, &n->par); + n->callback = unknown_check_callback; + async_request(n); + + /* This is the initial callback, keep the response. */ + + return 1; + +} + + +/* CALLBACK FOR STAGE 1: Tries to figure out if this is a directory. */ + +static u8 unknown_check_callback(struct http_request* req, + struct http_response* res) { + u8 keep = 0; + + DEBUG_CALLBACK(req, res); + + if (FETCH_FAIL(res)) { + handle_error(req, res, (u8*)"during node type checks", 0); + goto schedule_next; + } + + /* If pivot == res, we are probably dealing with PATH_INFO-style + plot device, which is best approached as a directory anyway + (worst-case scenario, dir handlers will dismiss it as + misbehaving and demote it to PIVOT_PATHINFO after some extra + checks). + + If pivot != res, and res is not a 404 response, assume dir; + and if it is 404, assume file. + + We also have a special case if the original request returned a + non-empty 2xx response, but the new one returned 4xx or 5xx - this is + likely a file, too. */ + + if (same_page(&RPRES(req)->sig, &res->sig)) goto assume_dir; else { + u32 i = 0; + struct pivot_desc* par = dir_parent(req->pivot); + + if (par) + for (i=0;ir404_cnt;i++) + if (same_page(&res->sig, &par->r404[i])) break; + + if ((!par && res->code == 404) || (par && i != par->r404_cnt) || + (RPRES(req)->code < 300 && res->code >= 400 && RPRES(req)->pay_len)) { + + req->pivot->type = PIVOT_FILE; + + } else { + +assume_dir: + + req->pivot->type = PIVOT_DIR; + + /* Replace original request, response with new data. */ + + destroy_request(RPREQ(req)); + + if (RPRES(req)) { + memcpy(&req->pivot->unk_sig, &RPRES(req)->sig, sizeof(struct http_sig)); + destroy_response(RPRES(req)); + } + + RPREQ(req) = req; + RPRES(req) = res; + + keep = 1; + + } + + } + +schedule_next: + + /* Well, we need to do something. 
*/ + + if (req->pivot->type == PIVOT_DIR) + fetch_dir_callback(RPREQ(req), RPRES(req)); + else fetch_file_callback(RPREQ(req), RPRES(req)); + + return keep; +} + diff --git a/crawler.h b/crawler.h new file mode 100644 index 0000000..8f7e77a --- /dev/null +++ b/crawler.h @@ -0,0 +1,96 @@ +/* + skipfish - crawler state machine + -------------------------------- + + Author: Michal Zalewski + + Copyright 2009, 2010 by Google Inc. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + */ + +#ifndef _HAVE_CRAWLER_H + +#include "types.h" +#include "http_client.h" +#include "database.h" + +extern u32 crawl_prob; /* Crawl probability (1-100%) */ +extern u8 no_parse, /* Disable HTML link detection */ + warn_mixed, /* Warn on mixed content? */ + no_fuzz_ext, /* Don't fuzz ext in dirs? */ + log_ext_urls; /* Log external URLs? */ + +/* Provisional debugging callback. */ + +u8 show_response(struct http_request* req, struct http_response* res); + +/* Asynchronous request callback for the initial PSTATE_FETCH request of + PIVOT_UNKNOWN resources. */ + +u8 fetch_unknown_callback(struct http_request* req, struct http_response* res); + +/* Asynchronous request callback for the initial PSTATE_FETCH request of + PIVOT_FILE resources. */ + +u8 fetch_file_callback(struct http_request* req, struct http_response* res); + +/* Asynchronous request callback for the initial PSTATE_FETCH request of + PIVOT_DIR resources. 
*/ + +u8 fetch_dir_callback(struct http_request* req, struct http_response* res); + +/* Initializes the crawl of try_list items for a pivot point (if any still + not crawled). */ + +void crawl_par_trylist_init(struct pivot_desc* pv); + +/* Adds new name=value to form hints list. */ + +void add_form_hint(u8* name, u8* value); + +/* Macros to access various useful pivot points: */ + +#define MREQ(_x) (req->pivot->misc_req[_x]) +#define MRES(_x) (req->pivot->misc_res[_x]) +#define RPAR(_req) ((_req)->pivot->parent) +#define RPREQ(_req) ((_req)->pivot->req) +#define RPRES(_req) ((_req)->pivot->res) + +/* Debugging instrumentation for callbacks and callback helpers: */ + +#ifdef LOG_STDERR + +#define DEBUG_CALLBACK(_req, _res) do { \ + u8* _url = serialize_path(_req, 1, 1); \ + DEBUG("* %s: URL %s (%u, len %u)\n", __FUNCTION__, _url, (_res) ? \ + (_res)->code : 0, (_res) ? (_res)->pay_len : 0); \ + ck_free(_url); \ + } while (0) + +#define DEBUG_HELPER(_pv) do { \ + u8* _url = serialize_path((_pv)->req, 1, 1); \ + DEBUG("* %s: URL %s (%u, len %u)\n", __FUNCTION__, _url, (_pv)->res ? \ + (_pv)->res->code : 0, (_pv)->res ? (_pv)->res->pay_len : 0); \ + ck_free(_url); \ + } while (0) + +#else + +#define DEBUG_CALLBACK(_req, _res) +#define DEBUG_HELPER(_pv) + +#endif /* ^LOG_STDERR */ + +#endif /* !_HAVE_CRAWLER_H */ diff --git a/database.c b/database.c new file mode 100644 index 0000000..94b6d5d --- /dev/null +++ b/database.c @@ -0,0 +1,1356 @@ +/* + skipfish - database & crawl management + -------------------------------------- + + Author: Michal Zalewski + + Copyright 2009, 2010 by Google Inc. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + */ + +#define _VIA_DATABASE_C + +#include +#include +#include +#include + +#include "debug.h" +#include "config.h" +#include "types.h" +#include "http_client.h" +#include "database.h" +#include "crawler.h" +#include "string-inl.h" + +struct pivot_desc root_pivot; + +u8 **deny_urls, /* List of banned URL substrings */ + **deny_strings, /* List of banned page substrings */ + **allow_urls, /* List of required URL substrings */ + **allow_domains, /* List of allowed vhosts */ + **trust_domains; /* List of trusted vhosts */ + +u32 num_deny_urls, + num_deny_strings, + num_allow_urls, + num_allow_domains, + num_trust_domains; + +u32 max_depth = MAX_DEPTH, + max_children = MAX_CHILDREN, + max_guesses = MAX_GUESSES; + +u8 dont_add_words; /* No auto dictionary building */ + +struct kw_entry { + u8* word; /* Keyword itself */ + u32 hit_cnt; /* Number of confirmed sightings */ + u8 is_ext; /* Is an extension? */ + u8 hit_already; /* Had its hit count bumped up? 
*/ + u32 total_age; /* Total age (in scan cycles) */ + u32 last_age; /* Age since last hit */ +}; + +static struct kw_entry* + keyword[WORD_HASH]; /* Keyword collection (bucketed) */ + +static u32 keyword_cnt[WORD_HASH]; /* Per-bucket keyword counts */ + +static u8 **extension, /* Extension list */ + **guess; /* Keyword candidate list */ + +static u32 guess_cnt, /* Number of keyword candidates */ + extension_cnt, /* Number of extensions */ + keyword_total_cnt, /* Current keyword count */ + keyword_orig_cnt; /* At-boot keyword count */ + +static u32 cur_xss_id, scan_id; /* Stored XSS manager IDs */ +static struct http_request** xss_req; /* Stored XSS manager req cache */ + + + + +/* Maps a parsed URL (in req) to the pivot tree, creating or modifying nodes + as necessary, and scheduling them for crawl. This should be called only + on requests that were *not* yet retrieved. */ + +void maybe_add_pivot(struct http_request* req, struct http_response* res, + u8 via_link) { + + struct pivot_desc *cur = NULL; + + u32 i, par_cnt = 0, path_cnt = 0, pno; + u8 ends_with_slash = 0; + +#ifdef LOG_STDERR + + u8* url = serialize_path(req, 1, 1); + DEBUG("--- New pivot requested: %s (%d) --\n", url, via_link); + ck_free(url); + +#endif /* LOG_STDERR */ + + if (!req) FATAL("Invalid request data."); + + /* Initialize root pivot if not done already. */ + + if (!root_pivot.type) { + root_pivot.type = PIVOT_ROOT; + root_pivot.state = PSTATE_DONE; + root_pivot.linked = 2; + root_pivot.fuzz_par = -1; + root_pivot.name = ck_strdup((u8*)"[root]"); + } + + if (!url_allowed(req)) { url_scope++; return; } + + /* Count the number of path and query parameters in the request. 
*/ + + for (i=0;ipar.c;i++) { + + if (QUERY_SUBTYPE(req->par.t[i]) || POST_SUBTYPE(req->par.t[i])) par_cnt++; + + if (PATH_SUBTYPE(req->par.t[i])) { + + if (req->par.t[i] == PARAM_PATH && !req->par.n[i] && !req->par.v[i][0]) + ends_with_slash = 0; else ends_with_slash = 1; + + path_cnt++; + + } + + /* While we're at it, try to learn new keywords. */ + + if (PATH_SUBTYPE(req->par.t[i]) || QUERY_SUBTYPE(req->par.t[i])) { + if (req->par.n[i]) wordlist_confirm_word(req->par.n[i]); + wordlist_confirm_word(req->par.v[i]); + } + + } + + /* Try to find pivot point for the host. */ + + for (i=0;ireq->host, (char*)req->host) && + cur->req->port == req->port && + cur->req->proto == req->proto) break; + } + + if (i == root_pivot.child_cnt) { + + /* No server pivot found, we need to create one. */ + + cur = ck_alloc(sizeof(struct pivot_desc)); + + root_pivot.child = ck_realloc(root_pivot.child, + (root_pivot.child_cnt + 1) * sizeof(struct pivot_desc*)); + + root_pivot.child[root_pivot.child_cnt++] = cur; + + cur->type = PIVOT_SERV; + cur->state = PSTATE_FETCH; + cur->linked = 2; + cur->fuzz_par = -1; + cur->parent = &root_pivot; + + /* Copy the original request, sans path. Create a dummy + root dir entry instead. Derive pivot name by serializing + the URL of the associated stub request. */ + + cur->req = req_copy(req, cur, 0); + set_value(PARAM_PATH, NULL, (u8*)"", -1, &cur->req->par); + cur->name = serialize_path(cur->req, 1, 0); + cur->req->callback = fetch_dir_callback; + + /* If matching response not provided, schedule request. */ + + if (res && !par_cnt && path_cnt == 1) { + cur->res = res_copy(res); + fetch_dir_callback(req, cur->res); + } else async_request(cur->req); + + wordlist_confirm_word(req->host); + + } + + /* One way or the other, 'cur' now points to server pivot. Let's + walk through all path elements, and follow or create sub-pivots + for them. */ + + pno = 0; + + for (i=0;ipar.t[pno])) pno++; + + /* Bail out on the trailing NULL-'' indicator, if present. 
It is + used to denote a directory, and will always be the last path + element. */ + + if (i == path_cnt - 1 && req->par.t[pno] == PARAM_PATH && + !req->par.n[pno] && !req->par.v[pno][0]) break; + + pname = req->par.n[pno] ? req->par.n[pno] : req->par.v[pno]; + + ccnt = cur->child_cnt; + + /* Try to find a matching node. */ + + for (c=0;cchild[c]->name)) { + cur = cur->child[c]; + if (cur->linked < via_link) cur->linked = via_link; + break; + } + + if (c == ccnt) { + + /* Node not found. We need to create one. */ + + struct pivot_desc* n; + + /* Enforce user limits. */ + + if ((i + 1) >= max_depth || cur->child_cnt > max_children) + return; + + /* Create and link back to parent. */ + + n = ck_alloc(sizeof(struct pivot_desc)); + + cur->child = ck_realloc(cur->child, (cur->child_cnt + 1) * + sizeof(struct pivot_desc*)); + + cur->child[cur->child_cnt++] = n; + + n->parent = cur; + n->linked = via_link; + n->name = ck_strdup(pname); + + /* Copy the original request, then copy over path up to the + current point. */ + + n->req = req_copy(req, n, 0); + + for (c=0;c<=pno;c++) + if (PATH_SUBTYPE(req->par.t[c])) + set_value(req->par.t[c], req->par.n[c], req->par.v[c], -1, + &n->req->par); + + /* If name is parametric, indicate which parameter to fuzz. */ + + if (req->par.n[pno]) n->fuzz_par = n->req->par.c - 1; + else n->fuzz_par = -1; + + /* Do not fuzz out-of-scope or limit exceeded dirs... */ + + if ((i + 1) == max_depth - 1) n->no_fuzz = 1; + + if (i != path_cnt - 1) { + + /* This is not the last path segment, so let's assume a "directory" + (hierarchy node, to be more accurate), and schedule directory + tests. */ + + set_value(PARAM_PATH, NULL, (u8*)"", -1, &n->req->par); + n->type = PIVOT_DIR; + n->req->callback = fetch_dir_callback; + + if (!url_allowed(n->req)) n->no_fuzz = 2; + + /* Subdirectory tests require parent directory 404 testing to complete + first. If these are still pending, wait a bit. 
*/ + + if (cur->state > PSTATE_IPS_CHECK) { + + n->state = PSTATE_FETCH; + + /* If this actually *is* the last parameter, taking into account the + early-out hack mentioned above, and we were offered a response - + make use of it and don't schedule a new request. */ + + if (i == path_cnt - 2 && ends_with_slash && res) { + + n->res = res_copy(res); + fetch_dir_callback(n->req, n->res); + + } else async_request(n->req); + + } else n->state = PSTATE_PENDING; + + } else { + + /* Last segment. If no parameters, copy response body, mark type as + "unknown", schedule extra checks. */ + + if (!url_allowed(n->req)) n->no_fuzz = 2; + + if (!par_cnt) { + + n->type = PIVOT_UNKNOWN; + n->res = res_copy(res); + n->req->callback = fetch_unknown_callback; + + if (cur->state > PSTATE_IPS_CHECK) { + + n->state = PSTATE_FETCH; + + /* If we already have a response, call the callback directly + (it will schedule further requests on its own). */ + + if (!res) { + n->state = PSTATE_FETCH; + async_request(n->req); + } else fetch_unknown_callback(n->req, n->res); + + } else n->state = PSTATE_PENDING; + + } else { + + /* Parameters found. Assume file, schedule a fetch. */ + + n->type = PIVOT_FILE; + n->req->callback = fetch_file_callback; + + if (cur->state > PSTATE_IPS_CHECK) { + n->state = PSTATE_FETCH; + async_request(n->req); + } else n->state = PSTATE_PENDING; + + } + + } + + cur = n; + + } + + /* At this point, 'cur' points to a newly created or existing node + for the path element. If this element is parametric, make sure + that its value is on the 'try' list. */ + + if (req->par.n[pno]) { + + for (c=0;ctry_cnt;c++) + if (cur->try_list[c] && !(is_c_sens(cur) ? strcmp : strcasecmp) + ((char*)req->par.v[pno], (char*)cur->try_list[c])) break; + + /* Not found on the list - try adding. 
*/ + + if (c == cur->try_cnt) { + + cur->try_list = ck_realloc(cur->try_list, (cur->try_cnt + 1) * + sizeof(u8*)); + cur->try_list[cur->try_cnt++] = ck_strdup(req->par.v[pno]); + + if (cur->state == PSTATE_DONE) + crawl_par_trylist_init(cur); + + } + + } + + pno++; + + } + + /* Phew! At this point, 'cur' points to the final path element, and now, + we just need to take care of parameters. Each parameter has its own + pivot point, and a full copy of the request. */ + + pno = 0; + + for (i=0;ipar.t[pno]) && !POST_SUBTYPE(req->par.t[pno])) + pno++; + + pname = req->par.n[pno] ? req->par.n[pno] : (u8*)"[blank]"; + ccnt = cur->child_cnt; + + /* Try to find a matching node. */ + + for (c=0;cchild[c]->name)) { + cur = cur->child[c]; + if (cur->linked < via_link) cur->linked = via_link; + break; + } + + if (c == ccnt) { + + /* Node not found. We need to create one. */ + + struct pivot_desc* n; + + /* Enforce user limits. */ + + if (cur->child_cnt > max_children) { + problem(PROB_LIMITS, req, res, (u8*)"Child node limit exceeded", cur, 0); + return; + } + + /* Create and link back to parent. */ + + n = ck_alloc(sizeof(struct pivot_desc)); + + cur->child = ck_realloc(cur->child, (cur->child_cnt + 1) * + sizeof(struct pivot_desc*)); + + cur->child[cur->child_cnt++] = n; + + n->parent = cur; + n->type = PIVOT_PARAM; + n->linked = via_link; + n->name = ck_strdup(pname); + + /* Copy the original request, in full. Remember not to fuzz + file inputs. */ + + n->req = req_copy(req, n, 1); + n->fuzz_par = req->par.t[pno] == PARAM_POST_F ? -1 : pno; + n->res = res_copy(res); + + /* File fetcher does everything we need. */ + + n->req->callback = fetch_file_callback; + + if (cur->state > PSTATE_IPS_CHECK) { + n->state = PSTATE_FETCH; + if (res) fetch_file_callback(n->req, n->res); + else async_request(n->req); + } else n->state = PSTATE_PENDING; + + cur = n; + + } + + /* Ok, again, 'cur' is at the appropriate node. Make sure the + current value is on the 'try' list. 
*/ + + for (c=0;ctry_cnt;c++) + if (cur->try_list[c] && !(is_c_sens(cur) ? strcmp : strcasecmp) + ((char*)req->par.v[pno], (char*)cur->try_list[c])) break; + + /* Not found on the list - try adding. */ + + if (c == cur->try_cnt) { + + cur->try_list = ck_realloc(cur->try_list, (cur->try_cnt + 1) * + sizeof(u8*)); + cur->try_list[cur->try_cnt++] = ck_strdup(req->par.v[pno]); + + if (cur->state == PSTATE_DONE) + crawl_par_trylist_init(cur); + + } + + /* Parameters are not hierarchical, so go back to the parent node. */ + + cur = cur->parent; + pno++; + + } + + /* Done, at last! */ + +} + + +/* Finds the host-level pivot point for global issues. */ + +struct pivot_desc* host_pivot(struct pivot_desc* pv) { + while (pv->parent && pv->parent->parent) pv = pv->parent; + return pv; +} + + +/* Gets case sensitivity info from the nearest DIR / SERV node. */ + +u8 is_c_sens(struct pivot_desc* pv) { + while (pv->parent && (pv->type != PIVOT_DIR || pv->type != PIVOT_SERV)) + pv = pv->parent; + return pv->csens; +} + + +/* Registers a problem, if not duplicate (res, extra may be NULL): */ + +void problem(u32 type, struct http_request* req, struct http_response* res, + u8* extra, struct pivot_desc* pv, u8 allow_dup) { + + u32 i; + + if (pv->type == PIVOT_NONE) FATAL("Uninitialized pivot point"); + if (type == PROB_NONE || !req) FATAL("Invalid issue data"); + + DEBUG("--- NEW PROBLEM - type: %u, extra: '%s' ---\n", type, extra); + + /* Check for duplicates */ + + if (!allow_dup) + for (i=0;iissue_cnt;i++) + if (type == pv->issue[i].type && !strcmp(extra ? (char*)extra : "", + pv->issue[i].extra ? (char*)pv->issue[i].extra : "")) return; + + pv->issue = ck_realloc(pv->issue, (pv->issue_cnt + 1) * + sizeof(struct issue_desc)); + + pv->issue[pv->issue_cnt].type = type; + pv->issue[pv->issue_cnt].extra = extra ? 
ck_strdup(extra) : NULL; + pv->issue[pv->issue_cnt].req = req_copy(req, pv, 1); + pv->issue[pv->issue_cnt].res = res_copy(res); + + /* Mark copies of half-baked requests as done. */ + + if (res && res->state < STATE_OK) { + pv->issue[pv->issue_cnt].res->state = STATE_OK; + ck_free(pv->issue[pv->issue_cnt].res->payload); + pv->issue[pv->issue_cnt].res->payload = + ck_strdup((u8*)"[...truncated...]\n"); + pv->issue[pv->issue_cnt].res->pay_len = 18; + } + + pv->issue_cnt++; + +} + + + +/* Three functions to check if the URL is permitted under current rules + (0 = no, 1 = yes): */ + +u8 url_allowed_host(struct http_request* req) { + u32 i; + + for (i=0;ihost, allow_domains[i]); + + if (pos && strlen((char*)req->host) == + strlen((char*)allow_domains[i]) + (pos - req->host)) + return 1; + + } else + if (!strcasecmp((char*)req->host, (char*)allow_domains[i])) + return 1; + + } + + return 0; +} + + +u8 url_trusted_host(struct http_request* req) { + u32 i; + + i = 0; + + while (always_trust_domains[i]) { + + if (always_trust_domains[i][0] == '.') { + + u8* pos = inl_strcasestr(req->host, (u8*)always_trust_domains[i]); + + if (pos && strlen((char*)req->host) == + strlen(always_trust_domains[i]) + (pos - req->host)) + return 1; + } else + if (!strcasecmp((char*)req->host, (char*)always_trust_domains[i])) + return 1; + + i++; + + } + + for (i=0;ihost, trust_domains[i]); + + if (pos && strlen((char*)req->host) == + strlen((char*)trust_domains[i]) + (pos - req->host)) + return 1; + + } + + return 0; +} + +u8 url_allowed(struct http_request* req) { + u8* url = serialize_path(req, 1, 0); + u32 i; + + /* Check blacklist first */ + + for (i=0;icode != sig2->code) return 0; + + for (i=0;idata[i] - sig2->data[i]; + u32 scale = sig1->data[i] + sig2->data[i]; + + if (abs(diff) > 1 + (scale * FP_T_REL / 100) || + abs(diff) > FP_T_ABS) + if (++bucket_fail > FP_B_FAIL) return 0; + + total_diff += diff; + total_scale += scale; + + } + + if (abs(total_diff) > 1 + (total_scale * FP_T_REL / 
100)) + return 0; + + return 1; + +} + + +/* Dumps signature data: */ + +void dump_signature(struct http_sig* sig) { + u32 i; + + DEBUG("SIG %03d: ", sig->code); + for (i=0;idata[i]); + DEBUG("\n"); + +} + + +/* Debugs signature comparison: */ + +void debug_same_page(struct http_sig* sig1, struct http_sig* sig2) { + +#ifdef LOG_STDERR + + u32 i; + s32 total_diff = 0; + u32 total_scale = 0; + + dump_signature(sig1); + dump_signature(sig2); + + DEBUG(" "); + + for (i=0;idata[i] - sig2->data[i]; + DEBUG("[%04d] ", diff); + } + + DEBUG("(diff)\n "); + + for (i=0;idata[i] - sig2->data[i]; + u32 scale = sig1->data[i] + sig2->data[i]; + + if (abs(diff) > 1 + (scale * FP_T_REL / 100) || + abs(diff) > FP_T_ABS) + DEBUG("[FAIL] "); else DEBUG("[pass] "); + + total_diff += diff; + total_scale += scale; + } + + DEBUG("\n "); + + for (i=0;idata[i] + sig2->data[i]; + + DEBUG("[%04d] ", (u32)( 1 + (scale * FP_T_REL / 100))); + } + + DEBUG("(allow)\n"); + + DEBUG("Total diff: %d, scale %d, allow %d\n", + total_diff, total_scale, 1 + (u32)(total_scale * FP_T_REL / 100)); + +#endif /* LOG_STDERR */ + +} + + + +/* Keyword management: */ + + +/* Word hashing helper. */ + +static inline u32 hash_word(u8* str) { + register u32 ret = 0; + register u8 cur; + + if (str) + while ((cur=*str)) { + ret = ~ret ^ (cur) ^ + (cur << 5) ^ (~cur >> 5) ^ + (cur << 10) ^ (~cur << 15) ^ + (cur << 20) ^ (~cur << 25) ^ + (cur << 30); + str++; + } + + return ret % WORD_HASH; +} + + +/* Adds a new keyword candidate to the global "guess" list. This + list is always case-insensitive. */ + +void wordlist_add_guess(u8* text) { + u32 target, i, kh; + + if (dont_add_words) return; + + /* Check if this is a bad or known guess or keyword. 
*/ + + if (!text || !text[0] || strlen((char*)text) > MAX_WORD) return; + + for (i=0;i= max_guesses) target = R(max_guesses); + else target = guess_cnt++; + + ck_free(guess[target]); + guess[target] = ck_strdup(text); + +} + + +/* Adds a single, sanitized keyword to the list, or increases its hit count. + Keyword list is case-insensitive - first capitalization wins. */ + +static void wordlist_confirm_single(u8* text, u8 is_ext, u32 add_hits, + u32 total_age, u32 last_age) { + u32 kh, i; + + if (!text || !text[0] || strlen((char*)text) > MAX_WORD) return; + + /* Check if this is a known keyword. */ + + kh = hash_word(text); + + for (i=0;i 4) return; + + if (ppos != -1) { + + /* Period only? Too long? */ + if (tlen == 1 || tlen - ppos > 12) return; + + if (ppos && ppos != tlen - 1 && !isdigit(text[ppos] + 1)) { + wordlist_confirm_single(text + ppos + 1, 1, 1, 0, 0); + text[ppos] = 0; + wordlist_confirm_single(text, 0, 1, 0, 0); + text[ppos] = '.'; + return; + } + + } + + wordlist_confirm_single(text, 0, 1, 0, 0); +} + + +/* Returns wordlist item at a specified offset (NULL if no more available). */ + +u8* wordlist_get_word(u32 offset) { + u32 cur_off = 0, kh; + + for (kh=0;kh offset) break; + cur_off += keyword_cnt[kh]; + } + + if (kh == WORD_HASH) return NULL; + + return keyword[kh][offset - cur_off].word; +} + + +/* Returns keyword candidate at a specified offset (or NULL). */ + +u8* wordlist_get_guess(u32 offset) { + if (offset >= guess_cnt) return NULL; + return guess[offset]; +} + + +/* Returns extension at a specified offset (or NULL). */ + +u8* wordlist_get_extension(u32 offset) { + if (offset >= extension_cnt) return NULL; + return extension[offset]; +} + + +/* Loads keywords from file. 
*/ + +void load_keywords(u8* fname, u32 purge_age) { + FILE* in; + u32 hits, total_age, last_age, lines = 0; + u8 type; + s32 fields; + u8 kword[MAX_WORD + 1]; + char fmt[32]; + + kword[MAX_WORD] = 0; + + in = fopen((char*)fname, "r"); + + if (!in) { + + PFATAL("Unable to open wordlist '%s'", fname); + + WARN("Wordlist '%s' not found, not loaded.", fname); + return; + + } + + sprintf(fmt, "%%c %%u %%u %%u %%%u[^\x01-\x1f]", MAX_WORD); + + while ((fields = fscanf(in, fmt, &type, &hits, &total_age, &last_age, kword)) + == 5) { + if (!purge_age || last_age < purge_age) + wordlist_confirm_single(kword, (type == 'e'), hits, + total_age + 1, last_age + 1); + lines++; + fgetc(in); /* sink \n */ + } + + if (fields != -1 && fields != 5) + FATAL("Wordlist '%s': syntax error in line %u.\n", fname, lines + 1); + + if (!lines) + WARN("Wordlist '%s' contained no valid entries.", fname); + + keyword_orig_cnt = keyword_total_cnt; + + fclose(in); + +} + + +/* qsort() callback for sorting keywords in save_keywords(). */ + +static int keyword_sorter(const void* word1, const void* word2) { + if (((struct kw_entry*)word1)->hit_cnt < ((struct kw_entry*)word2)->hit_cnt) + return 1; + else if (((struct kw_entry*)word1)->hit_cnt == + ((struct kw_entry*)word2)->hit_cnt) + return 0; + else return -1; +} + + +/* Saves all keywords to a file. */ + +void save_keywords(u8* fname) { + struct stat st; + FILE* out; + s32 fd; + u32 i, kh; + u8* old; + +#ifndef O_NOFOLLOW +#define O_NOFOLLOW 0 +#endif /* !O_NOFOLLOW */ + + if (stat((char*)fname, &st) || !S_ISREG(st.st_mode)) return; + + /* First, sort the list. 
*/ + + for (kh=0;khtype) { + case PIVOT_SERV: pivot_serv++; /* Fall through */ + case PIVOT_DIR: pivot_dir++; break; + case PIVOT_FILE: pivot_file++; break; + case PIVOT_PATHINFO: pivot_pinfo++; break; + case PIVOT_UNKNOWN: pivot_unknown++; break; + case PIVOT_PARAM: pivot_param++; break; + case PIVOT_VALUE: pivot_value++; break; + } + + if (pv->missing) pivot_missing++; + + switch (pv->state) { + case PSTATE_PENDING: pivot_pending++; break; + case PSTATE_FETCH ... PSTATE_IPS_CHECK: pivot_init++; break; + case PSTATE_CHILD_INJECT: + case PSTATE_PAR_INJECT: pivot_attack++; break; + case PSTATE_DONE: pivot_done++; break; + default: pivot_bf++; + } + + for (i=0;iissue_cnt;i++) + issue_cnt[PSEV(pv->issue[i].type)]++; + + for (i=0;ichild_cnt;i++) + pv_stat_crawl(pv->child[i]); + +} + + +void database_stats() { + + pivot_pending = pivot_init = pivot_attack = pivot_bf = pivot_pinfo = + pivot_done = pivot_serv = pivot_dir = pivot_file = pivot_param = + pivot_value = pivot_missing = pivot_unknown = pivot_cnt = 0; + + memset(issue_cnt, 0, sizeof(issue_cnt)); + + pv_stat_crawl(&root_pivot); + + SAY("Database statistics\n" + "-------------------\n\n" + cGRA " Pivots : " cNOR "%u total, %u done (%.02f%%) \n" + cGRA " In progress : " cNOR "%u pending, %u init, %u attacks, " + "%u dict \n" + cGRA " Missing nodes : " cNOR "%u spotted\n" + cGRA " Node types : " cNOR "%u serv, %u dir, %u file, %u pinfo, " + "%u unkn, %u par, %u val\n" + cGRA " Issues found : " cNOR "%u info, %u warn, %u low, %u medium, " + "%u high impact\n" + cGRA " Dict size : " cNOR "%u words (%u new), %u extensions, " + "%u candidates\n", + pivot_cnt, pivot_done, pivot_cnt ? 
((100.0 * pivot_done) / (pivot_cnt)) + : 0, pivot_pending, pivot_init, pivot_attack, pivot_bf, pivot_missing, + pivot_serv, pivot_dir, pivot_file, pivot_pinfo, pivot_unknown, + pivot_param, pivot_value, issue_cnt[1], issue_cnt[2], issue_cnt[3], + issue_cnt[4], issue_cnt[5], keyword_total_cnt, keyword_total_cnt - + keyword_orig_cnt, extension_cnt, guess_cnt); + +} + + +/* Dumps pivot database, for debugging purposes. */ + +void dump_pivots(struct pivot_desc* cur, u8 nest) { + + u8* indent = ck_alloc(nest + 1); + u8* url; + u32 i; + + if (!cur) cur = &root_pivot; + + memset(indent, ' ', nest); + + SAY(cBRI "\n%s== Pivot " cLGN "%s" cBRI " [%d] ==\n", + indent, cur->name, cur->dupe); + SAY(cGRA "%sType : " cNOR, indent); + + switch (cur->type) { + case PIVOT_NONE: SAY(cLRD "PIVOT_NONE (bad!)\n" cNOR); break; + case PIVOT_ROOT: SAY("PIVOT_ROOT\n"); break; + case PIVOT_SERV: SAY("PIVOT_SERV\n"); break; + case PIVOT_DIR: SAY("PIVOT_DIR\n"); break; + case PIVOT_FILE: SAY("PIVOT_FILE\n"); break; + case PIVOT_PATHINFO: SAY("PIVOT_PATHINFO\n"); break; + case PIVOT_VALUE: SAY("PIVOT_VALUE\n"); break; + case PIVOT_UNKNOWN: SAY("PIVOT_UNKNOWN\n"); break; + case PIVOT_PARAM: SAY("PIVOT_PARAM\n"); break; + default: SAY(cLRD " (bad!)\n" cNOR, cur->type); + } + + SAY(cGRA "%sState : " cNOR, indent); + + switch (cur->state) { + case PSTATE_NONE: SAY(cLRD "PSTATE_NONE (bad!)\n" cNOR); break; + case PSTATE_PENDING: SAY("PSTATE_PENDING\n"); break; + case PSTATE_FETCH: SAY("PSTATE_FETCH\n"); break; + case PSTATE_TYPE_CHECK: SAY("PSTATE_TYPE_CHECK\n"); break; + case PSTATE_404_CHECK: SAY("PSTATE_404_CHECK\n"); break; + case PSTATE_IPS_CHECK: SAY("PSTATE_IPS_CHECK\n"); break; + case PSTATE_CHILD_INJECT: SAY("PSTATE_CHILD_INJECT\n"); break; + case PSTATE_CHILD_DICT: SAY("PSTATE_CHILD_DICT\n"); break; + case PSTATE_PAR_CHECK: SAY("PSTATE_PAR_CHECK\n"); break; + case PSTATE_PAR_INJECT: SAY("PSTATE_PAR_INJECT\n"); break; + case PSTATE_PAR_NUMBER: SAY("PSTATE_PAR_NUMBER\n"); break; + case 
PSTATE_PAR_DICT: SAY("PSTATE_PAR_DICT\n"); break; + case PSTATE_PAR_TRYLIST: SAY("PSTATE_PAR_TRYLIST\n"); break; + case PSTATE_DONE: SAY("PSTATE_DONE\n"); break; + default: SAY(cLRD " (bad!)\n" cNOR, + cur->state); + } + + if (cur->missing) { + if (cur->linked == 2) + SAY(cGRA "%sMissing : " cMGN "YES\n" cNOR, indent); + else + SAY(cGRA "%sMissing : " cLBL "YES (followed a dodgy link)\n" cNOR, + indent); + } + + SAY(cGRA "%sFlags : " cNOR "linked %u, case %u/%u, fuzz_par %d, ips %u, " + "sigs %u, reqs %u\n", indent, cur->linked, cur->csens, cur->c_checked, + cur->fuzz_par, cur->uses_ips, cur->r404_cnt, cur->pending); + + if (cur->req) { + url = serialize_path(cur->req, 1, 0); + SAY(cGRA "%sTarget : " cNOR "%s (" cYEL "%d" cNOR ")\n", indent, url, + cur->res ? cur->res->code : 0); + ck_free(url); + + if (cur->res) + SAY(cGRA "%sMIME : " cNOR "%s -> %s [" + "%s:%s]\n", indent, cur->res->header_mime ? cur->res->header_mime : + (u8*)"-", cur->res->sniffed_mime ? cur->res->sniffed_mime : (u8*)"-", + cur->res->header_charset ? cur->res->header_charset : (u8*)"-", + cur->res->meta_charset ? cur->res->meta_charset : (u8*)"-"); + } + + if (cur->try_cnt) { + SAY(cGRA "%sTry : " cNOR, indent); + for (i=0;itry_cnt;i++) + SAY("%s%s", cur->try_list[i], (i == cur->try_cnt - 1) ? "" : ", "); + SAY("\n"); + } + + /* Dump issues. */ + + for (i=0;iissue_cnt;i++) { + if (cur->issue[i].req) url = serialize_path(cur->issue[i].req, 0, 0); + else url = ck_strdup((u8*)"[none]"); + SAY(cGRA "%s-> Issue : " cNOR "type %d, extra '%s', URL: " cLGN "%s" + cNOR " (" cYEL "%u" cNOR ")\n", indent, cur->issue[i].type, + cur->issue[i].extra, url, cur->issue[i].res ? cur->issue[i].res->code + : 0); + ck_free(url); + } + + ck_free(indent); + + for (i=0;ichild_cnt;i++) + dump_pivots(cur->child[i], nest + 1); + +} + + +/* Cleans up pivot structure for memory debugging. 
*/ + +static void dealloc_pivots(struct pivot_desc* cur) { + u32 i; + + if (!cur) cur = &root_pivot; + + if (cur->req) destroy_request(cur->req); + if (cur->res) destroy_response(cur->res); + + ck_free(cur->name); + + if (cur->try_cnt) { + for (i=0;itry_cnt;i++) ck_free(cur->try_list[i]); + ck_free(cur->try_list); + } + + if (cur->issue) { + for (i=0;iissue_cnt;i++) { + ck_free(cur->issue[i].extra); + if (cur->issue[i].req) destroy_request(cur->issue[i].req); + if (cur->issue[i].res) destroy_response(cur->issue[i].res); + } + ck_free(cur->issue); + } + + for (i=0;ichild_cnt;i++) + dealloc_pivots(cur->child[i]); + + ck_free(cur->child); + + if (cur != &root_pivot) ck_free(cur); + +} + + + +/* Creates a new XSS location tag. */ + +u8* new_xss_tag(u8* prefix) { + static u8* ret; + + if (ret) free(ret); + ret = __DFL_ck_alloc((prefix ? strlen((char*)prefix) : 0) + 32); + + if (!scan_id) scan_id = R(999999) + 1; + + sprintf((char*)ret, "%s>\">'>'\"", + prefix ? prefix : (u8*)"", cur_xss_id, scan_id); + + return ret; + +} + + +/* Registers last XSS tag along with a completed http_request */ + +void register_xss_tag(struct http_request* req) { + xss_req = ck_realloc(xss_req, (cur_xss_id + 1) * + (sizeof(struct http_request*))); + xss_req[cur_xss_id] = req_copy(req, 0, 1); + cur_xss_id++; +} + + +/* Gets the request that submitted the tag in the first place */ + +struct http_request* get_xss_request(u32 xid, u32 sid) { + if (sid != scan_id || xid >= cur_xss_id) return NULL; + return xss_req[xid]; +} + + +/* Cleans up other database entries, for memory profiling purposes. */ + +void destroy_database() { + u32 i, kh; + + dealloc_pivots(0); + + ck_free(deny_urls); + ck_free(deny_strings); + ck_free(allow_urls); + ck_free(allow_domains); + ck_free(trust_domains); + + for (kh=0;kh + + Copyright 2009, 2010 by Google Inc. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + */ + +#ifndef _HAVE_DATABASE_H +#define _HAVE_DATABASE_H + +#include "debug.h" +#include "config.h" +#include "types.h" +#include "http_client.h" + +/* Testing pivot points - used to organize the scan: */ + +/* - Pivot types: */ + +#define PIVOT_NONE 0 /* Invalid */ +#define PIVOT_ROOT 1 /* Root pivot */ + +#define PIVOT_SERV 10 /* Top-level host pivot */ +#define PIVOT_DIR 11 /* Directory pivot */ +#define PIVOT_FILE 12 /* File pivot */ +#define PIVOT_PATHINFO 13 /* PATH_INFO script */ + +#define PIVOT_UNKNOWN 18 /* (Currently) unknown type */ + +#define PIVOT_PARAM 100 /* Parameter fuzzing pivot */ +#define PIVOT_VALUE 101 /* Parameter value pivot */ + +/* - Pivot states (initialized to PENDING or FETCH by database.c, then + advanced by crawler.c): */ + +#define PSTATE_NONE 0 /* Invalid */ +#define PSTATE_PENDING 1 /* Pending parent tests */ + +#define PSTATE_FETCH 10 /* Initial data fetch */ + +#define PSTATE_TYPE_CHECK 20 /* Type check (unknown only) */ +#define PSTATE_404_CHECK 22 /* 404 check (dir only) */ +#define PSTATE_IPS_CHECK 25 /* IPS filtering check */ + +/* For directories only (injecting children nodes): */ + +#define PSTATE_CHILD_INJECT 50 /* Common security attacks */ +#define PSTATE_CHILD_DICT 55 /* Dictionary brute-force */ + +/* For parametric nodes only (replacing parameter value): */ + +#define PSTATE_PAR_CHECK 60 /* Parameter works at all? 
*/ +#define PSTATE_PAR_INJECT 65 /* Common security attacks */ +#define PSTATE_PAR_NUMBER 70 /* Numeric ID traversal */ +#define PSTATE_PAR_DICT 75 /* Dictionary brute-force */ +#define PSTATE_PAR_TRYLIST 99 /* 'Try list' fetches */ + +#define PSTATE_DONE 100 /* Analysis done */ + +/* - Descriptor of a pivot point: */ + +struct pivot_desc { + u8 type; /* PIVOT_* */ + u8 state; /* PSTATE_* */ + u8 linked; /* Linked to? (0/1/2) */ + u8 missing; /* Determined to be missing? */ + + u8 csens; /* Case sensitive names? */ + u8 c_checked; /* csens check done? */ + + u8* name; /* Directory / script name */ + + struct http_request* req; /* Prototype HTTP request */ + + s32 fuzz_par; /* Fuzz target parameter */ + u8** try_list; /* Values to try */ + u32 try_cnt; /* Number of values to try */ + u32 try_cur; /* Last tested try list offs */ + + struct pivot_desc* parent; /* Parent pivot, if any */ + struct pivot_desc** child; /* List of children */ + u32 child_cnt; /* Number of children */ + + struct issue_desc* issue; /* List of issues found */ + u32 issue_cnt; /* Number of issues */ + + struct http_response* res; /* HTTP response seen */ + + u8 res_varies; /* Response varies? */ + + /* Fuzzer and probe state data: */ + + u8 no_fuzz; /* Do not attepmt fuzzing. */ + + u8 uses_ips; /* Uses IPS filtering? */ + + u32 cur_key; /* Current keyword */ + u32 pdic_cur_key; /* ...for param dict */ + + u8 guess; /* Guess list keywords? */ + u8 pdic_guess; /* ...for param dict */ + + u32 pending; /* Number of pending reqs */ + u32 pdic_pending; /* ...for param dict */ + u32 num_pending; /* ...for numerical enum */ + u32 try_pending; /* ...for try list */ + u32 r404_pending; /* ...for 404 probes */ + u32 ck_pending; /* ...for behavior checks */ + + struct http_sig r404[MAX_404]; /* 404 response signatures */ + u32 r404_cnt; /* Number of sigs collected */ + struct http_sig unk_sig; /* Original "unknown" sig. 
*/ + + /* Injection attack logic scratchpad: */ + + struct http_request* misc_req[10]; /* Saved requests */ + struct http_response* misc_res[10]; /* Saved responses */ + u8 misc_cnt; /* Request / response count */ + + u8 i_skip[15]; /* Injection step skip flags */ + u8 i_skip_add; + u8 r404_skip; + + u8 bogus_par; /* fuzz_par does nothing? */ + + u8 ognl_check; /* OGNL check flags */ + + /* Reporting information: */ + + u32 total_child_cnt; /* All children */ + u32 total_issues[6]; /* Issues by severity */ + u8 dupe; /* Looks like a duplicate? */ + u32 pv_sig; /* Simple pivot signature */ + +}; + +extern struct pivot_desc root_pivot; + +/* Maps a parsed URL (in req) to the pivot tree, creating or modifying nodes + as necessary, and scheduling them for crawl; via_link should be 1 if the + URL came from an explicit link or user input, 0 if brute-forced. + + Always makes a copy of req, res; they can be destroyed safely; via_link + set to 2 means we're sure it's a valid link; 1 means "probably". */ + +void maybe_add_pivot(struct http_request* req, struct http_response* res, + u8 via_link); + +/* Creates a working copy of a request for use in db and crawl functions. If all + is 0, does not copy path, query parameters, or POST data (but still + copies headers); and forces GET method. */ + +struct http_request* req_copy(struct http_request* req, + struct pivot_desc* pv, u8 all); + +/* Finds the host-level pivot point for global issues. */ + +struct pivot_desc* host_pivot(struct pivot_desc* pv); + +/* Case sensitivity helper. 
*/ + +u8 is_c_sens(struct pivot_desc* pv); + +/* Recorded security issues: */ + +/* - Informational data (non-specific security-relevant notes): */ + +#define PROB_NONE 0 /* Invalid */ + +#define PROB_SSL_CERT 10101 /* SSL issuer data */ + +#define PROB_NEW_COOKIE 10201 /* New cookie added */ +#define PROB_SERVER_CHANGE 10202 /* New Server: value seen */ +#define PROB_VIA_CHANGE 10203 /* New Via: value seen */ +#define PROB_X_CHANGE 10204 /* New X-*: value seen */ +#define PROB_NEW_404 10205 /* New 404 signatures seen */ + +#define PROB_NO_ACCESS 10401 /* Resource not accessible */ +#define PROB_AUTH_REQ 10402 /* Authentication requires */ +#define PROB_SERV_ERR 10403 /* Server error */ + +#define PROB_EXT_LINK 10501 /* External link */ +#define PROB_EXT_REDIR 10502 /* External redirector */ +#define PROB_MAIL_ADDR 10503 /* E-mail address seen */ +#define PROB_UNKNOWN_PROTO 10504 /* Unknown protocol in URL */ +#define PROB_UNKNOWN_FIELD 10505 /* Unknown form field */ + +#define PROB_FORM 10601 /* XSRF-safe form */ +#define PROB_PASS_FORM 10602 /* Password form */ + +#define PROB_USER_LINK 10701 /* User-supplied A link */ + +#define PROB_BAD_MIME_STAT 10801 /* Bad MIME type, low risk */ +#define PROB_GEN_MIME_STAT 10802 /* Generic MIME, low risk */ +#define PROB_BAD_CSET_STAT 10803 /* Bad charset, low risk */ +#define PROB_CFL_HDRS_STAT 10804 /* Conflicting hdr, low risk */ + +#define PROB_FUZZ_DIGIT 10901 /* Try fuzzing file name */ +#define PROB_OGNL 10902 /* OGNL-like parameter */ + +/* - Internal warnings (scan failures, etc): */ + +#define PROB_FETCH_FAIL 20101 /* Fetch failed. */ +#define PROB_LIMITS 20102 /* Crawl limits exceeded. */ + +#define PROB_404_FAIL 20201 /* Behavior probe failed. */ +#define PROB_IPS_FILTER 20202 /* IPS behavior detected. */ +#define PROB_IPS_FILTER_OFF 20203 /* IPS no longer active. */ +#define PROB_VARIES 20204 /* Response varies. */ + +#define PROB_NOT_DIR 20301 /* Node should be a dir. 
*/ + +/* - Low severity issues (limited impact or check specificity): */ + +#define PROB_URL_AUTH 30101 /* HTTP credentials in URL */ + +#define PROB_SSL_CERT_DATE 30201 /* SSL cert date invalid */ +#define PROB_SSL_SELF_CERT 30202 /* Self-signed SSL cert */ +#define PROB_SSL_BAD_HOST 30203 /* Certificate host mismatch */ +#define PROB_SSL_NO_CERT 30204 /* No certificate data? */ + +#define PROB_DIR_LIST 30301 /* Dir listing bypass */ + +#define PROB_URL_REDIR 30401 /* URL redirection */ +#define PROB_USER_URL 30402 /* URL content inclusion */ + +#define PROB_EXT_OBJ 30501 /* External obj standalone */ +#define PROB_MIXED_OBJ 30502 /* Mixed content standalone */ + +#define PROB_VULN_FORM 30601 /* Form w/o anti-XSRF token */ +#define PROB_JS_XSSI 30602 /* Script with no XSSI prot */ + +#define PROB_CACHE_LOW 30701 /* Cache nit-picking */ + +/* - Moderate severity issues (data compromise): */ + +#define PROB_BODY_XSS 40101 /* Document body XSS */ +#define PROB_URL_XSS 40102 /* URL-based XSS */ +#define PROB_HTTP_INJECT 40103 /* Header splitting */ +#define PROB_USER_URL_ACT 40104 /* Active user content */ + +#define PROB_EXT_SUB 40201 /* External subresource */ +#define PROB_MIXED_SUB 40202 /* Mixed content subresource */ + +#define PROB_BAD_MIME_DYN 40301 /* Bad MIME type, hi risk */ +#define PROB_GEN_MIME_DYN 40302 /* Generic MIME, hi risk */ +#define PROB_BAD_CSET_DYN 40304 /* Bad charset, hi risk */ +#define PROB_CFL_HDRS_DYN 40305 /* Conflicting hdr, hi risk */ + +#define PROB_FILE_POI 40401 /* Interesting file */ +#define PROB_ERROR_POI 40402 /* Interesting error message */ + +#define PROB_DIR_TRAVERSAL 40501 /* Directory traversal */ + +#define PROB_CACHE_HI 40601 /* Serious caching issues */ + +/* - High severity issues (system compromise): */ + +#define PROB_XML_INJECT 50101 /* Backend XML injection */ +#define PROB_SH_INJECT 50102 /* Shell cmd injection */ +#define PROB_SQL_INJECT 50103 /* SQL injection */ +#define PROB_FMT_STRING 50104 /* Format string 
attack */ +#define PROB_INT_OVER 50105 /* Integer overflow attack */ + +#define PROB_SQL_PARAM 50201 /* SQL-like parameter */ + +/* - Severity macros: */ + +#define PSEV(_x) ((_x) / 10000) +#define PSEV_INFO 1 +#define PSEV_WARN 2 +#define PSEV_LOW 3 +#define PSEV_MED 4 +#define PSEV_HI 5 + +/* Issue descriptor: */ + +struct issue_desc { + u32 type; /* PROB_* */ + u8* extra; /* Problem-specific string */ + struct http_request* req; /* HTTP request sent */ + struct http_response* res; /* HTTP response seen */ +}; + +/* Register a problem, if not duplicate (res, extra may be NULL): */ + +void problem(u32 type, struct http_request* req, struct http_response* res, + u8* extra, struct pivot_desc* pv, u8 allow_dup); + +/* Compare the checksums for two responses: */ + +u8 same_page(struct http_sig* sig1, struct http_sig* sig2); + +/* URL filtering constraints (exported from database.c): */ + +#define APPEND_FILTER(_ptr, _cnt, _val) do { \ + (_ptr) = ck_realloc(_ptr, ((_cnt) + 1) * sizeof(u8*)); \ + (_ptr)[_cnt] = (u8*)(_val); \ + (_cnt)++; \ + } while (0) + +extern u8 **deny_urls, **deny_strings, **allow_urls, **allow_domains, + **trust_domains; + +extern u32 num_deny_urls, + num_deny_strings, + num_allow_urls, + num_allow_domains, + num_trust_domains; + +extern u32 max_depth, + max_children, + max_trylist, + max_guesses; + +/* Check if the URL is permitted under current rules (0 = no, 1 = yes): */ + +u8 url_allowed_host(struct http_request* req); +u8 url_trusted_host(struct http_request* req); +u8 url_allowed(struct http_request* req); + +/* Keyword management: */ + +extern u8 dont_add_words; + +/* Adds a new keyword candidate to the "guess" list. */ + +void wordlist_add_guess(u8* text); + +/* Adds non-sanitized keywords to the list. */ + +void wordlist_confirm_word(u8* text); + +/* Returns wordlist item at a specified offset (NULL if no more available). */ + +u8* wordlist_get_word(u32 offset); + +/* Returns keyword candidate at a specified offset (or NULL). 
*/ + +u8* wordlist_get_guess(u32 offset); + +/* Returns extension at a specified offset (or NULL). */ + +u8* wordlist_get_extension(u32 offset); + +/* Loads keywords from file. */ + +void load_keywords(u8* fname, u32 purge_age); + +/* Saves all keywords to a file. */ + +void save_keywords(u8* fname); + +/* Database maintenance: */ + +/* Dumps pivot database, for debugging purposes. */ + +void dump_pivots(struct pivot_desc* cur, u8 nest); + +/* Deallocates all data, for debugging purposes. */ + +void destroy_database(); + +/* Prints DB stats. */ + +void database_stats(); + +/* XSS manager: */ + +/* Creates a new stored XSS id (buffer valid only until next call). */ + +u8* new_xss_tag(u8* prefix); + +/* Registers last XSS tag along with a completed http_request. */ + +void register_xss_tag(struct http_request* req); + +/* Returns request associated with a stored XSS id. */ + +struct http_request* get_xss_request(u32 xid, u32 sid); + +/* Dumps signature data: */ + +void dump_signature(struct http_sig* sig); + +/* Displays debug information for same_page() checks. */ + +void debug_same_page(struct http_sig* sig1, struct http_sig* sig2); + +#endif /* _HAVE_DATABASE_H */ + diff --git a/debug.h b/debug.h new file mode 100644 index 0000000..d7e0b78 --- /dev/null +++ b/debug.h @@ -0,0 +1,96 @@ +/* + + skipfish - debugging and messaging macros + ----------------------------------------- + + Author: Michal Zalewski + + Copyright 2009, 2010 by Google Inc. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and + limitations under the License. + +*/ + +#ifndef _HAVE_DEBUG_H +#define _HAVE_DEBUG_H + +#include +#include "config.h" + +#ifdef USE_COLOR +# define cBLK "\x1b[0;30m" +# define cRED "\x1b[0;31m" +# define cGRN "\x1b[0;32m" +# define cBRN "\x1b[0;33m" +# define cBLU "\x1b[0;34m" +# define cMGN "\x1b[0;35m" +# define cCYA "\x1b[0;36m" +# define cNOR "\x1b[0;37m" +# define cGRA "\x1b[1;30m" +# define cLRD "\x1b[1;31m" +# define cLGN "\x1b[1;32m" +# define cYEL "\x1b[1;33m" +# define cLBL "\x1b[1;34m" +# define cPIN "\x1b[1;35m" +# define cLCY "\x1b[1;36m" +# define cBRI "\x1b[1;37m" +#else +# define cBLK +# define cRED +# define cGRN +# define cBRN +# define cBLU +# define cMGN +# define cCYA +# define cNOR +# define cGRA +# define cLRD +# define cLGN +# define cYEL +# define cLBL +# define cPIN +# define cLCY +# define cBRI +#endif /* ^USE_COLOR */ + +#ifdef LOG_STDERR +# define DEBUG(x...) fprintf(stderr,x) +#else +# define DEBUG(x...) +#endif /* ^LOG_STDERR */ + +#define F_DEBUG(x...) fprintf(stderr,x) +#define SAY(x...) printf(x) + +#define WARN(x...) do { \ + F_DEBUG(cYEL "[!] WARNING: " cBRI x); \ + F_DEBUG(cNOR "\n"); \ + } while (0) + +#define FATAL(x...) do { \ + F_DEBUG(cLRD "[-] PROGRAM ABORT : " cBRI x); \ + F_DEBUG(cLRD "\n Stop location : " cNOR "%s(), %s:%u\n", \ + __FUNCTION__, __FILE__, __LINE__); \ + exit(1); \ + } while (0) + +#define PFATAL(x...) do { \ + F_DEBUG(cLRD "[-] SYSTEM ERROR : " cBRI x); \ + F_DEBUG(cLRD "\n Stop location : " cNOR "%s(), %s:%u\n", \ + __FUNCTION__, __FILE__, __LINE__); \ + perror(cLRD " OS message " cNOR); \ + exit(1); \ + } while (0) + + +#endif /* ! _HAVE_DEBUG_H */ diff --git a/dictionaries/README-FIRST b/dictionaries/README-FIRST new file mode 100644 index 0000000..cfa49df --- /dev/null +++ b/dictionaries/README-FIRST @@ -0,0 +1,186 @@ +This directory contains four alternative, hand-picked Skipfish dictionaries. 
+ +Before you pick one, you should understand several basic concepts related to +dictionary management in this scanner, as this topic is of critical importance +to the quality of your scans. + +----------------------------- +Dictionary management basics: +----------------------------- + +1) Each dictionary may consist of a number of extensions, and a number of + "regular" keywords. Extensions are considered just a special subset of + the keyword list. + +2) You can specify the dictionary to use with a -W option. The file must + conform to the following format: + + type hits total_age last_age keyword + + ...where 'type' is either 'e' or 'w' (extension or wordlist); 'hits' + is the total number of times this keyword resulted in a non-404 hit + in all previous scans; 'total_age' is the number of scan cycles this + word is in the dictionary; 'last_age' is the number of scan cycles + since the last 'hit'; and 'keyword' is the actual keyword. + + Do not duplicate extensions as keywords - if you already have 'html' as + an 'e' entry, there is no need to also create a 'w' one. + + There must be no empty or malformed lines, comments, etc, in the wordlist + file. Extension keywords must have no leading dot (e.g., 'exe', not '.exe'), + and all keywords should be NOT url-encoded (e.g., 'Program Files', not + 'Program%20Files'). No keyword should exceed 64 characters. + + If you omit -W in the command line, 'skipfish.wl' is assumed. + +3) When loading a dictionary, you can use -R option to drop any entries + that had no hits for a specified number of scans. + +4) Unless -L is specified in the command line, the scanner will also + automatically learn new keywords and extensions based on any links + discovered during the scan. + +5) Unless -L is specified, the scanner will also analyze pages and extract + words that would serve as keyword guesses. 
A capped number of guesses + is maintained by the scanner, with older entries being removed from the + list as new ones are found (the size of this jar is adjustable with the + -G option). + + These guesses would be tested along with regular keywords during brute-force + steps. If they result in a non-404 hit at some point, they are promoted to + the "proper" keyword list. + +6) Unless -V is specified in the command line, all newly discovered keywords + are saved back to the input wordlist file, along with their hit statistics. + +---------------------------------------------- +Dictionaries are used for the following tasks: +---------------------------------------------- + +1) When a new directory, or a file-like query or POST parameter is discovered, + the scanner attempts passing all possible values to discover new + files, directories, etc. + +2) If you did NOT specify -Y in the command line, the scanner also tests all + possible . pairs in these cases. Note that this may + result in several orders of magnitude more requests, but is the only way + to discover files such as 'backup.tar.gz', 'database.csv', etc. + +3) For any non-404 file or directory discovered by any other means, the scanner + also attempts all . combinations, to discover, + for example, entries such as 'index.php.old'. + +---------------------- +Supplied dictionaries: +---------------------- + +1) Empty dictionary (-). + + Simply create an empty file, then load it via -W. If you use this option + in conjunction with -L, this essentially inhibits all brute-force testing, + and results in an orderly, link-based crawl. + + If -L is not used, the crawler will still attempt brute-force, but only + based on the keywords and extensions discovered when crawling the site. + This means it will likely learn keywords such as 'index' or extensions + such as 'html' - but may never attempt probing for 'log', 'old', 'bak', etc. 
+
+  Both these variants are very useful for lightweight scans, but are not
+  particularly exhaustive.
+
+2) Extension-only dictionary (extensions-only.wl).
+
+  This dictionary contains about 90 common file extensions, and no other
+  keywords. It must be used in conjunction with -Y (otherwise, it will not
+  behave as expected).
+
+  This is often a better alternative to a null dictionary: the scanner will
+  still limit brute-force primarily to file names learned on the site, but
+  will know about extensions such as 'log' or 'old', and will test for them
+  accordingly.
+
+3) Basic extensions dictionary (minimal.wl).
+
+  This dictionary contains about 25 extensions, focusing on common entries
+  most likely to spell trouble (.bak, .old, .conf, .zip, etc); and about 1,700
+  hand-picked keywords.
+
+  This is useful for quick assessments where no obscure technologies are used.
+  The principal scan cost is about 42,000 requests per each fuzzed directory.
+  Using it without -L is recommended, as the list of extensions does not
+  include standard framework-specific cases (.asp, .jsp, .php, etc), and
+  these are best learned on the fly.
+
+  You can also use this dictionary with -Y option enabled, approximating the
+  behavior of most other security scanners; in this case, it will send only
+  about 1,700 requests per directory, and will look for 25 secondary extensions
+  only on otherwise discovered resources.
+
+4) Standard extensions dictionary (default.wl).
+
+  This dictionary contains about 60 common extensions, plus the same set of
+  1,700 keywords. The extensions cover most of the common, interesting web
+  resources.
+
+  This is a good starting point for assessments where scan times are not
+  a critical factor; the cost is about 100,000 requests per each fuzzed
+  directory.
+
+  In -Y mode, it behaves nearly identically to minimal.wl, but will test a
+  greater set of extensions on otherwise discovered resources, at a relatively
+  minor expense.
+
+5) Complete extensions dictionary (complete.wl).
+
+  Contains about 90 common extensions and 1,700 keywords. These extensions
+  cover a broader range of media types, including some less common programming
+  languages, image and video formats, etc.
+
+  Useful for comprehensive assessments, over 150,000 requests per each fuzzed
+  directory.
+
+  In -Y mode - see default.wl; it offers the best coverage of all three
+  wordlists at a relatively low cost.
+
+Of course, you can customize these dictionaries as you see fit. It might be,
+for example, a good idea to downgrade file extensions not likely to occur
+given the technologies used by your target host to regular 'w' records.
+
+Whichever option you choose, be sure to make a *copy* of this dictionary, and
+load that copy, not the original, via -W. The specified file will be
+overwritten with site-specific information (unless -V is used).
+
+----------------------------------
+Bah, these dictionaries are small!
+----------------------------------
+
+Keep in mind that web crawling is not password guessing; it is exceedingly
+unlikely for web servers to have directories or files named 'henceforth',
+'abating', or 'witlessly'. Because of this, using 200,000+ entry English
+wordlists, or similar data sets, is largely pointless.
+
+More importantly, doing so often leads to reduced coverage or unacceptable
+scan times; with a 200k wordlist and 80 extensions, trying all combinations
+for a single directory would take 30-40 hours against a slow server; and even
+with a fast one, at least 5 hours is to be expected.
+
+DirBuster uses a unique approach that seems promising at first sight - to
+base its wordlists on how often a particular keyword appeared in URLs seen
+on the Internet. This is interesting, but comes with two gotchas:
+
+  - Keywords related to popular websites and brands are heavily
+    overrepresented; DirBuster wordlists have 'bbc_news_24', 'beebie_bunny',
+    and 'koalabrothers' near the top of their list, but it is pretty unlikely
+    these keywords would be of any use in real-world assessments of a typical
+    site, unless it happens to be BBC.
+
+  - Some of the most interesting security-related keywords are not commonly
+    indexed, and may appear, say, on no more than a few dozen or a few
+    thousand crawled websites in the Google index. But, that does not make
+    'AggreSpy' or '.ssh/authorized_keys' any less interesting.
+
+Bottom line is, poor wordlists are one of the reasons why some other web
+security scanners perform worse than expected, so please - be careful. You
+will almost always be better off narrowing down or selectively extending the
+supplied set (and possibly contributing back your changes upstream!), than
+importing a giant wordlist from elsewhere.
diff --git a/dictionaries/complete.wl b/dictionaries/complete.wl new file mode 100644 index 0000000..1f150d5 --- /dev/null +++ b/dictionaries/complete.wl @@ -0,0 +1,1894 @@ +e 1 1 1 7z +e 1 1 1 asmx +e 1 1 1 asp +e 1 1 1 aspx +e 1 1 1 bak +e 1 1 1 bat +e 1 1 1 bin +e 1 1 1 bz2 +e 1 1 1 c +e 1 1 1 cc +e 1 1 1 cfg +e 1 1 1 cfm +e 1 1 1 cgi +e 1 1 1 class +e 1 1 1 cnf +e 1 1 1 conf +e 1 1 1 config +e 1 1 1 cpp +e 1 1 1 cs +e 1 1 1 csv +e 1 1 1 dat +e 1 1 1 db +e 1 1 1 dll +e 1 1 1 do +e 1 1 1 doc +e 1 1 1 dump +e 1 1 1 ep +e 1 1 1 err +e 1 1 1 error +e 1 1 1 exe +e 1 1 1 gif +e 1 1 1 gz +e 1 1 1 htm +e 1 1 1 html +e 1 1 1 inc +e 1 1 1 ini +e 1 1 1 java +e 1 1 1 jhtml +e 1 1 1 jpg +e 1 1 1 js +e 1 1 1 jsf +e 1 1 1 jsp +e 1 1 1 key +e 1 1 1 lib +e 1 1 1 log +e 1 1 1 lst +e 1 1 1 manifest +e 1 1 1 mdb +e 1 1 1 meta +e 1 1 1 msg +e 1 1 1 nsf +e 1 1 1 o +e 1 1 1 old +e 1 1 1 ora +e 1 1 1 orig +e 1 1 1 out +e 1 1 1 part +e 1 1 1 pdf +e 1 1 1 php +e 1 1 1 php3 +e 1 1 1 pl +e 1 1 1 pm +e 1 1 1 png +e 1 1 1 ppt +e 1 1 1 properties +e 1 1 1 py +e 1 1 1 rar +e 1 1 1 rss +e 1 1 1 rtf +e 1 1 1 save +e 1 1 1 sh +e 1 1 1 shtml +e 1 1 1 so +e 1 1 1 sql +e 1 1 1 stackdump +e 1 1 1 swf +e 1 1 1 tar +e 1 1 1 tar.bz2 +e 1 1 1 tar.gz +e 1 1 1 temp +e 1 1 1 test +e 1 1 1 tgz +e 1 1 1 tmp +e 1 1 1 trace +e 1 1 1 txt +e 1 1 1 vb +e 1 1 1 vbs +e 1 1 1 ws +e 1 1 1 xls +e 1 1 1 xml +e 1 1 1 xsl +e 1 1 1 zip +w 1 1 1 .bash_history +w 1 1 1 .bashrc +w 1 1 1 .cvsignore +w 1 1 1 .history +w 1 1 1 .htaccess +w 1 1 1 .htpasswd +w 1 1 1 .passwd +w 1 1 1 .perf +w 1 1 1 .ssh +w 1 1 1 .svn +w 1 1 1 .web +w 1 1 1 0 +w 1 1 1 00 +w 1 1 1 01 +w 1 1 1 02 +w 1 1 1 03 +w 1 1 1 04 +w 1 1 1 05 +w 1 1 1 06 +w 1 1 1 07 +w 1 1 1 08 +w 1 1 1 09 +w 1 1 1 1 +w 1 1 1 10 +w 1 1 1 100 +w 1 1 1 1000 +w 1 1 1 1001 +w 1 1 1 101 +w 1 1 1 11 +w 1 1 1 12 +w 1 1 1 13 +w 1 1 1 14 +w 1 1 1 15 +w 1 1 1 1990 +w 1 1 1 1991 +w 1 1 1 1992 +w 1 1 1 1993 +w 1 1 1 1994 +w 1 1 1 1995 +w 1 1 1 1996 +w 1 1 1 1997 +w 1 1 1 1998 +w 1 1 1 1999 
+w 1 1 1 2 +w 1 1 1 20 +w 1 1 1 200 +w 1 1 1 2000 +w 1 1 1 2001 +w 1 1 1 2002 +w 1 1 1 2003 +w 1 1 1 2004 +w 1 1 1 2005 +w 1 1 1 2006 +w 1 1 1 2007 +w 1 1 1 2008 +w 1 1 1 2009 +w 1 1 1 2010 +w 1 1 1 2011 +w 1 1 1 2012 +w 1 1 1 21 +w 1 1 1 22 +w 1 1 1 23 +w 1 1 1 24 +w 1 1 1 25 +w 1 1 1 2g +w 1 1 1 3 +w 1 1 1 300 +w 1 1 1 3g +w 1 1 1 4 +w 1 1 1 42 +w 1 1 1 5 +w 1 1 1 50 +w 1 1 1 500 +w 1 1 1 51 +w 1 1 1 6 +w 1 1 1 7 +w 1 1 1 8 +w 1 1 1 9 +w 1 1 1 ADM +w 1 1 1 ADMIN +w 1 1 1 AggreSpy +w 1 1 1 AppsLocalLogin +w 1 1 1 AppsLogin +w 1 1 1 BUILD +w 1 1 1 CMS +w 1 1 1 CVS +w 1 1 1 DB +w 1 1 1 DMSDump +w 1 1 1 Documents and Settings +w 1 1 1 Entries +w 1 1 1 FCKeditor +w 1 1 1 JMXSoapAdapter +w 1 1 1 LICENSE +w 1 1 1 MANIFEST.MF +w 1 1 1 META-INF +w 1 1 1 Makefile +w 1 1 1 OA +w 1 1 1 OAErrorDetailPage +w 1 1 1 OA_HTML +w 1 1 1 Program Files +w 1 1 1 README +w 1 1 1 Readme +w 1 1 1 Recycled +w 1 1 1 Root +w 1 1 1 SQL +w 1 1 1 SUNWmc +w 1 1 1 SiteScope +w 1 1 1 SiteServer +w 1 1 1 Spy +w 1 1 1 TEMP +w 1 1 1 TMP +w 1 1 1 TODO +w 1 1 1 Thumbs.db +w 1 1 1 WEB-INF +w 1 1 1 WS_FTP +w 1 1 1 XXX +w 1 1 1 _ +w 1 1 1 _adm +w 1 1 1 _admin +w 1 1 1 _files +w 1 1 1 _include +w 1 1 1 _js +w 1 1 1 _mem_bin +w 1 1 1 _old +w 1 1 1 _pages +w 1 1 1 _private +w 1 1 1 _res +w 1 1 1 _source +w 1 1 1 _src +w 1 1 1 _test +w 1 1 1 _vti_bin +w 1 1 1 _vti_cnf +w 1 1 1 _vti_pvt +w 1 1 1 _vti_txt +w 1 1 1 _www +w 1 1 1 a +w 1 1 1 aa +w 1 1 1 aaa +w 1 1 1 abc +w 1 1 1 abc123 +w 1 1 1 abcd +w 1 1 1 abcd1234 +w 1 1 1 about +w 1 1 1 access +w 1 1 1 access-log +w 1 1 1 access-log.1 +w 1 1 1 access.1 +w 1 1 1 access_log +w 1 1 1 access_log.1 +w 1 1 1 accessibility +w 1 1 1 account +w 1 1 1 accounting +w 1 1 1 accounts +w 1 1 1 action +w 1 1 1 actions +w 1 1 1 active +w 1 1 1 activex +w 1 1 1 ad +w 1 1 1 adclick +w 1 1 1 add +w 1 1 1 addressbook +w 1 1 1 adm +w 1 1 1 admin +w 1 1 1 admin_ +w 1 1 1 ads +w 1 1 1 adv +w 1 1 1 advertise +w 1 1 1 advertising +w 1 1 1 affiliate +w 1 1 1 affiliates +w 1 1 1 agenda 
+w 1 1 1 agent +w 1 1 1 agents +w 1 1 1 ajax +w 1 1 1 album +w 1 1 1 albums +w 1 1 1 alert +w 1 1 1 alerts +w 1 1 1 alias +w 1 1 1 aliases +w 1 1 1 all +w 1 1 1 alpha +w 1 1 1 alumni +w 1 1 1 analog +w 1 1 1 announcement +w 1 1 1 announcements +w 1 1 1 anon +w 1 1 1 anonymous +w 1 1 1 ansi +w 1 1 1 apac +w 1 1 1 apache +w 1 1 1 apexec +w 1 1 1 api +w 1 1 1 apis +w 1 1 1 app +w 1 1 1 appeal +w 1 1 1 appeals +w 1 1 1 append +w 1 1 1 appl +w 1 1 1 apple +w 1 1 1 appliation +w 1 1 1 applications +w 1 1 1 apps +w 1 1 1 apr +w 1 1 1 arch +w 1 1 1 archive +w 1 1 1 archives +w 1 1 1 array +w 1 1 1 art +w 1 1 1 article +w 1 1 1 articles +w 1 1 1 artwork +w 1 1 1 ascii +w 1 1 1 asdf +w 1 1 1 asset +w 1 1 1 assets +w 1 1 1 atom +w 1 1 1 attach +w 1 1 1 attachment +w 1 1 1 attachments +w 1 1 1 attachs +w 1 1 1 attic +w 1 1 1 audio +w 1 1 1 audit +w 1 1 1 audits +w 1 1 1 auth +w 1 1 1 author +w 1 1 1 authorized_keys +w 1 1 1 authors +w 1 1 1 auto +w 1 1 1 automatic +w 1 1 1 automation +w 1 1 1 avatar +w 1 1 1 avatars +w 1 1 1 award +w 1 1 1 awards +w 1 1 1 awl +w 1 1 1 awstats +w 1 1 1 b +w 1 1 1 b2b +w 1 1 1 b2c +w 1 1 1 back +w 1 1 1 backdoor +w 1 1 1 backend +w 1 1 1 backup +w 1 1 1 backups +w 1 1 1 bandwidth +w 1 1 1 bank +w 1 1 1 banks +w 1 1 1 banner +w 1 1 1 banners +w 1 1 1 bar +w 1 1 1 base +w 1 1 1 bash +w 1 1 1 basic +w 1 1 1 basket +w 1 1 1 baskets +w 1 1 1 batch +w 1 1 1 baz +w 1 1 1 bb +w 1 1 1 bb-hist +w 1 1 1 bb-histlog +w 1 1 1 bboard +w 1 1 1 bbs +w 1 1 1 beans +w 1 1 1 beehive +w 1 1 1 benefits +w 1 1 1 beta +w 1 1 1 bfc +w 1 1 1 big +w 1 1 1 bigip +w 1 1 1 bill +w 1 1 1 billing +w 1 1 1 binaries +w 1 1 1 binary +w 1 1 1 bins +w 1 1 1 bio +w 1 1 1 bios +w 1 1 1 biz +w 1 1 1 bkup +w 1 1 1 blah +w 1 1 1 blank +w 1 1 1 blog +w 1 1 1 blogger +w 1 1 1 bloggers +w 1 1 1 blogs +w 1 1 1 board +w 1 1 1 bofh +w 1 1 1 book +w 1 1 1 books +w 1 1 1 boot +w 1 1 1 bottom +w 1 1 1 broken +w 1 1 1 broker +w 1 1 1 browse +w 1 1 1 bs +w 1 1 1 bsd +w 1 1 1 bugs +w 1 1 1 build +w 
1 1 1 buildr +w 1 1 1 bulk +w 1 1 1 bullet +w 1 1 1 business +w 1 1 1 button +w 1 1 1 buttons +w 1 1 1 buy +w 1 1 1 buynow +w 1 1 1 bypass +w 1 1 1 ca +w 1 1 1 cache +w 1 1 1 cal +w 1 1 1 calendar +w 1 1 1 camel +w 1 1 1 car +w 1 1 1 card +w 1 1 1 cards +w 1 1 1 career +w 1 1 1 careers +w 1 1 1 cars +w 1 1 1 cart +w 1 1 1 carts +w 1 1 1 cat +w 1 1 1 catalog +w 1 1 1 catalogs +w 1 1 1 catalyst +w 1 1 1 categories +w 1 1 1 category +w 1 1 1 catinfo +w 1 1 1 cats +w 1 1 1 ccbill +w 1 1 1 cd +w 1 1 1 cerificate +w 1 1 1 cert +w 1 1 1 certificate +w 1 1 1 certificates +w 1 1 1 certs +w 1 1 1 cf +w 1 1 1 cfcache +w 1 1 1 cfdocs +w 1 1 1 cfide +w 1 1 1 cfusion +w 1 1 1 cgi-bin +w 1 1 1 cgi-bin2 +w 1 1 1 cgi-home +w 1 1 1 cgi-local +w 1 1 1 cgi-pub +w 1 1 1 cgi-script +w 1 1 1 cgi-shl +w 1 1 1 cgi-sys +w 1 1 1 cgi-web +w 1 1 1 cgi-win +w 1 1 1 cgibin +w 1 1 1 cgiwrap +w 1 1 1 cgm-web +w 1 1 1 change +w 1 1 1 changed +w 1 1 1 changes +w 1 1 1 charge +w 1 1 1 charges +w 1 1 1 chat +w 1 1 1 chats +w 1 1 1 checkout +w 1 1 1 child +w 1 1 1 children +w 1 1 1 cisco +w 1 1 1 cisweb +w 1 1 1 citrix +w 1 1 1 cl +w 1 1 1 claim +w 1 1 1 claims +w 1 1 1 classes +w 1 1 1 classified +w 1 1 1 classifieds +w 1 1 1 clear +w 1 1 1 click +w 1 1 1 clicks +w 1 1 1 client +w 1 1 1 clientaccesspolicy +w 1 1 1 clients +w 1 1 1 close +w 1 1 1 closed +w 1 1 1 closing +w 1 1 1 club +w 1 1 1 cluster +w 1 1 1 clusters +w 1 1 1 cmd +w 1 1 1 cms +w 1 1 1 cnf +w 1 1 1 cnt +w 1 1 1 cocoon +w 1 1 1 code +w 1 1 1 codec +w 1 1 1 codecs +w 1 1 1 codes +w 1 1 1 cognos +w 1 1 1 coldfusion +w 1 1 1 columns +w 1 1 1 com +w 1 1 1 comment +w 1 1 1 comments +w 1 1 1 commerce +w 1 1 1 commercial +w 1 1 1 common +w 1 1 1 communicator +w 1 1 1 community +w 1 1 1 compact +w 1 1 1 company +w 1 1 1 complaint +w 1 1 1 complaints +w 1 1 1 compliance +w 1 1 1 component +w 1 1 1 compressed +w 1 1 1 computer +w 1 1 1 computers +w 1 1 1 computing +w 1 1 1 conference +w 1 1 1 conferences +w 1 1 1 configs +w 1 1 1 console +w 1 1 1 
consumer +w 1 1 1 contact +w 1 1 1 contacts +w 1 1 1 content +w 1 1 1 contents +w 1 1 1 contract +w 1 1 1 contracts +w 1 1 1 control +w 1 1 1 controlpanel +w 1 1 1 cookie +w 1 1 1 cookies +w 1 1 1 copies +w 1 1 1 copy +w 1 1 1 copyright +w 1 1 1 core +w 1 1 1 corp +w 1 1 1 corpo +w 1 1 1 corporate +w 1 1 1 corrections +w 1 1 1 count +w 1 1 1 counter +w 1 1 1 counters +w 1 1 1 counts +w 1 1 1 course +w 1 1 1 courses +w 1 1 1 cover +w 1 1 1 cpanel +w 1 1 1 cr +w 1 1 1 crack +w 1 1 1 crash +w 1 1 1 crashes +w 1 1 1 create +w 1 1 1 credits +w 1 1 1 crm +w 1 1 1 cron +w 1 1 1 crons +w 1 1 1 crontab +w 1 1 1 crontabs +w 1 1 1 crossdomain +w 1 1 1 crypt +w 1 1 1 crypto +w 1 1 1 css +w 1 1 1 current +w 1 1 1 custom +w 1 1 1 custom-log +w 1 1 1 custom_log +w 1 1 1 customer +w 1 1 1 customers +w 1 1 1 cv +w 1 1 1 cxf +w 1 1 1 czcmdcvt +w 1 1 1 d +w 1 1 1 daemon +w 1 1 1 daily +w 1 1 1 dana-na +w 1 1 1 data +w 1 1 1 database +w 1 1 1 databases +w 1 1 1 date +w 1 1 1 dba +w 1 1 1 dbase +w 1 1 1 dbman +w 1 1 1 dc +w 1 1 1 dcforum +w 1 1 1 de +w 1 1 1 dealer +w 1 1 1 debug +w 1 1 1 decl +w 1 1 1 declaration +w 1 1 1 declarations +w 1 1 1 decode +w 1 1 1 decrypt +w 1 1 1 def +w 1 1 1 default +w 1 1 1 defaults +w 1 1 1 definition +w 1 1 1 definitions +w 1 1 1 del +w 1 1 1 delete +w 1 1 1 deleted +w 1 1 1 demo +w 1 1 1 demos +w 1 1 1 denied +w 1 1 1 deny +w 1 1 1 design +w 1 1 1 desktop +w 1 1 1 desktops +w 1 1 1 detail +w 1 1 1 details +w 1 1 1 dev +w 1 1 1 devel +w 1 1 1 developer +w 1 1 1 developers +w 1 1 1 development +w 1 1 1 device +w 1 1 1 devices +w 1 1 1 devs +w 1 1 1 df +w 1 1 1 dialog +w 1 1 1 dialogs +w 1 1 1 diff +w 1 1 1 diffs +w 1 1 1 digest +w 1 1 1 digg +w 1 1 1 dir +w 1 1 1 directories +w 1 1 1 directory +w 1 1 1 dirs +w 1 1 1 disabled +w 1 1 1 disclaimer +w 1 1 1 display +w 1 1 1 django +w 1 1 1 dl +w 1 1 1 dm +w 1 1 1 dm-config +w 1 1 1 dms +w 1 1 1 dms0 +w 1 1 1 dns +w 1 1 1 dock +w 1 1 1 docroot +w 1 1 1 docs +w 1 1 1 document +w 1 1 1 documentation +w 1 1 1 
documents +w 1 1 1 domain +w 1 1 1 domains +w 1 1 1 down +w 1 1 1 download +w 1 1 1 downloads +w 1 1 1 drop +w 1 1 1 dropped +w 1 1 1 drupal +w 1 1 1 dummy +w 1 1 1 dumps +w 1 1 1 dvd +w 1 1 1 dwr +w 1 1 1 dynamic +w 1 1 1 e +w 1 1 1 e2fs +w 1 1 1 ear +w 1 1 1 ecommerce +w 1 1 1 edge +w 1 1 1 edit +w 1 1 1 editor +w 1 1 1 edits +w 1 1 1 edu +w 1 1 1 education +w 1 1 1 ee +w 1 1 1 effort +w 1 1 1 efforts +w 1 1 1 egress +w 1 1 1 ejb +w 1 1 1 element +w 1 1 1 elements +w 1 1 1 em +w 1 1 1 email +w 1 1 1 emails +w 1 1 1 emea +w 1 1 1 employees +w 1 1 1 employment +w 1 1 1 empty +w 1 1 1 emu +w 1 1 1 emulator +w 1 1 1 en +w 1 1 1 en_US +w 1 1 1 encode +w 1 1 1 encrypt +w 1 1 1 eng +w 1 1 1 engine +w 1 1 1 english +w 1 1 1 enterprise +w 1 1 1 entertainment +w 1 1 1 entries +w 1 1 1 entry +w 1 1 1 env +w 1 1 1 environ +w 1 1 1 environment +w 1 1 1 error +w 1 1 1 error-log +w 1 1 1 error_log +w 1 1 1 errors +w 1 1 1 es +w 1 1 1 esale +w 1 1 1 esales +w 1 1 1 etc +w 1 1 1 europe +w 1 1 1 event +w 1 1 1 events +w 1 1 1 evil +w 1 1 1 evt +w 1 1 1 ews +w 1 1 1 ex +w 1 1 1 example +w 1 1 1 examples +w 1 1 1 excalibur +w 1 1 1 exchange +w 1 1 1 exec +w 1 1 1 export +w 1 1 1 ext +w 1 1 1 ext2 +w 1 1 1 extern +w 1 1 1 external +w 1 1 1 ezshopper +w 1 1 1 f +w 1 1 1 face +w 1 1 1 faces +w 1 1 1 faculty +w 1 1 1 fail +w 1 1 1 failure +w 1 1 1 family +w 1 1 1 faq +w 1 1 1 faqs +w 1 1 1 fcgi-bin +w 1 1 1 feature +w 1 1 1 features +w 1 1 1 feed +w 1 1 1 feedback +w 1 1 1 feeds +w 1 1 1 felix +w 1 1 1 field +w 1 1 1 fields +w 1 1 1 file +w 1 1 1 fileadmin +w 1 1 1 files +w 1 1 1 filez +w 1 1 1 finance +w 1 1 1 financial +w 1 1 1 find +w 1 1 1 finger +w 1 1 1 firewall +w 1 1 1 fixed +w 1 1 1 flags +w 1 1 1 flash +w 1 1 1 flow +w 1 1 1 flows +w 1 1 1 flv +w 1 1 1 fn +w 1 1 1 folder +w 1 1 1 folders +w 1 1 1 font +w 1 1 1 fonts +w 1 1 1 foo +w 1 1 1 footer +w 1 1 1 footers +w 1 1 1 form +w 1 1 1 formatting +w 1 1 1 formmail +w 1 1 1 forms +w 1 1 1 forrest +w 1 1 1 fortune +w 1 1 1 forum 
+w 1 1 1 forum1 +w 1 1 1 forum2 +w 1 1 1 forumdisplay +w 1 1 1 forums +w 1 1 1 forward +w 1 1 1 foto +w 1 1 1 foundation +w 1 1 1 fr +w 1 1 1 frame +w 1 1 1 frames +w 1 1 1 framework +w 1 1 1 free +w 1 1 1 freebsd +w 1 1 1 friend +w 1 1 1 friends +w 1 1 1 frob +w 1 1 1 frontend +w 1 1 1 fs +w 1 1 1 ftp +w 1 1 1 fuck +w 1 1 1 fuckoff +w 1 1 1 fuckyou +w 1 1 1 full +w 1 1 1 fun +w 1 1 1 func +w 1 1 1 funcs +w 1 1 1 function +w 1 1 1 functions +w 1 1 1 fusion +w 1 1 1 fw +w 1 1 1 g +w 1 1 1 galleries +w 1 1 1 gallery +w 1 1 1 game +w 1 1 1 games +w 1 1 1 ganglia +w 1 1 1 garbage +w 1 1 1 gateway +w 1 1 1 gb +w 1 1 1 geeklog +w 1 1 1 general +w 1 1 1 geronimo +w 1 1 1 get +w 1 1 1 getaccess +w 1 1 1 getjobid +w 1 1 1 gfx +w 1 1 1 gitweb +w 1 1 1 glimpse +w 1 1 1 global +w 1 1 1 globals +w 1 1 1 glossary +w 1 1 1 go +w 1 1 1 goaway +w 1 1 1 google +w 1 1 1 government +w 1 1 1 gprs +w 1 1 1 grant +w 1 1 1 grants +w 1 1 1 graphics +w 1 1 1 group +w 1 1 1 groupcp +w 1 1 1 groups +w 1 1 1 gsm +w 1 1 1 guest +w 1 1 1 guestbook +w 1 1 1 guests +w 1 1 1 guide +w 1 1 1 guides +w 1 1 1 gump +w 1 1 1 gwt +w 1 1 1 h +w 1 1 1 hack +w 1 1 1 hacker +w 1 1 1 hacking +w 1 1 1 hackme +w 1 1 1 hadoop +w 1 1 1 hardcore +w 1 1 1 hardware +w 1 1 1 harmony +w 1 1 1 head +w 1 1 1 header +w 1 1 1 headers +w 1 1 1 health +w 1 1 1 hello +w 1 1 1 help +w 1 1 1 helper +w 1 1 1 helpers +w 1 1 1 hi +w 1 1 1 hidden +w 1 1 1 hide +w 1 1 1 high +w 1 1 1 hipaa +w 1 1 1 history +w 1 1 1 hit +w 1 1 1 hits +w 1 1 1 hole +w 1 1 1 home +w 1 1 1 homepage +w 1 1 1 hop +w 1 1 1 horde +w 1 1 1 hosting +w 1 1 1 hosts +w 1 1 1 howto +w 1 1 1 hp +w 1 1 1 hr +w 1 1 1 hta +w 1 1 1 htbin +w 1 1 1 htdoc +w 1 1 1 htdocs +w 1 1 1 htpasswd +w 1 1 1 http +w 1 1 1 httpd +w 1 1 1 https +w 1 1 1 httpuser +w 1 1 1 hu +w 1 1 1 hyper +w 1 1 1 i +w 1 1 1 ia +w 1 1 1 ibm +w 1 1 1 icat +w 1 1 1 icon +w 1 1 1 icons +w 1 1 1 id +w 1 1 1 idea +w 1 1 1 ideas +w 1 1 1 ids +w 1 1 1 ie +w 1 1 1 iframe +w 1 1 1 ig +w 1 1 1 ignore +w 1 1 1 
iisadmin +w 1 1 1 iisadmpwd +w 1 1 1 iissamples +w 1 1 1 image +w 1 1 1 imagefolio +w 1 1 1 images +w 1 1 1 img +w 1 1 1 imgs +w 1 1 1 imp +w 1 1 1 import +w 1 1 1 important +w 1 1 1 in +w 1 1 1 inbound +w 1 1 1 incl +w 1 1 1 include +w 1 1 1 includes +w 1 1 1 incoming +w 1 1 1 incubator +w 1 1 1 index +w 1 1 1 index1 +w 1 1 1 index2 +w 1 1 1 index_1 +w 1 1 1 index_2 +w 1 1 1 inetpub +w 1 1 1 inetsrv +w 1 1 1 inf +w 1 1 1 info +w 1 1 1 information +w 1 1 1 ingress +w 1 1 1 init +w 1 1 1 inline +w 1 1 1 input +w 1 1 1 inquire +w 1 1 1 inquiries +w 1 1 1 inquiry +w 1 1 1 insert +w 1 1 1 install +w 1 1 1 int +w 1 1 1 interim +w 1 1 1 intermediate +w 1 1 1 internal +w 1 1 1 international +w 1 1 1 internet +w 1 1 1 intl +w 1 1 1 intranet +w 1 1 1 intro +w 1 1 1 ip +w 1 1 1 ipc +w 1 1 1 ips +w 1 1 1 irc +w 1 1 1 is +w 1 1 1 isapi +w 1 1 1 iso +w 1 1 1 issues +w 1 1 1 it +w 1 1 1 item +w 1 1 1 j +w 1 1 1 j2ee +w 1 1 1 j2me +w 1 1 1 jakarta +w 1 1 1 java-plugin +w 1 1 1 javadoc +w 1 1 1 javascript +w 1 1 1 javax +w 1 1 1 jboss +w 1 1 1 jdbc +w 1 1 1 jigsaw +w 1 1 1 jj +w 1 1 1 jmx-console +w 1 1 1 job +w 1 1 1 jobs +w 1 1 1 joe +w 1 1 1 john +w 1 1 1 join +w 1 1 1 joomla +w 1 1 1 journal +w 1 1 1 jp +w 1 1 1 jpa +w 1 1 1 jre +w 1 1 1 jrun +w 1 1 1 json +w 1 1 1 jsso +w 1 1 1 jsx +w 1 1 1 juniper +w 1 1 1 junk +w 1 1 1 jvm +w 1 1 1 k +w 1 1 1 kboard +w 1 1 1 keep +w 1 1 1 kernel +w 1 1 1 keygen +w 1 1 1 keys +w 1 1 1 kids +w 1 1 1 kill +w 1 1 1 known_hosts +w 1 1 1 l +w 1 1 1 labs +w 1 1 1 lang +w 1 1 1 large +w 1 1 1 law +w 1 1 1 layout +w 1 1 1 layouts +w 1 1 1 ldap +w 1 1 1 leader +w 1 1 1 leaders +w 1 1 1 left +w 1 1 1 legacy +w 1 1 1 legal +w 1 1 1 lenya +w 1 1 1 letters +w 1 1 1 level +w 1 1 1 lg +w 1 1 1 library +w 1 1 1 libs +w 1 1 1 license +w 1 1 1 licenses +w 1 1 1 line +w 1 1 1 link +w 1 1 1 links +w 1 1 1 linux +w 1 1 1 list +w 1 1 1 listinfo +w 1 1 1 lists +w 1 1 1 live +w 1 1 1 lo +w 1 1 1 loader +w 1 1 1 loading +w 1 1 1 loc +w 1 1 1 local +w 1 1 1 location 
+w 1 1 1 lock +w 1 1 1 locked +w 1 1 1 log4j +w 1 1 1 logfile +w 1 1 1 logging +w 1 1 1 login +w 1 1 1 logins +w 1 1 1 logo +w 1 1 1 logoff +w 1 1 1 logon +w 1 1 1 logos +w 1 1 1 logout +w 1 1 1 logs +w 1 1 1 lost+found +w 1 1 1 low +w 1 1 1 ls +w 1 1 1 lucene +w 1 1 1 m +w 1 1 1 mac +w 1 1 1 mail +w 1 1 1 mailer +w 1 1 1 mailing +w 1 1 1 mailman +w 1 1 1 mails +w 1 1 1 main +w 1 1 1 manage +w 1 1 1 management +w 1 1 1 manager +w 1 1 1 manual +w 1 1 1 manuals +w 1 1 1 map +w 1 1 1 maps +w 1 1 1 mark +w 1 1 1 marketing +w 1 1 1 master +w 1 1 1 master.passwd +w 1 1 1 match +w 1 1 1 matrix +w 1 1 1 maven +w 1 1 1 mbox +w 1 1 1 me +w 1 1 1 media +w 1 1 1 medium +w 1 1 1 mem +w 1 1 1 member +w 1 1 1 members +w 1 1 1 membership +w 1 1 1 memory +w 1 1 1 menu +w 1 1 1 messaging +w 1 1 1 microsoft +w 1 1 1 migrate +w 1 1 1 migration +w 1 1 1 mina +w 1 1 1 mirror +w 1 1 1 mirrors +w 1 1 1 misc +w 1 1 1 mission +w 1 1 1 mix +w 1 1 1 mms +w 1 1 1 mobi +w 1 1 1 mobile +w 1 1 1 mock +w 1 1 1 mod +w 1 1 1 modify +w 1 1 1 mods +w 1 1 1 module +w 1 1 1 modules +w 1 1 1 mojo +w 1 1 1 money +w 1 1 1 monitoring +w 1 1 1 more +w 1 1 1 move +w 1 1 1 movie +w 1 1 1 movies +w 1 1 1 mp +w 1 1 1 mp3 +w 1 1 1 mp3s +w 1 1 1 ms +w 1 1 1 ms-sql +w 1 1 1 msadc +w 1 1 1 msadm +w 1 1 1 msie +w 1 1 1 msql +w 1 1 1 mssql +w 1 1 1 mta +w 1 1 1 multimedia +w 1 1 1 music +w 1 1 1 mx +w 1 1 1 my +w 1 1 1 myfaces +w 1 1 1 myphpnuke +w 1 1 1 mysql +w 1 1 1 mysqld +w 1 1 1 n +w 1 1 1 nav +w 1 1 1 navigation +w 1 1 1 net +w 1 1 1 netbsd +w 1 1 1 nethome +w 1 1 1 nets +w 1 1 1 network +w 1 1 1 networking +w 1 1 1 new +w 1 1 1 news +w 1 1 1 newsletter +w 1 1 1 newsletters +w 1 1 1 next +w 1 1 1 nfs +w 1 1 1 nice +w 1 1 1 nl +w 1 1 1 nobody +w 1 1 1 node +w 1 1 1 none +w 1 1 1 note +w 1 1 1 notes +w 1 1 1 notification +w 1 1 1 notifications +w 1 1 1 notified +w 1 1 1 notify +w 1 1 1 ns +w 1 1 1 nuke +w 1 1 1 nul +w 1 1 1 null +w 1 1 1 oa_servlets +w 1 1 1 oauth +w 1 1 1 obdc +w 1 1 1 obsolete +w 1 1 1 
obsoleted +w 1 1 1 odbc +w 1 1 1 ode +w 1 1 1 oem +w 1 1 1 ofbiz +w 1 1 1 office +w 1 1 1 onbound +w 1 1 1 online +w 1 1 1 op +w 1 1 1 open +w 1 1 1 openbsd +w 1 1 1 opendir +w 1 1 1 openejb +w 1 1 1 openjpa +w 1 1 1 operations +w 1 1 1 opinion +w 1 1 1 oprocmgr-status +w 1 1 1 opt +w 1 1 1 option +w 1 1 1 options +w 1 1 1 oracle +w 1 1 1 oracle.xml.xsql.XSQLServlet +w 1 1 1 order +w 1 1 1 ordered +w 1 1 1 orders +w 1 1 1 org +w 1 1 1 osc +w 1 1 1 oscommerce +w 1 1 1 other +w 1 1 1 outgoing +w 1 1 1 outline +w 1 1 1 output +w 1 1 1 outreach +w 1 1 1 overview +w 1 1 1 owa +w 1 1 1 ows +w 1 1 1 ows-bin +w 1 1 1 p +w 1 1 1 p2p +w 1 1 1 pack +w 1 1 1 packages +w 1 1 1 page +w 1 1 1 page1 +w 1 1 1 page2 +w 1 1 1 page_1 +w 1 1 1 page_2 +w 1 1 1 pages +w 1 1 1 paid +w 1 1 1 panel +w 1 1 1 paper +w 1 1 1 papers +w 1 1 1 parse +w 1 1 1 partner +w 1 1 1 partners +w 1 1 1 party +w 1 1 1 pass +w 1 1 1 passwd +w 1 1 1 password +w 1 1 1 passwords +w 1 1 1 past +w 1 1 1 patch +w 1 1 1 patches +w 1 1 1 paypal +w 1 1 1 pc +w 1 1 1 pci +w 1 1 1 pda +w 1 1 1 pdfs +w 1 1 1 peek +w 1 1 1 pending +w 1 1 1 people +w 1 1 1 perf +w 1 1 1 performance +w 1 1 1 perl +w 1 1 1 personal +w 1 1 1 pg +w 1 1 1 phf +w 1 1 1 phone +w 1 1 1 phones +w 1 1 1 phorum +w 1 1 1 photo +w 1 1 1 photos +w 1 1 1 phpBB +w 1 1 1 phpBB2 +w 1 1 1 phpEventCalendar +w 1 1 1 phpMyAdmin +w 1 1 1 phpbb +w 1 1 1 phpmyadmin +w 1 1 1 phpnuke +w 1 1 1 phps +w 1 1 1 pic +w 1 1 1 pics +w 1 1 1 pictures +w 1 1 1 pii +w 1 1 1 ping +w 1 1 1 pipermail +w 1 1 1 piranha +w 1 1 1 pix +w 1 1 1 pixel +w 1 1 1 pkg +w 1 1 1 pkgs +w 1 1 1 plain +w 1 1 1 play +w 1 1 1 pls +w 1 1 1 plugin +w 1 1 1 plugins +w 1 1 1 pm +w 1 1 1 poi +w 1 1 1 policies +w 1 1 1 policy +w 1 1 1 politics +w 1 1 1 poll +w 1 1 1 polls +w 1 1 1 pop +w 1 1 1 pop3 +w 1 1 1 porn +w 1 1 1 port +w 1 1 1 portal +w 1 1 1 portals +w 1 1 1 portfolio +w 1 1 1 pos +w 1 1 1 post +w 1 1 1 posted +w 1 1 1 postgres +w 1 1 1 postgresql +w 1 1 1 postnuke +w 1 1 1 postpaid +w 1 1 1 
posts +w 1 1 1 pr +w 1 1 1 pr0n +w 1 1 1 premium +w 1 1 1 prepaid +w 1 1 1 presentation +w 1 1 1 presentations +w 1 1 1 preserve +w 1 1 1 press +w 1 1 1 preview +w 1 1 1 previews +w 1 1 1 previous +w 1 1 1 pricing +w 1 1 1 print +w 1 1 1 printenv +w 1 1 1 printer +w 1 1 1 printers +w 1 1 1 priv +w 1 1 1 privacy +w 1 1 1 private +w 1 1 1 problems +w 1 1 1 proc +w 1 1 1 procedures +w 1 1 1 prod +w 1 1 1 product +w 1 1 1 product_info +w 1 1 1 production +w 1 1 1 products +w 1 1 1 profile +w 1 1 1 profiles +w 1 1 1 profiling +w 1 1 1 program +w 1 1 1 programming +w 1 1 1 programs +w 1 1 1 project +w 1 1 1 projects +w 1 1 1 promo +w 1 1 1 prop +w 1 1 1 properties +w 1 1 1 property +w 1 1 1 props +w 1 1 1 protect +w 1 1 1 proto +w 1 1 1 proxies +w 1 1 1 proxy +w 1 1 1 prv +w 1 1 1 ps +w 1 1 1 psql +w 1 1 1 pt +w 1 1 1 pub +w 1 1 1 public +w 1 1 1 publication +w 1 1 1 publications +w 1 1 1 pubs +w 1 1 1 pull +w 1 1 1 purchase +w 1 1 1 purchases +w 1 1 1 purchasing +w 1 1 1 push +w 1 1 1 pw +w 1 1 1 pwd +w 1 1 1 python +w 1 1 1 q +w 1 1 1 qpid +w 1 1 1 queries +w 1 1 1 query +w 1 1 1 queue +w 1 1 1 queues +w 1 1 1 quote +w 1 1 1 quotes +w 1 1 1 r +w 1 1 1 radio +w 1 1 1 random +w 1 1 1 rdf +w 1 1 1 read +w 1 1 1 readme +w 1 1 1 realestate +w 1 1 1 receive +w 1 1 1 received +w 1 1 1 recharge +w 1 1 1 record +w 1 1 1 records +w 1 1 1 recovery +w 1 1 1 recycle +w 1 1 1 recycled +w 1 1 1 redir +w 1 1 1 redirect +w 1 1 1 reference +w 1 1 1 reg +w 1 1 1 register +w 1 1 1 registered +w 1 1 1 registration +w 1 1 1 registrations +w 1 1 1 release +w 1 1 1 releases +w 1 1 1 remote +w 1 1 1 remove +w 1 1 1 removed +w 1 1 1 render +w 1 1 1 rendered +w 1 1 1 rep +w 1 1 1 repl +w 1 1 1 replica +w 1 1 1 replicas +w 1 1 1 replicate +w 1 1 1 replicated +w 1 1 1 replication +w 1 1 1 replicator +w 1 1 1 reply +w 1 1 1 report +w 1 1 1 reporting +w 1 1 1 reports +w 1 1 1 reprints +w 1 1 1 req +w 1 1 1 reqs +w 1 1 1 request +w 1 1 1 requests +w 1 1 1 requisition +w 1 1 1 requisitions +w 1 1 1 
res +w 1 1 1 research +w 1 1 1 resin +w 1 1 1 resource +w 1 1 1 resources +w 1 1 1 rest +w 1 1 1 restore +w 1 1 1 restored +w 1 1 1 restricted +w 1 1 1 results +w 1 1 1 retail +w 1 1 1 reverse +w 1 1 1 reversed +w 1 1 1 revert +w 1 1 1 reverted +w 1 1 1 review +w 1 1 1 reviews +w 1 1 1 right +w 1 1 1 roam +w 1 1 1 roaming +w 1 1 1 robot +w 1 1 1 robots +w 1 1 1 roller +w 1 1 1 room +w 1 1 1 root +w 1 1 1 rpc +w 1 1 1 ru +w 1 1 1 rule +w 1 1 1 rules +w 1 1 1 run +w 1 1 1 rwservlet +w 1 1 1 s +w 1 1 1 sale +w 1 1 1 sales +w 1 1 1 sam +w 1 1 1 samba +w 1 1 1 sample +w 1 1 1 samples +w 1 1 1 sav +w 1 1 1 saved +w 1 1 1 saves +w 1 1 1 sbin +w 1 1 1 scan +w 1 1 1 scanned +w 1 1 1 scans +w 1 1 1 sched +w 1 1 1 schedule +w 1 1 1 scheduled +w 1 1 1 scheduling +w 1 1 1 schema +w 1 1 1 science +w 1 1 1 screen +w 1 1 1 screens +w 1 1 1 screenshot +w 1 1 1 screenshots +w 1 1 1 script +w 1 1 1 scriptlet +w 1 1 1 scriptlets +w 1 1 1 scripts +w 1 1 1 sdk +w 1 1 1 se +w 1 1 1 search +w 1 1 1 sec +w 1 1 1 secret +w 1 1 1 section +w 1 1 1 sections +w 1 1 1 secure +w 1 1 1 secured +w 1 1 1 security +w 1 1 1 seed +w 1 1 1 sell +w 1 1 1 send +w 1 1 1 sendmail +w 1 1 1 sendto +w 1 1 1 sent +w 1 1 1 serial +w 1 1 1 serv +w 1 1 1 serve +w 1 1 1 server +w 1 1 1 server-info +w 1 1 1 server-status +w 1 1 1 servers +w 1 1 1 service +w 1 1 1 services +w 1 1 1 servlet +w 1 1 1 servlets +w 1 1 1 session +w 1 1 1 sessions +w 1 1 1 setting +w 1 1 1 settings +w 1 1 1 setup +w 1 1 1 share +w 1 1 1 shared +w 1 1 1 shares +w 1 1 1 shell +w 1 1 1 ship +w 1 1 1 shipped +w 1 1 1 shipping +w 1 1 1 shop +w 1 1 1 shopper +w 1 1 1 shopping +w 1 1 1 shops +w 1 1 1 shoutbox +w 1 1 1 show +w 1 1 1 show_post +w 1 1 1 show_thread +w 1 1 1 showcat +w 1 1 1 showenv +w 1 1 1 showjobs +w 1 1 1 showmap +w 1 1 1 showmsg +w 1 1 1 showpost +w 1 1 1 showthread +w 1 1 1 sign +w 1 1 1 signoff +w 1 1 1 signon +w 1 1 1 signup +w 1 1 1 simple +w 1 1 1 sink +w 1 1 1 site +w 1 1 1 site-map +w 1 1 1 site_map +w 1 1 1 sitemap +w 1 
1 1 sites +w 1 1 1 skel +w 1 1 1 skin +w 1 1 1 skins +w 1 1 1 skip +w 1 1 1 sl +w 1 1 1 sling +w 1 1 1 sm +w 1 1 1 small +w 1 1 1 sms +w 1 1 1 smtp +w 1 1 1 snoop +w 1 1 1 soap +w 1 1 1 soaprouter +w 1 1 1 soft +w 1 1 1 software +w 1 1 1 solaris +w 1 1 1 sold +w 1 1 1 solution +w 1 1 1 solutions +w 1 1 1 source +w 1 1 1 sources +w 1 1 1 soutbox +w 1 1 1 sox +w 1 1 1 sp +w 1 1 1 space +w 1 1 1 spacer +w 1 1 1 spam +w 1 1 1 special +w 1 1 1 specials +w 1 1 1 sponsor +w 1 1 1 sponsors +w 1 1 1 spool +w 1 1 1 sport +w 1 1 1 sports +w 1 1 1 sqlnet +w 1 1 1 squirrel +w 1 1 1 squirrelmail +w 1 1 1 src +w 1 1 1 srv +w 1 1 1 ss +w 1 1 1 ssh +w 1 1 1 ssi +w 1 1 1 ssl +w 1 1 1 sslvpn +w 1 1 1 ssn +w 1 1 1 sso +w 1 1 1 staff +w 1 1 1 staging +w 1 1 1 standard +w 1 1 1 standards +w 1 1 1 star +w 1 1 1 start +w 1 1 1 stat +w 1 1 1 statement +w 1 1 1 statements +w 1 1 1 static +w 1 1 1 staticpages +w 1 1 1 statistic +w 1 1 1 statistics +w 1 1 1 stats +w 1 1 1 status +w 1 1 1 stock +w 1 1 1 storage +w 1 1 1 store +w 1 1 1 stored +w 1 1 1 stories +w 1 1 1 story +w 1 1 1 strut +w 1 1 1 struts +w 1 1 1 student +w 1 1 1 students +w 1 1 1 stuff +w 1 1 1 style +w 1 1 1 styles +w 1 1 1 submissions +w 1 1 1 submit +w 1 1 1 subscribe +w 1 1 1 subscriber +w 1 1 1 subscribers +w 1 1 1 subscription +w 1 1 1 subscriptions +w 1 1 1 success +w 1 1 1 suite +w 1 1 1 suites +w 1 1 1 sun +w 1 1 1 sunos +w 1 1 1 super +w 1 1 1 support +w 1 1 1 surf +w 1 1 1 survey +w 1 1 1 surveys +w 1 1 1 sws +w 1 1 1 synapse +w 1 1 1 sync +w 1 1 1 synced +w 1 1 1 sys +w 1 1 1 system +w 1 1 1 systems +w 1 1 1 sysuser +w 1 1 1 t +w 1 1 1 tag +w 1 1 1 tags +w 1 1 1 tape +w 1 1 1 tapes +w 1 1 1 tapestry +w 1 1 1 tb +w 1 1 1 tcl +w 1 1 1 team +w 1 1 1 tech +w 1 1 1 technical +w 1 1 1 technology +w 1 1 1 tel +w 1 1 1 tele +w 1 1 1 templ +w 1 1 1 template +w 1 1 1 templates +w 1 1 1 terms +w 1 1 1 test-cgi +w 1 1 1 test-env +w 1 1 1 test1 +w 1 1 1 test123 +w 1 1 1 test1234 +w 1 1 1 test2 +w 1 1 1 test3 +w 1 1 1 
testimonial +w 1 1 1 testimonials +w 1 1 1 testing +w 1 1 1 tests +w 1 1 1 texis +w 1 1 1 text +w 1 1 1 texts +w 1 1 1 theme +w 1 1 1 themes +w 1 1 1 thread +w 1 1 1 threads +w 1 1 1 thumb +w 1 1 1 thumbnail +w 1 1 1 thumbnails +w 1 1 1 thumbs +w 1 1 1 tickets +w 1 1 1 tiki +w 1 1 1 tiles +w 1 1 1 tip +w 1 1 1 tips +w 1 1 1 title +w 1 1 1 tls +w 1 1 1 tmpl +w 1 1 1 tmps +w 1 1 1 tn +w 1 1 1 toc +w 1 1 1 todo +w 1 1 1 toggle +w 1 1 1 tomcat +w 1 1 1 tool +w 1 1 1 toolbar +w 1 1 1 toolkit +w 1 1 1 tools +w 1 1 1 top +w 1 1 1 topic +w 1 1 1 topics +w 1 1 1 torrent +w 1 1 1 torrents +w 1 1 1 tos +w 1 1 1 tour +w 1 1 1 tpl +w 1 1 1 tpv +w 1 1 1 tr +w 1 1 1 traceroute +w 1 1 1 traces +w 1 1 1 track +w 1 1 1 trackback +w 1 1 1 tracker +w 1 1 1 trackers +w 1 1 1 tracking +w 1 1 1 tracks +w 1 1 1 traffic +w 1 1 1 trailer +w 1 1 1 trailers +w 1 1 1 training +w 1 1 1 trans +w 1 1 1 transparent +w 1 1 1 transport +w 1 1 1 trash +w 1 1 1 travel +w 1 1 1 treasury +w 1 1 1 tree +w 1 1 1 trees +w 1 1 1 trial +w 1 1 1 trunk +w 1 1 1 tsweb +w 1 1 1 tt +w 1 1 1 turbine +w 1 1 1 tuscany +w 1 1 1 tutorial +w 1 1 1 tutorials +w 1 1 1 tv +w 1 1 1 tweak +w 1 1 1 type +w 1 1 1 typo3 +w 1 1 1 typo3conf +w 1 1 1 u +w 1 1 1 ubb +w 1 1 1 uds +w 1 1 1 uk +w 1 1 1 umts +w 1 1 1 union +w 1 1 1 unix +w 1 1 1 unlock +w 1 1 1 unreg +w 1 1 1 unregister +w 1 1 1 up +w 1 1 1 upd +w 1 1 1 update +w 1 1 1 updated +w 1 1 1 updater +w 1 1 1 updates +w 1 1 1 upload +w 1 1 1 uploads +w 1 1 1 url +w 1 1 1 us +w 1 1 1 usa +w 1 1 1 usage +w 1 1 1 user +w 1 1 1 userlog +w 1 1 1 users +w 1 1 1 usr +w 1 1 1 util +w 1 1 1 utilities +w 1 1 1 utility +w 1 1 1 utils +w 1 1 1 v +w 1 1 1 v1 +w 1 1 1 v2 +w 1 1 1 var +w 1 1 1 vault +w 1 1 1 vector +w 1 1 1 velocity +w 1 1 1 vendor +w 1 1 1 ver +w 1 1 1 ver1 +w 1 1 1 ver2 +w 1 1 1 version +w 1 1 1 vfs +w 1 1 1 video +w 1 1 1 videos +w 1 1 1 view +w 1 1 1 view-source +w 1 1 1 viewcvs +w 1 1 1 viewforum +w 1 1 1 viewonline +w 1 1 1 views +w 1 1 1 viewsource +w 1 1 1 viewsvn 
+w 1 1 1 viewtopic +w 1 1 1 viewvc +w 1 1 1 virtual +w 1 1 1 vm +w 1 1 1 voip +w 1 1 1 vol +w 1 1 1 vpn +w 1 1 1 w +w 1 1 1 w3 +w 1 1 1 w3c +w 1 1 1 wa +w 1 1 1 wap +w 1 1 1 war +w 1 1 1 warez +w 1 1 1 way-board +w 1 1 1 wbboard +w 1 1 1 wc +w 1 1 1 weather +w 1 1 1 web +w 1 1 1 web-beans +w 1 1 1 web-console +w 1 1 1 webaccess +w 1 1 1 webadmin +w 1 1 1 webagent +w 1 1 1 webalizer +w 1 1 1 webapp +w 1 1 1 webb +w 1 1 1 webbbs +w 1 1 1 webboard +w 1 1 1 webcalendar +w 1 1 1 webcart +w 1 1 1 webcasts +w 1 1 1 webcgi +w 1 1 1 webchat +w 1 1 1 webdata +w 1 1 1 webdav +w 1 1 1 weblog +w 1 1 1 weblogic +w 1 1 1 weblogs +w 1 1 1 webmail +w 1 1 1 webplus +w 1 1 1 webshop +w 1 1 1 website +w 1 1 1 websphere +w 1 1 1 webstats +w 1 1 1 websvn +w 1 1 1 webwork +w 1 1 1 welcome +w 1 1 1 whitepapers +w 1 1 1 whois +w 1 1 1 whosonline +w 1 1 1 wicket +w 1 1 1 wiki +w 1 1 1 win +w 1 1 1 win32 +w 1 1 1 windows +w 1 1 1 winnt +w 1 1 1 wireless +w 1 1 1 wml +w 1 1 1 word +w 1 1 1 wordpress +w 1 1 1 work +w 1 1 1 working +w 1 1 1 world +w 1 1 1 wp +w 1 1 1 wp-content +w 1 1 1 wp-includes +w 1 1 1 wp-login +w 1 1 1 wrap +w 1 1 1 ws-client +w 1 1 1 ws_ftp +w 1 1 1 wtai +w 1 1 1 www +w 1 1 1 www-sql +w 1 1 1 www1 +w 1 1 1 www2 +w 1 1 1 www3 +w 1 1 1 wwwboard +w 1 1 1 wwwroot +w 1 1 1 wwwstats +w 1 1 1 wwwthreads +w 1 1 1 wwwuser +w 1 1 1 x +w 1 1 1 xalan +w 1 1 1 xerces +w 1 1 1 xhtml +w 1 1 1 xmlrpc +w 1 1 1 xslt +w 1 1 1 xsql +w 1 1 1 xxx +w 1 1 1 xyzzy +w 1 1 1 y +w 1 1 1 yahoo +w 1 1 1 youtube +w 1 1 1 yt +w 1 1 1 z +w 1 1 1 zboard +w 1 1 1 zend +w 1 1 1 zero +w 1 1 1 zipfiles +w 1 1 1 zips +w 1 1 1 zope +w 1 1 1 zorum +w 1 1 1 ~admin +w 1 1 1 ~apache +w 1 1 1 ~bin +w 1 1 1 ~bob +w 1 1 1 ~ftp +w 1 1 1 ~guest +w 1 1 1 ~http +w 1 1 1 ~httpd +w 1 1 1 ~john +w 1 1 1 ~log +w 1 1 1 ~logs +w 1 1 1 ~lp +w 1 1 1 ~mark +w 1 1 1 ~matt +w 1 1 1 ~nobody +w 1 1 1 ~root +w 1 1 1 ~test +w 1 1 1 ~tmp +w 1 1 1 ~www diff --git a/dictionaries/default.wl b/dictionaries/default.wl new file mode 100644 
index 0000000..f14f950 --- /dev/null +++ b/dictionaries/default.wl @@ -0,0 +1,1893 @@ +e 1 1 1 asmx +e 1 1 1 asp +e 1 1 1 aspx +e 1 1 1 bak +e 1 1 1 bat +e 1 1 1 cc +e 1 1 1 cfg +e 1 1 1 cfm +e 1 1 1 cgi +e 1 1 1 class +e 1 1 1 cnf +e 1 1 1 conf +e 1 1 1 config +e 1 1 1 cpp +e 1 1 1 csv +e 1 1 1 dat +e 1 1 1 db +e 1 1 1 dll +e 1 1 1 err +e 1 1 1 error +e 1 1 1 exe +e 1 1 1 gz +e 1 1 1 htm +e 1 1 1 html +e 1 1 1 inc +e 1 1 1 ini +e 1 1 1 java +e 1 1 1 jhtml +e 1 1 1 js +e 1 1 1 jsf +e 1 1 1 jsp +e 1 1 1 key +e 1 1 1 log +e 1 1 1 mdb +e 1 1 1 nsf +e 1 1 1 old +e 1 1 1 ora +e 1 1 1 orig +e 1 1 1 out +e 1 1 1 part +e 1 1 1 php +e 1 1 1 php3 +e 1 1 1 pl +e 1 1 1 pm +e 1 1 1 py +e 1 1 1 rss +e 1 1 1 sh +e 1 1 1 shtml +e 1 1 1 sql +e 1 1 1 stackdump +e 1 1 1 tar.gz +e 1 1 1 temp +e 1 1 1 test +e 1 1 1 tgz +e 1 1 1 tmp +e 1 1 1 txt +e 1 1 1 vb +e 1 1 1 vbs +e 1 1 1 ws +e 1 1 1 xls +e 1 1 1 xml +e 1 1 1 xsl +e 1 1 1 zip +w 1 1 1 .bash_history +w 1 1 1 .bashrc +w 1 1 1 .cvsignore +w 1 1 1 .history +w 1 1 1 .htaccess +w 1 1 1 .htpasswd +w 1 1 1 .passwd +w 1 1 1 .perf +w 1 1 1 .ssh +w 1 1 1 .svn +w 1 1 1 .web +w 1 1 1 0 +w 1 1 1 00 +w 1 1 1 01 +w 1 1 1 02 +w 1 1 1 03 +w 1 1 1 04 +w 1 1 1 05 +w 1 1 1 06 +w 1 1 1 07 +w 1 1 1 08 +w 1 1 1 09 +w 1 1 1 1 +w 1 1 1 10 +w 1 1 1 100 +w 1 1 1 1000 +w 1 1 1 1001 +w 1 1 1 101 +w 1 1 1 11 +w 1 1 1 12 +w 1 1 1 13 +w 1 1 1 14 +w 1 1 1 15 +w 1 1 1 1990 +w 1 1 1 1991 +w 1 1 1 1992 +w 1 1 1 1993 +w 1 1 1 1994 +w 1 1 1 1995 +w 1 1 1 1996 +w 1 1 1 1997 +w 1 1 1 1998 +w 1 1 1 1999 +w 1 1 1 2 +w 1 1 1 20 +w 1 1 1 200 +w 1 1 1 2000 +w 1 1 1 2001 +w 1 1 1 2002 +w 1 1 1 2003 +w 1 1 1 2004 +w 1 1 1 2005 +w 1 1 1 2006 +w 1 1 1 2007 +w 1 1 1 2008 +w 1 1 1 2009 +w 1 1 1 2010 +w 1 1 1 2011 +w 1 1 1 2012 +w 1 1 1 21 +w 1 1 1 22 +w 1 1 1 23 +w 1 1 1 24 +w 1 1 1 25 +w 1 1 1 2g +w 1 1 1 3 +w 1 1 1 300 +w 1 1 1 3g +w 1 1 1 4 +w 1 1 1 42 +w 1 1 1 5 +w 1 1 1 50 +w 1 1 1 500 +w 1 1 1 51 +w 1 1 1 6 +w 1 1 1 7 +w 1 1 1 7z +w 1 1 1 8 +w 1 1 1 9 +w 1 1 1 ADM +w 1 1 1 
ADMIN +w 1 1 1 AggreSpy +w 1 1 1 AppsLocalLogin +w 1 1 1 AppsLogin +w 1 1 1 BUILD +w 1 1 1 CMS +w 1 1 1 CVS +w 1 1 1 DB +w 1 1 1 DMSDump +w 1 1 1 Documents and Settings +w 1 1 1 Entries +w 1 1 1 FCKeditor +w 1 1 1 JMXSoapAdapter +w 1 1 1 LICENSE +w 1 1 1 MANIFEST.MF +w 1 1 1 META-INF +w 1 1 1 Makefile +w 1 1 1 OA +w 1 1 1 OAErrorDetailPage +w 1 1 1 OA_HTML +w 1 1 1 Program Files +w 1 1 1 README +w 1 1 1 Readme +w 1 1 1 Recycled +w 1 1 1 Root +w 1 1 1 SQL +w 1 1 1 SUNWmc +w 1 1 1 SiteScope +w 1 1 1 SiteServer +w 1 1 1 Spy +w 1 1 1 TEMP +w 1 1 1 TMP +w 1 1 1 TODO +w 1 1 1 Thumbs.db +w 1 1 1 WEB-INF +w 1 1 1 WS_FTP +w 1 1 1 XXX +w 1 1 1 _ +w 1 1 1 _adm +w 1 1 1 _admin +w 1 1 1 _files +w 1 1 1 _include +w 1 1 1 _js +w 1 1 1 _mem_bin +w 1 1 1 _old +w 1 1 1 _pages +w 1 1 1 _private +w 1 1 1 _res +w 1 1 1 _source +w 1 1 1 _src +w 1 1 1 _test +w 1 1 1 _vti_bin +w 1 1 1 _vti_cnf +w 1 1 1 _vti_pvt +w 1 1 1 _vti_txt +w 1 1 1 _www +w 1 1 1 a +w 1 1 1 aa +w 1 1 1 aaa +w 1 1 1 abc +w 1 1 1 abc123 +w 1 1 1 abcd +w 1 1 1 abcd1234 +w 1 1 1 about +w 1 1 1 access +w 1 1 1 access-log +w 1 1 1 access-log.1 +w 1 1 1 access.1 +w 1 1 1 access_log +w 1 1 1 access_log.1 +w 1 1 1 accessibility +w 1 1 1 account +w 1 1 1 accounting +w 1 1 1 accounts +w 1 1 1 action +w 1 1 1 actions +w 1 1 1 active +w 1 1 1 activex +w 1 1 1 ad +w 1 1 1 adclick +w 1 1 1 add +w 1 1 1 addressbook +w 1 1 1 adm +w 1 1 1 admin +w 1 1 1 admin_ +w 1 1 1 ads +w 1 1 1 adv +w 1 1 1 advertise +w 1 1 1 advertising +w 1 1 1 affiliate +w 1 1 1 affiliates +w 1 1 1 agenda +w 1 1 1 agent +w 1 1 1 agents +w 1 1 1 ajax +w 1 1 1 album +w 1 1 1 albums +w 1 1 1 alert +w 1 1 1 alerts +w 1 1 1 alias +w 1 1 1 aliases +w 1 1 1 all +w 1 1 1 alpha +w 1 1 1 alumni +w 1 1 1 analog +w 1 1 1 announcement +w 1 1 1 announcements +w 1 1 1 anon +w 1 1 1 anonymous +w 1 1 1 ansi +w 1 1 1 apac +w 1 1 1 apache +w 1 1 1 apexec +w 1 1 1 api +w 1 1 1 apis +w 1 1 1 app +w 1 1 1 appeal +w 1 1 1 appeals +w 1 1 1 append +w 1 1 1 appl +w 1 1 1 apple +w 1 1 1 
appliation +w 1 1 1 applications +w 1 1 1 apps +w 1 1 1 apr +w 1 1 1 arch +w 1 1 1 archive +w 1 1 1 archives +w 1 1 1 array +w 1 1 1 art +w 1 1 1 article +w 1 1 1 articles +w 1 1 1 artwork +w 1 1 1 ascii +w 1 1 1 asdf +w 1 1 1 asset +w 1 1 1 assets +w 1 1 1 atom +w 1 1 1 attach +w 1 1 1 attachment +w 1 1 1 attachments +w 1 1 1 attachs +w 1 1 1 attic +w 1 1 1 audio +w 1 1 1 audit +w 1 1 1 audits +w 1 1 1 auth +w 1 1 1 author +w 1 1 1 authorized_keys +w 1 1 1 authors +w 1 1 1 auto +w 1 1 1 automatic +w 1 1 1 automation +w 1 1 1 avatar +w 1 1 1 avatars +w 1 1 1 award +w 1 1 1 awards +w 1 1 1 awl +w 1 1 1 awstats +w 1 1 1 b +w 1 1 1 b2b +w 1 1 1 b2c +w 1 1 1 back +w 1 1 1 backdoor +w 1 1 1 backend +w 1 1 1 backup +w 1 1 1 backups +w 1 1 1 bandwidth +w 1 1 1 bank +w 1 1 1 banks +w 1 1 1 banner +w 1 1 1 banners +w 1 1 1 bar +w 1 1 1 base +w 1 1 1 bash +w 1 1 1 basic +w 1 1 1 basket +w 1 1 1 baskets +w 1 1 1 batch +w 1 1 1 baz +w 1 1 1 bb +w 1 1 1 bb-hist +w 1 1 1 bb-histlog +w 1 1 1 bboard +w 1 1 1 bbs +w 1 1 1 beans +w 1 1 1 beehive +w 1 1 1 benefits +w 1 1 1 beta +w 1 1 1 bfc +w 1 1 1 big +w 1 1 1 bigip +w 1 1 1 bill +w 1 1 1 billing +w 1 1 1 bin +w 1 1 1 binaries +w 1 1 1 binary +w 1 1 1 bins +w 1 1 1 bio +w 1 1 1 bios +w 1 1 1 biz +w 1 1 1 bkup +w 1 1 1 blah +w 1 1 1 blank +w 1 1 1 blog +w 1 1 1 blogger +w 1 1 1 bloggers +w 1 1 1 blogs +w 1 1 1 board +w 1 1 1 bofh +w 1 1 1 book +w 1 1 1 books +w 1 1 1 boot +w 1 1 1 bottom +w 1 1 1 broken +w 1 1 1 broker +w 1 1 1 browse +w 1 1 1 bs +w 1 1 1 bsd +w 1 1 1 bugs +w 1 1 1 build +w 1 1 1 buildr +w 1 1 1 bulk +w 1 1 1 bullet +w 1 1 1 business +w 1 1 1 button +w 1 1 1 buttons +w 1 1 1 buy +w 1 1 1 buynow +w 1 1 1 bypass +w 1 1 1 bz2 +w 1 1 1 c +w 1 1 1 ca +w 1 1 1 cache +w 1 1 1 cal +w 1 1 1 calendar +w 1 1 1 camel +w 1 1 1 car +w 1 1 1 card +w 1 1 1 cards +w 1 1 1 career +w 1 1 1 careers +w 1 1 1 cars +w 1 1 1 cart +w 1 1 1 carts +w 1 1 1 cat +w 1 1 1 catalog +w 1 1 1 catalogs +w 1 1 1 catalyst +w 1 1 1 categories +w 1 1 1 
category +w 1 1 1 catinfo +w 1 1 1 cats +w 1 1 1 ccbill +w 1 1 1 cd +w 1 1 1 cerificate +w 1 1 1 cert +w 1 1 1 certificate +w 1 1 1 certificates +w 1 1 1 certs +w 1 1 1 cf +w 1 1 1 cfcache +w 1 1 1 cfdocs +w 1 1 1 cfide +w 1 1 1 cfusion +w 1 1 1 cgi-bin +w 1 1 1 cgi-bin2 +w 1 1 1 cgi-home +w 1 1 1 cgi-local +w 1 1 1 cgi-pub +w 1 1 1 cgi-script +w 1 1 1 cgi-shl +w 1 1 1 cgi-sys +w 1 1 1 cgi-web +w 1 1 1 cgi-win +w 1 1 1 cgibin +w 1 1 1 cgiwrap +w 1 1 1 cgm-web +w 1 1 1 change +w 1 1 1 changed +w 1 1 1 changes +w 1 1 1 charge +w 1 1 1 charges +w 1 1 1 chat +w 1 1 1 chats +w 1 1 1 checkout +w 1 1 1 child +w 1 1 1 children +w 1 1 1 cisco +w 1 1 1 cisweb +w 1 1 1 citrix +w 1 1 1 cl +w 1 1 1 claim +w 1 1 1 claims +w 1 1 1 classes +w 1 1 1 classified +w 1 1 1 classifieds +w 1 1 1 clear +w 1 1 1 click +w 1 1 1 clicks +w 1 1 1 client +w 1 1 1 clientaccesspolicy +w 1 1 1 clients +w 1 1 1 close +w 1 1 1 closed +w 1 1 1 closing +w 1 1 1 club +w 1 1 1 cluster +w 1 1 1 clusters +w 1 1 1 cmd +w 1 1 1 cms +w 1 1 1 cnf +w 1 1 1 cnt +w 1 1 1 cocoon +w 1 1 1 code +w 1 1 1 codec +w 1 1 1 codecs +w 1 1 1 codes +w 1 1 1 cognos +w 1 1 1 coldfusion +w 1 1 1 columns +w 1 1 1 com +w 1 1 1 comment +w 1 1 1 comments +w 1 1 1 commerce +w 1 1 1 commercial +w 1 1 1 common +w 1 1 1 communicator +w 1 1 1 community +w 1 1 1 compact +w 1 1 1 company +w 1 1 1 complaint +w 1 1 1 complaints +w 1 1 1 compliance +w 1 1 1 component +w 1 1 1 compressed +w 1 1 1 computer +w 1 1 1 computers +w 1 1 1 computing +w 1 1 1 conference +w 1 1 1 conferences +w 1 1 1 configs +w 1 1 1 console +w 1 1 1 consumer +w 1 1 1 contact +w 1 1 1 contacts +w 1 1 1 content +w 1 1 1 contents +w 1 1 1 contract +w 1 1 1 contracts +w 1 1 1 control +w 1 1 1 controlpanel +w 1 1 1 cookie +w 1 1 1 cookies +w 1 1 1 copies +w 1 1 1 copy +w 1 1 1 copyright +w 1 1 1 core +w 1 1 1 corp +w 1 1 1 corpo +w 1 1 1 corporate +w 1 1 1 corrections +w 1 1 1 count +w 1 1 1 counter +w 1 1 1 counters +w 1 1 1 counts +w 1 1 1 course +w 1 1 1 courses +w 1 
1 1 cover +w 1 1 1 cpanel +w 1 1 1 cr +w 1 1 1 crack +w 1 1 1 crash +w 1 1 1 crashes +w 1 1 1 create +w 1 1 1 credits +w 1 1 1 crm +w 1 1 1 cron +w 1 1 1 crons +w 1 1 1 crontab +w 1 1 1 crontabs +w 1 1 1 crossdomain +w 1 1 1 crypt +w 1 1 1 crypto +w 1 1 1 cs +w 1 1 1 css +w 1 1 1 current +w 1 1 1 custom +w 1 1 1 custom-log +w 1 1 1 custom_log +w 1 1 1 customer +w 1 1 1 customers +w 1 1 1 cv +w 1 1 1 cxf +w 1 1 1 czcmdcvt +w 1 1 1 d +w 1 1 1 daemon +w 1 1 1 daily +w 1 1 1 dana-na +w 1 1 1 data +w 1 1 1 database +w 1 1 1 databases +w 1 1 1 date +w 1 1 1 dba +w 1 1 1 dbase +w 1 1 1 dbman +w 1 1 1 dc +w 1 1 1 dcforum +w 1 1 1 de +w 1 1 1 dealer +w 1 1 1 debug +w 1 1 1 decl +w 1 1 1 declaration +w 1 1 1 declarations +w 1 1 1 decode +w 1 1 1 decrypt +w 1 1 1 def +w 1 1 1 default +w 1 1 1 defaults +w 1 1 1 definition +w 1 1 1 definitions +w 1 1 1 del +w 1 1 1 delete +w 1 1 1 deleted +w 1 1 1 demo +w 1 1 1 demos +w 1 1 1 denied +w 1 1 1 deny +w 1 1 1 design +w 1 1 1 desktop +w 1 1 1 desktops +w 1 1 1 detail +w 1 1 1 details +w 1 1 1 dev +w 1 1 1 devel +w 1 1 1 developer +w 1 1 1 developers +w 1 1 1 development +w 1 1 1 device +w 1 1 1 devices +w 1 1 1 devs +w 1 1 1 df +w 1 1 1 dialog +w 1 1 1 dialogs +w 1 1 1 diff +w 1 1 1 diffs +w 1 1 1 digest +w 1 1 1 digg +w 1 1 1 dir +w 1 1 1 directories +w 1 1 1 directory +w 1 1 1 dirs +w 1 1 1 disabled +w 1 1 1 disclaimer +w 1 1 1 display +w 1 1 1 django +w 1 1 1 dl +w 1 1 1 dm +w 1 1 1 dm-config +w 1 1 1 dms +w 1 1 1 dms0 +w 1 1 1 dns +w 1 1 1 do +w 1 1 1 doc +w 1 1 1 dock +w 1 1 1 docroot +w 1 1 1 docs +w 1 1 1 document +w 1 1 1 documentation +w 1 1 1 documents +w 1 1 1 domain +w 1 1 1 domains +w 1 1 1 down +w 1 1 1 download +w 1 1 1 downloads +w 1 1 1 drop +w 1 1 1 dropped +w 1 1 1 drupal +w 1 1 1 dummy +w 1 1 1 dump +w 1 1 1 dumps +w 1 1 1 dvd +w 1 1 1 dwr +w 1 1 1 dynamic +w 1 1 1 e +w 1 1 1 e2fs +w 1 1 1 ear +w 1 1 1 ecommerce +w 1 1 1 edge +w 1 1 1 edit +w 1 1 1 editor +w 1 1 1 edits +w 1 1 1 edu +w 1 1 1 education +w 1 1 1 ee 
+w 1 1 1 effort +w 1 1 1 efforts +w 1 1 1 egress +w 1 1 1 ejb +w 1 1 1 element +w 1 1 1 elements +w 1 1 1 em +w 1 1 1 email +w 1 1 1 emails +w 1 1 1 emea +w 1 1 1 employees +w 1 1 1 employment +w 1 1 1 empty +w 1 1 1 emu +w 1 1 1 emulator +w 1 1 1 en +w 1 1 1 en_US +w 1 1 1 encode +w 1 1 1 encrypt +w 1 1 1 eng +w 1 1 1 engine +w 1 1 1 english +w 1 1 1 enterprise +w 1 1 1 entertainment +w 1 1 1 entries +w 1 1 1 entry +w 1 1 1 env +w 1 1 1 environ +w 1 1 1 environment +w 1 1 1 ep +w 1 1 1 error +w 1 1 1 error-log +w 1 1 1 error_log +w 1 1 1 errors +w 1 1 1 es +w 1 1 1 esale +w 1 1 1 esales +w 1 1 1 etc +w 1 1 1 europe +w 1 1 1 event +w 1 1 1 events +w 1 1 1 evil +w 1 1 1 evt +w 1 1 1 ews +w 1 1 1 ex +w 1 1 1 example +w 1 1 1 examples +w 1 1 1 excalibur +w 1 1 1 exchange +w 1 1 1 exec +w 1 1 1 export +w 1 1 1 ext +w 1 1 1 ext2 +w 1 1 1 extern +w 1 1 1 external +w 1 1 1 ezshopper +w 1 1 1 f +w 1 1 1 face +w 1 1 1 faces +w 1 1 1 faculty +w 1 1 1 fail +w 1 1 1 failure +w 1 1 1 family +w 1 1 1 faq +w 1 1 1 faqs +w 1 1 1 fcgi-bin +w 1 1 1 feature +w 1 1 1 features +w 1 1 1 feed +w 1 1 1 feedback +w 1 1 1 feeds +w 1 1 1 felix +w 1 1 1 field +w 1 1 1 fields +w 1 1 1 file +w 1 1 1 fileadmin +w 1 1 1 files +w 1 1 1 filez +w 1 1 1 finance +w 1 1 1 financial +w 1 1 1 find +w 1 1 1 finger +w 1 1 1 firewall +w 1 1 1 fixed +w 1 1 1 flags +w 1 1 1 flash +w 1 1 1 flow +w 1 1 1 flows +w 1 1 1 flv +w 1 1 1 fn +w 1 1 1 folder +w 1 1 1 folders +w 1 1 1 font +w 1 1 1 fonts +w 1 1 1 foo +w 1 1 1 footer +w 1 1 1 footers +w 1 1 1 form +w 1 1 1 formatting +w 1 1 1 formmail +w 1 1 1 forms +w 1 1 1 forrest +w 1 1 1 fortune +w 1 1 1 forum +w 1 1 1 forum1 +w 1 1 1 forum2 +w 1 1 1 forumdisplay +w 1 1 1 forums +w 1 1 1 forward +w 1 1 1 foto +w 1 1 1 foundation +w 1 1 1 fr +w 1 1 1 frame +w 1 1 1 frames +w 1 1 1 framework +w 1 1 1 free +w 1 1 1 freebsd +w 1 1 1 friend +w 1 1 1 friends +w 1 1 1 frob +w 1 1 1 frontend +w 1 1 1 fs +w 1 1 1 ftp +w 1 1 1 fuck +w 1 1 1 fuckoff +w 1 1 1 fuckyou +w 1 1 1 
full +w 1 1 1 fun +w 1 1 1 func +w 1 1 1 funcs +w 1 1 1 function +w 1 1 1 functions +w 1 1 1 fusion +w 1 1 1 fw +w 1 1 1 g +w 1 1 1 galleries +w 1 1 1 gallery +w 1 1 1 game +w 1 1 1 games +w 1 1 1 ganglia +w 1 1 1 garbage +w 1 1 1 gateway +w 1 1 1 gb +w 1 1 1 geeklog +w 1 1 1 general +w 1 1 1 geronimo +w 1 1 1 get +w 1 1 1 getaccess +w 1 1 1 getjobid +w 1 1 1 gfx +w 1 1 1 gif +w 1 1 1 gitweb +w 1 1 1 glimpse +w 1 1 1 global +w 1 1 1 globals +w 1 1 1 glossary +w 1 1 1 go +w 1 1 1 goaway +w 1 1 1 google +w 1 1 1 government +w 1 1 1 gprs +w 1 1 1 grant +w 1 1 1 grants +w 1 1 1 graphics +w 1 1 1 group +w 1 1 1 groupcp +w 1 1 1 groups +w 1 1 1 gsm +w 1 1 1 guest +w 1 1 1 guestbook +w 1 1 1 guests +w 1 1 1 guide +w 1 1 1 guides +w 1 1 1 gump +w 1 1 1 gwt +w 1 1 1 h +w 1 1 1 hack +w 1 1 1 hacker +w 1 1 1 hacking +w 1 1 1 hackme +w 1 1 1 hadoop +w 1 1 1 hardcore +w 1 1 1 hardware +w 1 1 1 harmony +w 1 1 1 head +w 1 1 1 header +w 1 1 1 headers +w 1 1 1 health +w 1 1 1 hello +w 1 1 1 help +w 1 1 1 helper +w 1 1 1 helpers +w 1 1 1 hi +w 1 1 1 hidden +w 1 1 1 hide +w 1 1 1 high +w 1 1 1 hipaa +w 1 1 1 history +w 1 1 1 hit +w 1 1 1 hits +w 1 1 1 hole +w 1 1 1 home +w 1 1 1 homepage +w 1 1 1 hop +w 1 1 1 horde +w 1 1 1 hosting +w 1 1 1 hosts +w 1 1 1 howto +w 1 1 1 hp +w 1 1 1 hr +w 1 1 1 hta +w 1 1 1 htbin +w 1 1 1 htdoc +w 1 1 1 htdocs +w 1 1 1 htpasswd +w 1 1 1 http +w 1 1 1 httpd +w 1 1 1 https +w 1 1 1 httpuser +w 1 1 1 hu +w 1 1 1 hyper +w 1 1 1 i +w 1 1 1 ia +w 1 1 1 ibm +w 1 1 1 icat +w 1 1 1 icon +w 1 1 1 icons +w 1 1 1 id +w 1 1 1 idea +w 1 1 1 ideas +w 1 1 1 ids +w 1 1 1 ie +w 1 1 1 iframe +w 1 1 1 ig +w 1 1 1 ignore +w 1 1 1 iisadmin +w 1 1 1 iisadmpwd +w 1 1 1 iissamples +w 1 1 1 image +w 1 1 1 imagefolio +w 1 1 1 images +w 1 1 1 img +w 1 1 1 imgs +w 1 1 1 imp +w 1 1 1 import +w 1 1 1 important +w 1 1 1 in +w 1 1 1 inbound +w 1 1 1 incl +w 1 1 1 include +w 1 1 1 includes +w 1 1 1 incoming +w 1 1 1 incubator +w 1 1 1 index +w 1 1 1 index1 +w 1 1 1 index2 +w 1 1 1 
index_1 +w 1 1 1 index_2 +w 1 1 1 inetpub +w 1 1 1 inetsrv +w 1 1 1 inf +w 1 1 1 info +w 1 1 1 information +w 1 1 1 ingress +w 1 1 1 init +w 1 1 1 inline +w 1 1 1 input +w 1 1 1 inquire +w 1 1 1 inquiries +w 1 1 1 inquiry +w 1 1 1 insert +w 1 1 1 install +w 1 1 1 int +w 1 1 1 interim +w 1 1 1 intermediate +w 1 1 1 internal +w 1 1 1 international +w 1 1 1 internet +w 1 1 1 intl +w 1 1 1 intranet +w 1 1 1 intro +w 1 1 1 ip +w 1 1 1 ipc +w 1 1 1 ips +w 1 1 1 irc +w 1 1 1 is +w 1 1 1 isapi +w 1 1 1 iso +w 1 1 1 issues +w 1 1 1 it +w 1 1 1 item +w 1 1 1 j +w 1 1 1 j2ee +w 1 1 1 j2me +w 1 1 1 jakarta +w 1 1 1 java-plugin +w 1 1 1 javadoc +w 1 1 1 javascript +w 1 1 1 javax +w 1 1 1 jboss +w 1 1 1 jdbc +w 1 1 1 jigsaw +w 1 1 1 jj +w 1 1 1 jmx-console +w 1 1 1 job +w 1 1 1 jobs +w 1 1 1 joe +w 1 1 1 john +w 1 1 1 join +w 1 1 1 joomla +w 1 1 1 journal +w 1 1 1 jp +w 1 1 1 jpa +w 1 1 1 jpg +w 1 1 1 jre +w 1 1 1 jrun +w 1 1 1 json +w 1 1 1 jsso +w 1 1 1 jsx +w 1 1 1 juniper +w 1 1 1 junk +w 1 1 1 jvm +w 1 1 1 k +w 1 1 1 kboard +w 1 1 1 keep +w 1 1 1 kernel +w 1 1 1 keygen +w 1 1 1 keys +w 1 1 1 kids +w 1 1 1 kill +w 1 1 1 known_hosts +w 1 1 1 l +w 1 1 1 labs +w 1 1 1 lang +w 1 1 1 large +w 1 1 1 law +w 1 1 1 layout +w 1 1 1 layouts +w 1 1 1 ldap +w 1 1 1 leader +w 1 1 1 leaders +w 1 1 1 left +w 1 1 1 legacy +w 1 1 1 legal +w 1 1 1 lenya +w 1 1 1 letters +w 1 1 1 level +w 1 1 1 lg +w 1 1 1 lib +w 1 1 1 library +w 1 1 1 libs +w 1 1 1 license +w 1 1 1 licenses +w 1 1 1 line +w 1 1 1 link +w 1 1 1 links +w 1 1 1 linux +w 1 1 1 list +w 1 1 1 listinfo +w 1 1 1 lists +w 1 1 1 live +w 1 1 1 lo +w 1 1 1 loader +w 1 1 1 loading +w 1 1 1 loc +w 1 1 1 local +w 1 1 1 location +w 1 1 1 lock +w 1 1 1 locked +w 1 1 1 log4j +w 1 1 1 logfile +w 1 1 1 logging +w 1 1 1 login +w 1 1 1 logins +w 1 1 1 logo +w 1 1 1 logoff +w 1 1 1 logon +w 1 1 1 logos +w 1 1 1 logout +w 1 1 1 logs +w 1 1 1 lost+found +w 1 1 1 low +w 1 1 1 ls +w 1 1 1 lst +w 1 1 1 lucene +w 1 1 1 m +w 1 1 1 mac +w 1 1 1 mail +w 1 1 
1 mailer +w 1 1 1 mailing +w 1 1 1 mailman +w 1 1 1 mails +w 1 1 1 main +w 1 1 1 manage +w 1 1 1 management +w 1 1 1 manager +w 1 1 1 manifest +w 1 1 1 manual +w 1 1 1 manuals +w 1 1 1 map +w 1 1 1 maps +w 1 1 1 mark +w 1 1 1 marketing +w 1 1 1 master +w 1 1 1 master.passwd +w 1 1 1 match +w 1 1 1 matrix +w 1 1 1 maven +w 1 1 1 mbox +w 1 1 1 me +w 1 1 1 media +w 1 1 1 medium +w 1 1 1 mem +w 1 1 1 member +w 1 1 1 members +w 1 1 1 membership +w 1 1 1 memory +w 1 1 1 menu +w 1 1 1 messaging +w 1 1 1 meta +w 1 1 1 microsoft +w 1 1 1 migrate +w 1 1 1 migration +w 1 1 1 mina +w 1 1 1 mirror +w 1 1 1 mirrors +w 1 1 1 misc +w 1 1 1 mission +w 1 1 1 mix +w 1 1 1 mms +w 1 1 1 mobi +w 1 1 1 mobile +w 1 1 1 mock +w 1 1 1 mod +w 1 1 1 modify +w 1 1 1 mods +w 1 1 1 module +w 1 1 1 modules +w 1 1 1 mojo +w 1 1 1 money +w 1 1 1 monitoring +w 1 1 1 more +w 1 1 1 move +w 1 1 1 movie +w 1 1 1 movies +w 1 1 1 mp +w 1 1 1 mp3 +w 1 1 1 mp3s +w 1 1 1 ms +w 1 1 1 ms-sql +w 1 1 1 msadc +w 1 1 1 msadm +w 1 1 1 msg +w 1 1 1 msie +w 1 1 1 msql +w 1 1 1 mssql +w 1 1 1 mta +w 1 1 1 multimedia +w 1 1 1 music +w 1 1 1 mx +w 1 1 1 my +w 1 1 1 myfaces +w 1 1 1 myphpnuke +w 1 1 1 mysql +w 1 1 1 mysqld +w 1 1 1 n +w 1 1 1 nav +w 1 1 1 navigation +w 1 1 1 net +w 1 1 1 netbsd +w 1 1 1 nethome +w 1 1 1 nets +w 1 1 1 network +w 1 1 1 networking +w 1 1 1 new +w 1 1 1 news +w 1 1 1 newsletter +w 1 1 1 newsletters +w 1 1 1 next +w 1 1 1 nfs +w 1 1 1 nice +w 1 1 1 nl +w 1 1 1 nobody +w 1 1 1 node +w 1 1 1 none +w 1 1 1 note +w 1 1 1 notes +w 1 1 1 notification +w 1 1 1 notifications +w 1 1 1 notified +w 1 1 1 notify +w 1 1 1 ns +w 1 1 1 nuke +w 1 1 1 nul +w 1 1 1 null +w 1 1 1 o +w 1 1 1 oa_servlets +w 1 1 1 oauth +w 1 1 1 obdc +w 1 1 1 obsolete +w 1 1 1 obsoleted +w 1 1 1 odbc +w 1 1 1 ode +w 1 1 1 oem +w 1 1 1 ofbiz +w 1 1 1 office +w 1 1 1 onbound +w 1 1 1 online +w 1 1 1 op +w 1 1 1 open +w 1 1 1 openbsd +w 1 1 1 opendir +w 1 1 1 openejb +w 1 1 1 openjpa +w 1 1 1 operations +w 1 1 1 opinion +w 1 1 1 
oprocmgr-status +w 1 1 1 opt +w 1 1 1 option +w 1 1 1 options +w 1 1 1 oracle +w 1 1 1 oracle.xml.xsql.XSQLServlet +w 1 1 1 order +w 1 1 1 ordered +w 1 1 1 orders +w 1 1 1 org +w 1 1 1 osc +w 1 1 1 oscommerce +w 1 1 1 other +w 1 1 1 outgoing +w 1 1 1 outline +w 1 1 1 output +w 1 1 1 outreach +w 1 1 1 overview +w 1 1 1 owa +w 1 1 1 ows +w 1 1 1 ows-bin +w 1 1 1 p +w 1 1 1 p2p +w 1 1 1 pack +w 1 1 1 packages +w 1 1 1 page +w 1 1 1 page1 +w 1 1 1 page2 +w 1 1 1 page_1 +w 1 1 1 page_2 +w 1 1 1 pages +w 1 1 1 paid +w 1 1 1 panel +w 1 1 1 paper +w 1 1 1 papers +w 1 1 1 parse +w 1 1 1 partner +w 1 1 1 partners +w 1 1 1 party +w 1 1 1 pass +w 1 1 1 passwd +w 1 1 1 password +w 1 1 1 passwords +w 1 1 1 past +w 1 1 1 patch +w 1 1 1 patches +w 1 1 1 paypal +w 1 1 1 pc +w 1 1 1 pci +w 1 1 1 pda +w 1 1 1 pdf +w 1 1 1 pdfs +w 1 1 1 peek +w 1 1 1 pending +w 1 1 1 people +w 1 1 1 perf +w 1 1 1 performance +w 1 1 1 perl +w 1 1 1 personal +w 1 1 1 pg +w 1 1 1 phf +w 1 1 1 phone +w 1 1 1 phones +w 1 1 1 phorum +w 1 1 1 photo +w 1 1 1 photos +w 1 1 1 phpBB +w 1 1 1 phpBB2 +w 1 1 1 phpEventCalendar +w 1 1 1 phpMyAdmin +w 1 1 1 phpbb +w 1 1 1 phpmyadmin +w 1 1 1 phpnuke +w 1 1 1 phps +w 1 1 1 pic +w 1 1 1 pics +w 1 1 1 pictures +w 1 1 1 pii +w 1 1 1 ping +w 1 1 1 pipermail +w 1 1 1 piranha +w 1 1 1 pix +w 1 1 1 pixel +w 1 1 1 pkg +w 1 1 1 pkgs +w 1 1 1 plain +w 1 1 1 play +w 1 1 1 pls +w 1 1 1 plugin +w 1 1 1 plugins +w 1 1 1 pm +w 1 1 1 png +w 1 1 1 poi +w 1 1 1 policies +w 1 1 1 policy +w 1 1 1 politics +w 1 1 1 poll +w 1 1 1 polls +w 1 1 1 pop +w 1 1 1 pop3 +w 1 1 1 porn +w 1 1 1 port +w 1 1 1 portal +w 1 1 1 portals +w 1 1 1 portfolio +w 1 1 1 pos +w 1 1 1 post +w 1 1 1 posted +w 1 1 1 postgres +w 1 1 1 postgresql +w 1 1 1 postnuke +w 1 1 1 postpaid +w 1 1 1 posts +w 1 1 1 ppt +w 1 1 1 pr +w 1 1 1 pr0n +w 1 1 1 premium +w 1 1 1 prepaid +w 1 1 1 presentation +w 1 1 1 presentations +w 1 1 1 preserve +w 1 1 1 press +w 1 1 1 preview +w 1 1 1 previews +w 1 1 1 previous +w 1 1 1 pricing +w 
1 1 1 print +w 1 1 1 printenv +w 1 1 1 printer +w 1 1 1 printers +w 1 1 1 priv +w 1 1 1 privacy +w 1 1 1 private +w 1 1 1 problems +w 1 1 1 proc +w 1 1 1 procedures +w 1 1 1 prod +w 1 1 1 product +w 1 1 1 product_info +w 1 1 1 production +w 1 1 1 products +w 1 1 1 profile +w 1 1 1 profiles +w 1 1 1 profiling +w 1 1 1 program +w 1 1 1 programming +w 1 1 1 programs +w 1 1 1 project +w 1 1 1 projects +w 1 1 1 promo +w 1 1 1 prop +w 1 1 1 properties +w 1 1 1 property +w 1 1 1 props +w 1 1 1 protect +w 1 1 1 proto +w 1 1 1 proxies +w 1 1 1 proxy +w 1 1 1 prv +w 1 1 1 ps +w 1 1 1 psql +w 1 1 1 pt +w 1 1 1 pub +w 1 1 1 public +w 1 1 1 publication +w 1 1 1 publications +w 1 1 1 pubs +w 1 1 1 pull +w 1 1 1 purchase +w 1 1 1 purchases +w 1 1 1 purchasing +w 1 1 1 push +w 1 1 1 pw +w 1 1 1 pwd +w 1 1 1 python +w 1 1 1 q +w 1 1 1 qpid +w 1 1 1 queries +w 1 1 1 query +w 1 1 1 queue +w 1 1 1 queues +w 1 1 1 quote +w 1 1 1 quotes +w 1 1 1 r +w 1 1 1 radio +w 1 1 1 random +w 1 1 1 rar +w 1 1 1 rdf +w 1 1 1 read +w 1 1 1 readme +w 1 1 1 realestate +w 1 1 1 receive +w 1 1 1 received +w 1 1 1 recharge +w 1 1 1 record +w 1 1 1 records +w 1 1 1 recovery +w 1 1 1 recycle +w 1 1 1 recycled +w 1 1 1 redir +w 1 1 1 redirect +w 1 1 1 reference +w 1 1 1 reg +w 1 1 1 register +w 1 1 1 registered +w 1 1 1 registration +w 1 1 1 registrations +w 1 1 1 release +w 1 1 1 releases +w 1 1 1 remote +w 1 1 1 remove +w 1 1 1 removed +w 1 1 1 render +w 1 1 1 rendered +w 1 1 1 rep +w 1 1 1 repl +w 1 1 1 replica +w 1 1 1 replicas +w 1 1 1 replicate +w 1 1 1 replicated +w 1 1 1 replication +w 1 1 1 replicator +w 1 1 1 reply +w 1 1 1 report +w 1 1 1 reporting +w 1 1 1 reports +w 1 1 1 reprints +w 1 1 1 req +w 1 1 1 reqs +w 1 1 1 request +w 1 1 1 requests +w 1 1 1 requisition +w 1 1 1 requisitions +w 1 1 1 res +w 1 1 1 research +w 1 1 1 resin +w 1 1 1 resource +w 1 1 1 resources +w 1 1 1 rest +w 1 1 1 restore +w 1 1 1 restored +w 1 1 1 restricted +w 1 1 1 results +w 1 1 1 retail +w 1 1 1 reverse +w 1 1 1 
reversed +w 1 1 1 revert +w 1 1 1 reverted +w 1 1 1 review +w 1 1 1 reviews +w 1 1 1 right +w 1 1 1 roam +w 1 1 1 roaming +w 1 1 1 robot +w 1 1 1 robots +w 1 1 1 roller +w 1 1 1 room +w 1 1 1 root +w 1 1 1 rpc +w 1 1 1 rtf +w 1 1 1 ru +w 1 1 1 rule +w 1 1 1 rules +w 1 1 1 run +w 1 1 1 rwservlet +w 1 1 1 s +w 1 1 1 sale +w 1 1 1 sales +w 1 1 1 sam +w 1 1 1 samba +w 1 1 1 sample +w 1 1 1 samples +w 1 1 1 sav +w 1 1 1 save +w 1 1 1 saved +w 1 1 1 saves +w 1 1 1 sbin +w 1 1 1 scan +w 1 1 1 scanned +w 1 1 1 scans +w 1 1 1 sched +w 1 1 1 schedule +w 1 1 1 scheduled +w 1 1 1 scheduling +w 1 1 1 schema +w 1 1 1 science +w 1 1 1 screen +w 1 1 1 screens +w 1 1 1 screenshot +w 1 1 1 screenshots +w 1 1 1 script +w 1 1 1 scriptlet +w 1 1 1 scriptlets +w 1 1 1 scripts +w 1 1 1 sdk +w 1 1 1 se +w 1 1 1 search +w 1 1 1 sec +w 1 1 1 secret +w 1 1 1 section +w 1 1 1 sections +w 1 1 1 secure +w 1 1 1 secured +w 1 1 1 security +w 1 1 1 seed +w 1 1 1 sell +w 1 1 1 send +w 1 1 1 sendmail +w 1 1 1 sendto +w 1 1 1 sent +w 1 1 1 serial +w 1 1 1 serv +w 1 1 1 serve +w 1 1 1 server +w 1 1 1 server-info +w 1 1 1 server-status +w 1 1 1 servers +w 1 1 1 service +w 1 1 1 services +w 1 1 1 servlet +w 1 1 1 servlets +w 1 1 1 session +w 1 1 1 sessions +w 1 1 1 setting +w 1 1 1 settings +w 1 1 1 setup +w 1 1 1 share +w 1 1 1 shared +w 1 1 1 shares +w 1 1 1 shell +w 1 1 1 ship +w 1 1 1 shipped +w 1 1 1 shipping +w 1 1 1 shop +w 1 1 1 shopper +w 1 1 1 shopping +w 1 1 1 shops +w 1 1 1 shoutbox +w 1 1 1 show +w 1 1 1 show_post +w 1 1 1 show_thread +w 1 1 1 showcat +w 1 1 1 showenv +w 1 1 1 showjobs +w 1 1 1 showmap +w 1 1 1 showmsg +w 1 1 1 showpost +w 1 1 1 showthread +w 1 1 1 sign +w 1 1 1 signoff +w 1 1 1 signon +w 1 1 1 signup +w 1 1 1 simple +w 1 1 1 sink +w 1 1 1 site +w 1 1 1 site-map +w 1 1 1 site_map +w 1 1 1 sitemap +w 1 1 1 sites +w 1 1 1 skel +w 1 1 1 skin +w 1 1 1 skins +w 1 1 1 skip +w 1 1 1 sl +w 1 1 1 sling +w 1 1 1 sm +w 1 1 1 small +w 1 1 1 sms +w 1 1 1 smtp +w 1 1 1 snoop +w 1 1 1 so 
+w 1 1 1 soap +w 1 1 1 soaprouter +w 1 1 1 soft +w 1 1 1 software +w 1 1 1 solaris +w 1 1 1 sold +w 1 1 1 solution +w 1 1 1 solutions +w 1 1 1 source +w 1 1 1 sources +w 1 1 1 soutbox +w 1 1 1 sox +w 1 1 1 sp +w 1 1 1 space +w 1 1 1 spacer +w 1 1 1 spam +w 1 1 1 special +w 1 1 1 specials +w 1 1 1 sponsor +w 1 1 1 sponsors +w 1 1 1 spool +w 1 1 1 sport +w 1 1 1 sports +w 1 1 1 sqlnet +w 1 1 1 squirrel +w 1 1 1 squirrelmail +w 1 1 1 src +w 1 1 1 srv +w 1 1 1 ss +w 1 1 1 ssh +w 1 1 1 ssi +w 1 1 1 ssl +w 1 1 1 sslvpn +w 1 1 1 ssn +w 1 1 1 sso +w 1 1 1 staff +w 1 1 1 staging +w 1 1 1 standard +w 1 1 1 standards +w 1 1 1 star +w 1 1 1 start +w 1 1 1 stat +w 1 1 1 statement +w 1 1 1 statements +w 1 1 1 static +w 1 1 1 staticpages +w 1 1 1 statistic +w 1 1 1 statistics +w 1 1 1 stats +w 1 1 1 status +w 1 1 1 stock +w 1 1 1 storage +w 1 1 1 store +w 1 1 1 stored +w 1 1 1 stories +w 1 1 1 story +w 1 1 1 strut +w 1 1 1 struts +w 1 1 1 student +w 1 1 1 students +w 1 1 1 stuff +w 1 1 1 style +w 1 1 1 styles +w 1 1 1 submissions +w 1 1 1 submit +w 1 1 1 subscribe +w 1 1 1 subscriber +w 1 1 1 subscribers +w 1 1 1 subscription +w 1 1 1 subscriptions +w 1 1 1 success +w 1 1 1 suite +w 1 1 1 suites +w 1 1 1 sun +w 1 1 1 sunos +w 1 1 1 super +w 1 1 1 support +w 1 1 1 surf +w 1 1 1 survey +w 1 1 1 surveys +w 1 1 1 swf +w 1 1 1 sws +w 1 1 1 synapse +w 1 1 1 sync +w 1 1 1 synced +w 1 1 1 sys +w 1 1 1 system +w 1 1 1 systems +w 1 1 1 sysuser +w 1 1 1 t +w 1 1 1 tag +w 1 1 1 tags +w 1 1 1 tape +w 1 1 1 tapes +w 1 1 1 tapestry +w 1 1 1 tar +w 1 1 1 tar.bz2 +w 1 1 1 tb +w 1 1 1 tcl +w 1 1 1 team +w 1 1 1 tech +w 1 1 1 technical +w 1 1 1 technology +w 1 1 1 tel +w 1 1 1 tele +w 1 1 1 templ +w 1 1 1 template +w 1 1 1 templates +w 1 1 1 terms +w 1 1 1 test-cgi +w 1 1 1 test-env +w 1 1 1 test1 +w 1 1 1 test123 +w 1 1 1 test1234 +w 1 1 1 test2 +w 1 1 1 test3 +w 1 1 1 testimonial +w 1 1 1 testimonials +w 1 1 1 testing +w 1 1 1 tests +w 1 1 1 texis +w 1 1 1 text +w 1 1 1 texts +w 1 1 1 theme +w 1 
1 1 themes +w 1 1 1 thread +w 1 1 1 threads +w 1 1 1 thumb +w 1 1 1 thumbnail +w 1 1 1 thumbnails +w 1 1 1 thumbs +w 1 1 1 tickets +w 1 1 1 tiki +w 1 1 1 tiles +w 1 1 1 tip +w 1 1 1 tips +w 1 1 1 title +w 1 1 1 tls +w 1 1 1 tmpl +w 1 1 1 tmps +w 1 1 1 tn +w 1 1 1 toc +w 1 1 1 todo +w 1 1 1 toggle +w 1 1 1 tomcat +w 1 1 1 tool +w 1 1 1 toolbar +w 1 1 1 toolkit +w 1 1 1 tools +w 1 1 1 top +w 1 1 1 topic +w 1 1 1 topics +w 1 1 1 torrent +w 1 1 1 torrents +w 1 1 1 tos +w 1 1 1 tour +w 1 1 1 tpl +w 1 1 1 tpv +w 1 1 1 tr +w 1 1 1 trace +w 1 1 1 traceroute +w 1 1 1 traces +w 1 1 1 track +w 1 1 1 trackback +w 1 1 1 tracker +w 1 1 1 trackers +w 1 1 1 tracking +w 1 1 1 tracks +w 1 1 1 traffic +w 1 1 1 trailer +w 1 1 1 trailers +w 1 1 1 training +w 1 1 1 trans +w 1 1 1 transparent +w 1 1 1 transport +w 1 1 1 trash +w 1 1 1 travel +w 1 1 1 treasury +w 1 1 1 tree +w 1 1 1 trees +w 1 1 1 trial +w 1 1 1 trunk +w 1 1 1 tsweb +w 1 1 1 tt +w 1 1 1 turbine +w 1 1 1 tuscany +w 1 1 1 tutorial +w 1 1 1 tutorials +w 1 1 1 tv +w 1 1 1 tweak +w 1 1 1 type +w 1 1 1 typo3 +w 1 1 1 typo3conf +w 1 1 1 u +w 1 1 1 ubb +w 1 1 1 uds +w 1 1 1 uk +w 1 1 1 umts +w 1 1 1 union +w 1 1 1 unix +w 1 1 1 unlock +w 1 1 1 unreg +w 1 1 1 unregister +w 1 1 1 up +w 1 1 1 upd +w 1 1 1 update +w 1 1 1 updated +w 1 1 1 updater +w 1 1 1 updates +w 1 1 1 upload +w 1 1 1 uploads +w 1 1 1 url +w 1 1 1 us +w 1 1 1 usa +w 1 1 1 usage +w 1 1 1 user +w 1 1 1 userlog +w 1 1 1 users +w 1 1 1 usr +w 1 1 1 util +w 1 1 1 utilities +w 1 1 1 utility +w 1 1 1 utils +w 1 1 1 v +w 1 1 1 v1 +w 1 1 1 v2 +w 1 1 1 var +w 1 1 1 vault +w 1 1 1 vector +w 1 1 1 velocity +w 1 1 1 vendor +w 1 1 1 ver +w 1 1 1 ver1 +w 1 1 1 ver2 +w 1 1 1 version +w 1 1 1 vfs +w 1 1 1 video +w 1 1 1 videos +w 1 1 1 view +w 1 1 1 view-source +w 1 1 1 viewcvs +w 1 1 1 viewforum +w 1 1 1 viewonline +w 1 1 1 views +w 1 1 1 viewsource +w 1 1 1 viewsvn +w 1 1 1 viewtopic +w 1 1 1 viewvc +w 1 1 1 virtual +w 1 1 1 vm +w 1 1 1 voip +w 1 1 1 vol +w 1 1 1 vpn +w 1 1 1 w 
+w 1 1 1 w3 +w 1 1 1 w3c +w 1 1 1 wa +w 1 1 1 wap +w 1 1 1 war +w 1 1 1 warez +w 1 1 1 way-board +w 1 1 1 wbboard +w 1 1 1 wc +w 1 1 1 weather +w 1 1 1 web +w 1 1 1 web-beans +w 1 1 1 web-console +w 1 1 1 webaccess +w 1 1 1 webadmin +w 1 1 1 webagent +w 1 1 1 webalizer +w 1 1 1 webapp +w 1 1 1 webb +w 1 1 1 webbbs +w 1 1 1 webboard +w 1 1 1 webcalendar +w 1 1 1 webcart +w 1 1 1 webcasts +w 1 1 1 webcgi +w 1 1 1 webchat +w 1 1 1 webdata +w 1 1 1 webdav +w 1 1 1 weblog +w 1 1 1 weblogic +w 1 1 1 weblogs +w 1 1 1 webmail +w 1 1 1 webplus +w 1 1 1 webshop +w 1 1 1 website +w 1 1 1 websphere +w 1 1 1 webstats +w 1 1 1 websvn +w 1 1 1 webwork +w 1 1 1 welcome +w 1 1 1 whitepapers +w 1 1 1 whois +w 1 1 1 whosonline +w 1 1 1 wicket +w 1 1 1 wiki +w 1 1 1 win +w 1 1 1 win32 +w 1 1 1 windows +w 1 1 1 winnt +w 1 1 1 wireless +w 1 1 1 wml +w 1 1 1 word +w 1 1 1 wordpress +w 1 1 1 work +w 1 1 1 working +w 1 1 1 world +w 1 1 1 wp +w 1 1 1 wp-content +w 1 1 1 wp-includes +w 1 1 1 wp-login +w 1 1 1 wrap +w 1 1 1 ws-client +w 1 1 1 ws_ftp +w 1 1 1 wtai +w 1 1 1 www +w 1 1 1 www-sql +w 1 1 1 www1 +w 1 1 1 www2 +w 1 1 1 www3 +w 1 1 1 wwwboard +w 1 1 1 wwwroot +w 1 1 1 wwwstats +w 1 1 1 wwwthreads +w 1 1 1 wwwuser +w 1 1 1 x +w 1 1 1 xalan +w 1 1 1 xerces +w 1 1 1 xhtml +w 1 1 1 xmlrpc +w 1 1 1 xslt +w 1 1 1 xsql +w 1 1 1 xxx +w 1 1 1 xyzzy +w 1 1 1 y +w 1 1 1 yahoo +w 1 1 1 youtube +w 1 1 1 yt +w 1 1 1 z +w 1 1 1 zboard +w 1 1 1 zend +w 1 1 1 zero +w 1 1 1 zipfiles +w 1 1 1 zips +w 1 1 1 zope +w 1 1 1 zorum +w 1 1 1 ~admin +w 1 1 1 ~apache +w 1 1 1 ~bin +w 1 1 1 ~bob +w 1 1 1 ~ftp +w 1 1 1 ~guest +w 1 1 1 ~http +w 1 1 1 ~httpd +w 1 1 1 ~john +w 1 1 1 ~log +w 1 1 1 ~logs +w 1 1 1 ~lp +w 1 1 1 ~mark +w 1 1 1 ~matt +w 1 1 1 ~nobody +w 1 1 1 ~root +w 1 1 1 ~test +w 1 1 1 ~tmp +w 1 1 1 ~www diff --git a/dictionaries/extensions-only.wl b/dictionaries/extensions-only.wl new file mode 100644 index 0000000..4c13a96 --- /dev/null +++ b/dictionaries/extensions-only.wl @@ -0,0 +1,100 @@ +e 1 1 1 
asmx +e 1 1 1 asp +e 1 1 1 aspx +e 1 1 1 bak +e 1 1 1 bat +e 1 1 1 bin +e 1 1 1 bz2 +e 1 1 1 c +e 1 1 1 cc +e 1 1 1 cfg +e 1 1 1 cgi +e 1 1 1 class +e 1 1 1 conf +e 1 1 1 config +e 1 1 1 cpp +e 1 1 1 cs +e 1 1 1 csv +e 1 1 1 dat +e 1 1 1 db +e 1 1 1 dll +e 1 1 1 do +e 1 1 1 doc +e 1 1 1 dump +e 1 1 1 ep +e 1 1 1 err +e 1 1 1 error +e 1 1 1 exe +e 1 1 1 gif +e 1 1 1 gz +e 1 1 1 htm +e 1 1 1 html +e 1 1 1 inc +e 1 1 1 ini +e 1 1 1 java +e 1 1 1 jhtml +e 1 1 1 jpg +e 1 1 1 js +e 1 1 1 jsf +e 1 1 1 jsp +e 1 1 1 key +e 1 1 1 lib +e 1 1 1 log +e 1 1 1 lst +e 1 1 1 manifest +e 1 1 1 mdb +e 1 1 1 meta +e 1 1 1 msg +e 1 1 1 nsf +e 1 1 1 o +e 1 1 1 old +e 1 1 1 ora +e 1 1 1 orig +e 1 1 1 out +e 1 1 1 part +e 1 1 1 pdf +e 1 1 1 php +e 1 1 1 php3 +e 1 1 1 pl +e 1 1 1 pm +e 1 1 1 png +e 1 1 1 ppt +e 1 1 1 properties +e 1 1 1 py +e 1 1 1 rar +e 1 1 1 rss +e 1 1 1 rtf +e 1 1 1 save +e 1 1 1 sh +e 1 1 1 shtml +e 1 1 1 so +e 1 1 1 sql +e 1 1 1 stackdump +e 1 1 1 swf +e 1 1 1 tar +e 1 1 1 tar.bz2 +e 1 1 1 tar.gz +e 1 1 1 temp +e 1 1 1 test +e 1 1 1 tgz +e 1 1 1 tmp +e 1 1 1 trace +e 1 1 1 txt +e 1 1 1 vb +e 1 1 1 vbs +e 1 1 1 ws +e 1 1 1 xls +e 1 1 1 xml +e 1 1 1 xsl +e 1 1 1 zip +w 1 1 1 AggreSpy +w 1 1 1 DMSDump +w 1 1 1 dms0 +w 1 1 1 dmse 1 1 1 7z +w 1 1 1 getjobid +w 1 1 1 oprocmgr-status +w 1 1 1 rwservlet +w 1 1 1 showenv +w 1 1 1 showjobs +w 1 1 1 showmap +w 1 1 1 soaprouter diff --git a/dictionaries/minimal.wl b/dictionaries/minimal.wl new file mode 100644 index 0000000..c8f707f --- /dev/null +++ b/dictionaries/minimal.wl @@ -0,0 +1,1892 @@ +e 1 1 1 bak +e 1 1 1 cfg +e 1 1 1 class +e 1 1 1 cnf +e 1 1 1 conf +e 1 1 1 config +e 1 1 1 csv +e 1 1 1 err +e 1 1 1 error +e 1 1 1 html +e 1 1 1 inc +e 1 1 1 ini +e 1 1 1 java +e 1 1 1 key +e 1 1 1 log +e 1 1 1 old +e 1 1 1 orig +e 1 1 1 out +e 1 1 1 part +e 1 1 1 pl +e 1 1 1 sql +e 1 1 1 temp +e 1 1 1 test +e 1 1 1 tmp +e 1 1 1 txt +e 1 1 1 xml +e 1 1 1 zip +w 1 1 1 .bash_history +w 1 1 1 .bashrc +w 1 1 1 .cvsignore +w 1 1 1 .history 
+w 1 1 1 .htaccess +w 1 1 1 .htpasswd +w 1 1 1 .passwd +w 1 1 1 .perf +w 1 1 1 .ssh +w 1 1 1 .svn +w 1 1 1 .web +w 1 1 1 0 +w 1 1 1 00 +w 1 1 1 01 +w 1 1 1 02 +w 1 1 1 03 +w 1 1 1 04 +w 1 1 1 05 +w 1 1 1 06 +w 1 1 1 07 +w 1 1 1 08 +w 1 1 1 09 +w 1 1 1 1 +w 1 1 1 10 +w 1 1 1 100 +w 1 1 1 1000 +w 1 1 1 1001 +w 1 1 1 101 +w 1 1 1 11 +w 1 1 1 12 +w 1 1 1 13 +w 1 1 1 14 +w 1 1 1 15 +w 1 1 1 1990 +w 1 1 1 1991 +w 1 1 1 1992 +w 1 1 1 1993 +w 1 1 1 1994 +w 1 1 1 1995 +w 1 1 1 1996 +w 1 1 1 1997 +w 1 1 1 1998 +w 1 1 1 1999 +w 1 1 1 2 +w 1 1 1 20 +w 1 1 1 200 +w 1 1 1 2000 +w 1 1 1 2001 +w 1 1 1 2002 +w 1 1 1 2003 +w 1 1 1 2004 +w 1 1 1 2005 +w 1 1 1 2006 +w 1 1 1 2007 +w 1 1 1 2008 +w 1 1 1 2009 +w 1 1 1 2010 +w 1 1 1 2011 +w 1 1 1 2012 +w 1 1 1 21 +w 1 1 1 22 +w 1 1 1 23 +w 1 1 1 24 +w 1 1 1 25 +w 1 1 1 2g +w 1 1 1 3 +w 1 1 1 300 +w 1 1 1 3g +w 1 1 1 4 +w 1 1 1 42 +w 1 1 1 5 +w 1 1 1 50 +w 1 1 1 500 +w 1 1 1 51 +w 1 1 1 6 +w 1 1 1 7 +w 1 1 1 7z +w 1 1 1 8 +w 1 1 1 9 +w 1 1 1 ADM +w 1 1 1 ADMIN +w 1 1 1 AggreSpy +w 1 1 1 AppsLocalLogin +w 1 1 1 AppsLogin +w 1 1 1 BUILD +w 1 1 1 CMS +w 1 1 1 CVS +w 1 1 1 DB +w 1 1 1 DMSDump +w 1 1 1 Documents and Settings +w 1 1 1 Entries +w 1 1 1 FCKeditor +w 1 1 1 JMXSoapAdapter +w 1 1 1 LICENSE +w 1 1 1 MANIFEST.MF +w 1 1 1 META-INF +w 1 1 1 Makefile +w 1 1 1 OA +w 1 1 1 OAErrorDetailPage +w 1 1 1 OA_HTML +w 1 1 1 Program Files +w 1 1 1 README +w 1 1 1 Readme +w 1 1 1 Recycled +w 1 1 1 Root +w 1 1 1 SQL +w 1 1 1 SUNWmc +w 1 1 1 SiteScope +w 1 1 1 SiteServer +w 1 1 1 Spy +w 1 1 1 TEMP +w 1 1 1 TMP +w 1 1 1 TODO +w 1 1 1 Thumbs.db +w 1 1 1 WEB-INF +w 1 1 1 WS_FTP +w 1 1 1 XXX +w 1 1 1 _ +w 1 1 1 _adm +w 1 1 1 _admin +w 1 1 1 _files +w 1 1 1 _include +w 1 1 1 _js +w 1 1 1 _mem_bin +w 1 1 1 _old +w 1 1 1 _pages +w 1 1 1 _private +w 1 1 1 _res +w 1 1 1 _source +w 1 1 1 _src +w 1 1 1 _test +w 1 1 1 _vti_bin +w 1 1 1 _vti_cnf +w 1 1 1 _vti_pvt +w 1 1 1 _vti_txt +w 1 1 1 _www +w 1 1 1 a +w 1 1 1 aa +w 1 1 1 aaa +w 1 1 1 abc +w 1 1 1 abc123 +w 1 1 
1 abcd +w 1 1 1 abcd1234 +w 1 1 1 about +w 1 1 1 access +w 1 1 1 access-log +w 1 1 1 access-log.1 +w 1 1 1 access.1 +w 1 1 1 access_log +w 1 1 1 access_log.1 +w 1 1 1 accessibility +w 1 1 1 account +w 1 1 1 accounting +w 1 1 1 accounts +w 1 1 1 action +w 1 1 1 actions +w 1 1 1 active +w 1 1 1 activex +w 1 1 1 ad +w 1 1 1 adclick +w 1 1 1 add +w 1 1 1 addressbook +w 1 1 1 adm +w 1 1 1 admin +w 1 1 1 admin_ +w 1 1 1 ads +w 1 1 1 adv +w 1 1 1 advertise +w 1 1 1 advertising +w 1 1 1 affiliate +w 1 1 1 affiliates +w 1 1 1 agenda +w 1 1 1 agent +w 1 1 1 agents +w 1 1 1 ajax +w 1 1 1 album +w 1 1 1 albums +w 1 1 1 alert +w 1 1 1 alerts +w 1 1 1 alias +w 1 1 1 aliases +w 1 1 1 all +w 1 1 1 alpha +w 1 1 1 alumni +w 1 1 1 analog +w 1 1 1 announcement +w 1 1 1 announcements +w 1 1 1 anon +w 1 1 1 anonymous +w 1 1 1 ansi +w 1 1 1 apac +w 1 1 1 apache +w 1 1 1 apexec +w 1 1 1 api +w 1 1 1 apis +w 1 1 1 app +w 1 1 1 appeal +w 1 1 1 appeals +w 1 1 1 append +w 1 1 1 appl +w 1 1 1 apple +w 1 1 1 appliation +w 1 1 1 applications +w 1 1 1 apps +w 1 1 1 apr +w 1 1 1 arch +w 1 1 1 archive +w 1 1 1 archives +w 1 1 1 array +w 1 1 1 art +w 1 1 1 article +w 1 1 1 articles +w 1 1 1 artwork +w 1 1 1 ascii +w 1 1 1 asdf +w 1 1 1 asmx +w 1 1 1 asp +w 1 1 1 aspx +w 1 1 1 asset +w 1 1 1 assets +w 1 1 1 atom +w 1 1 1 attach +w 1 1 1 attachment +w 1 1 1 attachments +w 1 1 1 attachs +w 1 1 1 attic +w 1 1 1 audio +w 1 1 1 audit +w 1 1 1 audits +w 1 1 1 auth +w 1 1 1 author +w 1 1 1 authorized_keys +w 1 1 1 authors +w 1 1 1 auto +w 1 1 1 automatic +w 1 1 1 automation +w 1 1 1 avatar +w 1 1 1 avatars +w 1 1 1 award +w 1 1 1 awards +w 1 1 1 awl +w 1 1 1 awstats +w 1 1 1 b +w 1 1 1 b2b +w 1 1 1 b2c +w 1 1 1 back +w 1 1 1 backdoor +w 1 1 1 backend +w 1 1 1 backup +w 1 1 1 backups +w 1 1 1 bandwidth +w 1 1 1 bank +w 1 1 1 banks +w 1 1 1 banner +w 1 1 1 banners +w 1 1 1 bar +w 1 1 1 base +w 1 1 1 bash +w 1 1 1 basic +w 1 1 1 basket +w 1 1 1 baskets +w 1 1 1 bat +w 1 1 1 batch +w 1 1 1 baz +w 1 1 1 bb +w 1 
1 1 bb-hist +w 1 1 1 bb-histlog +w 1 1 1 bboard +w 1 1 1 bbs +w 1 1 1 beans +w 1 1 1 beehive +w 1 1 1 benefits +w 1 1 1 beta +w 1 1 1 bfc +w 1 1 1 big +w 1 1 1 bigip +w 1 1 1 bill +w 1 1 1 billing +w 1 1 1 bin +w 1 1 1 binaries +w 1 1 1 binary +w 1 1 1 bins +w 1 1 1 bio +w 1 1 1 bios +w 1 1 1 biz +w 1 1 1 bkup +w 1 1 1 blah +w 1 1 1 blank +w 1 1 1 blog +w 1 1 1 blogger +w 1 1 1 bloggers +w 1 1 1 blogs +w 1 1 1 board +w 1 1 1 bofh +w 1 1 1 book +w 1 1 1 books +w 1 1 1 boot +w 1 1 1 bottom +w 1 1 1 broken +w 1 1 1 broker +w 1 1 1 browse +w 1 1 1 bs +w 1 1 1 bsd +w 1 1 1 bugs +w 1 1 1 build +w 1 1 1 buildr +w 1 1 1 bulk +w 1 1 1 bullet +w 1 1 1 business +w 1 1 1 button +w 1 1 1 buttons +w 1 1 1 buy +w 1 1 1 buynow +w 1 1 1 bypass +w 1 1 1 bz2 +w 1 1 1 c +w 1 1 1 ca +w 1 1 1 cache +w 1 1 1 cal +w 1 1 1 calendar +w 1 1 1 camel +w 1 1 1 car +w 1 1 1 card +w 1 1 1 cards +w 1 1 1 career +w 1 1 1 careers +w 1 1 1 cars +w 1 1 1 cart +w 1 1 1 carts +w 1 1 1 cat +w 1 1 1 catalog +w 1 1 1 catalogs +w 1 1 1 catalyst +w 1 1 1 categories +w 1 1 1 category +w 1 1 1 catinfo +w 1 1 1 cats +w 1 1 1 cc +w 1 1 1 ccbill +w 1 1 1 cd +w 1 1 1 cerificate +w 1 1 1 cert +w 1 1 1 certificate +w 1 1 1 certificates +w 1 1 1 certs +w 1 1 1 cf +w 1 1 1 cfcache +w 1 1 1 cfdocs +w 1 1 1 cfide +w 1 1 1 cfm +w 1 1 1 cfusion +w 1 1 1 cgi +w 1 1 1 cgi-bin +w 1 1 1 cgi-bin2 +w 1 1 1 cgi-home +w 1 1 1 cgi-local +w 1 1 1 cgi-pub +w 1 1 1 cgi-script +w 1 1 1 cgi-shl +w 1 1 1 cgi-sys +w 1 1 1 cgi-web +w 1 1 1 cgi-win +w 1 1 1 cgibin +w 1 1 1 cgiwrap +w 1 1 1 cgm-web +w 1 1 1 change +w 1 1 1 changed +w 1 1 1 changes +w 1 1 1 charge +w 1 1 1 charges +w 1 1 1 chat +w 1 1 1 chats +w 1 1 1 checkout +w 1 1 1 child +w 1 1 1 children +w 1 1 1 cisco +w 1 1 1 cisweb +w 1 1 1 citrix +w 1 1 1 cl +w 1 1 1 claim +w 1 1 1 claims +w 1 1 1 classes +w 1 1 1 classified +w 1 1 1 classifieds +w 1 1 1 clear +w 1 1 1 click +w 1 1 1 clicks +w 1 1 1 client +w 1 1 1 clientaccesspolicy +w 1 1 1 clients +w 1 1 1 close +w 1 1 1 closed 
+w 1 1 1 closing +w 1 1 1 club +w 1 1 1 cluster +w 1 1 1 clusters +w 1 1 1 cmd +w 1 1 1 cms +w 1 1 1 cnf +w 1 1 1 cnt +w 1 1 1 cocoon +w 1 1 1 code +w 1 1 1 codec +w 1 1 1 codecs +w 1 1 1 codes +w 1 1 1 cognos +w 1 1 1 coldfusion +w 1 1 1 columns +w 1 1 1 com +w 1 1 1 comment +w 1 1 1 comments +w 1 1 1 commerce +w 1 1 1 commercial +w 1 1 1 common +w 1 1 1 communicator +w 1 1 1 community +w 1 1 1 compact +w 1 1 1 company +w 1 1 1 complaint +w 1 1 1 complaints +w 1 1 1 compliance +w 1 1 1 component +w 1 1 1 compressed +w 1 1 1 computer +w 1 1 1 computers +w 1 1 1 computing +w 1 1 1 conference +w 1 1 1 conferences +w 1 1 1 configs +w 1 1 1 console +w 1 1 1 consumer +w 1 1 1 contact +w 1 1 1 contacts +w 1 1 1 content +w 1 1 1 contents +w 1 1 1 contract +w 1 1 1 contracts +w 1 1 1 control +w 1 1 1 controlpanel +w 1 1 1 cookie +w 1 1 1 cookies +w 1 1 1 copies +w 1 1 1 copy +w 1 1 1 copyright +w 1 1 1 core +w 1 1 1 corp +w 1 1 1 corpo +w 1 1 1 corporate +w 1 1 1 corrections +w 1 1 1 count +w 1 1 1 counter +w 1 1 1 counters +w 1 1 1 counts +w 1 1 1 course +w 1 1 1 courses +w 1 1 1 cover +w 1 1 1 cpanel +w 1 1 1 cpp +w 1 1 1 cr +w 1 1 1 crack +w 1 1 1 crash +w 1 1 1 crashes +w 1 1 1 create +w 1 1 1 credits +w 1 1 1 crm +w 1 1 1 cron +w 1 1 1 crons +w 1 1 1 crontab +w 1 1 1 crontabs +w 1 1 1 crossdomain +w 1 1 1 crypt +w 1 1 1 crypto +w 1 1 1 cs +w 1 1 1 css +w 1 1 1 current +w 1 1 1 custom +w 1 1 1 custom-log +w 1 1 1 custom_log +w 1 1 1 customer +w 1 1 1 customers +w 1 1 1 cv +w 1 1 1 cxf +w 1 1 1 czcmdcvt +w 1 1 1 d +w 1 1 1 daemon +w 1 1 1 daily +w 1 1 1 dana-na +w 1 1 1 dat +w 1 1 1 data +w 1 1 1 database +w 1 1 1 databases +w 1 1 1 date +w 1 1 1 db +w 1 1 1 dba +w 1 1 1 dbase +w 1 1 1 dbman +w 1 1 1 dc +w 1 1 1 dcforum +w 1 1 1 de +w 1 1 1 dealer +w 1 1 1 debug +w 1 1 1 decl +w 1 1 1 declaration +w 1 1 1 declarations +w 1 1 1 decode +w 1 1 1 decrypt +w 1 1 1 def +w 1 1 1 default +w 1 1 1 defaults +w 1 1 1 definition +w 1 1 1 definitions +w 1 1 1 del +w 1 1 1 delete +w 
1 1 1 deleted +w 1 1 1 demo +w 1 1 1 demos +w 1 1 1 denied +w 1 1 1 deny +w 1 1 1 design +w 1 1 1 desktop +w 1 1 1 desktops +w 1 1 1 detail +w 1 1 1 details +w 1 1 1 dev +w 1 1 1 devel +w 1 1 1 developer +w 1 1 1 developers +w 1 1 1 development +w 1 1 1 device +w 1 1 1 devices +w 1 1 1 devs +w 1 1 1 df +w 1 1 1 dialog +w 1 1 1 dialogs +w 1 1 1 diff +w 1 1 1 diffs +w 1 1 1 digest +w 1 1 1 digg +w 1 1 1 dir +w 1 1 1 directories +w 1 1 1 directory +w 1 1 1 dirs +w 1 1 1 disabled +w 1 1 1 disclaimer +w 1 1 1 display +w 1 1 1 django +w 1 1 1 dl +w 1 1 1 dll +w 1 1 1 dm +w 1 1 1 dm-config +w 1 1 1 dms +w 1 1 1 dms0 +w 1 1 1 dns +w 1 1 1 do +w 1 1 1 doc +w 1 1 1 dock +w 1 1 1 docroot +w 1 1 1 docs +w 1 1 1 document +w 1 1 1 documentation +w 1 1 1 documents +w 1 1 1 domain +w 1 1 1 domains +w 1 1 1 down +w 1 1 1 download +w 1 1 1 downloads +w 1 1 1 drop +w 1 1 1 dropped +w 1 1 1 drupal +w 1 1 1 dummy +w 1 1 1 dump +w 1 1 1 dumps +w 1 1 1 dvd +w 1 1 1 dwr +w 1 1 1 dynamic +w 1 1 1 e +w 1 1 1 e2fs +w 1 1 1 ear +w 1 1 1 ecommerce +w 1 1 1 edge +w 1 1 1 edit +w 1 1 1 editor +w 1 1 1 edits +w 1 1 1 edu +w 1 1 1 education +w 1 1 1 ee +w 1 1 1 effort +w 1 1 1 efforts +w 1 1 1 egress +w 1 1 1 ejb +w 1 1 1 element +w 1 1 1 elements +w 1 1 1 em +w 1 1 1 email +w 1 1 1 emails +w 1 1 1 emea +w 1 1 1 employees +w 1 1 1 employment +w 1 1 1 empty +w 1 1 1 emu +w 1 1 1 emulator +w 1 1 1 en +w 1 1 1 en_US +w 1 1 1 encode +w 1 1 1 encrypt +w 1 1 1 eng +w 1 1 1 engine +w 1 1 1 english +w 1 1 1 enterprise +w 1 1 1 entertainment +w 1 1 1 entries +w 1 1 1 entry +w 1 1 1 env +w 1 1 1 environ +w 1 1 1 environment +w 1 1 1 ep +w 1 1 1 error +w 1 1 1 error-log +w 1 1 1 error_log +w 1 1 1 errors +w 1 1 1 es +w 1 1 1 esale +w 1 1 1 esales +w 1 1 1 etc +w 1 1 1 europe +w 1 1 1 event +w 1 1 1 events +w 1 1 1 evil +w 1 1 1 evt +w 1 1 1 ews +w 1 1 1 ex +w 1 1 1 example +w 1 1 1 examples +w 1 1 1 excalibur +w 1 1 1 exchange +w 1 1 1 exe +w 1 1 1 exec +w 1 1 1 export +w 1 1 1 ext +w 1 1 1 ext2 +w 1 1 1 
extern +w 1 1 1 external +w 1 1 1 ezshopper +w 1 1 1 f +w 1 1 1 face +w 1 1 1 faces +w 1 1 1 faculty +w 1 1 1 fail +w 1 1 1 failure +w 1 1 1 family +w 1 1 1 faq +w 1 1 1 faqs +w 1 1 1 fcgi-bin +w 1 1 1 feature +w 1 1 1 features +w 1 1 1 feed +w 1 1 1 feedback +w 1 1 1 feeds +w 1 1 1 felix +w 1 1 1 field +w 1 1 1 fields +w 1 1 1 file +w 1 1 1 fileadmin +w 1 1 1 files +w 1 1 1 filez +w 1 1 1 finance +w 1 1 1 financial +w 1 1 1 find +w 1 1 1 finger +w 1 1 1 firewall +w 1 1 1 fixed +w 1 1 1 flags +w 1 1 1 flash +w 1 1 1 flow +w 1 1 1 flows +w 1 1 1 flv +w 1 1 1 fn +w 1 1 1 folder +w 1 1 1 folders +w 1 1 1 font +w 1 1 1 fonts +w 1 1 1 foo +w 1 1 1 footer +w 1 1 1 footers +w 1 1 1 form +w 1 1 1 formatting +w 1 1 1 formmail +w 1 1 1 forms +w 1 1 1 forrest +w 1 1 1 fortune +w 1 1 1 forum +w 1 1 1 forum1 +w 1 1 1 forum2 +w 1 1 1 forumdisplay +w 1 1 1 forums +w 1 1 1 forward +w 1 1 1 foto +w 1 1 1 foundation +w 1 1 1 fr +w 1 1 1 frame +w 1 1 1 frames +w 1 1 1 framework +w 1 1 1 free +w 1 1 1 freebsd +w 1 1 1 friend +w 1 1 1 friends +w 1 1 1 frob +w 1 1 1 frontend +w 1 1 1 fs +w 1 1 1 ftp +w 1 1 1 fuck +w 1 1 1 fuckoff +w 1 1 1 fuckyou +w 1 1 1 full +w 1 1 1 fun +w 1 1 1 func +w 1 1 1 funcs +w 1 1 1 function +w 1 1 1 functions +w 1 1 1 fusion +w 1 1 1 fw +w 1 1 1 g +w 1 1 1 galleries +w 1 1 1 gallery +w 1 1 1 game +w 1 1 1 games +w 1 1 1 ganglia +w 1 1 1 garbage +w 1 1 1 gateway +w 1 1 1 gb +w 1 1 1 geeklog +w 1 1 1 general +w 1 1 1 geronimo +w 1 1 1 get +w 1 1 1 getaccess +w 1 1 1 getjobid +w 1 1 1 gfx +w 1 1 1 gif +w 1 1 1 gitweb +w 1 1 1 glimpse +w 1 1 1 global +w 1 1 1 globals +w 1 1 1 glossary +w 1 1 1 go +w 1 1 1 goaway +w 1 1 1 google +w 1 1 1 government +w 1 1 1 gprs +w 1 1 1 grant +w 1 1 1 grants +w 1 1 1 graphics +w 1 1 1 group +w 1 1 1 groupcp +w 1 1 1 groups +w 1 1 1 gsm +w 1 1 1 guest +w 1 1 1 guestbook +w 1 1 1 guests +w 1 1 1 guide +w 1 1 1 guides +w 1 1 1 gump +w 1 1 1 gwt +w 1 1 1 gz +w 1 1 1 h +w 1 1 1 hack +w 1 1 1 hacker +w 1 1 1 hacking +w 1 1 1 hackme +w 
1 1 1 hadoop +w 1 1 1 hardcore +w 1 1 1 hardware +w 1 1 1 harmony +w 1 1 1 head +w 1 1 1 header +w 1 1 1 headers +w 1 1 1 health +w 1 1 1 hello +w 1 1 1 help +w 1 1 1 helper +w 1 1 1 helpers +w 1 1 1 hi +w 1 1 1 hidden +w 1 1 1 hide +w 1 1 1 high +w 1 1 1 hipaa +w 1 1 1 history +w 1 1 1 hit +w 1 1 1 hits +w 1 1 1 hole +w 1 1 1 home +w 1 1 1 homepage +w 1 1 1 hop +w 1 1 1 horde +w 1 1 1 hosting +w 1 1 1 hosts +w 1 1 1 howto +w 1 1 1 hp +w 1 1 1 hr +w 1 1 1 hta +w 1 1 1 htbin +w 1 1 1 htdoc +w 1 1 1 htdocs +w 1 1 1 htm +w 1 1 1 htpasswd +w 1 1 1 http +w 1 1 1 httpd +w 1 1 1 https +w 1 1 1 httpuser +w 1 1 1 hu +w 1 1 1 hyper +w 1 1 1 i +w 1 1 1 ia +w 1 1 1 ibm +w 1 1 1 icat +w 1 1 1 icon +w 1 1 1 icons +w 1 1 1 id +w 1 1 1 idea +w 1 1 1 ideas +w 1 1 1 ids +w 1 1 1 ie +w 1 1 1 iframe +w 1 1 1 ig +w 1 1 1 ignore +w 1 1 1 iisadmin +w 1 1 1 iisadmpwd +w 1 1 1 iissamples +w 1 1 1 image +w 1 1 1 imagefolio +w 1 1 1 images +w 1 1 1 img +w 1 1 1 imgs +w 1 1 1 imp +w 1 1 1 import +w 1 1 1 important +w 1 1 1 in +w 1 1 1 inbound +w 1 1 1 incl +w 1 1 1 include +w 1 1 1 includes +w 1 1 1 incoming +w 1 1 1 incubator +w 1 1 1 index +w 1 1 1 index1 +w 1 1 1 index2 +w 1 1 1 index_1 +w 1 1 1 index_2 +w 1 1 1 inetpub +w 1 1 1 inetsrv +w 1 1 1 inf +w 1 1 1 info +w 1 1 1 information +w 1 1 1 ingress +w 1 1 1 init +w 1 1 1 inline +w 1 1 1 input +w 1 1 1 inquire +w 1 1 1 inquiries +w 1 1 1 inquiry +w 1 1 1 insert +w 1 1 1 install +w 1 1 1 int +w 1 1 1 interim +w 1 1 1 intermediate +w 1 1 1 internal +w 1 1 1 international +w 1 1 1 internet +w 1 1 1 intl +w 1 1 1 intranet +w 1 1 1 intro +w 1 1 1 ip +w 1 1 1 ipc +w 1 1 1 ips +w 1 1 1 irc +w 1 1 1 is +w 1 1 1 isapi +w 1 1 1 iso +w 1 1 1 issues +w 1 1 1 it +w 1 1 1 item +w 1 1 1 j +w 1 1 1 j2ee +w 1 1 1 j2me +w 1 1 1 jakarta +w 1 1 1 java-plugin +w 1 1 1 javadoc +w 1 1 1 javascript +w 1 1 1 javax +w 1 1 1 jboss +w 1 1 1 jdbc +w 1 1 1 jhtml +w 1 1 1 jigsaw +w 1 1 1 jj +w 1 1 1 jmx-console +w 1 1 1 job +w 1 1 1 jobs +w 1 1 1 joe +w 1 1 1 john +w 1 
1 1 join +w 1 1 1 joomla +w 1 1 1 journal +w 1 1 1 jp +w 1 1 1 jpa +w 1 1 1 jpg +w 1 1 1 jre +w 1 1 1 jrun +w 1 1 1 js +w 1 1 1 jsf +w 1 1 1 json +w 1 1 1 jsp +w 1 1 1 jsso +w 1 1 1 jsx +w 1 1 1 juniper +w 1 1 1 junk +w 1 1 1 jvm +w 1 1 1 k +w 1 1 1 kboard +w 1 1 1 keep +w 1 1 1 kernel +w 1 1 1 keygen +w 1 1 1 keys +w 1 1 1 kids +w 1 1 1 kill +w 1 1 1 known_hosts +w 1 1 1 l +w 1 1 1 labs +w 1 1 1 lang +w 1 1 1 large +w 1 1 1 law +w 1 1 1 layout +w 1 1 1 layouts +w 1 1 1 ldap +w 1 1 1 leader +w 1 1 1 leaders +w 1 1 1 left +w 1 1 1 legacy +w 1 1 1 legal +w 1 1 1 lenya +w 1 1 1 letters +w 1 1 1 level +w 1 1 1 lg +w 1 1 1 lib +w 1 1 1 library +w 1 1 1 libs +w 1 1 1 license +w 1 1 1 licenses +w 1 1 1 line +w 1 1 1 link +w 1 1 1 links +w 1 1 1 linux +w 1 1 1 list +w 1 1 1 listinfo +w 1 1 1 lists +w 1 1 1 live +w 1 1 1 lo +w 1 1 1 loader +w 1 1 1 loading +w 1 1 1 loc +w 1 1 1 local +w 1 1 1 location +w 1 1 1 lock +w 1 1 1 locked +w 1 1 1 log4j +w 1 1 1 logfile +w 1 1 1 logging +w 1 1 1 login +w 1 1 1 logins +w 1 1 1 logo +w 1 1 1 logoff +w 1 1 1 logon +w 1 1 1 logos +w 1 1 1 logout +w 1 1 1 logs +w 1 1 1 lost+found +w 1 1 1 low +w 1 1 1 ls +w 1 1 1 lst +w 1 1 1 lucene +w 1 1 1 m +w 1 1 1 mac +w 1 1 1 mail +w 1 1 1 mailer +w 1 1 1 mailing +w 1 1 1 mailman +w 1 1 1 mails +w 1 1 1 main +w 1 1 1 manage +w 1 1 1 management +w 1 1 1 manager +w 1 1 1 manifest +w 1 1 1 manual +w 1 1 1 manuals +w 1 1 1 map +w 1 1 1 maps +w 1 1 1 mark +w 1 1 1 marketing +w 1 1 1 master +w 1 1 1 master.passwd +w 1 1 1 match +w 1 1 1 matrix +w 1 1 1 maven +w 1 1 1 mbox +w 1 1 1 mdb +w 1 1 1 me +w 1 1 1 media +w 1 1 1 medium +w 1 1 1 mem +w 1 1 1 member +w 1 1 1 members +w 1 1 1 membership +w 1 1 1 memory +w 1 1 1 menu +w 1 1 1 messaging +w 1 1 1 meta +w 1 1 1 microsoft +w 1 1 1 migrate +w 1 1 1 migration +w 1 1 1 mina +w 1 1 1 mirror +w 1 1 1 mirrors +w 1 1 1 misc +w 1 1 1 mission +w 1 1 1 mix +w 1 1 1 mms +w 1 1 1 mobi +w 1 1 1 mobile +w 1 1 1 mock +w 1 1 1 mod +w 1 1 1 modify +w 1 1 1 mods +w 1 1 1 
module +w 1 1 1 modules +w 1 1 1 mojo +w 1 1 1 money +w 1 1 1 monitoring +w 1 1 1 more +w 1 1 1 move +w 1 1 1 movie +w 1 1 1 movies +w 1 1 1 mp +w 1 1 1 mp3 +w 1 1 1 mp3s +w 1 1 1 ms +w 1 1 1 ms-sql +w 1 1 1 msadc +w 1 1 1 msadm +w 1 1 1 msg +w 1 1 1 msie +w 1 1 1 msql +w 1 1 1 mssql +w 1 1 1 mta +w 1 1 1 multimedia +w 1 1 1 music +w 1 1 1 mx +w 1 1 1 my +w 1 1 1 myfaces +w 1 1 1 myphpnuke +w 1 1 1 mysql +w 1 1 1 mysqld +w 1 1 1 n +w 1 1 1 nav +w 1 1 1 navigation +w 1 1 1 net +w 1 1 1 netbsd +w 1 1 1 nethome +w 1 1 1 nets +w 1 1 1 network +w 1 1 1 networking +w 1 1 1 new +w 1 1 1 news +w 1 1 1 newsletter +w 1 1 1 newsletters +w 1 1 1 next +w 1 1 1 nfs +w 1 1 1 nice +w 1 1 1 nl +w 1 1 1 nobody +w 1 1 1 node +w 1 1 1 none +w 1 1 1 note +w 1 1 1 notes +w 1 1 1 notification +w 1 1 1 notifications +w 1 1 1 notified +w 1 1 1 notify +w 1 1 1 ns +w 1 1 1 nsf +w 1 1 1 nuke +w 1 1 1 nul +w 1 1 1 null +w 1 1 1 o +w 1 1 1 oa_servlets +w 1 1 1 oauth +w 1 1 1 obdc +w 1 1 1 obsolete +w 1 1 1 obsoleted +w 1 1 1 odbc +w 1 1 1 ode +w 1 1 1 oem +w 1 1 1 ofbiz +w 1 1 1 office +w 1 1 1 onbound +w 1 1 1 online +w 1 1 1 op +w 1 1 1 open +w 1 1 1 openbsd +w 1 1 1 opendir +w 1 1 1 openejb +w 1 1 1 openjpa +w 1 1 1 operations +w 1 1 1 opinion +w 1 1 1 oprocmgr-status +w 1 1 1 opt +w 1 1 1 option +w 1 1 1 options +w 1 1 1 ora +w 1 1 1 oracle +w 1 1 1 oracle.xml.xsql.XSQLServlet +w 1 1 1 order +w 1 1 1 ordered +w 1 1 1 orders +w 1 1 1 org +w 1 1 1 osc +w 1 1 1 oscommerce +w 1 1 1 other +w 1 1 1 outgoing +w 1 1 1 outline +w 1 1 1 output +w 1 1 1 outreach +w 1 1 1 overview +w 1 1 1 owa +w 1 1 1 ows +w 1 1 1 ows-bin +w 1 1 1 p +w 1 1 1 p2p +w 1 1 1 pack +w 1 1 1 packages +w 1 1 1 page +w 1 1 1 page1 +w 1 1 1 page2 +w 1 1 1 page_1 +w 1 1 1 page_2 +w 1 1 1 pages +w 1 1 1 paid +w 1 1 1 panel +w 1 1 1 paper +w 1 1 1 papers +w 1 1 1 parse +w 1 1 1 partner +w 1 1 1 partners +w 1 1 1 party +w 1 1 1 pass +w 1 1 1 passwd +w 1 1 1 password +w 1 1 1 passwords +w 1 1 1 past +w 1 1 1 patch +w 1 1 1 patches 
+w 1 1 1 paypal +w 1 1 1 pc +w 1 1 1 pci +w 1 1 1 pda +w 1 1 1 pdf +w 1 1 1 pdfs +w 1 1 1 peek +w 1 1 1 pending +w 1 1 1 people +w 1 1 1 perf +w 1 1 1 performance +w 1 1 1 perl +w 1 1 1 personal +w 1 1 1 pg +w 1 1 1 phf +w 1 1 1 phone +w 1 1 1 phones +w 1 1 1 phorum +w 1 1 1 photo +w 1 1 1 photos +w 1 1 1 php +w 1 1 1 php3 +w 1 1 1 phpBB +w 1 1 1 phpBB2 +w 1 1 1 phpEventCalendar +w 1 1 1 phpMyAdmin +w 1 1 1 phpbb +w 1 1 1 phpmyadmin +w 1 1 1 phpnuke +w 1 1 1 phps +w 1 1 1 pic +w 1 1 1 pics +w 1 1 1 pictures +w 1 1 1 pii +w 1 1 1 ping +w 1 1 1 pipermail +w 1 1 1 piranha +w 1 1 1 pix +w 1 1 1 pixel +w 1 1 1 pkg +w 1 1 1 pkgs +w 1 1 1 plain +w 1 1 1 play +w 1 1 1 pls +w 1 1 1 plugin +w 1 1 1 plugins +w 1 1 1 pm +w 1 1 1 png +w 1 1 1 poi +w 1 1 1 policies +w 1 1 1 policy +w 1 1 1 politics +w 1 1 1 poll +w 1 1 1 polls +w 1 1 1 pop +w 1 1 1 pop3 +w 1 1 1 porn +w 1 1 1 port +w 1 1 1 portal +w 1 1 1 portals +w 1 1 1 portfolio +w 1 1 1 pos +w 1 1 1 post +w 1 1 1 posted +w 1 1 1 postgres +w 1 1 1 postgresql +w 1 1 1 postnuke +w 1 1 1 postpaid +w 1 1 1 posts +w 1 1 1 ppt +w 1 1 1 pr +w 1 1 1 pr0n +w 1 1 1 premium +w 1 1 1 prepaid +w 1 1 1 presentation +w 1 1 1 presentations +w 1 1 1 preserve +w 1 1 1 press +w 1 1 1 preview +w 1 1 1 previews +w 1 1 1 previous +w 1 1 1 pricing +w 1 1 1 print +w 1 1 1 printenv +w 1 1 1 printer +w 1 1 1 printers +w 1 1 1 priv +w 1 1 1 privacy +w 1 1 1 private +w 1 1 1 problems +w 1 1 1 proc +w 1 1 1 procedures +w 1 1 1 prod +w 1 1 1 product +w 1 1 1 product_info +w 1 1 1 production +w 1 1 1 products +w 1 1 1 profile +w 1 1 1 profiles +w 1 1 1 profiling +w 1 1 1 program +w 1 1 1 programming +w 1 1 1 programs +w 1 1 1 project +w 1 1 1 projects +w 1 1 1 promo +w 1 1 1 prop +w 1 1 1 properties +w 1 1 1 property +w 1 1 1 props +w 1 1 1 protect +w 1 1 1 proto +w 1 1 1 proxies +w 1 1 1 proxy +w 1 1 1 prv +w 1 1 1 ps +w 1 1 1 psql +w 1 1 1 pt +w 1 1 1 pub +w 1 1 1 public +w 1 1 1 publication +w 1 1 1 publications +w 1 1 1 pubs +w 1 1 1 pull +w 1 1 1 
purchase +w 1 1 1 purchases +w 1 1 1 purchasing +w 1 1 1 push +w 1 1 1 pw +w 1 1 1 pwd +w 1 1 1 py +w 1 1 1 python +w 1 1 1 q +w 1 1 1 qpid +w 1 1 1 queries +w 1 1 1 query +w 1 1 1 queue +w 1 1 1 queues +w 1 1 1 quote +w 1 1 1 quotes +w 1 1 1 r +w 1 1 1 radio +w 1 1 1 random +w 1 1 1 rar +w 1 1 1 rdf +w 1 1 1 read +w 1 1 1 readme +w 1 1 1 realestate +w 1 1 1 receive +w 1 1 1 received +w 1 1 1 recharge +w 1 1 1 record +w 1 1 1 records +w 1 1 1 recovery +w 1 1 1 recycle +w 1 1 1 recycled +w 1 1 1 redir +w 1 1 1 redirect +w 1 1 1 reference +w 1 1 1 reg +w 1 1 1 register +w 1 1 1 registered +w 1 1 1 registration +w 1 1 1 registrations +w 1 1 1 release +w 1 1 1 releases +w 1 1 1 remote +w 1 1 1 remove +w 1 1 1 removed +w 1 1 1 render +w 1 1 1 rendered +w 1 1 1 rep +w 1 1 1 repl +w 1 1 1 replica +w 1 1 1 replicas +w 1 1 1 replicate +w 1 1 1 replicated +w 1 1 1 replication +w 1 1 1 replicator +w 1 1 1 reply +w 1 1 1 report +w 1 1 1 reporting +w 1 1 1 reports +w 1 1 1 reprints +w 1 1 1 req +w 1 1 1 reqs +w 1 1 1 request +w 1 1 1 requests +w 1 1 1 requisition +w 1 1 1 requisitions +w 1 1 1 res +w 1 1 1 research +w 1 1 1 resin +w 1 1 1 resource +w 1 1 1 resources +w 1 1 1 rest +w 1 1 1 restore +w 1 1 1 restored +w 1 1 1 restricted +w 1 1 1 results +w 1 1 1 retail +w 1 1 1 reverse +w 1 1 1 reversed +w 1 1 1 revert +w 1 1 1 reverted +w 1 1 1 review +w 1 1 1 reviews +w 1 1 1 right +w 1 1 1 roam +w 1 1 1 roaming +w 1 1 1 robot +w 1 1 1 robots +w 1 1 1 roller +w 1 1 1 room +w 1 1 1 root +w 1 1 1 rpc +w 1 1 1 rss +w 1 1 1 rtf +w 1 1 1 ru +w 1 1 1 rule +w 1 1 1 rules +w 1 1 1 run +w 1 1 1 rwservlet +w 1 1 1 s +w 1 1 1 sale +w 1 1 1 sales +w 1 1 1 sam +w 1 1 1 samba +w 1 1 1 sample +w 1 1 1 samples +w 1 1 1 sav +w 1 1 1 save +w 1 1 1 saved +w 1 1 1 saves +w 1 1 1 sbin +w 1 1 1 scan +w 1 1 1 scanned +w 1 1 1 scans +w 1 1 1 sched +w 1 1 1 schedule +w 1 1 1 scheduled +w 1 1 1 scheduling +w 1 1 1 schema +w 1 1 1 science +w 1 1 1 screen +w 1 1 1 screens +w 1 1 1 screenshot +w 1 1 1 
screenshots +w 1 1 1 script +w 1 1 1 scriptlet +w 1 1 1 scriptlets +w 1 1 1 scripts +w 1 1 1 sdk +w 1 1 1 se +w 1 1 1 search +w 1 1 1 sec +w 1 1 1 secret +w 1 1 1 section +w 1 1 1 sections +w 1 1 1 secure +w 1 1 1 secured +w 1 1 1 security +w 1 1 1 seed +w 1 1 1 sell +w 1 1 1 send +w 1 1 1 sendmail +w 1 1 1 sendto +w 1 1 1 sent +w 1 1 1 serial +w 1 1 1 serv +w 1 1 1 serve +w 1 1 1 server +w 1 1 1 server-info +w 1 1 1 server-status +w 1 1 1 servers +w 1 1 1 service +w 1 1 1 services +w 1 1 1 servlet +w 1 1 1 servlets +w 1 1 1 session +w 1 1 1 sessions +w 1 1 1 setting +w 1 1 1 settings +w 1 1 1 setup +w 1 1 1 sh +w 1 1 1 share +w 1 1 1 shared +w 1 1 1 shares +w 1 1 1 shell +w 1 1 1 ship +w 1 1 1 shipped +w 1 1 1 shipping +w 1 1 1 shop +w 1 1 1 shopper +w 1 1 1 shopping +w 1 1 1 shops +w 1 1 1 shoutbox +w 1 1 1 show +w 1 1 1 show_post +w 1 1 1 show_thread +w 1 1 1 showcat +w 1 1 1 showenv +w 1 1 1 showjobs +w 1 1 1 showmap +w 1 1 1 showmsg +w 1 1 1 showpost +w 1 1 1 showthread +w 1 1 1 shtml +w 1 1 1 sign +w 1 1 1 signoff +w 1 1 1 signon +w 1 1 1 signup +w 1 1 1 simple +w 1 1 1 sink +w 1 1 1 site +w 1 1 1 site-map +w 1 1 1 site_map +w 1 1 1 sitemap +w 1 1 1 sites +w 1 1 1 skel +w 1 1 1 skin +w 1 1 1 skins +w 1 1 1 skip +w 1 1 1 sl +w 1 1 1 sling +w 1 1 1 sm +w 1 1 1 small +w 1 1 1 sms +w 1 1 1 smtp +w 1 1 1 snoop +w 1 1 1 so +w 1 1 1 soap +w 1 1 1 soaprouter +w 1 1 1 soft +w 1 1 1 software +w 1 1 1 solaris +w 1 1 1 sold +w 1 1 1 solution +w 1 1 1 solutions +w 1 1 1 source +w 1 1 1 sources +w 1 1 1 soutbox +w 1 1 1 sox +w 1 1 1 sp +w 1 1 1 space +w 1 1 1 spacer +w 1 1 1 spam +w 1 1 1 special +w 1 1 1 specials +w 1 1 1 sponsor +w 1 1 1 sponsors +w 1 1 1 spool +w 1 1 1 sport +w 1 1 1 sports +w 1 1 1 sqlnet +w 1 1 1 squirrel +w 1 1 1 squirrelmail +w 1 1 1 src +w 1 1 1 srv +w 1 1 1 ss +w 1 1 1 ssh +w 1 1 1 ssi +w 1 1 1 ssl +w 1 1 1 sslvpn +w 1 1 1 ssn +w 1 1 1 sso +w 1 1 1 stackdump +w 1 1 1 staff +w 1 1 1 staging +w 1 1 1 standard +w 1 1 1 standards +w 1 1 1 star +w 1 1 
1 start +w 1 1 1 stat +w 1 1 1 statement +w 1 1 1 statements +w 1 1 1 static +w 1 1 1 staticpages +w 1 1 1 statistic +w 1 1 1 statistics +w 1 1 1 stats +w 1 1 1 status +w 1 1 1 stock +w 1 1 1 storage +w 1 1 1 store +w 1 1 1 stored +w 1 1 1 stories +w 1 1 1 story +w 1 1 1 strut +w 1 1 1 struts +w 1 1 1 student +w 1 1 1 students +w 1 1 1 stuff +w 1 1 1 style +w 1 1 1 styles +w 1 1 1 submissions +w 1 1 1 submit +w 1 1 1 subscribe +w 1 1 1 subscriber +w 1 1 1 subscribers +w 1 1 1 subscription +w 1 1 1 subscriptions +w 1 1 1 success +w 1 1 1 suite +w 1 1 1 suites +w 1 1 1 sun +w 1 1 1 sunos +w 1 1 1 super +w 1 1 1 support +w 1 1 1 surf +w 1 1 1 survey +w 1 1 1 surveys +w 1 1 1 swf +w 1 1 1 sws +w 1 1 1 synapse +w 1 1 1 sync +w 1 1 1 synced +w 1 1 1 sys +w 1 1 1 system +w 1 1 1 systems +w 1 1 1 sysuser +w 1 1 1 t +w 1 1 1 tag +w 1 1 1 tags +w 1 1 1 tape +w 1 1 1 tapes +w 1 1 1 tapestry +w 1 1 1 tar +w 1 1 1 tar.bz2 +w 1 1 1 tar.gz +w 1 1 1 tb +w 1 1 1 tcl +w 1 1 1 team +w 1 1 1 tech +w 1 1 1 technical +w 1 1 1 technology +w 1 1 1 tel +w 1 1 1 tele +w 1 1 1 templ +w 1 1 1 template +w 1 1 1 templates +w 1 1 1 terms +w 1 1 1 test-cgi +w 1 1 1 test-env +w 1 1 1 test1 +w 1 1 1 test123 +w 1 1 1 test1234 +w 1 1 1 test2 +w 1 1 1 test3 +w 1 1 1 testimonial +w 1 1 1 testimonials +w 1 1 1 testing +w 1 1 1 tests +w 1 1 1 texis +w 1 1 1 text +w 1 1 1 texts +w 1 1 1 tgz +w 1 1 1 theme +w 1 1 1 themes +w 1 1 1 thread +w 1 1 1 threads +w 1 1 1 thumb +w 1 1 1 thumbnail +w 1 1 1 thumbnails +w 1 1 1 thumbs +w 1 1 1 tickets +w 1 1 1 tiki +w 1 1 1 tiles +w 1 1 1 tip +w 1 1 1 tips +w 1 1 1 title +w 1 1 1 tls +w 1 1 1 tmpl +w 1 1 1 tmps +w 1 1 1 tn +w 1 1 1 toc +w 1 1 1 todo +w 1 1 1 toggle +w 1 1 1 tomcat +w 1 1 1 tool +w 1 1 1 toolbar +w 1 1 1 toolkit +w 1 1 1 tools +w 1 1 1 top +w 1 1 1 topic +w 1 1 1 topics +w 1 1 1 torrent +w 1 1 1 torrents +w 1 1 1 tos +w 1 1 1 tour +w 1 1 1 tpl +w 1 1 1 tpv +w 1 1 1 tr +w 1 1 1 trace +w 1 1 1 traceroute +w 1 1 1 traces +w 1 1 1 track +w 1 1 1 trackback 
+w 1 1 1 tracker +w 1 1 1 trackers +w 1 1 1 tracking +w 1 1 1 tracks +w 1 1 1 traffic +w 1 1 1 trailer +w 1 1 1 trailers +w 1 1 1 training +w 1 1 1 trans +w 1 1 1 transparent +w 1 1 1 transport +w 1 1 1 trash +w 1 1 1 travel +w 1 1 1 treasury +w 1 1 1 tree +w 1 1 1 trees +w 1 1 1 trial +w 1 1 1 trunk +w 1 1 1 tsweb +w 1 1 1 tt +w 1 1 1 turbine +w 1 1 1 tuscany +w 1 1 1 tutorial +w 1 1 1 tutorials +w 1 1 1 tv +w 1 1 1 tweak +w 1 1 1 type +w 1 1 1 typo3 +w 1 1 1 typo3conf +w 1 1 1 u +w 1 1 1 ubb +w 1 1 1 uds +w 1 1 1 uk +w 1 1 1 umts +w 1 1 1 union +w 1 1 1 unix +w 1 1 1 unlock +w 1 1 1 unreg +w 1 1 1 unregister +w 1 1 1 up +w 1 1 1 upd +w 1 1 1 update +w 1 1 1 updated +w 1 1 1 updater +w 1 1 1 updates +w 1 1 1 upload +w 1 1 1 uploads +w 1 1 1 url +w 1 1 1 us +w 1 1 1 usa +w 1 1 1 usage +w 1 1 1 user +w 1 1 1 userlog +w 1 1 1 users +w 1 1 1 usr +w 1 1 1 util +w 1 1 1 utilities +w 1 1 1 utility +w 1 1 1 utils +w 1 1 1 v +w 1 1 1 v1 +w 1 1 1 v2 +w 1 1 1 var +w 1 1 1 vault +w 1 1 1 vb +w 1 1 1 vbs +w 1 1 1 vector +w 1 1 1 velocity +w 1 1 1 vendor +w 1 1 1 ver +w 1 1 1 ver1 +w 1 1 1 ver2 +w 1 1 1 version +w 1 1 1 vfs +w 1 1 1 video +w 1 1 1 videos +w 1 1 1 view +w 1 1 1 view-source +w 1 1 1 viewcvs +w 1 1 1 viewforum +w 1 1 1 viewonline +w 1 1 1 views +w 1 1 1 viewsource +w 1 1 1 viewsvn +w 1 1 1 viewtopic +w 1 1 1 viewvc +w 1 1 1 virtual +w 1 1 1 vm +w 1 1 1 voip +w 1 1 1 vol +w 1 1 1 vpn +w 1 1 1 w +w 1 1 1 w3 +w 1 1 1 w3c +w 1 1 1 wa +w 1 1 1 wap +w 1 1 1 war +w 1 1 1 warez +w 1 1 1 way-board +w 1 1 1 wbboard +w 1 1 1 wc +w 1 1 1 weather +w 1 1 1 web +w 1 1 1 web-beans +w 1 1 1 web-console +w 1 1 1 webaccess +w 1 1 1 webadmin +w 1 1 1 webagent +w 1 1 1 webalizer +w 1 1 1 webapp +w 1 1 1 webb +w 1 1 1 webbbs +w 1 1 1 webboard +w 1 1 1 webcalendar +w 1 1 1 webcart +w 1 1 1 webcasts +w 1 1 1 webcgi +w 1 1 1 webchat +w 1 1 1 webdata +w 1 1 1 webdav +w 1 1 1 weblog +w 1 1 1 weblogic +w 1 1 1 weblogs +w 1 1 1 webmail +w 1 1 1 webplus +w 1 1 1 webshop +w 1 1 1 website +w 1 1 
1 websphere +w 1 1 1 webstats +w 1 1 1 websvn +w 1 1 1 webwork +w 1 1 1 welcome +w 1 1 1 whitepapers +w 1 1 1 whois +w 1 1 1 whosonline +w 1 1 1 wicket +w 1 1 1 wiki +w 1 1 1 win +w 1 1 1 win32 +w 1 1 1 windows +w 1 1 1 winnt +w 1 1 1 wireless +w 1 1 1 wml +w 1 1 1 word +w 1 1 1 wordpress +w 1 1 1 work +w 1 1 1 working +w 1 1 1 world +w 1 1 1 wp +w 1 1 1 wp-content +w 1 1 1 wp-includes +w 1 1 1 wp-login +w 1 1 1 wrap +w 1 1 1 ws +w 1 1 1 ws-client +w 1 1 1 ws_ftp +w 1 1 1 wtai +w 1 1 1 www +w 1 1 1 www-sql +w 1 1 1 www1 +w 1 1 1 www2 +w 1 1 1 www3 +w 1 1 1 wwwboard +w 1 1 1 wwwroot +w 1 1 1 wwwstats +w 1 1 1 wwwthreads +w 1 1 1 wwwuser +w 1 1 1 x +w 1 1 1 xalan +w 1 1 1 xerces +w 1 1 1 xhtml +w 1 1 1 xls +w 1 1 1 xmlrpc +w 1 1 1 xsl +w 1 1 1 xslt +w 1 1 1 xsql +w 1 1 1 xxx +w 1 1 1 xyzzy +w 1 1 1 y +w 1 1 1 yahoo +w 1 1 1 youtube +w 1 1 1 yt +w 1 1 1 z +w 1 1 1 zboard +w 1 1 1 zend +w 1 1 1 zero +w 1 1 1 zipfiles +w 1 1 1 zips +w 1 1 1 zope +w 1 1 1 zorum +w 1 1 1 ~admin +w 1 1 1 ~apache +w 1 1 1 ~bin +w 1 1 1 ~bob +w 1 1 1 ~ftp +w 1 1 1 ~guest +w 1 1 1 ~http +w 1 1 1 ~httpd +w 1 1 1 ~john +w 1 1 1 ~log +w 1 1 1 ~logs +w 1 1 1 ~lp +w 1 1 1 ~mark +w 1 1 1 ~matt +w 1 1 1 ~nobody +w 1 1 1 ~root +w 1 1 1 ~test +w 1 1 1 ~tmp +w 1 1 1 ~www diff --git a/http_client.c b/http_client.c new file mode 100644 index 0000000..155d00f --- /dev/null +++ b/http_client.c @@ -0,0 +1,2455 @@ +/* + skipfish - high-performance, single-process asynchronous HTTP client + -------------------------------------------------------------------- + + Author: Michal Zalewski + + Copyright 2009, 2010 by Google Inc. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "types.h" +#include "alloc-inl.h" +#include "string-inl.h" +#include "database.h" + +#include "http_client.h" + +/* Assorted exported settings: */ + +u32 max_connections = MAX_CONNECTIONS, + max_conn_host = MAX_CONN_HOST, + max_requests = MAX_REQUESTS, + max_fail = MAX_FAIL, + idle_tmout = IDLE_TMOUT, + resp_tmout = RESP_TMOUT, + rw_tmout = RW_TMOUT, + size_limit = SIZE_LIMIT; + +u8 browser_type = BROWSER_FAST; +u8 auth_type = AUTH_NONE; + +struct param_array global_http_par; + +/* Counters: */ + +u32 req_errors_net, + req_errors_http, + req_errors_cur, + req_count, + req_dropped, + queue_cur, + conn_cur, + conn_count, + conn_idle_tmout, + conn_busy_tmout, + conn_failed, + req_retried, + url_scope; + +u64 bytes_sent, + bytes_recv, + bytes_deflated, + bytes_inflated; + +u8 *auth_user, + *auth_pass; + +u8 ignore_cookies; + +/* Internal globals for queue management: */ + +static struct queue_entry* queue; +static struct conn_entry* conn; +static struct dns_entry* dns; + +#ifdef QUEUE_FILO +static struct queue_entry* q_tail; +#endif /* QUEUE_FILO */ + +static u8 tear_down_idle; + + +/* Extracts parameter value from param_array. Name is matched if + non-NULL. Returns pointer to value data, not a duplicate string; + NULL if no match found. 
*/ + +u8* get_value(u8 type, u8* name, u32 offset, + struct param_array* par) { + + u32 i, coff = 0; + + for (i=0;ic;i++) { + if (type != par->t[i]) continue; + if (name && strcasecmp((char*)par->n[i], (char*)name)) continue; + if (offset != coff) { coff++; continue; } + return par->v[i]; + } + + return NULL; + +} + + +/* Inserts or overwrites parameter value in param_array. If offset + == -1, will append parameter to list. Duplicates strings, + name and val can be NULL. */ + +void set_value(u8 type, u8* name, u8* val, + s32 offset, struct param_array* par) { + + u32 i, coff = 0, matched = -1; + + /* If offset specified, try to find an entry to replace. */ + + if (offset >= 0) + for (i=0;ic;i++) { + if (type != par->t[i]) continue; + if (name && strcasecmp((char*)par->n[i], (char*)name)) continue; + if (offset != coff) { coff++; continue; } + matched = i; + break; + } + + if (matched == -1) { + + /* No offset or no match - append to the end of list. */ + + par->t = ck_realloc(par->t, (par->c + 1) * sizeof(u8)); + par->n = ck_realloc(par->n, (par->c + 1) * sizeof(u8*)); + par->v = ck_realloc(par->v, (par->c + 1) * sizeof(u8*)); + par->t[par->c] = type; + par->n[par->c] = ck_strdup(name); + par->v[par->c] = ck_strdup(val); + par->c++; + + } else { + + /* Matched - replace name & value. */ + + ck_free(par->n[matched]); + ck_free(par->v[matched]); + par->n[matched] = ck_strdup(name); + par->v[matched] = ck_strdup(val); + + } + +} + + +/* Convert a fully-qualified or relative URL string to a proper http_request + representation. Returns 0 on success, 1 on format error. */ + +u8 parse_url(u8* url, struct http_request* req, struct http_request* ref) { + + u8* cur = url; + u32 maybe_proto = strcspn((char*)url, ":/?#@"); + u8 has_host = 0, add_slash = 1; + + if (strlen((char*)url) > MAX_URL_LEN) return 1; + req->orig_url = ck_strdup(url); + + /* Interpret, skip protocol string if the URL seems to be fully-qualified; + otherwise, copy from referring URL. 
We could be stricter here, as + browsers bail out on seemingly invalid chars in proto names, but... */ + + if (maybe_proto && url[maybe_proto] == ':') { + + if (!strncasecmp((char*)url, "http:", 5)) { + req->proto = PROTO_HTTP; + cur += 5; + } else if (!strncasecmp((char*)url, "https:", 6)) { + req->proto = PROTO_HTTPS; + cur += 6; + } else return 1; + + } else { + + if (!ref || !ref->proto) return 1; + req->proto = ref->proto; + + } + + /* Interpret, skip //[login[:pass@](\[ipv4\]|\[ipv6\]|host)[:port] part of the + URL, if present. Note that "http:blarg" is a valid relative URL to most + browsers, and "//example.com/blarg" is a valid non-FQDN absolute one. + We need to mimick this, which complicates the code a bit. */ + + if (cur[0] == '/' && cur[1] == '/') { + + u32 path_st; + u8 *at_sign, *host, *x; + u8 has_utf = 0; + + cur += 2; + + /* Detect, skip login[:pass]@; we only use cmdline-supplied credentials or + wordlists into account. Be sure to report any embedded auth, though. */ + + at_sign = (u8*)strchr((char*)cur, '@'); + path_st = strcspn((char*)cur, ":/?#"); + + if (at_sign && path_st > (at_sign - cur)) { + cur = at_sign + 1; + if (!req->pivot) return 1; + problem(PROB_URL_AUTH, ref, 0, url, req->pivot, 0); + } + + /* No support for IPv6 or [ip] notation for now, so let's just refuse to + parse the URL. Also, refuse excessively long domain names for sanity. */ + + if (*cur == '[') return 1; + if (path_st > MAX_DNS_LEN) return 1; + + x = host = ck_memdup(cur, path_st + 1); + host[path_st] = 0; + + /* Scan, normalize extracted host name. */ + + while (*x) { + + switch (*x) { + + case 'A' ... 'Z': + *x = tolower(*x); + break; + + case 'a' ... 'z': + case '0' ... '9': + case '.': + case '-': + case '_': + break; + + case 0x80 ... 0xff: + has_utf = 1; + break; + + default: + /* Uh-oh, invalid characters in a host name - abandon ship. 
*/ + return 1; + + } + + x++; + + } + + /* Host names that contained high bits need to be converted to Punycode + in order to resolve properly. */ + + if (has_utf) { + + char* output = 0; + + if (idna_to_ascii_8z((char*)host, &output, 0) != IDNA_SUCCESS || + strlen(output) > MAX_DNS_LEN) { + ck_free(output); + return 1; + } + + ck_free(host); + host = (u8*)output; + + } + + req->host = host; + cur += path_st; + + /* All right, moving on: if host name is followed by :, let's try to + parse and validate port number; otherwise, assume 80 / 443, depending + on protocol. */ + + if (*cur == ':') { + + u32 digit_cnt = strspn((char*)++cur, "0123456789"); + u32 port = atoi((char*)cur); + if (!digit_cnt || (cur[digit_cnt] && !strchr("/?#", cur[digit_cnt]))) + return 1; + req->port = port; + cur += digit_cnt; + + } else { + + if (req->proto == PROTO_HTTPS) req->port = 443; else req->port = 80; + + } + + has_host = 1; + + } else { + + /* No host name found - copy from referring request instead. */ + + if (!ref || !ref->host) return 1; + + req->host = ck_strdup(ref->host); + req->addr = ref->addr; + req->port = ref->port; + + } + + if (!*cur || *cur == '#') { + u32 i; + + /* No-op path. If the URL does not specify host (e.g., #foo), copy + everything from referring request, call it a day. Otherwise + (e.g., http://example.com#foo), let tokenize_path() run to + add NULL-"" entry to the list. */ + + if (!has_host) { + for (i=0;ipar.c;i++) + if (PATH_SUBTYPE(ref->par.t[i]) || QUERY_SUBTYPE(ref->par.t[i])) + set_value(ref->par.t[i], ref->par.n[i], ref->par.v[i], -1, &req->par); + return 0; + } + + } + + if (!has_host && *cur == '?') { + u32 i; + + /* URL begins with ? and does not specify host (e.g., ?foo=bar). Copy all + path segments, but no query, then fall through to parse the query + string. 
*/ + + for (i=0;ipar.c;i++) + if (PATH_SUBTYPE(ref->par.t[i])) + set_value(ref->par.t[i], ref->par.n[i], ref->par.v[i], -1, &req->par); + + /* In this case, we do not want tokenize_path() to tinker with the path + in any way. */ + + add_slash = 0; + + } else if (!has_host && *cur != '/') { + + /* The URL does not begin with / or ?, and does not specify host (e.g., + foo/bar?baz). Copy path from referrer, but drop the last "proper" + path segment and everything that follows it. This mimicks browser + behavior (for URLs ending with /, it just drops the final NULL-"" + pair). */ + + u32 i; + u32 path_cnt = 0, path_cur = 0; + + for (i=0;ipar.c;i++) + if (ref->par.t[i] == PARAM_PATH) path_cnt++; + + for (i=0;ipar.c;i++) { + if (ref->par.t[i] == PARAM_PATH) path_cur++; + if (path_cur < path_cnt && PATH_SUBTYPE(ref->par.t[i])) + set_value(ref->par.t[i], ref->par.n[i], ref->par.v[i], -1, &req->par); + } + + } + + /* Tokenize the remaining path on top of what we parsed / copied over. */ + + tokenize_path(cur, req, add_slash); + return 0; + +} + + +/* URL-decodes a string. 'Plus' parameter governs the behavior on + + signs (as they have a special meaning only in query params, not in path). */ + +u8* url_decode_token(u8* str, u32 len, u8 plus) { + u8 *ret = ck_alloc(len + 1); + u8 *src = str, *dst = ret; + char *hex_str = "0123456789abcdef"; + + while (len--) { + u8 c = *(src++); + char *f, *s; + + if (plus && c == '+') c = ' '; + + if (c == '%' && len >= 2 && + (f = strchr(hex_str, tolower(src[0]))) && + (s = strchr(hex_str, tolower(src[1])))) { + c = ((f - hex_str) << 4) | (s - hex_str); + src += 2; len -= 2; + } + + /* We can't handle NUL-terminators gracefully when deserializing request + parameters, because param_array values are NUL-terminated themselves. + Let's encode \0 as \xFF instead, and hope nobody notices. 
*/ + + if (!c) c = 0xff; + + *(dst++) = c; + + } + + *(dst++) = 0; + + ret = ck_realloc(ret, dst - ret); + + return ret; +} + + +/* URL-encodes a string according to custom rules. The assumption here is that + the data is already tokenized as "special" boundaries such as ?, =, &, /, + ;, !, $, and , so these characters must always be escaped if present in + tokens. We otherwise let pretty much everything else go through, as it + may help with the exploitation of certain vulnerabilities. */ + +u8* url_encode_token(u8* str, u32 len) { + + u8 *ret = ck_alloc(len * 3 + 1); + u8 *src = str, *dst = ret; + + while (len--) { + u8 c = *(src++); + + if (c <= 0x20 || c >= 0x80 || strchr("#%&=/+;,!$?", c)) { + if (c == 0xFF) c = 0; + sprintf((char*)dst, "%%%02X", c); + dst += 3; + } else *(dst++) = c; + + } + + *(dst++) = 0; + + ret = ck_realloc(ret, dst - ret); + + return ret; + +} + + +/* Split path at known "special" character boundaries, URL decode values, + then put them in the provided http_request struct. */ + +void tokenize_path(u8* str, struct http_request* req, u8 add_slash) { + + u8* cur; + u8 know_dir = 0; + + while (*str == '/') str++; + cur = str; + + /* Parse path elements first. */ + + while (*cur && !strchr("?#", *cur)) { + + u32 next_seg, next_eq; + + u8 *name = NULL, *value = NULL; + u8 first_el = (str == cur); + + if (first_el || *cur == '/') { + + /* Optimize out //, /\0, /./, and /.\0. They do indicate + we are looking at a directory, so mark this. */ + + if (!first_el && (cur[1] == '/' || !cur[1])) { + cur++; + know_dir = 1; + continue; + } + + if (cur[0 + !first_el] == '.' && (cur[1 + !first_el] == '/' || + !cur[1 + !first_el])) { + cur += 1 + !first_el; + know_dir = 1; + continue; + } + + /* If we encountered /../ or /..\0, remove everything up to and + including the last "true" path element. It's also indicative + of a directory, by the way. */ + + if (cur[0 + !first_el] == '.' && cur[1 + !first_el] == '.' 
&& + (cur[2 + !first_el] == '/' || !cur[2 + !first_el])) { + + u32 i, last_p = req->par.c; + + for (i=0;ipar.c;i++) + if (req->par.t[i] == PARAM_PATH) last_p = i; + + for (i=last_p;ipar.c;i++) { + req->par.t[i] = PARAM_NONE; + } + + cur += 2 + !first_el; + know_dir = 1; + continue; + + } + + } + + /* If we're here, we have an actual item to add; cur points to + the string if it's the first element, or to field separator + if one of the subsequent ones. */ + + next_seg = strcspn((char*)cur + 1, "/;,!$?#") + 1, + next_eq = strcspn((char*)cur + 1, "=/;,!$?#") + 1; + know_dir = 0; + + if (next_eq < next_seg) { + name = url_decode_token(cur + !first_el, next_eq - !first_el, 0); + value = url_decode_token(cur + next_eq + 1, next_seg - next_eq - 1, 0); + } else { + value = url_decode_token(cur + !first_el, next_seg - !first_el, 0); + } + + switch (first_el ? '/' : *cur) { + + case ';': set_value(PARAM_PATH_S, name, value, -1, &req->par); break; + case ',': set_value(PARAM_PATH_C, name, value, -1, &req->par); break; + case '!': set_value(PARAM_PATH_E, name, value, -1, &req->par); break; + case '$': set_value(PARAM_PATH_D, name, value, -1, &req->par); break; + default: set_value(PARAM_PATH, name, value, -1, &req->par); + + } + + ck_free(name); + ck_free(value); + + cur += next_seg; + + } + + /* If the last segment was /, /./, or /../, *or* if we never added + anything to the path to begin with, we want to store a NULL-"" + entry to denote it's a directory. */ + + if (know_dir || (add_slash && (!*str || strchr("?#", *str)))) + set_value(PARAM_PATH, NULL, (u8*)"", -1, &req->par); + + /* Deal with regular query parameters now. This is much simpler, + obviously. */ + + while (*cur && !strchr("#", *cur)) { + + u32 next_seg = strcspn((char*)cur + 1, "#&;,!$") + 1; + u32 next_eq = strcspn((char*)cur + 1, "=#&;,!$") + 1; + u8 *name = NULL, *value = NULL; + + /* foo=bar syntax... 
*/ + + if (next_eq < next_seg) { + name = url_decode_token(cur + 1, next_eq - 1, 1); + value = url_decode_token(cur + next_eq + 1, next_seg - next_eq - 1, 1); + } else { + value = url_decode_token(cur + 1, next_seg - 1, 1); + } + + switch (*cur) { + + case ';': set_value(PARAM_QUERY_S, name, value, -1, &req->par); break; + case ',': set_value(PARAM_QUERY_C, name, value, -1, &req->par); break; + case '!': set_value(PARAM_QUERY_E, name, value, -1, &req->par); break; + case '$': set_value(PARAM_QUERY_D, name, value, -1, &req->par); break; + default: set_value(PARAM_QUERY, name, value, -1, &req->par); + + } + + ck_free(name); + ck_free(value); + + cur += next_seg; + + } + +} + + +/* Reconstructs URI from http_request data. Includes protocol and host + if with_host is non-zero. */ + +u8* serialize_path(struct http_request* req, u8 with_host, u8 with_post) { + u32 i, cur_pos; + u8 got_search = 0; + u8* ret; + + NEW_STR(ret, cur_pos); + +#define ASD(_p3) ADD_STR_DATA(ret, cur_pos, _p3) + + /* For human-readable uses... */ + + if (with_host) { + ASD("http"); + if (req->proto == PROTO_HTTPS) ASD("s"); + ASD("://"); + ASD(req->host); + + if ((req->proto == PROTO_HTTP && req->port != 80) || + (req->proto == PROTO_HTTPS && req->port != 443)) { + u8 port[7]; + sprintf((char*)port, ":%u", req->port); + ASD(port); + } + + } + + /* First print path... */ + + for (i=0;ipar.c;i++) + if (PATH_SUBTYPE(req->par.t[i])) { + + switch (req->par.t[i]) { + + case PARAM_PATH_S: ASD(";"); break; + case PARAM_PATH_C: ASD(","); break; + case PARAM_PATH_E: ASD("!"); break; + case PARAM_PATH_D: ASD("$"); break; + default: ASD("/"); + + } + + if (req->par.n[i]) { + u32 len = strlen((char*)req->par.n[i]); + u8* str = url_encode_token(req->par.n[i], len); + ASD(str); ASD("="); + ck_free(str); + } + if (req->par.v[i]) { + u32 len = strlen((char*)req->par.v[i]); + u8* str = url_encode_token(req->par.v[i], len); + ASD(str); + ck_free(str); + } + + } + + /* Then actual parameters. 
*/ + + for (i=0;ipar.c;i++) + if (QUERY_SUBTYPE(req->par.t[i])) { + + if (!got_search) { + ASD("?"); + got_search = 1; + } else switch (req->par.t[i]) { + + case PARAM_QUERY_S: ASD(";"); break; + case PARAM_QUERY_C: ASD(","); break; + case PARAM_QUERY_E: ASD("!"); break; + case PARAM_QUERY_D: ASD("$"); break; + default: ASD("&"); + + } + + if (req->par.n[i]) { + u32 len = strlen((char*)req->par.n[i]); + u8* str = url_encode_token(req->par.n[i], len); + ASD(str); ASD("="); + ck_free(str); + } + if (req->par.v[i]) { + u32 len = strlen((char*)req->par.v[i]); + u8* str = url_encode_token(req->par.v[i], len); + ASD(str); + ck_free(str); + } + + } + + got_search = 0; + + if (with_post) + for (i=0;ipar.c;i++) + if (POST_SUBTYPE(req->par.t[i])) { + + if (!got_search) { + ASD(" POST: "); + got_search = 1; + } else ASD("&"); + + if (req->par.n[i]) { + u32 len = strlen((char*)req->par.n[i]); + u8* str = url_encode_token(req->par.n[i], len); + ASD(str); ASD("="); + ck_free(str); + } + if (req->par.v[i]) { + u32 len = strlen((char*)req->par.v[i]); + u8* str = url_encode_token(req->par.v[i], len); + ASD(str); + ck_free(str); + } + + } + +#undef ASD + + TRIM_STR(ret, cur_pos); + return ret; + +} + + +/* Looks up IP for a particular host, returns data in network order. + Uses standard resolver, so it is slow and blocking, but we only + expect to call it a couple of times during a typical assessment. + There are some good async DNS libraries to consider in the long run. */ + +u32 maybe_lookup_host(u8* name) { + struct hostent* h; + struct dns_entry *d = dns, *prev = NULL; + u32 ret_addr = 0; + struct in_addr in; + + /* Don't bother resolving raw IP addresses, naturally. */ + + if (inet_aton((char*)name, &in)) + return (u32)in.s_addr; + + while (d) { + if (!strcasecmp((char*)name, (char*)d->name)) return d->addr; + prev = d; + d = d->next; + } + + h = gethostbyname((char*)name); + + /* If lookup fails with a transient error, be nice - try again. 
*/ + + if (!h && h_errno == TRY_AGAIN) h = gethostbyname((char*)name); + + if (h) { + + u32 i = 0; + + /* For each address associated with the host, see if we have any + other hosts that resolved to that same IP. If yes, return + that address; otherwise, just return first. This is for HTTP + performance and bookkeeping reasons. */ + + while (h->h_addr_list[i]) { + d = dns; + while (d) { + if (d->addr == *(u32*)h->h_addr_list[i]) { + ret_addr = d->addr; + goto dns_got_name; + } + d = d->next; + } + i++; + } + + ret_addr = *(u32*)h->h_addr_list[0]; + + } + +dns_got_name: + + if (!prev) d = dns = ck_alloc(sizeof(struct dns_entry)); + else d = prev->next = ck_alloc(sizeof(struct dns_entry)); + + d->name = ck_strdup(name); + d->addr = ret_addr; + + return ret_addr; + +} + + +/* Creates an ad hoc DNS cache entry, to override NS lookups. */ + +void fake_host(u8* name, u32 addr) { + struct dns_entry *d = dns, *prev = 0; + + while (d && d->next) { prev = d ; d = d->next;} + + if (!dns) d = dns = ck_alloc(sizeof(struct dns_entry)); + else d = prev->next = ck_alloc(sizeof(struct dns_entry)); + + d->name = ck_strdup(name); + d->addr = addr; + +} + + +/* Prepares a serialized HTTP buffer to be sent over the network. */ + +u8* build_request_data(struct http_request* req) { + + u8 *ret_buf, *ck_buf, *pay_buf, *path; + u32 ret_pos, ck_pos, pay_pos, i; + u8 req_type = PARAM_NONE; + + if (req->proto == PROTO_NONE) + FATAL("uninitialized http_request"); + + NEW_STR(ret_buf, ret_pos); + + path = serialize_path(req, 0, 0); + +#define ASD(_p3) ADD_STR_DATA(ret_buf, ret_pos, _p3) + + if (req->method) ASD(req->method); else ASD((u8*)"GET"); + ASD(" "); + ASD(path); + ASD(" HTTP/1.1\r\n"); + ck_free(path); + + ASD("Host: "); + ASD(req->host); + + if ((req->proto == PROTO_HTTP && req->port != 80) || + (req->proto == PROTO_HTTPS && req->port != 443)) { + char port[7]; + sprintf((char*)port, ":%u", req->port); + ASD(port); + } + + ASD("\r\n"); + + /* Insert generic browser headers first. 
*/ + + if (browser_type == BROWSER_FAST) { + + ASD("Accept-Encoding: gzip\r\n"); + ASD("Connection: keep-alive\r\n"); + ASD("User-Agent: Mozilla/5.0 SF/" VERSION "\r\n"); + + /* Some servers will reject to gzip responses unless "Mozilla/..." + is seen in User-Agent. Bleh. */ + + } else if (browser_type == BROWSER_FFOX) { + + if (!GET_HDR((u8*)"User-Agent", &req->par)) + ASD("User-Agent: Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; " + "rv:1.9.1.2) Gecko/20090729 Firefox/3.5.2 SF/" VERSION "\r\n"); + + ASD("Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;" + "q=0.8\r\n"); + + if (!GET_HDR((u8*)"Accept-Language", &req->par)) + ASD("Accept-Language: en-us,en\r\n"); + + ASD("Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7\r\n"); + ASD("Keep-Alive: 300\r\n"); + ASD("Connction: keep-alive\r\n"); + + } else /* MSIE */ { + + ASD("Accept: */*\r\n"); + + if (!GET_HDR((u8*)"Accept-Language", &req->par)) + ASD("Accept-Language: en,en-US;q=0.5\r\n"); + + if (!GET_HDR((u8*)"User-Agent", &req->par)) + ASD("User-Agent: Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; " + "Trident/4.0; .NET CLR 1.1.4322; InfoPath.1; .NET CLR " + "2.0.50727; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729; SF/" + VERSION ")\r\n"); + + ASD("Accept-Encoding: gzip, deflate\r\n"); + ASD("Connection: Keep-Alive\r\n"); + + } + + + /* Request a limited range up front to minimize unwanted traffic. */ + + if (size_limit) { + u8 limit[32]; + sprintf((char*)limit, "Range: bytes=0-%u\r\n", size_limit - 1); + ASD(limit); + } + + /* Include a dummy "Referer" header, to avoid certain XSRF checks. */ + + if (!GET_HDR((u8*)"Referer", &req->par)) { + ASD("Referer: http"); + if (req->proto == PROTO_HTTPS) ASD("s"); + ASD("://"); + ASD(req->host); + ASD("/\r\n"); + } + + /* Take care of HTTP authentication next. 
*/ + + if (auth_type == AUTH_BASIC) { + u8* lp = ck_alloc(strlen((char*)auth_user) + strlen((char*)auth_pass) + 2); + u8* lpb64; + + sprintf((char*)lp, "%s:%s", auth_user, auth_pass); + + lpb64 = b64_encode(lp, strlen((char*)lp)); + + ASD("Authorization: basic "); + ASD(lpb64); + ASD("\r\n"); + + ck_free(lpb64); + ck_free(lp); + + } + + /* Append any other requested headers and cookies. */ + + NEW_STR(ck_buf, ck_pos); + + for (i=0;ipar.c;i++) { + if (req->par.t[i] == PARAM_HEADER) { + ASD(req->par.n[i]); + ASD(": "); + ASD(req->par.v[i]); + ASD("\r\n"); + } else if (req->par.t[i] == PARAM_COOKIE) { + if (ck_pos) ADD_STR_DATA(ck_buf, ck_pos, ";"); + ADD_STR_DATA(ck_buf, ck_pos, req->par.n[i]); + ADD_STR_DATA(ck_buf, ck_pos, "="); + ADD_STR_DATA(ck_buf, ck_pos, req->par.v[i]); + } + } + + /* Also include extra globals, if any (but avoid dupes). */ + + for (i=0;ipar)) { + ASD(global_http_par.n[i]); + ASD(": "); + ASD(global_http_par.v[i]); + ASD("\r\n"); + } else if (global_http_par.t[i] == PARAM_COOKIE && + !GET_CK(global_http_par.n[i], &req->par)) { + if (ck_pos) ADD_STR_DATA(ck_buf, ck_pos, ";"); + ADD_STR_DATA(ck_buf, ck_pos, global_http_par.n[i]); + ADD_STR_DATA(ck_buf, ck_pos, "="); + ADD_STR_DATA(ck_buf, ck_pos, global_http_par.v[i]); + } + } + + if (ck_pos) { + ASD("Cookie: "); + ASD(ck_buf); + ASD("\r\n"); + } + + ck_free(ck_buf); + + /* Now, let's serialize the payload, if necessary. */ + + for (i=0;ipar.c;i++) { + switch (req->par.t[i]) { + case PARAM_POST_F: + case PARAM_POST_O: + req_type = req->par.t[i]; + break; + case PARAM_POST: + if (req_type == PARAM_NONE) req_type = PARAM_POST; + break; + } + } + + NEW_STR(pay_buf, pay_pos); + + if (req_type == PARAM_POST) { + + /* The default case: application/x-www-form-urlencoded. 
*/ + + for (i=0;ipar.c;i++) + if (req->par.t[i] == PARAM_POST) { + if (pay_pos) ADD_STR_DATA(pay_buf, pay_pos, "&"); + if (req->par.n[i]) { + u32 len = strlen((char*)req->par.n[i]); + u8* str = url_encode_token(req->par.n[i], len); + ADD_STR_DATA(pay_buf, pay_pos, str); + ADD_STR_DATA(pay_buf, pay_pos, "="); + ck_free(str); + } + if (req->par.v[i]) { + u32 len = strlen((char*)req->par.v[i]); + u8* str = url_encode_token(req->par.v[i], len); + ADD_STR_DATA(pay_buf, pay_pos, str); + ck_free(str); + } + } + + ASD("Content-Type: application/x-www-form-urlencoded\r\n"); + + } else if (req_type == PARAM_POST_O) { + + /* Opaque, non-escaped data of some sort. */ + + for (i=0;ipar.c;i++) + if (req->par.t[i] == PARAM_POST_O && req->par.v[i]) + ADD_STR_DATA(pay_buf, pay_pos, req->par.v[i]); + + ASD("Content-Type: text/plain\r\n"); + + } else if (req_type == PARAM_POST_F) { + u8 bound[20]; + + /* MIME envelopes: multipart/form-data */ + + sprintf((char*)bound, "sf%u", R(1000000)); + + for (i=0;ipar.c;i++) + if (req->par.t[i] == PARAM_POST || req->par.t[i] == PARAM_POST_F) { + + ADD_STR_DATA(pay_buf, pay_pos, "--"); + ADD_STR_DATA(pay_buf, pay_pos, bound); + ADD_STR_DATA(pay_buf, pay_pos, "\r\n" + "Content-Disposition: form-data; name=\""); + if (req->par.n[i]) + ADD_STR_DATA(pay_buf, pay_pos, req->par.n[i]); + + if (req->par.t[i] == PARAM_POST_F) { + u8 tmp[64]; + sprintf((char*)tmp, "\"; filename=\"sfish%u." 
DUMMY_EXT "\"\r\n" + "Content-Type: " DUMMY_MIME "\r\n\r\n", R(16)); + ADD_STR_DATA(pay_buf, pay_pos, tmp); + ADD_STR_DATA(pay_buf, pay_pos, new_xss_tag((u8*)DUMMY_FILE)); + register_xss_tag(req); + } else { + ADD_STR_DATA(pay_buf, pay_pos, "\"\r\n\r\n"); + if (req->par.v[i]) + ADD_STR_DATA(pay_buf, pay_pos, req->par.v[i]); + } + + ADD_STR_DATA(pay_buf, pay_pos, "\r\n"); + } + + ADD_STR_DATA(pay_buf, pay_pos, "--"); + ADD_STR_DATA(pay_buf, pay_pos, bound); + ADD_STR_DATA(pay_buf, pay_pos, "--\r\n"); + + ASD("Content-Type: multipart/form-data; boundary="); + ASD(bound); + ASD("\r\n"); + + } else if (req_type == 0) ASD("\r\n"); + + /* Finalize HTTP payload... */ + + for (i=0;i start_ptr && strchr("\r\n", *(cur_ptr-1))) cur_ptr--; + + ret = ck_alloc(cur_ptr - start_ptr + 1); + memcpy(ret, start_ptr, cur_ptr - start_ptr); + ret[cur_ptr - start_ptr] = 0; + + return ret; + +} + + +/* Builds response fingerprint data. These fingerprints are used to + find "roughly comparable" pages based on their word length + distributions (divided into FP_SIZE buckets). */ + +void fprint_response(struct http_response* res) { + u32 i, c_len = 0, in_space = 0; + + res->sig.code = res->code; + + for (i=0;ipay_len;i++) + + if (res->payload[i] <= 0x20 || strchr("<>'\"", res->payload[i])) { + if (!in_space) { + in_space = 1; + if (c_len <= FP_MAX_LEN) + res->sig.data[c_len % FP_SIZE]++; + c_len = 0; + } else c_len++; + } else { + if (in_space) { + in_space = 0; + if (c_len <= FP_MAX_LEN) + res->sig.data[c_len % FP_SIZE]++; + c_len = 0; + } else c_len++; + } + + res->sig.data[c_len % FP_SIZE]++; + +} + + +/* Parses a network buffer containing raw HTTP response received over the + network ('more' == the socket is still available for reading). Returns 0 + if response parses OK, 1 if more data should be read from the socket, + 2 if the response seems invalid, 3 if response OK but connection must be + closed. 
*/ + +u8 parse_response(struct http_request* req, struct http_response* res, + u8* data, u32 data_len, u8 more) { + u8* cur_line = 0; + s32 pay_len = -1; + u32 cur_data_off = 0, + total_chunk = 0, + http_ver; + u8 chunked = 0, compressed = 0, must_close = 0; + + if (res->code) + FATAL("struct http_response reused! Original code '%u'.", res->code); + +#define NEXT_LINE() do { \ + if (cur_line) ck_free(cur_line); \ + cur_line = grab_line(data, &cur_data_off, data_len); \ + } while (0) + + /* First, let's do a superficial request completeness check. Be + prepared for a premature end at any point. */ + + NEXT_LINE(); /* HTTP/1.x xxx ... */ + + if (!cur_line) return more ? 1 : 2; + + if (strlen((char*)cur_line) < 7 && more) { + ck_free(cur_line); + return 1; + } + + if (strncmp((char*)cur_line, "HTTP/1.", 7)) { + ck_free(cur_line); + return 2; + } + + /* Scan headers for Content-Length, Transfer-Encoding, etc. */ + + while (1) { + + NEXT_LINE(); /* Next header or empty line. */ + + /* If headers end prematurely, and more data might arrive, ask for + it; otherwise, just assume end of headers and continue. */ + + if (!cur_line) { + if (more) return 1; + res->warn |= WARN_PARTIAL; + break; + } + + /* Empty line indicates the beginning of a payload. */ + + if (!cur_line[0]) break; + + if (!strncasecmp((char*)cur_line, "Content-Length:", 15)) { + + /* The value in Content-Length header would be useful for seeing if we + have all the requested data already. Reject invalid values to avoid + integer overflows, etc, though. */ + + if (sscanf((char*)cur_line + 15, "%d", &pay_len) == 1) { + if (pay_len < 0 || pay_len > 1000000000 /* 1 GB */) { + ck_free(cur_line); + return 2; + } + } else pay_len = -1; + + } else if (!strncasecmp((char*)cur_line, "Transfer-Encoding:", 18)) { + + /* Transfer-Encoding: chunked must be accounted for to properly + determine if we received all the data when Content-Length not found. 
*/ + + u8* x = cur_line + 18; + + while (isspace(*x)) x++; + if (!strcasecmp((char*)x, "chunked")) chunked = 1; + + } else if (!strncasecmp((char*)cur_line, "Content-Encoding:", 17)) { + + /* Content-Encoding is good to know, too. */ + + u8* x = cur_line + 17; + + while (isspace(*x)) x++; + + if (!strcasecmp((char*)x, "deflate") || !strcasecmp((char*)x, "gzip")) + compressed = 1; + + } else if (!strncasecmp((char*)cur_line, "Connection:", 11)) { + + u8* x = cur_line + 11; + + while (isspace(*x)) x++; + + if (!strcasecmp((char*)x, "close")) must_close = 1; + + + + } + } + + /* We are now at the beginning of the payload. Firstly, how about decoding + 'chunked' to see if we received a complete 0-byte terminator chunk + already? */ + + if (chunked) { + while (1) { + u32 chunk_len; + + NEXT_LINE(); /* Should be chunk size, hex. */ + + if (!cur_line || sscanf((char*)cur_line, "%x", &chunk_len) != 1) { + if (more) { ck_free(cur_line); return 1; } + res->warn |= WARN_PARTIAL; + break; + } + + if (chunk_len > 1000000000 || total_chunk > 1000000000 /* 1 GB */) { + ck_free(cur_line); + return 2; + } + + /* See if we actually enough buffer to skip the chunk. Bail out if + not and more data might be coming; otherwise, adjust chunk size + accordingly. */ + + if (cur_data_off + chunk_len > data_len) { + + if (more) { ck_free(cur_line); return 1; } + chunk_len = data_len - cur_data_off; + total_chunk += chunk_len; + + res->warn |= WARN_PARTIAL; + break; + } + + total_chunk += chunk_len; + + cur_data_off += chunk_len; + NEXT_LINE(); + + /* No newline? */ + if (!cur_line) { + if (more) return 1; + res->warn |= WARN_PARTIAL; + } + + /* All right, so that was the last, complete 0-size chunk? + Exit the loop if so. */ + + if (!chunk_len) break; + + } + + if (cur_data_off != data_len) res->warn |= WARN_TRAIL; + + } else if (pay_len == -1 && more) { + + /* If in a mode other than 'chunked', and C-L not received, but more + data might be available - try to request it. 
*/ + + ck_free(cur_line); + return 1; + + } else if (pay_len != 1) { + + if (cur_data_off + pay_len > data_len) { + + /* If C-L seen, but not nough data in the buffer, try to request more + if possible, otherwise tag the response as partial. */ + + if (more) { ck_free(cur_line); return 1; } + res->warn |= WARN_PARTIAL; + + } else if (cur_data_off + pay_len < data_len) res->warn |= WARN_TRAIL; + + } + + /* Rewind, then properly parse HTTP headers, parsing cookies. */ + + cur_data_off = 0; + + NEXT_LINE(); + + if (strlen((char*)cur_line) < 13 || + sscanf((char*)cur_line, "HTTP/1.%u %u ", &http_ver, &res->code) != 2 || + res->code < 100 || res->code > 999) { + ck_free(cur_line); + return 2; + } + + /* Some servers, when presented with 'Range' header, will return 200 on + some queries for a particular resource, and 206 on other queries (e.g., + with query string), despite returning exactly as much data. As an + ugly workaround... */ + + if (res->code == 206) res->code = 200; + + if (http_ver == 0) must_close = 1; + + res->msg = ck_strdup(cur_line + 13); + + while (1) { + u8* val; + + NEXT_LINE(); /* Next header or empty line. */ + + if (!cur_line) return 2; + if (!cur_line[0]) break; + + /* Split field name and value */ + + val = (u8*) strchr((char*)cur_line, ':'); + if (!val) { ck_free(cur_line); return 2; } + *val = 0; + while (isspace(*(++val))); + + if (!strcasecmp((char*)cur_line, "Set-Cookie") || + !strcasecmp((char*)cur_line, "Set-Cookie2")) { + + /* We could bother with a proper tokenizer here, but contrary to "teh + standards", browsers generally don't accept multiple cookies in + Set-Cookie headers, handle quoted-string encoding inconsistently, + etc. So let's just grab the first value naively and move on. 
*/ + + u8* cval; + u8* orig_val; + + cval = (u8*) strchr((char*)val, ';'); + if (cval) *cval = 0; + cval = (u8*) strchr((char*)val, '='); + if (cval) { *cval = 0; cval++; } + + /* If proper value not found, use NULL name and put whatever was + found in the value field. */ + + if (!cval) { cval = val; val = 0; } + + SET_CK(val, cval, &res->hdr); + + if (val) { + + /* New or drastically changed cookies are noteworthy. */ + + orig_val = GET_CK(val, &global_http_par); + + if (!orig_val || (strlen((char*)orig_val) != strlen((char*)cval) && + strncmp((char*)cval, (char*)orig_val, 3))) { + res->cookies_set = 1; + problem(PROB_NEW_COOKIE, req, res, val, req->pivot, 0); + } + + /* Set cookie globally, but ignore obvious attempts to delete + existing ones. */ + + if (!ignore_cookies && cval[0]) + SET_CK(val, cval, &global_http_par); + + } + + } else SET_HDR(cur_line, val, &res->hdr); + + /* Content-Type is worth mining for MIME, charset data at this point. */ + + if (!strcasecmp((char*)cur_line, "Content-Type")) { + + if (res->header_mime) { + + /* Duplicate Content-Type. Fetch previous value, if different, + complain. */ + + u8* tmp = GET_HDR((u8*)"Content-Type", &res->hdr); + if (strcasecmp((char*)tmp, (char*)val)) res->warn |= WARN_CFL_HDR; + + } else { + u8 *tmp = (u8*)strchr((char*)val, ';'), *cset; + + if (tmp) { + *tmp = 0; + if ((cset = (u8*)strchr((char*)tmp + 1, '='))) + res->header_charset = ck_strdup(cset + 1); + } + + res->header_mime = ck_strdup(val); + if (tmp) *tmp = ';'; + } + + } + + } + + /* At the beginning of the payload again! */ + + if (!chunked) { + + /* Identity. Ignore actual C-L data, use just as much as we collected. */ + + res->pay_len = data_len - cur_data_off; + res->payload = ck_alloc(res->pay_len + 1); + res->payload[res->pay_len] = 0; /* NUL-terminate for safer parsing. 
*/ + + memcpy(res->payload, data + cur_data_off, res->pay_len); + + } else { + + u32 chunk_off = 0; + + /* Chunked - we should have the authoritative length of chunk + contents in total_chunk already, and the overall structure + validated, so let's just reparse quickly. */ + + res->pay_len = total_chunk; + res->payload = ck_alloc(total_chunk + 1); + res->payload[res->pay_len] = 0; + + while (1) { + u32 chunk_len; + + NEXT_LINE(); + + if (!cur_line || sscanf((char*)cur_line, "%x", &chunk_len) != 1) break; + + if (cur_data_off + chunk_len > data_len) + chunk_len = data_len - cur_data_off; + + memcpy(res->payload + chunk_off, data + cur_data_off, chunk_len); + + chunk_off += chunk_len; + cur_data_off += chunk_len; + + NEXT_LINE(); + + if (!chunk_len) break; + } + + } + + ck_free(cur_line); + + if (compressed) { + + u8* tmp_buf; + + /* Deflate or gzip - zlib can handle both the same way. We lazily allocate + a SIZE_LIMIT output buffer, then truncate it if necessary. */ + + z_stream d; + s32 err; + + tmp_buf = ck_alloc(SIZE_LIMIT + 1); + + d.zalloc = 0; + d.zfree = 0; + d.opaque = 0; + d.next_in = res->payload; + d.avail_in = res->pay_len; + d.next_out = tmp_buf; + d.avail_out = SIZE_LIMIT; + + /* Say hello to third-party vulnerabilities! */ + + if (inflateInit2(&d, 32 + 15) != Z_OK) { + inflateEnd(&d); + ck_free(tmp_buf); + return 2; + } + + err = inflate(&d, Z_FINISH); + inflateEnd(&d); + + if (err != Z_BUF_ERROR && err != Z_OK && err != Z_STREAM_END) { + ck_free(tmp_buf); + return 2; + } + + ck_free(res->payload); + + bytes_deflated += res->pay_len; + + res->pay_len = SIZE_LIMIT - d.avail_out; + res->payload = ck_realloc(tmp_buf, res->pay_len + 1); + res->payload[res->pay_len] = 0; + + + bytes_inflated += res->pay_len; + + } + +#undef NEXT_LINE + + fprint_response(res); + + return must_close ? 
3 : 0; +} + + +/* Performs a deep free() of struct http_request */ + +void destroy_request(struct http_request* req) { + u32 i; + + for (i=0;ipar.c;i++) { + ck_free(req->par.n[i]); + ck_free(req->par.v[i]); + } + + ck_free(req->par.t); + ck_free(req->par.n); + ck_free(req->par.v); + + ck_free(req->method); + ck_free(req->host); + ck_free(req->orig_url); + ck_free(req); + +} + + +/* Performs a deep free() of struct http_response */ + +void destroy_response(struct http_response* res) { + u32 i; + + for (i=0;ihdr.c;i++) { + ck_free(res->hdr.n[i]); + ck_free(res->hdr.v[i]); + } + + ck_free(res->hdr.t); + ck_free(res->hdr.n); + ck_free(res->hdr.v); + + ck_free(res->meta_charset); + ck_free(res->header_charset); + ck_free(res->header_mime); + + ck_free(res->msg); + ck_free(res->payload); + ck_free(res); + +} + + +/* Performs a deep free(), unlinking of struct queue_entry, and the + underlying request / response pair. */ + +static void destroy_unlink_queue(struct queue_entry* q, u8 keep) { + if (!keep) { + if (q->req) destroy_request(q->req); + if (q->res) destroy_response(q->res); + } + if (!q->prev) queue = q->next; else q->prev->next = q->next; +#ifdef QUEUE_FILO + if (!q->next) q_tail = q->prev; +#endif /* QUEUE_FILO */ + if (q->next) q->next->prev = q->prev; + ck_free(q); + queue_cur--; +} + + +/* Performs a deep free(), unlinking, network shutdown for struct + conn_entry, as well as the underlying queue entry, request + and response structs. */ + +static void destroy_unlink_conn(struct conn_entry* c, u8 keep) { + if (c->q) destroy_unlink_queue(c->q, keep); + if (!c->prev) conn = c->next; else c->prev->next = c->next; + if (c->next) c->next->prev = c->prev; + if (c->srv_ssl) SSL_free(c->srv_ssl); + if (c->srv_ctx) SSL_CTX_free(c->srv_ctx); + ck_free(c->write_buf); + ck_free(c->read_buf); + close(c->fd); + ck_free(c); + conn_cur--; +} + + +/* Performs struct conn_entry for reuse following a clean shutdown. 
*/ + +static void reuse_conn(struct conn_entry* c, u8 keep) { + if (c->q) destroy_unlink_queue(c->q, keep); + c->q = 0; + ck_free(c->read_buf); + ck_free(c->write_buf); + c->read_buf = c->write_buf = NULL; + c->read_len = c->write_len = c->write_off = 0; + c->SSL_rd_w_wr = c->SSL_wr_w_rd = 0; +} + + +/* Schedules a new asynchronous request (does not make a copy of the + original http_request struct, may deallocate it immediately or + later on); req->callback() will be invoked when the request is + completed (or fails - maybe right away). */ + +void async_request(struct http_request* req) { + struct queue_entry *qe; + struct http_response *res; + + if (req->proto == PROTO_NONE || !req->callback) + FATAL("uninitialized http_request"); + + res = ck_alloc(sizeof(struct http_response)); + + req->addr = maybe_lookup_host(req->host); + + /* Don't try to issue extra requests if max_fail + consecutive failures exceeded; but still try to + wrap up the (partial) scan. */ + + if (req_errors_cur > max_fail) { + DEBUG("!!! Too many subsequent request failures!\n"); + res->state = STATE_SUPPRESS; + if (!req->callback(req, res)) { + destroy_request(req); + destroy_response(res); + } + req_dropped++; + return; + } + + /* DNS errors mean instant fail. */ + + if (!req->addr) { + DEBUG("!!! DNS error!\n"); + res->state = STATE_DNSERR; + if (!req->callback(req, res)) { + destroy_request(req); + destroy_response(res); + } + req_errors_net++; + conn_count++; + conn_failed++; + return; + } + + /* Enforce user limits. */ + + if (req_count > max_requests) { + DEBUG("!!! Total request limit exceeded!\n"); + res->state = STATE_SUPPRESS; + if (!req->callback(req, res)) { + destroy_request(req); + destroy_response(res); + } + req_dropped++; + return; + } + + /* OK, looks like we're good to go. Insert the request + into the the queue. 
*/ + +#ifdef QUEUE_FILO + + qe = q_tail; + q_tail = ck_alloc(sizeof(struct queue_entry)); + q_tail->req = req; + q_tail->res = res; + q_tail->prev = qe; + + if (q_tail->prev) q_tail->prev->next = q_tail; + + if (!queue) queue = q_tail; + +#else + + qe = queue; + + queue = ck_alloc(sizeof(struct queue_entry)); + queue->req = req; + queue->res = res; + queue->next = qe; + + if (queue->next) queue->next->prev = queue; + +#endif /* ^QUEUE_FILO */ + + queue_cur++; + req_count++; + +} + + +/* Check SSL properties, raise security alerts if necessary. We do not perform + a very thorough validation - we do not check for valid root CAs, bad ciphers, + SSLv2 support, etc - as these are covered well by network-level security + assessment tools anyway. + + We might eventually want to check aliases or support TLS SNI. */ + +static void check_ssl(struct conn_entry* c) { + X509 *p; + + p = SSL_get_peer_certificate(c->srv_ssl); + + if (p) { + u32 cur_time = time(0); + char *issuer, *host, *req_host; + + /* Check for certificate expiration... */ + + if (ASN1_UTCTIME_cmp_time_t(p->cert_info->validity->notBefore, cur_time) + != -1 || + ASN1_UTCTIME_cmp_time_t(p->cert_info->validity->notAfter, cur_time) + != 1) + problem(PROB_SSL_CERT_DATE, c->q->req, 0, 0, + host_pivot(c->q->req->pivot), 0); + + /* Check for self-signed certs or no issuer data. */ + + issuer = X509_NAME_oneline(p->cert_info->issuer,NULL,0); + + if (!issuer || !p->name || !strcmp(issuer, p->name)) + problem(PROB_SSL_SELF_CERT, c->q->req, 0, (u8*)issuer, + host_pivot(c->q->req->pivot), 0); + else + problem(PROB_SSL_CERT, c->q->req, 0, (u8*)issuer, + host_pivot(c->q->req->pivot), 0); + + free(issuer); + + /* Extract CN= from certificate name, compare to destination host. 
*/ + + host = strrchr(p->name, '='); + req_host = (char*)c->q->req->host; + + if (host) { + host++; + if (host[0] == '*' && host[1] == '.') { + host++; + if (strlen(req_host) > strlen(host)) + req_host += strlen(req_host) - strlen(host); + } + } + + if (!host || strcasecmp(host, req_host)) + problem(PROB_SSL_BAD_HOST, c->q->req, 0, (u8*)host, + host_pivot(c->q->req->pivot), 0); + + X509_free(p); + + } else problem(PROB_SSL_NO_CERT, c->q->req, 0, 0, + host_pivot(c->q->req->pivot), 0); + + c->ssl_checked = 1; +} + + +/* Associates a queue entry with an existing connection (if 'use_c' is + non-NULL), or creates a new connection to host (if 'use_c' NULL). */ + +static void conn_associate(struct conn_entry* use_c, struct queue_entry* q) { + struct conn_entry* c; + + if (use_c) { + + c = use_c; + c->reused = 1; + + } else { + + struct sockaddr_in sin; + + /* OK, we need to create a new connection list entry and connect + it to a target host. */ + + c = ck_alloc(sizeof(struct conn_entry)); + + conn_count++; + + c->proto = q->req->proto; + c->addr = q->req->addr; + c->port = q->req->port; + + c->fd = socket(PF_INET, SOCK_STREAM, 0); + + if (c->fd < 0) { + +connect_error: + + if (c->fd >=0) close(c->fd); + q->res->state = STATE_LOCALERR; + destroy_unlink_queue(q, q->req->callback(q->req, q->res)); + req_errors_net++; + req_errors_cur++; + + ck_free(c); + conn_failed++; + return; + } + + sin.sin_family = PF_INET; + sin.sin_port = htons(c->port); + + memcpy(&sin.sin_addr, &q->req->addr, 4); + + fcntl(c->fd, F_SETFL, O_NONBLOCK); + + if (connect(c->fd, (struct sockaddr*) &sin, sizeof(struct sockaddr_in)) && + (errno != EINPROGRESS)) goto connect_error; + + /* HTTPS also requires SSL state to be initialized at this point. 
*/ + + if (c->proto == PROTO_HTTPS) { + + c->srv_ctx = SSL_CTX_new(SSLv23_client_method()); + + if (!c->srv_ctx) goto connect_error; + + SSL_CTX_set_mode(c->srv_ctx, SSL_MODE_ENABLE_PARTIAL_WRITE | + SSL_MODE_ACCEPT_MOVING_WRITE_BUFFER); + + c->srv_ssl = SSL_new(c->srv_ctx); + + if (!c->srv_ssl) { + SSL_CTX_free(c->srv_ctx); + goto connect_error; + } + + SSL_set_fd(c->srv_ssl, c->fd); + SSL_set_connect_state(c->srv_ssl); + + } + + /* Make it official. */ + + c->next = conn; + conn = c; + if (c->next) c->next->prev = c; + + conn_cur++; + + } + + c->q = q; + q->c = c; + + q->res->state = STATE_CONNECT; + c->req_start = c->last_rw = time(0); + c->write_buf = build_request_data(q->req); + c->write_len = strlen((char*)c->write_buf); + +} + + +/* Processes the queue. Returns the number of queue entries remaining, + 0 if none. Will do a blocking select() to wait for socket state changes + (or timeouts) if no data available to process. This is the main + routine for the scanning loop. */ + +u32 next_from_queue(void) { + + u32 cur_time = time(0); + + if (conn_cur) { + static struct pollfd* p; + struct conn_entry* c = conn; + u32 i = 0; + + /* First, go through all connections, handle connects, SSL handshakes, data + reads and writes, and exceptions. */ + + if (p) free(p); + p = __DFL_ck_alloc(sizeof(struct pollfd) * conn_cur); + + while (c) { + p[i].fd = c->fd; + p[i].events = POLLIN | POLLERR | POLLHUP; + if (c->write_len - c->write_off || c->SSL_rd_w_wr) + p[i].events |= POLLOUT; + c = c->next; + i++; + } + + poll(p, conn_cur, 100); + + c = conn; + + for (i=0;inext; + + /* Connection closed: see if we have any pending data to write. If yes, + fail. If not, try parse_response() to see if we have all the data. + Clean up. */ + + if (p[i].revents & (POLLERR|POLLHUP)) { + + u8 keep; + +network_error: + + keep = 0; + + /* Retry requests that were sent on old keep-alive connections + and failed instantly with no data read; might be just that + the server got bored. 
*/ + + if (c->q && c->reused && !c->read_len) { + + c->q->res->state = STATE_NOTINIT; + c->q->c = 0; + c->q = 0; + + req_retried++; + + } else if (c->q) { + + if (c->write_len - c->write_off || !c->read_len) { + c->q->res->state = STATE_CONNERR; + keep = c->q->req->callback(c->q->req, c->q->res); + req_errors_net++; + req_errors_cur++; + } else { + if (parse_response(c->q->req, c->q->res, c->read_buf, + c->read_len, 0) != 2) { + c->q->res->state = STATE_OK; + keep = c->q->req->callback(c->q->req, c->q->res); + if (req_errors_cur <= max_fail) + req_errors_cur = 0; + } else { + c->q->res->state = STATE_CONNERR; + keep = c->q->req->callback(c->q->req, c->q->res); + req_errors_net++; + req_errors_cur++; + } + } + + } + + destroy_unlink_conn(c, keep); + + } else + + /* Incoming data (when SSL_write() did not request a read) or + continuation of SSL_read() possible (if SSL_read() wanted to write). + Process data, call parse_response() to see if w have all we wanted. + Update event timers. */ + + if (((p[i].revents & POLLIN) && !c->SSL_wr_w_rd) || + ((p[i].revents & POLLOUT) && c->SSL_rd_w_wr)) { + + if (c->q) { + s32 read_res; + u8 p_ret; + + c->read_buf = ck_realloc(c->read_buf, c->read_len + READ_CHUNK + 1); + + if (c->proto == PROTO_HTTPS) { + s32 ssl_err; + + c->SSL_rd_w_wr = 0; + + read_res = SSL_read(c->srv_ssl, c->read_buf + c->read_len, + READ_CHUNK); + + if (!read_res) goto network_error; + + if (read_res < 0) { + ssl_err = SSL_get_error(c->srv_ssl, read_res); + if (ssl_err == SSL_ERROR_WANT_WRITE) c->SSL_rd_w_wr = 1; + else if (ssl_err != SSL_ERROR_WANT_READ) goto network_error; + read_res = 0; + } + + } else { + read_res = read(c->fd, c->read_buf + c->read_len, READ_CHUNK); + if (read_res <= 0) goto network_error; + } + + bytes_recv += read_res; + + c->read_len += read_res; + c->read_buf = ck_realloc(c->read_buf, c->read_len + 1); + + c->read_buf[c->read_len] = 0; /* NUL-terminate for sanity. 
*/ + + /* We force final parse_response() if response length exceeded + size_limit by more than 4 kB. The assumption here is that + it is less expensive to redo the connection than it is + to continue receiving an unknown amount of extra data. */ + + p_ret = parse_response(c->q->req, c->q->res, c->read_buf, c->read_len, + (c->read_len > (size_limit + READ_CHUNK)) ? 0 : 1); + + if (!p_ret || p_ret == 3) { + + u8 keep; + + c->q->res->state = STATE_OK; + keep = c->q->req->callback(c->q->req, c->q->res); + + /* If we got all data without hitting the limit, and if + "Connection: close" is not indicated, we might want + to keep the connection for future use. */ + + if (c->read_len > (size_limit + READ_CHUNK) || p_ret) + destroy_unlink_conn(c, keep); else reuse_conn(c, keep); + + if (req_errors_cur <= max_fail) + req_errors_cur = 0; + + } else if (p_ret == 2) { + c->q->res->state = STATE_RESPERR; + destroy_unlink_conn(c, c->q->req->callback(c->q->req, c->q->res)); + req_errors_http++; + req_errors_cur++; + } else { + c->last_rw = cur_time; + c->q->res->state = STATE_RECEIVE; + } + + } else destroy_unlink_conn(c, 0); /* Unsolicited response! */ + + } else + + /* Write possible (if SSL_read() did not request a write), or + continuation of SSL_write() possible (if SSL_write() wanted to + read). Send data, update timers, etc. 
*/ + + if (((p[i].revents & POLLOUT) && !c->SSL_rd_w_wr) || + ((p[i].revents & POLLIN) && c->SSL_wr_w_rd)) { + + if (c->write_len - c->write_off) { + s32 write_res; + + if (c->proto == PROTO_HTTPS) { + s32 ssl_err; + + c->SSL_wr_w_rd = 0; + + write_res = SSL_write(c->srv_ssl, c->write_buf + c->write_off, + c->write_len - c->write_off); + + if (!write_res) goto network_error; + + if (write_res < 0) { + ssl_err = SSL_get_error(c->srv_ssl, write_res); + if (ssl_err == SSL_ERROR_WANT_READ) c->SSL_wr_w_rd = 1; + else if (ssl_err != SSL_ERROR_WANT_WRITE) goto network_error; + write_res = 0; + } else if (!c->ssl_checked) check_ssl(c); + + } else { + write_res = write(c->fd, c->write_buf + c->write_off, + c->write_len - c->write_off); + if (write_res <= 0) goto network_error; + } + + bytes_sent += write_res; + + c->write_off += write_res; + + c->q->res->state = STATE_SEND; + + c->last_rw = cur_time; + + } + + } else + + /* Nothing happened. Check timeouts, kill stale connections. + Active (c->q) connections get checked for total and last I/O + timeouts. Non-active connctions must just not exceed + idle_tmout. */ + + if (!p[i].revents) { + + u8 keep = 0; + + if ((c->q && (cur_time - c->last_rw > rw_tmout || + cur_time - c->req_start > resp_tmout)) || + (!c->q && (cur_time - c->last_rw > idle_tmout)) || + (!c->q && tear_down_idle)) { + + if (c->q) { + c->q->res->state = STATE_CONNERR; + keep = c->q->req->callback(c->q->req, c->q->res); + req_errors_net++; + req_errors_cur++; + conn_busy_tmout++; + } else { + conn_idle_tmout++; + tear_down_idle = 0; + } + + destroy_unlink_conn(c, keep); + + } + + } + + c = next; + + } + + } + + /* OK, connection-handling affairs taken care of! Next, let's go through all + queue entries NOT currently associated with a connection, and try to + pair them up with something. 
*/ + + if (queue_cur) { + struct queue_entry *q = queue; + + while (q) { + struct queue_entry* next = q->next; + u32 to_host = 0; + + if (!q->c) { + + struct conn_entry* c = conn; + + /* Let's try to find a matching, idle connection first. */ + + while (c) { + struct conn_entry* cnext = c->next; + + if (c->addr == q->req->addr && (++to_host) && + c->port == q->req->port && + c->proto == q->req->proto && !c->q) { + conn_associate(c, q); + goto next_q_entry; + } + + c = cnext; + } + + /* No match. If we are out of slots, request some other idle + connection to be nuked soon. */ + + if (to_host < max_conn_host && conn_cur < max_connections) { + conn_associate(0, q); + goto next_q_entry; + } else tear_down_idle = 1; + + } + +next_q_entry: + + q = next; + + } + + } + + return queue_cur; +} + + +/* Helper function for request / response dumpers: */ +static void dump_params(struct param_array* par) { + u32 i; + + for (i=0;ic;i++) { + + switch (par->t[i]) { + case PARAM_NONE: SAY(" <<<<"); break; + case PARAM_PATH: SAY(" PATH"); break; + case PARAM_PATH_S: SAY(" PT_S"); break; + case PARAM_PATH_C: SAY(" PT_C"); break; + case PARAM_PATH_E: SAY(" PT_E"); break; + case PARAM_PATH_D: SAY(" PT_D"); break; + case PARAM_QUERY: SAY(" QUER"); break; + case PARAM_QUERY_S: SAY(" QR_S"); break; + case PARAM_QUERY_C: SAY(" QR_C"); break; + case PARAM_QUERY_E: SAY(" QR_E"); break; + case PARAM_QUERY_D: SAY(" QR_D"); break; + case PARAM_POST: SAY(" POST"); break; + case PARAM_POST_F: SAY(" FILE"); break; + case PARAM_POST_O: SAY(" OPAQ"); break; + case PARAM_HEADER: SAY(" head"); break; + case PARAM_COOKIE: SAY(" cook"); break; + default: SAY(" ????"); + } + + SAY(":%-20s = '%s'\n", + par->n[i] ? par->n[i] : (u8*)"-", + par->v[i] ? par->v[i] : (u8*)"-"); + + } +} + + +/* Creates a working copy of a request. If all is 0, does not copy + path, query parameters, or POST data (but still copies headers). 
*/ + +struct http_request* req_copy(struct http_request* req, struct pivot_desc* pv, + u8 all) { + struct http_request* ret; + u32 i; + + if (!req) return NULL; + + ret = ck_alloc(sizeof(struct http_request)); + + ret->proto = req->proto; + + if (all) + ret->method = ck_strdup(req->method); + else + ret->method = ck_strdup((u8*)"GET"); + + ret->host = ck_strdup(req->host); + ret->addr = req->addr; + ret->port = req->port; + ret->pivot = pv; + ret->user_val = req->user_val; + + /* Copy all the requested data. */ + + for (i=0;ipar.c;i++) + if (all || HEADER_SUBTYPE(req->par.t[i])) + set_value(req->par.t[i], req->par.n[i], req->par.v[i], -1, + &ret->par); + + memcpy(&ret->same_sig, &req->same_sig, sizeof(struct http_sig)); + + return ret; + +} + + +/* Creates a copy of a response. */ + +struct http_response* res_copy(struct http_response* res) { + struct http_response* ret; + u32 i; + + if (!res) return NULL; + + ret = ck_alloc(sizeof(struct http_response)); + + ret->state = res->state; + ret->code = res->code; + ret->msg = res->msg ? 
ck_strdup(res->msg) : NULL; + ret->warn = res->warn; + + for (i=0;ihdr.c;i++) + set_value(res->hdr.t[i], res->hdr.n[i], res->hdr.v[i], -1, &ret->hdr); + + ret->pay_len = res->pay_len; + + if (res->pay_len) { + ret->payload = ck_alloc(res->pay_len); + memcpy(ret->payload, res->payload, res->pay_len); + } + + memcpy(&ret->sig, &res->sig, sizeof(struct http_sig)); + + ret->sniff_mime_id = res->sniff_mime_id; + ret->decl_mime_id = res->decl_mime_id; + ret->doc_type = res->doc_type; + ret->css_type = res->css_type; + ret->js_type = res->js_type; + ret->json_safe = res->json_safe; + ret->stuff_checked = res->stuff_checked; + ret->scraped = res->scraped; + + if (res->meta_charset) + ret->meta_charset = ck_strdup(res->meta_charset); + + if (res->header_charset) + ret->header_charset = ck_strdup(res->header_charset); + + if (res->header_mime) + ret->header_mime = ck_strdup(res->header_mime); + + ret->sniffed_mime = res->sniffed_mime; + + return ret; + +} + + +/* Dumps HTTP request data, for diagnostic purposes: */ + +void dump_http_request(struct http_request* r) { + + u8 *new_url, *tmp; + + SAY("\n== HTTP REQUEST %p ==\n\nBasic values:\n", r); + + SAY(" Proto = %u\n", r->proto); + SAY(" Method = %s\n", r->method ? r->method : (u8*)"(GET)"); + SAY(" Host = %s\n", r->host); + SAY(" Addr = %u.%u.%u.%u\n", ((u8*)&r->addr)[0], ((u8*)&r->addr)[1], + ((u8*)&r->addr)[2], ((u8*)&r->addr)[3]); + SAY(" Port = %d\n", r->port); + SAY(" Xrefs = pivot %p, handler %p, user %d\n", r->pivot, + r->callback, r->user_val); + + new_url = serialize_path(r, 1, 0); + + SAY("\nURLs:\n Original = %s\n" + " Synthetic = %s\n", r->orig_url ? 
r->orig_url : (u8*)"[none]", + new_url); + + ck_free(new_url); + + SAY("\nParameter array:\n"); + + dump_params(&r->par); + + SAY("\nRaw request data:\n\n"); + + tmp = build_request_data(r); + SAY("%s\n",tmp); + ck_free(tmp); + + SAY("\n== END OF REQUEST ==\n"); + +} + + +/* Dumps HTTP response data, likewise: */ + +void dump_http_response(struct http_response* r) { + + SAY("\n== HTTP RESPONSE %p ==\n\nBasic values:\n", r); + + SAY(" State = %u\n", r->state); + SAY(" Response = %u ('%s')\n", r->code, r->msg); + SAY(" Flags = %08x\n", r->warn); + SAY(" Data len = %u\n", r->pay_len); + + SAY("\nParameter array:\n"); + + dump_params(&r->hdr); + + if (r->payload) SAY("\nPayload data (%u):\n\n%s\n", r->pay_len, r->payload); + + SAY("\n== END OF RESPONSE ==\n"); + +} + +/* Destroys http state information, for memory profiling. */ + +void destroy_http() { + u32 i; + struct dns_entry* cur; + + for (i=0;inext; + ck_free(cur->name); + ck_free(cur); + cur = next; + } + +} + + +/* Shows some pretty statistics. 
*/ + +void http_stats(u64 st_time) { + u64 en_time; + struct timeval tv; + + gettimeofday(&tv, NULL); + en_time = tv.tv_sec * 1000 + tv.tv_usec / 1000; + + SAY("Scan statistics\n" + "---------------\n\n" + cGRA " Scan time : " cNOR "%u:%02u:%02u.%04u\n" + cGRA " HTTP requests : " cNOR "%u sent (%.02f/s), %.02f kB in, " + "%.02f kB out (%.02f kB/s) \n" + cGRA " Compression : " cNOR "%.02f kB in, %.02f kB out " + "(%.02f%% gain) \n" + cGRA " HTTP exceptions : " cNOR "%u net errors, %u proto errors, " + "%u retried, %u drops\n" + cGRA " TCP connections : " cNOR "%u total (%.02f req/conn) \n" + cGRA " TCP exceptions : " cNOR "%u failures, %u timeouts, %u purged\n" + cGRA " External links : " cNOR "%u skipped\n" + cGRA " Reqs pending : " cNOR "%u \n", + + /* hrs */ (u32)((en_time - st_time) / 1000 / 60 / 60), + /* min */ (u32)((en_time - st_time) / 1000 / 60) % 60, + /* sec */ (u32)((en_time - st_time) / 1000) % 60, + /* ms */ (u32)((en_time - st_time) % 1000), + + req_count - queue_cur, + (float) (req_count - queue_cur / 1.15) * 1000 / (en_time - st_time + 1), + (float) bytes_recv / 1024, (float) bytes_sent / 1024, + (float) (bytes_recv + bytes_sent) / 1.024 / (en_time - st_time + 1), + + (float) bytes_deflated / 1024, (float) bytes_inflated / 1024, + ((float) bytes_inflated - bytes_deflated) / (bytes_inflated + + bytes_deflated + 1) * 100, + + req_errors_net, req_errors_http, req_retried, req_dropped, + + conn_count, (float) req_count / conn_count, + conn_failed, conn_busy_tmout, conn_idle_tmout, + url_scope, queue_cur); +} diff --git a/http_client.h b/http_client.h new file mode 100644 index 0000000..e2fb405 --- /dev/null +++ b/http_client.h @@ -0,0 +1,418 @@ +/* + skipfish - high-performance, single-process asynchronous HTTP client + -------------------------------------------------------------------- + + Author: Michal Zalewski + + Copyright 2009, 2010 by Google Inc. All Rights Reserved. 
+ + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + */ + +#ifndef _HAVE_HTTP_CLIENT_H +#define _HAVE_HTTP_CLIENT_H + +#include + +#include "config.h" +#include "types.h" +#include "alloc-inl.h" +#include "string-inl.h" + +/* Generic type-name-value array, used for HTTP headers, etc: */ + +struct param_array { + u8* t; /* Type */ + u8** n; /* Name */ + u8** v; /* Value */ + u32 c; /* Count */ +}; + +/* Flags for http_request protocol: */ + +#define PROTO_NONE 0 /* Illegal value */ +#define PROTO_HTTP 1 /* Plain-text HTTP */ +#define PROTO_HTTPS 2 /* TLS/SSL wrapper */ + +/* Flags for http_request parameter list entries: */ + +#define PARAM_NONE 0 /* Empty parameter slot */ + +#define PARAM_PATH 10 /* Path or parametrized path */ +#define PARAM_PATH_S 11 /* - Semicolon element */ +#define PARAM_PATH_C 12 /* - Comma element */ +#define PARAM_PATH_E 13 /* - Exclamation mark element */ +#define PARAM_PATH_D 14 /* - Dollar sign element */ + +#define PATH_SUBTYPE(_x) ((_x) >= PARAM_PATH && (_x) < PARAM_QUERY) + +#define PARAM_QUERY 20 /* Query parameter */ +#define PARAM_QUERY_S 21 /* - Semicolon element */ +#define PARAM_QUERY_C 22 /* - Comma element */ +#define PARAM_QUERY_E 23 /* - Exclamation mark element */ +#define PARAM_QUERY_D 24 /* - Dollar sign element */ + +#define QUERY_SUBTYPE(_x) ((_x) >= PARAM_QUERY && (_x) < PARAM_POST) + +#define PARAM_POST 50 /* Post parameter */ +#define PARAM_POST_F 51 /* - File field */ +#define PARAM_POST_O 52 /* - Non-standard (e.g., JSON) */ + 
+#define POST_SUBTYPE(_x) ((_x) >= PARAM_POST && (_x) < PARAM_HEADER) + +#define PARAM_HEADER 100 /* Generic HTTP header */ +#define PARAM_COOKIE 101 /* - HTTP cookie */ + +#define HEADER_SUBTYPE(_x) ((_x) >= PARAM_HEADER) + +struct http_response; +struct queue_entry; + +/* HTTP response signature. */ + +struct http_sig { + u32 code; /* HTTP response code */ + u32 data[FP_SIZE]; /* Response fingerprint data */ +}; + +/* HTTP request descriptor: */ + +struct http_request { + + u8 proto; /* Protocol (PROTO_*) */ + u8* method; /* HTTP method (GET, POST, ...) */ + u8* host; /* Host name */ + u32 addr; /* Resolved IP address */ + u16 port; /* Port number to connect to */ + + u8* orig_url; /* Copy of the original URL */ + + struct param_array par; /* Parameters, headers, cookies */ + + struct pivot_desc *pivot; /* Pivot descriptor */ + + u32 user_val; /* Can be used freely */ + + u8 (*callback)(struct http_request*, struct http_response*); + /* Callback to invoke when done */ + + struct http_sig same_sig; /* Used by secondary ext fuzz. */ + +}; + +/* Flags for http_response completion state: */ + +#define STATE_NOTINIT 0 /* Request not sent */ +#define STATE_CONNECT 1 /* Connecting... 
*/ +#define STATE_SEND 2 /* Sending request */ +#define STATE_RECEIVE 3 /* Waiting for response */ + +#define STATE_OK 100 /* Proper fetch */ +#define STATE_DNSERR 101 /* DNS error */ +#define STATE_LOCALERR 102 /* Socket or routing error */ +#define STATE_CONNERR 103 /* Connection failed */ +#define STATE_RESPERR 104 /* Response not valid */ +#define STATE_SUPPRESS 200 /* Dropped (limits / errors) */ + +/* Flags for http_response warnings: */ + +#define WARN_NONE 0 /* No warnings */ +#define WARN_PARTIAL 1 /* Incomplete read */ +#define WARN_TRAIL 2 /* Trailing request garbage */ +#define WARN_CFL_HDR 4 /* Conflicting headers */ + +/* HTTP response descriptor: */ + +struct http_response { + + u32 state; /* HTTP convo state (STATE_*) */ + u32 code; /* HTTP response code */ + u8* msg; /* HTTP response message */ + u32 warn; /* Warning flags */ + + u8 cookies_set; /* Sets cookies? */ + + struct param_array hdr; /* Server header, cookie list */ + + u32 pay_len; /* Response payload length */ + u8* payload; /* Response payload data */ + + struct http_sig sig; /* Response signature data */ + + /* Various information populated by content checks: */ + + u8 sniff_mime_id; /* Sniffed MIME (MIME_*) */ + u8 decl_mime_id; /* Declared MIME (MIME_*) */ + + u8* meta_charset; /* META tag charset value */ + u8* header_charset; /* Content-Type charset value */ + u8* header_mime; /* Content-Type MIME type */ + u8* sniffed_mime; /* Detected MIME type (ref) */ + + /* Everything below is of interest to scrape_response() only: */ + + u8 doc_type; /* 0 - tbd, 1 - bin, 2 - ascii */ + u8 css_type; /* 0 - tbd, 1 - other, 2 - css */ + u8 js_type; /* 0 - tbd, 1 - other, 2 - js */ + u8 json_safe; /* 0 - no, 1 - yes */ + u8 stuff_checked; /* check_stuff() called? */ + u8 scraped; /* scrape_response() called? 
*/ + +}; + +/* Open keep-alive connection descriptor: */ + +struct conn_entry { + + s32 fd; /* The actual file descriptor */ + + u8 proto; /* Protocol (PROTO_*) */ + u32 addr; /* Destination IP */ + u32 port; /* Destination port */ + + u8 reused; /* Used for earier requests? */ + + u32 req_start; /* Unix time: request start */ + u32 last_rw; /* Unix time: last read / write */ + + SSL_CTX *srv_ctx; /* SSL context */ + SSL *srv_ssl; + u8 SSL_rd_w_wr; /* SSL_read() wants to write? */ + u8 SSL_wr_w_rd; /* SSL_write() wants to read? */ + u8 ssl_checked; /* SSL state checked? */ + + u8* read_buf; /* Current read buffer */ + u32 read_len; + u8* write_buf; /* Pending write buffer */ + u32 write_off; /* Current write offset */ + u32 write_len; + + struct queue_entry* q; /* Current queue entry */ + + struct conn_entry* prev; /* Previous connection entry */ + struct conn_entry* next; /* Next connection entry */ + +}; + +/* Request queue descriptor: */ + +struct queue_entry { + struct http_request* req; /* Request descriptor */ + struct http_response* res; /* Response descriptor */ + struct conn_entry* c; /* Connection currently used */ + struct queue_entry* prev; /* Previous queue entry */ + struct queue_entry* next; /* Next queue entry */ +}; + +/* DNS cache item: */ + +struct dns_entry { + u8* name; /* Name requested */ + u32 addr; /* IP address (0 = bad host) */ + struct dns_entry* next; /* Next cache entry */ +}; + + +/* Simplified macros to manipulate param_arrays: */ + +#define ADD(_ar,_t,_n,_v) do { \ + u32 _cur = (_ar)->c++; \ + (_ar)->t = ck_realloc((_ar)->t, (_ar)->c); \ + (_ar)->n = ck_realloc((_ar)->n, (_ar)->c * sizeof(u8*)); \ + (_ar)->v = ck_realloc((_ar)->v, (_ar)->c * sizeof(u8*)); \ + (_ar)->t[cur] = _t; \ + (_ar)->n[cur] = (_n) ? ck_strdup(_n) : 0; \ + (_ar)->v[cur] = (_v) ? 
ck_strdup(_v) : 0; \ + } while (0) + +#define FREE(_ar) do { \ + while ((_ar)->c--) { \ + free((_ar)->n[(_ar)->c]); \ + free((_ar)->v[(_ar)->c]); \ + } \ + free((_ar)->t); \ + free((_ar)->n); \ + free((_ar)->v); \ + } while (0) + + +/* Extracts parameter value from param_array. Name is matched if + non-NULL. Returns pointer to value data, not a duplicate string; + NULL if no match found. */ + +u8* get_value(u8 type, u8* name, u32 offset, struct param_array* par); + +/* Inserts or overwrites parameter value in param_array. If offset + == -1, will append parameter to list. Duplicates strings, + name and val can be NULL. */ + +void set_value(u8 type, u8* name, u8* val, s32 offset, struct param_array* par); + +/* Simplified macros for value table access: */ + +#define GET_HDR(_name, _p) get_value(PARAM_HEADER, _name, 0, _p) +#define SET_HDR(_name, _val, _p) set_value(PARAM_HEADER, _name, _val, -1, _p) +#define GET_CK(_name, _p) get_value(PARAM_COOKIE, _name, 0, _p) +#define SET_CK(_name, _val, _p) set_value(PARAM_COOKIE, _name, _val, 0, _p) + +void tokenize_path(u8* str, struct http_request* req, u8 add_slash); + +/* Convert a fully-qualified or relative URL string to a proper http_request + representation. Returns 0 on success, 1 on format error. */ + +u8 parse_url(u8* url, struct http_request* req, struct http_request* ref); + +/* URL-decodes a string. 'Plus' parameter governs the behavior on + + signs (as they have a special meaning only in query params, not in path). */ + +u8* url_decode_token(u8* str, u32 len, u8 plus); + +/* URL-encodes a string according to custom rules. The assumption here is that + the data is already tokenized as "special" boundaries such as ?, =, &, /, + ;, so these characters must always be escaped if present in tokens. We + otherwise let pretty much everything else go through, as it may help with + the exploitation of certain vulnerabilities. */ + +u8* url_encode_token(u8* str, u32 len); + +/* Reconstructs URI from http_request data. 
Includes protocol and host + if with_host is non-zero. */ + +u8* serialize_path(struct http_request* req, u8 with_host, u8 with_post); + +/* Looks up IP for a particular host, returns data in network order. + Uses standard resolver, so it is slow and blocking, but we only + expect to call it a couple of times. */ + +u32 maybe_lookup_host(u8* name); + +/* Creates an ad hoc DNS cache entry, to override NS lookups. */ + +void fake_host(u8* name, u32 addr); + +/* Schedules a new asynchronous request; req->callback() will be invoked when + the request is completed. */ + +void async_request(struct http_request* req); + +/* Prepares a serialized HTTP buffer to be sent over the network. */ + +u8* build_request_data(struct http_request* req); + +/* Parses a network buffer containing raw HTTP response received over the + network ('more' == the socket is still available for reading). Returns 0 + if response parses OK, 1 if more data should be read from the socket, + 2 if the response seems invalid. */ + +u8 parse_response(struct http_request* req, struct http_response* res, u8* data, + u32 data_len, u8 more); + +/* Processes the queue. Returns the number of queue entries remaining, + 0 if none. Will do a blocking select() to wait for socket state changes + (or timeouts) if no data available to process. This is the main + routine for the scanning loop. */ + +u32 next_from_queue(void); + +/* Dumps HTTP request stats, for debugging purposes: */ + +void dump_http_request(struct http_request* r); + +/* Dumps HTTP response stats, for debugging purposes: */ + +void dump_http_response(struct http_response* r); + +/* Fingerprints a response: */ + +void fprint_response(struct http_response* res); + +/* Performs a deep free() of sturct http_request */ + +void destroy_request(struct http_request* req); + +/* Performs a deep free() of sturct http_response */ + +void destroy_response(struct http_response* res); + +/* Creates a working copy of a request. 
If all is 0, does not copy + path, query parameters, or POST data (but still copies headers). */ + +struct http_request* req_copy(struct http_request* req, struct pivot_desc* pv, + u8 all); + +/* Creates a copy of a response. */ + +struct http_response* res_copy(struct http_response* res); + +/* Various settings and counters exported to other modules: */ + +extern u32 max_connections, + max_conn_host, + max_requests, + max_fail, + idle_tmout, + resp_tmout, + rw_tmout, + size_limit, + req_errors_net, + req_errors_http, + req_errors_cur, + req_count, + req_dropped, + req_retried, + url_scope, + conn_count, + conn_idle_tmout, + conn_busy_tmout, + conn_failed, + queue_cur; + +extern u64 bytes_sent, + bytes_recv, + bytes_deflated, + bytes_inflated; + +extern u8 ignore_cookies; + +/* Flags for browser type: */ + +#define BROWSER_FAST 0 /* Minimimal HTTP headers */ +#define BROWSER_MSIE 1 /* Try to mimic MSIE */ +#define BROWSER_FFOX 2 /* Try to mimic Firefox */ + +extern u8 browser_type; + +/* Flags for authentication type: */ + +#define AUTH_NONE 0 /* No authentication */ +#define AUTH_BASIC 1 /* 'Basic' HTTP auth */ + +extern u8 auth_type; + +extern u8 *auth_user, + *auth_pass; + +/* Global HTTP cookies, extra headers: */ + +extern struct param_array global_http_par; + +/* Destroys http state information, for memory profiling. */ + +void destroy_http(); + +/* Shows some pretty statistics. */ + +void http_stats(u64 st_time); + +#endif /* !_HAVE_HTTP_CLIENT_H */ diff --git a/report.c b/report.c new file mode 100644 index 0000000..3698706 --- /dev/null +++ b/report.c @@ -0,0 +1,779 @@ +/* + skipfish - post-processing and reporting + ---------------------------------------- + + Author: Michal Zalewski + + Copyright 2009, 2010 by Google Inc. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + */ + +#include +#include +#include +#include +#include +#include + +#include "debug.h" +#include "config.h" +#include "types.h" +#include "http_client.h" +#include "database.h" +#include "crawler.h" +#include "analysis.h" + +/* Pivot and issue signature data. */ + +struct p_sig_desc { + u8 type; /* Pivot type */ + struct http_sig* res_sig; /* Response signature */ + u32 issue_sig; /* Issues fingerprint */ + u32 child_sig; /* Children fingerprint */ +}; + + +static struct p_sig_desc* p_sig; +static u32 p_sig_cnt; +u8 suppress_dupes; + + +/* Response, issue sample data. */ + +struct mime_sample_desc { + u8* det_mime; + struct http_request** req; + struct http_response** res; + u32 sample_cnt; +}; + + +struct issue_sample_desc { + u32 type; + struct issue_desc** i; + u32 sample_cnt; +}; + +static struct mime_sample_desc* m_samp; +static struct issue_sample_desc* i_samp; +static u32 m_samp_cnt, i_samp_cnt; + + +/* qsort() helper for sort_annotate_pivot(). */ + +static int pivot_compar(const void* par1, const void* par2) { + const struct pivot_desc *p1 = *(struct pivot_desc**)par1, + *p2 = *(struct pivot_desc**)par2; + return strcasecmp((char*)p1->name, (char*)p2->name); +} + +static int issue_compar(const void* par1, const void* par2) { + const struct issue_desc *i1 = par1, *i2 = par2; + return i2->type - i1->type; +} + + +/* Recursively annotates and sorts pivots. */ + +static void sort_annotate_pivot(struct pivot_desc* pv) { + u32 i, path_child = 0; + static u32 proc_cnt; + u8 *q1, *q2; + + /* Add notes to all non-dir nodes with dir or file children... 
*/ + + for (i=0;ichild_cnt;i++) { + if (pv->child[i]->type == PIVOT_FILE || pv->child[i]->type == PIVOT_DIR) path_child = 1; + sort_annotate_pivot(pv->child[i]); + } + + if (pv->type != PIVOT_DIR && pv->type != PIVOT_SERV && + pv->type != PIVOT_ROOT && path_child) + problem(PROB_NOT_DIR, pv->req, pv->res, 0, pv, 0); + + /* Non-parametric nodes with digits in the name were not brute-forced, + but the user might be interested in doing so. Skip images here. */ + + if (pv->fuzz_par == -1 && pv->res && + (pv->res->sniff_mime_id < MIME_IMG_JPEG || + pv->res->sniff_mime_id > MIME_AV_WMEDIA) && + (pv->type == PIVOT_DIR || pv->type == PIVOT_FILE || + pv->type == PIVOT_PATHINFO) && !pv->missing) { + i = strlen((char*)pv->name); + while (i--) + if (isdigit(pv->name[i])) { + problem(PROB_FUZZ_DIGIT, pv->req, pv->res, 0, pv, 0); + break; + } + } + + /* Parametric nodes that seem to contain queries in parameters, and are not + marked as bogus_par, should be marked as dangerous. */ + + if (pv->fuzz_par != -1 && !pv->bogus_par && + (((q1 = (u8*)strchr((char*)pv->req->par.v[pv->fuzz_par], '(')) && + (q2 = (u8*)strchr((char*)pv->req->par.v[pv->fuzz_par], ')')) && q1 < q2) + || + ((inl_strcasestr(pv->req->par.v[pv->fuzz_par], (u8*)"SELECT ") || + inl_strcasestr(pv->req->par.v[pv->fuzz_par], (u8*)"DELETE ") ) && + inl_strcasestr(pv->req->par.v[pv->fuzz_par], (u8*)" FROM ")) || + (inl_strcasestr(pv->req->par.v[pv->fuzz_par], (u8*)"UPDATE ") || + inl_strcasestr(pv->req->par.v[pv->fuzz_par], (u8*)" WHERE ")) || + inl_strcasestr(pv->req->par.v[pv->fuzz_par], (u8*)"DROP TABLE ") || + inl_strcasestr(pv->req->par.v[pv->fuzz_par], (u8*)" ORDER BY "))) + problem(PROB_SQL_PARAM, pv->req, pv->res, 0, pv, 0); + + /* Sort children nodes and issues as appropriate. 
*/ + + if (pv->child_cnt > 1) + qsort(pv->child, pv->child_cnt, sizeof(struct pivot_desc*), pivot_compar); + + if (pv->issue_cnt > 1) + qsort(pv->issue, pv->issue_cnt, sizeof(struct issue_desc), issue_compar); + + if ((!(proc_cnt++ % 50)) || pv->type == PIVOT_ROOT) { + SAY(cLGN "\r[+] " cNOR "Sorting and annotating crawl nodes: %u", proc_cnt); + fflush(0); + } + +} + + +/* Issue extra hashing helper. */ + +static inline u32 hash_extra(u8* str) { + register u32 ret = 0; + register u8 cur; + + if (str) + while ((cur=*str)) { + ret = ~ret ^ (cur) ^ + (cur << 5) ^ (~cur >> 5) ^ + (cur << 10) ^ (~cur << 15) ^ + (cur << 20) ^ (~cur << 25) ^ + (cur << 30); + str++; + } + + return ret; +} + + + +/* Registers a new pivot signature, or updates an existing one. */ + +static void maybe_add_sig(struct pivot_desc* pv) { + u32 i, issue_sig = ~pv->issue_cnt, + child_sig = ~pv->child_cnt; + + if (!pv->res) return; + + /* Compute a rough children node signature based on children types. */ + + for (i=0;ichild_cnt;i++) + child_sig ^= (hash_extra(pv->child[i]->name) ^ + pv->child[i]->type) << (i % 16); + + /* Do the same for all recorded issues. */ + + for (i=0;iissue_cnt;i++) + issue_sig ^= (hash_extra(pv->issue[i].extra) ^ + pv->issue[i].type) << (i % 16); + + /* Assign a simplified signature to the pivot. */ + + pv->pv_sig = (pv->type << 16) ^ ~child_sig ^ issue_sig; + + /* See if a matching signature already exists. */ + + for (i=0;itype && p_sig[i].issue_sig == issue_sig && + p_sig[i].child_sig == child_sig && + same_page(p_sig[i].res_sig, &pv->res->sig)) { + + pv->dupe = 1; + return; + + } + + /* No match - create a new one. */ + + p_sig = ck_realloc(p_sig, (p_sig_cnt + 1) * sizeof(struct p_sig_desc)); + + p_sig[p_sig_cnt].type = pv->type; + p_sig[p_sig_cnt].res_sig = &pv->res->sig; + p_sig[p_sig_cnt].issue_sig = issue_sig; + p_sig[p_sig_cnt].child_sig = child_sig; + p_sig_cnt++; + +} + + + + +/* Recursively collects unique signatures for pivots. 
*/ + +static void collect_signatures(struct pivot_desc* pv) { + u32 i; + static u32 proc_cnt; + + maybe_add_sig(pv); + for (i=0;ichild_cnt;i++) collect_signatures(pv->child[i]); + + if ((!(proc_cnt++ % 50)) || pv->type == PIVOT_ROOT) { + SAY(cLGN "\r[+] " cNOR "Looking for duplicate entries: %u", proc_cnt); + fflush(0); + } + +} + + +/* Destroys signature data (for memory profiling purposes). */ + +void destroy_signatures(void) { + u32 i; + + ck_free(p_sig); + + for (i=0;iparent; + static u32 proc_cnt; + + for (i=0;ichild_cnt;i++) compute_counts(pv->child[i]); + + if (pv->dupe) return; + + while (tmp) { + tmp->total_child_cnt++; + tmp = tmp->parent; + } + + for (i=0;iissue_cnt;i++) { + u8 sev = PSEV(pv->issue[i].type); + tmp = pv; + while (tmp) { + tmp->total_issues[sev]++; + tmp = tmp->parent; + } + } + + if ((!(proc_cnt++ % 50)) || pv->type == PIVOT_ROOT) { + SAY(cLGN "\r[+] " cNOR "Counting unique issues: %u", proc_cnt); + fflush(0); + } + +} + + +/* Helper to JS-escape data. Static buffer, will be destroyed on + subsequent calls. 
*/ + +static inline u8* js_escape(u8* str) { + u32 len; + static u8* ret; + u8* opos; + + if (!str) return (u8*)"[none]"; + + len = strlen((char*)str); + + if (ret) free(ret); + opos = ret = __DFL_ck_alloc(len * 4 + 1); + + while (len--) { + if (*str > 0x1f && *str < 0x80 && !strchr("<>\\'\"", *str)) { + *(opos++) = *(str++); + } else { + sprintf((char*)opos, "\\x%02x", *(str++)); + opos += 4; + } + } + + *opos = 0; + + return ret; + +} + + +static void output_scan_info(u64 scan_time, u32 seed) { + FILE* f; + time_t t = time(NULL); + u8* ct = (u8*)ctime(&t); + + if (isspace(ct[strlen((char*)ct)-1])) + ct[strlen((char*)ct)-1] = 0; + + f = fopen("summary.js", "w"); + if (!f) PFATAL("Cannot open 'summary.js'"); + + fprintf(f, "var sf_version = '%s';\n", VERSION); + fprintf(f, "var scan_date = '%s';\n", js_escape(ct)); + fprintf(f, "var scan_seed = '0x%08x';\n", seed); + fprintf(f, "var scan_ms = %llu;\n", (long long)scan_time); + + fclose(f); + +} + + +/* Helper to save request, response data. */ + +static void describe_res(FILE* f, struct http_response* res) { + + if (!res) { + fprintf(f, "'fetched': false, 'error': 'Content not fetched'"); + return; + } + + switch (res->state) { + + case 0 ... STATE_OK - 1: + fprintf(f, "'fetched': false, 'error': '(Reported while fetch in progress)'"); + break; + + case STATE_OK: + fprintf(f, "'fetched': true, 'code': %u, 'len': %u, 'decl_mime': '%s', ", + res->code, res->pay_len, + js_escape(res->header_mime)); + + fprintf(f, "'sniff_mime': '%s', 'cset': '%s'", + res->sniffed_mime ? res->sniffed_mime : (u8*)"[none]", + js_escape(res->header_charset ? 
res->header_charset + : res->meta_charset)); + break; + + case STATE_DNSERR: + fprintf(f, "'fetched': false, 'error': 'DNS error'"); + break; + + case STATE_LOCALERR: + fprintf(f, "'fetched': false, 'error': 'Local network error'"); + break; + + case STATE_CONNERR: + fprintf(f, "'fetched': false, 'error': 'Connection error'"); + break; + + case STATE_RESPERR: + fprintf(f, "'fetched': false, 'error': 'Malformed HTTP response'"); + break; + + case STATE_SUPPRESS: + fprintf(f, "'fetched': false, 'error': 'Limits exceeded'"); + break; + + + default: + fprintf(f, "'fetched': false, 'error': 'Unknown error'"); + + } + +} + + +/* Helper to save request, response data. */ + +static void save_req_res(struct http_request* req, struct http_response* res, u8 sample) { + FILE* f; + + if (req) { + u8* rd = build_request_data(req); + f = fopen("request.dat", "w"); + if (!f) PFATAL("Cannot create 'request.dat'"); + fwrite(rd, strlen((char*)rd), 1, f); + fclose(f); + ck_free(rd); + } + + if (res && res->state == STATE_OK) { + u32 i; + f = fopen("response.dat", "w"); + if (!f) PFATAL("Cannot create 'response.dat'"); + fprintf(f, "HTTP/1.1 %u %s\n", res->code, res->msg); + + for (i=0;ihdr.c;i++) + if (res->hdr.t[i] == PARAM_HEADER) + fprintf(f, "%s: %s\n", res->hdr.n[i], res->hdr.v[i]); + else + fprintf(f, "Set-Cookie: %s=%s\n", res->hdr.n[i], res->hdr.v[i]); + + fprintf(f, "\n"); + fwrite(res->payload, res->pay_len, 1, f); + fclose(f); + + /* Also collect MIME samples at this point. */ + + if (!req->pivot->dupe && res->sniffed_mime && sample) { + + for (i=0;isniffed_mime)) break; + + if (i == m_samp_cnt) { + m_samp = ck_realloc(m_samp, (i + 1) * sizeof(struct mime_sample_desc)); + m_samp[i].det_mime = res->sniffed_mime; + m_samp_cnt++; + } else { + u32 c; + + /* If we already have something that looks very much the same on the + list, don't bother reporting it again. 
*/ + + for (c=0;csig, &res->sig)) return; + } + + m_samp[i].req = ck_realloc(m_samp[i].req, (m_samp[i].sample_cnt + 1) * + sizeof(struct http_request*)); + m_samp[i].res = ck_realloc(m_samp[i].res, (m_samp[i].sample_cnt + 1) * + sizeof(struct http_response*)); + m_samp[i].req[m_samp[i].sample_cnt] = req; + m_samp[i].res[m_samp[i].sample_cnt] = res; + m_samp[i].sample_cnt++; + + } + + } + +} + + +/* Dumps the actual crawl data. */ + +static void output_crawl_tree(struct pivot_desc* pv) { + u32 i; + FILE* f; + static u32 proc_cnt; + + /* Save request, response. */ + + save_req_res(pv->req, pv->res, 1); + + /* Write children information. Don't crawl children just yet, + because we could run out of file descriptors on a particularly + deep tree if we keep one open and recurse. */ + + f = fopen("child_index.js", "w"); + if (!f) PFATAL("Cannot create 'child_index.js'."); + + fprintf(f, "var child = [\n"); + + for (i=0;ichild_cnt;i++) { + u8 tmp[32]; + u8* p; + + if (suppress_dupes && pv->child[i]->dupe && + !pv->child[i]->total_child_cnt) continue; + + /* Also completely suppress nodes that seem identical to the + previous one, and have a common prefix (as this implies + a mod_rewrite or htaccess filter). */ + + if (i && pv->child[i-1]->pv_sig == pv->child[i]->pv_sig) { + u8 *pn = pv->child[i-1]->name, *cn = pv->child[i]->name; + u32 pnd = strcspn((char*)pn, "."); + if (!strncasecmp((char*)pn, (char*)cn, pnd)) continue; + } + + sprintf((char*)tmp, "c%u", i); + + fprintf(f, " { 'dupe': %s, 'type': %u, 'name': '%s%s", + pv->child[i]->dupe ? "true" : "false", + pv->child[i]->type, js_escape(pv->child[i]->name), + (pv->child[i]->fuzz_par == -1 || pv->child[i]->type == PIVOT_VALUE) + ? (u8*)"" : (u8*)"="); + + fprintf(f, "%s', 'dir': '%s', 'linked': %d, ", + (pv->child[i]->fuzz_par == -1 || pv->child[i]->type == PIVOT_VALUE) + ? 
(u8*)"" : + js_escape(pv->child[i]->req->par.v[pv->child[i]->fuzz_par]), + tmp, pv->child[i]->linked); + + p = serialize_path(pv->child[i]->req, 1, 1); + fprintf(f, "'url': '%s', ", js_escape(p)); + ck_free(p); + + describe_res(f, pv->child[i]->res); + + fprintf(f,", 'missing': %s, 'csens': %s, 'child_cnt': %u, " + "'issue_cnt': [ %u, %u, %u, %u, %u ] }%s\n", + pv->child[i]->missing ? "true" : "false", + pv->child[i]->csens ? "true" : "false", + pv->child[i]->total_child_cnt, pv->child[i]->total_issues[1], + pv->child[i]->total_issues[2], pv->child[i]->total_issues[3], + pv->child[i]->total_issues[4], pv->child[i]->total_issues[5], + (i == pv->child_cnt - 1) ? "" : ","); + } + + fprintf(f, "];\n"); + fclose(f); + + /* Write issue index, issue dumps. */ + + f = fopen("issue_index.js", "w"); + if (!f) PFATAL("Cannot create 'issue_index.js'."); + + fprintf(f, "var issue = [\n"); + + for (i=0;iissue_cnt;i++) { + u8 tmp[32]; + sprintf((char*)tmp, "i%u", i); + + fprintf(f, " { 'severity': %u, 'type': %u, 'extra': '%s', ", + PSEV(pv->issue[i].type) - 1, pv->issue[i].type, + pv->issue[i].extra ? js_escape(pv->issue[i].extra) : (u8*)""); + + describe_res(f, pv->issue[i].res); + + fprintf(f, ", 'dir': '%s' }%s\n", + tmp, (i == pv->issue_cnt - 1) ? "" : ","); + + if (mkdir((char*)tmp, 0755)) PFATAL("Cannot create '%s'.", tmp); + chdir((char*)tmp); + save_req_res(pv->issue[i].req, pv->issue[i].res, 1); + chdir((char*)".."); + + /* Issue samples next! */ + + if (!pv->dupe) { + u32 c; + for (c=0;cissue[i].type) break; + + if (c == i_samp_cnt) { + i_samp = ck_realloc(i_samp, (c + 1) * sizeof(struct issue_sample_desc)); + i_samp_cnt++; + i_samp[c].type = pv->issue[i].type; + } + + i_samp[c].i = ck_realloc(i_samp[c].i, (i_samp[c].sample_cnt + 1) * + sizeof(struct issue_desc*)); + i_samp[c].i[i_samp[c].sample_cnt] = &pv->issue[i]; + i_samp[c].sample_cnt++; + } + + } + + fprintf(f, "];\n"); + fclose(f); + + /* Actually crawl children. 
*/ + + for (i=0;ichild_cnt;i++) { + u8 tmp[32]; + sprintf((char*)tmp, "c%u", i); + if (mkdir((char*)tmp, 0755)) PFATAL("Cannot create '%s'.", tmp); + chdir((char*)tmp); + output_crawl_tree(pv->child[i]); + chdir((char*)".."); + } + + if ((!(proc_cnt++ % 50)) || pv->type == PIVOT_ROOT) { + SAY(cLGN "\r[+] " cNOR "Counting unique issues: %u", proc_cnt); + fflush(0); + } + +} + + +/* Writes previews of MIME types, issues. */ + +static int m_samp_qsort(const void* ptr1, const void* ptr2) { + const struct mime_sample_desc *p1 = ptr1, *p2 = ptr2; + return strcasecmp((char*)p1->det_mime, (char*)p2->det_mime); +} + +static int i_samp_qsort(const void* ptr1, const void* ptr2) { + const struct issue_sample_desc *p1 = ptr1, *p2 = ptr2; + return p2->type - p1->type; +} + + +static void output_summary_views() { + u32 i; + FILE* f; + + f = fopen("samples.js", "w"); + if (!f) PFATAL("Cannot create 'samples.js'."); + + qsort(m_samp, m_samp_cnt, sizeof(struct mime_sample_desc), m_samp_qsort); + qsort(i_samp, i_samp_cnt, sizeof(struct issue_sample_desc), i_samp_qsort); + + fprintf(f, "var mime_samples = [\n"); + + for (i=0;i MAX_SAMPLES ? MAX_SAMPLES : + m_samp[i].sample_cnt); + + sprintf((char*)tmp, "_m%u", i); + if (mkdir((char*)tmp, 0755)) PFATAL("Cannot create '%s'.", tmp); + chdir((char*)tmp); + + fprintf(f, " { 'mime': '%s', 'samples': [\n", m_samp[i].det_mime); + + for (c=0;cpivot->linked, m_samp[i].res[c]->pay_len, + (c == use_samp - 1) ? " ]" : ","); + ck_free(p); + } + + fprintf(f, " }%s\n", (i == m_samp_cnt - 1) ? "" : ","); + chdir(".."); + } + + fprintf(f, "];\n\n"); + + fprintf(f, "var issue_samples = [\n"); + + for (i=0;i MAX_SAMPLES ? 
MAX_SAMPLES : + i_samp[i].sample_cnt); + + sprintf((char*)tmp, "_i%u", i); + if (mkdir((char*)tmp, 0755)) PFATAL("Cannot create '%s'.", tmp); + chdir((char*)tmp); + + fprintf(f, " { 'severity': %d, 'type': %d, 'samples': [\n", + PSEV(i_samp[i].type) - 1, i_samp[i].type); + + for (c=0;creq, 1, 0); + sprintf((char*)tmp2, "%u", c); + if (mkdir((char*)tmp2, 0755)) PFATAL("Cannot create '%s'.", tmp2); + chdir((char*)tmp2); + save_req_res(i_samp[i].i[c]->req, i_samp[i].i[c]->res, 0); + chdir(".."); + fprintf(f, " { 'url': '%s', ", js_escape(p)); + fprintf(f, "'extra': '%s', 'dir': '%s/%s' }%s\n", + i_samp[i].i[c]->extra ? js_escape(i_samp[i].i[c]->extra) : + (u8*)"", tmp, tmp2, + (c == use_samp - 1) ? " ]" : ","); + ck_free(p); + } + + fprintf(f, " }%s\n", (i == i_samp_cnt - 1) ? "" : ","); + chdir(".."); + } + + fprintf(f, "];\n\n"); + fclose(f); + +} + + +/* Copies over assets/... to target directory. */ + +static u8* ca_out_dir; + +static int copy_asset(const struct dirent* d) { + u8 *itmp, *otmp, buf[1024]; + s32 i, o; + + if (d->d_name[0] == '.' || !strcmp(d->d_name, "COPYING")) return 0; + + itmp = ck_alloc(6 + strlen(d->d_name) + 2); + sprintf((char*)itmp, "assets/%s", d->d_name); + i = open((char*)itmp, O_RDONLY); + + otmp = ck_alloc(strlen((char*)ca_out_dir) + strlen(d->d_name) + 2); + sprintf((char*)otmp, "%s/%s", ca_out_dir, d->d_name); + o = open((char*)otmp, O_WRONLY | O_CREAT | O_EXCL, 0644); + + if (i >= 0 && o >= 0) { + s32 c; + while ((c = read(i, buf, 1024)) > 0) write(o, buf, c); + } + + close(i); + close(o); + + ck_free(itmp); + ck_free(otmp); + + return 0; + +} + + +static void copy_static_code(u8* out_dir) { + struct dirent** d; + ca_out_dir = out_dir; + scandir("assets", &d, copy_asset, NULL); +} + + +/* Writes report to index.html in the current directory. Will create + subdirectories, helper files, etc. 
*/ + +void write_report(u8* out_dir, u64 scan_time, u32 seed) { + + SAY(cLGN "[+] " cNOR "Copying static resources...\n"); + copy_static_code(out_dir); + + if (chdir((char*)out_dir)) PFATAL("Cannot chdir to '%s'", out_dir); + + sort_annotate_pivot(&root_pivot); + SAY("\n"); + + collect_signatures(&root_pivot); + SAY("\n"); + + compute_counts(&root_pivot); + SAY("\n"); + + SAY(cLGN "[+] " cNOR "Writing scan description...\n"); + output_scan_info(scan_time, seed); + + output_crawl_tree(&root_pivot); + SAY("\n"); + + SAY(cLGN "[+] " cNOR "Generating summary views...\n"); + output_summary_views(); + + SAY(cLGN "[+] " cNOR "Report saved to '" cLBL "%s/index.html" cNOR "' [" + cLBL "0x%08x" cNOR "].\n", out_dir, seed); + +} diff --git a/report.h b/report.h new file mode 100644 index 0000000..291ed5d --- /dev/null +++ b/report.h @@ -0,0 +1,38 @@ +/* + skipfish - post-processing and reporting + ---------------------------------------- + + Author: Michal Zalewski + + Copyright 2009, 2010 by Google Inc. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + */ + +#ifndef _HAVE_REPORT_H + +#include "types.h" + +extern u8 suppress_dupes; + +/* Writes report to index.html in the current directory. Will create + subdirectories, helper files, etc. */ + +void write_report(u8* out_dir, u64 scan_time, u32 seed); + +/* Destroys all signatures created for pivot and issue clustering purposes. 
*/ + +void destroy_signatures(void); + +#endif /* !_HAVE_REPORT_H */ diff --git a/same_test.c b/same_test.c new file mode 100644 index 0000000..946d804 --- /dev/null +++ b/same_test.c @@ -0,0 +1,84 @@ +/* + skipfish - same_page() test utility + ----------------------------------- + + Author: Michal Zalewski + + Copyright 2009, 2010 by Google Inc. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "types.h" +#include "alloc-inl.h" +#include "string-inl.h" + +#include "crawler.h" +#include "analysis.h" +#include "database.h" +#include "http_client.h" +#include "report.h" + +#ifdef DEBUG_ALLOCATOR +struct __AD_trk_obj* __AD_trk[ALLOC_BUCKETS]; +u32 __AD_trk_cnt[ALLOC_BUCKETS]; +#endif /* DEBUG_ALLOCATOR */ + +#define MAX_LEN (1024*1024) + +u8 p1[MAX_LEN], p2[MAX_LEN]; + +int main(int argc, char** argv) { + static struct http_response r1, r2; + s32 l1, l2; + + l1 = read(8, p1, MAX_LEN); + l2 = read(9, p2, MAX_LEN); + + if (l1 < 0 || l2 < 0) + FATAL("Usage: ./same_test 8 + + Copyright 2009, 2010 by Google Inc. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "types.h" +#include "alloc-inl.h" +#include "string-inl.h" + +#include "crawler.h" +#include "analysis.h" +#include "database.h" +#include "http_client.h" +#include "report.h" + +#ifdef DEBUG_ALLOCATOR +struct __AD_trk_obj* __AD_trk[ALLOC_BUCKETS]; +u32 __AD_trk_cnt[ALLOC_BUCKETS]; +#endif /* DEBUG_ALLOCATOR */ + + +void usage(char* argv0) { + SAY("Usage: %s [ options ... ] -o output_dir start_url [ start_url2 ... ]\n\n" + + "Authentication and access options:\n\n" + + " -A user:pass - use specified HTTP authentication credentials\n" + " -F host:IP - pretend that 'host' resolves to 'IP'\n" + " -C name=val - append a custom cookie to all requests\n" + " -H name=val - append a custom HTTP header to all requests\n" + " -b (i|f) - use headers consistent with MSIE / Firefox\n" + " -N - do not accept any new cookies\n\n" + + "Crawl scope options:\n\n" + + " -d max_depth - maximum crawl tree depth (%u)\n" + " -c max_child - maximum children to index per node (%u)\n" + " -r r_limit - max total number of requests to send (%u)\n" + " -p crawl%% - node and link crawl probability (100%%)\n" + " -q hex - repeat probabilistic scan with given seed\n" + " -I string - only follow URLs matching 'string'\n" + " -X string - exclude URLs matching 'string'\n" + " -S string - exclude pages containing 'string'\n" + " -D domain - crawl cross-site links to another domain\n" + " -B domain - trust, but do not crawl, another domain\n" + " -O - do not submit any 
forms\n" + " -P - do not parse HTML, etc, to find new links\n\n" + + "Reporting options:\n\n" + + " -o dir - write output to specified directory (required)\n" + " -J - be less noisy about MIME / charset mismatches\n" + " -M - log warnings about mixed content\n" + " -E - log all HTTP/1.0 / HTTP/1.1 caching intent mismatches\n" + " -U - log all external URLs and e-mails seen\n" + " -Q - completely suppress duplicate nodes in reports\n\n" + + "Dictionary management options:\n\n" + + " -W wordlist - load an alternative wordlist (%s)\n" + " -L - do not auto-learn new keywords for the site\n" + " -V - do not update wordlist based on scan results\n" + " -Y - do not fuzz extensions in directory brute-force\n" + " -R age - purge words hit more than 'age' scans ago\n" + " -T name=val - add new form auto-fill rule\n" + " -G max_guess - maximum number of keyword guesses to keep (%d)\n\n" + + "Performance settings:\n\n" + + " -g max_conn - max simultaneous TCP connections, global (%u)\n" + " -m host_conn - max simultaneous connections, per target IP (%u)\n" + " -f max_fail - max number of consecutive HTTP errors (%u)\n" + " -t req_tmout - total request response timeout (%u s)\n" + " -w rw_tmout - individual network I/O timeout (%u s)\n" + " -i idle_tmout - timeout on idle HTTP connections (%u s)\n" + " -s s_limit - response size limit (%u B)\n\n" + + "Send comments and complaints to .\n", argv0, + max_depth, max_children, max_requests, DEF_WORDLIST, MAX_GUESSES, + max_connections, max_conn_host, max_fail, resp_tmout, rw_tmout, + idle_tmout, size_limit); + + exit(1); +} + + +/* Ctrl-C handler... 
*/ + +static u8 stop_soon; + +static void ctrlc_handler(int sig) { + stop_soon = 1; +} + + +/* Main entry point */ + +int main(int argc, char** argv) { + s32 opt; + u32 loop_cnt = 0, purge_age = 0, seed; + u8 dont_save_words = 0, show_once = 0; + u8 *wordlist = (u8*)DEF_WORDLIST, *output_dir = NULL; + + struct timeval tv; + u64 st_time, en_time; + + signal(SIGINT, ctrlc_handler); + signal(SIGPIPE, SIG_IGN); + SSL_library_init(); + + /* Come up with a quasi-decent random seed. */ + + gettimeofday(&tv, NULL); + seed = tv.tv_usec ^ (tv.tv_sec << 16) ^ getpid(); + + SAY("skipfish version " VERSION " by \n"); + + while ((opt = getopt(argc, argv, + "+A:F:C:H:b:Nd:c:r:p:I:X:S:D:PJOYQMUEW:LVT:G:R:B:q:g:m:f:t:w:i:s:o:")) > 0) + + switch (opt) { + + case 'A': { + u8* x = (u8*)strchr(optarg, ':'); + if (!x) FATAL("Credentials must be in 'user:pass' form."); + *(x++) = 0; + auth_user = (u8*)optarg; + auth_pass = x; + auth_type = AUTH_BASIC; + break; + } + + case 'F': { + u8* x = (u8*)strchr(optarg, '='); + u32 fake_addr; + if (!x) FATAL("Fake mappings must be in 'host=IP' form."); + *x = 0; + fake_addr = inet_addr((char*)x + 1); + if (fake_addr == (u32)-1) + FATAL("Could not parse IP address '%s'.", x + 1); + fake_host((u8*)optarg, fake_addr); + break; + } + + case 'H': { + u8* x = (u8*)strchr(optarg, '='); + if (!x) FATAL("Extra headers must be in 'name=value' form."); + *x = 0; + if (!strcasecmp(optarg, "Cookie")) + FATAL("Do not use -H to set cookies (try -C instead)."); + SET_HDR((u8*)optarg, x + 1, &global_http_par); + break; + } + + case 'C': { + u8* x = (u8*)strchr(optarg, '='); + if (!x) FATAL("Cookies must be in 'name=value' form."); + if (strchr(optarg, ';')) + FATAL("Split multiple cookies into separate -C options."); + *x = 0; + SET_CK((u8*)optarg, x + 1, &global_http_par); + break; + } + + case 'D': + if (*optarg == '*') optarg++; + APPEND_FILTER(allow_domains, num_allow_domains, optarg); + break; + + case 'B': + if (*optarg == '*') optarg++; + 
APPEND_FILTER(trust_domains, num_trust_domains, optarg); + break; + + case 'I': + if (*optarg == '*') optarg++; + APPEND_FILTER(allow_urls, num_allow_urls, optarg); + break; + + case 'X': + if (*optarg == '*') optarg++; + APPEND_FILTER(deny_urls, num_deny_urls, optarg); + break; + + case 'J': + relaxed_mime = 1; + break; + + case 'S': + if (*optarg == '*') optarg++; + APPEND_FILTER(deny_strings, num_deny_strings, optarg); + break; + + case 'T': { + u8* x = (u8*)strchr(optarg, '='); + if (!x) FATAL("Rules must be in 'name=value' form."); + *x = 0; + add_form_hint((u8*)optarg, x + 1); + break; + } + + case 'N': + ignore_cookies = 1; + break; + + case 'Y': + no_fuzz_ext = 1; + break; + + case 'q': + if (sscanf(optarg, "0x%08x", &seed) != 1) + FATAL("Invalid seed format."); + srandom(seed); + break; + + case 'Q': + suppress_dupes = 1; + break; + + case 'P': + no_parse = 1; + break; + + case 'V': + dont_save_words = 1; + break; + + case 'M': + warn_mixed = 1; + break; + + case 'U': + log_ext_urls = 1; + break; + + case 'L': + dont_add_words = 1; + break; + + case 'E': + pedantic_cache = 1; + break; + + case 'O': + no_forms = 1; + break; + + case 'R': + purge_age = atoi(optarg); + if (purge_age < 3) FATAL("Purge age invalid or too low (min 3)."); + break; + + case 'd': + max_depth = atoi(optarg); + if (max_depth < 2) FATAL("Invalid value '%s'.", optarg); + break; + + case 'c': + max_children = atoi(optarg); + if (!max_children) FATAL("Invalid value '%s'.", optarg); + break; + + case 'p': + crawl_prob = atoi(optarg); + if (!crawl_prob) FATAL("Invalid value '%s'.", optarg); + break; + + case 'W': + wordlist = (u8*)optarg; + break; + + case 'b': + if (optarg[0] == 'i') browser_type = BROWSER_MSIE; else + if (optarg[0] == 'f') browser_type = BROWSER_FFOX; else + usage(argv[0]); + break; + + case 'g': + max_connections = atoi(optarg); + if (!max_connections) FATAL("Invalid value '%s'.", optarg); + break; + + case 'm': + max_conn_host = atoi(optarg); + if (!max_conn_host) 
FATAL("Invalid value '%s'.", optarg); + break; + + case 'G': + max_guesses = atoi(optarg); + if (!max_guesses) FATAL("Invalid value '%s'.", optarg); + break; + + case 'r': + max_requests = atoi(optarg); + if (!max_requests) FATAL("Invalid value '%s'.", optarg); + break; + + case 'f': + max_fail = atoi(optarg); + if (!max_fail) FATAL("Invalid value '%s'.", optarg); + break; + + case 't': + resp_tmout = atoi(optarg); + if (!resp_tmout) FATAL("Invalid value '%s'.", optarg); + break; + + case 'w': + rw_tmout = atoi(optarg); + if (!rw_tmout) FATAL("Invalid value '%s'.", optarg); + break; + + case 'i': + idle_tmout = atoi(optarg); + if (!idle_tmout) FATAL("Invalid value '%s'.", optarg); + break; + + case 's': + size_limit = atoi(optarg); + if (!size_limit) FATAL("Invalid value '%s'.", optarg); + break; + + case 'o': + if (output_dir) FATAL("Multiple -o options not allowed."); + output_dir = (u8*)optarg; + + rmdir(optarg); + + if (mkdir(optarg, 0755)) + PFATAL("Unable to create '%s'.", output_dir); + + break; + + default: + usage(argv[0]); + + } + + if (access("assets/index.html", R_OK)) + PFATAL("Unable to access 'assets/index.html' - wrong directory?"); + + srandom(seed); + + if (optind == argc) + FATAL("Scan target not specified (try -h for help)."); + + if (!output_dir) + FATAL("Output directory not specified (try -h for help)."); + + if (resp_tmout < rw_tmout) + resp_tmout = rw_tmout; + + if (max_connections < max_conn_host) + max_connections = max_conn_host; + + load_keywords((u8*)wordlist, purge_age); + + /* Schedule all URLs in the command line for scanning */ + + while (optind < argc) { + + struct http_request *req = ck_alloc(sizeof(struct http_request)); + + if (parse_url((u8*)argv[optind], req, NULL)) + FATAL("One of specified scan targets is not a valid absolute URL."); + + if (!url_allowed_host(req)) + APPEND_FILTER(allow_domains, num_allow_domains, + __DFL_ck_strdup(req->host)); + + if (!url_allowed(req)) + FATAL("URL '%s' explicitly excluded by -I / -X 
rules.", argv[optind]); + + maybe_add_pivot(req, NULL, 2); + destroy_request(req); + + optind++; + } + + gettimeofday(&tv, NULL); + st_time = tv.tv_sec * 1000 + tv.tv_usec / 1000; + + SAY("\x1b[H\x1b[J"); + + while ((next_from_queue() && !stop_soon) || (!show_once++)) { + + if ((loop_cnt++ % 20) && !show_once) continue; + + SAY(cYEL "\x1b[H" + "skipfish version " VERSION " by \n\n" cNOR); + + http_stats(st_time); + SAY("\n"); + database_stats(); + SAY("\n \r"); + + } + + gettimeofday(&tv, NULL); + en_time = tv.tv_sec * 1000 + tv.tv_usec / 1000; + + if (stop_soon) + SAY(cYEL "[!] " cBRI "Scan aborted by user, bailing out!" cNOR "\n"); + + if (!dont_save_words) save_keywords((u8*)wordlist); + + write_report(output_dir, en_time - st_time, seed); + +#ifdef LOG_STDERR + SAY("\n== PIVOT DEBUG ==\n"); + dump_pivots(0, 0); + SAY("\n== END OF DUMP ==\n\n"); +#endif /* LOG_STDERR */ + + SAY(cLGN "[+] " cBRI "This was a great day for science!" cNOR "\n\n"); + +#ifdef DEBUG_ALLOCATOR + if (!stop_soon) { + destroy_database(); + destroy_http(); + destroy_signatures(); + __AD_report(); + } +#endif /* DEBUG_ALLOCATOR */ + + return 0; + +} diff --git a/string-inl.h b/string-inl.h new file mode 100644 index 0000000..82bfab2 --- /dev/null +++ b/string-inl.h @@ -0,0 +1,182 @@ +/* + + skipfish - various string manipulation helpers + ---------------------------------------------- + + Some modern operating systems still ship with no strcasestr() or memmem() + implementations in place, for reasons beyond comprehension. This file + includes a simplified version of these routines, copied from NetBSD, plus + several minor, custom string manipulation macros and inline functions. + + The original NetBSD code is licensed under a BSD license, as follows: + + Copyright (c) 1990, 1993 + The Regents of the University of California. All rights reserved. + + This code is derived from software contributed to Berkeley by + Chris Torek. 
+ + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. Neither the name of the University nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + SUCH DAMAGE. + + */ + +#ifndef _HAVE_STRING_INL_H +#define _HAVE_STRING_INL_H + +#include +#include + +#include "types.h" + + +/* Modified NetBSD strcasestr() implementation (rolling strncasecmp). 
*/ + +static inline u8* inl_strcasestr(const u8* haystack, const u8* needle) { + register u8 c, sc; + register u32 len; + + if (!haystack || !needle) return 0; + + if ((c = *needle++)) { + + c = tolower(c); + len = strlen((char*)needle); + + do { + do { + if (!(sc = *haystack++)) return 0; + } while (tolower(sc) != c); + } while (strncasecmp((char*)haystack, (char*)needle, len)); + + haystack--; + + } + + return (u8*)haystack; + +} + + +/* Modified NetBSD memmem() implementation (rolling memcmp). */ + +static inline void* inl_memmem(const void* haystack, u32 h_len, + const void* needle, u32 n_len) { + register u8* sp = (u8*)haystack; + register u8* pp = (u8*)needle; + register u8* eos = sp + h_len - n_len; + + if (!(haystack && needle && h_len && n_len)) return 0; + + while (sp <= eos) { + if (*sp == *pp) + if (memcmp(sp, pp, n_len) == 0) return sp; + sp++; + } + + return 0; + +} + + +/* String manipulation macros for operating on a dynamic buffer. */ + +#define NEW_STR(_buf_ptr, _buf_len) do { \ + (_buf_ptr) = ck_alloc(1024); \ + (_buf_len) = 0; \ + } while (0) + +#define ADD_STR_DATA(_buf_ptr, _buf_len, _str) do { \ + u32 _sl = strlen((char*)_str); \ + if ((_buf_len) + (_sl) + 1 > malloc_usable_size(_buf_ptr)) { \ + u32 _nsiz = ((_buf_len) + _sl + 1024) >> 10 << 10; \ + (_buf_ptr) = ck_realloc(_buf_ptr, _nsiz); \ + } \ + memcpy((_buf_ptr) + (_buf_len), _str, _sl + 1); \ + (_buf_len) += _sl; \ + } while (0) + +#define TRIM_STR(_buf_ptr, _buf_len) do { \ + (_buf_ptr) = ck_realloc(_buf_ptr, _buf_len + 1); \ + (_buf_ptr)[_buf_len] = 0; \ + } while (0) + + +/* Simple base64 encoder */ + +static inline u8* b64_encode(u8* str, u32 len) { + + const u8 b64[64] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + "0123456789+/"; + + u8 *ret, *cur; + + ret = cur = ck_alloc((len + 3) * 4 / 3 + 1); + + while (len > 0) { + + if (len >= 3) { + u32 comp = (str[0] << 16) | (str[1] << 8) | str[2]; + + *(cur++) = b64[comp >> 18]; + *(cur++) = b64[(comp >> 12) & 0x3F]; + 
*(cur++) = b64[(comp >> 6) & 0x3F]; + *(cur++) = b64[comp & 0x3F]; + + len -= 3; + str += 3; + + } else if (len == 2) { + u32 comp = (str[0] << 16) | (str[1] << 8); + + *(cur++) = b64[comp >> 18]; + *(cur++) = b64[(comp >> 12) & 0x3F]; + *(cur++) = b64[(comp >> 6) & 0x3D]; + *(cur++) = '='; + + len -= 2; + str += 2; + + } else { + u32 comp = (str[0] << 16);; + + *(cur++) = b64[comp >> 18]; + *(cur++) = b64[(comp >> 12) & 0x3F]; + *(cur++) = '='; + *(cur++) = '='; + + len--; + str++; + + } + + } + + *cur = 0; + return ret; + +} + +#endif /* !_HAVE_STRING_INL_H */ diff --git a/types.h b/types.h new file mode 100644 index 0000000..fdf05d3 --- /dev/null +++ b/types.h @@ -0,0 +1,42 @@ +/* + skipfish - type definitions + --------------------------- + + Author: Michal Zalewski + + Copyright 2009, 2010 by Google Inc. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + */ + +#ifndef _HAVE_TYPES_H +#define _HAVE_TYPES_H + +#include + +typedef uint8_t u8; +typedef uint16_t u16; +typedef uint32_t u32; +typedef uint64_t u64; + +typedef int8_t s8; +typedef int16_t s16; +typedef int32_t s32; +typedef int64_t s64; + +/* PRNG wrapper, of no better place to put it. */ + +#define R(_ceil) ((u32)(random() % (_ceil))) + +#endif /* ! _HAVE_TYPES_H */