commit fcf0650b5ee378f7baf9cfddb71504f8316184fc Author: Steve Pinkham Date: Sat Mar 20 11:46:08 2010 -0400 Version 1.00b as released diff --git a/COPYING b/COPYING new file mode 100644 index 0000000..d645695 --- /dev/null +++ b/COPYING @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. 
Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative 
Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..c897eda --- /dev/null +++ b/Makefile @@ -0,0 +1,54 @@ +# +# skipfish - Makefile +# ------------------- +# +# Author: Michal Zalewski +# +# Copyright 2009, 2010 by Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +PROGNAME = skipfish + +OBJFILES = http_client.c database.c crawler.c analysis.c report.c +INCFILES = alloc-inl.h string-inl.h debug.h types.h http_client.h \ + database.h crawler.h analysis.h config.h report.h + +CFLAGS_GEN = -Wall -funsigned-char -g -ggdb +CFLAGS_DBG = $(CFLAGS_GEN) -DLOG_STDERR=1 -DDEBUG_ALLOCATOR=1 +CFLAGS_OPT = $(CFLAGS_GEN) -O3 -Wno-format +LDFLAGS = -lcrypto -lssl -lidn -lz + +all: $(PROGNAME) + +$(PROGNAME): $(PROGNAME).c $(OBJFILES) $(INCFILES) + $(CC) $(PROGNAME).c -o $(PROGNAME) $(CFLAGS_OPT) $(OBJFILES) $(LDFLAGS) + @echo + @echo "NOTE: See dictionaries/README-FIRST to pick a dictionary for the tool." + @echo + +debug: $(PROGNAME).c $(OBJFILES) $(INCFILES) + $(CC) $(PROGNAME).c -o $(PROGNAME) $(CFLAGS_DBG) $(OBJFILES) $(LDFLAGS) + +clean: + rm -f $(PROGNAME) *.exe *.o *~ a.out core core.[1-9][0-9]* *.stackdump \ + LOG same_test + rm -rf tmpdir + +same_test: same_test.c $(OBJFILES) $(INCFILES) + $(CC) same_test.c -o same_test $(CFLAGS_DBG) $(OBJFILES) $(LDFLAGS) + +publish: clean + cd ..; tar cfvz ~/www/skipfish.tgz skipfish + chmod 644 ~/www/skipfish.tgz diff --git a/README b/README new file mode 100644 index 0000000..a01040e --- /dev/null +++ b/README @@ -0,0 +1,484 @@ +=========================================== +skipfish - web application security scanner +=========================================== + + http://code.google.com/p/skipfish/ + + * Written and maintained by Michal Zalewski . + * Copyright 2009, 2010 Google Inc, rights reserved. + * Released under terms and conditions of the Apache License, version 2.0. + +-------------------- +1. What is skipfish? +-------------------- + +Skipfish is an active web application security reconnaissance tool. It prepares +an interactive sitemap for the targeted site by carrying out a recursive crawl +and dictionary-based probes. The resulting map is then annotated with the +output from a number of active (but hopefully non-disruptive) security checks. 
+The final report generated by the tool is meant to serve as a foundation for +professional web application security assessments. +Why should I bother with this particular tool? + +A number of commercial and open source tools with analogous functionality is +readily available (e.g., Nikto, Nessus); stick to the one that suits you best. +That said, skipfish tries to address some of the common problems associated +with web security scanners. Specific advantages include: + + * High performance: 500+ requests per second against responsive Internet + targets, 2000+ requests per second on LAN / MAN networks, and 7000+ requests + against local instances has been observed, with a very modest CPU, network, + and memory footprint. This can be attributed to: + + - Multiplexing single-thread, fully asynchronous network I/O and data + processing model that eliminates memory management, scheduling, and IPC + inefficiencies present in some multi-threaded clients. + + - Advanced HTTP/1.1 features such as range requests, content + compression, and keep-alive connections, as well as forced response size + limiting, to keep network-level overhead in check. + + - Smart response caching and advanced server behavior heuristics are + used to minimize unnecessary traffic. + + - Performance-oriented, pure C implementation, including a custom + HTTP stack. + + * Ease of use: skipfish is highly adaptive and reliable. The scanner + features: + + - Heuristic recognition of obscure path- and query-based parameter + handling schemes. + + - Graceful handling of multi-framework sites where certain paths obey + a completely different semantics, or are subject to different filtering + rules. + + - Automatic wordlist construction based on site content analysis. + + - Probabilistic scanning features to allow periodic, time-bound + assessments of arbitrarily complex sites. 
+ + * Well-designed security checks: the tool is meant to provide accurate and + meaningful results: + + - Three-step differential probes are preferred to signature checks + for detecting vulnerabilities. + + - Ratproxy-style logic is used to spot subtle security problems: + cross-site request forgery, cross-site script inclusion, mixed content, + issues MIME- and charset mismatches, incorrect caching directive, etc. + + - Bundled security checks are designed to handle tricky scenarios: + stored XSS (path, parameters, headers), blind SQL or XML injection, or + blind shell injection. + + - Report post-processing drastically reduces the noise caused by any + remaining false positives or server gimmicks by identifying repetitive + patterns. + +That said, skipfish is not a silver bullet, and may be unsuitable for certain +purposes. For example, it does not satisfy most of the requirements outlined in +WASC Web Application Security Scanner Evaluation Criteria (some of them on +purpose, some out of necessity); and unlike most other projects of this type, +it does not come with an extensive database of known vulnerabilities for +banner-type checks. + +----------------------------------------------------- +2. Most curious! What specific tests are implemented? +----------------------------------------------------- + +A rough list of the security checks offered by the tool is outlined below. + + * High risk flaws (potentially leading to system compromise): + + - Server-side SQL injection (including blind vectors, numerical + parameters). + - Explicit SQL-like syntax in GET or POST parameters. + - Server-side shell command injection (including blind vectors). + - Server-side XML / XPath injection (including blind vectors). + - Format string vulnerabilities. + - Integer overflow vulnerabilities. + + * Medium risk flaws (potentially leading to data compromise): + + - Stored and reflected XSS vectors in document body (minimal JS XSS + support present). 
+ - Stored and reflected XSS vectors via HTTP redirects. + - Stored and reflected XSS vectors via HTTP header splitting. + - Directory traversal (including constrained vectors). + - Assorted file POIs (server-side sources, configs, etc). + - Attacker-supplied script and CSS inclusion vectors (stored and + reflected). + - External untrusted script and CSS inclusion vectors. + - Mixed content problems on script and CSS resources (optional). + - Incorrect or missing MIME types on renderables. + - Generic MIME types on renderables. + - Incorrect or missing charsets on renderables. + - Conflicting MIME / charset info on renderables. + - Bad caching directives on cookie setting responses. + + * Low risk issues (limited impact or low specificity): + + - Directory listing bypass vectors. + - Redirection to attacker-supplied URLs (stored and reflected). + - Attacker-supplied embedded content (stored and reflected). + - External untrusted embedded content. + - Mixed content on non-scriptable subresources (optional). + - HTTP credentials in URLs. + - Expired or not-yet-valid SSL certificates. + - HTML forms with no XSRF protection. + - Self-signed SSL certificates. + - SSL certificate host name mismatches. + - Bad caching directives on less sensitive content. + + * Internal warnings: + + - Failed resource fetch attempts. + - Exceeded crawl limits. + - Failed 404 behavior checks. + - IPS filtering detected. + - Unexpected response variations. + - Seemingly misclassified crawl nodes. + + * Non-specific informational entries: + + - General SSL certificate information. + - Significantly changing HTTP cookies. + - Changing Server, Via, or X-... headers. + - New 404 signatures. + - Resources that cannot be accessed. + - Resources requiring HTTP authentication. + - Broken links. + - Server errors. + - All external links not classified otherwise (optional). + - All external e-mails (optional). + - All external URL redirectors (optional). + - Links to unknown protocols. 
+ - Form fields that could not be autocompleted. + - All HTML forms detected. + - Password entry forms (for external brute-force). + - Numerical file names (for external brute-force). + - User-supplied links otherwise rendered on a page. + - Incorrect or missing MIME type on less significant content. + - Generic MIME type on less significant content. + - Incorrect or missing charset on less significant content. + - Conflicting MIME / charset information on less significant content. + - OGNL-like parameter passing conventions. + +Along with a list of identified issues, skipfish also provides summary +overviews of document types and issue types found; and an interactive sitemap, +with nodes discovered through brute-force denoted in a distinctive way. + +----------------------------------------------------------- +3. All right, I want to try it out. What do I need to know? +----------------------------------------------------------- + +First and foremost, please do not be evil. Use skipfish only against services +you own, or have a permission to test. + +Keep in mind that all types of security testing can be disruptive. Although the +scanner is designed not to carry out disruptive malicious attacks, it may +accidentally interfere with the operations of the site. You must accept the +risk, and plan accordingly. Run the scanner against test instances where +feasible, and be prepared to deal with the consequences if things go wrong. + +Also note that the tool is meant to be used by security professionals, and is +experimental in nature. It may return false positives or miss obvious security +problems - and even when it operates perfectly, it is simply not meant to be a +point-and-click application. Do not rely on its output at face value. +How to run the scanner? + +To compile it, simply unpack the archive and try make. Chances are, you will +need to install libidn first. + +Next, you need to copy the desired dictionary file from dictionaries/ to +skipfish.wl. 
Please read dictionaries/README-FIRST carefully to make the right +choice. This step has a profound impact on the quality of scan results later on. + +Once you have the dictionary selected, you can try: + +$ ./skipfish -o output_dir http://www.example.com/some/starting/path.txt + +Note that you can provide more than one starting URL if so desired; all of them +will be crawled. + +In the example above, skipfish will scan the entire www.example.com (including +services on other ports, if linked to from the main page), and write a report +to output_dir/index.html. You can then view this report with your favorite +browser (JavaScript must be enabled). The index.html file is static; actual +results are stored as a hierarchy of JSON files, suitable for machine +processing if needs be. + +Some sites may require authentication; for simple HTTP credentials, you can try: + +$ ./skipfish -A user:pass ...other parameters... + +Alternatively, if the site relies on HTTP cookies instead, log in in your +browser or using a simple curl script, and then provide skipfish with a session +cookie: + +$ ./skipfish -C name=val ...other parameters... + +Other session cookies may be passed the same way, one per each -C option. + +Certain URLs on the site may log out your session; you can combat this in two +ways: by using the -N option, which causes the scanner to reject attempts to +set or delete cookies; or with the -X parameter, which prevents matching URLs +from being fetched: + +$ ./skipfish -X /logout/logout.aspx ...other parameters... + +The -X option is also useful for speeding up your scans by excluding /icons/, +/doc/, /manuals/, and other standard, mundane locations along these lines. 
In +general, you can use -X, plus -I (only spider URLs matching a substring) and -S +(ignore links on pages where a substring appears in response body) to limit the +scope of a scan any way you like - including restricting it only to a specific +protocol and port: + +$ ./skipfish -I http://example.com:1234/ ...other parameters... + +Another useful scoping option is -D - allowing you to specify additional hosts +or domains to consider in-scope for the test. By default, all hosts appearing +in the command-line URLs are added to the list - but you can use -D to broaden +these rules, for example: + +$ ./skipfish -D test2.example.com -o output-dir http://test1.example.com/ + +...or, for a domain wildcard match, use: + +$ ./skipfish -D .example.com -o output-dir http://test1.example.com/ + +In some cases, you do not want to actually crawl a third-party domain, but you +trust the owner of that domain enough not to worry about cross-domain content +inclusion from that location. To suppress warnings, you can use the -B option, +for example: + +$ ./skipfish -B .google-analytics.com -B .googleapis.com ...other parameters... + +By default, skipfish sends minimalistic HTTP headers to reduce the amount of +data exchanged over the wire; some sites examine User-Agent strings or header +ordering to reject unsupported clients, however. In such a case, you can use -b +ie or -b ffox to mimic one of the two popular browsers. + +When it comes to customizing your HTTP requests, you can also use the -H option +to insert any additional, non-standard headers; or -F to define a custom +mapping between a host and an IP (bypassing the resolver). The latter feature +is particularly useful for not-yet-launched or legacy services. + +Some sites may be too big to scan in a reasonable timeframe. If the site +features well-defined tarpits - for example, 100,000 nearly identical user +profiles as a part of a social network - these specific locations can be +excluded with -X or -S. 
In other cases, you may need to resort to other +settings: -d limits crawl depth to a specified number of subdirectories; -c +limits the number of children per directory; and -r limits the total number of +requests to send in a scan. + +An interesting option is available for repeated assessments: -p. By specifying +a percentage between 1 and 100%, it is possible to tell the crawler to follow +fewer than 100% of all links, and try fewer than 100% of all dictionary +entries. This - naturally - limits the completeness of a scan, but unlike most +other settings, it does so in a balanced, non-deterministic manner. It is +extremely useful when you are setting up time-bound, but periodic assessments +of your infrastructure. Another related option is -q, which sets the initial +random seed for the crawler to a specified value. This can be used to exactly +reproduce a previous scan to compare results. Randomness is relied upon most +heavily in the -p mode, but also for making a couple of other scan management +decisions elsewhere. + +Some particularly complex (or broken) services may involve a very high number +of identical or nearly identical pages. Although these occurrences are by +default grayed out in the report, they still use up some screen estate and take +a while to process on JavaScript level. In such extreme cases, you may use the +-Q option to suppress reporting of duplicate nodes altogether, before the +report is written. This may give you a less comprehensive understanding of how +the site is organized, but has no impact on test coverage. + +In certain quick assessments, you might also have no interest in paying any +particular attention to the desired functionality of the site - hoping to +explore non-linked secrets only. In such a case, you may specify -P to inhibit +all HTML parsing. This limits the coverage and takes away the ability for the +scanner to learn new keywords by looking at the HTML, but speeds up the test +dramatically. 
Another similarly crippling option that reduces the risk of +persistent effects of a scan is -O, which inhibits all form parsing and +submission steps. + +By default, skipfish complains loudly about all MIME or character set +mismatches on renderable documents, and classifies many of them as "medium +risk"; this is because, if any user-controlled content is returned, the +situation could lead to cross-site scripting attacks in certain browsers. On +some poorly designed and maintained sites, this may contribute too much noise; +if so, you may use -J to mark these issues as "low risk" unless the scanner can +explicitly see its own user input being echoed back on the resulting page. +This may miss many subtle attack vectors, though. + +Some sites that handle sensitive user data care about SSL - and about getting +it right. Skipfish may optionally assist you in figuring out problematic mixed +content scenarios - use the -M option to enable this. The scanner will complain +about situations such as http:// scripts being loaded on https:// pages - but +will disregard non-risk scenarios such as images. + +Likewise, certain pedantic sites may care about cases where caching is +restricted on HTTP/1.1 level, but no explicit HTTP/1.0 caching directive is +given. Specifying -E in the command-line causes skipfish to log all such +cases carefully. + +Lastly, in some assessments that involve self-contained sites without extensive +user content, the auditor may care about any external e-mails or HTTP links +seen, even if they have no immediate security impact. Use the -U option to have +these logged. + +Dictionary management is a special topic, and - as mentioned - is covered in +more detail in dictionaries/README-FIRST. Please read that file before +proceeding. 
Some of the relevant options include -W to specify a custom +wordlist, -L to suppress auto-learning, -V to suppress dictionary updates, -G +to limit the keyword guess jar size, -R to drop old dictionary entries, and -Y +to inhibit expensive $keyword.$extension fuzzing. + +Skipfish also features a form auto-completion mechanism in order to maximize +scan coverage. The values should be non-malicious, as they are not meant to +implement security checks - but rather, to get past input validation logic. You +can define additional rules, or override existing ones, with the -T option (-T +form_field_name=field_value, e.g. -T login=test123 -T password=test321 - +although note that -C and -A are a much better method of logging in). + +There is also a handful of performance-related options. Use -g to set the +maximum number of connections to maintain, globally, to all targets (it is +sensible to keep this under 50 or so to avoid overwhelming the TCP/IP stack on +your system or on the nearby NAT / firewall devices); and -m to set the per-IP +limit (experiment a bit: 2-4 is usually good for localhost, 4-8 for local +networks, 10-20 for external targets, 30+ for really lagged or non-keep-alive +hosts). You can also use -w to set the I/O timeout (i.e., skipfish will wait +only so long for an individual read or write), and -t to set the total request +timeout, to account for really slow or really fast sites. + +Lastly, -f controls the maximum number of consecutive HTTP errors you are +willing to see before aborting the scan; and -s sets the maximum length of a +response to fetch and parse (longer responses will be truncated). + +-------------------------------- +4. But seriously, how to run it? 
+-------------------------------- + +A standard, authenticated scan of a well-designed and self-contained site +(warns about all external links, e-mails, mixed content, and caching header +issues): + +$ ./skipfish -MEU -C "AuthCookie=value" -X /logout.aspx -o output_dir \ + http://www.example.com/ + +Five-connection crawl, but no brute-force; pretending to be MSIE and caring +less about ambiguous MIME or character set mismatches: + +$ ./skipfish -m 5 -LVJ -W /dev/null -o output_dir -b ie http://www.example.com/ + +Brute force only (no HTML link extraction), trusting links within example.com +and timing out after 5 seconds: + +$ ./skipfish -B .example.com -O -o output_dir -t 5 http://www.example.com/ + +For a short list of all command-line options, try ./skipfish -h. + +---------------------------------------------------- +5. How to interpret and address the issues reported? +---------------------------------------------------- + +Most of the problems reported by skipfish should be self-explanatory, assuming you +have a good grasp of the fundamentals of web security. If you need a quick +refresher on some of the more complicated topics, such as MIME sniffing, you +may enjoy our comprehensive Browser Security Handbook as a starting point: + + http://code.google.com/p/browsersec/ + +If you still need assistance, there are several organizations that put a +considerable effort into documenting and explaining many of the common web +security threats, and advising the public on how to address them. I encourage +you to refer to the materials published by OWASP and Web Application Security +Consortium, amongst others: + + * http://www.owasp.org/index.php/Category:Principle + * http://www.owasp.org/index.php/Category:OWASP_Guide_Project + * http://www.webappsec.org/projects/articles/ + +Although I am happy to diagnose problems with the scanner itself, I regrettably +cannot offer any assistance with the inner workings of third-party web +applications. 
+ +--------------------------------------- +6. Known limitations / feature wishlist +--------------------------------------- + +Below is a list of features currently missing in skipfish. If you wish to +improve the tool by contributing code in one of these areas, please let me know: + + * Buffer overflow checks: after careful consideration, I suspect there is + no reliable way to test for buffer overflows remotely. Much like the actual + fault condition we are looking for, proper buffer size checks may also + result in uncaught exceptions, 500 messages, etc. I would love to be proved + wrong, though. + + * Fully-fledged JavaScript XSS detection: several rudimentary checks are + present in the code, but there is no proper script engine to evaluate + expressions and DOM access built in. + + * Variable length encoding character consumption / injection bugs: these + problems seem to be largely addressed on browser level at this point, so + they were much lower priority at the time of this writing. + + * Security checks and link extraction for third-party, plugin-based content + (Flash, Java, PDF, etc). + + * Password brute-force and numerical filename brute-force probes. + + * Search engine integration (vhosts, starting paths). + + * VIEWSTATE decoding. + + * NTLM and digest authentication. + + * Proxy support: somewhat incompatible with performance control features + currently employed by skipfish; but in the long run, should be provided as + a last-resort option. + + * Scan resume option. + + * Standalone installation (make install) support. + + * Config file support. + +------------------------------------- +7. Oy! Something went horribly wrong! +------------------------------------- + +There is no web crawler so good that there wouldn't be a web framework to one +day set it on fire. 
If you encounter what appears to be bad behavior (e.g., a +scan that takes forever and generates too many requests, completely bogus nodes +in scan output, or outright crashes), please recompile the scanner with: + +$ make clean debug + +...and re-run it this way: + +$ ./skipfish [...previous options...] 2>logfile.txt + +You can then inspect logfile.txt to get an idea what went wrong; if it looks +like a scanner problem, please scrub any sensitive information from the log +file and send it to the author. + +If the scanner crashed, please recompile it as indicated above, and then type: + +$ ulimit -c unlimited +$ ./skipfish [...previous options...] 2>logfile.txt +$ gdb --batch -ex back ./skipfish core + +...and be sure to send the author the output of that last command as well. + +----------------------- +8. Credits and feedback +----------------------- + +Skipfish is made possible thanks to the contributions of, and valuable feedback +from, Google's information security engineering team. + +If you have any bug reports, questions, suggestions, or concerns regarding the +application, the author can be reached at lcamtuf@google.com. diff --git a/alloc-inl.h b/alloc-inl.h new file mode 100644 index 0000000..3b90fb7 --- /dev/null +++ b/alloc-inl.h @@ -0,0 +1,294 @@ +/* + skipfish - error-checking, memory-zeroing alloc routines + -------------------------------------------------------- + + Note: when DEBUG_ALLOCATOR is set, a horribly slow but pedantic + allocation tracker is used. Don't enable this in production. + + Author: Michal Zalewski + + Copyright 2009, 2010 by Google Inc. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + */ + +#ifndef _HAVE_ALLOC_INL_H +#define _HAVE_ALLOC_INL_H + +#include + +#ifdef __APPLE__ +#include +#else +#include +#endif /* __APPLE__ */ + +#include + +#include "config.h" +#include "types.h" +#include "debug.h" + +#define ALLOC_CHECK_SIZE(_s) do { \ + if ((_s) > MAX_ALLOC) \ + FATAL("bad alloc request: %u bytes", (_s)); \ + } while (0) + +#define ALLOC_CHECK_RESULT(_r,_s) do { \ + if (!(_r)) \ + FATAL("out of memory: can't allocate %u bytes", (_s)); \ + } while (0) + +#ifdef __APPLE__ +#define malloc_usable_size malloc_size +#endif /* __APPLE__ */ + +static inline void* __DFL_ck_alloc(u32 size) { + void* ret; + u32 usable; + + if (!size) return NULL; + + ALLOC_CHECK_SIZE(size); + ret = malloc(size); + ALLOC_CHECK_RESULT(ret, size); + + usable = malloc_usable_size(ret); + memset(ret, 0, usable); + + return ret; +} + + +static inline void* __DFL_ck_realloc(void* orig, u32 size) { + void* ret; + u32 old_usable = 0, + new_usable; + + if (!size) { + free(orig); + return NULL; + } + + if (orig) old_usable = malloc_usable_size(orig); + + ALLOC_CHECK_SIZE(size); + ret = realloc(orig, size); + ALLOC_CHECK_RESULT(ret, size); + + new_usable = malloc_usable_size(ret); + + if (new_usable > old_usable) + memset(ret + old_usable, 0, new_usable - old_usable); + + return ret; +} + + +static inline void* __DFL_ck_strdup(u8* str) { + void* ret; + u32 size; + u32 usable; + + if (!str) return NULL; + + size = strlen((char*)str) + 1; + + ALLOC_CHECK_SIZE(size); + ret = malloc(size); + ALLOC_CHECK_RESULT(ret, size); + + usable = malloc_usable_size(ret); + + memcpy(ret, str, 
size); + + if (usable > size) + memset(ret + size, 0, usable - size); + + return ret; +} + +static inline void* __DFL_ck_memdup(u8* mem, u32 size) { + void* ret; + u32 usable; + + if (!mem || !size) return NULL; + + ALLOC_CHECK_SIZE(size); + ret = malloc(size); + ALLOC_CHECK_RESULT(ret, size); + + usable = malloc_usable_size(ret); + + memcpy(ret, mem, size); + + if (usable > size) + memset(ret + size, 0, usable - size); + + return ret; +} + + +#ifndef DEBUG_ALLOCATOR + +/* Non-debugging mode - straightforward aliasing. */ + +#define ck_alloc __DFL_ck_alloc +#define ck_realloc __DFL_ck_realloc +#define ck_strdup __DFL_ck_strdup +#define ck_memdup __DFL_ck_memdup +#define ck_free free + +#else + +/* Debugging mode - include additional structures and support code. */ + +#define ALLOC_BUCKETS 1024 + +struct __AD_trk_obj { + void *ptr; + char *file, *func; + u32 line; +}; + + +extern struct __AD_trk_obj* __AD_trk[ALLOC_BUCKETS]; +extern u32 __AD_trk_cnt[ALLOC_BUCKETS]; + +#define __AD_H(_ptr) (((((u32)(long)(_ptr)) >> 16) ^ ((u32)(long)(_ptr))) % \ + ALLOC_BUCKETS) + +/* Adds a new entry to the list of allocated objects. */ + +static inline void __AD_alloc_buf(void* ptr, const char* file, const char* func, + u32 line) { + u32 i, b; + + if (!ptr) return; + + b = __AD_H(ptr); + + for (i=0;i<__AD_trk_cnt[b];i++) + if (!__AD_trk[b][i].ptr) { + __AD_trk[b][i].ptr = ptr; + __AD_trk[b][i].file = (char*)file; + __AD_trk[b][i].func = (char*)func; + __AD_trk[b][i].line = line; + return; + } + + __AD_trk[b] = __DFL_ck_realloc(__AD_trk[b], + (__AD_trk_cnt[b] + 1) * sizeof(struct __AD_trk_obj)); + + __AD_trk[b][__AD_trk_cnt[b]].ptr = ptr; + __AD_trk[b][__AD_trk_cnt[b]].file = (char*)file; + __AD_trk[b][__AD_trk_cnt[b]].func = (char*)func; + __AD_trk[b][__AD_trk_cnt[b]].line = line; + __AD_trk_cnt[b]++; + +} + + +/* Removes entry from the list of allocated objects. 
*/ + +static inline void __AD_free_buf(void* ptr, const char* file, const char* func, + u32 line) { + u32 i, b; + + if (!ptr) return; + + b = __AD_H(ptr); + + for (i=0;i<__AD_trk_cnt[b];i++) + if (__AD_trk[b][i].ptr == ptr) { + __AD_trk[b][i].ptr = 0; + return; + } + + WARN("ALLOC: Attempt to free non-allocated memory in %s (%s:%u)", + func, file, line); + +} + + +/* Does a final report on all non-deallocated objects. */ + +static inline void __AD_report(void) { + u32 i, b; + + fflush(0); + + for (b=0;b + + Copyright 2009, 2010 by Google Inc. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + */ + +#define _VIA_ANALYSIS_C + +#include "debug.h" +#include "config.h" +#include "types.h" +#include "http_client.h" +#include "database.h" +#include "crawler.h" +#include "analysis.h" + +u8 no_parse, /* Disable HTML link detection */ + warn_mixed, /* Warn on mixed content */ + log_ext_urls, /* Log all external URLs */ + no_forms, /* Do not submit forms */ + relaxed_mime, /* Relax about cset / mime */ + pedantic_cache; /* Match HTTP/1.0 and HTTP/1.1 */ + +/* Form autofill hints: */ + +static u8** addl_form_name; +static u8** addl_form_value; +static u32 addl_form_cnt; + + +/* Runs some rudimentary checks on top-level pivot HTTP responses. */ + +void pivot_header_checks(struct http_request* req, + struct http_response* res) { + + u32 i; + u8 *par_hdr, *cur_hdr; + + DEBUG_CALLBACK(req, res); + + /* Server: change. 
*/ + + cur_hdr = GET_HDR((u8*)"Server", &res->hdr); + if (!RPAR(req)->res) par_hdr = NULL; + else par_hdr = GET_HDR((u8*)"Server", &RPAR(req)->res->hdr); + + if (!cur_hdr) cur_hdr = (u8*)"[none]"; + if (!par_hdr) par_hdr = (u8*)"[none]"; + + if (strcmp((char*)cur_hdr, (char*)par_hdr)) + problem(PROB_SERVER_CHANGE, req, res, cur_hdr, req->pivot, 0); + + /* Via: appears or disappears. */ + + cur_hdr = GET_HDR((u8*)"Via", &res->hdr); + if (!RPAR(req)->res) par_hdr = NULL; + else par_hdr = GET_HDR((u8*)"Via", &RPAR(req)->res->hdr); + + if (cur_hdr != par_hdr) + problem(PROB_VIA_CHANGE, req, res, cur_hdr ? cur_hdr : (u8*)"[none]", + req->pivot, 0); + + /* New X-* header appears. */ + + for (i=0;ihdr.c;i++) { + + if (strncasecmp((char*)res->hdr.n[i], "X-", 2)) continue; + + if (!RPAR(req)->res) par_hdr = NULL; + else par_hdr = GET_HDR(res->hdr.n[i], &RPAR(req)->res->hdr); + + if (!par_hdr) + problem(PROB_X_CHANGE, req, res, res->hdr.n[i], req->pivot,0); + + } + + /* Old X-* header disappears. */ + + if (RPAR(req)->res) + for (i=0;ires->hdr.c;i++) { + + if (strncasecmp((char*)RPAR(req)->res->hdr.n[i], "X-", 2)) continue; + + cur_hdr = GET_HDR(RPAR(req)->res->hdr.n[i], &res->hdr); + + if (!cur_hdr) + problem(PROB_X_CHANGE, req, res, RPAR(req)->res->hdr.n[i], req->pivot, 0); + + } + +} + + +/* Helper for scrape_response(). Tries to add a previously extracted link, + also checks for cross-site and mixed content issues and similar woes. + Subres is: 1 - redirect; 2 - IMG; 3 - IFRAME, EMBED, OBJECT, APPLET; + 4 - SCRIPT, LINK REL=STYLESHEET; 0 - everything else. */ + +static void test_add_link(u8* str, struct http_request* ref, + struct http_response* res, u8 subres, u8 sure) { + struct http_request* n; + + DEBUG_CALLBACK(ref,res); + DEBUG("* Alleged URL = '%s' [%u]\n", str, subres); + + /* Don't add injected links. 
*/ + + if (!strncasecmp((char*)str, "skipfish:", 10) || + !strncasecmp((char*)str, "//skipfish.invalid/", 20) || + !strncasecmp((char*)str, "http://skipfish.invalid/", 25)) return; + + /* Don't add links that look like they came from JS code with fragmented HTML + snippets, etc. */ + + if (!sure && (strchr("()\"' +,^", *str) || + (*str == '/' && strchr("()\"' +,^", str[1])))) return; + + if ((str[0] == '\'' || str[0] == '"') && (str[1] == '+' || str[1] == ' ')) + return; + + if (!strncasecmp((char*)str, "mailto:", 7)) { + + if (log_ext_urls) { + u8* qmark = (u8*)strchr((char*)str, '?'); + if (qmark) *qmark = 0; + problem(PROB_MAIL_ADDR, ref, res, str + 7, host_pivot(ref->pivot),0); + if (qmark) *qmark = '?'; + } + + return; + } + + n = ck_alloc(sizeof(struct http_request)); + + n->pivot = ref->pivot; + + if (!parse_url(str, n, ref)) { + + if (R(100) < crawl_prob) maybe_add_pivot(n, NULL, sure ? 2 : 1); + + /* Link to a third-party site? */ + + if (!url_allowed_host(n) && !url_trusted_host(n)) + switch (subres) { + + case 0: + if (log_ext_urls) + problem(PROB_EXT_LINK, ref, res, str, host_pivot(ref->pivot), 0); + break; + + case 1: + if (log_ext_urls) + problem(PROB_EXT_REDIR, ref, res, str, ref->pivot, 0); + break; + + case 2: + case 3: + problem(PROB_EXT_OBJ, ref, res, str, ref->pivot, 0); + break; + + case 4: + problem(PROB_EXT_SUB, ref, res, str, ref->pivot, 0); + break; + + } + + /* Mixed content? We don't care about or redirectors + here, though. */ + + if (ref->proto == PROTO_HTTPS && n->proto == PROTO_HTTP && + subres > 2 && warn_mixed) + problem((subres == 4) ? PROB_MIXED_SUB : PROB_MIXED_OBJ, + ref, res, str, ref->pivot, 0); + + } else if (!ref->proto) { + + /* Parser didn't recognize the protocol. If it's a + hierarchical URL (foo://), log it. 
*/ + + u8* x = str; + + while (isalnum(*x)) x++; + + if (str != x && *x == ':' && x[1] == '/') + problem(PROB_UNKNOWN_PROTO, ref, res, str, ref->pivot, 0); + + } + + destroy_request(n); +} + + +/* Another scrape_response() helper - decodes HTML escaping, + maybe also JS escaping, from URLs. Returns a dynamically + allocated copy. */ + +static u8* html_decode_param(u8* url, u8 also_js) { + u32 len = strlen((char*)url); + u8* ret = ck_alloc(len + 1); + u32 i, pos = 0; + + /* If directed to do so, decode \x, \u, and \char sequences + first. */ + + if (also_js) { + + for (i=0;i 0xff) act_val = '?'; + + ret[pos++] = act_val; + + } else ret[pos++] = url[i]; + + } + + ret[pos] = 0; + url = ret; + len = pos; + pos = 0; + ret = ck_alloc(len + 1); + + } + + /* Next, do old-school HTML decoding. There are many other named + entities, of course, but the odds of them appearing in URLs + without %-encoding are negligible. */ + + for (i=0;i 0xff) act_val = '?'; + ret[pos++] = act_val; + i += strcspn((char*)url + i, ";"); + continue; + } + + /* Fall through and output the sequence as-is. */ + + } + + } else if (url[i] == '\r' || url[i] == '\n') continue; + + ret[pos++] = url[i]; + + } + + ret[pos] = 0; + if (also_js) ck_free(url); + + return ret; + +} + + +/* Macro to test for tag names */ + +#define ISTAG(_val, _tag) \ + (!strncasecmp((char*)(_val), _tag, strlen((char*)_tag)) && \ + isspace(_val[strlen((char*)_tag)])) + +/* Macro to find and move past parameter name (saves result in + _store, NULL if not found). Buffer needs to be NUL-terminated + at nearest >. */ + +#define FIND_AND_MOVE(_store, _val, _param) { \ + (_store) = inl_strcasestr((u8*)_val, (u8*)_param); \ + if (_store) { \ + if (!isspace((_store)[-1])) (_store) = NULL; \ + else (_store) += strlen((char*)_param); \ + } \ + } while (0) + +/* Macro to extract parameter value, handling quotes. 
*/ + +#define EXTRACT_ALLOC_VAL(_store, _val) do { \ + u32 _val_len; \ + if (*(_val) == '\'') _val_len = strcspn((char*)++(_val), "'"); else \ + if (*(_val) == '"') _val_len = strcspn((char*)++(_val), "\""); else \ + _val_len = strcspn((char*)(_val), "> \t\r\n"); \ + (_store) = ck_memdup((_val), (_val_len) + 1); \ + (_store)[(_val_len)] = 0; \ + } while (0) + + +/* Adds a new item to the form hint system. */ + +void add_form_hint(u8* name, u8* value) { + addl_form_name = ck_realloc(addl_form_name, + (addl_form_cnt + 1) * sizeof(u8*)); + + addl_form_value = ck_realloc(addl_form_value, + (addl_form_cnt + 1) * sizeof(u8*)); + + addl_form_name[addl_form_cnt] = name; + addl_form_value[addl_form_cnt] = value; + addl_form_cnt++; + +} + + +/* Helper for collect_form_data() - comes up with a fitting value for + a checkbox. Returns a static buffer. */ + +static u8* make_up_form_value(u8* name, struct http_request* req, + struct http_response* res) { + u32 i; + + for (i=0;ipivot), 0); + + return (u8*)form_suggestion[i][1]; + +} + + +/* Helper for collect_form_data() - checks for a probable anti-XSRF token + values. */ + +static u8 maybe_xsrf(u8* token) { + u8* tmp; + u32 digit_cnt = 0, upper_cnt = 0, slash_cnt = 0;; + static u8 tm_prefix[8]; + + if (!tm_prefix[0]) + sprintf((char*)tm_prefix, "%lu", time(0) / 100000); + + /* Unix time is not a valid token. */ + + if (!strncasecmp((char*)token, (char*)tm_prefix, strlen((char*)tm_prefix))) + return 0; + + tmp = token; + while (*tmp && (isdigit(*tmp) || strchr("abcdef", tolower(*tmp)))) { + if (isdigit(*tmp)) digit_cnt++; + tmp++; + } + + /* Looks like base 10 or 16... 
*/ + + if (!*tmp) { + u32 len = tmp - token; + if (len >= XSRF_B16_MIN && len <= XSRF_B16_MAX && digit_cnt >= XSRF_B16_NUM) + return 1; + return 0; + } + + digit_cnt = 0; + tmp = token; + while (*tmp && (isalnum(*tmp) || strchr("=+/", *tmp))) { + if (isdigit(*tmp)) digit_cnt++; + if (isupper(*tmp)) upper_cnt++; + if (*tmp == '/') slash_cnt++; + tmp++; + } + + /* Looks like base 32 or 64... */ + + if (!*tmp) { + u32 len = tmp - token; + if (len >= XSRF_B64_MIN && len <= XSRF_B64_MAX && ((digit_cnt >= + XSRF_B64_NUM && upper_cnt >= XSRF_B64_CASE) || digit_cnt >= + XSRF_B64_NUM2) && slash_cnt <= XSRF_B64_SLASH) return 1; + return 0; + } + + /* Looks like... not a numerical token at all. */ + + return 0; + +} + + +/* Another helper for scrape_response(): examines all tags + up until , then adds them as parameters to current request. */ + +static void collect_form_data(struct http_request* req, + struct http_request* orig_req, + struct http_response* orig_res, + u8* cur_str, u8 is_post) { + + u8 has_xsrf = 0, pass_form = 0; + u32 tag_cnt = 0; + + DEBUG("* collect_form_data() entered\n"); + + do { + + u8* tag_end; + + if (*cur_str == '<' && (tag_end = (u8*)strchr((char*)cur_str + 1, '>'))) { + + cur_str++; + *tag_end = 0; + + if (!strncasecmp((char*)cur_str, "/form", 5)) { + *tag_end = '>'; + goto final_checks; + } + + if (ISTAG(cur_str, "input") || ISTAG(cur_str, "textarea") || + ISTAG(cur_str, "select")) { + + u8 *tag_name, *tag_value, *tag_type, *clean_name = NULL, + *clean_value = NULL; + + FIND_AND_MOVE(tag_name, cur_str, "name="); + FIND_AND_MOVE(tag_value, cur_str, "value="); + FIND_AND_MOVE(tag_type, cur_str, "type="); + + if (!tag_name) goto next_tag; + + EXTRACT_ALLOC_VAL(tag_name, tag_name); + clean_name = html_decode_param(tag_name, 0); + ck_free(tag_name); + tag_name = 0; + + if (tag_value) { + EXTRACT_ALLOC_VAL(tag_value, tag_value); + clean_value = html_decode_param(tag_value, 0); + ck_free(tag_value); + tag_value = 0; + } + + if (tag_type) + 
EXTRACT_ALLOC_VAL(tag_type, tag_type); + else tag_type = ck_strdup((u8*)"text"); + + tag_cnt++; + + if (!strcasecmp((char*)tag_type, "file")) { + + if (!is_post) { + ck_free(req->method); + req->method = ck_strdup((u8*)"POST"); + is_post = 1; + } + + set_value(PARAM_POST_F, clean_name, clean_value ? + clean_value : (u8*)"", 0, &req->par); + + } else if (!strcasecmp((char*)tag_type, "reset")) { + + /* Do nothing - do not store. */ + tag_cnt--; + + } else if (!strcasecmp((char*)tag_type, "button") || + !strcasecmp((char*)tag_type, "submit")) { + + set_value(is_post ? PARAM_POST : PARAM_QUERY, clean_name, + clean_value ? clean_value : (u8*)"", 0, &req->par); + + } else if (!strcasecmp((char*)tag_type, "checkbox")) { + + /* Turn checkboxes on. */ + + set_value(is_post ? PARAM_POST : PARAM_QUERY, clean_name, + (u8*)"on", 0, &req->par); + + } else { + + u8* use_value = clean_value; + + /* Don't second-guess hidden fields. */ + + if (strcasecmp((char*)tag_type, "hidden") && + (!use_value || !use_value[0])) { + use_value = make_up_form_value(clean_name, orig_req, orig_res); + } else { + if (!use_value) use_value = (u8*)""; + } + + /* Radio buttons are rolled back into a single parameter + because we always replace offset 0 for given clean_name. */ + + set_value(is_post ? PARAM_POST : PARAM_QUERY, + clean_name, use_value, 0, &req->par); + + if (!strcasecmp((char*)tag_type, "hidden") && + maybe_xsrf(use_value)) has_xsrf = 1; + + } + + if (inl_strcasestr(tag_name, (u8*) "passw")) pass_form = 1; + + ck_free(tag_name); + ck_free(tag_type); + ck_free(tag_value); + ck_free(clean_name); + ck_free(clean_value); + + } + +next_tag: + + *tag_end = '>'; + + } else tag_end = cur_str; + + /* Skip to next tag. 
*/ + + cur_str = (u8*)strchr((char*)tag_end + 1, '<'); + + } while (cur_str); + +final_checks: + + if (pass_form) { + problem(PROB_PASS_FORM, req, orig_res, NULL, req->pivot, 0); + } else { + if (tag_cnt && !has_xsrf) + problem(PROB_VULN_FORM, req, orig_res, NULL, req->pivot, 0); + else + problem(PROB_FORM, req, orig_res, NULL, req->pivot, 0); + } + +} + + +/* Helper for scrape_response() and content_checks: is the + file mostly ASCII? */ + +static u8 is_mostly_ascii(struct http_response* res) { + u32 i, total, printable = 0; + + if (res->doc_type) return (res->doc_type == 2); + + total = (res->pay_len > 128) ? 128 : res->pay_len; + + if (!total) { res->doc_type = 2; return 1; } + + for (i=0;ipayload[i] >= 0x20 && res->payload[i] <= 0x7f) + || (res->payload[i] && strchr("\r\n", res->payload[i]))) + printable++; + + if (printable * 100 / total < 90) { + DEBUG("* looks like binary data (print = %u, total = %u)\n", + printable, total); + res->doc_type = 1; + return 1; + } + + DEBUG("* looks like text file (print = %u, total = %u)\n", + printable, total); + + res->doc_type = 2; + return 1; + +} + +/* Analyzes response headers (Location, etc), body to extract new links, + keyword guesses. This code is designed to be simple and fast, but it + does not even try to understand the intricacies of HTML or whatever + the response might be wrapped in. */ + +void scrape_response(struct http_request* req, struct http_response* res) { + + struct http_request *base = NULL; + u8* cur_str; + u32 i; + + DEBUG_CALLBACK(req, res); + + if (no_parse || res->scraped) return; + + res->scraped = 1; + + /* Handle Location, Refresh headers first. 
*/ + + if ((cur_str = GET_HDR((u8*)"Location", &res->hdr))) + test_add_link(cur_str, req, res, 1, 1); + + if ((cur_str = GET_HDR((u8*)"Refresh", &res->hdr)) && + (cur_str = (u8*)strchr((char*)cur_str, '='))) + test_add_link(cur_str + 1, req, res, 1, 1); + + if (!res->payload || !is_mostly_ascii(res)) return; + + cur_str = res->payload; + + /* PASS 1: Do a simplified check to what looks like proper, + known HTML parameters bearing URLs. Note that payload is + conveniently NUL-terminated. */ + + do { + + u8 *tag_end; + + if (*cur_str == '<' && (tag_end = (u8*)strchr((char*)cur_str + 1, '>'))) { + + u32 link_type = 0; + u8 set_base = 0, parse_form = 0; + u8 *dirty_url = NULL, *clean_url = NULL, *meta_url = NULL; + + cur_str++; + *tag_end = 0; + + /* Several tags we need to handle specially, either because they + denote a particularly interesting content type (marked in + link_type, see test_add_link()), or because they use a + non-standard parameter for URL data. */ + + if (ISTAG(cur_str, "meta")) { + + link_type = 1; + FIND_AND_MOVE(dirty_url, cur_str, "content="); + + if (dirty_url) { + EXTRACT_ALLOC_VAL(meta_url, dirty_url); + dirty_url = inl_strcasestr(meta_url, (u8*)"URL="); + if (dirty_url) dirty_url += 4; + } + + } else if (ISTAG(cur_str, "img")) { + + link_type = 2; + FIND_AND_MOVE(dirty_url, cur_str, "src="); + + } else if (ISTAG(cur_str, "object") || ISTAG(cur_str, "embed") || + ISTAG(cur_str, "applet") || ISTAG(cur_str, "iframe")) { + + link_type = 3; + FIND_AND_MOVE(dirty_url, cur_str, "src="); + if (!dirty_url) FIND_AND_MOVE(dirty_url, cur_str, "codebase="); + + } else if (ISTAG(cur_str, "param") && inl_strcasestr(cur_str, + (u8*)"movie")) { + + link_type = 3; + FIND_AND_MOVE(dirty_url, cur_str, "value="); + + } else if (ISTAG(cur_str, "script")) { + + link_type = 4; + FIND_AND_MOVE(dirty_url, cur_str, "src="); + + } else if (ISTAG(cur_str, "link") && inl_strcasestr(cur_str, + (u8*)"stylesheet")) { + + link_type = 4; + FIND_AND_MOVE(dirty_url, cur_str, 
"href="); + + } else if (ISTAG(cur_str, "base")) { + + set_base = 1; + FIND_AND_MOVE(dirty_url, cur_str, "href="); + + } else if (ISTAG(cur_str, "form")) { + + u8* method; + parse_form = 1; + FIND_AND_MOVE(dirty_url, cur_str, "action="); + + /* See if we need to POST this form or not. */ + + FIND_AND_MOVE(method, cur_str, "method="); + + if (method && *method) { + if (strchr("\"'", *method)) method++; + if (tolower(method[0]) == 'p') parse_form = 2; + } + + } else { + + /* All other tags - other types, , - + are handled in a generic way. */ + + FIND_AND_MOVE(dirty_url, cur_str, "href="); + if (!dirty_url) FIND_AND_MOVE(dirty_url, cur_str, "src="); + + } + + /* If we found no URL to speak of, we're done. */ + + if (!dirty_url) { + ck_free(meta_url); + goto next_tag; + } + + /* De-quotify and decode the value. */ + + EXTRACT_ALLOC_VAL(dirty_url, dirty_url); + clean_url = html_decode_param(dirty_url, 0); + ck_free(dirty_url); + ck_free(meta_url); + + if (!*clean_url) goto next_tag; + + test_add_link(clean_url, base ? base : req, res, link_type, 1); + + /* If we are dealing with a tag, we need to create + a new dummy request to use as a referrer. */ + + if (set_base) { + + struct http_request* n = ck_alloc(sizeof(struct http_request)); + n->pivot = req->pivot; + if (!parse_url(clean_url, n, base ? base : req)) base = n; + + } else if (parse_form) { + + /*
handling... */ + + struct http_request* n = ck_alloc(sizeof(struct http_request)); + n->pivot = req->pivot; + + if (parse_form == 2) { + ck_free(n->method); + n->method = ck_strdup((u8*)"POST"); + } + + /* Don't collect form fields, etc, if target is not within the + scope anyway. */ + + DEBUG("* Found form: target %s method %s\n", clean_url, n->method); + + if (!parse_url(clean_url, n, base ? base : req) && url_allowed(n) && + R(100) < crawl_prob && !no_forms) { + collect_form_data(n, req, res, tag_end + 1, (parse_form == 2)); + maybe_add_pivot(n, NULL, 2); + } + + destroy_request(n); + + } + +next_tag: + + *tag_end = '>'; + + if (clean_url) ck_free(clean_url); + + } else tag_end = cur_str; + + /* Skip to next tag. */ + + cur_str = (u8*)strchr((char*)tag_end + 1, '<'); + + } while (cur_str); + + cur_str = res->payload; + + /* PASS 2: Extract links from non-HTML body, JS, etc; add keywords. */ + + do { + + u32 clean_len, alpha_cnt = 0, lower_cnt = 0, lead = 0, seg_len; + u8 *ext, *token, *clean_url, *tmp, *pos_at; + u8 last = 0, saved; + + /* Skip leading whitespaces, terminators. */ + + seg_len = strspn((char*)cur_str, " \t\r\n<>\"'"); + cur_str += seg_len; + + /* If there's a = character preceeded only by alnums or underscores, + skip this chunk (to handle something=http://www.example.com/ neatly) */ + + tmp = cur_str; + while (*tmp && (isalnum(*tmp) || *tmp == '_')) tmp++; + if (*tmp == '=') cur_str = tmp + 1; + + if (!*cur_str) break; + seg_len = strcspn((char*)cur_str + 1, " \t\r\n<>\"'") + 1; + + /* Extract the segment, decoding JS and HTML on the go. */ + + saved = cur_str[seg_len]; + cur_str[seg_len] = 0; + clean_url = html_decode_param(cur_str, 1); + cur_str[seg_len] = saved; + + tmp = clean_url; + + /* We want the entire extracted segment to consist only of nice + characters we would expect in a URL. If not, panic. 
*/ + + while (*tmp) { + if (!isalnum(*tmp) && !isspace(*tmp) && + !strchr("_-.:@/?&=#%;$!+~()[]{}\\|^*", *tmp)) goto url_done; + tmp++; + } + + clean_len = tmp - clean_url; + + /* Strip trailing characters that are unlikely to appear in valid URLs + anyway, and could be a part of some message. */ + + while (clean_len && + strchr(".,:?!-$&", clean_url[clean_len-1])) clean_len--; + + clean_url[clean_len] = 0; + + /* URL CHECK 1: Things that start with ./ or ../ are obviously URLs. + We do not make assumptins about syntax such as /foo/, though, as + it could very well be a regex in a JS block. */ + + if (!strncmp((char*)clean_url, "./", 2) || !strncmp((char*)clean_url, + "../", 3)) { +add_link: + test_add_link(clean_url, base ? base : req, res, 0, 0); + goto url_done; + } + + /* URL CHECK 2: Things that start with :// are quite + clearly URLs. */ + + while (clean_url[lead] && (isalnum(clean_url[lead]))) lead++; + + if (lead && !strncmp((char*)clean_url + lead, "://", 3) && + clean_url[lead + 3]) goto add_link; + + /* URL CHECK 3: If the result ends with ., + and contains a slash anywhere, assume URL (without that + slash check, we would get duped by 'domain.com'. */ + + if (strchr((char*)clean_url, '/')) { + + i = 0; + + while ((ext = wordlist_get_extension(i++))) { + u32 ext_len = strlen((char*)ext); + + if (clean_len > ext_len + 2 && + !strncasecmp((char*)clean_url + clean_len - ext_len, + (char*)ext, ext_len) && + clean_url[clean_len - ext_len - 1] == '.') goto add_link; + + } + + } + + if (!(pos_at = (u8*)strchr((char*)clean_url, '@'))) { + + /* URL CHECK 4: ?= syntax is strongly indicative of + an URL (only if not e-mail). 
*/ + + u8 *pos_qmark = (u8*)strchr((char*)clean_url, '?'), + *pos_eq = (u8*)strchr((char*)clean_url, '='), + *pos_amp = (u8*)strchr((char*)clean_url, '&'); + + if (pos_qmark && pos_eq && pos_qmark + 1 < pos_eq && + pos_eq[1] && (!pos_amp || pos_amp > pos_eq) && + pos_eq[1] != '=' && !strchr((char*)clean_url, '(') && + !strchr((char*)clean_url, '[') && + (u8*)strchr((char*)clean_url, ':') < pos_eq) + goto add_link; + + } else if (log_ext_urls) { + + /* EMAIL CHECK: If the string uses a limited set of characters, + starts with alpha, ahs at least one period after @, and both + @ and the period are immediately followed by alpha - assume + e-mail. */ + + u8 *pos_dot, + *pos_qmark = (u8*)strchr((char*)clean_url, '?'); + + if (pos_qmark && pos_qmark > pos_at) *pos_qmark = 0; + + lead = 0; + + while (clean_url[lead] && (isalnum(clean_url[lead]) || + strchr("._-+@", clean_url[lead]))) lead++; + + pos_dot = (u8*)strchr((char*)pos_at + 1, '.'); + + if (!clean_url[lead] && pos_at && pos_dot && isalpha(clean_url[0]) && + isalpha(pos_at[1]) && isalpha(pos_dot[1])) { + problem(PROB_MAIL_ADDR, req, res, clean_url, host_pivot(req->pivot), 0); + goto url_done; + } + + } + + /* LAST CHANCE: Try to detect base64; if the segment does not look like + base64, add each segment to try_list. */ + + tmp = clean_url; + + while (*tmp) { + if (isalpha(*tmp)) { + alpha_cnt++; + if (islower(*tmp)) lower_cnt++; + } + tmp++; + } + + if (alpha_cnt > 20 && (lower_cnt * 100 / alpha_cnt) > 35 && + (lower_cnt * 100 / alpha_cnt) < 65) goto url_done; + + token = clean_url; + + do { + while (*token && !isalnum(*token)) token++; + tmp = token; + while (*tmp && isalnum(*tmp)) tmp++; + if (!*tmp) last = 1; + *tmp = 0; + if (R(100) < GUESS_PROB) wordlist_add_guess(token); + token = tmp + 1; + } while (!last); + +url_done: + + ck_free(clean_url); + + cur_str += seg_len; + + } while (*cur_str); + + if (base) destroy_request(base); + + /* Phew! */ + +} + + +/* Returns 1 if document looks like standalone CSS. 
*/ + +static u8 is_css(struct http_response* res) { + u8* text = res->payload; + u8 first = 0, last = 0; + + if (res->css_type) return (res->css_type == 2); + if (!text || !is_mostly_ascii(res)) return 0; + + do { + + /* Skip whitespaces... */ + + while (isspace(*text)) text++; + + /* Skip HTML, CSS comments. */ + + if (!strncmp((char*)text, " +Skipfish - scan results browser + + + + + + + + + + + + + + +
+
HTTP trace - click this bar or hit ESC to close
+ +
+
+ + + + + + +
Scanner version:Scan date:
Random seed:Total time:
+ +
+ +

Crawl results - click to expand:

+
+
+ +

Document type overview - click to expand:

+
+
+ +

Issue type overview - click to expand:

+
+
+

+NOTE: 100 samples maximum per issue or document type. + diff --git a/assets/mime_entry.png b/assets/mime_entry.png new file mode 100644 index 0000000..12eb14d Binary files /dev/null and b/assets/mime_entry.png differ diff --git a/assets/n_children.png b/assets/n_children.png new file mode 100644 index 0000000..4be7913 Binary files /dev/null and b/assets/n_children.png differ diff --git a/assets/n_clone.png b/assets/n_clone.png new file mode 100644 index 0000000..38939fb Binary files /dev/null and b/assets/n_clone.png differ diff --git a/assets/n_collapsed.png b/assets/n_collapsed.png new file mode 100644 index 0000000..ab776bc Binary files /dev/null and b/assets/n_collapsed.png differ diff --git a/assets/n_expanded.png b/assets/n_expanded.png new file mode 100644 index 0000000..ea1ead3 Binary files /dev/null and b/assets/n_expanded.png differ diff --git a/assets/n_failed.png b/assets/n_failed.png new file mode 100644 index 0000000..07c3b1f Binary files /dev/null and b/assets/n_failed.png differ diff --git a/assets/n_maybe_missing.png b/assets/n_maybe_missing.png new file mode 100644 index 0000000..3f53a38 Binary files /dev/null and b/assets/n_maybe_missing.png differ diff --git a/assets/n_missing.png b/assets/n_missing.png new file mode 100644 index 0000000..84cce41 Binary files /dev/null and b/assets/n_missing.png differ diff --git a/assets/n_unlinked.png b/assets/n_unlinked.png new file mode 100644 index 0000000..659234c Binary files /dev/null and b/assets/n_unlinked.png differ diff --git a/assets/p_dir.png b/assets/p_dir.png new file mode 100644 index 0000000..e6d20ee Binary files /dev/null and b/assets/p_dir.png differ diff --git a/assets/p_file.png b/assets/p_file.png new file mode 100644 index 0000000..6d3f7e5 Binary files /dev/null and b/assets/p_file.png differ diff --git a/assets/p_param.png b/assets/p_param.png new file mode 100644 index 0000000..df958fa Binary files /dev/null and b/assets/p_param.png differ diff --git a/assets/p_pinfo.png 
b/assets/p_pinfo.png new file mode 100644 index 0000000..59d6879 Binary files /dev/null and b/assets/p_pinfo.png differ diff --git a/assets/p_serv.png b/assets/p_serv.png new file mode 100644 index 0000000..b3cf7a0 Binary files /dev/null and b/assets/p_serv.png differ diff --git a/assets/p_unknown.png b/assets/p_unknown.png new file mode 100644 index 0000000..ab6b05e Binary files /dev/null and b/assets/p_unknown.png differ diff --git a/assets/p_value.png b/assets/p_value.png new file mode 100644 index 0000000..cbbd4a6 Binary files /dev/null and b/assets/p_value.png differ diff --git a/assets/sf_name.png b/assets/sf_name.png new file mode 100644 index 0000000..2b43202 Binary files /dev/null and b/assets/sf_name.png differ diff --git a/config.h b/config.h new file mode 100644 index 0000000..4873c67 --- /dev/null +++ b/config.h @@ -0,0 +1,242 @@ +/* + skipfish - configurable settings + -------------------------------- + + Author: Michal Zalewski + + Copyright 2009, 2010 by Google Inc. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ + */ + +#ifndef _HAVE_CONFIG_H +#define _HAVE_CONFIG_H + +#define VERSION "1.00b" + +#define USE_COLOR 1 /* Use terminal colors */ + +/* Various default settings for HTTP client (cmdline override): */ + +#define MAX_CONNECTIONS 50 /* Simultaneous connection cap */ +#define MAX_CONN_HOST 10 /* Per-host connction cap */ +#define MAX_REQUESTS 1e8 /* Total request count cap */ +#define MAX_FAIL 100 /* Max consecutive failed requests */ +#define RW_TMOUT 10 /* Individual network R/W timeout */ +#define RESP_TMOUT 20 /* Total request time limit */ +#define IDLE_TMOUT 10 /* Connection tear down threshold */ +#define SIZE_LIMIT 200000 /* Response size cap */ +#define MAX_GUESSES 256 /* Guess-based wordlist size limit */ + +/* HTTP client constants: */ + +#define MAX_URL_LEN 1024 /* Maximum length of an URL */ +#define MAX_DNS_LEN 255 /* Maximum length of a host name */ +#define READ_CHUNK 4096 /* Read buffer size */ + +/* Define this to use FILO, rather than FIFO, scheduling for new requests. + FILO ensures a more uniform distribution of requests when fuzzing multiple + directories at once, but may reduce the odds of spotting some stored + XSSes, and increase memory usage a bit. */ + +// #define QUEUE_FILO 1 + +/* Dummy file to upload to the server where possible. */ + +#define DUMMY_EXT "gif" +#define DUMMY_FILE "GIF89a,\x01" +#define DUMMY_MIME "image/gif" + +/* Allocator settings: */ + +#define MAX_ALLOC 0x50000000 /* Refuse larger allocations. 
*/ + +/* Configurable settings for crawl database (cmdline override): */ + +#define MAX_DEPTH 16 /* Maximum crawl tree depth */ +#define MAX_CHILDREN 1024 /* Maximum children per tree node */ + +#define DEF_WORDLIST "skipfish.wl" /* Default wordlist file */ + +/* Crawl / analysis constants: */ + +#define MAX_WORD 64 /* Maximum wordlist item length */ +#define GUESS_PROB 50 /* Guess word addition probability */ +#define WORD_HASH 256 /* Hash table for wordlists */ +#define SNIFF_LEN 1024 /* MIME sniffing buffer size */ +#define MAX_SAMPLES 1024 /* Max issue / MIME samples */ + +/* Page fingerprinting constants: */ + +#define FP_SIZE 10 /* Page fingerprint size */ +#define FP_MAX_LEN 15 /* Maximum word length to count */ +#define FP_T_REL 5 /* Relative matching tolerance (%) */ +#define FP_T_ABS 6 /* Absolute matching tolerance */ +#define FP_B_FAIL 3 /* Max number of failed buckets */ + +#define BH_CHECKS 15 /* Page verification check count */ + +/* Crawler / probe constants: */ + +#define BOGUS_FILE "sfi9876" /* Name that should not exist */ +#define MAX_404 4 /* Maximum number of 404 sigs */ +#define PAR_MAX_DIGITS 6 /* Max digits in a fuzzable int */ +#define PAR_INT_FUZZ 100 /* Fuzz by + / - this much */ + +#ifdef QUEUE_FILO +#define DICT_BATCH 200 /* Brute-force queue block */ +#else +#define DICT_BATCH 1000 /* Brute-force queue block */ +#endif /* ^QUEUE_FILO */ + +/* Single query for IPS detection - Evil Query of Doom (tm). */ + +#define IPS_TEST \ + "?_test1=c:\\windows\\system32\\cmd.exe" \ + "&_test2=/etc/passwd" \ + "&_test3=|/bin/sh" \ + "&_test4=(SELECT * FROM nonexistent) --" \ + "&_test5=>/no/such/file" \ + "&_test6=" \ + "&_test7=javascript:alert(1)" + +/* A benign query with a similar character set to compare with EQoD. 
*/ + +#define IPS_SAFE \ + "?_test1=ccddeeeimmnossstwwxy.:\\\\\\" \ + "&_test2=acdepsstw//" \ + "&_test3=bhins//" \ + "&_test4=CEEFLMORSTeeinnnosttx--*" \ + "&_test5=cefhilnosu///" \ + "&_test6=acceiilpprrrssttt1)(" \ + "&_test7=aaaceijlprrsttv1):(" + +/* XSRF token detector settings: */ + +#define XSRF_B16_MIN 8 /* Minimum base10/16 token length */ +#define XSRF_B16_MAX 45 /* Maximum base10/16 token length */ +#define XSRF_B16_NUM 2 /* ...minimum digit count */ +#define XSRF_B64_MIN 6 /* Minimum base32/64 token length */ +#define XSRF_B64_MAX 32 /* Maximum base32/64 token length */ +#define XSRF_B64_NUM 1 /* ...minimum digit count && */ +#define XSRF_B64_CASE 2 /* ...minimum uppercase count */ +#define XSRF_B64_NUM2 3 /* ...digit count override */ +#define XSRF_B64_SLASH 2 /* ...maximum slash count */ + +#ifdef _VIA_DATABASE_C + +/* Domains we always trust (identical to -B options). These entries do not + generate cross-domain content inclusion warnings. NULL-terminated. */ + +static const char* always_trust_domains[] = { + ".google-analytics.com", + ".googleapis.com", + ".googleadservices.com", + ".googlesyndication.com", + "www.w3.org", + 0 +}; + +#endif /* _VIA_DATABASE_C */ + +#ifdef _VIA_ANALYSIS_C + +/* NULL-terminated list of JSON-like response prefixes we consider to + be sufficiently safe against cross-site script inclusion (courtesy + ratproxy). */ + +static const char* json_safe[] = { + "while(1);", /* Parser looping */ + "while (1);", /* ... */ + "while(true);", /* ... */ + "while (true);", /* ... */ + "&&&", /* Parser breaking */ + "//OK[", /* Line commenting */ + "{\"", /* Serialized object */ + "{{\"", /* Serialized object */ + "throw 1; <", /* Magical combo */ + ")]}'", /* Recommended magic */ + 0 +}; + +/* NULL-terminated list of known valid charsets. Charsets not on the list are + considered dangerous (as they may trigger charset sniffing). 
+ + Note that many common misspellings, such as "utf8", are not valid and NOT + RECOGNIZED by browsers, leading to content sniffing. Do not add them here. + + Also note that SF does not support encoding not compatible with US ASCII + transport (e.g., UTF-16, UTF-32). Lastly, variable-length encodings + other than utf-8 may have character consumption issues that are not + tested for at this point. */ + +static const char* valid_charsets[] = { + "utf-8", /* Valid 8-bit safe Unicode */ + "iso8859-1", /* Western Europe */ + "iso8859-2", /* Central Europe */ + "iso8859-15", /* New flavor of ISO8859-1 */ + "iso8859-16", /* New flavor of ISO8859-2 */ + "iso-8859-1", /* Browser-supported misspellings */ + "iso-8859-2", /* - */ + "iso-8859-15", /* - */ + "iso-8859-16", /* - */ + "windows-1252", /* Microsoft's Western Europe */ + "windows-1250", /* Microsoft's Central Europe */ + "us-ascii", /* Old school but generally safe */ + "koi8-r", /* 8-bit and US ASCII compatible */ + 0 +}; + + +/* Default form auto-fill rules - used to pair up form fields with fun + values! Do not attempt security attacks here, though - this is to maximize + crawl coverage, not to exploit anything. The last item must have a name + of NULL, and the value will be used as a default option when no other + matches found. 
*/ + +static const char* form_suggestion[][2] = { + + { "phone" , "6505550100" }, /* Reserved */ + { "zip" , "94043" }, + { "first" , "John" }, + { "last" , "Smith" }, + { "name" , "Smith" }, + { "mail" , "skipfish@example.com" }, + { "street" , "1600 Amphitheatre Pkwy" }, + { "city" , "Mountain View" }, + { "state" , "CA" }, + { "country" , "US" }, + { "language" , "en" }, + { "company" , "ACME" }, + { "search" , "skipfish" }, + { "login" , "skipfish" }, + { "user" , "skipfish" }, + { "pass" , "skipfish" }, + { "year" , "2010" }, + { "card" , "4111111111111111" }, /* Reserved */ + { "code" , "000" }, + { "cvv" , "000" }, + { "expir" , "1212" }, + { "ssn" , "987654320" }, /* Reserved */ + { "url" , "http://example.com/?sfish_form_test" }, + { "site" , "http://example.com/?sfish_form_test" }, + { "domain" , "example.com" }, + { "search" , "a" }, + { NULL , "1" } + +}; + +#endif /* _VIA_ANALYSIS_C */ + +#endif /* ! _HAVE_CONFIG_H */ diff --git a/crawler.c b/crawler.c new file mode 100644 index 0000000..b2219a5 --- /dev/null +++ b/crawler.c @@ -0,0 +1,2776 @@ +/* + skipfish - crawler state machine + -------------------------------- + + Includes dictionary and security injection logic. + + Author: Michal Zalewski + + Copyright 2010 by Google Inc. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ + */ + +#define _VIA_CRAWLER_C + +#include "debug.h" +#include "config.h" +#include "types.h" +#include "http_client.h" +#include "database.h" +#include "crawler.h" +#include "analysis.h" + +u32 crawl_prob = 100; /* Crawl probability (1-100%) */ +u8 no_fuzz_ext; /* Don't fuzz extensions for dirs */ + +/* + + ************************* + **** GENERAL HELPERS **** + ************************* + + Assorted functions used by all the crawl callbacks for manipulating + requests, parsing responses, etc. + + */ + + +/* Classifies a response, with a special handling of "unavailable" and + "gateway timeout" codes. */ + +#define FETCH_FAIL(_res) ((_res)->state != STATE_OK || (_res)->code == 503 || \ + (_res)->code == 504) + +/* Dumps request, response (for debugging only). */ + +u8 show_response(struct http_request* req, struct http_response* res) { + + dump_http_request(req); + + if (FETCH_FAIL(res)) { + SAY("^^^ REQUEST SHOWN ABOVE CAUSED ERROR: %d ^^^\n", res->state); + return 0; + } + + dump_http_response(res); + + return 0; /* Do not keep req/res */ + +} + + +/* Strips trailing / from a directory request, optionally replaces it with + a new value. */ + +static void replace_slash(struct http_request* req, u8* new_val) { + u32 i; + + for (i=0;ipar.c;i++) + if (req->par.t[i] == PARAM_PATH && !req->par.n[i] && !req->par.v[i][0]) { + if (new_val) { + ck_free(req->par.v[i]); + req->par.v[i] = ck_strdup(new_val); + } else req->par.t[i] = PARAM_NONE; + return; + } + + /* Could not find a slash segment - create a new segment instead. */ + + set_value(PARAM_PATH, 0, new_val, -1, &req->par); + +} + + +/* Releases children for crawling (called once parent node had 404, IPS + probes done, etc). Note that non-directories might have locked + children too. 
*/ + +static void unlock_children(struct pivot_desc* pv) { + u32 i; + + DEBUG_HELPER(pv); + + for (i=0;ichild_cnt;i++) + if (pv->child[i]->state == PSTATE_PENDING) { + + pv->child[i]->state = PSTATE_FETCH; + + if (!pv->child[i]->res) async_request(pv->child[i]->req); + else switch (pv->child[i]->type) { + + case PIVOT_DIR: fetch_dir_callback(pv->req, pv->res); break; + case PIVOT_PARAM: + case PIVOT_FILE: fetch_file_callback(pv->req, pv->res); break; + case PIVOT_UNKNOWN: fetch_unknown_callback(pv->req, pv->res); break; + default: FATAL("Unknown pivot type '%u'", pv->type); + + } + + } + +} + + +/* Handles response error for callbacks in a generalized manner. If 'stop' is + 1, marks the entire pivot as busted, unlocks children. */ + +static void handle_error(struct http_request* req, struct http_response* res, + u8* desc, u8 stop) { + + DEBUG_CALLBACK(req, res); + + if (res->state == STATE_SUPPRESS) { + problem(PROB_LIMITS, req, res, (u8*)"Too many previous fetch failures", + req->pivot, 0); + } else { + problem(PROB_FETCH_FAIL, req, res, desc, req->pivot, 0); + } + + if (stop) { + req->pivot->state = PSTATE_DONE; + unlock_children(req->pivot); + } + +} + + +/* Finds nearest "real" directory parent, so that we can consult it for 404 + signatures, etc. Return NULL if dir found, but signature-less. */ + +static struct pivot_desc* dir_parent(struct pivot_desc* pv) { + struct pivot_desc* ret; + + ret = pv->parent; + + while (ret && ret->type != PIVOT_DIR && ret->type != PIVOT_SERV) + ret = ret->parent; + + if (ret && !ret->r404_cnt) return NULL; + return ret; +} + + +/* Deletes any cached requests and responses stored by injection probes. 
*/ + +static void destroy_misc_data(struct pivot_desc* pv, + struct http_request* self) { + u32 i; + + for (i=0;i<10;i++) { + + if (pv->misc_req[i] != self) { + + if (pv->misc_req[i]) + destroy_request(pv->misc_req[i]); + + if (pv->misc_res[i]) + destroy_response(pv->misc_res[i]); + + } + + pv->misc_req[i] = NULL; + pv->misc_res[i] = NULL; + + } + + pv->misc_cnt = 0; + +} + + + +/* + + *************************************** + **** ASSORTED FORWARD DECLARATIONS **** + *************************************** + + */ + +static u8 dir_404_callback(struct http_request*, struct http_response*); +static u8 dir_ips_callback(struct http_request*, struct http_response*); +static void inject_init(struct pivot_desc*); +static void crawl_dir_dict_init(struct pivot_desc*); +static u8 dir_dict_callback(struct http_request*, struct http_response*); +static u8 inject_check0_callback(struct http_request*, struct http_response*); +static u8 inject_check1_callback(struct http_request*, struct http_response*); +static u8 inject_check2_callback(struct http_request*, struct http_response*); +static u8 inject_check3_callback(struct http_request*, struct http_response*); +static u8 inject_check4_callback(struct http_request*, struct http_response*); +static u8 inject_check5_callback(struct http_request*, struct http_response*); +static u8 inject_check6_callback(struct http_request*, struct http_response*); +static u8 inject_check7_callback(struct http_request*, struct http_response*); +static u8 inject_check8_callback(struct http_request*, struct http_response*); +static u8 inject_check9_callback(struct http_request*, struct http_response*); +static void crawl_par_numerical_init(struct pivot_desc*); +static u8 par_check_callback(struct http_request*, struct http_response*); +static u8 unknown_check_callback(struct http_request*, struct http_response*); +static u8 par_numerical_callback(struct http_request*, struct http_response*); +static u8 par_dict_callback(struct http_request*, struct 
http_response*); +static u8 par_trylist_callback(struct http_request*, struct http_response*); +static void crawl_par_dict_init(struct pivot_desc*); +static void crawl_parametric_init(struct pivot_desc*); +static void end_injection_checks(struct pivot_desc*); +static u8 par_ognl_callback(struct http_request*, struct http_response*); + + +/* + + ******************************** + **** CASE-SENSITIVITY CHECK **** + ******************************** + + */ + +static u8 check_case_callback(struct http_request* req, + struct http_response* res) { + + DEBUG_CALLBACK(req, res); + + if (FETCH_FAIL(res)) { + RPAR(req)->c_checked = 0; + return 0; + } + + if (!same_page(&res->sig, &RPRES(req)->sig)) + RPAR(req)->csens = 1; + + return 0; + +} + + +static void check_case(struct pivot_desc* pv) { + u32 i, len; + s32 last = -1; + struct http_request* n; + + if (pv->parent->c_checked) return; + + DEBUG_HELPER(pv); + + for (i=0;ireq->par.c;i++) + if (PATH_SUBTYPE(pv->req->par.t[i]) && pv->req->par.v[i][0]) last = i; + + if (last < 0) return; + + len = strlen((char*)pv->req->par.v[last]); + + for (i=0;ireq->par.v[last][i])) break; + + if (i == len) return; + + pv->parent->c_checked = 1; + + n = req_copy(pv->req, pv, 1); + n->callback = check_case_callback; + + /* Change case. */ + + n->par.v[last][i] = islower(n->par.v[last][i]) ? toupper(n->par.v[last][i]) : + tolower(n->par.v[last][i]); + + DEBUG("* candidate parameter: %s -> %s\n", pv->req->par.v[last], + n->par.v[last]); + + async_request(n); + +} + + +/* + + ************************************ + **** SECONDARY EXTENSION PROBES **** + ************************************ + + For each new entry discovered through brute-force that already bears an + extension, we should also try appending a secondary extension. This is to + spot things such as foo.php.old, .inc, .gz, etc. 
+ + */ + + +/* Schedules secondary extension tests, if warranted; is_param set to 1 + if this is a parametric node, 0 if the last path segment needs to be + checked. */ + +static void secondary_ext_init(struct pivot_desc* pv, struct http_request* req, + struct http_response* res, u8 is_param) { + + u8 *base_name, *fpos, *lpos, *ex; + s32 tpar = -1, i = 0, spar = -1; + + DEBUG_HELPER(req->pivot); + DEBUG_HELPER(pv); + + if (is_param) { + + tpar = pv->fuzz_par; + + } else { + + /* Find last path segment other than NULL-''. */ + for (i=0;ipar.c;i++) + if (PATH_SUBTYPE(req->par.t[i])) { + if ((req->par.t[i] == PARAM_PATH && + !req->par.n[i] && !req->par.v[i][0])) spar = i; else tpar = i; + } + + } + + if (tpar < 0) return; + + base_name = req->par.v[tpar]; + + /* Reject parameters with no '.' (unless in no_fuzz_ext mode), + with too many '.'s, or '.' in an odd location. */ + + fpos = (u8*)strchr((char*)base_name, '.'); + + if (!no_fuzz_ext || fpos) + if (!fpos || fpos == base_name || !fpos[1]) return; + + lpos = (u8*)strrchr((char*)base_name, '.'); + + if (fpos != lpos) return; + + i = 0; + + while ((ex = wordlist_get_extension(i))) { + u8* tmp = ck_alloc(strlen((char*)base_name) + strlen((char*)ex) + 2); + u32 c; + + sprintf((char*)tmp, "%s.%s", base_name, ex); + + /* Matching child? If yes, don't bother. */ + + for (c=0;cchild_cnt;c++) + if (!((is_c_sens(pv) ? strcmp : strcasecmp)((char*)tmp, + (char*)pv->child[c]->name))) break; + + /* Matching current node? */ + + if (pv->fuzz_par != -1 && + !((is_c_sens(pv) ? strcmp : strcasecmp)((char*)tmp, + (char*)pv->req->par.v[pv->fuzz_par]))) c = ~pv->child_cnt; + + if (c == pv->child_cnt) { + struct http_request* n = req_copy(req, pv, 1); + + /* Remove trailing slash if present. */ + if (spar >= 0) n->par.t[spar] = PARAM_NONE; + + ck_free(n->par.v[tpar]); + n->par.v[tpar] = tmp; + + n->user_val = 1; + + memcpy(&n->same_sig, &res->sig, sizeof(struct http_sig)); + + n->callback = is_param ? 
par_dict_callback : dir_dict_callback; + /* Both handlers recognize user_val == 1 as a special indicator. */ + async_request(n); + + } else ck_free(tmp); + + i++; + } + +} + + +/* + + ************************************ + **** SECURITY INJECTION TESTING **** + ************************************ + + Generic attack vector injection tests for directories, parameters, etc. + + */ + +/* Internal helper macros: */ + +#define TPAR(_req) ((_req)->par.v[(_req)->pivot->fuzz_par]) + +#define SET_VECTOR(_state, _req, _str) do { \ + if (_state == PSTATE_CHILD_INJECT) { \ + replace_slash((_req), (u8*)_str); \ + } else { \ + ck_free(TPAR(_req)); \ + TPAR(_req) = ck_strdup((u8*)_str); \ + } \ + } while (0) + +#define APPEND_VECTOR(_state, _req, _str) do { \ + if (_state == PSTATE_CHILD_INJECT) { \ + replace_slash((_req), (u8*)_str); \ + } else { \ + u8* _n = ck_alloc(strlen((char*)TPAR(_req)) + strlen((char*)_str) + 1); \ + sprintf((char*)_n, "%s%s", TPAR(_req), _str); \ + ck_free(TPAR(_req)); \ + TPAR(_req) = _n; \ + } \ + } while (0) + + +/* Common initialization of security injection attacks. */ + +static void inject_init(struct pivot_desc* pv) { + struct http_request* n; + u32 i; + + /* pv->state may change after async_request() calls in + insta-fail mode, so we should cache accordingly. */ + + DEBUG_HELPER(pv); + + /* CHECK 0: See if the response is stable. If it fluctuates + randomly, we probably need to skip injection tests. */ + + pv->misc_cnt = BH_CHECKS; + + for (i=0;ireq, pv, 1); + n->callback = inject_check0_callback; + n->user_val = i; + async_request(n); + } + +} + + +/* CALLBACK FOR CHECK 0: Confirms that the location is behaving + reasonably. 
*/ + +static u8 inject_check0_callback(struct http_request* req, + struct http_response* res) { + struct http_request* n; + u32 orig_state = req->pivot->state; + u8* tmp = NULL; + + DEBUG_CALLBACK(req, res); + + if (FETCH_FAIL(res)) { + handle_error(req, res, (u8*)"during page variability checks", 0); + } else { + if (!same_page(&RPRES(req)->sig, &res->sig)) { + req->pivot->res_varies = 1; + problem(PROB_VARIES, req, res, 0, req->pivot, 0); + } + } + + if ((--req->pivot->misc_cnt)) return 0; + + /* If response fluctuates, do not perform any injection checks at all. */ + + if (req->pivot->res_varies) { + end_injection_checks(req->pivot); + return 0; + } + + /* CHECK 1: Directory listing - 4 requests. The logic here is a bit + different for parametric targets (which are easy to examine with + a ./ trick) and directories (which require a more complex + comparison). */ + + req->pivot->misc_cnt = 0; + + n = req_copy(req->pivot->req, req->pivot, 1); + + if (orig_state == PSTATE_CHILD_INJECT) { + replace_slash(n, (u8*)"."); + set_value(PARAM_PATH, NULL, (u8*)"", -1, &n->par); + } else { + tmp = ck_alloc(strlen((char*)TPAR(n)) + 5); + sprintf((char*)tmp, ".../%s", TPAR(n)); + ck_free(TPAR(n)); + TPAR(n) = ck_strdup(tmp); + req->pivot->i_skip_add = 6; + } + + n->callback = inject_check1_callback; + n->user_val = 0; + async_request(n); + + n = req_copy(req->pivot->req, req->pivot, 1); + + if (orig_state == PSTATE_CHILD_INJECT) { + replace_slash(n, (u8*)".sf"); + set_value(PARAM_PATH, NULL, (u8*)"", -1, &n->par); + } else { + ck_free(TPAR(n)); + TPAR(n) = ck_strdup(tmp + 2); + } + + n->callback = inject_check1_callback; + n->user_val = 1; + async_request(n); + + n = req_copy(req->pivot->req, req->pivot, 1); + + if (orig_state == PSTATE_CHILD_INJECT) { + replace_slash(n, (u8*)"\\.\\"); + } else { + tmp[3] = '\\'; + ck_free(TPAR(n)); + TPAR(n) = ck_strdup(tmp); + } + + n->callback = inject_check1_callback; + n->user_val = 2; + async_request(n); + + n = req_copy(req->pivot->req, 
req->pivot, 1); + + if (orig_state == PSTATE_CHILD_INJECT) { + replace_slash(n, (u8*)"\\.sf\\"); + } else { + ck_free(TPAR(n)); + TPAR(n) = ck_strdup(tmp + 2); + ck_free(tmp); + } + + n->callback = inject_check1_callback; + n->user_val = 3; + async_request(n); + + return 0; + +} + + +/* CALLBACK FOR CHECK 1: Sees if we managed to list a directory, or find + a traversal vector. Called four times, parallelized. */ + +static u8 inject_check1_callback(struct http_request* req, + struct http_response* res) { + struct http_request* n; + u32 orig_state = req->pivot->state; + + DEBUG_CALLBACK(req, res); + + if (req->pivot->i_skip[0 + req->pivot->i_skip_add]) return 0; + + if (FETCH_FAIL(res)) { + handle_error(req, res, (u8*)"during directory listing / traversal attacks", 0); + req->pivot->i_skip[0 + req->pivot->i_skip_add] = 1; + goto schedule_next; + } + + req->pivot->misc_req[req->user_val] = req; + req->pivot->misc_res[req->user_val] = res; + if ((++req->pivot->misc_cnt) != 4) return 1; + + /* Got all responses. For directories, this is: + + pivot = / + misc[0] = /./ + misc[1] = /.sf/ + misc[2] = \.\ + misc[3] = \.sf\ + + Here, if pivot != misc[0], and misc[0] != misc[1], we probably + managed to list a hidden dir. The same test is carried out for + misc[2] and misc[3]. + + For parameters, this is: + + misc[0] = .../known_val + misc[1] = ./known_val + misc[2] = ...\known_val + misc[3] = .\known_val + + Here, the test is simpler: if misc[1] != misc[0], or misc[3] != + misc[2], we probably have a bug. + + */ + + if (orig_state == PSTATE_CHILD_INJECT) { + + if (!same_page(&MRES(0)->sig, &RPRES(req)->sig) && + !same_page(&MRES(0)->sig, &MRES(1)->sig)) { + problem(PROB_DIR_LIST, MREQ(0), MRES(0), + (u8*)"unique response for /./", + req->pivot, 0); + + /* Use pivot's request, rather than MREQ(0), for link scraping; + MREQ(0) contains an "illegal" manually constructed path. 
*/ + + RESP_CHECKS(RPREQ(req), MRES(0)); + } + + if (!same_page(&MRES(2)->sig, &RPRES(req)->sig) && + !same_page(&MRES(2)->sig, &MRES(3)->sig)) { + problem(PROB_DIR_LIST, MREQ(2), MRES(2), + (u8*)"unique response for \\.\\", + req->pivot, 0); + RESP_CHECKS(MREQ(2), MRES(2)); + } + + } else { + + if (!same_page(&MRES(0)->sig, &MRES(1)->sig)) { + problem(PROB_DIR_TRAVERSAL, MREQ(1), MRES(1), + (u8*)"responses for ./val and .../val look different", + req->pivot, 0); + RESP_CHECKS(MREQ(0), MRES(0)); + } + + if (!same_page(&MRES(2)->sig, &MRES(3)->sig)) { + problem(PROB_DIR_TRAVERSAL, MREQ(3), MRES(3), + (u8*)"responses for .\\val and ...\\val look different", + req->pivot, 0); + RESP_CHECKS(MREQ(2), MRES(2)); + } + + } + +schedule_next: + + destroy_misc_data(req->pivot, req); + + /* CHECK 2: Backend XML injection - 2 requests. */ + + n = req_copy(RPREQ(req), req->pivot, 1); + SET_VECTOR(orig_state, n, "sfish>'>\">"); + n->callback = inject_check2_callback; + n->user_val = 0; + async_request(n); + + n = req_copy(RPREQ(req), req->pivot, 1); + SET_VECTOR(orig_state, n, "sfish>'>\">"); + n->callback = inject_check2_callback; + n->user_val = 1; + async_request(n); + + return 0; + +} + + +/* CALLBACK FOR CHECK 2: Examines the response for XML injection. Called twice, + parallelized. 
*/ + +static u8 inject_check2_callback(struct http_request* req, + struct http_response* res) { + struct http_request* n; + u32 orig_state = req->pivot->state; + + DEBUG_CALLBACK(req, res); + + if (req->pivot->i_skip[1 + req->pivot->i_skip_add]) return 0; + + if (FETCH_FAIL(res)) { + handle_error(req, res, (u8*)"during backend XML injection attacks", 0); + req->pivot->i_skip[1 + req->pivot->i_skip_add] = 1; + goto schedule_next; + } + + req->pivot->misc_req[req->user_val] = req; + req->pivot->misc_res[req->user_val] = res; + if ((++req->pivot->misc_cnt) != 2) return 1; + + /* Got all responses: + + misc[0] = valid XML + misc[1] = bad XML + + If misc[0] != misc[1], we probably have XML injection on backend side. */ + + if (!same_page(&MRES(0)->sig, &MRES(1)->sig)) { + problem(PROB_XML_INJECT, MREQ(0), MRES(0), + (u8*)"responses for and look different", + req->pivot, 0); + RESP_CHECKS(MREQ(1), MRES(1)); + } + +schedule_next: + + destroy_misc_data(req->pivot, req); + + /* CHECK 3: Shell command injection - 9 requests. 
*/ + + n = req_copy(RPREQ(req), req->pivot, 1); + APPEND_VECTOR(orig_state, n, "`true`"); + n->callback = inject_check3_callback; + n->user_val = 0; + async_request(n); + + n = req_copy(RPREQ(req), req->pivot, 1); + APPEND_VECTOR(orig_state, n, "`false`"); + n->callback = inject_check3_callback; + n->user_val = 1; + async_request(n); + + n = req_copy(RPREQ(req), req->pivot, 1); + APPEND_VECTOR(orig_state, n, "`uname`"); + n->callback = inject_check3_callback; + n->user_val = 2; + async_request(n); + + n = req_copy(RPREQ(req), req->pivot, 1); + APPEND_VECTOR(orig_state, n, "\"`true`\""); + n->callback = inject_check3_callback; + n->user_val = 3; + async_request(n); + + n = req_copy(RPREQ(req), req->pivot, 1); + APPEND_VECTOR(orig_state, n, "\"`false`\""); + n->callback = inject_check3_callback; + n->user_val = 4; + async_request(n); + + n = req_copy(RPREQ(req), req->pivot, 1); + APPEND_VECTOR(orig_state, n, "\"`uname`\""); + n->callback = inject_check3_callback; + n->user_val = 5; + async_request(n); + + n = req_copy(RPREQ(req), req->pivot, 1); + APPEND_VECTOR(orig_state, n, "'`true`'"); + n->callback = inject_check3_callback; + n->user_val = 6; + async_request(n); + + n = req_copy(RPREQ(req), req->pivot, 1); + APPEND_VECTOR(orig_state, n, "'`false`'"); + n->callback = inject_check3_callback; + n->user_val = 7; + async_request(n); + + n = req_copy(RPREQ(req), req->pivot, 1); + APPEND_VECTOR(orig_state, n, "'`uname`'"); + n->callback = inject_check3_callback; + n->user_val = 8; + async_request(n); + + return 0; + +} + + +/* CALLBACK FOR CHECK 3: Looks for shell injection patterns. Called several + times, parallelized. 
*/ + +static u8 inject_check3_callback(struct http_request* req, + struct http_response* res) { + struct http_request* n; + u32 orig_state = req->pivot->state; + + DEBUG_CALLBACK(req, res); + + if (req->pivot->i_skip[2 + req->pivot->i_skip_add]) return 0; + + if (FETCH_FAIL(res)) { + handle_error(req, res, (u8*)"during path-based shell injection attacks", 0); + req->pivot->i_skip[2 + req->pivot->i_skip_add] = 1; + goto schedule_next; + } + + req->pivot->misc_req[req->user_val] = req; + req->pivot->misc_res[req->user_val] = res; + if ((++req->pivot->misc_cnt) != 9) return 1; + + /* Got all responses: + + misc[0] = `true` + misc[1] = `false` + misc[2] = `uname` + misc[3] = "`true`" + misc[4] = "`false`" + misc[5] = "`uname`" + misc[6] = '`true`' + misc[7] = "`false`" + misc[8] = '`uname`' + + If misc[0] == misc[1], but misc[0] != misc[2], we probably have shell + injection. Ditto for the remaining triplets. We use the `false` case + to avoid errors on search fields, etc. */ + + if (same_page(&MRES(0)->sig, &MRES(1)->sig) && + !same_page(&MRES(0)->sig, &MRES(2)->sig)) { + problem(PROB_SH_INJECT, MREQ(0), MRES(0), + (u8*)"responses to `true` and `false` different than to `uname`", + req->pivot, 0); + RESP_CHECKS(MREQ(2), MRES(2)); + } + + if (same_page(&MRES(3)->sig, &MRES(4)->sig) && + !same_page(&MRES(3)->sig, &MRES(5)->sig)) { + problem(PROB_SH_INJECT, MREQ(3), MRES(3), + (u8*)"responses to `true` and `false` different than to `uname`", + req->pivot, 0); + RESP_CHECKS(MREQ(5), MRES(5)); + } + + if (same_page(&MRES(6)->sig, &MRES(7)->sig) && + !same_page(&MRES(6)->sig, &MRES(8)->sig)) { + problem(PROB_SH_INJECT, MREQ(6), MRES(6), + (u8*)"responses to `true` and `false` different than to `uname`", + req->pivot, 0); + RESP_CHECKS(MREQ(8), MRES(8)); + } + +schedule_next: + + destroy_misc_data(req->pivot, req); + + /* CHECK 4: Cross-site scripting - two requests (also test common + "special" error pages). 
*/ + + n = req_copy(RPREQ(req), req->pivot, 1); + APPEND_VECTOR(orig_state, n, new_xss_tag(NULL)); + set_value(PARAM_HEADER, (u8*)"Referer", new_xss_tag(NULL), 0, &n->par); + register_xss_tag(n); + n->callback = inject_check4_callback; + n->user_val = 0; + async_request(n); + + n = req_copy(RPREQ(req), req->pivot, 1); + SET_VECTOR(orig_state, n, new_xss_tag((u8*)".htaccess.aspx")); + register_xss_tag(n); + n->callback = inject_check4_callback; + n->user_val = 1; + async_request(n); + + return 0; + +} + + +/* CALLBACK FOR CHECK 4: Checks for XSS. Called twice. */ + +static u8 inject_check4_callback(struct http_request* req, + struct http_response* res) { + struct http_request* n; + u32 orig_state = req->pivot->state; + + DEBUG_CALLBACK(req, res); + + /* Note that this is not a differential check, so we can let + 503, 504 codes slide. */ + + if (res->state != STATE_OK) { + handle_error(req, res, (u8*)"during cross-site scripting attacks", 0); + goto schedule_next; + } + + /* Content checks do automatic HTML parsing and XSS detection. + scrape_page() is generally not advisable here. */ + + content_checks(req, res); + + /* CHECK 5: URL redirection - 3 requests */ + +schedule_next: + + if (req->user_val) return 0; + + n = req_copy(RPREQ(req), req->pivot, 1); + SET_VECTOR(orig_state, n, "http://skipfish.invalid/;?"); + n->callback = inject_check5_callback; + n->user_val = 0; + async_request(n); + + n = req_copy(RPREQ(req), req->pivot, 1); + SET_VECTOR(orig_state, n, "//skipfish.invalid/;?"); + n->callback = inject_check5_callback; + n->user_val = 1; + async_request(n); + + n = req_copy(RPREQ(req), req->pivot, 1); + SET_VECTOR(orig_state, n, "skipfish://invalid/;?"); + n->callback = inject_check5_callback; + n->user_val = 2; + async_request(n); + + return 0; + +} + + +/* CALLBACK FOR CHECK 5: Checks for URL redirection or XSS problems. Called + several times, paralallelized, can work on individual responses. 
*/ + +static u8 inject_check5_callback(struct http_request* req, + struct http_response* res) { + struct http_request* n; + u8* val; + u32 orig_state = req->pivot->state; + + DEBUG_CALLBACK(req, res); + + /* Likewise, not a differential check. */ + + if (res->state != STATE_OK) { + handle_error(req, res, (u8*)"during URL injection attacks", 0); + goto schedule_next; + } + + /* Check Location, Refresh headers. */ + + val = GET_HDR((u8*)"Location", &res->hdr); + + if (val) { + + if (!strncasecmp((char*)val, "http://skipfish.invalid/", 25) || + !strncasecmp((char*)val, "//skipfish.invalid/", 21)) + problem(PROB_URL_REDIR, req, res, (u8*)"injected URL in 'Location' header", + req->pivot, 0); + + if (!strncasecmp((char*)val, "skipfish://", 12)) + problem(PROB_URL_XSS, req, res, (u8*)"injected URL in 'Location' header", + req->pivot, 0); + + } + + val = GET_HDR((u8*)"Refresh", &res->hdr); + + if (val && (val = (u8*)strchr((char*)val, '=')) && val++) { + u8 semi_safe = 0; + + if (*val == '\'' || *val == '"') { val++; semi_safe++; } + + if (!strncasecmp((char*)val, "http://skipfish.invalid/", 25) || + !strncasecmp((char*)val, "//skipfish.invalid/", 20)) + problem(PROB_URL_REDIR, req, res, (u8*)"injected URL in 'Refresh' header", + req->pivot, 0); + + /* Unescaped semicolon in Refresh headers is unsafe with MSIE6. */ + + if (!strncasecmp((char*)val, "skipfish://", 12) || + (!semi_safe && strchr((char*)val, ';'))) + problem(PROB_URL_XSS, req, res, (u8*)"injected URL in 'Refresh' header", + req->pivot, 0); + + } + + /* META tags and JS will be checked by content_checks(). We're not + calling scrape_page(), because we don't want to accumulate bogus, + injected links. 
*/ + + content_checks(req, res); + +schedule_next: + + if (req->user_val != 2) return 0; + + /* CHECK 6: header splitting - 2 requests */ + + n = req_copy(RPREQ(req), req->pivot, 1); + APPEND_VECTOR(orig_state, n, "bogus\nSkipfish-Inject:bogus"); + n->callback = inject_check6_callback; + n->user_val = 0; + async_request(n); + + n = req_copy(RPREQ(req), req->pivot, 1); + APPEND_VECTOR(orig_state, n, "bogus\rSkipfish-Inject:bogus"); + n->callback = inject_check6_callback; + n->user_val = 1; + async_request(n); + + return 0; + +} + + +/* CALLBACK FOR CHECK 6: A simple test for request splitting. Called + twice, parallelized, can work on individual responses. */ + +static u8 inject_check6_callback(struct http_request* req, + struct http_response* res) { + u8 is_num = 0; + struct http_request* n; + u32 orig_state = req->pivot->state; + + DEBUG_CALLBACK(req, res); + + /* Not differential. */ + + if (res->state != STATE_OK) { + handle_error(req, res, (u8*)"during header injection attacks", 0); + goto schedule_next; + } + + /* Check headers - that's all! 
*/ + + if (GET_HDR((u8*)"Skipfish-Inject", &res->hdr)) + problem(PROB_HTTP_INJECT, req, res, + (u8*)"successfully injected 'Skipfish-Inject' header into response", + req->pivot, 0); + +schedule_next: + + if (req->user_val != 1) return 0; + + /* CHECK 7: SQL injection - 6 requests */ + + if (orig_state != PSTATE_CHILD_INJECT) { + u8* pstr = TPAR(RPREQ(req)); + u32 c = strspn((char*)pstr, "01234567890.+-"); + if (!pstr[c]) is_num = 1; + } + + n = req_copy(RPREQ(req), req->pivot, 1); + if (!is_num) SET_VECTOR(orig_state, n, "9-8"); + else APPEND_VECTOR(orig_state, n, "-0"); + n->callback = inject_check7_callback; + n->user_val = 0; + async_request(n); + + n = req_copy(RPREQ(req), req->pivot, 1); + if (!is_num) SET_VECTOR(orig_state, n, "8-7"); + else APPEND_VECTOR(orig_state, n, "-0-0"); + n->callback = inject_check7_callback; + n->user_val = 1; + async_request(n); + + n = req_copy(RPREQ(req), req->pivot, 1); + if (!is_num) SET_VECTOR(orig_state, n, "9-1"); + else APPEND_VECTOR(orig_state, n, "-0-9"); + n->callback = inject_check7_callback; + n->user_val = 2; + async_request(n); + + n = req_copy(RPREQ(req), req->pivot, 1); + APPEND_VECTOR(orig_state, n, "\\\'\\\""); + set_value(PARAM_HEADER, (u8*)"User-Agent", (u8*)"sfish\\\'\\\"", 0, &n->par); + set_value(PARAM_HEADER, (u8*)"Referer", (u8*)"sfish\\\'\\\"", 0, &n->par); + set_value(PARAM_HEADER, (u8*)"Accept-Language", (u8*)"sfish\\\'\\\",en", 0, + &n->par); + n->callback = inject_check7_callback; + n->user_val = 3; + async_request(n); + + n = req_copy(RPREQ(req), req->pivot, 1); + APPEND_VECTOR(orig_state, n, "\'\""); + set_value(PARAM_HEADER, (u8*)"User-Agent", (u8*)"sfish\'\"", 0, &n->par); + set_value(PARAM_HEADER, (u8*)"Referer", (u8*)"sfish\'\"", 0, &n->par); + set_value(PARAM_HEADER, (u8*)"Accept-Language", (u8*)"sfish\'\",en", 0, + &n->par); + n->callback = inject_check7_callback; + n->user_val = 4; + async_request(n); + + n = req_copy(RPREQ(req), req->pivot, 1); + APPEND_VECTOR(orig_state, n, "\\\\\'\\\\\""); 
+ set_value(PARAM_HEADER, (u8*)"User-Agent", (u8*)"sfish\\\\\'\\\\\"", 0, &n->par); + set_value(PARAM_HEADER, (u8*)"Referer", (u8*)"sfish\\\\\'\\\\\"", 0, &n->par); + set_value(PARAM_HEADER, (u8*)"Accept-Language", (u8*)"sfish\\\\\'\\\\\",en", 0, + &n->par); + n->callback = inject_check7_callback; + n->user_val = 5; + async_request(n); + + /* TODO: We should probably also attempt cookie injection here. */ + + return 0; + +} + + +/* CALLBACK FOR CHECK 7: See if we have any indication of SQL injection. */ + +static u8 inject_check7_callback(struct http_request* req, + struct http_response* res) { + struct http_request* n; + u32 orig_state = req->pivot->state; + DEBUG_CALLBACK(req, res); + + if (req->pivot->i_skip[3 + req->pivot->i_skip_add]) return 0; + + if (FETCH_FAIL(res)) { + handle_error(req, res, (u8*)"during SQL injection attacks", 0); + req->pivot->i_skip[3 + req->pivot->i_skip_add] = 1; + goto schedule_next; + } + + req->pivot->misc_req[req->user_val] = req; + req->pivot->misc_res[req->user_val] = res; + if ((++req->pivot->misc_cnt) != 6) return 1; + + /* Got all data: + + misc[0] = 9-8 (or orig-0) + misc[1] = 8-7 (or orig-0-0) + misc[2] = 9-1 (or orig-0-9) + misc[3] = [orig]\'\" + misc[4] = [orig]'" + misc[5] = [orig]\\'\\" + + If misc[0] == misc[1], but misc[0] != misc[2], probable (numeric) SQL + injection. If misc[3] != misc[4] and misc[4] != misc[5], + probable text SQL injection. 
+ + */ + + if (same_page(&MRES(0)->sig, &MRES(1)->sig) && + !same_page(&MRES(0)->sig, &MRES(2)->sig)) { + problem(PROB_SQL_INJECT, MREQ(0), MRES(0), + (u8*)"response suggests arithmetic evaluation on server side", + req->pivot, 0); + RESP_CHECKS(MREQ(0), MRES(0)); + RESP_CHECKS(MREQ(2), MRES(2)); + } + + if (!same_page(&MRES(3)->sig, &MRES(4)->sig) && + !same_page(&MRES(3)->sig, &MRES(5)->sig)) { + problem(PROB_SQL_INJECT, MREQ(4), MRES(4), + (u8*)"response to '\" different than to \\'\\\"", req->pivot, 0); + RESP_CHECKS(MREQ(3), MRES(3)); + RESP_CHECKS(MREQ(4), MRES(4)); + } + +schedule_next: + + destroy_misc_data(req->pivot, req); + + /* CHECK 8: format string attacks - 2 requests. */ + + n = req_copy(RPREQ(req), req->pivot, 1); + SET_VECTOR(orig_state, n, "sfish%dn%dn%dn%dn%dn%dn%dn%dn"); + n->callback = inject_check8_callback; + n->user_val = 0; + async_request(n); + + n = req_copy(RPREQ(req), req->pivot, 1); + SET_VECTOR(orig_state, n, "sfish%nd%nd%nd%nd%nd%nd%nd%nd"); + n->callback = inject_check8_callback; + n->user_val = 1; + async_request(n); + + return 0; +} + + +/* Check for format string bugs. */ + +static u8 inject_check8_callback(struct http_request* req, + struct http_response* res) { + struct http_request* n; + u32 orig_state = req->pivot->state; + DEBUG_CALLBACK(req, res); + + if (req->pivot->i_skip[4 + req->pivot->i_skip_add]) return 0; + + if (FETCH_FAIL(res)) { + handle_error(req, res, (u8*)"during format string attacks", 0); + req->pivot->i_skip[4 + req->pivot->i_skip_add] = 1; + goto schedule_next; + } + + req->pivot->misc_req[req->user_val] = req; + req->pivot->misc_res[req->user_val] = res; + if ((++req->pivot->misc_cnt) != 2) return 1; + + /* Got all data: + + misc[0] = %dn... (harmless) + misc[1] = %nd... (crashy) + + If misc[0] != misc[1], probable format string vuln. + + */ + + if (!same_page(&MRES(0)->sig, &MRES(1)->sig)) { + problem(PROB_FMT_STRING, MREQ(1), MRES(1), + (u8*)"response to %dn%dn%dn... 
different than to %nd%nd%nd...", + req->pivot, 0); + RESP_CHECKS(MREQ(1), MRES(1)); + } + +schedule_next: + + destroy_misc_data(req->pivot, req); + + /* CHECK 9: integer overflow bugs - 9 requests. */ + + n = req_copy(RPREQ(req), req->pivot, 1); + SET_VECTOR(orig_state, n, "-0000012345"); + n->callback = inject_check9_callback; + n->user_val = 0; + async_request(n); + + n = req_copy(RPREQ(req), req->pivot, 1); + SET_VECTOR(orig_state, n, "-2147483649"); + n->callback = inject_check9_callback; + n->user_val = 1; + async_request(n); + + n = req_copy(RPREQ(req), req->pivot, 1); + SET_VECTOR(orig_state, n, "-2147483648"); + n->callback = inject_check9_callback; + n->user_val = 2; + async_request(n); + + n = req_copy(RPREQ(req), req->pivot, 1); + SET_VECTOR(orig_state, n, "0000012345"); + n->callback = inject_check9_callback; + n->user_val = 3; + async_request(n); + + n = req_copy(RPREQ(req), req->pivot, 1); + SET_VECTOR(orig_state, n, "2147483647"); + n->callback = inject_check9_callback; + n->user_val = 4; + async_request(n); + + n = req_copy(RPREQ(req), req->pivot, 1); + SET_VECTOR(orig_state, n, "2147483648"); + n->callback = inject_check9_callback; + n->user_val = 5; + async_request(n); + + n = req_copy(RPREQ(req), req->pivot, 1); + SET_VECTOR(orig_state, n, "4294967295"); + n->callback = inject_check9_callback; + n->user_val = 6; + async_request(n); + + n = req_copy(RPREQ(req), req->pivot, 1); + SET_VECTOR(orig_state, n, "4294967296"); + n->callback = inject_check9_callback; + n->user_val = 7; + async_request(n); + + n = req_copy(RPREQ(req), req->pivot, 1); + SET_VECTOR(orig_state, n, "0000023456"); + n->callback = inject_check9_callback; + n->user_val = 8; + async_request(n); + + return 0; +} + + +/* Check for format string bugs, then wrap up the injection + phase.. 
*/ + +static u8 inject_check9_callback(struct http_request* req, + struct http_response* res) { + + DEBUG_CALLBACK(req, res); + + if (req->pivot->i_skip[5 + req->pivot->i_skip_add]) return 0; + + if (FETCH_FAIL(res)) { + handle_error(req, res, (u8*)"during integer overflow attacks", 0); + req->pivot->i_skip[5 + req->pivot->i_skip_add] = 1; + goto schedule_next; + } + + req->pivot->misc_req[req->user_val] = req; + req->pivot->misc_res[req->user_val] = res; + if ((++req->pivot->misc_cnt) != 9) return 1; + + /* Got all data: + + misc[0] = -12345 (baseline) + misc[1] = -(2^31-1) + misc[2] = -2^31 + misc[3] = 12345 (baseline) + misc[4] = 2^31-1 + misc[5] = 2^31 + misc[6] = 2^32-1 + misc[7] = 2^32 + misc[8] = 23456 (validation) + + If misc[3] != misc[8], skip tests - we're likely dealing with a + search field instead. + + If misc[0] != misc[1] or misc[2], probable integer overflow; + ditto for 3 vs 4, 5, 6, 7. + + */ + + if (!same_page(&MRES(3)->sig, &MRES(8)->sig)) + goto schedule_next; + + if (!same_page(&MRES(0)->sig, &MRES(1)->sig)) { + problem(PROB_INT_OVER, MREQ(1), MRES(1), + (u8*)"response to -(2^31-1) different than to -12345", + req->pivot, 0); + RESP_CHECKS(MREQ(1), MRES(1)); + } + + if (!same_page(&MRES(0)->sig, &MRES(2)->sig)) { + problem(PROB_INT_OVER, MREQ(2), MRES(2), + (u8*)"response to -2^31 different than to -12345", + req->pivot, 0); + RESP_CHECKS(MREQ(2), MRES(2)); + } + + if (!same_page(&MRES(3)->sig, &MRES(4)->sig)) { + problem(PROB_INT_OVER, MREQ(4), MRES(4), + (u8*)"response to 2^31-1 different than to 12345", + req->pivot, 0); + RESP_CHECKS(MREQ(4), MRES(4)); + } + + if (!same_page(&MRES(3)->sig, &MRES(5)->sig)) { + problem(PROB_INT_OVER, MREQ(5), MRES(5), + (u8*)"response to 2^31 different than to 12345", + req->pivot, 0); + RESP_CHECKS(MREQ(5), MRES(5)); + } + + if (!same_page(&MRES(3)->sig, &MRES(6)->sig)) { + problem(PROB_INT_OVER, MREQ(6), MRES(6), + (u8*)"response to 2^32-1 different than to 12345", + req->pivot, 0); + RESP_CHECKS(MREQ(6), 
MRES(6)); + } + + if (!same_page(&MRES(3)->sig, &MRES(7)->sig)) { + problem(PROB_INT_OVER, MREQ(7), MRES(7), + (u8*)"response to 2^32 different than to 12345", + req->pivot, 0); + RESP_CHECKS(MREQ(7), MRES(7)); + } + +schedule_next: + + destroy_misc_data(req->pivot, req); + end_injection_checks(req->pivot); + + return 0; + +} + + +/* Ends injection checks, proceeds with brute-force attacks, etc. */ + +static void end_injection_checks(struct pivot_desc* pv) { + + if (pv->state == PSTATE_CHILD_INJECT) { + + /* Do not proceed with parametric tests if pivot is not + in scope (but got added as a parent of an in-scope + node), or 404 checks went wrong. */ + + if (url_allowed(pv->req)) { + + if (pv->r404_cnt) { + pv->state = PSTATE_CHILD_DICT; + pv->cur_key = 0; + crawl_dir_dict_init(pv); + } else { + crawl_parametric_init(pv); + } + + } else { + + pv->state = PSTATE_DONE; + return; + + } + + } else { + + if (pv->bogus_par) { + pv->state = PSTATE_DONE; + } else { + crawl_par_numerical_init(pv); + } + + } + +} + + + +/* + + ***************************** + * GENERIC PARAMETRIC CHECKS * + ***************************** + + Tests specific to parametric nodes, such as foo=bar (query and + POST parameters, directories, etc). + + */ + +/* Initializes initial parametric testing probe. It may get called on + pivots with no specific parameters to fuzz, in which case, we want to + proceed to PSTATE_DONE. */ + +static void crawl_parametric_init(struct pivot_desc* pv) { + struct http_request* n; + u32 i; + + if (pv->fuzz_par < 0 || !url_allowed(pv->req)) { + pv->state = PSTATE_DONE; + return; + } + + DEBUG_HELPER(pv); + + pv->state = PSTATE_PAR_CHECK; + + /* TEST 1: parameter behavior. 
*/ + + pv->ck_pending += BH_CHECKS; + + for (i=0;ireq, pv, 1); + ck_free(TPAR(n)); + TPAR(n) = ck_strdup((u8*)BOGUS_FILE); + n->callback = par_check_callback; + n->user_val = i; + async_request(n); + } + +} + + +/* CALLBACK FOR TEST 1: Checks if the parameter causes a significant + change on the resulting page (suggesting it should be brute-forced, + not just injection-tested). */ + +static u8 par_check_callback(struct http_request* req, + struct http_response* res) { + + struct http_request* n; + u8* tmp; + + DEBUG_CALLBACK(req, res); + + if (FETCH_FAIL(res)) { + handle_error(req, res, (u8*)"during parameter behavior tests", 0); + goto schedule_next; + } + + if (same_page(&res->sig, &RPRES(req)->sig)) { + DEBUG("* Parameter seems to have no effect.\n"); + req->pivot->bogus_par = 1; + goto schedule_next; + } + + DEBUG("* Parameter seems to have some effect:\n"); + debug_same_page(&res->sig, &RPRES(req)->sig); + + if (req->pivot->bogus_par) { + DEBUG("* We already classified it as having no effect, whoops.\n"); + req->pivot->res_varies = 1; + problem(PROB_VARIES, req, res, 0, req->pivot, 0); + goto schedule_next; + } + + /* If we do not have a signature yet, record it. Otherwise, make sure + it did not change. */ + + if (!req->pivot->r404_cnt) { + + DEBUG("* New signature, recorded.\n"); + memcpy(&req->pivot->r404[0], &res->sig, sizeof(struct http_sig)); + req->pivot->r404_cnt = 1; + + } else { + + if (!same_page(&res->sig, &req->pivot->r404[0])) { + DEBUG("* Signature does not match previous responses, whoops.\n"); + req->pivot->res_varies = 1; + problem(PROB_VARIES, req, res, 0, req->pivot, 0); + goto schedule_next; + } + + } + +schedule_next: + + if ((--req->pivot->ck_pending)) return 0; + + /* All probes failed? Assume bogus parameter, what else to do... */ + + if (!req->pivot->r404_cnt) + req->pivot->bogus_par = 1; + + /* If the parameter has an effect, schedule OGNL checks. 
*/ + + if (!req->pivot->bogus_par && !req->pivot->res_varies && + req->par.n[req->pivot->fuzz_par]) { + + n = req_copy(req->pivot->req, req->pivot, 1); + tmp = ck_alloc(strlen((char*)n->par.n[req->pivot->fuzz_par]) + 8); + sprintf((char*)tmp, "[0]['%s']", n->par.n[req->pivot->fuzz_par]); + ck_free(n->par.n[req->pivot->fuzz_par]); + n->par.n[req->pivot->fuzz_par] = tmp; + n->callback = par_ognl_callback; + n->user_val = 0; + async_request(n); + + n = req_copy(req->pivot->req, req->pivot, 1); + ck_free(n->par.n[req->pivot->fuzz_par]); + n->par.n[req->pivot->fuzz_par] = ck_strdup((u8*)"[0]['sfish']"); + n->callback = par_ognl_callback; + n->user_val = 1; + async_request(n); + + } + + /* Injection attacks should be carried out even if we think this + parameter has no visible effect; but injection checks will not proceed + to dictionary fuzzing if bogus_par or res_varies is set. */ + + req->pivot->state = PSTATE_PAR_INJECT; + inject_init(req->pivot); + + return 0; + +} + + +/* Said OGNL check... */ + +static u8 par_ognl_callback(struct http_request* req, + struct http_response* res) { + + DEBUG_CALLBACK(req, res); + + if (FETCH_FAIL(res)) { + handle_error(req, res, (u8*)"during OGNL tests", 0); + return 0; + } + + /* First response is meant to give the same result. Second + is meant to give a different one. */ + + if (req->user_val == 0) { + if (same_page(&req->pivot->res->sig, &res->sig)) + req->pivot->ognl_check++; + } else { + if (!same_page(&req->pivot->res->sig, &res->sig)) + req->pivot->ognl_check++; + } + + if (req->pivot->ognl_check == 2) + problem(PROB_OGNL, req, res, + (u8*)"response to [0]['name']=... identical to name=...", + req->pivot, 0); + + return 0; + +} + + +/* STAGE 2: Tries numerical brute-force (if any reasonably sized + integer is actually found in the name). 
*/

static void crawl_par_numerical_init(struct pivot_desc* pv) {
  u8  *val = TPAR(pv->req), *out, fmt[16];
  u32 i, dig, tail;
  s32 val_i, range_st, range_en;
  u8  zero_padded = 0;

  DEBUG_HELPER(pv);

  if (pv->child_cnt >= max_children) goto schedule_next;

  /* Skip to the first digit, then to first non-digit. */

  i = 0;
  while (val[i] && !isdigit(val[i])) i++;
  if (!val[i]) goto schedule_next;

  dig = i;
  while (val[i] && isdigit(val[i])) i++;
  tail = i;

  /* Too many digits is a no-go. */

  if (tail - dig > PAR_MAX_DIGITS) goto schedule_next;

  if (val[dig] == '0' && tail - dig > 1) zero_padded = 1;

  val_i = atoi((char*)val + dig);
  range_st = val_i - PAR_INT_FUZZ;
  range_en = val_i + PAR_INT_FUZZ;
  if (range_st < 0) range_st = 0;

  /* Format string that reproduces the prefix, the (possibly zero-padded)
     number, and the suffix of the original value. */

  if (zero_padded) sprintf((char*)fmt, "%%.%us%%0%uu%%s", dig, tail - dig);
  else sprintf((char*)fmt, "%%.%us%%%uu%%s", dig, tail - dig);

  out = ck_alloc(strlen((char*)val) + 16);

  /* Let's roll! */

  pv->state = PSTATE_PAR_NUMBER;

  pv->num_pending = range_en - range_st + 1;

  for (i=range_st;i<=range_en;i++) {
    struct http_request* n;

    /* Skip the original value - and drop its slot from num_pending,
       otherwise the counter never reaches zero and the pivot would
       stall in PSTATE_PAR_NUMBER forever. */

    if (i == (u32)val_i) { pv->num_pending--; continue; }

    sprintf((char*)out, (char*)fmt, val, i, val + tail);

    n = req_copy(pv->req, pv, 1);
    ck_free(TPAR(n));
    TPAR(n) = ck_strdup((u8*)out);
    n->callback = par_numerical_callback;
    async_request(n);

  }

  ck_free(out);

  if (!pv->num_pending) goto schedule_next;
  return;

schedule_next:

  pv->state = PSTATE_PAR_DICT;
  crawl_par_dict_init(pv);

  /* Pew pew! */

}


/* CALLBACK FOR STAGE 2: Examines the output of numerical brute-force,
   creates PIVOT_VALUE nodes if the response looks different from pivot,
   nearby 404 sigs.
*/ + +static u8 par_numerical_callback(struct http_request* req, + struct http_response* res) { + struct pivot_desc *par, *n = NULL, *orig_pv = req->pivot; + u32 i; + + DEBUG_CALLBACK(req, res); + + if (FETCH_FAIL(res)) { + handle_error(req, res, (u8*)"during numerical brute-force tests", 0); + goto schedule_next; + } + + /* Looks like parent, or like its 404 signature? */ + + if (same_page(&res->sig, &req->pivot->r404[0]) || + same_page(&res->sig, &req->pivot->res->sig)) + goto schedule_next; + + par = dir_parent(req->pivot); + + /* Check with parent if sigs available, but if not - no biggie. */ + + if (par) + for (i=0;ir404_cnt;i++) + if (same_page(&res->sig, &par->r404[i])) goto schedule_next; + + /* Matching child? If yes, don't bother. */ + + for (i=0;ipivot->child_cnt;i++) + if (req->pivot->child[i]->type == PIVOT_VALUE && + !((is_c_sens(req->pivot) ? strcmp : strcasecmp)((char*)TPAR(req), + (char*)req->pivot->child[i]->name))) goto schedule_next; + + if (req->pivot->child_cnt >= max_children) goto schedule_next; + + /* Hmm, looks like we're onto something. Let's manually create a dummy + pivot and attach it to current node, without any activity planned. + Attach any response notes to that pivot. */ + + n = ck_alloc(sizeof(struct pivot_desc)); + + n->type = PIVOT_VALUE; + n->state = PSTATE_DONE; + n->name = ck_strdup(TPAR(req)); + n->req = req; + n->res = res; + n->fuzz_par = req->pivot->fuzz_par; + n->parent = req->pivot; + + DEBUG("--- New pivot (value): %s ---\n", n->name); + + req->pivot->child = ck_realloc(req->pivot->child, (req->pivot->child_cnt + 1) + * sizeof(struct pivot_desc*)); + + req->pivot->child[req->pivot->child_cnt++] = n; + + req->pivot = n; + + RESP_CHECKS(req, res); + + secondary_ext_init(orig_pv, req, res, 1); + +schedule_next: + + if (!(--(orig_pv->num_pending))) { + orig_pv->state = PSTATE_PAR_DICT; + crawl_par_dict_init(orig_pv); + } + + /* Copied over to pivot. */ + return n ? 
1 : 0; + +} + + +/* STAGE 3: Tries dictionary brute-force. This is fairly similar to the + directory dictionary version, but with additional try_list logic, etc. */ + +static void crawl_par_dict_init(struct pivot_desc* pv) { + static u8 in_dict_init; + struct http_request* n; + u8 *kw, *ex; + u32 i, c; + + /* Too many requests still pending, or already done? */ + + if (in_dict_init || pv->pdic_pending > DICT_BATCH || + pv->state != PSTATE_PAR_DICT) return; + + DEBUG_HELPER(pv); + +restart_dict: + + if (pv->child_cnt >= max_children) { + crawl_par_trylist_init(pv); + return; + } + + i = 0; + + kw = (pv->pdic_guess ? wordlist_get_guess : wordlist_get_word) + (pv->pdic_cur_key); + + if (!kw) { + + /* No more keywords. Move to guesswords if not there already, or + advance to try list otherwise. */ + + if (pv->pdic_guess) { crawl_par_trylist_init(pv); return; } + + pv->pdic_guess = 1; + pv->pdic_cur_key = 0; + goto restart_dict; + + } + + /* Use crawl_prob/100 dictionary entries. */ + + if (R(100) < crawl_prob) { + + /* Schedule extension-less probe, if the keyword is not + on the child list. */ + + for (c=0;cchild_cnt;c++) + if (pv->type == PIVOT_VALUE && + !((is_c_sens(pv) ? strcmp : strcasecmp)((char*)kw, + (char*)pv->child[c]->name))) break; + + /* ...and does not match the node itself. */ + + if (pv->fuzz_par != -1 && + !((is_c_sens(pv) ? strcmp : strcasecmp)((char*)kw, + (char*)pv->req->par.v[pv->fuzz_par]))) c = ~pv->child_cnt; + + if (c == pv->child_cnt) { + n = req_copy(pv->req, pv, 1); + ck_free(TPAR(n)); + TPAR(n) = ck_strdup(kw); + n->callback = par_dict_callback; + pv->pdic_pending++; + in_dict_init = 1; + async_request(n); + in_dict_init = 0; + } + + /* Schedule probes for all extensions for the current word, but + only if the original parameter contained '.' somewhere, + and only if string is not on the try list. 
*/ + + if (strchr((char*)TPAR(pv->req), '.')) + while (!no_fuzz_ext && (ex = wordlist_get_extension(i))) { + + u8* tmp = ck_alloc(strlen((char*)kw) + strlen((char*)ex) + 2); + + sprintf((char*)tmp, "%s.%s", kw, ex); + + for (c=0;cchild_cnt;c++) + if (pv->type == PIVOT_VALUE && + !((is_c_sens(pv) ? strcmp : strcasecmp)((char*)tmp, + (char*)pv->child[c]->name))) break; + + if (pv->fuzz_par != -1 && + !((is_c_sens(pv) ? strcmp : strcasecmp)((char*)tmp, + (char*)pv->req->par.v[pv->fuzz_par]))) c = ~pv->child_cnt; + + if (c == pv->child_cnt) { + n = req_copy(pv->req, pv, 1); + ck_free(TPAR(n)); + TPAR(n) = tmp; + n->callback = par_dict_callback; + pv->pdic_pending++; + in_dict_init = 1; + async_request(n); + in_dict_init = 0; + } else ck_free(tmp); + + i++; + } + + } + + pv->pdic_cur_key++; + + if (pv->pdic_pending < DICT_BATCH) goto restart_dict; + +} + + +/* CALLBACK FOR STAGE 3: Examines the output of directory brute-force. */ + +static u8 par_dict_callback(struct http_request* req, + struct http_response* res) { + struct pivot_desc *par, *n = NULL, *orig_pv = req->pivot; + u8 keep = 0; + u32 i; + + DEBUG_CALLBACK(req, res); + + if (!req->user_val) + req->pivot->pdic_pending--; + + if (FETCH_FAIL(res)) { + handle_error(req, res, (u8*)"during parameter brute-force tests", 0); + goto schedule_next; + } + + /* Same as parent or parent's 404? Don't bother. */ + + if (same_page(&res->sig, &req->pivot->r404[0]) || + same_page(&res->sig, &RPRES(req)->sig)) goto schedule_next; + + par = dir_parent(req->pivot); + + if (par) + for (i=0;ir404_cnt;i++) + if (same_page(&res->sig, &par->r404[i])) goto schedule_next; + + /* Matching child? If yes, don't bother. */ + + for (i=0;ipivot->child_cnt;i++) + if (req->pivot->child[i]->type == PIVOT_VALUE && + !((is_c_sens(req->pivot) ? 
strcmp : strcasecmp)((char*)TPAR(req), + (char*)req->pivot->child[i]->name))) goto schedule_next; + + if (req->pivot->child_cnt >= max_children) goto schedule_next; + + n = ck_alloc(sizeof(struct pivot_desc)); + + n->type = PIVOT_VALUE; + n->state = PSTATE_DONE; + n->name = ck_strdup(TPAR(req)); + n->req = req; + n->res = res; + n->fuzz_par = req->pivot->fuzz_par; + n->parent = req->pivot; + + DEBUG("--- New pivot (value): %s ---\n", n->name); + + req->pivot->child = ck_realloc(req->pivot->child, (req->pivot->child_cnt + 1) + * sizeof(struct pivot_desc*)); + + req->pivot->child[req->pivot->child_cnt++] = n; + req->pivot = n; + + keep = 1; + + RESP_CHECKS(req, res); + + if (!req->user_val) + secondary_ext_init(orig_pv, req, res, 1); + +schedule_next: + + if (!req->user_val) + crawl_par_dict_init(orig_pv); + + return keep; + +} + + +/* STAGE 4: Handles try list (this may be called again after request is + completed, when new entries are added to the try list). */ + +void crawl_par_trylist_init(struct pivot_desc* pv) { + u32 i; + + /* If the parameter does not seem to be doing anything, there is + no point in going through the try list if restarted. */ + + if (pv->fuzz_par == -1 || pv->bogus_par || pv->res_varies + || pv->child_cnt >= max_children) { + pv->state = PSTATE_DONE; + return; + } else + pv->state = PSTATE_PAR_TRYLIST; + + DEBUG_HELPER(pv); + + pv->try_pending += (pv->try_cnt - pv->try_cur); + + for (i=pv->try_cur;itry_cnt;i++) { + u32 c; + + /* If we already have a child by this name, don't poke it again. */ + + for (c=0;cchild_cnt;c++) + if (!((is_c_sens(pv) ? strcmp : strcasecmp)((char*)pv->try_list[i], + (char*)pv->child[c]->name))) break; + + /* Matching current node? Ditto. */ + + if (pv->fuzz_par != -1 && + !((is_c_sens(pv) ? 
strcmp : strcasecmp)((char*)pv->try_list[i], + (char*)pv->req->par.v[pv->fuzz_par]))) continue; + + if (c == pv->child_cnt && R(100) < crawl_prob) { + struct http_request* n; + n = req_copy(pv->req, pv, 1); + ck_free(TPAR(n)); + TPAR(n) = ck_strdup(pv->try_list[i]); + n->callback = par_trylist_callback; + async_request(n); + } else + if (!pv->child[c]->linked) pv->child[c]->linked = 1; + + } + + pv->try_cur = i; + + if (!pv->try_pending) { + pv->state = PSTATE_DONE; + return; + } + +} + + +/* CALLBACK FOR STAGE 4: Examines the output of try list fetches. */ + +static u8 par_trylist_callback(struct http_request* req, + struct http_response* res) { + struct pivot_desc *par, *n = NULL; + struct pivot_desc* orig_pv = req->pivot; + u32 i; + + DEBUG_CALLBACK(req, res); + + if (FETCH_FAIL(res)) { + handle_error(req, res, (u8*)"during try list fetches", 0); + goto schedule_next; + } + + /* Same as parent or parent's 404? Don't bother. */ + + if (same_page(&res->sig, &req->pivot->r404[0]) || + same_page(&res->sig, &RPRES(req)->sig)) goto schedule_next; + + par = dir_parent(req->pivot); + + if (par) + for (i=0;ir404_cnt;i++) + if (same_page(&res->sig, &par->r404[i])) goto schedule_next; + + /* Name matching known child? If yes, don't bother. */ + + for (i=0;ipivot->child_cnt;i++) + if (req->pivot->child[i]->type == PIVOT_VALUE && + !((is_c_sens(req->pivot) ? 
strcmp : strcasecmp)((char*)TPAR(req), + (char*)req->pivot->child[i]->name))) goto schedule_next; + + if (req->pivot->child_cnt >= max_children) goto schedule_next; + + n = ck_alloc(sizeof(struct pivot_desc)); + + n->type = PIVOT_VALUE; + n->state = PSTATE_DONE; + n->name = ck_strdup(TPAR(req)); + n->req = req; + n->res = res; + n->fuzz_par = req->pivot->fuzz_par; + n->parent = req->pivot; + + DEBUG("--- New pivot (value): %s ---\n", n->name); + + req->pivot->child = ck_realloc(req->pivot->child, (req->pivot->child_cnt + 1) + * sizeof(struct pivot_desc*)); + + req->pivot->child[req->pivot->child_cnt++] = n; + req->pivot = n; + + RESP_CHECKS(req, res); + + secondary_ext_init(orig_pv, req, res, 1); + +schedule_next: + + if (!(--(orig_pv->try_pending))) + orig_pv->state = PSTATE_DONE; + + /* Copied over to pivot. */ + return n ? 1 : 0; + +} + + +/* + + *************************** + **** PIVOT_FILE CHECKS **** + *************************** + + Used on confirmed file or parameter type pivots. + + */ + +/* Initial callback for content fetch. Nothing interesting here, spare for + basic sanity checks. */ + +u8 fetch_file_callback(struct http_request* req, struct http_response* res) { + u32 i = 0; + struct pivot_desc* par; + + RPRES(req) = res; + DEBUG_CALLBACK(req, res); + + if (FETCH_FAIL(res)) { + handle_error(req, res, (u8*)"during initial file fetch", 1); + return 1; + } + + /* Matches parent's 404? */ + + par = dir_parent(req->pivot); + + if (par) + for (i=0;ir404_cnt;i++) + if (same_page(&res->sig, &par->r404[i])) break; + + /* If no signatures on parents, fall back to a basic 404 check, it's + the least we could do. */ + + if ((!par && res->code == 404) || (par && i != par->r404_cnt)) { + + req->pivot->missing = 1; + + } else { + + if (res->code > 400) + problem(PROB_NO_ACCESS, req, res, NULL, req->pivot, 0); + + /* Do not bother with checks on files or params if + content identical to parent. 
*/ + + if (!RPAR(req)->res || !same_page(&res->sig, &RPAR(req)->res->sig)) { + RESP_CHECKS(req, res); + if (par && req->pivot->type != PIVOT_PARAM) + secondary_ext_init(par, req, res, 0); + } + + if (req->pivot->type == PIVOT_FILE) + check_case(req->pivot); + + } + + unlock_children(req->pivot); + crawl_parametric_init(req->pivot); + + /* This is the initial callback, keep the response. */ + return 1; + +} + + +/* + + ******************** + * PIVOT_DIR CHECKS * + ******************** + + These checks are called on all pivot points determined to correspond to + real directories. + + */ + + +/* STAGE 1: Handles initial fetch of a directory. Called once. */ + +u8 fetch_dir_callback(struct http_request* req, struct http_response* res) { + struct http_request* n; + struct pivot_desc* par; + RPRES(req) = res; + + DEBUG_CALLBACK(req, res); + + /* Error at this point means we should give up on other probes in this + directory. */ + + if (FETCH_FAIL(res)) { + handle_error(req, res, (u8*)"during initial directory fetch", 1); + return 1; + } + + if (req->pivot->type == PIVOT_SERV) + PIVOT_CHECKS(req, res); + + /* The next step is checking 404 responses for all extensions (starting + with an empty one), which would also determine if the directory exists + at all, etc. We make an exception for server pivot, though, which is + presumed to be a directory (so we do PIVOT_CHECKS right away). */ + + req->pivot->state = PSTATE_404_CHECK; + n = req_copy(req, req->pivot, 1); + replace_slash(n, (u8*)BOGUS_FILE); + + n->user_val = 0; + n->callback = dir_404_callback; + req->pivot->r404_pending++; + + async_request(n); + + par = dir_parent(req->pivot); + if (par) secondary_ext_init(par, req, res, 0); + + /* Header, response belong to pivot - keep. */ + return 1; +} + + +/* STAGE 2: Called on 404 checks, sequentially for each response. First + called once, with user_val = 0, for no extension; when called + multiple times to gather signatures. 
If not enough or too many + signatures found, the directory is deemed to be fubar. */ + +static u8 dir_404_callback(struct http_request* req, + struct http_response* res) { + + struct http_request* n; + u32 i; + + DEBUG_CALLBACK(req, res); + + if (req->pivot->r404_skip) goto schedule_next; + + if (FETCH_FAIL(res)) { + handle_error(req, res, (u8*)"during 404 response checks", 0); + goto schedule_next; + } + + /* If the first 404 probe returned something that looks like the + "root" page for the currently tested directory, panic. But don't + do that check on server pivots. */ + + if (!req->user_val && req->pivot->type != PIVOT_SERV && RPRES(req) && + same_page(&res->sig, &RPRES(req)->sig)) { + DEBUG("* First 404 probe identical with parent!\n"); + goto schedule_next; + } else if (!req->user_val) { + DEBUG("* First 404 probe differs from parent (%d)\n", + RPRES(req) ? RPRES(req)->code : 0); + } + + /* Check if this is a new signature. */ + + for (i=0;ipivot->r404_cnt;i++) + if (same_page(&res->sig, &req->pivot->r404[i])) break; + + if (i == req->pivot->r404_cnt) { + struct pivot_desc* par; + + DEBUG("* New signature found (%u).\n", req->pivot->r404_cnt); + + /* Need to add a new one. Make sure we're not over the limit. */ + + if (req->pivot->r404_cnt >= MAX_404) { + + req->pivot->r404_skip = 1; + + problem(PROB_404_FAIL, RPREQ(req), RPRES(req), + (u8*)"too many 404 signatures found", req->pivot, 0); + + goto schedule_next; + + } + + memcpy(&req->pivot->r404[i], &res->sig, sizeof(struct http_sig)); + req->pivot->r404_cnt++; + + /* Is this a new signature not seen on parent? Notify if so, + and check it thoroughly. */ + + par = dir_parent(req->pivot); + + if (par) { + + for (i=0;ir404_cnt;i++) + if (same_page(&res->sig, &par->r404[i])) break; + + } + + if (!par || i == par->r404_cnt) { + problem(PROB_NEW_404, req, res, NULL, req->pivot, 1); + RESP_CHECKS(req, res); + } + + } + +schedule_next: + + /* First probe OK? 
*/ + + if (!req->user_val) { + u8* nk; + u32 cur_ext = 0; + + /* First probe should already yield a 404 signature. */ + + if (!req->pivot->r404_cnt) { + DEBUG("* First probe failed to yield a signature.\n"); + goto bad_404; + } + + DEBUG("* First probe yielded a valid signature.\n"); + + /* At this point, we can be reasonably sure the response is + meaningful. */ + + PIVOT_CHECKS(req->pivot->req, req->pivot->res); + check_case(req->pivot); + + /* Aaand schedule all the remaining probes. */ + + while ((nk = wordlist_get_extension(cur_ext++))) { + u8* tmp = ck_alloc(strlen(BOGUS_FILE) + strlen((char*)nk) + 2); + + n = req_copy(RPREQ(req), req->pivot, 1); + + sprintf((char*)tmp, "%s.%s", BOGUS_FILE, nk); + replace_slash(n, tmp); + ck_free(tmp); + n->callback = dir_404_callback; + n->user_val = 1; + + /* r404_pending is at least 1 to begin with, so this is safe + even if async_request() has a synchronous effect. */ + + req->pivot->r404_pending++; + async_request(n); + + } + + /* Also issue 404 probe for "lpt9", as "con", "prn", "nul", "lpt#", + etc, are handled in a really annoying way by IIS. */ + + n = req_copy(RPREQ(req), req->pivot, 1); + replace_slash(n, (u8*)"lpt9"); + n->callback = dir_404_callback; + n->user_val = 1; + req->pivot->r404_pending++; + async_request(n); + + /* ...and for ~user, since this sometimes has a custom response, too. */ + + n = req_copy(RPREQ(req), req->pivot, 1); + replace_slash(n, (u8*)"~" BOGUS_FILE); + n->callback = dir_404_callback; + n->user_val = 1; + req->pivot->r404_pending++; + async_request(n); + + /* Lastly, make sure that directory 404 is on file. */ + + n = req_copy(RPREQ(req), req->pivot, 1); + replace_slash(n, (u8*)BOGUS_FILE); + set_value(PARAM_PATH, 0, (u8*)"", -1, &n->par); + n->callback = dir_404_callback; + n->user_val = 1; + req->pivot->r404_pending++; + async_request(n); + + } + + if (--(req->pivot->r404_pending)) return 0; + + /* If we're here, all probes completed, and we had no major errors. 
+ If no signatures gathered, try to offer useful advice. */ + +bad_404: + + if (!req->pivot->r404_cnt || req->pivot->r404_skip) { + + DEBUG("* 404 detection failed.\n"); + + if (RPRES(req)->code == 404) { + + req->pivot->missing = 1; + + } else if (RPRES(req)->code >= 400) { + + problem(PROB_NO_ACCESS, RPREQ(req), RPRES(req), NULL, req->pivot, 0); + + /* Additional check for 401, 500 codes, as we're not calling + content_checks() otherwise. */ + + if (RPRES(req)->code == 401) + problem(PROB_AUTH_REQ, RPREQ(req), RPRES(req), NULL, req->pivot, 0); + else if (RPRES(req)->code >= 500) + problem(PROB_SERV_ERR, RPREQ(req), RPRES(req), NULL, req->pivot, 0); + + } else { + + if (req->pivot->type != PIVOT_SERV) { + req->pivot->type = PIVOT_PATHINFO; + replace_slash(req->pivot->req, NULL); + } else + problem(PROB_404_FAIL, RPREQ(req), RPRES(req), + (u8*)"no distinctive 404 behavior detected", req->pivot, 0); + } + + req->pivot->r404_cnt = 0; + + /* We can still try parsing the response, if it differs from parent + in any way... */ + + if (!RPAR(req)->res || !same_page(&RPRES(req)->sig, &RPAR(req)->res->sig)) + PIVOT_CHECKS(req->pivot->req, req->pivot->res); + + } else DEBUG("* 404 detection successful.\n"); + + /* Note that per-extension 404 probes coupled with a limit on the number of + 404 signatures largely eliminates the need for BH_COUNT identical probes + to confirm sane behavior here. */ + + /* Regardless of the outcome, let's schedule a final IPS check. Theoretically, + a single request would be fine; but some servers, such as gws, tend + to respond to /?foo very differently than to /. 
*/ + + req->pivot->state = PSTATE_IPS_CHECK; + + n = req_copy(RPREQ(req), req->pivot, 1); + tokenize_path((u8*)IPS_TEST, n, 0); + n->callback = dir_ips_callback; + n->user_val = 0; + async_request(n); + + n = req_copy(RPREQ(req), req->pivot, 1); + tokenize_path((u8*)IPS_SAFE, n, 0); + n->callback = dir_ips_callback; + n->user_val = 1; + async_request(n); + + return 0; + +} + + +/* STAGE 3: Called on IPS check, twice. */ + +static u8 dir_ips_callback(struct http_request* req, + struct http_response* res) { + struct pivot_desc* par; + + DEBUG_CALLBACK(req, res); + + if (req->pivot->i_skip[4]) return 0; + + if (req->user_val == 1 && FETCH_FAIL(res)) { + handle_error(req, res, (u8*)"during IPS tests", 0); + req->pivot->i_skip[4] = 1; + goto schedule_next; + } + + req->pivot->misc_req[req->user_val] = req; + req->pivot->misc_res[req->user_val] = res; + if ((++req->pivot->misc_cnt) != 2) return 1; + + par = dir_parent(req->pivot); + + if (!par || !par->uses_ips) { + + if (MRES(0)->state != STATE_OK) + problem(PROB_IPS_FILTER, MREQ(0), MRES(0), + (u8*)"request timed out (could also be a flaky server)", + req->pivot, 0); + else if (!same_page(&MRES(0)->sig, &MRES(1)->sig)) + problem(PROB_IPS_FILTER, MREQ(0), MRES(0), NULL, req->pivot, 0); + + } else { + + if (MRES(0)->state == STATE_OK && same_page(&MRES(0)->sig, &MRES(1)->sig)) + problem(PROB_IPS_FILTER_OFF, MREQ(0), MRES(0), NULL, req->pivot, 0); + + } + +schedule_next: + + destroy_misc_data(req->pivot, req); + + /* Schedule injection attacks. */ + + unlock_children(req->pivot); + + req->pivot->state = PSTATE_CHILD_INJECT; + inject_init(req->pivot); + + return 0; +} + + +/* STAGE 5: Start / update directory brute-force. */ + +static void crawl_dir_dict_init(struct pivot_desc* pv) { + static u8 in_dict_init; + struct http_request* n; + u8 *kw, *ex; + u32 i, c; + + /* Too many requests still pending, or already moved on to + parametric tests? 
*/ + + if (in_dict_init || pv->pending > DICT_BATCH || pv->state != PSTATE_CHILD_DICT) + return; + + if (pv->child_cnt >= max_children) { + crawl_parametric_init(pv); + return; + } + + if (pv->no_fuzz) { + if (pv->no_fuzz == 1) + problem(PROB_LIMITS, pv->req, pv->res, + (u8*)"Recursion limit reached, not fuzzing", pv, 0); + else + problem(PROB_LIMITS, pv->req, pv->res, + (u8*)"Directory out of scope, not fuzzing", pv, 0); + crawl_parametric_init(pv); + return; + } + + DEBUG_HELPER(pv); + +restart_dict: + + kw = (pv->guess ? wordlist_get_guess : wordlist_get_word)(pv->cur_key); + + if (!kw) { + + /* No more keywords. Move to guesswords if not there already, or + advance to parametric tests otherwise. */ + + if (pv->guess) { crawl_parametric_init(pv); return; } + + pv->guess = 1; + pv->cur_key = 0; + goto restart_dict; + + } + + /* Only schedule crawl_prob% dictionary entries. */ + + if (R(100) < crawl_prob) { + + /* Schedule extension-less probe, unless the name is already + on child list. */ + + for (c=0;cchild_cnt;c++) + if (!((is_c_sens(pv) ? strcmp : strcasecmp)((char*)kw, + (char*)pv->child[c]->name))) break; + + /* Matching current node? */ + + if (pv->fuzz_par != -1 && + !((is_c_sens(pv) ? strcmp : strcasecmp)((char*)kw, + (char*)pv->req->par.v[pv->fuzz_par]))) c = ~pv->child_cnt; + + if (c == pv->child_cnt) { + n = req_copy(pv->req, pv, 1); + replace_slash(n, kw); + n->callback = dir_dict_callback; + pv->pending++; + in_dict_init = 1; + async_request(n); + in_dict_init = 0; + + /* Some web frameworks respond with 404 to /foo, but + something else to /foo/. Let's try to account for these, too, + to the extend possible. */ + + n = req_copy(pv->req, pv, 1); + replace_slash(n, kw); + set_value(PARAM_PATH, NULL, (u8*)"", -1, &n->par); + n->callback = dir_dict_callback; + pv->pending++; + in_dict_init = 1; + async_request(n); + in_dict_init = 0; + + } + + /* Schedule probes for all extensions for the current word, + likewise. 
*/ + + i = 0; + + while (!no_fuzz_ext && (ex = wordlist_get_extension(i))) { + + u8* tmp = ck_alloc(strlen((char*)kw) + strlen((char*)ex) + 2); + + sprintf((char*)tmp, "%s.%s", kw, ex); + + for (c=0;cchild_cnt;c++) + if (!((is_c_sens(pv) ? strcmp : strcasecmp)((char*)tmp, + (char*)pv->child[c]->name))) break; + + if (pv->fuzz_par != -1 && + !((is_c_sens(pv) ? strcmp : strcasecmp)((char*)tmp, + (char*)pv->req->par.v[pv->fuzz_par]))) c = pv->child_cnt; + + if (c == pv->child_cnt) { + n = req_copy(pv->req, pv, 1); + replace_slash(n, tmp); + n->callback = dir_dict_callback; + pv->pending++; + in_dict_init = 1; + async_request(n); + in_dict_init = 0; + } + + ck_free(tmp); + + i++; + } + + } + + pv->cur_key++; + + /* This scheduled extension_cnt + 1 requests - which, depending on + settings, may be anywhere from 1 to 200 or so. Grab more keywords + until we have a decent number scheduled, to improve parallelism. */ + + if (pv->pending < DICT_BATCH) goto restart_dict; + +} + + +/* CALLBACK FOR STAGE 5: Checks for a hit, schedules some more. */ + +static u8 dir_dict_callback(struct http_request* req, + struct http_response* res) { + u32 i; + u8* lp = NULL; + + DEBUG_CALLBACK(req, res); + + if (FETCH_FAIL(res)) { + handle_error(req, res, (u8*)"during path-based dictionary probes", 0); + } else { + + /* Check if 404 */ + + if (!req->pivot->r404_cnt) + DEBUG("Bad pivot with no sigs! Pivot name = '%s'\n", + req->pivot->name); + + if (res->code == 403) + for (i=0;ipar.c;i++) + if (req->par.t[i] == PARAM_PATH && req->par.v[i][0]) + lp = req->par.v[i]; + + for (i=0;ipivot->r404_cnt;i++) + if (same_page(&res->sig, &req->pivot->r404[i])) break; + + /* Special case for secondary extension fuzzing - skip secondary + extensions that seemingly return the same document. */ + + if (req->user_val && same_page(&res->sig, &req->same_sig)) + i = ~req->pivot->r404_cnt; + + /* Do not add 403 responses to .ht* requests - workaround for + Apache filtering to keep reports clean. 
*/ + + if (lp && !strncmp((char*)lp,".ht",3)) + i = ~req->pivot->r404_cnt; + + /* If not 404, do response, and does not look like + parent's original file signature, add pivot. */ + + if (i == req->pivot->r404_cnt) maybe_add_pivot(req, res, 0); + + } + + /* Try replenishing the queue. */ + + if (!req->user_val) { + req->pivot->pending--; + crawl_dir_dict_init(req->pivot); + } + + return 0; + +} + + +/* + + ************************ + * PIVOT_UNKNOWN CHECKS * + ************************ + + Callbacks used on resources of unknown type. Proceed to parametric checks + if something goes wrong, or file / dir checks if detection successful. + + */ + +/* STAGE 1: callback on the original request. */ + +u8 fetch_unknown_callback(struct http_request* req, struct http_response* res) { + u32 i = 0 /* bad gcc */; + struct pivot_desc* par; + struct http_request* n; + + RPRES(req) = res; + DEBUG_CALLBACK(req, res); + + if (FETCH_FAIL(res)) { + handle_error(req, res, (u8*)"during initial resource fetch", 1); + return 1; + } + + /* Matches parent's 404? */ + + par = dir_parent(req->pivot); + + if (par) + for (i=0;ir404_cnt;i++) + if (same_page(&res->sig, &par->r404[i])) break; + + /* Again, 404 is the least we could do. */ + + if ((!par && res->code == 404) || (par && i != par->r404_cnt)) { + + req->pivot->missing = 1; + unlock_children(req->pivot); + crawl_parametric_init(req->pivot); + return 1; + + } + + /* If the response looks like parent's original unknown_callback() + response, assume file. This is a workaround for some really + quirky architectures. */ + + if (par && res->pay_len && res->code == 200 && + same_page(&par->unk_sig, &res->sig)) { + + req->pivot->type = PIVOT_FILE; + return fetch_file_callback(req, res); + + } + + /* Schedule a request to settle the type of this pivot point. 
*/ + + n = req_copy(req, req->pivot, 1); + set_value(PARAM_PATH, NULL, (u8*)"", -1, &n->par); + n->callback = unknown_check_callback; + async_request(n); + + /* This is the initial callback, keep the response. */ + + return 1; + +} + + +/* CALLBACK FOR STAGE 1: Tries to figure out if this is a directory. */ + +static u8 unknown_check_callback(struct http_request* req, + struct http_response* res) { + u8 keep = 0; + + DEBUG_CALLBACK(req, res); + + if (FETCH_FAIL(res)) { + handle_error(req, res, (u8*)"during node type checks", 0); + goto schedule_next; + } + + /* If pivot == res, we are probably dealing with PATH_INFO-style + plot device, which is best approached as a directory anyway + (worst-case scenario, dir handlers will dismiss it as + misbehaving and demote it to PIVOT_PATHINFO after some extra + checks). + + If pivot != res, and res is not a 404 response, assume dir; + and if it is 404, assume file. + + We also have a special case if the original request returned a + non-empty 2xx response, but the new one returned 4xx or 5xx - this is + likely a file, too. */ + + if (same_page(&RPRES(req)->sig, &res->sig)) goto assume_dir; else { + u32 i = 0; + struct pivot_desc* par = dir_parent(req->pivot); + + if (par) + for (i=0;ir404_cnt;i++) + if (same_page(&res->sig, &par->r404[i])) break; + + if ((!par && res->code == 404) || (par && i != par->r404_cnt) || + (RPRES(req)->code < 300 && res->code >= 400 && RPRES(req)->pay_len)) { + + req->pivot->type = PIVOT_FILE; + + } else { + +assume_dir: + + req->pivot->type = PIVOT_DIR; + + /* Replace original request, response with new data. */ + + destroy_request(RPREQ(req)); + + if (RPRES(req)) { + memcpy(&req->pivot->unk_sig, &RPRES(req)->sig, sizeof(struct http_sig)); + destroy_response(RPRES(req)); + } + + RPREQ(req) = req; + RPRES(req) = res; + + keep = 1; + + } + + } + +schedule_next: + + /* Well, we need to do something. 
*/ + + if (req->pivot->type == PIVOT_DIR) + fetch_dir_callback(RPREQ(req), RPRES(req)); + else fetch_file_callback(RPREQ(req), RPRES(req)); + + return keep; +} + diff --git a/crawler.h b/crawler.h new file mode 100644 index 0000000..8f7e77a --- /dev/null +++ b/crawler.h @@ -0,0 +1,96 @@ +/* + skipfish - crawler state machine + -------------------------------- + + Author: Michal Zalewski + + Copyright 2009, 2010 by Google Inc. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + */ + +#ifndef _HAVE_CRAWLER_H + +#include "types.h" +#include "http_client.h" +#include "database.h" + +extern u32 crawl_prob; /* Crawl probability (1-100%) */ +extern u8 no_parse, /* Disable HTML link detection */ + warn_mixed, /* Warn on mixed content? */ + no_fuzz_ext, /* Don't fuzz ext in dirs? */ + log_ext_urls; /* Log external URLs? */ + +/* Provisional debugging callback. */ + +u8 show_response(struct http_request* req, struct http_response* res); + +/* Asynchronous request callback for the initial PSTATE_FETCH request of + PIVOT_UNKNOWN resources. */ + +u8 fetch_unknown_callback(struct http_request* req, struct http_response* res); + +/* Asynchronous request callback for the initial PSTATE_FETCH request of + PIVOT_FILE resources. */ + +u8 fetch_file_callback(struct http_request* req, struct http_response* res); + +/* Asynchronous request callback for the initial PSTATE_FETCH request of + PIVOT_DIR resources. 
*/ + +u8 fetch_dir_callback(struct http_request* req, struct http_response* res); + +/* Initializes the crawl of try_list items for a pivot point (if any still + not crawled). */ + +void crawl_par_trylist_init(struct pivot_desc* pv); + +/* Adds new name=value to form hints list. */ + +void add_form_hint(u8* name, u8* value); + +/* Macros to access various useful pivot points: */ + +#define MREQ(_x) (req->pivot->misc_req[_x]) +#define MRES(_x) (req->pivot->misc_res[_x]) +#define RPAR(_req) ((_req)->pivot->parent) +#define RPREQ(_req) ((_req)->pivot->req) +#define RPRES(_req) ((_req)->pivot->res) + +/* Debugging instrumentation for callbacks and callback helpers: */ + +#ifdef LOG_STDERR + +#define DEBUG_CALLBACK(_req, _res) do { \ + u8* _url = serialize_path(_req, 1, 1); \ + DEBUG("* %s: URL %s (%u, len %u)\n", __FUNCTION__, _url, (_res) ? \ + (_res)->code : 0, (_res) ? (_res)->pay_len : 0); \ + ck_free(_url); \ + } while (0) + +#define DEBUG_HELPER(_pv) do { \ + u8* _url = serialize_path((_pv)->req, 1, 1); \ + DEBUG("* %s: URL %s (%u, len %u)\n", __FUNCTION__, _url, (_pv)->res ? \ + (_pv)->res->code : 0, (_pv)->res ? (_pv)->res->pay_len : 0); \ + ck_free(_url); \ + } while (0) + +#else + +#define DEBUG_CALLBACK(_req, _res) +#define DEBUG_HELPER(_pv) + +#endif /* ^LOG_STDERR */ + +#endif /* !_HAVE_CRAWLER_H */ diff --git a/database.c b/database.c new file mode 100644 index 0000000..94b6d5d --- /dev/null +++ b/database.c @@ -0,0 +1,1356 @@ +/* + skipfish - database & crawl management + -------------------------------------- + + Author: Michal Zalewski + + Copyright 2009, 2010 by Google Inc. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + */ + +#define _VIA_DATABASE_C + +#include +#include +#include +#include + +#include "debug.h" +#include "config.h" +#include "types.h" +#include "http_client.h" +#include "database.h" +#include "crawler.h" +#include "string-inl.h" + +struct pivot_desc root_pivot; + +u8 **deny_urls, /* List of banned URL substrings */ + **deny_strings, /* List of banned page substrings */ + **allow_urls, /* List of required URL substrings */ + **allow_domains, /* List of allowed vhosts */ + **trust_domains; /* List of trusted vhosts */ + +u32 num_deny_urls, + num_deny_strings, + num_allow_urls, + num_allow_domains, + num_trust_domains; + +u32 max_depth = MAX_DEPTH, + max_children = MAX_CHILDREN, + max_guesses = MAX_GUESSES; + +u8 dont_add_words; /* No auto dictionary building */ + +struct kw_entry { + u8* word; /* Keyword itself */ + u32 hit_cnt; /* Number of confirmed sightings */ + u8 is_ext; /* Is an extension? */ + u8 hit_already; /* Had its hit count bumped up? 
*/ + u32 total_age; /* Total age (in scan cycles) */ + u32 last_age; /* Age since last hit */ +}; + +static struct kw_entry* + keyword[WORD_HASH]; /* Keyword collection (bucketed) */ + +static u32 keyword_cnt[WORD_HASH]; /* Per-bucket keyword counts */ + +static u8 **extension, /* Extension list */ + **guess; /* Keyword candidate list */ + +static u32 guess_cnt, /* Number of keyword candidates */ + extension_cnt, /* Number of extensions */ + keyword_total_cnt, /* Current keyword count */ + keyword_orig_cnt; /* At-boot keyword count */ + +static u32 cur_xss_id, scan_id; /* Stored XSS manager IDs */ +static struct http_request** xss_req; /* Stored XSS manager req cache */ + + + + +/* Maps a parsed URL (in req) to the pivot tree, creating or modifying nodes + as necessary, and scheduling them for crawl. This should be called only + on requests that were *not* yet retrieved. */ + +void maybe_add_pivot(struct http_request* req, struct http_response* res, + u8 via_link) { + + struct pivot_desc *cur = NULL; + + u32 i, par_cnt = 0, path_cnt = 0, pno; + u8 ends_with_slash = 0; + +#ifdef LOG_STDERR + + u8* url = serialize_path(req, 1, 1); + DEBUG("--- New pivot requested: %s (%d) --\n", url, via_link); + ck_free(url); + +#endif /* LOG_STDERR */ + + if (!req) FATAL("Invalid request data."); + + /* Initialize root pivot if not done already. */ + + if (!root_pivot.type) { + root_pivot.type = PIVOT_ROOT; + root_pivot.state = PSTATE_DONE; + root_pivot.linked = 2; + root_pivot.fuzz_par = -1; + root_pivot.name = ck_strdup((u8*)"[root]"); + } + + if (!url_allowed(req)) { url_scope++; return; } + + /* Count the number of path and query parameters in the request. 
*/ + + for (i=0;ipar.c;i++) { + + if (QUERY_SUBTYPE(req->par.t[i]) || POST_SUBTYPE(req->par.t[i])) par_cnt++; + + if (PATH_SUBTYPE(req->par.t[i])) { + + if (req->par.t[i] == PARAM_PATH && !req->par.n[i] && !req->par.v[i][0]) + ends_with_slash = 0; else ends_with_slash = 1; + + path_cnt++; + + } + + /* While we're at it, try to learn new keywords. */ + + if (PATH_SUBTYPE(req->par.t[i]) || QUERY_SUBTYPE(req->par.t[i])) { + if (req->par.n[i]) wordlist_confirm_word(req->par.n[i]); + wordlist_confirm_word(req->par.v[i]); + } + + } + + /* Try to find pivot point for the host. */ + + for (i=0;ireq->host, (char*)req->host) && + cur->req->port == req->port && + cur->req->proto == req->proto) break; + } + + if (i == root_pivot.child_cnt) { + + /* No server pivot found, we need to create one. */ + + cur = ck_alloc(sizeof(struct pivot_desc)); + + root_pivot.child = ck_realloc(root_pivot.child, + (root_pivot.child_cnt + 1) * sizeof(struct pivot_desc*)); + + root_pivot.child[root_pivot.child_cnt++] = cur; + + cur->type = PIVOT_SERV; + cur->state = PSTATE_FETCH; + cur->linked = 2; + cur->fuzz_par = -1; + cur->parent = &root_pivot; + + /* Copy the original request, sans path. Create a dummy + root dir entry instead. Derive pivot name by serializing + the URL of the associated stub request. */ + + cur->req = req_copy(req, cur, 0); + set_value(PARAM_PATH, NULL, (u8*)"", -1, &cur->req->par); + cur->name = serialize_path(cur->req, 1, 0); + cur->req->callback = fetch_dir_callback; + + /* If matching response not provided, schedule request. */ + + if (res && !par_cnt && path_cnt == 1) { + cur->res = res_copy(res); + fetch_dir_callback(req, cur->res); + } else async_request(cur->req); + + wordlist_confirm_word(req->host); + + } + + /* One way or the other, 'cur' now points to server pivot. Let's + walk through all path elements, and follow or create sub-pivots + for them. */ + + pno = 0; + + for (i=0;ipar.t[pno])) pno++; + + /* Bail out on the trailing NULL-'' indicator, if present. 
It is + used to denote a directory, and will always be the last path + element. */ + + if (i == path_cnt - 1 && req->par.t[pno] == PARAM_PATH && + !req->par.n[pno] && !req->par.v[pno][0]) break; + + pname = req->par.n[pno] ? req->par.n[pno] : req->par.v[pno]; + + ccnt = cur->child_cnt; + + /* Try to find a matching node. */ + + for (c=0;cchild[c]->name)) { + cur = cur->child[c]; + if (cur->linked < via_link) cur->linked = via_link; + break; + } + + if (c == ccnt) { + + /* Node not found. We need to create one. */ + + struct pivot_desc* n; + + /* Enforce user limits. */ + + if ((i + 1) >= max_depth || cur->child_cnt > max_children) + return; + + /* Create and link back to parent. */ + + n = ck_alloc(sizeof(struct pivot_desc)); + + cur->child = ck_realloc(cur->child, (cur->child_cnt + 1) * + sizeof(struct pivot_desc*)); + + cur->child[cur->child_cnt++] = n; + + n->parent = cur; + n->linked = via_link; + n->name = ck_strdup(pname); + + /* Copy the original request, then copy over path up to the + current point. */ + + n->req = req_copy(req, n, 0); + + for (c=0;c<=pno;c++) + if (PATH_SUBTYPE(req->par.t[c])) + set_value(req->par.t[c], req->par.n[c], req->par.v[c], -1, + &n->req->par); + + /* If name is parametric, indicate which parameter to fuzz. */ + + if (req->par.n[pno]) n->fuzz_par = n->req->par.c - 1; + else n->fuzz_par = -1; + + /* Do not fuzz out-of-scope or limit exceeded dirs... */ + + if ((i + 1) == max_depth - 1) n->no_fuzz = 1; + + if (i != path_cnt - 1) { + + /* This is not the last path segment, so let's assume a "directory" + (hierarchy node, to be more accurate), and schedule directory + tests. */ + + set_value(PARAM_PATH, NULL, (u8*)"", -1, &n->req->par); + n->type = PIVOT_DIR; + n->req->callback = fetch_dir_callback; + + if (!url_allowed(n->req)) n->no_fuzz = 2; + + /* Subdirectory tests require parent directory 404 testing to complete + first. If these are still pending, wait a bit. 
*/ + + if (cur->state > PSTATE_IPS_CHECK) { + + n->state = PSTATE_FETCH; + + /* If this actually *is* the last parameter, taking into account the + early-out hack mentioned above, and we were offered a response - + make use of it and don't schedule a new request. */ + + if (i == path_cnt - 2 && ends_with_slash && res) { + + n->res = res_copy(res); + fetch_dir_callback(n->req, n->res); + + } else async_request(n->req); + + } else n->state = PSTATE_PENDING; + + } else { + + /* Last segment. If no parameters, copy response body, mark type as + "unknown", schedule extra checks. */ + + if (!url_allowed(n->req)) n->no_fuzz = 2; + + if (!par_cnt) { + + n->type = PIVOT_UNKNOWN; + n->res = res_copy(res); + n->req->callback = fetch_unknown_callback; + + if (cur->state > PSTATE_IPS_CHECK) { + + n->state = PSTATE_FETCH; + + /* If we already have a response, call the callback directly + (it will schedule further requests on its own). */ + + if (!res) { + n->state = PSTATE_FETCH; + async_request(n->req); + } else fetch_unknown_callback(n->req, n->res); + + } else n->state = PSTATE_PENDING; + + } else { + + /* Parameters found. Assume file, schedule a fetch. */ + + n->type = PIVOT_FILE; + n->req->callback = fetch_file_callback; + + if (cur->state > PSTATE_IPS_CHECK) { + n->state = PSTATE_FETCH; + async_request(n->req); + } else n->state = PSTATE_PENDING; + + } + + } + + cur = n; + + } + + /* At this point, 'cur' points to a newly created or existing node + for the path element. If this element is parametric, make sure + that its value is on the 'try' list. */ + + if (req->par.n[pno]) { + + for (c=0;ctry_cnt;c++) + if (cur->try_list[c] && !(is_c_sens(cur) ? strcmp : strcasecmp) + ((char*)req->par.v[pno], (char*)cur->try_list[c])) break; + + /* Not found on the list - try adding. 
*/ + + if (c == cur->try_cnt) { + + cur->try_list = ck_realloc(cur->try_list, (cur->try_cnt + 1) * + sizeof(u8*)); + cur->try_list[cur->try_cnt++] = ck_strdup(req->par.v[pno]); + + if (cur->state == PSTATE_DONE) + crawl_par_trylist_init(cur); + + } + + } + + pno++; + + } + + /* Phew! At this point, 'cur' points to the final path element, and now, + we just need to take care of parameters. Each parameter has its own + pivot point, and a full copy of the request. */ + + pno = 0; + + for (i=0;ipar.t[pno]) && !POST_SUBTYPE(req->par.t[pno])) + pno++; + + pname = req->par.n[pno] ? req->par.n[pno] : (u8*)"[blank]"; + ccnt = cur->child_cnt; + + /* Try to find a matching node. */ + + for (c=0;cchild[c]->name)) { + cur = cur->child[c]; + if (cur->linked < via_link) cur->linked = via_link; + break; + } + + if (c == ccnt) { + + /* Node not found. We need to create one. */ + + struct pivot_desc* n; + + /* Enforce user limits. */ + + if (cur->child_cnt > max_children) { + problem(PROB_LIMITS, req, res, (u8*)"Child node limit exceeded", cur, 0); + return; + } + + /* Create and link back to parent. */ + + n = ck_alloc(sizeof(struct pivot_desc)); + + cur->child = ck_realloc(cur->child, (cur->child_cnt + 1) * + sizeof(struct pivot_desc*)); + + cur->child[cur->child_cnt++] = n; + + n->parent = cur; + n->type = PIVOT_PARAM; + n->linked = via_link; + n->name = ck_strdup(pname); + + /* Copy the original request, in full. Remember not to fuzz + file inputs. */ + + n->req = req_copy(req, n, 1); + n->fuzz_par = req->par.t[pno] == PARAM_POST_F ? -1 : pno; + n->res = res_copy(res); + + /* File fetcher does everything we need. */ + + n->req->callback = fetch_file_callback; + + if (cur->state > PSTATE_IPS_CHECK) { + n->state = PSTATE_FETCH; + if (res) fetch_file_callback(n->req, n->res); + else async_request(n->req); + } else n->state = PSTATE_PENDING; + + cur = n; + + } + + /* Ok, again, 'cur' is at the appropriate node. Make sure the + current value is on the 'try' list. 
*/ + + for (c=0;ctry_cnt;c++) + if (cur->try_list[c] && !(is_c_sens(cur) ? strcmp : strcasecmp) + ((char*)req->par.v[pno], (char*)cur->try_list[c])) break; + + /* Not found on the list - try adding. */ + + if (c == cur->try_cnt) { + + cur->try_list = ck_realloc(cur->try_list, (cur->try_cnt + 1) * + sizeof(u8*)); + cur->try_list[cur->try_cnt++] = ck_strdup(req->par.v[pno]); + + if (cur->state == PSTATE_DONE) + crawl_par_trylist_init(cur); + + } + + /* Parameters are not hierarchical, so go back to the parent node. */ + + cur = cur->parent; + pno++; + + } + + /* Done, at last! */ + +} + + +/* Finds the host-level pivot point for global issues. */ + +struct pivot_desc* host_pivot(struct pivot_desc* pv) { + while (pv->parent && pv->parent->parent) pv = pv->parent; + return pv; +} + + +/* Gets case sensitivity info from the nearest DIR / SERV node. */ + +u8 is_c_sens(struct pivot_desc* pv) { + while (pv->parent && (pv->type != PIVOT_DIR || pv->type != PIVOT_SERV)) + pv = pv->parent; + return pv->csens; +} + + +/* Registers a problem, if not duplicate (res, extra may be NULL): */ + +void problem(u32 type, struct http_request* req, struct http_response* res, + u8* extra, struct pivot_desc* pv, u8 allow_dup) { + + u32 i; + + if (pv->type == PIVOT_NONE) FATAL("Uninitialized pivot point"); + if (type == PROB_NONE || !req) FATAL("Invalid issue data"); + + DEBUG("--- NEW PROBLEM - type: %u, extra: '%s' ---\n", type, extra); + + /* Check for duplicates */ + + if (!allow_dup) + for (i=0;iissue_cnt;i++) + if (type == pv->issue[i].type && !strcmp(extra ? (char*)extra : "", + pv->issue[i].extra ? (char*)pv->issue[i].extra : "")) return; + + pv->issue = ck_realloc(pv->issue, (pv->issue_cnt + 1) * + sizeof(struct issue_desc)); + + pv->issue[pv->issue_cnt].type = type; + pv->issue[pv->issue_cnt].extra = extra ? 
ck_strdup(extra) : NULL; + pv->issue[pv->issue_cnt].req = req_copy(req, pv, 1); + pv->issue[pv->issue_cnt].res = res_copy(res); + + /* Mark copies of half-baked requests as done. */ + + if (res && res->state < STATE_OK) { + pv->issue[pv->issue_cnt].res->state = STATE_OK; + ck_free(pv->issue[pv->issue_cnt].res->payload); + pv->issue[pv->issue_cnt].res->payload = + ck_strdup((u8*)"[...truncated...]\n"); + pv->issue[pv->issue_cnt].res->pay_len = 18; + } + + pv->issue_cnt++; + +} + + + +/* Three functions to check if the URL is permitted under current rules + (0 = no, 1 = yes): */ + +u8 url_allowed_host(struct http_request* req) { + u32 i; + + for (i=0;ihost, allow_domains[i]); + + if (pos && strlen((char*)req->host) == + strlen((char*)allow_domains[i]) + (pos - req->host)) + return 1; + + } else + if (!strcasecmp((char*)req->host, (char*)allow_domains[i])) + return 1; + + } + + return 0; +} + + +u8 url_trusted_host(struct http_request* req) { + u32 i; + + i = 0; + + while (always_trust_domains[i]) { + + if (always_trust_domains[i][0] == '.') { + + u8* pos = inl_strcasestr(req->host, (u8*)always_trust_domains[i]); + + if (pos && strlen((char*)req->host) == + strlen(always_trust_domains[i]) + (pos - req->host)) + return 1; + } else + if (!strcasecmp((char*)req->host, (char*)always_trust_domains[i])) + return 1; + + i++; + + } + + for (i=0;ihost, trust_domains[i]); + + if (pos && strlen((char*)req->host) == + strlen((char*)trust_domains[i]) + (pos - req->host)) + return 1; + + } + + return 0; +} + +u8 url_allowed(struct http_request* req) { + u8* url = serialize_path(req, 1, 0); + u32 i; + + /* Check blacklist first */ + + for (i=0;icode != sig2->code) return 0; + + for (i=0;idata[i] - sig2->data[i]; + u32 scale = sig1->data[i] + sig2->data[i]; + + if (abs(diff) > 1 + (scale * FP_T_REL / 100) || + abs(diff) > FP_T_ABS) + if (++bucket_fail > FP_B_FAIL) return 0; + + total_diff += diff; + total_scale += scale; + + } + + if (abs(total_diff) > 1 + (total_scale * FP_T_REL / 
100)) + return 0; + + return 1; + +} + + +/* Dumps signature data: */ + +void dump_signature(struct http_sig* sig) { + u32 i; + + DEBUG("SIG %03d: ", sig->code); + for (i=0;idata[i]); + DEBUG("\n"); + +} + + +/* Debugs signature comparison: */ + +void debug_same_page(struct http_sig* sig1, struct http_sig* sig2) { + +#ifdef LOG_STDERR + + u32 i; + s32 total_diff = 0; + u32 total_scale = 0; + + dump_signature(sig1); + dump_signature(sig2); + + DEBUG(" "); + + for (i=0;idata[i] - sig2->data[i]; + DEBUG("[%04d] ", diff); + } + + DEBUG("(diff)\n "); + + for (i=0;idata[i] - sig2->data[i]; + u32 scale = sig1->data[i] + sig2->data[i]; + + if (abs(diff) > 1 + (scale * FP_T_REL / 100) || + abs(diff) > FP_T_ABS) + DEBUG("[FAIL] "); else DEBUG("[pass] "); + + total_diff += diff; + total_scale += scale; + } + + DEBUG("\n "); + + for (i=0;idata[i] + sig2->data[i]; + + DEBUG("[%04d] ", (u32)( 1 + (scale * FP_T_REL / 100))); + } + + DEBUG("(allow)\n"); + + DEBUG("Total diff: %d, scale %d, allow %d\n", + total_diff, total_scale, 1 + (u32)(total_scale * FP_T_REL / 100)); + +#endif /* LOG_STDERR */ + +} + + + +/* Keyword management: */ + + +/* Word hashing helper. */ + +static inline u32 hash_word(u8* str) { + register u32 ret = 0; + register u8 cur; + + if (str) + while ((cur=*str)) { + ret = ~ret ^ (cur) ^ + (cur << 5) ^ (~cur >> 5) ^ + (cur << 10) ^ (~cur << 15) ^ + (cur << 20) ^ (~cur << 25) ^ + (cur << 30); + str++; + } + + return ret % WORD_HASH; +} + + +/* Adds a new keyword candidate to the global "guess" list. This + list is always case-insensitive. */ + +void wordlist_add_guess(u8* text) { + u32 target, i, kh; + + if (dont_add_words) return; + + /* Check if this is a bad or known guess or keyword. 
*/ + + if (!text || !text[0] || strlen((char*)text) > MAX_WORD) return; + + for (i=0;i= max_guesses) target = R(max_guesses); + else target = guess_cnt++; + + ck_free(guess[target]); + guess[target] = ck_strdup(text); + +} + + +/* Adds a single, sanitized keyword to the list, or increases its hit count. + Keyword list is case-insensitive - first capitalization wins. */ + +static void wordlist_confirm_single(u8* text, u8 is_ext, u32 add_hits, + u32 total_age, u32 last_age) { + u32 kh, i; + + if (!text || !text[0] || strlen((char*)text) > MAX_WORD) return; + + /* Check if this is a known keyword. */ + + kh = hash_word(text); + + for (i=0;i 4) return; + + if (ppos != -1) { + + /* Period only? Too long? */ + if (tlen == 1 || tlen - ppos > 12) return; + + if (ppos && ppos != tlen - 1 && !isdigit(text[ppos] + 1)) { + wordlist_confirm_single(text + ppos + 1, 1, 1, 0, 0); + text[ppos] = 0; + wordlist_confirm_single(text, 0, 1, 0, 0); + text[ppos] = '.'; + return; + } + + } + + wordlist_confirm_single(text, 0, 1, 0, 0); +} + + +/* Returns wordlist item at a specified offset (NULL if no more available). */ + +u8* wordlist_get_word(u32 offset) { + u32 cur_off = 0, kh; + + for (kh=0;kh offset) break; + cur_off += keyword_cnt[kh]; + } + + if (kh == WORD_HASH) return NULL; + + return keyword[kh][offset - cur_off].word; +} + + +/* Returns keyword candidate at a specified offset (or NULL). */ + +u8* wordlist_get_guess(u32 offset) { + if (offset >= guess_cnt) return NULL; + return guess[offset]; +} + + +/* Returns extension at a specified offset (or NULL). */ + +u8* wordlist_get_extension(u32 offset) { + if (offset >= extension_cnt) return NULL; + return extension[offset]; +} + + +/* Loads keywords from file. 
*/ + +void load_keywords(u8* fname, u32 purge_age) { + FILE* in; + u32 hits, total_age, last_age, lines = 0; + u8 type; + s32 fields; + u8 kword[MAX_WORD + 1]; + char fmt[32]; + + kword[MAX_WORD] = 0; + + in = fopen((char*)fname, "r"); + + if (!in) { + + PFATAL("Unable to open wordlist '%s'", fname); + + WARN("Wordlist '%s' not found, not loaded.", fname); + return; + + } + + sprintf(fmt, "%%c %%u %%u %%u %%%u[^\x01-\x1f]", MAX_WORD); + + while ((fields = fscanf(in, fmt, &type, &hits, &total_age, &last_age, kword)) + == 5) { + if (!purge_age || last_age < purge_age) + wordlist_confirm_single(kword, (type == 'e'), hits, + total_age + 1, last_age + 1); + lines++; + fgetc(in); /* sink \n */ + } + + if (fields != -1 && fields != 5) + FATAL("Wordlist '%s': syntax error in line %u.\n", fname, lines + 1); + + if (!lines) + WARN("Wordlist '%s' contained no valid entries.", fname); + + keyword_orig_cnt = keyword_total_cnt; + + fclose(in); + +} + + +/* qsort() callback for sorting keywords in save_keywords(). */ + +static int keyword_sorter(const void* word1, const void* word2) { + if (((struct kw_entry*)word1)->hit_cnt < ((struct kw_entry*)word2)->hit_cnt) + return 1; + else if (((struct kw_entry*)word1)->hit_cnt == + ((struct kw_entry*)word2)->hit_cnt) + return 0; + else return -1; +} + + +/* Saves all keywords to a file. */ + +void save_keywords(u8* fname) { + struct stat st; + FILE* out; + s32 fd; + u32 i, kh; + u8* old; + +#ifndef O_NOFOLLOW +#define O_NOFOLLOW 0 +#endif /* !O_NOFOLLOW */ + + if (stat((char*)fname, &st) || !S_ISREG(st.st_mode)) return; + + /* First, sort the list. 
*/ + + for (kh=0;khtype) { + case PIVOT_SERV: pivot_serv++; /* Fall through */ + case PIVOT_DIR: pivot_dir++; break; + case PIVOT_FILE: pivot_file++; break; + case PIVOT_PATHINFO: pivot_pinfo++; break; + case PIVOT_UNKNOWN: pivot_unknown++; break; + case PIVOT_PARAM: pivot_param++; break; + case PIVOT_VALUE: pivot_value++; break; + } + + if (pv->missing) pivot_missing++; + + switch (pv->state) { + case PSTATE_PENDING: pivot_pending++; break; + case PSTATE_FETCH ... PSTATE_IPS_CHECK: pivot_init++; break; + case PSTATE_CHILD_INJECT: + case PSTATE_PAR_INJECT: pivot_attack++; break; + case PSTATE_DONE: pivot_done++; break; + default: pivot_bf++; + } + + for (i=0;iissue_cnt;i++) + issue_cnt[PSEV(pv->issue[i].type)]++; + + for (i=0;ichild_cnt;i++) + pv_stat_crawl(pv->child[i]); + +} + + +void database_stats() { + + pivot_pending = pivot_init = pivot_attack = pivot_bf = pivot_pinfo = + pivot_done = pivot_serv = pivot_dir = pivot_file = pivot_param = + pivot_value = pivot_missing = pivot_unknown = pivot_cnt = 0; + + memset(issue_cnt, 0, sizeof(issue_cnt)); + + pv_stat_crawl(&root_pivot); + + SAY("Database statistics\n" + "-------------------\n\n" + cGRA " Pivots : " cNOR "%u total, %u done (%.02f%%) \n" + cGRA " In progress : " cNOR "%u pending, %u init, %u attacks, " + "%u dict \n" + cGRA " Missing nodes : " cNOR "%u spotted\n" + cGRA " Node types : " cNOR "%u serv, %u dir, %u file, %u pinfo, " + "%u unkn, %u par, %u val\n" + cGRA " Issues found : " cNOR "%u info, %u warn, %u low, %u medium, " + "%u high impact\n" + cGRA " Dict size : " cNOR "%u words (%u new), %u extensions, " + "%u candidates\n", + pivot_cnt, pivot_done, pivot_cnt ? 
((100.0 * pivot_done) / (pivot_cnt)) + : 0, pivot_pending, pivot_init, pivot_attack, pivot_bf, pivot_missing, + pivot_serv, pivot_dir, pivot_file, pivot_pinfo, pivot_unknown, + pivot_param, pivot_value, issue_cnt[1], issue_cnt[2], issue_cnt[3], + issue_cnt[4], issue_cnt[5], keyword_total_cnt, keyword_total_cnt - + keyword_orig_cnt, extension_cnt, guess_cnt); + +} + + +/* Dumps pivot database, for debugging purposes. */ + +void dump_pivots(struct pivot_desc* cur, u8 nest) { + + u8* indent = ck_alloc(nest + 1); + u8* url; + u32 i; + + if (!cur) cur = &root_pivot; + + memset(indent, ' ', nest); + + SAY(cBRI "\n%s== Pivot " cLGN "%s" cBRI " [%d] ==\n", + indent, cur->name, cur->dupe); + SAY(cGRA "%sType : " cNOR, indent); + + switch (cur->type) { + case PIVOT_NONE: SAY(cLRD "PIVOT_NONE (bad!)\n" cNOR); break; + case PIVOT_ROOT: SAY("PIVOT_ROOT\n"); break; + case PIVOT_SERV: SAY("PIVOT_SERV\n"); break; + case PIVOT_DIR: SAY("PIVOT_DIR\n"); break; + case PIVOT_FILE: SAY("PIVOT_FILE\n"); break; + case PIVOT_PATHINFO: SAY("PIVOT_PATHINFO\n"); break; + case PIVOT_VALUE: SAY("PIVOT_VALUE\n"); break; + case PIVOT_UNKNOWN: SAY("PIVOT_UNKNOWN\n"); break; + case PIVOT_PARAM: SAY("PIVOT_PARAM\n"); break; + default: SAY(cLRD " (bad!)\n" cNOR, cur->type); + } + + SAY(cGRA "%sState : " cNOR, indent); + + switch (cur->state) { + case PSTATE_NONE: SAY(cLRD "PSTATE_NONE (bad!)\n" cNOR); break; + case PSTATE_PENDING: SAY("PSTATE_PENDING\n"); break; + case PSTATE_FETCH: SAY("PSTATE_FETCH\n"); break; + case PSTATE_TYPE_CHECK: SAY("PSTATE_TYPE_CHECK\n"); break; + case PSTATE_404_CHECK: SAY("PSTATE_404_CHECK\n"); break; + case PSTATE_IPS_CHECK: SAY("PSTATE_IPS_CHECK\n"); break; + case PSTATE_CHILD_INJECT: SAY("PSTATE_CHILD_INJECT\n"); break; + case PSTATE_CHILD_DICT: SAY("PSTATE_CHILD_DICT\n"); break; + case PSTATE_PAR_CHECK: SAY("PSTATE_PAR_CHECK\n"); break; + case PSTATE_PAR_INJECT: SAY("PSTATE_PAR_INJECT\n"); break; + case PSTATE_PAR_NUMBER: SAY("PSTATE_PAR_NUMBER\n"); break; + case 
PSTATE_PAR_DICT: SAY("PSTATE_PAR_DICT\n"); break; + case PSTATE_PAR_TRYLIST: SAY("PSTATE_PAR_TRYLIST\n"); break; + case PSTATE_DONE: SAY("PSTATE_DONE\n"); break; + default: SAY(cLRD " (bad!)\n" cNOR, + cur->state); + } + + if (cur->missing) { + if (cur->linked == 2) + SAY(cGRA "%sMissing : " cMGN "YES\n" cNOR, indent); + else + SAY(cGRA "%sMissing : " cLBL "YES (followed a dodgy link)\n" cNOR, + indent); + } + + SAY(cGRA "%sFlags : " cNOR "linked %u, case %u/%u, fuzz_par %d, ips %u, " + "sigs %u, reqs %u\n", indent, cur->linked, cur->csens, cur->c_checked, + cur->fuzz_par, cur->uses_ips, cur->r404_cnt, cur->pending); + + if (cur->req) { + url = serialize_path(cur->req, 1, 0); + SAY(cGRA "%sTarget : " cNOR "%s (" cYEL "%d" cNOR ")\n", indent, url, + cur->res ? cur->res->code : 0); + ck_free(url); + + if (cur->res) + SAY(cGRA "%sMIME : " cNOR "%s -> %s [" + "%s:%s]\n", indent, cur->res->header_mime ? cur->res->header_mime : + (u8*)"-", cur->res->sniffed_mime ? cur->res->sniffed_mime : (u8*)"-", + cur->res->header_charset ? cur->res->header_charset : (u8*)"-", + cur->res->meta_charset ? cur->res->meta_charset : (u8*)"-"); + } + + if (cur->try_cnt) { + SAY(cGRA "%sTry : " cNOR, indent); + for (i=0;itry_cnt;i++) + SAY("%s%s", cur->try_list[i], (i == cur->try_cnt - 1) ? "" : ", "); + SAY("\n"); + } + + /* Dump issues. */ + + for (i=0;iissue_cnt;i++) { + if (cur->issue[i].req) url = serialize_path(cur->issue[i].req, 0, 0); + else url = ck_strdup((u8*)"[none]"); + SAY(cGRA "%s-> Issue : " cNOR "type %d, extra '%s', URL: " cLGN "%s" + cNOR " (" cYEL "%u" cNOR ")\n", indent, cur->issue[i].type, + cur->issue[i].extra, url, cur->issue[i].res ? cur->issue[i].res->code + : 0); + ck_free(url); + } + + ck_free(indent); + + for (i=0;ichild_cnt;i++) + dump_pivots(cur->child[i], nest + 1); + +} + + +/* Cleans up pivot structure for memory debugging. 
*/ + +static void dealloc_pivots(struct pivot_desc* cur) { + u32 i; + + if (!cur) cur = &root_pivot; + + if (cur->req) destroy_request(cur->req); + if (cur->res) destroy_response(cur->res); + + ck_free(cur->name); + + if (cur->try_cnt) { + for (i=0;itry_cnt;i++) ck_free(cur->try_list[i]); + ck_free(cur->try_list); + } + + if (cur->issue) { + for (i=0;iissue_cnt;i++) { + ck_free(cur->issue[i].extra); + if (cur->issue[i].req) destroy_request(cur->issue[i].req); + if (cur->issue[i].res) destroy_response(cur->issue[i].res); + } + ck_free(cur->issue); + } + + for (i=0;ichild_cnt;i++) + dealloc_pivots(cur->child[i]); + + ck_free(cur->child); + + if (cur != &root_pivot) ck_free(cur); + +} + + + +/* Creates a new XSS location tag. */ + +u8* new_xss_tag(u8* prefix) { + static u8* ret; + + if (ret) free(ret); + ret = __DFL_ck_alloc((prefix ? strlen((char*)prefix) : 0) + 32); + + if (!scan_id) scan_id = R(999999) + 1; + + sprintf((char*)ret, "%s>\">'>'\"", + prefix ? prefix : (u8*)"", cur_xss_id, scan_id); + + return ret; + +} + + +/* Registers last XSS tag along with a completed http_request */ + +void register_xss_tag(struct http_request* req) { + xss_req = ck_realloc(xss_req, (cur_xss_id + 1) * + (sizeof(struct http_request*))); + xss_req[cur_xss_id] = req_copy(req, 0, 1); + cur_xss_id++; +} + + +/* Gets the request that submitted the tag in the first place */ + +struct http_request* get_xss_request(u32 xid, u32 sid) { + if (sid != scan_id || xid >= cur_xss_id) return NULL; + return xss_req[xid]; +} + + +/* Cleans up other database entries, for memory profiling purposes. */ + +void destroy_database() { + u32 i, kh; + + dealloc_pivots(0); + + ck_free(deny_urls); + ck_free(deny_strings); + ck_free(allow_urls); + ck_free(allow_domains); + ck_free(trust_domains); + + for (kh=0;kh + + Copyright 2009, 2010 by Google Inc. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + */ + +#ifndef _HAVE_DATABASE_H +#define _HAVE_DATABASE_H + +#include "debug.h" +#include "config.h" +#include "types.h" +#include "http_client.h" + +/* Testing pivot points - used to organize the scan: */ + +/* - Pivot types: */ + +#define PIVOT_NONE 0 /* Invalid */ +#define PIVOT_ROOT 1 /* Root pivot */ + +#define PIVOT_SERV 10 /* Top-level host pivot */ +#define PIVOT_DIR 11 /* Directory pivot */ +#define PIVOT_FILE 12 /* File pivot */ +#define PIVOT_PATHINFO 13 /* PATH_INFO script */ + +#define PIVOT_UNKNOWN 18 /* (Currently) unknown type */ + +#define PIVOT_PARAM 100 /* Parameter fuzzing pivot */ +#define PIVOT_VALUE 101 /* Parameter value pivot */ + +/* - Pivot states (initialized to PENDING or FETCH by database.c, then + advanced by crawler.c): */ + +#define PSTATE_NONE 0 /* Invalid */ +#define PSTATE_PENDING 1 /* Pending parent tests */ + +#define PSTATE_FETCH 10 /* Initial data fetch */ + +#define PSTATE_TYPE_CHECK 20 /* Type check (unknown only) */ +#define PSTATE_404_CHECK 22 /* 404 check (dir only) */ +#define PSTATE_IPS_CHECK 25 /* IPS filtering check */ + +/* For directories only (injecting children nodes): */ + +#define PSTATE_CHILD_INJECT 50 /* Common security attacks */ +#define PSTATE_CHILD_DICT 55 /* Dictionary brute-force */ + +/* For parametric nodes only (replacing parameter value): */ + +#define PSTATE_PAR_CHECK 60 /* Parameter works at all? 
*/ +#define PSTATE_PAR_INJECT 65 /* Common security attacks */ +#define PSTATE_PAR_NUMBER 70 /* Numeric ID traversal */ +#define PSTATE_PAR_DICT 75 /* Dictionary brute-force */ +#define PSTATE_PAR_TRYLIST 99 /* 'Try list' fetches */ + +#define PSTATE_DONE 100 /* Analysis done */ + +/* - Descriptor of a pivot point: */ + +struct pivot_desc { + u8 type; /* PIVOT_* */ + u8 state; /* PSTATE_* */ + u8 linked; /* Linked to? (0/1/2) */ + u8 missing; /* Determined to be missing? */ + + u8 csens; /* Case sensitive names? */ + u8 c_checked; /* csens check done? */ + + u8* name; /* Directory / script name */ + + struct http_request* req; /* Prototype HTTP request */ + + s32 fuzz_par; /* Fuzz target parameter */ + u8** try_list; /* Values to try */ + u32 try_cnt; /* Number of values to try */ + u32 try_cur; /* Last tested try list offs */ + + struct pivot_desc* parent; /* Parent pivot, if any */ + struct pivot_desc** child; /* List of children */ + u32 child_cnt; /* Number of children */ + + struct issue_desc* issue; /* List of issues found */ + u32 issue_cnt; /* Number of issues */ + + struct http_response* res; /* HTTP response seen */ + + u8 res_varies; /* Response varies? */ + + /* Fuzzer and probe state data: */ + + u8 no_fuzz; /* Do not attepmt fuzzing. */ + + u8 uses_ips; /* Uses IPS filtering? */ + + u32 cur_key; /* Current keyword */ + u32 pdic_cur_key; /* ...for param dict */ + + u8 guess; /* Guess list keywords? */ + u8 pdic_guess; /* ...for param dict */ + + u32 pending; /* Number of pending reqs */ + u32 pdic_pending; /* ...for param dict */ + u32 num_pending; /* ...for numerical enum */ + u32 try_pending; /* ...for try list */ + u32 r404_pending; /* ...for 404 probes */ + u32 ck_pending; /* ...for behavior checks */ + + struct http_sig r404[MAX_404]; /* 404 response signatures */ + u32 r404_cnt; /* Number of sigs collected */ + struct http_sig unk_sig; /* Original "unknown" sig. 
*/ + + /* Injection attack logic scratchpad: */ + + struct http_request* misc_req[10]; /* Saved requests */ + struct http_response* misc_res[10]; /* Saved responses */ + u8 misc_cnt; /* Request / response count */ + + u8 i_skip[15]; /* Injection step skip flags */ + u8 i_skip_add; + u8 r404_skip; + + u8 bogus_par; /* fuzz_par does nothing? */ + + u8 ognl_check; /* OGNL check flags */ + + /* Reporting information: */ + + u32 total_child_cnt; /* All children */ + u32 total_issues[6]; /* Issues by severity */ + u8 dupe; /* Looks like a duplicate? */ + u32 pv_sig; /* Simple pivot signature */ + +}; + +extern struct pivot_desc root_pivot; + +/* Maps a parsed URL (in req) to the pivot tree, creating or modifying nodes + as necessary, and scheduling them for crawl; via_link should be 1 if the + URL came from an explicit link or user input, 0 if brute-forced. + + Always makes a copy of req, res; they can be destroyed safely; via_link + set to 2 means we're sure it's a valid link; 1 means "probably". */ + +void maybe_add_pivot(struct http_request* req, struct http_response* res, + u8 via_link); + +/* Creates a working copy of a request for use in db and crawl functions. If all + is 0, does not copy path, query parameters, or POST data (but still + copies headers); and forces GET method. */ + +struct http_request* req_copy(struct http_request* req, + struct pivot_desc* pv, u8 all); + +/* Finds the host-level pivot point for global issues. */ + +struct pivot_desc* host_pivot(struct pivot_desc* pv); + +/* Case sensitivity helper. 
*/ + +u8 is_c_sens(struct pivot_desc* pv); + +/* Recorded security issues: */ + +/* - Informational data (non-specific security-relevant notes): */ + +#define PROB_NONE 0 /* Invalid */ + +#define PROB_SSL_CERT 10101 /* SSL issuer data */ + +#define PROB_NEW_COOKIE 10201 /* New cookie added */ +#define PROB_SERVER_CHANGE 10202 /* New Server: value seen */ +#define PROB_VIA_CHANGE 10203 /* New Via: value seen */ +#define PROB_X_CHANGE 10204 /* New X-*: value seen */ +#define PROB_NEW_404 10205 /* New 404 signatures seen */ + +#define PROB_NO_ACCESS 10401 /* Resource not accessible */ +#define PROB_AUTH_REQ 10402 /* Authentication requires */ +#define PROB_SERV_ERR 10403 /* Server error */ + +#define PROB_EXT_LINK 10501 /* External link */ +#define PROB_EXT_REDIR 10502 /* External redirector */ +#define PROB_MAIL_ADDR 10503 /* E-mail address seen */ +#define PROB_UNKNOWN_PROTO 10504 /* Unknown protocol in URL */ +#define PROB_UNKNOWN_FIELD 10505 /* Unknown form field */ + +#define PROB_FORM 10601 /* XSRF-safe form */ +#define PROB_PASS_FORM 10602 /* Password form */ + +#define PROB_USER_LINK 10701 /* User-supplied A link */ + +#define PROB_BAD_MIME_STAT 10801 /* Bad MIME type, low risk */ +#define PROB_GEN_MIME_STAT 10802 /* Generic MIME, low risk */ +#define PROB_BAD_CSET_STAT 10803 /* Bad charset, low risk */ +#define PROB_CFL_HDRS_STAT 10804 /* Conflicting hdr, low risk */ + +#define PROB_FUZZ_DIGIT 10901 /* Try fuzzing file name */ +#define PROB_OGNL 10902 /* OGNL-like parameter */ + +/* - Internal warnings (scan failures, etc): */ + +#define PROB_FETCH_FAIL 20101 /* Fetch failed. */ +#define PROB_LIMITS 20102 /* Crawl limits exceeded. */ + +#define PROB_404_FAIL 20201 /* Behavior probe failed. */ +#define PROB_IPS_FILTER 20202 /* IPS behavior detected. */ +#define PROB_IPS_FILTER_OFF 20203 /* IPS no longer active. */ +#define PROB_VARIES 20204 /* Response varies. */ + +#define PROB_NOT_DIR 20301 /* Node should be a dir. 
*/ + +/* - Low severity issues (limited impact or check specificity): */ + +#define PROB_URL_AUTH 30101 /* HTTP credentials in URL */ + +#define PROB_SSL_CERT_DATE 30201 /* SSL cert date invalid */ +#define PROB_SSL_SELF_CERT 30202 /* Self-signed SSL cert */ +#define PROB_SSL_BAD_HOST 30203 /* Certificate host mismatch */ +#define PROB_SSL_NO_CERT 30204 /* No certificate data? */ + +#define PROB_DIR_LIST 30301 /* Dir listing bypass */ + +#define PROB_URL_REDIR 30401 /* URL redirection */ +#define PROB_USER_URL 30402 /* URL content inclusion */ + +#define PROB_EXT_OBJ 30501 /* External obj standalone */ +#define PROB_MIXED_OBJ 30502 /* Mixed content standalone */ + +#define PROB_VULN_FORM 30601 /* Form w/o anti-XSRF token */ +#define PROB_JS_XSSI 30602 /* Script with no XSSI prot */ + +#define PROB_CACHE_LOW 30701 /* Cache nit-picking */ + +/* - Moderate severity issues (data compromise): */ + +#define PROB_BODY_XSS 40101 /* Document body XSS */ +#define PROB_URL_XSS 40102 /* URL-based XSS */ +#define PROB_HTTP_INJECT 40103 /* Header splitting */ +#define PROB_USER_URL_ACT 40104 /* Active user content */ + +#define PROB_EXT_SUB 40201 /* External subresource */ +#define PROB_MIXED_SUB 40202 /* Mixed content subresource */ + +#define PROB_BAD_MIME_DYN 40301 /* Bad MIME type, hi risk */ +#define PROB_GEN_MIME_DYN 40302 /* Generic MIME, hi risk */ +#define PROB_BAD_CSET_DYN 40304 /* Bad charset, hi risk */ +#define PROB_CFL_HDRS_DYN 40305 /* Conflicting hdr, hi risk */ + +#define PROB_FILE_POI 40401 /* Interesting file */ +#define PROB_ERROR_POI 40402 /* Interesting error message */ + +#define PROB_DIR_TRAVERSAL 40501 /* Directory traversal */ + +#define PROB_CACHE_HI 40601 /* Serious caching issues */ + +/* - High severity issues (system compromise): */ + +#define PROB_XML_INJECT 50101 /* Backend XML injection */ +#define PROB_SH_INJECT 50102 /* Shell cmd injection */ +#define PROB_SQL_INJECT 50103 /* SQL injection */ +#define PROB_FMT_STRING 50104 /* Format string 
attack */ +#define PROB_INT_OVER 50105 /* Integer overflow attack */ + +#define PROB_SQL_PARAM 50201 /* SQL-like parameter */ + +/* - Severity macros: */ + +#define PSEV(_x) ((_x) / 10000) +#define PSEV_INFO 1 +#define PSEV_WARN 2 +#define PSEV_LOW 3 +#define PSEV_MED 4 +#define PSEV_HI 5 + +/* Issue descriptor: */ + +struct issue_desc { + u32 type; /* PROB_* */ + u8* extra; /* Problem-specific string */ + struct http_request* req; /* HTTP request sent */ + struct http_response* res; /* HTTP response seen */ +}; + +/* Register a problem, if not duplicate (res, extra may be NULL): */ + +void problem(u32 type, struct http_request* req, struct http_response* res, + u8* extra, struct pivot_desc* pv, u8 allow_dup); + +/* Compare the checksums for two responses: */ + +u8 same_page(struct http_sig* sig1, struct http_sig* sig2); + +/* URL filtering constraints (exported from database.c): */ + +#define APPEND_FILTER(_ptr, _cnt, _val) do { \ + (_ptr) = ck_realloc(_ptr, ((_cnt) + 1) * sizeof(u8*)); \ + (_ptr)[_cnt] = (u8*)(_val); \ + (_cnt)++; \ + } while (0) + +extern u8 **deny_urls, **deny_strings, **allow_urls, **allow_domains, + **trust_domains; + +extern u32 num_deny_urls, + num_deny_strings, + num_allow_urls, + num_allow_domains, + num_trust_domains; + +extern u32 max_depth, + max_children, + max_trylist, + max_guesses; + +/* Check if the URL is permitted under current rules (0 = no, 1 = yes): */ + +u8 url_allowed_host(struct http_request* req); +u8 url_trusted_host(struct http_request* req); +u8 url_allowed(struct http_request* req); + +/* Keyword management: */ + +extern u8 dont_add_words; + +/* Adds a new keyword candidate to the "guess" list. */ + +void wordlist_add_guess(u8* text); + +/* Adds non-sanitized keywords to the list. */ + +void wordlist_confirm_word(u8* text); + +/* Returns wordlist item at a specified offset (NULL if no more available). */ + +u8* wordlist_get_word(u32 offset); + +/* Returns keyword candidate at a specified offset (or NULL). 
*/ + +u8* wordlist_get_guess(u32 offset); + +/* Returns extension at a specified offset (or NULL). */ + +u8* wordlist_get_extension(u32 offset); + +/* Loads keywords from file. */ + +void load_keywords(u8* fname, u32 purge_age); + +/* Saves all keywords to a file. */ + +void save_keywords(u8* fname); + +/* Database maintenance: */ + +/* Dumps pivot database, for debugging purposes. */ + +void dump_pivots(struct pivot_desc* cur, u8 nest); + +/* Deallocates all data, for debugging purposes. */ + +void destroy_database(); + +/* Prints DB stats. */ + +void database_stats(); + +/* XSS manager: */ + +/* Creates a new stored XSS id (buffer valid only until next call). */ + +u8* new_xss_tag(u8* prefix); + +/* Registers last XSS tag along with a completed http_request. */ + +void register_xss_tag(struct http_request* req); + +/* Returns request associated with a stored XSS id. */ + +struct http_request* get_xss_request(u32 xid, u32 sid); + +/* Dumps signature data: */ + +void dump_signature(struct http_sig* sig); + +/* Displays debug information for same_page() checks. */ + +void debug_same_page(struct http_sig* sig1, struct http_sig* sig2); + +#endif /* _HAVE_DATABASE_H */ + diff --git a/debug.h b/debug.h new file mode 100644 index 0000000..d7e0b78 --- /dev/null +++ b/debug.h @@ -0,0 +1,96 @@ +/* + + skipfish - debugging and messaging macros + ----------------------------------------- + + Author: Michal Zalewski + + Copyright 2009, 2010 by Google Inc. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and + limitations under the License. + +*/ + +#ifndef _HAVE_DEBUG_H +#define _HAVE_DEBUG_H + +#include +#include "config.h" + +#ifdef USE_COLOR +# define cBLK "\x1b[0;30m" +# define cRED "\x1b[0;31m" +# define cGRN "\x1b[0;32m" +# define cBRN "\x1b[0;33m" +# define cBLU "\x1b[0;34m" +# define cMGN "\x1b[0;35m" +# define cCYA "\x1b[0;36m" +# define cNOR "\x1b[0;37m" +# define cGRA "\x1b[1;30m" +# define cLRD "\x1b[1;31m" +# define cLGN "\x1b[1;32m" +# define cYEL "\x1b[1;33m" +# define cLBL "\x1b[1;34m" +# define cPIN "\x1b[1;35m" +# define cLCY "\x1b[1;36m" +# define cBRI "\x1b[1;37m" +#else +# define cBLK +# define cRED +# define cGRN +# define cBRN +# define cBLU +# define cMGN +# define cCYA +# define cNOR +# define cGRA +# define cLRD +# define cLGN +# define cYEL +# define cLBL +# define cPIN +# define cLCY +# define cBRI +#endif /* ^USE_COLOR */ + +#ifdef LOG_STDERR +# define DEBUG(x...) fprintf(stderr,x) +#else +# define DEBUG(x...) +#endif /* ^LOG_STDERR */ + +#define F_DEBUG(x...) fprintf(stderr,x) +#define SAY(x...) printf(x) + +#define WARN(x...) do { \ + F_DEBUG(cYEL "[!] WARNING: " cBRI x); \ + F_DEBUG(cNOR "\n"); \ + } while (0) + +#define FATAL(x...) do { \ + F_DEBUG(cLRD "[-] PROGRAM ABORT : " cBRI x); \ + F_DEBUG(cLRD "\n Stop location : " cNOR "%s(), %s:%u\n", \ + __FUNCTION__, __FILE__, __LINE__); \ + exit(1); \ + } while (0) + +#define PFATAL(x...) do { \ + F_DEBUG(cLRD "[-] SYSTEM ERROR : " cBRI x); \ + F_DEBUG(cLRD "\n Stop location : " cNOR "%s(), %s:%u\n", \ + __FUNCTION__, __FILE__, __LINE__); \ + perror(cLRD " OS message " cNOR); \ + exit(1); \ + } while (0) + + +#endif /* ! _HAVE_DEBUG_H */ diff --git a/dictionaries/README-FIRST b/dictionaries/README-FIRST new file mode 100644 index 0000000..cfa49df --- /dev/null +++ b/dictionaries/README-FIRST @@ -0,0 +1,186 @@ +This directory contains four alternative, hand-picked Skipfish dictionaries. 
+ +Before you pick one, you should understand several basic concepts related to +dictionary management in this scanner, as this topic is of critical importance +to the quality of your scans. + +----------------------------- +Dictionary management basics: +----------------------------- + +1) Each dictionary may consist of a number of extensions, and a number of + "regular" keywords. Extensions are considered just a special subset of + the keyword list. + +2) You can specify the dictionary to use with a -W option. The file must + conform to the following format: + + type hits total_age last_age keyword + + ...where 'type' is either 'e' or 'w' (extension or wordlist); 'hits' + is the total number of times this keyword resulted in a non-404 hit + in all previous scans; 'total_age' is the number of scan cycles this + word is in the dictionary; 'last_age' is the number of scan cycles + since the last 'hit'; and 'keyword' is the actual keyword. + + Do not duplicate extensions as keywords - if you already have 'html' as + an 'e' entry, there is no need to also create a 'w' one. + + There must be no empty or malformed lines, comments, etc, in the wordlist + file. Extension keywords must have no leading dot (e.g., 'exe', not '.exe'), + and all keywords should be NOT url-encoded (e.g., 'Program Files', not + 'Program%20Files'). No keyword should exceed 64 characters. + + If you omit -W in the command line, 'skipfish.wl' is assumed. + +3) When loading a dictionary, you can use -R option to drop any entries + that had no hits for a specified number of scans. + +4) Unless -L is specified in the command line, the scanner will also + automatically learn new keywords and extensions based on any links + discovered during the scan. + +5) Unless -L is specified, the scanner will also analyze pages and extract + words that would serve as keyword guesses. 
A capped number of guesses + is maintained by the scanner, with older entries being removed from the + list as new ones are found (the size of this jar is adjustable with the + -G option). + + These guesses would be tested along with regular keywords during brute-force + steps. If they result in a non-404 hit at some point, they are promoted to + the "proper" keyword list. + +6) Unless -V is specified in the command line, all newly discovered keywords + are saved back to the input wordlist file, along with their hit statistics. + +---------------------------------------------- +Dictionaries are used for the following tasks: +---------------------------------------------- + +1) When a new directory, or a file-like query or POST parameter is discovered, + the scanner attempts passing all possible values to discover new + files, directories, etc. + +2) If you did NOT specify -Y in the command line, the scanner also tests all + possible . pairs in these cases. Note that this may + result in several orders of magnitude more requests, but is the only way + to discover files such as 'backup.tar.gz', 'database.csv', etc. + +3) For any non-404 file or directory discovered by any other means, the scanner + also attempts all . combinations, to discover, + for example, entries such as 'index.php.old'. + +---------------------- +Supplied dictionaries: +---------------------- + +1) Empty dictionary (-). + + Simply create an empty file, then load it via -W. If you use this option + in conjunction with -L, this essentially inhibits all brute-force testing, + and results in an orderly, link-based crawl. + + If -L is not used, the crawler will still attempt brute-force, but only + based on the keywords and extensions discovered when crawling the site. + This means it will likely learn keywords such as 'index' or extensions + such as 'html' - but may never attempt probing for 'log', 'old', 'bak', etc. 
+
+  Both these variants are very useful for lightweight scans, but are not
+  particularly exhaustive.
+
+2) Extension-only dictionary (extensions-only.wl).
+
+  This dictionary contains about 90 common file extensions, and no other
+  keywords. It must be used in conjunction with -Y (otherwise, it will not
+  behave as expected).
+
+  This is often a better alternative to a null dictionary: the scanner will
+  still limit brute-force primarily to file names learned on the site, but
+  will know about extensions such as 'log' or 'old', and will test for them
+  accordingly.
+
+3) Basic extensions dictionary (minimal.wl).
+
+  This dictionary contains about 25 extensions, focusing on common entries
+  most likely to spell trouble (.bak, .old, .conf, .zip, etc); and about 1,700
+  hand-picked keywords.
+
+  This is useful for quick assessments where no obscure technologies are used.
+  The principal scan cost is about 42,000 requests per each fuzzed directory.
+  Using it without -L is recommended, as the list of extensions does not
+  include standard framework-specific cases (.asp, .jsp, .php, etc), and
+  these are best learned on the fly.
+
+  You can also use this dictionary with -Y option enabled, approximating the
+  behavior of most other security scanners; in this case, it will send only
+  about 1,700 requests per directory, and will look for 25 secondary extensions
+  only on otherwise discovered resources.
+
+4) Standard extensions dictionary (default.wl).
+
+  This dictionary contains about 60 common extensions, plus the same set of
+  1,700 keywords. The extensions cover most of the common, interesting web
+  resources.
+
+  This is a good starting point for assessments where scan times are not
+  a critical factor; the cost is about 100,000 requests per each fuzzed
+  directory.
+
+  In -Y mode, it behaves nearly identically to minimal.wl, but will test a
+  greater set of extensions on otherwise discovered resources, at a relatively
+  minor expense.
+
+5) Complete extensions dictionary (complete.wl).
+
+  Contains about 90 common extensions and 1,700 keywords. These extensions
+  cover a broader range of media types, including some less common programming
+  languages, image and video formats, etc.
+
+  Useful for comprehensive assessments, over 150,000 requests per each fuzzed
+  directory.
+
+  In -Y mode - see default.wl; it offers the best coverage of all three
+  wordlists at a relatively low cost.
+
+Of course, you can customize these dictionaries as you see fit. It might be,
+for example, a good idea to downgrade file extensions not likely to occur
+given the technologies used by your target host to regular 'w' records.
+
+Whichever option you choose, be sure to make a *copy* of this dictionary, and
+load that copy, not the original, via -W. The specified file will be
+overwritten with site-specific information (unless -V is used).
+
+----------------------------------
+Bah, these dictionaries are small!
+----------------------------------
+
+Keep in mind that web crawling is not password guessing; it is exceedingly
+unlikely for web servers to have directories or files named 'henceforth',
+'abating', or 'witlessly'. Because of this, using 200,000+ entry English
+wordlists, or similar data sets, is largely pointless.
+
+More importantly, doing so often leads to reduced coverage or unacceptable
+scan times; with a 200k wordlist and 80 extensions, trying all combinations
+for a single directory would take 30-40 hours against a slow server; and even
+with a fast one, at least 5 hours is to be expected.
+
+DirBuster uses a unique approach that seems promising at first sight - to
+base its wordlists on how often a particular keyword appeared in URLs seen
+on the Internet. This is interesting, but comes with two gotchas:
+
+  - Keywords related to popular websites and brands are heavily
+    overrepresented; DirBuster wordlists have 'bbc_news_24', 'beebie_bunny',
+    and 'koalabrothers' near the top of their list, but it is pretty unlikely
+    these keywords would be of any use in real-world assessments of a typical
+    site, unless it happens to be BBC.
+
+  - Some of the most interesting security-related keywords are not commonly
+    indexed, and may appear, say, on no more than a few dozen or a few
+    thousand crawled websites in the Google index. But, that does not make
+    'AggreSpy' or '.ssh/authorized_keys' any less interesting.
+
+Bottom line is, poor wordlists are one of the reasons why some other web
+security scanners perform worse than expected, so please - be careful. You
+will almost always be better off narrowing down or selectively extending the
+supplied set (and possibly contributing back your changes upstream!), than
+importing a giant wordlist from elsewhere.
diff --git a/dictionaries/complete.wl b/dictionaries/complete.wl new file mode 100644 index 0000000..1f150d5 --- /dev/null +++ b/dictionaries/complete.wl @@ -0,0 +1,1894 @@ +e 1 1 1 7z +e 1 1 1 asmx +e 1 1 1 asp +e 1 1 1 aspx +e 1 1 1 bak +e 1 1 1 bat +e 1 1 1 bin +e 1 1 1 bz2 +e 1 1 1 c +e 1 1 1 cc +e 1 1 1 cfg +e 1 1 1 cfm +e 1 1 1 cgi +e 1 1 1 class +e 1 1 1 cnf +e 1 1 1 conf +e 1 1 1 config +e 1 1 1 cpp +e 1 1 1 cs +e 1 1 1 csv +e 1 1 1 dat +e 1 1 1 db +e 1 1 1 dll +e 1 1 1 do +e 1 1 1 doc +e 1 1 1 dump +e 1 1 1 ep +e 1 1 1 err +e 1 1 1 error +e 1 1 1 exe +e 1 1 1 gif +e 1 1 1 gz +e 1 1 1 htm +e 1 1 1 html +e 1 1 1 inc +e 1 1 1 ini +e 1 1 1 java +e 1 1 1 jhtml +e 1 1 1 jpg +e 1 1 1 js +e 1 1 1 jsf +e 1 1 1 jsp +e 1 1 1 key +e 1 1 1 lib +e 1 1 1 log +e 1 1 1 lst +e 1 1 1 manifest +e 1 1 1 mdb +e 1 1 1 meta +e 1 1 1 msg +e 1 1 1 nsf +e 1 1 1 o +e 1 1 1 old +e 1 1 1 ora +e 1 1 1 orig +e 1 1 1 out +e 1 1 1 part +e 1 1 1 pdf +e 1 1 1 php +e 1 1 1 php3 +e 1 1 1 pl +e 1 1 1 pm +e 1 1 1 png +e 1 1 1 ppt +e 1 1 1 properties +e 1 1 1 py +e 1 1 1 rar +e 1 1 1 rss +e 1 1 1 rtf +e 1 1 1 save +e 1 1 1 sh +e 1 1 1 shtml +e 1 1 1 so +e 1 1 1 sql +e 1 1 1 stackdump +e 1 1 1 swf +e 1 1 1 tar +e 1 1 1 tar.bz2 +e 1 1 1 tar.gz +e 1 1 1 temp +e 1 1 1 test +e 1 1 1 tgz +e 1 1 1 tmp +e 1 1 1 trace +e 1 1 1 txt +e 1 1 1 vb +e 1 1 1 vbs +e 1 1 1 ws +e 1 1 1 xls +e 1 1 1 xml +e 1 1 1 xsl +e 1 1 1 zip +w 1 1 1 .bash_history +w 1 1 1 .bashrc +w 1 1 1 .cvsignore +w 1 1 1 .history +w 1 1 1 .htaccess +w 1 1 1 .htpasswd +w 1 1 1 .passwd +w 1 1 1 .perf +w 1 1 1 .ssh +w 1 1 1 .svn +w 1 1 1 .web +w 1 1 1 0 +w 1 1 1 00 +w 1 1 1 01 +w 1 1 1 02 +w 1 1 1 03 +w 1 1 1 04 +w 1 1 1 05 +w 1 1 1 06 +w 1 1 1 07 +w 1 1 1 08 +w 1 1 1 09 +w 1 1 1 1 +w 1 1 1 10 +w 1 1 1 100 +w 1 1 1 1000 +w 1 1 1 1001 +w 1 1 1 101 +w 1 1 1 11 +w 1 1 1 12 +w 1 1 1 13 +w 1 1 1 14 +w 1 1 1 15 +w 1 1 1 1990 +w 1 1 1 1991 +w 1 1 1 1992 +w 1 1 1 1993 +w 1 1 1 1994 +w 1 1 1 1995 +w 1 1 1 1996 +w 1 1 1 1997 +w 1 1 1 1998 +w 1 1 1 1999 
+w 1 1 1 2 +w 1 1 1 20 +w 1 1 1 200 +w 1 1 1 2000 +w 1 1 1 2001 +w 1 1 1 2002 +w 1 1 1 2003 +w 1 1 1 2004 +w 1 1 1 2005 +w 1 1 1 2006 +w 1 1 1 2007 +w 1 1 1 2008 +w 1 1 1 2009 +w 1 1 1 2010 +w 1 1 1 2011 +w 1 1 1 2012 +w 1 1 1 21 +w 1 1 1 22 +w 1 1 1 23 +w 1 1 1 24 +w 1 1 1 25 +w 1 1 1 2g +w 1 1 1 3 +w 1 1 1 300 +w 1 1 1 3g +w 1 1 1 4 +w 1 1 1 42 +w 1 1 1 5 +w 1 1 1 50 +w 1 1 1 500 +w 1 1 1 51 +w 1 1 1 6 +w 1 1 1 7 +w 1 1 1 8 +w 1 1 1 9 +w 1 1 1 ADM +w 1 1 1 ADMIN +w 1 1 1 AggreSpy +w 1 1 1 AppsLocalLogin +w 1 1 1 AppsLogin +w 1 1 1 BUILD +w 1 1 1 CMS +w 1 1 1 CVS +w 1 1 1 DB +w 1 1 1 DMSDump +w 1 1 1 Documents and Settings +w 1 1 1 Entries +w 1 1 1 FCKeditor +w 1 1 1 JMXSoapAdapter +w 1 1 1 LICENSE +w 1 1 1 MANIFEST.MF +w 1 1 1 META-INF +w 1 1 1 Makefile +w 1 1 1 OA +w 1 1 1 OAErrorDetailPage +w 1 1 1 OA_HTML +w 1 1 1 Program Files +w 1 1 1 README +w 1 1 1 Readme +w 1 1 1 Recycled +w 1 1 1 Root +w 1 1 1 SQL +w 1 1 1 SUNWmc +w 1 1 1 SiteScope +w 1 1 1 SiteServer +w 1 1 1 Spy +w 1 1 1 TEMP +w 1 1 1 TMP +w 1 1 1 TODO +w 1 1 1 Thumbs.db +w 1 1 1 WEB-INF +w 1 1 1 WS_FTP +w 1 1 1 XXX +w 1 1 1 _ +w 1 1 1 _adm +w 1 1 1 _admin +w 1 1 1 _files +w 1 1 1 _include +w 1 1 1 _js +w 1 1 1 _mem_bin +w 1 1 1 _old +w 1 1 1 _pages +w 1 1 1 _private +w 1 1 1 _res +w 1 1 1 _source +w 1 1 1 _src +w 1 1 1 _test +w 1 1 1 _vti_bin +w 1 1 1 _vti_cnf +w 1 1 1 _vti_pvt +w 1 1 1 _vti_txt +w 1 1 1 _www +w 1 1 1 a +w 1 1 1 aa +w 1 1 1 aaa +w 1 1 1 abc +w 1 1 1 abc123 +w 1 1 1 abcd +w 1 1 1 abcd1234 +w 1 1 1 about +w 1 1 1 access +w 1 1 1 access-log +w 1 1 1 access-log.1 +w 1 1 1 access.1 +w 1 1 1 access_log +w 1 1 1 access_log.1 +w 1 1 1 accessibility +w 1 1 1 account +w 1 1 1 accounting +w 1 1 1 accounts +w 1 1 1 action +w 1 1 1 actions +w 1 1 1 active +w 1 1 1 activex +w 1 1 1 ad +w 1 1 1 adclick +w 1 1 1 add +w 1 1 1 addressbook +w 1 1 1 adm +w 1 1 1 admin +w 1 1 1 admin_ +w 1 1 1 ads +w 1 1 1 adv +w 1 1 1 advertise +w 1 1 1 advertising +w 1 1 1 affiliate +w 1 1 1 affiliates +w 1 1 1 agenda 
+w 1 1 1 agent +w 1 1 1 agents +w 1 1 1 ajax +w 1 1 1 album +w 1 1 1 albums +w 1 1 1 alert +w 1 1 1 alerts +w 1 1 1 alias +w 1 1 1 aliases +w 1 1 1 all +w 1 1 1 alpha +w 1 1 1 alumni +w 1 1 1 analog +w 1 1 1 announcement +w 1 1 1 announcements +w 1 1 1 anon +w 1 1 1 anonymous +w 1 1 1 ansi +w 1 1 1 apac +w 1 1 1 apache +w 1 1 1 apexec +w 1 1 1 api +w 1 1 1 apis +w 1 1 1 app +w 1 1 1 appeal +w 1 1 1 appeals +w 1 1 1 append +w 1 1 1 appl +w 1 1 1 apple +w 1 1 1 appliation +w 1 1 1 applications +w 1 1 1 apps +w 1 1 1 apr +w 1 1 1 arch +w 1 1 1 archive +w 1 1 1 archives +w 1 1 1 array +w 1 1 1 art +w 1 1 1 article +w 1 1 1 articles +w 1 1 1 artwork +w 1 1 1 ascii +w 1 1 1 asdf +w 1 1 1 asset +w 1 1 1 assets +w 1 1 1 atom +w 1 1 1 attach +w 1 1 1 attachment +w 1 1 1 attachments +w 1 1 1 attachs +w 1 1 1 attic +w 1 1 1 audio +w 1 1 1 audit +w 1 1 1 audits +w 1 1 1 auth +w 1 1 1 author +w 1 1 1 authorized_keys +w 1 1 1 authors +w 1 1 1 auto +w 1 1 1 automatic +w 1 1 1 automation +w 1 1 1 avatar +w 1 1 1 avatars +w 1 1 1 award +w 1 1 1 awards +w 1 1 1 awl +w 1 1 1 awstats +w 1 1 1 b +w 1 1 1 b2b +w 1 1 1 b2c +w 1 1 1 back +w 1 1 1 backdoor +w 1 1 1 backend +w 1 1 1 backup +w 1 1 1 backups +w 1 1 1 bandwidth +w 1 1 1 bank +w 1 1 1 banks +w 1 1 1 banner +w 1 1 1 banners +w 1 1 1 bar +w 1 1 1 base +w 1 1 1 bash +w 1 1 1 basic +w 1 1 1 basket +w 1 1 1 baskets +w 1 1 1 batch +w 1 1 1 baz +w 1 1 1 bb +w 1 1 1 bb-hist +w 1 1 1 bb-histlog +w 1 1 1 bboard +w 1 1 1 bbs +w 1 1 1 beans +w 1 1 1 beehive +w 1 1 1 benefits +w 1 1 1 beta +w 1 1 1 bfc +w 1 1 1 big +w 1 1 1 bigip +w 1 1 1 bill +w 1 1 1 billing +w 1 1 1 binaries +w 1 1 1 binary +w 1 1 1 bins +w 1 1 1 bio +w 1 1 1 bios +w 1 1 1 biz +w 1 1 1 bkup +w 1 1 1 blah +w 1 1 1 blank +w 1 1 1 blog +w 1 1 1 blogger +w 1 1 1 bloggers +w 1 1 1 blogs +w 1 1 1 board +w 1 1 1 bofh +w 1 1 1 book +w 1 1 1 books +w 1 1 1 boot +w 1 1 1 bottom +w 1 1 1 broken +w 1 1 1 broker +w 1 1 1 browse +w 1 1 1 bs +w 1 1 1 bsd +w 1 1 1 bugs +w 1 1 1 build +w 
1 1 1 buildr +w 1 1 1 bulk +w 1 1 1 bullet +w 1 1 1 business +w 1 1 1 button +w 1 1 1 buttons +w 1 1 1 buy +w 1 1 1 buynow +w 1 1 1 bypass +w 1 1 1 ca +w 1 1 1 cache +w 1 1 1 cal +w 1 1 1 calendar +w 1 1 1 camel +w 1 1 1 car +w 1 1 1 card +w 1 1 1 cards +w 1 1 1 career +w 1 1 1 careers +w 1 1 1 cars +w 1 1 1 cart +w 1 1 1 carts +w 1 1 1 cat +w 1 1 1 catalog +w 1 1 1 catalogs +w 1 1 1 catalyst +w 1 1 1 categories +w 1 1 1 category +w 1 1 1 catinfo +w 1 1 1 cats +w 1 1 1 ccbill +w 1 1 1 cd +w 1 1 1 cerificate +w 1 1 1 cert +w 1 1 1 certificate +w 1 1 1 certificates +w 1 1 1 certs +w 1 1 1 cf +w 1 1 1 cfcache +w 1 1 1 cfdocs +w 1 1 1 cfide +w 1 1 1 cfusion +w 1 1 1 cgi-bin +w 1 1 1 cgi-bin2 +w 1 1 1 cgi-home +w 1 1 1 cgi-local +w 1 1 1 cgi-pub +w 1 1 1 cgi-script +w 1 1 1 cgi-shl +w 1 1 1 cgi-sys +w 1 1 1 cgi-web +w 1 1 1 cgi-win +w 1 1 1 cgibin +w 1 1 1 cgiwrap +w 1 1 1 cgm-web +w 1 1 1 change +w 1 1 1 changed +w 1 1 1 changes +w 1 1 1 charge +w 1 1 1 charges +w 1 1 1 chat +w 1 1 1 chats +w 1 1 1 checkout +w 1 1 1 child +w 1 1 1 children +w 1 1 1 cisco +w 1 1 1 cisweb +w 1 1 1 citrix +w 1 1 1 cl +w 1 1 1 claim +w 1 1 1 claims +w 1 1 1 classes +w 1 1 1 classified +w 1 1 1 classifieds +w 1 1 1 clear +w 1 1 1 click +w 1 1 1 clicks +w 1 1 1 client +w 1 1 1 clientaccesspolicy +w 1 1 1 clients +w 1 1 1 close +w 1 1 1 closed +w 1 1 1 closing +w 1 1 1 club +w 1 1 1 cluster +w 1 1 1 clusters +w 1 1 1 cmd +w 1 1 1 cms +w 1 1 1 cnf +w 1 1 1 cnt +w 1 1 1 cocoon +w 1 1 1 code +w 1 1 1 codec +w 1 1 1 codecs +w 1 1 1 codes +w 1 1 1 cognos +w 1 1 1 coldfusion +w 1 1 1 columns +w 1 1 1 com +w 1 1 1 comment +w 1 1 1 comments +w 1 1 1 commerce +w 1 1 1 commercial +w 1 1 1 common +w 1 1 1 communicator +w 1 1 1 community +w 1 1 1 compact +w 1 1 1 company +w 1 1 1 complaint +w 1 1 1 complaints +w 1 1 1 compliance +w 1 1 1 component +w 1 1 1 compressed +w 1 1 1 computer +w 1 1 1 computers +w 1 1 1 computing +w 1 1 1 conference +w 1 1 1 conferences +w 1 1 1 configs +w 1 1 1 console +w 1 1 1 
consumer +w 1 1 1 contact +w 1 1 1 contacts +w 1 1 1 content +w 1 1 1 contents +w 1 1 1 contract +w 1 1 1 contracts +w 1 1 1 control +w 1 1 1 controlpanel +w 1 1 1 cookie +w 1 1 1 cookies +w 1 1 1 copies +w 1 1 1 copy +w 1 1 1 copyright +w 1 1 1 core +w 1 1 1 corp +w 1 1 1 corpo +w 1 1 1 corporate +w 1 1 1 corrections +w 1 1 1 count +w 1 1 1 counter +w 1 1 1 counters +w 1 1 1 counts +w 1 1 1 course +w 1 1 1 courses +w 1 1 1 cover +w 1 1 1 cpanel +w 1 1 1 cr +w 1 1 1 crack +w 1 1 1 crash +w 1 1 1 crashes +w 1 1 1 create +w 1 1 1 credits +w 1 1 1 crm +w 1 1 1 cron +w 1 1 1 crons +w 1 1 1 crontab +w 1 1 1 crontabs +w 1 1 1 crossdomain +w 1 1 1 crypt +w 1 1 1 crypto +w 1 1 1 css +w 1 1 1 current +w 1 1 1 custom +w 1 1 1 custom-log +w 1 1 1 custom_log +w 1 1 1 customer +w 1 1 1 customers +w 1 1 1 cv +w 1 1 1 cxf +w 1 1 1 czcmdcvt +w 1 1 1 d +w 1 1 1 daemon +w 1 1 1 daily +w 1 1 1 dana-na +w 1 1 1 data +w 1 1 1 database +w 1 1 1 databases +w 1 1 1 date +w 1 1 1 dba +w 1 1 1 dbase +w 1 1 1 dbman +w 1 1 1 dc +w 1 1 1 dcforum +w 1 1 1 de +w 1 1 1 dealer +w 1 1 1 debug +w 1 1 1 decl +w 1 1 1 declaration +w 1 1 1 declarations +w 1 1 1 decode +w 1 1 1 decrypt +w 1 1 1 def +w 1 1 1 default +w 1 1 1 defaults +w 1 1 1 definition +w 1 1 1 definitions +w 1 1 1 del +w 1 1 1 delete +w 1 1 1 deleted +w 1 1 1 demo +w 1 1 1 demos +w 1 1 1 denied +w 1 1 1 deny +w 1 1 1 design +w 1 1 1 desktop +w 1 1 1 desktops +w 1 1 1 detail +w 1 1 1 details +w 1 1 1 dev +w 1 1 1 devel +w 1 1 1 developer +w 1 1 1 developers +w 1 1 1 development +w 1 1 1 device +w 1 1 1 devices +w 1 1 1 devs +w 1 1 1 df +w 1 1 1 dialog +w 1 1 1 dialogs +w 1 1 1 diff +w 1 1 1 diffs +w 1 1 1 digest +w 1 1 1 digg +w 1 1 1 dir +w 1 1 1 directories +w 1 1 1 directory +w 1 1 1 dirs +w 1 1 1 disabled +w 1 1 1 disclaimer +w 1 1 1 display +w 1 1 1 django +w 1 1 1 dl +w 1 1 1 dm +w 1 1 1 dm-config +w 1 1 1 dms +w 1 1 1 dms0 +w 1 1 1 dns +w 1 1 1 dock +w 1 1 1 docroot +w 1 1 1 docs +w 1 1 1 document +w 1 1 1 documentation +w 1 1 1 
documents +w 1 1 1 domain +w 1 1 1 domains +w 1 1 1 down +w 1 1 1 download +w 1 1 1 downloads +w 1 1 1 drop +w 1 1 1 dropped +w 1 1 1 drupal +w 1 1 1 dummy +w 1 1 1 dumps +w 1 1 1 dvd +w 1 1 1 dwr +w 1 1 1 dynamic +w 1 1 1 e +w 1 1 1 e2fs +w 1 1 1 ear +w 1 1 1 ecommerce +w 1 1 1 edge +w 1 1 1 edit +w 1 1 1 editor +w 1 1 1 edits +w 1 1 1 edu +w 1 1 1 education +w 1 1 1 ee +w 1 1 1 effort +w 1 1 1 efforts +w 1 1 1 egress +w 1 1 1 ejb +w 1 1 1 element +w 1 1 1 elements +w 1 1 1 em +w 1 1 1 email +w 1 1 1 emails +w 1 1 1 emea +w 1 1 1 employees +w 1 1 1 employment +w 1 1 1 empty +w 1 1 1 emu +w 1 1 1 emulator +w 1 1 1 en +w 1 1 1 en_US +w 1 1 1 encode +w 1 1 1 encrypt +w 1 1 1 eng +w 1 1 1 engine +w 1 1 1 english +w 1 1 1 enterprise +w 1 1 1 entertainment +w 1 1 1 entries +w 1 1 1 entry +w 1 1 1 env +w 1 1 1 environ +w 1 1 1 environment +w 1 1 1 error +w 1 1 1 error-log +w 1 1 1 error_log +w 1 1 1 errors +w 1 1 1 es +w 1 1 1 esale +w 1 1 1 esales +w 1 1 1 etc +w 1 1 1 europe +w 1 1 1 event +w 1 1 1 events +w 1 1 1 evil +w 1 1 1 evt +w 1 1 1 ews +w 1 1 1 ex +w 1 1 1 example +w 1 1 1 examples +w 1 1 1 excalibur +w 1 1 1 exchange +w 1 1 1 exec +w 1 1 1 export +w 1 1 1 ext +w 1 1 1 ext2 +w 1 1 1 extern +w 1 1 1 external +w 1 1 1 ezshopper +w 1 1 1 f +w 1 1 1 face +w 1 1 1 faces +w 1 1 1 faculty +w 1 1 1 fail +w 1 1 1 failure +w 1 1 1 family +w 1 1 1 faq +w 1 1 1 faqs +w 1 1 1 fcgi-bin +w 1 1 1 feature +w 1 1 1 features +w 1 1 1 feed +w 1 1 1 feedback +w 1 1 1 feeds +w 1 1 1 felix +w 1 1 1 field +w 1 1 1 fields +w 1 1 1 file +w 1 1 1 fileadmin +w 1 1 1 files +w 1 1 1 filez +w 1 1 1 finance +w 1 1 1 financial +w 1 1 1 find +w 1 1 1 finger +w 1 1 1 firewall +w 1 1 1 fixed +w 1 1 1 flags +w 1 1 1 flash +w 1 1 1 flow +w 1 1 1 flows +w 1 1 1 flv +w 1 1 1 fn +w 1 1 1 folder +w 1 1 1 folders +w 1 1 1 font +w 1 1 1 fonts +w 1 1 1 foo +w 1 1 1 footer +w 1 1 1 footers +w 1 1 1 form +w 1 1 1 formatting +w 1 1 1 formmail +w 1 1 1 forms +w 1 1 1 forrest +w 1 1 1 fortune +w 1 1 1 forum 
+w 1 1 1 forum1 +w 1 1 1 forum2 +w 1 1 1 forumdisplay +w 1 1 1 forums +w 1 1 1 forward +w 1 1 1 foto +w 1 1 1 foundation +w 1 1 1 fr +w 1 1 1 frame +w 1 1 1 frames +w 1 1 1 framework +w 1 1 1 free +w 1 1 1 freebsd +w 1 1 1 friend +w 1 1 1 friends +w 1 1 1 frob +w 1 1 1 frontend +w 1 1 1 fs +w 1 1 1 ftp +w 1 1 1 fuck +w 1 1 1 fuckoff +w 1 1 1 fuckyou +w 1 1 1 full +w 1 1 1 fun +w 1 1 1 func +w 1 1 1 funcs +w 1 1 1 function +w 1 1 1 functions +w 1 1 1 fusion +w 1 1 1 fw +w 1 1 1 g +w 1 1 1 galleries +w 1 1 1 gallery +w 1 1 1 game +w 1 1 1 games +w 1 1 1 ganglia +w 1 1 1 garbage +w 1 1 1 gateway +w 1 1 1 gb +w 1 1 1 geeklog +w 1 1 1 general +w 1 1 1 geronimo +w 1 1 1 get +w 1 1 1 getaccess +w 1 1 1 getjobid +w 1 1 1 gfx +w 1 1 1 gitweb +w 1 1 1 glimpse +w 1 1 1 global +w 1 1 1 globals +w 1 1 1 glossary +w 1 1 1 go +w 1 1 1 goaway +w 1 1 1 google +w 1 1 1 government +w 1 1 1 gprs +w 1 1 1 grant +w 1 1 1 grants +w 1 1 1 graphics +w 1 1 1 group +w 1 1 1 groupcp +w 1 1 1 groups +w 1 1 1 gsm +w 1 1 1 guest +w 1 1 1 guestbook +w 1 1 1 guests +w 1 1 1 guide +w 1 1 1 guides +w 1 1 1 gump +w 1 1 1 gwt +w 1 1 1 h +w 1 1 1 hack +w 1 1 1 hacker +w 1 1 1 hacking +w 1 1 1 hackme +w 1 1 1 hadoop +w 1 1 1 hardcore +w 1 1 1 hardware +w 1 1 1 harmony +w 1 1 1 head +w 1 1 1 header +w 1 1 1 headers +w 1 1 1 health +w 1 1 1 hello +w 1 1 1 help +w 1 1 1 helper +w 1 1 1 helpers +w 1 1 1 hi +w 1 1 1 hidden +w 1 1 1 hide +w 1 1 1 high +w 1 1 1 hipaa +w 1 1 1 history +w 1 1 1 hit +w 1 1 1 hits +w 1 1 1 hole +w 1 1 1 home +w 1 1 1 homepage +w 1 1 1 hop +w 1 1 1 horde +w 1 1 1 hosting +w 1 1 1 hosts +w 1 1 1 howto +w 1 1 1 hp +w 1 1 1 hr +w 1 1 1 hta +w 1 1 1 htbin +w 1 1 1 htdoc +w 1 1 1 htdocs +w 1 1 1 htpasswd +w 1 1 1 http +w 1 1 1 httpd +w 1 1 1 https +w 1 1 1 httpuser +w 1 1 1 hu +w 1 1 1 hyper +w 1 1 1 i +w 1 1 1 ia +w 1 1 1 ibm +w 1 1 1 icat +w 1 1 1 icon +w 1 1 1 icons +w 1 1 1 id +w 1 1 1 idea +w 1 1 1 ideas +w 1 1 1 ids +w 1 1 1 ie +w 1 1 1 iframe +w 1 1 1 ig +w 1 1 1 ignore +w 1 1 1 
iisadmin +w 1 1 1 iisadmpwd +w 1 1 1 iissamples +w 1 1 1 image +w 1 1 1 imagefolio +w 1 1 1 images +w 1 1 1 img +w 1 1 1 imgs +w 1 1 1 imp +w 1 1 1 import +w 1 1 1 important +w 1 1 1 in +w 1 1 1 inbound +w 1 1 1 incl +w 1 1 1 include +w 1 1 1 includes +w 1 1 1 incoming +w 1 1 1 incubator +w 1 1 1 index +w 1 1 1 index1 +w 1 1 1 index2 +w 1 1 1 index_1 +w 1 1 1 index_2 +w 1 1 1 inetpub +w 1 1 1 inetsrv +w 1 1 1 inf +w 1 1 1 info +w 1 1 1 information +w 1 1 1 ingress +w 1 1 1 init +w 1 1 1 inline +w 1 1 1 input +w 1 1 1 inquire +w 1 1 1 inquiries +w 1 1 1 inquiry +w 1 1 1 insert +w 1 1 1 install +w 1 1 1 int +w 1 1 1 interim +w 1 1 1 intermediate +w 1 1 1 internal +w 1 1 1 international +w 1 1 1 internet +w 1 1 1 intl +w 1 1 1 intranet +w 1 1 1 intro +w 1 1 1 ip +w 1 1 1 ipc +w 1 1 1 ips +w 1 1 1 irc +w 1 1 1 is +w 1 1 1 isapi +w 1 1 1 iso +w 1 1 1 issues +w 1 1 1 it +w 1 1 1 item +w 1 1 1 j +w 1 1 1 j2ee +w 1 1 1 j2me +w 1 1 1 jakarta +w 1 1 1 java-plugin +w 1 1 1 javadoc +w 1 1 1 javascript +w 1 1 1 javax +w 1 1 1 jboss +w 1 1 1 jdbc +w 1 1 1 jigsaw +w 1 1 1 jj +w 1 1 1 jmx-console +w 1 1 1 job +w 1 1 1 jobs +w 1 1 1 joe +w 1 1 1 john +w 1 1 1 join +w 1 1 1 joomla +w 1 1 1 journal +w 1 1 1 jp +w 1 1 1 jpa +w 1 1 1 jre +w 1 1 1 jrun +w 1 1 1 json +w 1 1 1 jsso +w 1 1 1 jsx +w 1 1 1 juniper +w 1 1 1 junk +w 1 1 1 jvm +w 1 1 1 k +w 1 1 1 kboard +w 1 1 1 keep +w 1 1 1 kernel +w 1 1 1 keygen +w 1 1 1 keys +w 1 1 1 kids +w 1 1 1 kill +w 1 1 1 known_hosts +w 1 1 1 l +w 1 1 1 labs +w 1 1 1 lang +w 1 1 1 large +w 1 1 1 law +w 1 1 1 layout +w 1 1 1 layouts +w 1 1 1 ldap +w 1 1 1 leader +w 1 1 1 leaders +w 1 1 1 left +w 1 1 1 legacy +w 1 1 1 legal +w 1 1 1 lenya +w 1 1 1 letters +w 1 1 1 level +w 1 1 1 lg +w 1 1 1 library +w 1 1 1 libs +w 1 1 1 license +w 1 1 1 licenses +w 1 1 1 line +w 1 1 1 link +w 1 1 1 links +w 1 1 1 linux +w 1 1 1 list +w 1 1 1 listinfo +w 1 1 1 lists +w 1 1 1 live +w 1 1 1 lo +w 1 1 1 loader +w 1 1 1 loading +w 1 1 1 loc +w 1 1 1 local +w 1 1 1 location 
+w 1 1 1 lock +w 1 1 1 locked +w 1 1 1 log4j +w 1 1 1 logfile +w 1 1 1 logging +w 1 1 1 login +w 1 1 1 logins +w 1 1 1 logo +w 1 1 1 logoff +w 1 1 1 logon +w 1 1 1 logos +w 1 1 1 logout +w 1 1 1 logs +w 1 1 1 lost+found +w 1 1 1 low +w 1 1 1 ls +w 1 1 1 lucene +w 1 1 1 m +w 1 1 1 mac +w 1 1 1 mail +w 1 1 1 mailer +w 1 1 1 mailing +w 1 1 1 mailman +w 1 1 1 mails +w 1 1 1 main +w 1 1 1 manage +w 1 1 1 management +w 1 1 1 manager +w 1 1 1 manual +w 1 1 1 manuals +w 1 1 1 map +w 1 1 1 maps +w 1 1 1 mark +w 1 1 1 marketing +w 1 1 1 master +w 1 1 1 master.passwd +w 1 1 1 match +w 1 1 1 matrix +w 1 1 1 maven +w 1 1 1 mbox +w 1 1 1 me +w 1 1 1 media +w 1 1 1 medium +w 1 1 1 mem +w 1 1 1 member +w 1 1 1 members +w 1 1 1 membership +w 1 1 1 memory +w 1 1 1 menu +w 1 1 1 messaging +w 1 1 1 microsoft +w 1 1 1 migrate +w 1 1 1 migration +w 1 1 1 mina +w 1 1 1 mirror +w 1 1 1 mirrors +w 1 1 1 misc +w 1 1 1 mission +w 1 1 1 mix +w 1 1 1 mms +w 1 1 1 mobi +w 1 1 1 mobile +w 1 1 1 mock +w 1 1 1 mod +w 1 1 1 modify +w 1 1 1 mods +w 1 1 1 module +w 1 1 1 modules +w 1 1 1 mojo +w 1 1 1 money +w 1 1 1 monitoring +w 1 1 1 more +w 1 1 1 move +w 1 1 1 movie +w 1 1 1 movies +w 1 1 1 mp +w 1 1 1 mp3 +w 1 1 1 mp3s +w 1 1 1 ms +w 1 1 1 ms-sql +w 1 1 1 msadc +w 1 1 1 msadm +w 1 1 1 msie +w 1 1 1 msql +w 1 1 1 mssql +w 1 1 1 mta +w 1 1 1 multimedia +w 1 1 1 music +w 1 1 1 mx +w 1 1 1 my +w 1 1 1 myfaces +w 1 1 1 myphpnuke +w 1 1 1 mysql +w 1 1 1 mysqld +w 1 1 1 n +w 1 1 1 nav +w 1 1 1 navigation +w 1 1 1 net +w 1 1 1 netbsd +w 1 1 1 nethome +w 1 1 1 nets +w 1 1 1 network +w 1 1 1 networking +w 1 1 1 new +w 1 1 1 news +w 1 1 1 newsletter +w 1 1 1 newsletters +w 1 1 1 next +w 1 1 1 nfs +w 1 1 1 nice +w 1 1 1 nl +w 1 1 1 nobody +w 1 1 1 node +w 1 1 1 none +w 1 1 1 note +w 1 1 1 notes +w 1 1 1 notification +w 1 1 1 notifications +w 1 1 1 notified +w 1 1 1 notify +w 1 1 1 ns +w 1 1 1 nuke +w 1 1 1 nul +w 1 1 1 null +w 1 1 1 oa_servlets +w 1 1 1 oauth +w 1 1 1 obdc +w 1 1 1 obsolete +w 1 1 1 
obsoleted +w 1 1 1 odbc +w 1 1 1 ode +w 1 1 1 oem +w 1 1 1 ofbiz +w 1 1 1 office +w 1 1 1 onbound +w 1 1 1 online +w 1 1 1 op +w 1 1 1 open +w 1 1 1 openbsd +w 1 1 1 opendir +w 1 1 1 openejb +w 1 1 1 openjpa +w 1 1 1 operations +w 1 1 1 opinion +w 1 1 1 oprocmgr-status +w 1 1 1 opt +w 1 1 1 option +w 1 1 1 options +w 1 1 1 oracle +w 1 1 1 oracle.xml.xsql.XSQLServlet +w 1 1 1 order +w 1 1 1 ordered +w 1 1 1 orders +w 1 1 1 org +w 1 1 1 osc +w 1 1 1 oscommerce +w 1 1 1 other +w 1 1 1 outgoing +w 1 1 1 outline +w 1 1 1 output +w 1 1 1 outreach +w 1 1 1 overview +w 1 1 1 owa +w 1 1 1 ows +w 1 1 1 ows-bin +w 1 1 1 p +w 1 1 1 p2p +w 1 1 1 pack +w 1 1 1 packages +w 1 1 1 page +w 1 1 1 page1 +w 1 1 1 page2 +w 1 1 1 page_1 +w 1 1 1 page_2 +w 1 1 1 pages +w 1 1 1 paid +w 1 1 1 panel +w 1 1 1 paper +w 1 1 1 papers +w 1 1 1 parse +w 1 1 1 partner +w 1 1 1 partners +w 1 1 1 party +w 1 1 1 pass +w 1 1 1 passwd +w 1 1 1 password +w 1 1 1 passwords +w 1 1 1 past +w 1 1 1 patch +w 1 1 1 patches +w 1 1 1 paypal +w 1 1 1 pc +w 1 1 1 pci +w 1 1 1 pda +w 1 1 1 pdfs +w 1 1 1 peek +w 1 1 1 pending +w 1 1 1 people +w 1 1 1 perf +w 1 1 1 performance +w 1 1 1 perl +w 1 1 1 personal +w 1 1 1 pg +w 1 1 1 phf +w 1 1 1 phone +w 1 1 1 phones +w 1 1 1 phorum +w 1 1 1 photo +w 1 1 1 photos +w 1 1 1 phpBB +w 1 1 1 phpBB2 +w 1 1 1 phpEventCalendar +w 1 1 1 phpMyAdmin +w 1 1 1 phpbb +w 1 1 1 phpmyadmin +w 1 1 1 phpnuke +w 1 1 1 phps +w 1 1 1 pic +w 1 1 1 pics +w 1 1 1 pictures +w 1 1 1 pii +w 1 1 1 ping +w 1 1 1 pipermail +w 1 1 1 piranha +w 1 1 1 pix +w 1 1 1 pixel +w 1 1 1 pkg +w 1 1 1 pkgs +w 1 1 1 plain +w 1 1 1 play +w 1 1 1 pls +w 1 1 1 plugin +w 1 1 1 plugins +w 1 1 1 pm +w 1 1 1 poi +w 1 1 1 policies +w 1 1 1 policy +w 1 1 1 politics +w 1 1 1 poll +w 1 1 1 polls +w 1 1 1 pop +w 1 1 1 pop3 +w 1 1 1 porn +w 1 1 1 port +w 1 1 1 portal +w 1 1 1 portals +w 1 1 1 portfolio +w 1 1 1 pos +w 1 1 1 post +w 1 1 1 posted +w 1 1 1 postgres +w 1 1 1 postgresql +w 1 1 1 postnuke +w 1 1 1 postpaid +w 1 1 1 
posts +w 1 1 1 pr +w 1 1 1 pr0n +w 1 1 1 premium +w 1 1 1 prepaid +w 1 1 1 presentation +w 1 1 1 presentations +w 1 1 1 preserve +w 1 1 1 press +w 1 1 1 preview +w 1 1 1 previews +w 1 1 1 previous +w 1 1 1 pricing +w 1 1 1 print +w 1 1 1 printenv +w 1 1 1 printer +w 1 1 1 printers +w 1 1 1 priv +w 1 1 1 privacy +w 1 1 1 private +w 1 1 1 problems +w 1 1 1 proc +w 1 1 1 procedures +w 1 1 1 prod +w 1 1 1 product +w 1 1 1 product_info +w 1 1 1 production +w 1 1 1 products +w 1 1 1 profile +w 1 1 1 profiles +w 1 1 1 profiling +w 1 1 1 program +w 1 1 1 programming +w 1 1 1 programs +w 1 1 1 project +w 1 1 1 projects +w 1 1 1 promo +w 1 1 1 prop +w 1 1 1 properties +w 1 1 1 property +w 1 1 1 props +w 1 1 1 protect +w 1 1 1 proto +w 1 1 1 proxies +w 1 1 1 proxy +w 1 1 1 prv +w 1 1 1 ps +w 1 1 1 psql +w 1 1 1 pt +w 1 1 1 pub +w 1 1 1 public +w 1 1 1 publication +w 1 1 1 publications +w 1 1 1 pubs +w 1 1 1 pull +w 1 1 1 purchase +w 1 1 1 purchases +w 1 1 1 purchasing +w 1 1 1 push +w 1 1 1 pw +w 1 1 1 pwd +w 1 1 1 python +w 1 1 1 q +w 1 1 1 qpid +w 1 1 1 queries +w 1 1 1 query +w 1 1 1 queue +w 1 1 1 queues +w 1 1 1 quote +w 1 1 1 quotes +w 1 1 1 r +w 1 1 1 radio +w 1 1 1 random +w 1 1 1 rdf +w 1 1 1 read +w 1 1 1 readme +w 1 1 1 realestate +w 1 1 1 receive +w 1 1 1 received +w 1 1 1 recharge +w 1 1 1 record +w 1 1 1 records +w 1 1 1 recovery +w 1 1 1 recycle +w 1 1 1 recycled +w 1 1 1 redir +w 1 1 1 redirect +w 1 1 1 reference +w 1 1 1 reg +w 1 1 1 register +w 1 1 1 registered +w 1 1 1 registration +w 1 1 1 registrations +w 1 1 1 release +w 1 1 1 releases +w 1 1 1 remote +w 1 1 1 remove +w 1 1 1 removed +w 1 1 1 render +w 1 1 1 rendered +w 1 1 1 rep +w 1 1 1 repl +w 1 1 1 replica +w 1 1 1 replicas +w 1 1 1 replicate +w 1 1 1 replicated +w 1 1 1 replication +w 1 1 1 replicator +w 1 1 1 reply +w 1 1 1 report +w 1 1 1 reporting +w 1 1 1 reports +w 1 1 1 reprints +w 1 1 1 req +w 1 1 1 reqs +w 1 1 1 request +w 1 1 1 requests +w 1 1 1 requisition +w 1 1 1 requisitions +w 1 1 1 
res +w 1 1 1 research +w 1 1 1 resin +w 1 1 1 resource +w 1 1 1 resources +w 1 1 1 rest +w 1 1 1 restore +w 1 1 1 restored +w 1 1 1 restricted +w 1 1 1 results +w 1 1 1 retail +w 1 1 1 reverse +w 1 1 1 reversed +w 1 1 1 revert +w 1 1 1 reverted +w 1 1 1 review +w 1 1 1 reviews +w 1 1 1 right +w 1 1 1 roam +w 1 1 1 roaming +w 1 1 1 robot +w 1 1 1 robots +w 1 1 1 roller +w 1 1 1 room +w 1 1 1 root +w 1 1 1 rpc +w 1 1 1 ru +w 1 1 1 rule +w 1 1 1 rules +w 1 1 1 run +w 1 1 1 rwservlet +w 1 1 1 s +w 1 1 1 sale +w 1 1 1 sales +w 1 1 1 sam +w 1 1 1 samba +w 1 1 1 sample +w 1 1 1 samples +w 1 1 1 sav +w 1 1 1 saved +w 1 1 1 saves +w 1 1 1 sbin +w 1 1 1 scan +w 1 1 1 scanned +w 1 1 1 scans +w 1 1 1 sched +w 1 1 1 schedule +w 1 1 1 scheduled +w 1 1 1 scheduling +w 1 1 1 schema +w 1 1 1 science +w 1 1 1 screen +w 1 1 1 screens +w 1 1 1 screenshot +w 1 1 1 screenshots +w 1 1 1 script +w 1 1 1 scriptlet +w 1 1 1 scriptlets +w 1 1 1 scripts +w 1 1 1 sdk +w 1 1 1 se +w 1 1 1 search +w 1 1 1 sec +w 1 1 1 secret +w 1 1 1 section +w 1 1 1 sections +w 1 1 1 secure +w 1 1 1 secured +w 1 1 1 security +w 1 1 1 seed +w 1 1 1 sell +w 1 1 1 send +w 1 1 1 sendmail +w 1 1 1 sendto +w 1 1 1 sent +w 1 1 1 serial +w 1 1 1 serv +w 1 1 1 serve +w 1 1 1 server +w 1 1 1 server-info +w 1 1 1 server-status +w 1 1 1 servers +w 1 1 1 service +w 1 1 1 services +w 1 1 1 servlet +w 1 1 1 servlets +w 1 1 1 session +w 1 1 1 sessions +w 1 1 1 setting +w 1 1 1 settings +w 1 1 1 setup +w 1 1 1 share +w 1 1 1 shared +w 1 1 1 shares +w 1 1 1 shell +w 1 1 1 ship +w 1 1 1 shipped +w 1 1 1 shipping +w 1 1 1 shop +w 1 1 1 shopper +w 1 1 1 shopping +w 1 1 1 shops +w 1 1 1 shoutbox +w 1 1 1 show +w 1 1 1 show_post +w 1 1 1 show_thread +w 1 1 1 showcat +w 1 1 1 showenv +w 1 1 1 showjobs +w 1 1 1 showmap +w 1 1 1 showmsg +w 1 1 1 showpost +w 1 1 1 showthread +w 1 1 1 sign +w 1 1 1 signoff +w 1 1 1 signon +w 1 1 1 signup +w 1 1 1 simple +w 1 1 1 sink +w 1 1 1 site +w 1 1 1 site-map +w 1 1 1 site_map +w 1 1 1 sitemap +w 1 
1 1 sites +w 1 1 1 skel +w 1 1 1 skin +w 1 1 1 skins +w 1 1 1 skip +w 1 1 1 sl +w 1 1 1 sling +w 1 1 1 sm +w 1 1 1 small +w 1 1 1 sms +w 1 1 1 smtp +w 1 1 1 snoop +w 1 1 1 soap +w 1 1 1 soaprouter +w 1 1 1 soft +w 1 1 1 software +w 1 1 1 solaris +w 1 1 1 sold +w 1 1 1 solution +w 1 1 1 solutions +w 1 1 1 source +w 1 1 1 sources +w 1 1 1 soutbox +w 1 1 1 sox +w 1 1 1 sp +w 1 1 1 space +w 1 1 1 spacer +w 1 1 1 spam +w 1 1 1 special +w 1 1 1 specials +w 1 1 1 sponsor +w 1 1 1 sponsors +w 1 1 1 spool +w 1 1 1 sport +w 1 1 1 sports +w 1 1 1 sqlnet +w 1 1 1 squirrel +w 1 1 1 squirrelmail +w 1 1 1 src +w 1 1 1 srv +w 1 1 1 ss +w 1 1 1 ssh +w 1 1 1 ssi +w 1 1 1 ssl +w 1 1 1 sslvpn +w 1 1 1 ssn +w 1 1 1 sso +w 1 1 1 staff +w 1 1 1 staging +w 1 1 1 standard +w 1 1 1 standards +w 1 1 1 star +w 1 1 1 start +w 1 1 1 stat +w 1 1 1 statement +w 1 1 1 statements +w 1 1 1 static +w 1 1 1 staticpages +w 1 1 1 statistic +w 1 1 1 statistics +w 1 1 1 stats +w 1 1 1 status +w 1 1 1 stock +w 1 1 1 storage +w 1 1 1 store +w 1 1 1 stored +w 1 1 1 stories +w 1 1 1 story +w 1 1 1 strut +w 1 1 1 struts +w 1 1 1 student +w 1 1 1 students +w 1 1 1 stuff +w 1 1 1 style +w 1 1 1 styles +w 1 1 1 submissions +w 1 1 1 submit +w 1 1 1 subscribe +w 1 1 1 subscriber +w 1 1 1 subscribers +w 1 1 1 subscription +w 1 1 1 subscriptions +w 1 1 1 success +w 1 1 1 suite +w 1 1 1 suites +w 1 1 1 sun +w 1 1 1 sunos +w 1 1 1 super +w 1 1 1 support +w 1 1 1 surf +w 1 1 1 survey +w 1 1 1 surveys +w 1 1 1 sws +w 1 1 1 synapse +w 1 1 1 sync +w 1 1 1 synced +w 1 1 1 sys +w 1 1 1 system +w 1 1 1 systems +w 1 1 1 sysuser +w 1 1 1 t +w 1 1 1 tag +w 1 1 1 tags +w 1 1 1 tape +w 1 1 1 tapes +w 1 1 1 tapestry +w 1 1 1 tb +w 1 1 1 tcl +w 1 1 1 team +w 1 1 1 tech +w 1 1 1 technical +w 1 1 1 technology +w 1 1 1 tel +w 1 1 1 tele +w 1 1 1 templ +w 1 1 1 template +w 1 1 1 templates +w 1 1 1 terms +w 1 1 1 test-cgi +w 1 1 1 test-env +w 1 1 1 test1 +w 1 1 1 test123 +w 1 1 1 test1234 +w 1 1 1 test2 +w 1 1 1 test3 +w 1 1 1 
testimonial +w 1 1 1 testimonials +w 1 1 1 testing +w 1 1 1 tests +w 1 1 1 texis +w 1 1 1 text +w 1 1 1 texts +w 1 1 1 theme +w 1 1 1 themes +w 1 1 1 thread +w 1 1 1 threads +w 1 1 1 thumb +w 1 1 1 thumbnail +w 1 1 1 thumbnails +w 1 1 1 thumbs +w 1 1 1 tickets +w 1 1 1 tiki +w 1 1 1 tiles +w 1 1 1 tip +w 1 1 1 tips +w 1 1 1 title +w 1 1 1 tls +w 1 1 1 tmpl +w 1 1 1 tmps +w 1 1 1 tn +w 1 1 1 toc +w 1 1 1 todo +w 1 1 1 toggle +w 1 1 1 tomcat +w 1 1 1 tool +w 1 1 1 toolbar +w 1 1 1 toolkit +w 1 1 1 tools +w 1 1 1 top +w 1 1 1 topic +w 1 1 1 topics +w 1 1 1 torrent +w 1 1 1 torrents +w 1 1 1 tos +w 1 1 1 tour +w 1 1 1 tpl +w 1 1 1 tpv +w 1 1 1 tr +w 1 1 1 traceroute +w 1 1 1 traces +w 1 1 1 track +w 1 1 1 trackback +w 1 1 1 tracker +w 1 1 1 trackers +w 1 1 1 tracking +w 1 1 1 tracks +w 1 1 1 traffic +w 1 1 1 trailer +w 1 1 1 trailers +w 1 1 1 training +w 1 1 1 trans +w 1 1 1 transparent +w 1 1 1 transport +w 1 1 1 trash +w 1 1 1 travel +w 1 1 1 treasury +w 1 1 1 tree +w 1 1 1 trees +w 1 1 1 trial +w 1 1 1 trunk +w 1 1 1 tsweb +w 1 1 1 tt +w 1 1 1 turbine +w 1 1 1 tuscany +w 1 1 1 tutorial +w 1 1 1 tutorials +w 1 1 1 tv +w 1 1 1 tweak +w 1 1 1 type +w 1 1 1 typo3 +w 1 1 1 typo3conf +w 1 1 1 u +w 1 1 1 ubb +w 1 1 1 uds +w 1 1 1 uk +w 1 1 1 umts +w 1 1 1 union +w 1 1 1 unix +w 1 1 1 unlock +w 1 1 1 unreg +w 1 1 1 unregister +w 1 1 1 up +w 1 1 1 upd +w 1 1 1 update +w 1 1 1 updated +w 1 1 1 updater +w 1 1 1 updates +w 1 1 1 upload +w 1 1 1 uploads +w 1 1 1 url +w 1 1 1 us +w 1 1 1 usa +w 1 1 1 usage +w 1 1 1 user +w 1 1 1 userlog +w 1 1 1 users +w 1 1 1 usr +w 1 1 1 util +w 1 1 1 utilities +w 1 1 1 utility +w 1 1 1 utils +w 1 1 1 v +w 1 1 1 v1 +w 1 1 1 v2 +w 1 1 1 var +w 1 1 1 vault +w 1 1 1 vector +w 1 1 1 velocity +w 1 1 1 vendor +w 1 1 1 ver +w 1 1 1 ver1 +w 1 1 1 ver2 +w 1 1 1 version +w 1 1 1 vfs +w 1 1 1 video +w 1 1 1 videos +w 1 1 1 view +w 1 1 1 view-source +w 1 1 1 viewcvs +w 1 1 1 viewforum +w 1 1 1 viewonline +w 1 1 1 views +w 1 1 1 viewsource +w 1 1 1 viewsvn 
+w 1 1 1 viewtopic +w 1 1 1 viewvc +w 1 1 1 virtual +w 1 1 1 vm +w 1 1 1 voip +w 1 1 1 vol +w 1 1 1 vpn +w 1 1 1 w +w 1 1 1 w3 +w 1 1 1 w3c +w 1 1 1 wa +w 1 1 1 wap +w 1 1 1 war +w 1 1 1 warez +w 1 1 1 way-board +w 1 1 1 wbboard +w 1 1 1 wc +w 1 1 1 weather +w 1 1 1 web +w 1 1 1 web-beans +w 1 1 1 web-console +w 1 1 1 webaccess +w 1 1 1 webadmin +w 1 1 1 webagent +w 1 1 1 webalizer +w 1 1 1 webapp +w 1 1 1 webb +w 1 1 1 webbbs +w 1 1 1 webboard +w 1 1 1 webcalendar +w 1 1 1 webcart +w 1 1 1 webcasts +w 1 1 1 webcgi +w 1 1 1 webchat +w 1 1 1 webdata +w 1 1 1 webdav +w 1 1 1 weblog +w 1 1 1 weblogic +w 1 1 1 weblogs +w 1 1 1 webmail +w 1 1 1 webplus +w 1 1 1 webshop +w 1 1 1 website +w 1 1 1 websphere +w 1 1 1 webstats +w 1 1 1 websvn +w 1 1 1 webwork +w 1 1 1 welcome +w 1 1 1 whitepapers +w 1 1 1 whois +w 1 1 1 whosonline +w 1 1 1 wicket +w 1 1 1 wiki +w 1 1 1 win +w 1 1 1 win32 +w 1 1 1 windows +w 1 1 1 winnt +w 1 1 1 wireless +w 1 1 1 wml +w 1 1 1 word +w 1 1 1 wordpress +w 1 1 1 work +w 1 1 1 working +w 1 1 1 world +w 1 1 1 wp +w 1 1 1 wp-content +w 1 1 1 wp-includes +w 1 1 1 wp-login +w 1 1 1 wrap +w 1 1 1 ws-client +w 1 1 1 ws_ftp +w 1 1 1 wtai +w 1 1 1 www +w 1 1 1 www-sql +w 1 1 1 www1 +w 1 1 1 www2 +w 1 1 1 www3 +w 1 1 1 wwwboard +w 1 1 1 wwwroot +w 1 1 1 wwwstats +w 1 1 1 wwwthreads +w 1 1 1 wwwuser +w 1 1 1 x +w 1 1 1 xalan +w 1 1 1 xerces +w 1 1 1 xhtml +w 1 1 1 xmlrpc +w 1 1 1 xslt +w 1 1 1 xsql +w 1 1 1 xxx +w 1 1 1 xyzzy +w 1 1 1 y +w 1 1 1 yahoo +w 1 1 1 youtube +w 1 1 1 yt +w 1 1 1 z +w 1 1 1 zboard +w 1 1 1 zend +w 1 1 1 zero +w 1 1 1 zipfiles +w 1 1 1 zips +w 1 1 1 zope +w 1 1 1 zorum +w 1 1 1 ~admin +w 1 1 1 ~apache +w 1 1 1 ~bin +w 1 1 1 ~bob +w 1 1 1 ~ftp +w 1 1 1 ~guest +w 1 1 1 ~http +w 1 1 1 ~httpd +w 1 1 1 ~john +w 1 1 1 ~log +w 1 1 1 ~logs +w 1 1 1 ~lp +w 1 1 1 ~mark +w 1 1 1 ~matt +w 1 1 1 ~nobody +w 1 1 1 ~root +w 1 1 1 ~test +w 1 1 1 ~tmp +w 1 1 1 ~www diff --git a/dictionaries/default.wl b/dictionaries/default.wl new file mode 100644 
index 0000000..f14f950 --- /dev/null +++ b/dictionaries/default.wl @@ -0,0 +1,1893 @@ +e 1 1 1 asmx +e 1 1 1 asp +e 1 1 1 aspx +e 1 1 1 bak +e 1 1 1 bat +e 1 1 1 cc +e 1 1 1 cfg +e 1 1 1 cfm +e 1 1 1 cgi +e 1 1 1 class +e 1 1 1 cnf +e 1 1 1 conf +e 1 1 1 config +e 1 1 1 cpp +e 1 1 1 csv +e 1 1 1 dat +e 1 1 1 db +e 1 1 1 dll +e 1 1 1 err +e 1 1 1 error +e 1 1 1 exe +e 1 1 1 gz +e 1 1 1 htm +e 1 1 1 html +e 1 1 1 inc +e 1 1 1 ini +e 1 1 1 java +e 1 1 1 jhtml +e 1 1 1 js +e 1 1 1 jsf +e 1 1 1 jsp +e 1 1 1 key +e 1 1 1 log +e 1 1 1 mdb +e 1 1 1 nsf +e 1 1 1 old +e 1 1 1 ora +e 1 1 1 orig +e 1 1 1 out +e 1 1 1 part +e 1 1 1 php +e 1 1 1 php3 +e 1 1 1 pl +e 1 1 1 pm +e 1 1 1 py +e 1 1 1 rss +e 1 1 1 sh +e 1 1 1 shtml +e 1 1 1 sql +e 1 1 1 stackdump +e 1 1 1 tar.gz +e 1 1 1 temp +e 1 1 1 test +e 1 1 1 tgz +e 1 1 1 tmp +e 1 1 1 txt +e 1 1 1 vb +e 1 1 1 vbs +e 1 1 1 ws +e 1 1 1 xls +e 1 1 1 xml +e 1 1 1 xsl +e 1 1 1 zip +w 1 1 1 .bash_history +w 1 1 1 .bashrc +w 1 1 1 .cvsignore +w 1 1 1 .history +w 1 1 1 .htaccess +w 1 1 1 .htpasswd +w 1 1 1 .passwd +w 1 1 1 .perf +w 1 1 1 .ssh +w 1 1 1 .svn +w 1 1 1 .web +w 1 1 1 0 +w 1 1 1 00 +w 1 1 1 01 +w 1 1 1 02 +w 1 1 1 03 +w 1 1 1 04 +w 1 1 1 05 +w 1 1 1 06 +w 1 1 1 07 +w 1 1 1 08 +w 1 1 1 09 +w 1 1 1 1 +w 1 1 1 10 +w 1 1 1 100 +w 1 1 1 1000 +w 1 1 1 1001 +w 1 1 1 101 +w 1 1 1 11 +w 1 1 1 12 +w 1 1 1 13 +w 1 1 1 14 +w 1 1 1 15 +w 1 1 1 1990 +w 1 1 1 1991 +w 1 1 1 1992 +w 1 1 1 1993 +w 1 1 1 1994 +w 1 1 1 1995 +w 1 1 1 1996 +w 1 1 1 1997 +w 1 1 1 1998 +w 1 1 1 1999 +w 1 1 1 2 +w 1 1 1 20 +w 1 1 1 200 +w 1 1 1 2000 +w 1 1 1 2001 +w 1 1 1 2002 +w 1 1 1 2003 +w 1 1 1 2004 +w 1 1 1 2005 +w 1 1 1 2006 +w 1 1 1 2007 +w 1 1 1 2008 +w 1 1 1 2009 +w 1 1 1 2010 +w 1 1 1 2011 +w 1 1 1 2012 +w 1 1 1 21 +w 1 1 1 22 +w 1 1 1 23 +w 1 1 1 24 +w 1 1 1 25 +w 1 1 1 2g +w 1 1 1 3 +w 1 1 1 300 +w 1 1 1 3g +w 1 1 1 4 +w 1 1 1 42 +w 1 1 1 5 +w 1 1 1 50 +w 1 1 1 500 +w 1 1 1 51 +w 1 1 1 6 +w 1 1 1 7 +w 1 1 1 7z +w 1 1 1 8 +w 1 1 1 9 +w 1 1 1 ADM +w 1 1 1 
ADMIN +w 1 1 1 AggreSpy +w 1 1 1 AppsLocalLogin +w 1 1 1 AppsLogin +w 1 1 1 BUILD +w 1 1 1 CMS +w 1 1 1 CVS +w 1 1 1 DB +w 1 1 1 DMSDump +w 1 1 1 Documents and Settings +w 1 1 1 Entries +w 1 1 1 FCKeditor +w 1 1 1 JMXSoapAdapter +w 1 1 1 LICENSE +w 1 1 1 MANIFEST.MF +w 1 1 1 META-INF +w 1 1 1 Makefile +w 1 1 1 OA +w 1 1 1 OAErrorDetailPage +w 1 1 1 OA_HTML +w 1 1 1 Program Files +w 1 1 1 README +w 1 1 1 Readme +w 1 1 1 Recycled +w 1 1 1 Root +w 1 1 1 SQL +w 1 1 1 SUNWmc +w 1 1 1 SiteScope +w 1 1 1 SiteServer +w 1 1 1 Spy +w 1 1 1 TEMP +w 1 1 1 TMP +w 1 1 1 TODO +w 1 1 1 Thumbs.db +w 1 1 1 WEB-INF +w 1 1 1 WS_FTP +w 1 1 1 XXX +w 1 1 1 _ +w 1 1 1 _adm +w 1 1 1 _admin +w 1 1 1 _files +w 1 1 1 _include +w 1 1 1 _js +w 1 1 1 _mem_bin +w 1 1 1 _old +w 1 1 1 _pages +w 1 1 1 _private +w 1 1 1 _res +w 1 1 1 _source +w 1 1 1 _src +w 1 1 1 _test +w 1 1 1 _vti_bin +w 1 1 1 _vti_cnf +w 1 1 1 _vti_pvt +w 1 1 1 _vti_txt +w 1 1 1 _www +w 1 1 1 a +w 1 1 1 aa +w 1 1 1 aaa +w 1 1 1 abc +w 1 1 1 abc123 +w 1 1 1 abcd +w 1 1 1 abcd1234 +w 1 1 1 about +w 1 1 1 access +w 1 1 1 access-log +w 1 1 1 access-log.1 +w 1 1 1 access.1 +w 1 1 1 access_log +w 1 1 1 access_log.1 +w 1 1 1 accessibility +w 1 1 1 account +w 1 1 1 accounting +w 1 1 1 accounts +w 1 1 1 action +w 1 1 1 actions +w 1 1 1 active +w 1 1 1 activex +w 1 1 1 ad +w 1 1 1 adclick +w 1 1 1 add +w 1 1 1 addressbook +w 1 1 1 adm +w 1 1 1 admin +w 1 1 1 admin_ +w 1 1 1 ads +w 1 1 1 adv +w 1 1 1 advertise +w 1 1 1 advertising +w 1 1 1 affiliate +w 1 1 1 affiliates +w 1 1 1 agenda +w 1 1 1 agent +w 1 1 1 agents +w 1 1 1 ajax +w 1 1 1 album +w 1 1 1 albums +w 1 1 1 alert +w 1 1 1 alerts +w 1 1 1 alias +w 1 1 1 aliases +w 1 1 1 all +w 1 1 1 alpha +w 1 1 1 alumni +w 1 1 1 analog +w 1 1 1 announcement +w 1 1 1 announcements +w 1 1 1 anon +w 1 1 1 anonymous +w 1 1 1 ansi +w 1 1 1 apac +w 1 1 1 apache +w 1 1 1 apexec +w 1 1 1 api +w 1 1 1 apis +w 1 1 1 app +w 1 1 1 appeal +w 1 1 1 appeals +w 1 1 1 append +w 1 1 1 appl +w 1 1 1 apple +w 1 1 1 
appliation +w 1 1 1 applications +w 1 1 1 apps +w 1 1 1 apr +w 1 1 1 arch +w 1 1 1 archive +w 1 1 1 archives +w 1 1 1 array +w 1 1 1 art +w 1 1 1 article +w 1 1 1 articles +w 1 1 1 artwork +w 1 1 1 ascii +w 1 1 1 asdf +w 1 1 1 asset +w 1 1 1 assets +w 1 1 1 atom +w 1 1 1 attach +w 1 1 1 attachment +w 1 1 1 attachments +w 1 1 1 attachs +w 1 1 1 attic +w 1 1 1 audio +w 1 1 1 audit +w 1 1 1 audits +w 1 1 1 auth +w 1 1 1 author +w 1 1 1 authorized_keys +w 1 1 1 authors +w 1 1 1 auto +w 1 1 1 automatic +w 1 1 1 automation +w 1 1 1 avatar +w 1 1 1 avatars +w 1 1 1 award +w 1 1 1 awards +w 1 1 1 awl +w 1 1 1 awstats +w 1 1 1 b +w 1 1 1 b2b +w 1 1 1 b2c +w 1 1 1 back +w 1 1 1 backdoor +w 1 1 1 backend +w 1 1 1 backup +w 1 1 1 backups +w 1 1 1 bandwidth +w 1 1 1 bank +w 1 1 1 banks +w 1 1 1 banner +w 1 1 1 banners +w 1 1 1 bar +w 1 1 1 base +w 1 1 1 bash +w 1 1 1 basic +w 1 1 1 basket +w 1 1 1 baskets +w 1 1 1 batch +w 1 1 1 baz +w 1 1 1 bb +w 1 1 1 bb-hist +w 1 1 1 bb-histlog +w 1 1 1 bboard +w 1 1 1 bbs +w 1 1 1 beans +w 1 1 1 beehive +w 1 1 1 benefits +w 1 1 1 beta +w 1 1 1 bfc +w 1 1 1 big +w 1 1 1 bigip +w 1 1 1 bill +w 1 1 1 billing +w 1 1 1 bin +w 1 1 1 binaries +w 1 1 1 binary +w 1 1 1 bins +w 1 1 1 bio +w 1 1 1 bios +w 1 1 1 biz +w 1 1 1 bkup +w 1 1 1 blah +w 1 1 1 blank +w 1 1 1 blog +w 1 1 1 blogger +w 1 1 1 bloggers +w 1 1 1 blogs +w 1 1 1 board +w 1 1 1 bofh +w 1 1 1 book +w 1 1 1 books +w 1 1 1 boot +w 1 1 1 bottom +w 1 1 1 broken +w 1 1 1 broker +w 1 1 1 browse +w 1 1 1 bs +w 1 1 1 bsd +w 1 1 1 bugs +w 1 1 1 build +w 1 1 1 buildr +w 1 1 1 bulk +w 1 1 1 bullet +w 1 1 1 business +w 1 1 1 button +w 1 1 1 buttons +w 1 1 1 buy +w 1 1 1 buynow +w 1 1 1 bypass +w 1 1 1 bz2 +w 1 1 1 c +w 1 1 1 ca +w 1 1 1 cache +w 1 1 1 cal +w 1 1 1 calendar +w 1 1 1 camel +w 1 1 1 car +w 1 1 1 card +w 1 1 1 cards +w 1 1 1 career +w 1 1 1 careers +w 1 1 1 cars +w 1 1 1 cart +w 1 1 1 carts +w 1 1 1 cat +w 1 1 1 catalog +w 1 1 1 catalogs +w 1 1 1 catalyst +w 1 1 1 categories +w 1 1 1 
category +w 1 1 1 catinfo +w 1 1 1 cats +w 1 1 1 ccbill +w 1 1 1 cd +w 1 1 1 cerificate +w 1 1 1 cert +w 1 1 1 certificate +w 1 1 1 certificates +w 1 1 1 certs +w 1 1 1 cf +w 1 1 1 cfcache +w 1 1 1 cfdocs +w 1 1 1 cfide +w 1 1 1 cfusion +w 1 1 1 cgi-bin +w 1 1 1 cgi-bin2 +w 1 1 1 cgi-home +w 1 1 1 cgi-local +w 1 1 1 cgi-pub +w 1 1 1 cgi-script +w 1 1 1 cgi-shl +w 1 1 1 cgi-sys +w 1 1 1 cgi-web +w 1 1 1 cgi-win +w 1 1 1 cgibin +w 1 1 1 cgiwrap +w 1 1 1 cgm-web +w 1 1 1 change +w 1 1 1 changed +w 1 1 1 changes +w 1 1 1 charge +w 1 1 1 charges +w 1 1 1 chat +w 1 1 1 chats +w 1 1 1 checkout +w 1 1 1 child +w 1 1 1 children +w 1 1 1 cisco +w 1 1 1 cisweb +w 1 1 1 citrix +w 1 1 1 cl +w 1 1 1 claim +w 1 1 1 claims +w 1 1 1 classes +w 1 1 1 classified +w 1 1 1 classifieds +w 1 1 1 clear +w 1 1 1 click +w 1 1 1 clicks +w 1 1 1 client +w 1 1 1 clientaccesspolicy +w 1 1 1 clients +w 1 1 1 close +w 1 1 1 closed +w 1 1 1 closing +w 1 1 1 club +w 1 1 1 cluster +w 1 1 1 clusters +w 1 1 1 cmd +w 1 1 1 cms +w 1 1 1 cnf +w 1 1 1 cnt +w 1 1 1 cocoon +w 1 1 1 code +w 1 1 1 codec +w 1 1 1 codecs +w 1 1 1 codes +w 1 1 1 cognos +w 1 1 1 coldfusion +w 1 1 1 columns +w 1 1 1 com +w 1 1 1 comment +w 1 1 1 comments +w 1 1 1 commerce +w 1 1 1 commercial +w 1 1 1 common +w 1 1 1 communicator +w 1 1 1 community +w 1 1 1 compact +w 1 1 1 company +w 1 1 1 complaint +w 1 1 1 complaints +w 1 1 1 compliance +w 1 1 1 component +w 1 1 1 compressed +w 1 1 1 computer +w 1 1 1 computers +w 1 1 1 computing +w 1 1 1 conference +w 1 1 1 conferences +w 1 1 1 configs +w 1 1 1 console +w 1 1 1 consumer +w 1 1 1 contact +w 1 1 1 contacts +w 1 1 1 content +w 1 1 1 contents +w 1 1 1 contract +w 1 1 1 contracts +w 1 1 1 control +w 1 1 1 controlpanel +w 1 1 1 cookie +w 1 1 1 cookies +w 1 1 1 copies +w 1 1 1 copy +w 1 1 1 copyright +w 1 1 1 core +w 1 1 1 corp +w 1 1 1 corpo +w 1 1 1 corporate +w 1 1 1 corrections +w 1 1 1 count +w 1 1 1 counter +w 1 1 1 counters +w 1 1 1 counts +w 1 1 1 course +w 1 1 1 courses +w 1 
1 1 cover +w 1 1 1 cpanel +w 1 1 1 cr +w 1 1 1 crack +w 1 1 1 crash +w 1 1 1 crashes +w 1 1 1 create +w 1 1 1 credits +w 1 1 1 crm +w 1 1 1 cron +w 1 1 1 crons +w 1 1 1 crontab +w 1 1 1 crontabs +w 1 1 1 crossdomain +w 1 1 1 crypt +w 1 1 1 crypto +w 1 1 1 cs +w 1 1 1 css +w 1 1 1 current +w 1 1 1 custom +w 1 1 1 custom-log +w 1 1 1 custom_log +w 1 1 1 customer +w 1 1 1 customers +w 1 1 1 cv +w 1 1 1 cxf +w 1 1 1 czcmdcvt +w 1 1 1 d +w 1 1 1 daemon +w 1 1 1 daily +w 1 1 1 dana-na +w 1 1 1 data +w 1 1 1 database +w 1 1 1 databases +w 1 1 1 date +w 1 1 1 dba +w 1 1 1 dbase +w 1 1 1 dbman +w 1 1 1 dc +w 1 1 1 dcforum +w 1 1 1 de +w 1 1 1 dealer +w 1 1 1 debug +w 1 1 1 decl +w 1 1 1 declaration +w 1 1 1 declarations +w 1 1 1 decode +w 1 1 1 decrypt +w 1 1 1 def +w 1 1 1 default +w 1 1 1 defaults +w 1 1 1 definition +w 1 1 1 definitions +w 1 1 1 del +w 1 1 1 delete +w 1 1 1 deleted +w 1 1 1 demo +w 1 1 1 demos +w 1 1 1 denied +w 1 1 1 deny +w 1 1 1 design +w 1 1 1 desktop +w 1 1 1 desktops +w 1 1 1 detail +w 1 1 1 details +w 1 1 1 dev +w 1 1 1 devel +w 1 1 1 developer +w 1 1 1 developers +w 1 1 1 development +w 1 1 1 device +w 1 1 1 devices +w 1 1 1 devs +w 1 1 1 df +w 1 1 1 dialog +w 1 1 1 dialogs +w 1 1 1 diff +w 1 1 1 diffs +w 1 1 1 digest +w 1 1 1 digg +w 1 1 1 dir +w 1 1 1 directories +w 1 1 1 directory +w 1 1 1 dirs +w 1 1 1 disabled +w 1 1 1 disclaimer +w 1 1 1 display +w 1 1 1 django +w 1 1 1 dl +w 1 1 1 dm +w 1 1 1 dm-config +w 1 1 1 dms +w 1 1 1 dms0 +w 1 1 1 dns +w 1 1 1 do +w 1 1 1 doc +w 1 1 1 dock +w 1 1 1 docroot +w 1 1 1 docs +w 1 1 1 document +w 1 1 1 documentation +w 1 1 1 documents +w 1 1 1 domain +w 1 1 1 domains +w 1 1 1 down +w 1 1 1 download +w 1 1 1 downloads +w 1 1 1 drop +w 1 1 1 dropped +w 1 1 1 drupal +w 1 1 1 dummy +w 1 1 1 dump +w 1 1 1 dumps +w 1 1 1 dvd +w 1 1 1 dwr +w 1 1 1 dynamic +w 1 1 1 e +w 1 1 1 e2fs +w 1 1 1 ear +w 1 1 1 ecommerce +w 1 1 1 edge +w 1 1 1 edit +w 1 1 1 editor +w 1 1 1 edits +w 1 1 1 edu +w 1 1 1 education +w 1 1 1 ee 
+w 1 1 1 effort +w 1 1 1 efforts +w 1 1 1 egress +w 1 1 1 ejb +w 1 1 1 element +w 1 1 1 elements +w 1 1 1 em +w 1 1 1 email +w 1 1 1 emails +w 1 1 1 emea +w 1 1 1 employees +w 1 1 1 employment +w 1 1 1 empty +w 1 1 1 emu +w 1 1 1 emulator +w 1 1 1 en +w 1 1 1 en_US +w 1 1 1 encode +w 1 1 1 encrypt +w 1 1 1 eng +w 1 1 1 engine +w 1 1 1 english +w 1 1 1 enterprise +w 1 1 1 entertainment +w 1 1 1 entries +w 1 1 1 entry +w 1 1 1 env +w 1 1 1 environ +w 1 1 1 environment +w 1 1 1 ep +w 1 1 1 error +w 1 1 1 error-log +w 1 1 1 error_log +w 1 1 1 errors +w 1 1 1 es +w 1 1 1 esale +w 1 1 1 esales +w 1 1 1 etc +w 1 1 1 europe +w 1 1 1 event +w 1 1 1 events +w 1 1 1 evil +w 1 1 1 evt +w 1 1 1 ews +w 1 1 1 ex +w 1 1 1 example +w 1 1 1 examples +w 1 1 1 excalibur +w 1 1 1 exchange +w 1 1 1 exec +w 1 1 1 export +w 1 1 1 ext +w 1 1 1 ext2 +w 1 1 1 extern +w 1 1 1 external +w 1 1 1 ezshopper +w 1 1 1 f +w 1 1 1 face +w 1 1 1 faces +w 1 1 1 faculty +w 1 1 1 fail +w 1 1 1 failure +w 1 1 1 family +w 1 1 1 faq +w 1 1 1 faqs +w 1 1 1 fcgi-bin +w 1 1 1 feature +w 1 1 1 features +w 1 1 1 feed +w 1 1 1 feedback +w 1 1 1 feeds +w 1 1 1 felix +w 1 1 1 field +w 1 1 1 fields +w 1 1 1 file +w 1 1 1 fileadmin +w 1 1 1 files +w 1 1 1 filez +w 1 1 1 finance +w 1 1 1 financial +w 1 1 1 find +w 1 1 1 finger +w 1 1 1 firewall +w 1 1 1 fixed +w 1 1 1 flags +w 1 1 1 flash +w 1 1 1 flow +w 1 1 1 flows +w 1 1 1 flv +w 1 1 1 fn +w 1 1 1 folder +w 1 1 1 folders +w 1 1 1 font +w 1 1 1 fonts +w 1 1 1 foo +w 1 1 1 footer +w 1 1 1 footers +w 1 1 1 form +w 1 1 1 formatting +w 1 1 1 formmail +w 1 1 1 forms +w 1 1 1 forrest +w 1 1 1 fortune +w 1 1 1 forum +w 1 1 1 forum1 +w 1 1 1 forum2 +w 1 1 1 forumdisplay +w 1 1 1 forums +w 1 1 1 forward +w 1 1 1 foto +w 1 1 1 foundation +w 1 1 1 fr +w 1 1 1 frame +w 1 1 1 frames +w 1 1 1 framework +w 1 1 1 free +w 1 1 1 freebsd +w 1 1 1 friend +w 1 1 1 friends +w 1 1 1 frob +w 1 1 1 frontend +w 1 1 1 fs +w 1 1 1 ftp +w 1 1 1 fuck +w 1 1 1 fuckoff +w 1 1 1 fuckyou +w 1 1 1 
full +w 1 1 1 fun +w 1 1 1 func +w 1 1 1 funcs +w 1 1 1 function +w 1 1 1 functions +w 1 1 1 fusion +w 1 1 1 fw +w 1 1 1 g +w 1 1 1 galleries +w 1 1 1 gallery +w 1 1 1 game +w 1 1 1 games +w 1 1 1 ganglia +w 1 1 1 garbage +w 1 1 1 gateway +w 1 1 1 gb +w 1 1 1 geeklog +w 1 1 1 general +w 1 1 1 geronimo +w 1 1 1 get +w 1 1 1 getaccess +w 1 1 1 getjobid +w 1 1 1 gfx +w 1 1 1 gif +w 1 1 1 gitweb +w 1 1 1 glimpse +w 1 1 1 global +w 1 1 1 globals +w 1 1 1 glossary +w 1 1 1 go +w 1 1 1 goaway +w 1 1 1 google +w 1 1 1 government +w 1 1 1 gprs +w 1 1 1 grant +w 1 1 1 grants +w 1 1 1 graphics +w 1 1 1 group +w 1 1 1 groupcp +w 1 1 1 groups +w 1 1 1 gsm +w 1 1 1 guest +w 1 1 1 guestbook +w 1 1 1 guests +w 1 1 1 guide +w 1 1 1 guides +w 1 1 1 gump +w 1 1 1 gwt +w 1 1 1 h +w 1 1 1 hack +w 1 1 1 hacker +w 1 1 1 hacking +w 1 1 1 hackme +w 1 1 1 hadoop +w 1 1 1 hardcore +w 1 1 1 hardware +w 1 1 1 harmony +w 1 1 1 head +w 1 1 1 header +w 1 1 1 headers +w 1 1 1 health +w 1 1 1 hello +w 1 1 1 help +w 1 1 1 helper +w 1 1 1 helpers +w 1 1 1 hi +w 1 1 1 hidden +w 1 1 1 hide +w 1 1 1 high +w 1 1 1 hipaa +w 1 1 1 history +w 1 1 1 hit +w 1 1 1 hits +w 1 1 1 hole +w 1 1 1 home +w 1 1 1 homepage +w 1 1 1 hop +w 1 1 1 horde +w 1 1 1 hosting +w 1 1 1 hosts +w 1 1 1 howto +w 1 1 1 hp +w 1 1 1 hr +w 1 1 1 hta +w 1 1 1 htbin +w 1 1 1 htdoc +w 1 1 1 htdocs +w 1 1 1 htpasswd +w 1 1 1 http +w 1 1 1 httpd +w 1 1 1 https +w 1 1 1 httpuser +w 1 1 1 hu +w 1 1 1 hyper +w 1 1 1 i +w 1 1 1 ia +w 1 1 1 ibm +w 1 1 1 icat +w 1 1 1 icon +w 1 1 1 icons +w 1 1 1 id +w 1 1 1 idea +w 1 1 1 ideas +w 1 1 1 ids +w 1 1 1 ie +w 1 1 1 iframe +w 1 1 1 ig +w 1 1 1 ignore +w 1 1 1 iisadmin +w 1 1 1 iisadmpwd +w 1 1 1 iissamples +w 1 1 1 image +w 1 1 1 imagefolio +w 1 1 1 images +w 1 1 1 img +w 1 1 1 imgs +w 1 1 1 imp +w 1 1 1 import +w 1 1 1 important +w 1 1 1 in +w 1 1 1 inbound +w 1 1 1 incl +w 1 1 1 include +w 1 1 1 includes +w 1 1 1 incoming +w 1 1 1 incubator +w 1 1 1 index +w 1 1 1 index1 +w 1 1 1 index2 +w 1 1 1 
index_1 +w 1 1 1 index_2 +w 1 1 1 inetpub +w 1 1 1 inetsrv +w 1 1 1 inf +w 1 1 1 info +w 1 1 1 information +w 1 1 1 ingress +w 1 1 1 init +w 1 1 1 inline +w 1 1 1 input +w 1 1 1 inquire +w 1 1 1 inquiries +w 1 1 1 inquiry +w 1 1 1 insert +w 1 1 1 install +w 1 1 1 int +w 1 1 1 interim +w 1 1 1 intermediate +w 1 1 1 internal +w 1 1 1 international +w 1 1 1 internet +w 1 1 1 intl +w 1 1 1 intranet +w 1 1 1 intro +w 1 1 1 ip +w 1 1 1 ipc +w 1 1 1 ips +w 1 1 1 irc +w 1 1 1 is +w 1 1 1 isapi +w 1 1 1 iso +w 1 1 1 issues +w 1 1 1 it +w 1 1 1 item +w 1 1 1 j +w 1 1 1 j2ee +w 1 1 1 j2me +w 1 1 1 jakarta +w 1 1 1 java-plugin +w 1 1 1 javadoc +w 1 1 1 javascript +w 1 1 1 javax +w 1 1 1 jboss +w 1 1 1 jdbc +w 1 1 1 jigsaw +w 1 1 1 jj +w 1 1 1 jmx-console +w 1 1 1 job +w 1 1 1 jobs +w 1 1 1 joe +w 1 1 1 john +w 1 1 1 join +w 1 1 1 joomla +w 1 1 1 journal +w 1 1 1 jp +w 1 1 1 jpa +w 1 1 1 jpg +w 1 1 1 jre +w 1 1 1 jrun +w 1 1 1 json +w 1 1 1 jsso +w 1 1 1 jsx +w 1 1 1 juniper +w 1 1 1 junk +w 1 1 1 jvm +w 1 1 1 k +w 1 1 1 kboard +w 1 1 1 keep +w 1 1 1 kernel +w 1 1 1 keygen +w 1 1 1 keys +w 1 1 1 kids +w 1 1 1 kill +w 1 1 1 known_hosts +w 1 1 1 l +w 1 1 1 labs +w 1 1 1 lang +w 1 1 1 large +w 1 1 1 law +w 1 1 1 layout +w 1 1 1 layouts +w 1 1 1 ldap +w 1 1 1 leader +w 1 1 1 leaders +w 1 1 1 left +w 1 1 1 legacy +w 1 1 1 legal +w 1 1 1 lenya +w 1 1 1 letters +w 1 1 1 level +w 1 1 1 lg +w 1 1 1 lib +w 1 1 1 library +w 1 1 1 libs +w 1 1 1 license +w 1 1 1 licenses +w 1 1 1 line +w 1 1 1 link +w 1 1 1 links +w 1 1 1 linux +w 1 1 1 list +w 1 1 1 listinfo +w 1 1 1 lists +w 1 1 1 live +w 1 1 1 lo +w 1 1 1 loader +w 1 1 1 loading +w 1 1 1 loc +w 1 1 1 local +w 1 1 1 location +w 1 1 1 lock +w 1 1 1 locked +w 1 1 1 log4j +w 1 1 1 logfile +w 1 1 1 logging +w 1 1 1 login +w 1 1 1 logins +w 1 1 1 logo +w 1 1 1 logoff +w 1 1 1 logon +w 1 1 1 logos +w 1 1 1 logout +w 1 1 1 logs +w 1 1 1 lost+found +w 1 1 1 low +w 1 1 1 ls +w 1 1 1 lst +w 1 1 1 lucene +w 1 1 1 m +w 1 1 1 mac +w 1 1 1 mail +w 1 1 
1 mailer +w 1 1 1 mailing +w 1 1 1 mailman +w 1 1 1 mails +w 1 1 1 main +w 1 1 1 manage +w 1 1 1 management +w 1 1 1 manager +w 1 1 1 manifest +w 1 1 1 manual +w 1 1 1 manuals +w 1 1 1 map +w 1 1 1 maps +w 1 1 1 mark +w 1 1 1 marketing +w 1 1 1 master +w 1 1 1 master.passwd +w 1 1 1 match +w 1 1 1 matrix +w 1 1 1 maven +w 1 1 1 mbox +w 1 1 1 me +w 1 1 1 media +w 1 1 1 medium +w 1 1 1 mem +w 1 1 1 member +w 1 1 1 members +w 1 1 1 membership +w 1 1 1 memory +w 1 1 1 menu +w 1 1 1 messaging +w 1 1 1 meta +w 1 1 1 microsoft +w 1 1 1 migrate +w 1 1 1 migration +w 1 1 1 mina +w 1 1 1 mirror +w 1 1 1 mirrors +w 1 1 1 misc +w 1 1 1 mission +w 1 1 1 mix +w 1 1 1 mms +w 1 1 1 mobi +w 1 1 1 mobile +w 1 1 1 mock +w 1 1 1 mod +w 1 1 1 modify +w 1 1 1 mods +w 1 1 1 module +w 1 1 1 modules +w 1 1 1 mojo +w 1 1 1 money +w 1 1 1 monitoring +w 1 1 1 more +w 1 1 1 move +w 1 1 1 movie +w 1 1 1 movies +w 1 1 1 mp +w 1 1 1 mp3 +w 1 1 1 mp3s +w 1 1 1 ms +w 1 1 1 ms-sql +w 1 1 1 msadc +w 1 1 1 msadm +w 1 1 1 msg +w 1 1 1 msie +w 1 1 1 msql +w 1 1 1 mssql +w 1 1 1 mta +w 1 1 1 multimedia +w 1 1 1 music +w 1 1 1 mx +w 1 1 1 my +w 1 1 1 myfaces +w 1 1 1 myphpnuke +w 1 1 1 mysql +w 1 1 1 mysqld +w 1 1 1 n +w 1 1 1 nav +w 1 1 1 navigation +w 1 1 1 net +w 1 1 1 netbsd +w 1 1 1 nethome +w 1 1 1 nets +w 1 1 1 network +w 1 1 1 networking +w 1 1 1 new +w 1 1 1 news +w 1 1 1 newsletter +w 1 1 1 newsletters +w 1 1 1 next +w 1 1 1 nfs +w 1 1 1 nice +w 1 1 1 nl +w 1 1 1 nobody +w 1 1 1 node +w 1 1 1 none +w 1 1 1 note +w 1 1 1 notes +w 1 1 1 notification +w 1 1 1 notifications +w 1 1 1 notified +w 1 1 1 notify +w 1 1 1 ns +w 1 1 1 nuke +w 1 1 1 nul +w 1 1 1 null +w 1 1 1 o +w 1 1 1 oa_servlets +w 1 1 1 oauth +w 1 1 1 obdc +w 1 1 1 obsolete +w 1 1 1 obsoleted +w 1 1 1 odbc +w 1 1 1 ode +w 1 1 1 oem +w 1 1 1 ofbiz +w 1 1 1 office +w 1 1 1 onbound +w 1 1 1 online +w 1 1 1 op +w 1 1 1 open +w 1 1 1 openbsd +w 1 1 1 opendir +w 1 1 1 openejb +w 1 1 1 openjpa +w 1 1 1 operations +w 1 1 1 opinion +w 1 1 1 
oprocmgr-status +w 1 1 1 opt +w 1 1 1 option +w 1 1 1 options +w 1 1 1 oracle +w 1 1 1 oracle.xml.xsql.XSQLServlet +w 1 1 1 order +w 1 1 1 ordered +w 1 1 1 orders +w 1 1 1 org +w 1 1 1 osc +w 1 1 1 oscommerce +w 1 1 1 other +w 1 1 1 outgoing +w 1 1 1 outline +w 1 1 1 output +w 1 1 1 outreach +w 1 1 1 overview +w 1 1 1 owa +w 1 1 1 ows +w 1 1 1 ows-bin +w 1 1 1 p +w 1 1 1 p2p +w 1 1 1 pack +w 1 1 1 packages +w 1 1 1 page +w 1 1 1 page1 +w 1 1 1 page2 +w 1 1 1 page_1 +w 1 1 1 page_2 +w 1 1 1 pages +w 1 1 1 paid +w 1 1 1 panel +w 1 1 1 paper +w 1 1 1 papers +w 1 1 1 parse +w 1 1 1 partner +w 1 1 1 partners +w 1 1 1 party +w 1 1 1 pass +w 1 1 1 passwd +w 1 1 1 password +w 1 1 1 passwords +w 1 1 1 past +w 1 1 1 patch +w 1 1 1 patches +w 1 1 1 paypal +w 1 1 1 pc +w 1 1 1 pci +w 1 1 1 pda +w 1 1 1 pdf +w 1 1 1 pdfs +w 1 1 1 peek +w 1 1 1 pending +w 1 1 1 people +w 1 1 1 perf +w 1 1 1 performance +w 1 1 1 perl +w 1 1 1 personal +w 1 1 1 pg +w 1 1 1 phf +w 1 1 1 phone +w 1 1 1 phones +w 1 1 1 phorum +w 1 1 1 photo +w 1 1 1 photos +w 1 1 1 phpBB +w 1 1 1 phpBB2 +w 1 1 1 phpEventCalendar +w 1 1 1 phpMyAdmin +w 1 1 1 phpbb +w 1 1 1 phpmyadmin +w 1 1 1 phpnuke +w 1 1 1 phps +w 1 1 1 pic +w 1 1 1 pics +w 1 1 1 pictures +w 1 1 1 pii +w 1 1 1 ping +w 1 1 1 pipermail +w 1 1 1 piranha +w 1 1 1 pix +w 1 1 1 pixel +w 1 1 1 pkg +w 1 1 1 pkgs +w 1 1 1 plain +w 1 1 1 play +w 1 1 1 pls +w 1 1 1 plugin +w 1 1 1 plugins +w 1 1 1 pm +w 1 1 1 png +w 1 1 1 poi +w 1 1 1 policies +w 1 1 1 policy +w 1 1 1 politics +w 1 1 1 poll +w 1 1 1 polls +w 1 1 1 pop +w 1 1 1 pop3 +w 1 1 1 porn +w 1 1 1 port +w 1 1 1 portal +w 1 1 1 portals +w 1 1 1 portfolio +w 1 1 1 pos +w 1 1 1 post +w 1 1 1 posted +w 1 1 1 postgres +w 1 1 1 postgresql +w 1 1 1 postnuke +w 1 1 1 postpaid +w 1 1 1 posts +w 1 1 1 ppt +w 1 1 1 pr +w 1 1 1 pr0n +w 1 1 1 premium +w 1 1 1 prepaid +w 1 1 1 presentation +w 1 1 1 presentations +w 1 1 1 preserve +w 1 1 1 press +w 1 1 1 preview +w 1 1 1 previews +w 1 1 1 previous +w 1 1 1 pricing +w 
1 1 1 print +w 1 1 1 printenv +w 1 1 1 printer +w 1 1 1 printers +w 1 1 1 priv +w 1 1 1 privacy +w 1 1 1 private +w 1 1 1 problems +w 1 1 1 proc +w 1 1 1 procedures +w 1 1 1 prod +w 1 1 1 product +w 1 1 1 product_info +w 1 1 1 production +w 1 1 1 products +w 1 1 1 profile +w 1 1 1 profiles +w 1 1 1 profiling +w 1 1 1 program +w 1 1 1 programming +w 1 1 1 programs +w 1 1 1 project +w 1 1 1 projects +w 1 1 1 promo +w 1 1 1 prop +w 1 1 1 properties +w 1 1 1 property +w 1 1 1 props +w 1 1 1 protect +w 1 1 1 proto +w 1 1 1 proxies +w 1 1 1 proxy +w 1 1 1 prv +w 1 1 1 ps +w 1 1 1 psql +w 1 1 1 pt +w 1 1 1 pub +w 1 1 1 public +w 1 1 1 publication +w 1 1 1 publications +w 1 1 1 pubs +w 1 1 1 pull +w 1 1 1 purchase +w 1 1 1 purchases +w 1 1 1 purchasing +w 1 1 1 push +w 1 1 1 pw +w 1 1 1 pwd +w 1 1 1 python +w 1 1 1 q +w 1 1 1 qpid +w 1 1 1 queries +w 1 1 1 query +w 1 1 1 queue +w 1 1 1 queues +w 1 1 1 quote +w 1 1 1 quotes +w 1 1 1 r +w 1 1 1 radio +w 1 1 1 random +w 1 1 1 rar +w 1 1 1 rdf +w 1 1 1 read +w 1 1 1 readme +w 1 1 1 realestate +w 1 1 1 receive +w 1 1 1 received +w 1 1 1 recharge +w 1 1 1 record +w 1 1 1 records +w 1 1 1 recovery +w 1 1 1 recycle +w 1 1 1 recycled +w 1 1 1 redir +w 1 1 1 redirect +w 1 1 1 reference +w 1 1 1 reg +w 1 1 1 register +w 1 1 1 registered +w 1 1 1 registration +w 1 1 1 registrations +w 1 1 1 release +w 1 1 1 releases +w 1 1 1 remote +w 1 1 1 remove +w 1 1 1 removed +w 1 1 1 render +w 1 1 1 rendered +w 1 1 1 rep +w 1 1 1 repl +w 1 1 1 replica +w 1 1 1 replicas +w 1 1 1 replicate +w 1 1 1 replicated +w 1 1 1 replication +w 1 1 1 replicator +w 1 1 1 reply +w 1 1 1 report +w 1 1 1 reporting +w 1 1 1 reports +w 1 1 1 reprints +w 1 1 1 req +w 1 1 1 reqs +w 1 1 1 request +w 1 1 1 requests +w 1 1 1 requisition +w 1 1 1 requisitions +w 1 1 1 res +w 1 1 1 research +w 1 1 1 resin +w 1 1 1 resource +w 1 1 1 resources +w 1 1 1 rest +w 1 1 1 restore +w 1 1 1 restored +w 1 1 1 restricted +w 1 1 1 results +w 1 1 1 retail +w 1 1 1 reverse +w 1 1 1 
reversed +w 1 1 1 revert +w 1 1 1 reverted +w 1 1 1 review +w 1 1 1 reviews +w 1 1 1 right +w 1 1 1 roam +w 1 1 1 roaming +w 1 1 1 robot +w 1 1 1 robots +w 1 1 1 roller +w 1 1 1 room +w 1 1 1 root +w 1 1 1 rpc +w 1 1 1 rtf +w 1 1 1 ru +w 1 1 1 rule +w 1 1 1 rules +w 1 1 1 run +w 1 1 1 rwservlet +w 1 1 1 s +w 1 1 1 sale +w 1 1 1 sales +w 1 1 1 sam +w 1 1 1 samba +w 1 1 1 sample +w 1 1 1 samples +w 1 1 1 sav +w 1 1 1 save +w 1 1 1 saved +w 1 1 1 saves +w 1 1 1 sbin +w 1 1 1 scan +w 1 1 1 scanned +w 1 1 1 scans +w 1 1 1 sched +w 1 1 1 schedule +w 1 1 1 scheduled +w 1 1 1 scheduling +w 1 1 1 schema +w 1 1 1 science +w 1 1 1 screen +w 1 1 1 screens +w 1 1 1 screenshot +w 1 1 1 screenshots +w 1 1 1 script +w 1 1 1 scriptlet +w 1 1 1 scriptlets +w 1 1 1 scripts +w 1 1 1 sdk +w 1 1 1 se +w 1 1 1 search +w 1 1 1 sec +w 1 1 1 secret +w 1 1 1 section +w 1 1 1 sections +w 1 1 1 secure +w 1 1 1 secured +w 1 1 1 security +w 1 1 1 seed +w 1 1 1 sell +w 1 1 1 send +w 1 1 1 sendmail +w 1 1 1 sendto +w 1 1 1 sent +w 1 1 1 serial +w 1 1 1 serv +w 1 1 1 serve +w 1 1 1 server +w 1 1 1 server-info +w 1 1 1 server-status +w 1 1 1 servers +w 1 1 1 service +w 1 1 1 services +w 1 1 1 servlet +w 1 1 1 servlets +w 1 1 1 session +w 1 1 1 sessions +w 1 1 1 setting +w 1 1 1 settings +w 1 1 1 setup +w 1 1 1 share +w 1 1 1 shared +w 1 1 1 shares +w 1 1 1 shell +w 1 1 1 ship +w 1 1 1 shipped +w 1 1 1 shipping +w 1 1 1 shop +w 1 1 1 shopper +w 1 1 1 shopping +w 1 1 1 shops +w 1 1 1 shoutbox +w 1 1 1 show +w 1 1 1 show_post +w 1 1 1 show_thread +w 1 1 1 showcat +w 1 1 1 showenv +w 1 1 1 showjobs +w 1 1 1 showmap +w 1 1 1 showmsg +w 1 1 1 showpost +w 1 1 1 showthread +w 1 1 1 sign +w 1 1 1 signoff +w 1 1 1 signon +w 1 1 1 signup +w 1 1 1 simple +w 1 1 1 sink +w 1 1 1 site +w 1 1 1 site-map +w 1 1 1 site_map +w 1 1 1 sitemap +w 1 1 1 sites +w 1 1 1 skel +w 1 1 1 skin +w 1 1 1 skins +w 1 1 1 skip +w 1 1 1 sl +w 1 1 1 sling +w 1 1 1 sm +w 1 1 1 small +w 1 1 1 sms +w 1 1 1 smtp +w 1 1 1 snoop +w 1 1 1 so 
+w 1 1 1 soap +w 1 1 1 soaprouter +w 1 1 1 soft +w 1 1 1 software +w 1 1 1 solaris +w 1 1 1 sold +w 1 1 1 solution +w 1 1 1 solutions +w 1 1 1 source +w 1 1 1 sources +w 1 1 1 soutbox +w 1 1 1 sox +w 1 1 1 sp +w 1 1 1 space +w 1 1 1 spacer +w 1 1 1 spam +w 1 1 1 special +w 1 1 1 specials +w 1 1 1 sponsor +w 1 1 1 sponsors +w 1 1 1 spool +w 1 1 1 sport +w 1 1 1 sports +w 1 1 1 sqlnet +w 1 1 1 squirrel +w 1 1 1 squirrelmail +w 1 1 1 src +w 1 1 1 srv +w 1 1 1 ss +w 1 1 1 ssh +w 1 1 1 ssi +w 1 1 1 ssl +w 1 1 1 sslvpn +w 1 1 1 ssn +w 1 1 1 sso +w 1 1 1 staff +w 1 1 1 staging +w 1 1 1 standard +w 1 1 1 standards +w 1 1 1 star +w 1 1 1 start +w 1 1 1 stat +w 1 1 1 statement +w 1 1 1 statements +w 1 1 1 static +w 1 1 1 staticpages +w 1 1 1 statistic +w 1 1 1 statistics +w 1 1 1 stats +w 1 1 1 status +w 1 1 1 stock +w 1 1 1 storage +w 1 1 1 store +w 1 1 1 stored +w 1 1 1 stories +w 1 1 1 story +w 1 1 1 strut +w 1 1 1 struts +w 1 1 1 student +w 1 1 1 students +w 1 1 1 stuff +w 1 1 1 style +w 1 1 1 styles +w 1 1 1 submissions +w 1 1 1 submit +w 1 1 1 subscribe +w 1 1 1 subscriber +w 1 1 1 subscribers +w 1 1 1 subscription +w 1 1 1 subscriptions +w 1 1 1 success +w 1 1 1 suite +w 1 1 1 suites +w 1 1 1 sun +w 1 1 1 sunos +w 1 1 1 super +w 1 1 1 support +w 1 1 1 surf +w 1 1 1 survey +w 1 1 1 surveys +w 1 1 1 swf +w 1 1 1 sws +w 1 1 1 synapse +w 1 1 1 sync +w 1 1 1 synced +w 1 1 1 sys +w 1 1 1 system +w 1 1 1 systems +w 1 1 1 sysuser +w 1 1 1 t +w 1 1 1 tag +w 1 1 1 tags +w 1 1 1 tape +w 1 1 1 tapes +w 1 1 1 tapestry +w 1 1 1 tar +w 1 1 1 tar.bz2 +w 1 1 1 tb +w 1 1 1 tcl +w 1 1 1 team +w 1 1 1 tech +w 1 1 1 technical +w 1 1 1 technology +w 1 1 1 tel +w 1 1 1 tele +w 1 1 1 templ +w 1 1 1 template +w 1 1 1 templates +w 1 1 1 terms +w 1 1 1 test-cgi +w 1 1 1 test-env +w 1 1 1 test1 +w 1 1 1 test123 +w 1 1 1 test1234 +w 1 1 1 test2 +w 1 1 1 test3 +w 1 1 1 testimonial +w 1 1 1 testimonials +w 1 1 1 testing +w 1 1 1 tests +w 1 1 1 texis +w 1 1 1 text +w 1 1 1 texts +w 1 1 1 theme +w 1 
1 1 themes +w 1 1 1 thread +w 1 1 1 threads +w 1 1 1 thumb +w 1 1 1 thumbnail +w 1 1 1 thumbnails +w 1 1 1 thumbs +w 1 1 1 tickets +w 1 1 1 tiki +w 1 1 1 tiles +w 1 1 1 tip +w 1 1 1 tips +w 1 1 1 title +w 1 1 1 tls +w 1 1 1 tmpl +w 1 1 1 tmps +w 1 1 1 tn +w 1 1 1 toc +w 1 1 1 todo +w 1 1 1 toggle +w 1 1 1 tomcat +w 1 1 1 tool +w 1 1 1 toolbar +w 1 1 1 toolkit +w 1 1 1 tools +w 1 1 1 top +w 1 1 1 topic +w 1 1 1 topics +w 1 1 1 torrent +w 1 1 1 torrents +w 1 1 1 tos +w 1 1 1 tour +w 1 1 1 tpl +w 1 1 1 tpv +w 1 1 1 tr +w 1 1 1 trace +w 1 1 1 traceroute +w 1 1 1 traces +w 1 1 1 track +w 1 1 1 trackback +w 1 1 1 tracker +w 1 1 1 trackers +w 1 1 1 tracking +w 1 1 1 tracks +w 1 1 1 traffic +w 1 1 1 trailer +w 1 1 1 trailers +w 1 1 1 training +w 1 1 1 trans +w 1 1 1 transparent +w 1 1 1 transport +w 1 1 1 trash +w 1 1 1 travel +w 1 1 1 treasury +w 1 1 1 tree +w 1 1 1 trees +w 1 1 1 trial +w 1 1 1 trunk +w 1 1 1 tsweb +w 1 1 1 tt +w 1 1 1 turbine +w 1 1 1 tuscany +w 1 1 1 tutorial +w 1 1 1 tutorials +w 1 1 1 tv +w 1 1 1 tweak +w 1 1 1 type +w 1 1 1 typo3 +w 1 1 1 typo3conf +w 1 1 1 u +w 1 1 1 ubb +w 1 1 1 uds +w 1 1 1 uk +w 1 1 1 umts +w 1 1 1 union +w 1 1 1 unix +w 1 1 1 unlock +w 1 1 1 unreg +w 1 1 1 unregister +w 1 1 1 up +w 1 1 1 upd +w 1 1 1 update +w 1 1 1 updated +w 1 1 1 updater +w 1 1 1 updates +w 1 1 1 upload +w 1 1 1 uploads +w 1 1 1 url +w 1 1 1 us +w 1 1 1 usa +w 1 1 1 usage +w 1 1 1 user +w 1 1 1 userlog +w 1 1 1 users +w 1 1 1 usr +w 1 1 1 util +w 1 1 1 utilities +w 1 1 1 utility +w 1 1 1 utils +w 1 1 1 v +w 1 1 1 v1 +w 1 1 1 v2 +w 1 1 1 var +w 1 1 1 vault +w 1 1 1 vector +w 1 1 1 velocity +w 1 1 1 vendor +w 1 1 1 ver +w 1 1 1 ver1 +w 1 1 1 ver2 +w 1 1 1 version +w 1 1 1 vfs +w 1 1 1 video +w 1 1 1 videos +w 1 1 1 view +w 1 1 1 view-source +w 1 1 1 viewcvs +w 1 1 1 viewforum +w 1 1 1 viewonline +w 1 1 1 views +w 1 1 1 viewsource +w 1 1 1 viewsvn +w 1 1 1 viewtopic +w 1 1 1 viewvc +w 1 1 1 virtual +w 1 1 1 vm +w 1 1 1 voip +w 1 1 1 vol +w 1 1 1 vpn +w 1 1 1 w 
+w 1 1 1 w3 +w 1 1 1 w3c +w 1 1 1 wa +w 1 1 1 wap +w 1 1 1 war +w 1 1 1 warez +w 1 1 1 way-board +w 1 1 1 wbboard +w 1 1 1 wc +w 1 1 1 weather +w 1 1 1 web +w 1 1 1 web-beans +w 1 1 1 web-console +w 1 1 1 webaccess +w 1 1 1 webadmin +w 1 1 1 webagent +w 1 1 1 webalizer +w 1 1 1 webapp +w 1 1 1 webb +w 1 1 1 webbbs +w 1 1 1 webboard +w 1 1 1 webcalendar +w 1 1 1 webcart +w 1 1 1 webcasts +w 1 1 1 webcgi +w 1 1 1 webchat +w 1 1 1 webdata +w 1 1 1 webdav +w 1 1 1 weblog +w 1 1 1 weblogic +w 1 1 1 weblogs +w 1 1 1 webmail +w 1 1 1 webplus +w 1 1 1 webshop +w 1 1 1 website +w 1 1 1 websphere +w 1 1 1 webstats +w 1 1 1 websvn +w 1 1 1 webwork +w 1 1 1 welcome +w 1 1 1 whitepapers +w 1 1 1 whois +w 1 1 1 whosonline +w 1 1 1 wicket +w 1 1 1 wiki +w 1 1 1 win +w 1 1 1 win32 +w 1 1 1 windows +w 1 1 1 winnt +w 1 1 1 wireless +w 1 1 1 wml +w 1 1 1 word +w 1 1 1 wordpress +w 1 1 1 work +w 1 1 1 working +w 1 1 1 world +w 1 1 1 wp +w 1 1 1 wp-content +w 1 1 1 wp-includes +w 1 1 1 wp-login +w 1 1 1 wrap +w 1 1 1 ws-client +w 1 1 1 ws_ftp +w 1 1 1 wtai +w 1 1 1 www +w 1 1 1 www-sql +w 1 1 1 www1 +w 1 1 1 www2 +w 1 1 1 www3 +w 1 1 1 wwwboard +w 1 1 1 wwwroot +w 1 1 1 wwwstats +w 1 1 1 wwwthreads +w 1 1 1 wwwuser +w 1 1 1 x +w 1 1 1 xalan +w 1 1 1 xerces +w 1 1 1 xhtml +w 1 1 1 xmlrpc +w 1 1 1 xslt +w 1 1 1 xsql +w 1 1 1 xxx +w 1 1 1 xyzzy +w 1 1 1 y +w 1 1 1 yahoo +w 1 1 1 youtube +w 1 1 1 yt +w 1 1 1 z +w 1 1 1 zboard +w 1 1 1 zend +w 1 1 1 zero +w 1 1 1 zipfiles +w 1 1 1 zips +w 1 1 1 zope +w 1 1 1 zorum +w 1 1 1 ~admin +w 1 1 1 ~apache +w 1 1 1 ~bin +w 1 1 1 ~bob +w 1 1 1 ~ftp +w 1 1 1 ~guest +w 1 1 1 ~http +w 1 1 1 ~httpd +w 1 1 1 ~john +w 1 1 1 ~log +w 1 1 1 ~logs +w 1 1 1 ~lp +w 1 1 1 ~mark +w 1 1 1 ~matt +w 1 1 1 ~nobody +w 1 1 1 ~root +w 1 1 1 ~test +w 1 1 1 ~tmp +w 1 1 1 ~www diff --git a/dictionaries/extensions-only.wl b/dictionaries/extensions-only.wl new file mode 100644 index 0000000..4c13a96 --- /dev/null +++ b/dictionaries/extensions-only.wl @@ -0,0 +1,100 @@ +e 1 1 1 
asmx +e 1 1 1 asp +e 1 1 1 aspx +e 1 1 1 bak +e 1 1 1 bat +e 1 1 1 bin +e 1 1 1 bz2 +e 1 1 1 c +e 1 1 1 cc +e 1 1 1 cfg +e 1 1 1 cgi +e 1 1 1 class +e 1 1 1 conf +e 1 1 1 config +e 1 1 1 cpp +e 1 1 1 cs +e 1 1 1 csv +e 1 1 1 dat +e 1 1 1 db +e 1 1 1 dll +e 1 1 1 do +e 1 1 1 doc +e 1 1 1 dump +e 1 1 1 ep +e 1 1 1 err +e 1 1 1 error +e 1 1 1 exe +e 1 1 1 gif +e 1 1 1 gz +e 1 1 1 htm +e 1 1 1 html +e 1 1 1 inc +e 1 1 1 ini +e 1 1 1 java +e 1 1 1 jhtml +e 1 1 1 jpg +e 1 1 1 js +e 1 1 1 jsf +e 1 1 1 jsp +e 1 1 1 key +e 1 1 1 lib +e 1 1 1 log +e 1 1 1 lst +e 1 1 1 manifest +e 1 1 1 mdb +e 1 1 1 meta +e 1 1 1 msg +e 1 1 1 nsf +e 1 1 1 o +e 1 1 1 old +e 1 1 1 ora +e 1 1 1 orig +e 1 1 1 out +e 1 1 1 part +e 1 1 1 pdf +e 1 1 1 php +e 1 1 1 php3 +e 1 1 1 pl +e 1 1 1 pm +e 1 1 1 png +e 1 1 1 ppt +e 1 1 1 properties +e 1 1 1 py +e 1 1 1 rar +e 1 1 1 rss +e 1 1 1 rtf +e 1 1 1 save +e 1 1 1 sh +e 1 1 1 shtml +e 1 1 1 so +e 1 1 1 sql +e 1 1 1 stackdump +e 1 1 1 swf +e 1 1 1 tar +e 1 1 1 tar.bz2 +e 1 1 1 tar.gz +e 1 1 1 temp +e 1 1 1 test +e 1 1 1 tgz +e 1 1 1 tmp +e 1 1 1 trace +e 1 1 1 txt +e 1 1 1 vb +e 1 1 1 vbs +e 1 1 1 ws +e 1 1 1 xls +e 1 1 1 xml +e 1 1 1 xsl +e 1 1 1 zip +w 1 1 1 AggreSpy +w 1 1 1 DMSDump +w 1 1 1 dms0 +w 1 1 1 dmse 1 1 1 7z +w 1 1 1 getjobid +w 1 1 1 oprocmgr-status +w 1 1 1 rwservlet +w 1 1 1 showenv +w 1 1 1 showjobs +w 1 1 1 showmap +w 1 1 1 soaprouter diff --git a/dictionaries/minimal.wl b/dictionaries/minimal.wl new file mode 100644 index 0000000..c8f707f --- /dev/null +++ b/dictionaries/minimal.wl @@ -0,0 +1,1892 @@ +e 1 1 1 bak +e 1 1 1 cfg +e 1 1 1 class +e 1 1 1 cnf +e 1 1 1 conf +e 1 1 1 config +e 1 1 1 csv +e 1 1 1 err +e 1 1 1 error +e 1 1 1 html +e 1 1 1 inc +e 1 1 1 ini +e 1 1 1 java +e 1 1 1 key +e 1 1 1 log +e 1 1 1 old +e 1 1 1 orig +e 1 1 1 out +e 1 1 1 part +e 1 1 1 pl +e 1 1 1 sql +e 1 1 1 temp +e 1 1 1 test +e 1 1 1 tmp +e 1 1 1 txt +e 1 1 1 xml +e 1 1 1 zip +w 1 1 1 .bash_history +w 1 1 1 .bashrc +w 1 1 1 .cvsignore +w 1 1 1 .history 
+w 1 1 1 .htaccess +w 1 1 1 .htpasswd +w 1 1 1 .passwd +w 1 1 1 .perf +w 1 1 1 .ssh +w 1 1 1 .svn +w 1 1 1 .web +w 1 1 1 0 +w 1 1 1 00 +w 1 1 1 01 +w 1 1 1 02 +w 1 1 1 03 +w 1 1 1 04 +w 1 1 1 05 +w 1 1 1 06 +w 1 1 1 07 +w 1 1 1 08 +w 1 1 1 09 +w 1 1 1 1 +w 1 1 1 10 +w 1 1 1 100 +w 1 1 1 1000 +w 1 1 1 1001 +w 1 1 1 101 +w 1 1 1 11 +w 1 1 1 12 +w 1 1 1 13 +w 1 1 1 14 +w 1 1 1 15 +w 1 1 1 1990 +w 1 1 1 1991 +w 1 1 1 1992 +w 1 1 1 1993 +w 1 1 1 1994 +w 1 1 1 1995 +w 1 1 1 1996 +w 1 1 1 1997 +w 1 1 1 1998 +w 1 1 1 1999 +w 1 1 1 2 +w 1 1 1 20 +w 1 1 1 200 +w 1 1 1 2000 +w 1 1 1 2001 +w 1 1 1 2002 +w 1 1 1 2003 +w 1 1 1 2004 +w 1 1 1 2005 +w 1 1 1 2006 +w 1 1 1 2007 +w 1 1 1 2008 +w 1 1 1 2009 +w 1 1 1 2010 +w 1 1 1 2011 +w 1 1 1 2012 +w 1 1 1 21 +w 1 1 1 22 +w 1 1 1 23 +w 1 1 1 24 +w 1 1 1 25 +w 1 1 1 2g +w 1 1 1 3 +w 1 1 1 300 +w 1 1 1 3g +w 1 1 1 4 +w 1 1 1 42 +w 1 1 1 5 +w 1 1 1 50 +w 1 1 1 500 +w 1 1 1 51 +w 1 1 1 6 +w 1 1 1 7 +w 1 1 1 7z +w 1 1 1 8 +w 1 1 1 9 +w 1 1 1 ADM +w 1 1 1 ADMIN +w 1 1 1 AggreSpy +w 1 1 1 AppsLocalLogin +w 1 1 1 AppsLogin +w 1 1 1 BUILD +w 1 1 1 CMS +w 1 1 1 CVS +w 1 1 1 DB +w 1 1 1 DMSDump +w 1 1 1 Documents and Settings +w 1 1 1 Entries +w 1 1 1 FCKeditor +w 1 1 1 JMXSoapAdapter +w 1 1 1 LICENSE +w 1 1 1 MANIFEST.MF +w 1 1 1 META-INF +w 1 1 1 Makefile +w 1 1 1 OA +w 1 1 1 OAErrorDetailPage +w 1 1 1 OA_HTML +w 1 1 1 Program Files +w 1 1 1 README +w 1 1 1 Readme +w 1 1 1 Recycled +w 1 1 1 Root +w 1 1 1 SQL +w 1 1 1 SUNWmc +w 1 1 1 SiteScope +w 1 1 1 SiteServer +w 1 1 1 Spy +w 1 1 1 TEMP +w 1 1 1 TMP +w 1 1 1 TODO +w 1 1 1 Thumbs.db +w 1 1 1 WEB-INF +w 1 1 1 WS_FTP +w 1 1 1 XXX +w 1 1 1 _ +w 1 1 1 _adm +w 1 1 1 _admin +w 1 1 1 _files +w 1 1 1 _include +w 1 1 1 _js +w 1 1 1 _mem_bin +w 1 1 1 _old +w 1 1 1 _pages +w 1 1 1 _private +w 1 1 1 _res +w 1 1 1 _source +w 1 1 1 _src +w 1 1 1 _test +w 1 1 1 _vti_bin +w 1 1 1 _vti_cnf +w 1 1 1 _vti_pvt +w 1 1 1 _vti_txt +w 1 1 1 _www +w 1 1 1 a +w 1 1 1 aa +w 1 1 1 aaa +w 1 1 1 abc +w 1 1 1 abc123 +w 1 1 
1 abcd +w 1 1 1 abcd1234 +w 1 1 1 about +w 1 1 1 access +w 1 1 1 access-log +w 1 1 1 access-log.1 +w 1 1 1 access.1 +w 1 1 1 access_log +w 1 1 1 access_log.1 +w 1 1 1 accessibility +w 1 1 1 account +w 1 1 1 accounting +w 1 1 1 accounts +w 1 1 1 action +w 1 1 1 actions +w 1 1 1 active +w 1 1 1 activex +w 1 1 1 ad +w 1 1 1 adclick +w 1 1 1 add +w 1 1 1 addressbook +w 1 1 1 adm +w 1 1 1 admin +w 1 1 1 admin_ +w 1 1 1 ads +w 1 1 1 adv +w 1 1 1 advertise +w 1 1 1 advertising +w 1 1 1 affiliate +w 1 1 1 affiliates +w 1 1 1 agenda +w 1 1 1 agent +w 1 1 1 agents +w 1 1 1 ajax +w 1 1 1 album +w 1 1 1 albums +w 1 1 1 alert +w 1 1 1 alerts +w 1 1 1 alias +w 1 1 1 aliases +w 1 1 1 all +w 1 1 1 alpha +w 1 1 1 alumni +w 1 1 1 analog +w 1 1 1 announcement +w 1 1 1 announcements +w 1 1 1 anon +w 1 1 1 anonymous +w 1 1 1 ansi +w 1 1 1 apac +w 1 1 1 apache +w 1 1 1 apexec +w 1 1 1 api +w 1 1 1 apis +w 1 1 1 app +w 1 1 1 appeal +w 1 1 1 appeals +w 1 1 1 append +w 1 1 1 appl +w 1 1 1 apple +w 1 1 1 appliation +w 1 1 1 applications +w 1 1 1 apps +w 1 1 1 apr +w 1 1 1 arch +w 1 1 1 archive +w 1 1 1 archives +w 1 1 1 array +w 1 1 1 art +w 1 1 1 article +w 1 1 1 articles +w 1 1 1 artwork +w 1 1 1 ascii +w 1 1 1 asdf +w 1 1 1 asmx +w 1 1 1 asp +w 1 1 1 aspx +w 1 1 1 asset +w 1 1 1 assets +w 1 1 1 atom +w 1 1 1 attach +w 1 1 1 attachment +w 1 1 1 attachments +w 1 1 1 attachs +w 1 1 1 attic +w 1 1 1 audio +w 1 1 1 audit +w 1 1 1 audits +w 1 1 1 auth +w 1 1 1 author +w 1 1 1 authorized_keys +w 1 1 1 authors +w 1 1 1 auto +w 1 1 1 automatic +w 1 1 1 automation +w 1 1 1 avatar +w 1 1 1 avatars +w 1 1 1 award +w 1 1 1 awards +w 1 1 1 awl +w 1 1 1 awstats +w 1 1 1 b +w 1 1 1 b2b +w 1 1 1 b2c +w 1 1 1 back +w 1 1 1 backdoor +w 1 1 1 backend +w 1 1 1 backup +w 1 1 1 backups +w 1 1 1 bandwidth +w 1 1 1 bank +w 1 1 1 banks +w 1 1 1 banner +w 1 1 1 banners +w 1 1 1 bar +w 1 1 1 base +w 1 1 1 bash +w 1 1 1 basic +w 1 1 1 basket +w 1 1 1 baskets +w 1 1 1 bat +w 1 1 1 batch +w 1 1 1 baz +w 1 1 1 bb +w 1 
1 1 bb-hist +w 1 1 1 bb-histlog +w 1 1 1 bboard +w 1 1 1 bbs +w 1 1 1 beans +w 1 1 1 beehive +w 1 1 1 benefits +w 1 1 1 beta +w 1 1 1 bfc +w 1 1 1 big +w 1 1 1 bigip +w 1 1 1 bill +w 1 1 1 billing +w 1 1 1 bin +w 1 1 1 binaries +w 1 1 1 binary +w 1 1 1 bins +w 1 1 1 bio +w 1 1 1 bios +w 1 1 1 biz +w 1 1 1 bkup +w 1 1 1 blah +w 1 1 1 blank +w 1 1 1 blog +w 1 1 1 blogger +w 1 1 1 bloggers +w 1 1 1 blogs +w 1 1 1 board +w 1 1 1 bofh +w 1 1 1 book +w 1 1 1 books +w 1 1 1 boot +w 1 1 1 bottom +w 1 1 1 broken +w 1 1 1 broker +w 1 1 1 browse +w 1 1 1 bs +w 1 1 1 bsd +w 1 1 1 bugs +w 1 1 1 build +w 1 1 1 buildr +w 1 1 1 bulk +w 1 1 1 bullet +w 1 1 1 business +w 1 1 1 button +w 1 1 1 buttons +w 1 1 1 buy +w 1 1 1 buynow +w 1 1 1 bypass +w 1 1 1 bz2 +w 1 1 1 c +w 1 1 1 ca +w 1 1 1 cache +w 1 1 1 cal +w 1 1 1 calendar +w 1 1 1 camel +w 1 1 1 car +w 1 1 1 card +w 1 1 1 cards +w 1 1 1 career +w 1 1 1 careers +w 1 1 1 cars +w 1 1 1 cart +w 1 1 1 carts +w 1 1 1 cat +w 1 1 1 catalog +w 1 1 1 catalogs +w 1 1 1 catalyst +w 1 1 1 categories +w 1 1 1 category +w 1 1 1 catinfo +w 1 1 1 cats +w 1 1 1 cc +w 1 1 1 ccbill +w 1 1 1 cd +w 1 1 1 cerificate +w 1 1 1 cert +w 1 1 1 certificate +w 1 1 1 certificates +w 1 1 1 certs +w 1 1 1 cf +w 1 1 1 cfcache +w 1 1 1 cfdocs +w 1 1 1 cfide +w 1 1 1 cfm +w 1 1 1 cfusion +w 1 1 1 cgi +w 1 1 1 cgi-bin +w 1 1 1 cgi-bin2 +w 1 1 1 cgi-home +w 1 1 1 cgi-local +w 1 1 1 cgi-pub +w 1 1 1 cgi-script +w 1 1 1 cgi-shl +w 1 1 1 cgi-sys +w 1 1 1 cgi-web +w 1 1 1 cgi-win +w 1 1 1 cgibin +w 1 1 1 cgiwrap +w 1 1 1 cgm-web +w 1 1 1 change +w 1 1 1 changed +w 1 1 1 changes +w 1 1 1 charge +w 1 1 1 charges +w 1 1 1 chat +w 1 1 1 chats +w 1 1 1 checkout +w 1 1 1 child +w 1 1 1 children +w 1 1 1 cisco +w 1 1 1 cisweb +w 1 1 1 citrix +w 1 1 1 cl +w 1 1 1 claim +w 1 1 1 claims +w 1 1 1 classes +w 1 1 1 classified +w 1 1 1 classifieds +w 1 1 1 clear +w 1 1 1 click +w 1 1 1 clicks +w 1 1 1 client +w 1 1 1 clientaccesspolicy +w 1 1 1 clients +w 1 1 1 close +w 1 1 1 closed 
+w 1 1 1 closing +w 1 1 1 club +w 1 1 1 cluster +w 1 1 1 clusters +w 1 1 1 cmd +w 1 1 1 cms +w 1 1 1 cnf +w 1 1 1 cnt +w 1 1 1 cocoon +w 1 1 1 code +w 1 1 1 codec +w 1 1 1 codecs +w 1 1 1 codes +w 1 1 1 cognos +w 1 1 1 coldfusion +w 1 1 1 columns +w 1 1 1 com +w 1 1 1 comment +w 1 1 1 comments +w 1 1 1 commerce +w 1 1 1 commercial +w 1 1 1 common +w 1 1 1 communicator +w 1 1 1 community +w 1 1 1 compact +w 1 1 1 company +w 1 1 1 complaint +w 1 1 1 complaints +w 1 1 1 compliance +w 1 1 1 component +w 1 1 1 compressed +w 1 1 1 computer +w 1 1 1 computers +w 1 1 1 computing +w 1 1 1 conference +w 1 1 1 conferences +w 1 1 1 configs +w 1 1 1 console +w 1 1 1 consumer +w 1 1 1 contact +w 1 1 1 contacts +w 1 1 1 content +w 1 1 1 contents +w 1 1 1 contract +w 1 1 1 contracts +w 1 1 1 control +w 1 1 1 controlpanel +w 1 1 1 cookie +w 1 1 1 cookies +w 1 1 1 copies +w 1 1 1 copy +w 1 1 1 copyright +w 1 1 1 core +w 1 1 1 corp +w 1 1 1 corpo +w 1 1 1 corporate +w 1 1 1 corrections +w 1 1 1 count +w 1 1 1 counter +w 1 1 1 counters +w 1 1 1 counts +w 1 1 1 course +w 1 1 1 courses +w 1 1 1 cover +w 1 1 1 cpanel +w 1 1 1 cpp +w 1 1 1 cr +w 1 1 1 crack +w 1 1 1 crash +w 1 1 1 crashes +w 1 1 1 create +w 1 1 1 credits +w 1 1 1 crm +w 1 1 1 cron +w 1 1 1 crons +w 1 1 1 crontab +w 1 1 1 crontabs +w 1 1 1 crossdomain +w 1 1 1 crypt +w 1 1 1 crypto +w 1 1 1 cs +w 1 1 1 css +w 1 1 1 current +w 1 1 1 custom +w 1 1 1 custom-log +w 1 1 1 custom_log +w 1 1 1 customer +w 1 1 1 customers +w 1 1 1 cv +w 1 1 1 cxf +w 1 1 1 czcmdcvt +w 1 1 1 d +w 1 1 1 daemon +w 1 1 1 daily +w 1 1 1 dana-na +w 1 1 1 dat +w 1 1 1 data +w 1 1 1 database +w 1 1 1 databases +w 1 1 1 date +w 1 1 1 db +w 1 1 1 dba +w 1 1 1 dbase +w 1 1 1 dbman +w 1 1 1 dc +w 1 1 1 dcforum +w 1 1 1 de +w 1 1 1 dealer +w 1 1 1 debug +w 1 1 1 decl +w 1 1 1 declaration +w 1 1 1 declarations +w 1 1 1 decode +w 1 1 1 decrypt +w 1 1 1 def +w 1 1 1 default +w 1 1 1 defaults +w 1 1 1 definition +w 1 1 1 definitions +w 1 1 1 del +w 1 1 1 delete +w 
1 1 1 deleted +w 1 1 1 demo +w 1 1 1 demos +w 1 1 1 denied +w 1 1 1 deny +w 1 1 1 design +w 1 1 1 desktop +w 1 1 1 desktops +w 1 1 1 detail +w 1 1 1 details +w 1 1 1 dev +w 1 1 1 devel +w 1 1 1 developer +w 1 1 1 developers +w 1 1 1 development +w 1 1 1 device +w 1 1 1 devices +w 1 1 1 devs +w 1 1 1 df +w 1 1 1 dialog +w 1 1 1 dialogs +w 1 1 1 diff +w 1 1 1 diffs +w 1 1 1 digest +w 1 1 1 digg +w 1 1 1 dir +w 1 1 1 directories +w 1 1 1 directory +w 1 1 1 dirs +w 1 1 1 disabled +w 1 1 1 disclaimer +w 1 1 1 display +w 1 1 1 django +w 1 1 1 dl +w 1 1 1 dll +w 1 1 1 dm +w 1 1 1 dm-config +w 1 1 1 dms +w 1 1 1 dms0 +w 1 1 1 dns +w 1 1 1 do +w 1 1 1 doc +w 1 1 1 dock +w 1 1 1 docroot +w 1 1 1 docs +w 1 1 1 document +w 1 1 1 documentation +w 1 1 1 documents +w 1 1 1 domain +w 1 1 1 domains +w 1 1 1 down +w 1 1 1 download +w 1 1 1 downloads +w 1 1 1 drop +w 1 1 1 dropped +w 1 1 1 drupal +w 1 1 1 dummy +w 1 1 1 dump +w 1 1 1 dumps +w 1 1 1 dvd +w 1 1 1 dwr +w 1 1 1 dynamic +w 1 1 1 e +w 1 1 1 e2fs +w 1 1 1 ear +w 1 1 1 ecommerce +w 1 1 1 edge +w 1 1 1 edit +w 1 1 1 editor +w 1 1 1 edits +w 1 1 1 edu +w 1 1 1 education +w 1 1 1 ee +w 1 1 1 effort +w 1 1 1 efforts +w 1 1 1 egress +w 1 1 1 ejb +w 1 1 1 element +w 1 1 1 elements +w 1 1 1 em +w 1 1 1 email +w 1 1 1 emails +w 1 1 1 emea +w 1 1 1 employees +w 1 1 1 employment +w 1 1 1 empty +w 1 1 1 emu +w 1 1 1 emulator +w 1 1 1 en +w 1 1 1 en_US +w 1 1 1 encode +w 1 1 1 encrypt +w 1 1 1 eng +w 1 1 1 engine +w 1 1 1 english +w 1 1 1 enterprise +w 1 1 1 entertainment +w 1 1 1 entries +w 1 1 1 entry +w 1 1 1 env +w 1 1 1 environ +w 1 1 1 environment +w 1 1 1 ep +w 1 1 1 error +w 1 1 1 error-log +w 1 1 1 error_log +w 1 1 1 errors +w 1 1 1 es +w 1 1 1 esale +w 1 1 1 esales +w 1 1 1 etc +w 1 1 1 europe +w 1 1 1 event +w 1 1 1 events +w 1 1 1 evil +w 1 1 1 evt +w 1 1 1 ews +w 1 1 1 ex +w 1 1 1 example +w 1 1 1 examples +w 1 1 1 excalibur +w 1 1 1 exchange +w 1 1 1 exe +w 1 1 1 exec +w 1 1 1 export +w 1 1 1 ext +w 1 1 1 ext2 +w 1 1 1 
extern +w 1 1 1 external +w 1 1 1 ezshopper +w 1 1 1 f +w 1 1 1 face +w 1 1 1 faces +w 1 1 1 faculty +w 1 1 1 fail +w 1 1 1 failure +w 1 1 1 family +w 1 1 1 faq +w 1 1 1 faqs +w 1 1 1 fcgi-bin +w 1 1 1 feature +w 1 1 1 features +w 1 1 1 feed +w 1 1 1 feedback +w 1 1 1 feeds +w 1 1 1 felix +w 1 1 1 field +w 1 1 1 fields +w 1 1 1 file +w 1 1 1 fileadmin +w 1 1 1 files +w 1 1 1 filez +w 1 1 1 finance +w 1 1 1 financial +w 1 1 1 find +w 1 1 1 finger +w 1 1 1 firewall +w 1 1 1 fixed +w 1 1 1 flags +w 1 1 1 flash +w 1 1 1 flow +w 1 1 1 flows +w 1 1 1 flv +w 1 1 1 fn +w 1 1 1 folder +w 1 1 1 folders +w 1 1 1 font +w 1 1 1 fonts +w 1 1 1 foo +w 1 1 1 footer +w 1 1 1 footers +w 1 1 1 form +w 1 1 1 formatting +w 1 1 1 formmail +w 1 1 1 forms +w 1 1 1 forrest +w 1 1 1 fortune +w 1 1 1 forum +w 1 1 1 forum1 +w 1 1 1 forum2 +w 1 1 1 forumdisplay +w 1 1 1 forums +w 1 1 1 forward +w 1 1 1 foto +w 1 1 1 foundation +w 1 1 1 fr +w 1 1 1 frame +w 1 1 1 frames +w 1 1 1 framework +w 1 1 1 free +w 1 1 1 freebsd +w 1 1 1 friend +w 1 1 1 friends +w 1 1 1 frob +w 1 1 1 frontend +w 1 1 1 fs +w 1 1 1 ftp +w 1 1 1 fuck +w 1 1 1 fuckoff +w 1 1 1 fuckyou +w 1 1 1 full +w 1 1 1 fun +w 1 1 1 func +w 1 1 1 funcs +w 1 1 1 function +w 1 1 1 functions +w 1 1 1 fusion +w 1 1 1 fw +w 1 1 1 g +w 1 1 1 galleries +w 1 1 1 gallery +w 1 1 1 game +w 1 1 1 games +w 1 1 1 ganglia +w 1 1 1 garbage +w 1 1 1 gateway +w 1 1 1 gb +w 1 1 1 geeklog +w 1 1 1 general +w 1 1 1 geronimo +w 1 1 1 get +w 1 1 1 getaccess +w 1 1 1 getjobid +w 1 1 1 gfx +w 1 1 1 gif +w 1 1 1 gitweb +w 1 1 1 glimpse +w 1 1 1 global +w 1 1 1 globals +w 1 1 1 glossary +w 1 1 1 go +w 1 1 1 goaway +w 1 1 1 google +w 1 1 1 government +w 1 1 1 gprs +w 1 1 1 grant +w 1 1 1 grants +w 1 1 1 graphics +w 1 1 1 group +w 1 1 1 groupcp +w 1 1 1 groups +w 1 1 1 gsm +w 1 1 1 guest +w 1 1 1 guestbook +w 1 1 1 guests +w 1 1 1 guide +w 1 1 1 guides +w 1 1 1 gump +w 1 1 1 gwt +w 1 1 1 gz +w 1 1 1 h +w 1 1 1 hack +w 1 1 1 hacker +w 1 1 1 hacking +w 1 1 1 hackme +w 
1 1 1 hadoop +w 1 1 1 hardcore +w 1 1 1 hardware +w 1 1 1 harmony +w 1 1 1 head +w 1 1 1 header +w 1 1 1 headers +w 1 1 1 health +w 1 1 1 hello +w 1 1 1 help +w 1 1 1 helper +w 1 1 1 helpers +w 1 1 1 hi +w 1 1 1 hidden +w 1 1 1 hide +w 1 1 1 high +w 1 1 1 hipaa +w 1 1 1 history +w 1 1 1 hit +w 1 1 1 hits +w 1 1 1 hole +w 1 1 1 home +w 1 1 1 homepage +w 1 1 1 hop +w 1 1 1 horde +w 1 1 1 hosting +w 1 1 1 hosts +w 1 1 1 howto +w 1 1 1 hp +w 1 1 1 hr +w 1 1 1 hta +w 1 1 1 htbin +w 1 1 1 htdoc +w 1 1 1 htdocs +w 1 1 1 htm +w 1 1 1 htpasswd +w 1 1 1 http +w 1 1 1 httpd +w 1 1 1 https +w 1 1 1 httpuser +w 1 1 1 hu +w 1 1 1 hyper +w 1 1 1 i +w 1 1 1 ia +w 1 1 1 ibm +w 1 1 1 icat +w 1 1 1 icon +w 1 1 1 icons +w 1 1 1 id +w 1 1 1 idea +w 1 1 1 ideas +w 1 1 1 ids +w 1 1 1 ie +w 1 1 1 iframe +w 1 1 1 ig +w 1 1 1 ignore +w 1 1 1 iisadmin +w 1 1 1 iisadmpwd +w 1 1 1 iissamples +w 1 1 1 image +w 1 1 1 imagefolio +w 1 1 1 images +w 1 1 1 img +w 1 1 1 imgs +w 1 1 1 imp +w 1 1 1 import +w 1 1 1 important +w 1 1 1 in +w 1 1 1 inbound +w 1 1 1 incl +w 1 1 1 include +w 1 1 1 includes +w 1 1 1 incoming +w 1 1 1 incubator +w 1 1 1 index +w 1 1 1 index1 +w 1 1 1 index2 +w 1 1 1 index_1 +w 1 1 1 index_2 +w 1 1 1 inetpub +w 1 1 1 inetsrv +w 1 1 1 inf +w 1 1 1 info +w 1 1 1 information +w 1 1 1 ingress +w 1 1 1 init +w 1 1 1 inline +w 1 1 1 input +w 1 1 1 inquire +w 1 1 1 inquiries +w 1 1 1 inquiry +w 1 1 1 insert +w 1 1 1 install +w 1 1 1 int +w 1 1 1 interim +w 1 1 1 intermediate +w 1 1 1 internal +w 1 1 1 international +w 1 1 1 internet +w 1 1 1 intl +w 1 1 1 intranet +w 1 1 1 intro +w 1 1 1 ip +w 1 1 1 ipc +w 1 1 1 ips +w 1 1 1 irc +w 1 1 1 is +w 1 1 1 isapi +w 1 1 1 iso +w 1 1 1 issues +w 1 1 1 it +w 1 1 1 item +w 1 1 1 j +w 1 1 1 j2ee +w 1 1 1 j2me +w 1 1 1 jakarta +w 1 1 1 java-plugin +w 1 1 1 javadoc +w 1 1 1 javascript +w 1 1 1 javax +w 1 1 1 jboss +w 1 1 1 jdbc +w 1 1 1 jhtml +w 1 1 1 jigsaw +w 1 1 1 jj +w 1 1 1 jmx-console +w 1 1 1 job +w 1 1 1 jobs +w 1 1 1 joe +w 1 1 1 john +w 1 
1 1 join +w 1 1 1 joomla +w 1 1 1 journal +w 1 1 1 jp +w 1 1 1 jpa +w 1 1 1 jpg +w 1 1 1 jre +w 1 1 1 jrun +w 1 1 1 js +w 1 1 1 jsf +w 1 1 1 json +w 1 1 1 jsp +w 1 1 1 jsso +w 1 1 1 jsx +w 1 1 1 juniper +w 1 1 1 junk +w 1 1 1 jvm +w 1 1 1 k +w 1 1 1 kboard +w 1 1 1 keep +w 1 1 1 kernel +w 1 1 1 keygen +w 1 1 1 keys +w 1 1 1 kids +w 1 1 1 kill +w 1 1 1 known_hosts +w 1 1 1 l +w 1 1 1 labs +w 1 1 1 lang +w 1 1 1 large +w 1 1 1 law +w 1 1 1 layout +w 1 1 1 layouts +w 1 1 1 ldap +w 1 1 1 leader +w 1 1 1 leaders +w 1 1 1 left +w 1 1 1 legacy +w 1 1 1 legal +w 1 1 1 lenya +w 1 1 1 letters +w 1 1 1 level +w 1 1 1 lg +w 1 1 1 lib +w 1 1 1 library +w 1 1 1 libs +w 1 1 1 license +w 1 1 1 licenses +w 1 1 1 line +w 1 1 1 link +w 1 1 1 links +w 1 1 1 linux +w 1 1 1 list +w 1 1 1 listinfo +w 1 1 1 lists +w 1 1 1 live +w 1 1 1 lo +w 1 1 1 loader +w 1 1 1 loading +w 1 1 1 loc +w 1 1 1 local +w 1 1 1 location +w 1 1 1 lock +w 1 1 1 locked +w 1 1 1 log4j +w 1 1 1 logfile +w 1 1 1 logging +w 1 1 1 login +w 1 1 1 logins +w 1 1 1 logo +w 1 1 1 logoff +w 1 1 1 logon +w 1 1 1 logos +w 1 1 1 logout +w 1 1 1 logs +w 1 1 1 lost+found +w 1 1 1 low +w 1 1 1 ls +w 1 1 1 lst +w 1 1 1 lucene +w 1 1 1 m +w 1 1 1 mac +w 1 1 1 mail +w 1 1 1 mailer +w 1 1 1 mailing +w 1 1 1 mailman +w 1 1 1 mails +w 1 1 1 main +w 1 1 1 manage +w 1 1 1 management +w 1 1 1 manager +w 1 1 1 manifest +w 1 1 1 manual +w 1 1 1 manuals +w 1 1 1 map +w 1 1 1 maps +w 1 1 1 mark +w 1 1 1 marketing +w 1 1 1 master +w 1 1 1 master.passwd +w 1 1 1 match +w 1 1 1 matrix +w 1 1 1 maven +w 1 1 1 mbox +w 1 1 1 mdb +w 1 1 1 me +w 1 1 1 media +w 1 1 1 medium +w 1 1 1 mem +w 1 1 1 member +w 1 1 1 members +w 1 1 1 membership +w 1 1 1 memory +w 1 1 1 menu +w 1 1 1 messaging +w 1 1 1 meta +w 1 1 1 microsoft +w 1 1 1 migrate +w 1 1 1 migration +w 1 1 1 mina +w 1 1 1 mirror +w 1 1 1 mirrors +w 1 1 1 misc +w 1 1 1 mission +w 1 1 1 mix +w 1 1 1 mms +w 1 1 1 mobi +w 1 1 1 mobile +w 1 1 1 mock +w 1 1 1 mod +w 1 1 1 modify +w 1 1 1 mods +w 1 1 1 
module +w 1 1 1 modules +w 1 1 1 mojo +w 1 1 1 money +w 1 1 1 monitoring +w 1 1 1 more +w 1 1 1 move +w 1 1 1 movie +w 1 1 1 movies +w 1 1 1 mp +w 1 1 1 mp3 +w 1 1 1 mp3s +w 1 1 1 ms +w 1 1 1 ms-sql +w 1 1 1 msadc +w 1 1 1 msadm +w 1 1 1 msg +w 1 1 1 msie +w 1 1 1 msql +w 1 1 1 mssql +w 1 1 1 mta +w 1 1 1 multimedia +w 1 1 1 music +w 1 1 1 mx +w 1 1 1 my +w 1 1 1 myfaces +w 1 1 1 myphpnuke +w 1 1 1 mysql +w 1 1 1 mysqld +w 1 1 1 n +w 1 1 1 nav +w 1 1 1 navigation +w 1 1 1 net +w 1 1 1 netbsd +w 1 1 1 nethome +w 1 1 1 nets +w 1 1 1 network +w 1 1 1 networking +w 1 1 1 new +w 1 1 1 news +w 1 1 1 newsletter +w 1 1 1 newsletters +w 1 1 1 next +w 1 1 1 nfs +w 1 1 1 nice +w 1 1 1 nl +w 1 1 1 nobody +w 1 1 1 node +w 1 1 1 none +w 1 1 1 note +w 1 1 1 notes +w 1 1 1 notification +w 1 1 1 notifications +w 1 1 1 notified +w 1 1 1 notify +w 1 1 1 ns +w 1 1 1 nsf +w 1 1 1 nuke +w 1 1 1 nul +w 1 1 1 null +w 1 1 1 o +w 1 1 1 oa_servlets +w 1 1 1 oauth +w 1 1 1 obdc +w 1 1 1 obsolete +w 1 1 1 obsoleted +w 1 1 1 odbc +w 1 1 1 ode +w 1 1 1 oem +w 1 1 1 ofbiz +w 1 1 1 office +w 1 1 1 onbound +w 1 1 1 online +w 1 1 1 op +w 1 1 1 open +w 1 1 1 openbsd +w 1 1 1 opendir +w 1 1 1 openejb +w 1 1 1 openjpa +w 1 1 1 operations +w 1 1 1 opinion +w 1 1 1 oprocmgr-status +w 1 1 1 opt +w 1 1 1 option +w 1 1 1 options +w 1 1 1 ora +w 1 1 1 oracle +w 1 1 1 oracle.xml.xsql.XSQLServlet +w 1 1 1 order +w 1 1 1 ordered +w 1 1 1 orders +w 1 1 1 org +w 1 1 1 osc +w 1 1 1 oscommerce +w 1 1 1 other +w 1 1 1 outgoing +w 1 1 1 outline +w 1 1 1 output +w 1 1 1 outreach +w 1 1 1 overview +w 1 1 1 owa +w 1 1 1 ows +w 1 1 1 ows-bin +w 1 1 1 p +w 1 1 1 p2p +w 1 1 1 pack +w 1 1 1 packages +w 1 1 1 page +w 1 1 1 page1 +w 1 1 1 page2 +w 1 1 1 page_1 +w 1 1 1 page_2 +w 1 1 1 pages +w 1 1 1 paid +w 1 1 1 panel +w 1 1 1 paper +w 1 1 1 papers +w 1 1 1 parse +w 1 1 1 partner +w 1 1 1 partners +w 1 1 1 party +w 1 1 1 pass +w 1 1 1 passwd +w 1 1 1 password +w 1 1 1 passwords +w 1 1 1 past +w 1 1 1 patch +w 1 1 1 patches 
+w 1 1 1 paypal +w 1 1 1 pc +w 1 1 1 pci +w 1 1 1 pda +w 1 1 1 pdf +w 1 1 1 pdfs +w 1 1 1 peek +w 1 1 1 pending +w 1 1 1 people +w 1 1 1 perf +w 1 1 1 performance +w 1 1 1 perl +w 1 1 1 personal +w 1 1 1 pg +w 1 1 1 phf +w 1 1 1 phone +w 1 1 1 phones +w 1 1 1 phorum +w 1 1 1 photo +w 1 1 1 photos +w 1 1 1 php +w 1 1 1 php3 +w 1 1 1 phpBB +w 1 1 1 phpBB2 +w 1 1 1 phpEventCalendar +w 1 1 1 phpMyAdmin +w 1 1 1 phpbb +w 1 1 1 phpmyadmin +w 1 1 1 phpnuke +w 1 1 1 phps +w 1 1 1 pic +w 1 1 1 pics +w 1 1 1 pictures +w 1 1 1 pii +w 1 1 1 ping +w 1 1 1 pipermail +w 1 1 1 piranha +w 1 1 1 pix +w 1 1 1 pixel +w 1 1 1 pkg +w 1 1 1 pkgs +w 1 1 1 plain +w 1 1 1 play +w 1 1 1 pls +w 1 1 1 plugin +w 1 1 1 plugins +w 1 1 1 pm +w 1 1 1 png +w 1 1 1 poi +w 1 1 1 policies +w 1 1 1 policy +w 1 1 1 politics +w 1 1 1 poll +w 1 1 1 polls +w 1 1 1 pop +w 1 1 1 pop3 +w 1 1 1 porn +w 1 1 1 port +w 1 1 1 portal +w 1 1 1 portals +w 1 1 1 portfolio +w 1 1 1 pos +w 1 1 1 post +w 1 1 1 posted +w 1 1 1 postgres +w 1 1 1 postgresql +w 1 1 1 postnuke +w 1 1 1 postpaid +w 1 1 1 posts +w 1 1 1 ppt +w 1 1 1 pr +w 1 1 1 pr0n +w 1 1 1 premium +w 1 1 1 prepaid +w 1 1 1 presentation +w 1 1 1 presentations +w 1 1 1 preserve +w 1 1 1 press +w 1 1 1 preview +w 1 1 1 previews +w 1 1 1 previous +w 1 1 1 pricing +w 1 1 1 print +w 1 1 1 printenv +w 1 1 1 printer +w 1 1 1 printers +w 1 1 1 priv +w 1 1 1 privacy +w 1 1 1 private +w 1 1 1 problems +w 1 1 1 proc +w 1 1 1 procedures +w 1 1 1 prod +w 1 1 1 product +w 1 1 1 product_info +w 1 1 1 production +w 1 1 1 products +w 1 1 1 profile +w 1 1 1 profiles +w 1 1 1 profiling +w 1 1 1 program +w 1 1 1 programming +w 1 1 1 programs +w 1 1 1 project +w 1 1 1 projects +w 1 1 1 promo +w 1 1 1 prop +w 1 1 1 properties +w 1 1 1 property +w 1 1 1 props +w 1 1 1 protect +w 1 1 1 proto +w 1 1 1 proxies +w 1 1 1 proxy +w 1 1 1 prv +w 1 1 1 ps +w 1 1 1 psql +w 1 1 1 pt +w 1 1 1 pub +w 1 1 1 public +w 1 1 1 publication +w 1 1 1 publications +w 1 1 1 pubs +w 1 1 1 pull +w 1 1 1 
purchase +w 1 1 1 purchases +w 1 1 1 purchasing +w 1 1 1 push +w 1 1 1 pw +w 1 1 1 pwd +w 1 1 1 py +w 1 1 1 python +w 1 1 1 q +w 1 1 1 qpid +w 1 1 1 queries +w 1 1 1 query +w 1 1 1 queue +w 1 1 1 queues +w 1 1 1 quote +w 1 1 1 quotes +w 1 1 1 r +w 1 1 1 radio +w 1 1 1 random +w 1 1 1 rar +w 1 1 1 rdf +w 1 1 1 read +w 1 1 1 readme +w 1 1 1 realestate +w 1 1 1 receive +w 1 1 1 received +w 1 1 1 recharge +w 1 1 1 record +w 1 1 1 records +w 1 1 1 recovery +w 1 1 1 recycle +w 1 1 1 recycled +w 1 1 1 redir +w 1 1 1 redirect +w 1 1 1 reference +w 1 1 1 reg +w 1 1 1 register +w 1 1 1 registered +w 1 1 1 registration +w 1 1 1 registrations +w 1 1 1 release +w 1 1 1 releases +w 1 1 1 remote +w 1 1 1 remove +w 1 1 1 removed +w 1 1 1 render +w 1 1 1 rendered +w 1 1 1 rep +w 1 1 1 repl +w 1 1 1 replica +w 1 1 1 replicas +w 1 1 1 replicate +w 1 1 1 replicated +w 1 1 1 replication +w 1 1 1 replicator +w 1 1 1 reply +w 1 1 1 report +w 1 1 1 reporting +w 1 1 1 reports +w 1 1 1 reprints +w 1 1 1 req +w 1 1 1 reqs +w 1 1 1 request +w 1 1 1 requests +w 1 1 1 requisition +w 1 1 1 requisitions +w 1 1 1 res +w 1 1 1 research +w 1 1 1 resin +w 1 1 1 resource +w 1 1 1 resources +w 1 1 1 rest +w 1 1 1 restore +w 1 1 1 restored +w 1 1 1 restricted +w 1 1 1 results +w 1 1 1 retail +w 1 1 1 reverse +w 1 1 1 reversed +w 1 1 1 revert +w 1 1 1 reverted +w 1 1 1 review +w 1 1 1 reviews +w 1 1 1 right +w 1 1 1 roam +w 1 1 1 roaming +w 1 1 1 robot +w 1 1 1 robots +w 1 1 1 roller +w 1 1 1 room +w 1 1 1 root +w 1 1 1 rpc +w 1 1 1 rss +w 1 1 1 rtf +w 1 1 1 ru +w 1 1 1 rule +w 1 1 1 rules +w 1 1 1 run +w 1 1 1 rwservlet +w 1 1 1 s +w 1 1 1 sale +w 1 1 1 sales +w 1 1 1 sam +w 1 1 1 samba +w 1 1 1 sample +w 1 1 1 samples +w 1 1 1 sav +w 1 1 1 save +w 1 1 1 saved +w 1 1 1 saves +w 1 1 1 sbin +w 1 1 1 scan +w 1 1 1 scanned +w 1 1 1 scans +w 1 1 1 sched +w 1 1 1 schedule +w 1 1 1 scheduled +w 1 1 1 scheduling +w 1 1 1 schema +w 1 1 1 science +w 1 1 1 screen +w 1 1 1 screens +w 1 1 1 screenshot +w 1 1 1 
screenshots +w 1 1 1 script +w 1 1 1 scriptlet +w 1 1 1 scriptlets +w 1 1 1 scripts +w 1 1 1 sdk +w 1 1 1 se +w 1 1 1 search +w 1 1 1 sec +w 1 1 1 secret +w 1 1 1 section +w 1 1 1 sections +w 1 1 1 secure +w 1 1 1 secured +w 1 1 1 security +w 1 1 1 seed +w 1 1 1 sell +w 1 1 1 send +w 1 1 1 sendmail +w 1 1 1 sendto +w 1 1 1 sent +w 1 1 1 serial +w 1 1 1 serv +w 1 1 1 serve +w 1 1 1 server +w 1 1 1 server-info +w 1 1 1 server-status +w 1 1 1 servers +w 1 1 1 service +w 1 1 1 services +w 1 1 1 servlet +w 1 1 1 servlets +w 1 1 1 session +w 1 1 1 sessions +w 1 1 1 setting +w 1 1 1 settings +w 1 1 1 setup +w 1 1 1 sh +w 1 1 1 share +w 1 1 1 shared +w 1 1 1 shares +w 1 1 1 shell +w 1 1 1 ship +w 1 1 1 shipped +w 1 1 1 shipping +w 1 1 1 shop +w 1 1 1 shopper +w 1 1 1 shopping +w 1 1 1 shops +w 1 1 1 shoutbox +w 1 1 1 show +w 1 1 1 show_post +w 1 1 1 show_thread +w 1 1 1 showcat +w 1 1 1 showenv +w 1 1 1 showjobs +w 1 1 1 showmap +w 1 1 1 showmsg +w 1 1 1 showpost +w 1 1 1 showthread +w 1 1 1 shtml +w 1 1 1 sign +w 1 1 1 signoff +w 1 1 1 signon +w 1 1 1 signup +w 1 1 1 simple +w 1 1 1 sink +w 1 1 1 site +w 1 1 1 site-map +w 1 1 1 site_map +w 1 1 1 sitemap +w 1 1 1 sites +w 1 1 1 skel +w 1 1 1 skin +w 1 1 1 skins +w 1 1 1 skip +w 1 1 1 sl +w 1 1 1 sling +w 1 1 1 sm +w 1 1 1 small +w 1 1 1 sms +w 1 1 1 smtp +w 1 1 1 snoop +w 1 1 1 so +w 1 1 1 soap +w 1 1 1 soaprouter +w 1 1 1 soft +w 1 1 1 software +w 1 1 1 solaris +w 1 1 1 sold +w 1 1 1 solution +w 1 1 1 solutions +w 1 1 1 source +w 1 1 1 sources +w 1 1 1 soutbox +w 1 1 1 sox +w 1 1 1 sp +w 1 1 1 space +w 1 1 1 spacer +w 1 1 1 spam +w 1 1 1 special +w 1 1 1 specials +w 1 1 1 sponsor +w 1 1 1 sponsors +w 1 1 1 spool +w 1 1 1 sport +w 1 1 1 sports +w 1 1 1 sqlnet +w 1 1 1 squirrel +w 1 1 1 squirrelmail +w 1 1 1 src +w 1 1 1 srv +w 1 1 1 ss +w 1 1 1 ssh +w 1 1 1 ssi +w 1 1 1 ssl +w 1 1 1 sslvpn +w 1 1 1 ssn +w 1 1 1 sso +w 1 1 1 stackdump +w 1 1 1 staff +w 1 1 1 staging +w 1 1 1 standard +w 1 1 1 standards +w 1 1 1 star +w 1 1 
1 start +w 1 1 1 stat +w 1 1 1 statement +w 1 1 1 statements +w 1 1 1 static +w 1 1 1 staticpages +w 1 1 1 statistic +w 1 1 1 statistics +w 1 1 1 stats +w 1 1 1 status +w 1 1 1 stock +w 1 1 1 storage +w 1 1 1 store +w 1 1 1 stored +w 1 1 1 stories +w 1 1 1 story +w 1 1 1 strut +w 1 1 1 struts +w 1 1 1 student +w 1 1 1 students +w 1 1 1 stuff +w 1 1 1 style +w 1 1 1 styles +w 1 1 1 submissions +w 1 1 1 submit +w 1 1 1 subscribe +w 1 1 1 subscriber +w 1 1 1 subscribers +w 1 1 1 subscription +w 1 1 1 subscriptions +w 1 1 1 success +w 1 1 1 suite +w 1 1 1 suites +w 1 1 1 sun +w 1 1 1 sunos +w 1 1 1 super +w 1 1 1 support +w 1 1 1 surf +w 1 1 1 survey +w 1 1 1 surveys +w 1 1 1 swf +w 1 1 1 sws +w 1 1 1 synapse +w 1 1 1 sync +w 1 1 1 synced +w 1 1 1 sys +w 1 1 1 system +w 1 1 1 systems +w 1 1 1 sysuser +w 1 1 1 t +w 1 1 1 tag +w 1 1 1 tags +w 1 1 1 tape +w 1 1 1 tapes +w 1 1 1 tapestry +w 1 1 1 tar +w 1 1 1 tar.bz2 +w 1 1 1 tar.gz +w 1 1 1 tb +w 1 1 1 tcl +w 1 1 1 team +w 1 1 1 tech +w 1 1 1 technical +w 1 1 1 technology +w 1 1 1 tel +w 1 1 1 tele +w 1 1 1 templ +w 1 1 1 template +w 1 1 1 templates +w 1 1 1 terms +w 1 1 1 test-cgi +w 1 1 1 test-env +w 1 1 1 test1 +w 1 1 1 test123 +w 1 1 1 test1234 +w 1 1 1 test2 +w 1 1 1 test3 +w 1 1 1 testimonial +w 1 1 1 testimonials +w 1 1 1 testing +w 1 1 1 tests +w 1 1 1 texis +w 1 1 1 text +w 1 1 1 texts +w 1 1 1 tgz +w 1 1 1 theme +w 1 1 1 themes +w 1 1 1 thread +w 1 1 1 threads +w 1 1 1 thumb +w 1 1 1 thumbnail +w 1 1 1 thumbnails +w 1 1 1 thumbs +w 1 1 1 tickets +w 1 1 1 tiki +w 1 1 1 tiles +w 1 1 1 tip +w 1 1 1 tips +w 1 1 1 title +w 1 1 1 tls +w 1 1 1 tmpl +w 1 1 1 tmps +w 1 1 1 tn +w 1 1 1 toc +w 1 1 1 todo +w 1 1 1 toggle +w 1 1 1 tomcat +w 1 1 1 tool +w 1 1 1 toolbar +w 1 1 1 toolkit +w 1 1 1 tools +w 1 1 1 top +w 1 1 1 topic +w 1 1 1 topics +w 1 1 1 torrent +w 1 1 1 torrents +w 1 1 1 tos +w 1 1 1 tour +w 1 1 1 tpl +w 1 1 1 tpv +w 1 1 1 tr +w 1 1 1 trace +w 1 1 1 traceroute +w 1 1 1 traces +w 1 1 1 track +w 1 1 1 trackback 
+w 1 1 1 tracker +w 1 1 1 trackers +w 1 1 1 tracking +w 1 1 1 tracks +w 1 1 1 traffic +w 1 1 1 trailer +w 1 1 1 trailers +w 1 1 1 training +w 1 1 1 trans +w 1 1 1 transparent +w 1 1 1 transport +w 1 1 1 trash +w 1 1 1 travel +w 1 1 1 treasury +w 1 1 1 tree +w 1 1 1 trees +w 1 1 1 trial +w 1 1 1 trunk +w 1 1 1 tsweb +w 1 1 1 tt +w 1 1 1 turbine +w 1 1 1 tuscany +w 1 1 1 tutorial +w 1 1 1 tutorials +w 1 1 1 tv +w 1 1 1 tweak +w 1 1 1 type +w 1 1 1 typo3 +w 1 1 1 typo3conf +w 1 1 1 u +w 1 1 1 ubb +w 1 1 1 uds +w 1 1 1 uk +w 1 1 1 umts +w 1 1 1 union +w 1 1 1 unix +w 1 1 1 unlock +w 1 1 1 unreg +w 1 1 1 unregister +w 1 1 1 up +w 1 1 1 upd +w 1 1 1 update +w 1 1 1 updated +w 1 1 1 updater +w 1 1 1 updates +w 1 1 1 upload +w 1 1 1 uploads +w 1 1 1 url +w 1 1 1 us +w 1 1 1 usa +w 1 1 1 usage +w 1 1 1 user +w 1 1 1 userlog +w 1 1 1 users +w 1 1 1 usr +w 1 1 1 util +w 1 1 1 utilities +w 1 1 1 utility +w 1 1 1 utils +w 1 1 1 v +w 1 1 1 v1 +w 1 1 1 v2 +w 1 1 1 var +w 1 1 1 vault +w 1 1 1 vb +w 1 1 1 vbs +w 1 1 1 vector +w 1 1 1 velocity +w 1 1 1 vendor +w 1 1 1 ver +w 1 1 1 ver1 +w 1 1 1 ver2 +w 1 1 1 version +w 1 1 1 vfs +w 1 1 1 video +w 1 1 1 videos +w 1 1 1 view +w 1 1 1 view-source +w 1 1 1 viewcvs +w 1 1 1 viewforum +w 1 1 1 viewonline +w 1 1 1 views +w 1 1 1 viewsource +w 1 1 1 viewsvn +w 1 1 1 viewtopic +w 1 1 1 viewvc +w 1 1 1 virtual +w 1 1 1 vm +w 1 1 1 voip +w 1 1 1 vol +w 1 1 1 vpn +w 1 1 1 w +w 1 1 1 w3 +w 1 1 1 w3c +w 1 1 1 wa +w 1 1 1 wap +w 1 1 1 war +w 1 1 1 warez +w 1 1 1 way-board +w 1 1 1 wbboard +w 1 1 1 wc +w 1 1 1 weather +w 1 1 1 web +w 1 1 1 web-beans +w 1 1 1 web-console +w 1 1 1 webaccess +w 1 1 1 webadmin +w 1 1 1 webagent +w 1 1 1 webalizer +w 1 1 1 webapp +w 1 1 1 webb +w 1 1 1 webbbs +w 1 1 1 webboard +w 1 1 1 webcalendar +w 1 1 1 webcart +w 1 1 1 webcasts +w 1 1 1 webcgi +w 1 1 1 webchat +w 1 1 1 webdata +w 1 1 1 webdav +w 1 1 1 weblog +w 1 1 1 weblogic +w 1 1 1 weblogs +w 1 1 1 webmail +w 1 1 1 webplus +w 1 1 1 webshop +w 1 1 1 website +w 1 1 
1 websphere +w 1 1 1 webstats +w 1 1 1 websvn +w 1 1 1 webwork +w 1 1 1 welcome +w 1 1 1 whitepapers +w 1 1 1 whois +w 1 1 1 whosonline +w 1 1 1 wicket +w 1 1 1 wiki +w 1 1 1 win +w 1 1 1 win32 +w 1 1 1 windows +w 1 1 1 winnt +w 1 1 1 wireless +w 1 1 1 wml +w 1 1 1 word +w 1 1 1 wordpress +w 1 1 1 work +w 1 1 1 working +w 1 1 1 world +w 1 1 1 wp +w 1 1 1 wp-content +w 1 1 1 wp-includes +w 1 1 1 wp-login +w 1 1 1 wrap +w 1 1 1 ws +w 1 1 1 ws-client +w 1 1 1 ws_ftp +w 1 1 1 wtai +w 1 1 1 www +w 1 1 1 www-sql +w 1 1 1 www1 +w 1 1 1 www2 +w 1 1 1 www3 +w 1 1 1 wwwboard +w 1 1 1 wwwroot +w 1 1 1 wwwstats +w 1 1 1 wwwthreads +w 1 1 1 wwwuser +w 1 1 1 x +w 1 1 1 xalan +w 1 1 1 xerces +w 1 1 1 xhtml +w 1 1 1 xls +w 1 1 1 xmlrpc +w 1 1 1 xsl +w 1 1 1 xslt +w 1 1 1 xsql +w 1 1 1 xxx +w 1 1 1 xyzzy +w 1 1 1 y +w 1 1 1 yahoo +w 1 1 1 youtube +w 1 1 1 yt +w 1 1 1 z +w 1 1 1 zboard +w 1 1 1 zend +w 1 1 1 zero +w 1 1 1 zipfiles +w 1 1 1 zips +w 1 1 1 zope +w 1 1 1 zorum +w 1 1 1 ~admin +w 1 1 1 ~apache +w 1 1 1 ~bin +w 1 1 1 ~bob +w 1 1 1 ~ftp +w 1 1 1 ~guest +w 1 1 1 ~http +w 1 1 1 ~httpd +w 1 1 1 ~john +w 1 1 1 ~log +w 1 1 1 ~logs +w 1 1 1 ~lp +w 1 1 1 ~mark +w 1 1 1 ~matt +w 1 1 1 ~nobody +w 1 1 1 ~root +w 1 1 1 ~test +w 1 1 1 ~tmp +w 1 1 1 ~www diff --git a/http_client.c b/http_client.c new file mode 100644 index 0000000..155d00f --- /dev/null +++ b/http_client.c @@ -0,0 +1,2455 @@ +/* + skipfish - high-performance, single-process asynchronous HTTP client + -------------------------------------------------------------------- + + Author: Michal Zalewski + + Copyright 2009, 2010 by Google Inc. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "types.h" +#include "alloc-inl.h" +#include "string-inl.h" +#include "database.h" + +#include "http_client.h" + +/* Assorted exported settings: */ + +u32 max_connections = MAX_CONNECTIONS, + max_conn_host = MAX_CONN_HOST, + max_requests = MAX_REQUESTS, + max_fail = MAX_FAIL, + idle_tmout = IDLE_TMOUT, + resp_tmout = RESP_TMOUT, + rw_tmout = RW_TMOUT, + size_limit = SIZE_LIMIT; + +u8 browser_type = BROWSER_FAST; +u8 auth_type = AUTH_NONE; + +struct param_array global_http_par; + +/* Counters: */ + +u32 req_errors_net, + req_errors_http, + req_errors_cur, + req_count, + req_dropped, + queue_cur, + conn_cur, + conn_count, + conn_idle_tmout, + conn_busy_tmout, + conn_failed, + req_retried, + url_scope; + +u64 bytes_sent, + bytes_recv, + bytes_deflated, + bytes_inflated; + +u8 *auth_user, + *auth_pass; + +u8 ignore_cookies; + +/* Internal globals for queue management: */ + +static struct queue_entry* queue; +static struct conn_entry* conn; +static struct dns_entry* dns; + +#ifdef QUEUE_FILO +static struct queue_entry* q_tail; +#endif /* QUEUE_FILO */ + +static u8 tear_down_idle; + + +/* Extracts parameter value from param_array. Name is matched if + non-NULL. Returns pointer to value data, not a duplicate string; + NULL if no match found. 
*/ + +u8* get_value(u8 type, u8* name, u32 offset, + struct param_array* par) { + + u32 i, coff = 0; + + for (i=0;ic;i++) { + if (type != par->t[i]) continue; + if (name && strcasecmp((char*)par->n[i], (char*)name)) continue; + if (offset != coff) { coff++; continue; } + return par->v[i]; + } + + return NULL; + +} + + +/* Inserts or overwrites parameter value in param_array. If offset + == -1, will append parameter to list. Duplicates strings, + name and val can be NULL. */ + +void set_value(u8 type, u8* name, u8* val, + s32 offset, struct param_array* par) { + + u32 i, coff = 0, matched = -1; + + /* If offset specified, try to find an entry to replace. */ + + if (offset >= 0) + for (i=0;ic;i++) { + if (type != par->t[i]) continue; + if (name && strcasecmp((char*)par->n[i], (char*)name)) continue; + if (offset != coff) { coff++; continue; } + matched = i; + break; + } + + if (matched == -1) { + + /* No offset or no match - append to the end of list. */ + + par->t = ck_realloc(par->t, (par->c + 1) * sizeof(u8)); + par->n = ck_realloc(par->n, (par->c + 1) * sizeof(u8*)); + par->v = ck_realloc(par->v, (par->c + 1) * sizeof(u8*)); + par->t[par->c] = type; + par->n[par->c] = ck_strdup(name); + par->v[par->c] = ck_strdup(val); + par->c++; + + } else { + + /* Matched - replace name & value. */ + + ck_free(par->n[matched]); + ck_free(par->v[matched]); + par->n[matched] = ck_strdup(name); + par->v[matched] = ck_strdup(val); + + } + +} + + +/* Convert a fully-qualified or relative URL string to a proper http_request + representation. Returns 0 on success, 1 on format error. */ + +u8 parse_url(u8* url, struct http_request* req, struct http_request* ref) { + + u8* cur = url; + u32 maybe_proto = strcspn((char*)url, ":/?#@"); + u8 has_host = 0, add_slash = 1; + + if (strlen((char*)url) > MAX_URL_LEN) return 1; + req->orig_url = ck_strdup(url); + + /* Interpret, skip protocol string if the URL seems to be fully-qualified; + otherwise, copy from referring URL. 
We could be stricter here, as + browsers bail out on seemingly invalid chars in proto names, but... */ + + if (maybe_proto && url[maybe_proto] == ':') { + + if (!strncasecmp((char*)url, "http:", 5)) { + req->proto = PROTO_HTTP; + cur += 5; + } else if (!strncasecmp((char*)url, "https:", 6)) { + req->proto = PROTO_HTTPS; + cur += 6; + } else return 1; + + } else { + + if (!ref || !ref->proto) return 1; + req->proto = ref->proto; + + } + + /* Interpret, skip //[login[:pass@](\[ipv4\]|\[ipv6\]|host)[:port] part of the + URL, if present. Note that "http:blarg" is a valid relative URL to most + browsers, and "//example.com/blarg" is a valid non-FQDN absolute one. + We need to mimick this, which complicates the code a bit. */ + + if (cur[0] == '/' && cur[1] == '/') { + + u32 path_st; + u8 *at_sign, *host, *x; + u8 has_utf = 0; + + cur += 2; + + /* Detect, skip login[:pass]@; we only use cmdline-supplied credentials or + wordlists into account. Be sure to report any embedded auth, though. */ + + at_sign = (u8*)strchr((char*)cur, '@'); + path_st = strcspn((char*)cur, ":/?#"); + + if (at_sign && path_st > (at_sign - cur)) { + cur = at_sign + 1; + if (!req->pivot) return 1; + problem(PROB_URL_AUTH, ref, 0, url, req->pivot, 0); + } + + /* No support for IPv6 or [ip] notation for now, so let's just refuse to + parse the URL. Also, refuse excessively long domain names for sanity. */ + + if (*cur == '[') return 1; + if (path_st > MAX_DNS_LEN) return 1; + + x = host = ck_memdup(cur, path_st + 1); + host[path_st] = 0; + + /* Scan, normalize extracted host name. */ + + while (*x) { + + switch (*x) { + + case 'A' ... 'Z': + *x = tolower(*x); + break; + + case 'a' ... 'z': + case '0' ... '9': + case '.': + case '-': + case '_': + break; + + case 0x80 ... 0xff: + has_utf = 1; + break; + + default: + /* Uh-oh, invalid characters in a host name - abandon ship. 
*/ + return 1; + + } + + x++; + + } + + /* Host names that contained high bits need to be converted to Punycode + in order to resolve properly. */ + + if (has_utf) { + + char* output = 0; + + if (idna_to_ascii_8z((char*)host, &output, 0) != IDNA_SUCCESS || + strlen(output) > MAX_DNS_LEN) { + ck_free(output); + return 1; + } + + ck_free(host); + host = (u8*)output; + + } + + req->host = host; + cur += path_st; + + /* All right, moving on: if host name is followed by :, let's try to + parse and validate port number; otherwise, assume 80 / 443, depending + on protocol. */ + + if (*cur == ':') { + + u32 digit_cnt = strspn((char*)++cur, "0123456789"); + u32 port = atoi((char*)cur); + if (!digit_cnt || (cur[digit_cnt] && !strchr("/?#", cur[digit_cnt]))) + return 1; + req->port = port; + cur += digit_cnt; + + } else { + + if (req->proto == PROTO_HTTPS) req->port = 443; else req->port = 80; + + } + + has_host = 1; + + } else { + + /* No host name found - copy from referring request instead. */ + + if (!ref || !ref->host) return 1; + + req->host = ck_strdup(ref->host); + req->addr = ref->addr; + req->port = ref->port; + + } + + if (!*cur || *cur == '#') { + u32 i; + + /* No-op path. If the URL does not specify host (e.g., #foo), copy + everything from referring request, call it a day. Otherwise + (e.g., http://example.com#foo), let tokenize_path() run to + add NULL-"" entry to the list. */ + + if (!has_host) { + for (i=0;ipar.c;i++) + if (PATH_SUBTYPE(ref->par.t[i]) || QUERY_SUBTYPE(ref->par.t[i])) + set_value(ref->par.t[i], ref->par.n[i], ref->par.v[i], -1, &req->par); + return 0; + } + + } + + if (!has_host && *cur == '?') { + u32 i; + + /* URL begins with ? and does not specify host (e.g., ?foo=bar). Copy all + path segments, but no query, then fall through to parse the query + string. 
*/ + + for (i=0;ipar.c;i++) + if (PATH_SUBTYPE(ref->par.t[i])) + set_value(ref->par.t[i], ref->par.n[i], ref->par.v[i], -1, &req->par); + + /* In this case, we do not want tokenize_path() to tinker with the path + in any way. */ + + add_slash = 0; + + } else if (!has_host && *cur != '/') { + + /* The URL does not begin with / or ?, and does not specify host (e.g., + foo/bar?baz). Copy path from referrer, but drop the last "proper" + path segment and everything that follows it. This mimicks browser + behavior (for URLs ending with /, it just drops the final NULL-"" + pair). */ + + u32 i; + u32 path_cnt = 0, path_cur = 0; + + for (i=0;ipar.c;i++) + if (ref->par.t[i] == PARAM_PATH) path_cnt++; + + for (i=0;ipar.c;i++) { + if (ref->par.t[i] == PARAM_PATH) path_cur++; + if (path_cur < path_cnt && PATH_SUBTYPE(ref->par.t[i])) + set_value(ref->par.t[i], ref->par.n[i], ref->par.v[i], -1, &req->par); + } + + } + + /* Tokenize the remaining path on top of what we parsed / copied over. */ + + tokenize_path(cur, req, add_slash); + return 0; + +} + + +/* URL-decodes a string. 'Plus' parameter governs the behavior on + + signs (as they have a special meaning only in query params, not in path). */ + +u8* url_decode_token(u8* str, u32 len, u8 plus) { + u8 *ret = ck_alloc(len + 1); + u8 *src = str, *dst = ret; + char *hex_str = "0123456789abcdef"; + + while (len--) { + u8 c = *(src++); + char *f, *s; + + if (plus && c == '+') c = ' '; + + if (c == '%' && len >= 2 && + (f = strchr(hex_str, tolower(src[0]))) && + (s = strchr(hex_str, tolower(src[1])))) { + c = ((f - hex_str) << 4) | (s - hex_str); + src += 2; len -= 2; + } + + /* We can't handle NUL-terminators gracefully when deserializing request + parameters, because param_array values are NUL-terminated themselves. + Let's encode \0 as \xFF instead, and hope nobody notices. 
*/ + + if (!c) c = 0xff; + + *(dst++) = c; + + } + + *(dst++) = 0; + + ret = ck_realloc(ret, dst - ret); + + return ret; +} + + +/* URL-encodes a string according to custom rules. The assumption here is that + the data is already tokenized as "special" boundaries such as ?, =, &, /, + ;, !, $, and , so these characters must always be escaped if present in + tokens. We otherwise let pretty much everything else go through, as it + may help with the exploitation of certain vulnerabilities. */ + +u8* url_encode_token(u8* str, u32 len) { + + u8 *ret = ck_alloc(len * 3 + 1); + u8 *src = str, *dst = ret; + + while (len--) { + u8 c = *(src++); + + if (c <= 0x20 || c >= 0x80 || strchr("#%&=/+;,!$?", c)) { + if (c == 0xFF) c = 0; + sprintf((char*)dst, "%%%02X", c); + dst += 3; + } else *(dst++) = c; + + } + + *(dst++) = 0; + + ret = ck_realloc(ret, dst - ret); + + return ret; + +} + + +/* Split path at known "special" character boundaries, URL decode values, + then put them in the provided http_request struct. */ + +void tokenize_path(u8* str, struct http_request* req, u8 add_slash) { + + u8* cur; + u8 know_dir = 0; + + while (*str == '/') str++; + cur = str; + + /* Parse path elements first. */ + + while (*cur && !strchr("?#", *cur)) { + + u32 next_seg, next_eq; + + u8 *name = NULL, *value = NULL; + u8 first_el = (str == cur); + + if (first_el || *cur == '/') { + + /* Optimize out //, /\0, /./, and /.\0. They do indicate + we are looking at a directory, so mark this. */ + + if (!first_el && (cur[1] == '/' || !cur[1])) { + cur++; + know_dir = 1; + continue; + } + + if (cur[0 + !first_el] == '.' && (cur[1 + !first_el] == '/' || + !cur[1 + !first_el])) { + cur += 1 + !first_el; + know_dir = 1; + continue; + } + + /* If we encountered /../ or /..\0, remove everything up to and + including the last "true" path element. It's also indicative + of a directory, by the way. */ + + if (cur[0 + !first_el] == '.' && cur[1 + !first_el] == '.' 
&& + (cur[2 + !first_el] == '/' || !cur[2 + !first_el])) { + + u32 i, last_p = req->par.c; + + for (i=0;ipar.c;i++) + if (req->par.t[i] == PARAM_PATH) last_p = i; + + for (i=last_p;ipar.c;i++) { + req->par.t[i] = PARAM_NONE; + } + + cur += 2 + !first_el; + know_dir = 1; + continue; + + } + + } + + /* If we're here, we have an actual item to add; cur points to + the string if it's the first element, or to field separator + if one of the subsequent ones. */ + + next_seg = strcspn((char*)cur + 1, "/;,!$?#") + 1, + next_eq = strcspn((char*)cur + 1, "=/;,!$?#") + 1; + know_dir = 0; + + if (next_eq < next_seg) { + name = url_decode_token(cur + !first_el, next_eq - !first_el, 0); + value = url_decode_token(cur + next_eq + 1, next_seg - next_eq - 1, 0); + } else { + value = url_decode_token(cur + !first_el, next_seg - !first_el, 0); + } + + switch (first_el ? '/' : *cur) { + + case ';': set_value(PARAM_PATH_S, name, value, -1, &req->par); break; + case ',': set_value(PARAM_PATH_C, name, value, -1, &req->par); break; + case '!': set_value(PARAM_PATH_E, name, value, -1, &req->par); break; + case '$': set_value(PARAM_PATH_D, name, value, -1, &req->par); break; + default: set_value(PARAM_PATH, name, value, -1, &req->par); + + } + + ck_free(name); + ck_free(value); + + cur += next_seg; + + } + + /* If the last segment was /, /./, or /../, *or* if we never added + anything to the path to begin with, we want to store a NULL-"" + entry to denote it's a directory. */ + + if (know_dir || (add_slash && (!*str || strchr("?#", *str)))) + set_value(PARAM_PATH, NULL, (u8*)"", -1, &req->par); + + /* Deal with regular query parameters now. This is much simpler, + obviously. */ + + while (*cur && !strchr("#", *cur)) { + + u32 next_seg = strcspn((char*)cur + 1, "#&;,!$") + 1; + u32 next_eq = strcspn((char*)cur + 1, "=#&;,!$") + 1; + u8 *name = NULL, *value = NULL; + + /* foo=bar syntax... 
*/ + + if (next_eq < next_seg) { + name = url_decode_token(cur + 1, next_eq - 1, 1); + value = url_decode_token(cur + next_eq + 1, next_seg - next_eq - 1, 1); + } else { + value = url_decode_token(cur + 1, next_seg - 1, 1); + } + + switch (*cur) { + + case ';': set_value(PARAM_QUERY_S, name, value, -1, &req->par); break; + case ',': set_value(PARAM_QUERY_C, name, value, -1, &req->par); break; + case '!': set_value(PARAM_QUERY_E, name, value, -1, &req->par); break; + case '$': set_value(PARAM_QUERY_D, name, value, -1, &req->par); break; + default: set_value(PARAM_QUERY, name, value, -1, &req->par); + + } + + ck_free(name); + ck_free(value); + + cur += next_seg; + + } + +} + + +/* Reconstructs URI from http_request data. Includes protocol and host + if with_host is non-zero. */ + +u8* serialize_path(struct http_request* req, u8 with_host, u8 with_post) { + u32 i, cur_pos; + u8 got_search = 0; + u8* ret; + + NEW_STR(ret, cur_pos); + +#define ASD(_p3) ADD_STR_DATA(ret, cur_pos, _p3) + + /* For human-readable uses... */ + + if (with_host) { + ASD("http"); + if (req->proto == PROTO_HTTPS) ASD("s"); + ASD("://"); + ASD(req->host); + + if ((req->proto == PROTO_HTTP && req->port != 80) || + (req->proto == PROTO_HTTPS && req->port != 443)) { + u8 port[7]; + sprintf((char*)port, ":%u", req->port); + ASD(port); + } + + } + + /* First print path... */ + + for (i=0;ipar.c;i++) + if (PATH_SUBTYPE(req->par.t[i])) { + + switch (req->par.t[i]) { + + case PARAM_PATH_S: ASD(";"); break; + case PARAM_PATH_C: ASD(","); break; + case PARAM_PATH_E: ASD("!"); break; + case PARAM_PATH_D: ASD("$"); break; + default: ASD("/"); + + } + + if (req->par.n[i]) { + u32 len = strlen((char*)req->par.n[i]); + u8* str = url_encode_token(req->par.n[i], len); + ASD(str); ASD("="); + ck_free(str); + } + if (req->par.v[i]) { + u32 len = strlen((char*)req->par.v[i]); + u8* str = url_encode_token(req->par.v[i], len); + ASD(str); + ck_free(str); + } + + } + + /* Then actual parameters. 
*/ + + for (i=0;ipar.c;i++) + if (QUERY_SUBTYPE(req->par.t[i])) { + + if (!got_search) { + ASD("?"); + got_search = 1; + } else switch (req->par.t[i]) { + + case PARAM_QUERY_S: ASD(";"); break; + case PARAM_QUERY_C: ASD(","); break; + case PARAM_QUERY_E: ASD("!"); break; + case PARAM_QUERY_D: ASD("$"); break; + default: ASD("&"); + + } + + if (req->par.n[i]) { + u32 len = strlen((char*)req->par.n[i]); + u8* str = url_encode_token(req->par.n[i], len); + ASD(str); ASD("="); + ck_free(str); + } + if (req->par.v[i]) { + u32 len = strlen((char*)req->par.v[i]); + u8* str = url_encode_token(req->par.v[i], len); + ASD(str); + ck_free(str); + } + + } + + got_search = 0; + + if (with_post) + for (i=0;ipar.c;i++) + if (POST_SUBTYPE(req->par.t[i])) { + + if (!got_search) { + ASD(" POST: "); + got_search = 1; + } else ASD("&"); + + if (req->par.n[i]) { + u32 len = strlen((char*)req->par.n[i]); + u8* str = url_encode_token(req->par.n[i], len); + ASD(str); ASD("="); + ck_free(str); + } + if (req->par.v[i]) { + u32 len = strlen((char*)req->par.v[i]); + u8* str = url_encode_token(req->par.v[i], len); + ASD(str); + ck_free(str); + } + + } + +#undef ASD + + TRIM_STR(ret, cur_pos); + return ret; + +} + + +/* Looks up IP for a particular host, returns data in network order. + Uses standard resolver, so it is slow and blocking, but we only + expect to call it a couple of times during a typical assessment. + There are some good async DNS libraries to consider in the long run. */ + +u32 maybe_lookup_host(u8* name) { + struct hostent* h; + struct dns_entry *d = dns, *prev = NULL; + u32 ret_addr = 0; + struct in_addr in; + + /* Don't bother resolving raw IP addresses, naturally. */ + + if (inet_aton((char*)name, &in)) + return (u32)in.s_addr; + + while (d) { + if (!strcasecmp((char*)name, (char*)d->name)) return d->addr; + prev = d; + d = d->next; + } + + h = gethostbyname((char*)name); + + /* If lookup fails with a transient error, be nice - try again. 
*/ + + if (!h && h_errno == TRY_AGAIN) h = gethostbyname((char*)name); + + if (h) { + + u32 i = 0; + + /* For each address associated with the host, see if we have any + other hosts that resolved to that same IP. If yes, return + that address; otherwise, just return first. This is for HTTP + performance and bookkeeping reasons. */ + + while (h->h_addr_list[i]) { + d = dns; + while (d) { + if (d->addr == *(u32*)h->h_addr_list[i]) { + ret_addr = d->addr; + goto dns_got_name; + } + d = d->next; + } + i++; + } + + ret_addr = *(u32*)h->h_addr_list[0]; + + } + +dns_got_name: + + if (!prev) d = dns = ck_alloc(sizeof(struct dns_entry)); + else d = prev->next = ck_alloc(sizeof(struct dns_entry)); + + d->name = ck_strdup(name); + d->addr = ret_addr; + + return ret_addr; + +} + + +/* Creates an ad hoc DNS cache entry, to override NS lookups. */ + +void fake_host(u8* name, u32 addr) { + struct dns_entry *d = dns, *prev = 0; + + while (d && d->next) { prev = d ; d = d->next;} + + if (!dns) d = dns = ck_alloc(sizeof(struct dns_entry)); + else d = prev->next = ck_alloc(sizeof(struct dns_entry)); + + d->name = ck_strdup(name); + d->addr = addr; + +} + + +/* Prepares a serialized HTTP buffer to be sent over the network. */ + +u8* build_request_data(struct http_request* req) { + + u8 *ret_buf, *ck_buf, *pay_buf, *path; + u32 ret_pos, ck_pos, pay_pos, i; + u8 req_type = PARAM_NONE; + + if (req->proto == PROTO_NONE) + FATAL("uninitialized http_request"); + + NEW_STR(ret_buf, ret_pos); + + path = serialize_path(req, 0, 0); + +#define ASD(_p3) ADD_STR_DATA(ret_buf, ret_pos, _p3) + + if (req->method) ASD(req->method); else ASD((u8*)"GET"); + ASD(" "); + ASD(path); + ASD(" HTTP/1.1\r\n"); + ck_free(path); + + ASD("Host: "); + ASD(req->host); + + if ((req->proto == PROTO_HTTP && req->port != 80) || + (req->proto == PROTO_HTTPS && req->port != 443)) { + char port[7]; + sprintf((char*)port, ":%u", req->port); + ASD(port); + } + + ASD("\r\n"); + + /* Insert generic browser headers first. 
*/ + + if (browser_type == BROWSER_FAST) { + + ASD("Accept-Encoding: gzip\r\n"); + ASD("Connection: keep-alive\r\n"); + ASD("User-Agent: Mozilla/5.0 SF/" VERSION "\r\n"); + + /* Some servers will reject to gzip responses unless "Mozilla/..." + is seen in User-Agent. Bleh. */ + + } else if (browser_type == BROWSER_FFOX) { + + if (!GET_HDR((u8*)"User-Agent", &req->par)) + ASD("User-Agent: Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; " + "rv:1.9.1.2) Gecko/20090729 Firefox/3.5.2 SF/" VERSION "\r\n"); + + ASD("Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;" + "q=0.8\r\n"); + + if (!GET_HDR((u8*)"Accept-Language", &req->par)) + ASD("Accept-Language: en-us,en\r\n"); + + ASD("Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7\r\n"); + ASD("Keep-Alive: 300\r\n"); + ASD("Connction: keep-alive\r\n"); + + } else /* MSIE */ { + + ASD("Accept: */*\r\n"); + + if (!GET_HDR((u8*)"Accept-Language", &req->par)) + ASD("Accept-Language: en,en-US;q=0.5\r\n"); + + if (!GET_HDR((u8*)"User-Agent", &req->par)) + ASD("User-Agent: Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; " + "Trident/4.0; .NET CLR 1.1.4322; InfoPath.1; .NET CLR " + "2.0.50727; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729; SF/" + VERSION ")\r\n"); + + ASD("Accept-Encoding: gzip, deflate\r\n"); + ASD("Connection: Keep-Alive\r\n"); + + } + + + /* Request a limited range up front to minimize unwanted traffic. */ + + if (size_limit) { + u8 limit[32]; + sprintf((char*)limit, "Range: bytes=0-%u\r\n", size_limit - 1); + ASD(limit); + } + + /* Include a dummy "Referer" header, to avoid certain XSRF checks. */ + + if (!GET_HDR((u8*)"Referer", &req->par)) { + ASD("Referer: http"); + if (req->proto == PROTO_HTTPS) ASD("s"); + ASD("://"); + ASD(req->host); + ASD("/\r\n"); + } + + /* Take care of HTTP authentication next. 
*/ + + if (auth_type == AUTH_BASIC) { + u8* lp = ck_alloc(strlen((char*)auth_user) + strlen((char*)auth_pass) + 2); + u8* lpb64; + + sprintf((char*)lp, "%s:%s", auth_user, auth_pass); + + lpb64 = b64_encode(lp, strlen((char*)lp)); + + ASD("Authorization: basic "); + ASD(lpb64); + ASD("\r\n"); + + ck_free(lpb64); + ck_free(lp); + + } + + /* Append any other requested headers and cookies. */ + + NEW_STR(ck_buf, ck_pos); + + for (i=0;ipar.c;i++) { + if (req->par.t[i] == PARAM_HEADER) { + ASD(req->par.n[i]); + ASD(": "); + ASD(req->par.v[i]); + ASD("\r\n"); + } else if (req->par.t[i] == PARAM_COOKIE) { + if (ck_pos) ADD_STR_DATA(ck_buf, ck_pos, ";"); + ADD_STR_DATA(ck_buf, ck_pos, req->par.n[i]); + ADD_STR_DATA(ck_buf, ck_pos, "="); + ADD_STR_DATA(ck_buf, ck_pos, req->par.v[i]); + } + } + + /* Also include extra globals, if any (but avoid dupes). */ + + for (i=0;ipar)) { + ASD(global_http_par.n[i]); + ASD(": "); + ASD(global_http_par.v[i]); + ASD("\r\n"); + } else if (global_http_par.t[i] == PARAM_COOKIE && + !GET_CK(global_http_par.n[i], &req->par)) { + if (ck_pos) ADD_STR_DATA(ck_buf, ck_pos, ";"); + ADD_STR_DATA(ck_buf, ck_pos, global_http_par.n[i]); + ADD_STR_DATA(ck_buf, ck_pos, "="); + ADD_STR_DATA(ck_buf, ck_pos, global_http_par.v[i]); + } + } + + if (ck_pos) { + ASD("Cookie: "); + ASD(ck_buf); + ASD("\r\n"); + } + + ck_free(ck_buf); + + /* Now, let's serialize the payload, if necessary. */ + + for (i=0;ipar.c;i++) { + switch (req->par.t[i]) { + case PARAM_POST_F: + case PARAM_POST_O: + req_type = req->par.t[i]; + break; + case PARAM_POST: + if (req_type == PARAM_NONE) req_type = PARAM_POST; + break; + } + } + + NEW_STR(pay_buf, pay_pos); + + if (req_type == PARAM_POST) { + + /* The default case: application/x-www-form-urlencoded. 
*/ + + for (i=0;ipar.c;i++) + if (req->par.t[i] == PARAM_POST) { + if (pay_pos) ADD_STR_DATA(pay_buf, pay_pos, "&"); + if (req->par.n[i]) { + u32 len = strlen((char*)req->par.n[i]); + u8* str = url_encode_token(req->par.n[i], len); + ADD_STR_DATA(pay_buf, pay_pos, str); + ADD_STR_DATA(pay_buf, pay_pos, "="); + ck_free(str); + } + if (req->par.v[i]) { + u32 len = strlen((char*)req->par.v[i]); + u8* str = url_encode_token(req->par.v[i], len); + ADD_STR_DATA(pay_buf, pay_pos, str); + ck_free(str); + } + } + + ASD("Content-Type: application/x-www-form-urlencoded\r\n"); + + } else if (req_type == PARAM_POST_O) { + + /* Opaque, non-escaped data of some sort. */ + + for (i=0;ipar.c;i++) + if (req->par.t[i] == PARAM_POST_O && req->par.v[i]) + ADD_STR_DATA(pay_buf, pay_pos, req->par.v[i]); + + ASD("Content-Type: text/plain\r\n"); + + } else if (req_type == PARAM_POST_F) { + u8 bound[20]; + + /* MIME envelopes: multipart/form-data */ + + sprintf((char*)bound, "sf%u", R(1000000)); + + for (i=0;ipar.c;i++) + if (req->par.t[i] == PARAM_POST || req->par.t[i] == PARAM_POST_F) { + + ADD_STR_DATA(pay_buf, pay_pos, "--"); + ADD_STR_DATA(pay_buf, pay_pos, bound); + ADD_STR_DATA(pay_buf, pay_pos, "\r\n" + "Content-Disposition: form-data; name=\""); + if (req->par.n[i]) + ADD_STR_DATA(pay_buf, pay_pos, req->par.n[i]); + + if (req->par.t[i] == PARAM_POST_F) { + u8 tmp[64]; + sprintf((char*)tmp, "\"; filename=\"sfish%u." 
DUMMY_EXT "\"\r\n" + "Content-Type: " DUMMY_MIME "\r\n\r\n", R(16)); + ADD_STR_DATA(pay_buf, pay_pos, tmp); + ADD_STR_DATA(pay_buf, pay_pos, new_xss_tag((u8*)DUMMY_FILE)); + register_xss_tag(req); + } else { + ADD_STR_DATA(pay_buf, pay_pos, "\"\r\n\r\n"); + if (req->par.v[i]) + ADD_STR_DATA(pay_buf, pay_pos, req->par.v[i]); + } + + ADD_STR_DATA(pay_buf, pay_pos, "\r\n"); + } + + ADD_STR_DATA(pay_buf, pay_pos, "--"); + ADD_STR_DATA(pay_buf, pay_pos, bound); + ADD_STR_DATA(pay_buf, pay_pos, "--\r\n"); + + ASD("Content-Type: multipart/form-data; boundary="); + ASD(bound); + ASD("\r\n"); + + } else if (req_type == 0) ASD("\r\n"); + + /* Finalize HTTP payload... */ + + for (i=0;i start_ptr && strchr("\r\n", *(cur_ptr-1))) cur_ptr--; + + ret = ck_alloc(cur_ptr - start_ptr + 1); + memcpy(ret, start_ptr, cur_ptr - start_ptr); + ret[cur_ptr - start_ptr] = 0; + + return ret; + +} + + +/* Builds response fingerprint data. These fingerprints are used to + find "roughly comparable" pages based on their word length + distributions (divided into FP_SIZE buckets). */ + +void fprint_response(struct http_response* res) { + u32 i, c_len = 0, in_space = 0; + + res->sig.code = res->code; + + for (i=0;ipay_len;i++) + + if (res->payload[i] <= 0x20 || strchr("<>'\"", res->payload[i])) { + if (!in_space) { + in_space = 1; + if (c_len <= FP_MAX_LEN) + res->sig.data[c_len % FP_SIZE]++; + c_len = 0; + } else c_len++; + } else { + if (in_space) { + in_space = 0; + if (c_len <= FP_MAX_LEN) + res->sig.data[c_len % FP_SIZE]++; + c_len = 0; + } else c_len++; + } + + res->sig.data[c_len % FP_SIZE]++; + +} + + +/* Parses a network buffer containing raw HTTP response received over the + network ('more' == the socket is still available for reading). Returns 0 + if response parses OK, 1 if more data should be read from the socket, + 2 if the response seems invalid, 3 if response OK but connection must be + closed. 
*/ + +u8 parse_response(struct http_request* req, struct http_response* res, + u8* data, u32 data_len, u8 more) { + u8* cur_line = 0; + s32 pay_len = -1; + u32 cur_data_off = 0, + total_chunk = 0, + http_ver; + u8 chunked = 0, compressed = 0, must_close = 0; + + if (res->code) + FATAL("struct http_response reused! Original code '%u'.", res->code); + +#define NEXT_LINE() do { \ + if (cur_line) ck_free(cur_line); \ + cur_line = grab_line(data, &cur_data_off, data_len); \ + } while (0) + + /* First, let's do a superficial request completeness check. Be + prepared for a premature end at any point. */ + + NEXT_LINE(); /* HTTP/1.x xxx ... */ + + if (!cur_line) return more ? 1 : 2; + + if (strlen((char*)cur_line) < 7 && more) { + ck_free(cur_line); + return 1; + } + + if (strncmp((char*)cur_line, "HTTP/1.", 7)) { + ck_free(cur_line); + return 2; + } + + /* Scan headers for Content-Length, Transfer-Encoding, etc. */ + + while (1) { + + NEXT_LINE(); /* Next header or empty line. */ + + /* If headers end prematurely, and more data might arrive, ask for + it; otherwise, just assume end of headers and continue. */ + + if (!cur_line) { + if (more) return 1; + res->warn |= WARN_PARTIAL; + break; + } + + /* Empty line indicates the beginning of a payload. */ + + if (!cur_line[0]) break; + + if (!strncasecmp((char*)cur_line, "Content-Length:", 15)) { + + /* The value in Content-Length header would be useful for seeing if we + have all the requested data already. Reject invalid values to avoid + integer overflows, etc, though. */ + + if (sscanf((char*)cur_line + 15, "%d", &pay_len) == 1) { + if (pay_len < 0 || pay_len > 1000000000 /* 1 GB */) { + ck_free(cur_line); + return 2; + } + } else pay_len = -1; + + } else if (!strncasecmp((char*)cur_line, "Transfer-Encoding:", 18)) { + + /* Transfer-Encoding: chunked must be accounted for to properly + determine if we received all the data when Content-Length not found. 
*/ + + u8* x = cur_line + 18; + + while (isspace(*x)) x++; + if (!strcasecmp((char*)x, "chunked")) chunked = 1; + + } else if (!strncasecmp((char*)cur_line, "Content-Encoding:", 17)) { + + /* Content-Encoding is good to know, too. */ + + u8* x = cur_line + 17; + + while (isspace(*x)) x++; + + if (!strcasecmp((char*)x, "deflate") || !strcasecmp((char*)x, "gzip")) + compressed = 1; + + } else if (!strncasecmp((char*)cur_line, "Connection:", 11)) { + + u8* x = cur_line + 11; + + while (isspace(*x)) x++; + + if (!strcasecmp((char*)x, "close")) must_close = 1; + + + + } + } + + /* We are now at the beginning of the payload. Firstly, how about decoding + 'chunked' to see if we received a complete 0-byte terminator chunk + already? */ + + if (chunked) { + while (1) { + u32 chunk_len; + + NEXT_LINE(); /* Should be chunk size, hex. */ + + if (!cur_line || sscanf((char*)cur_line, "%x", &chunk_len) != 1) { + if (more) { ck_free(cur_line); return 1; } + res->warn |= WARN_PARTIAL; + break; + } + + if (chunk_len > 1000000000 || total_chunk > 1000000000 /* 1 GB */) { + ck_free(cur_line); + return 2; + } + + /* See if we actually enough buffer to skip the chunk. Bail out if + not and more data might be coming; otherwise, adjust chunk size + accordingly. */ + + if (cur_data_off + chunk_len > data_len) { + + if (more) { ck_free(cur_line); return 1; } + chunk_len = data_len - cur_data_off; + total_chunk += chunk_len; + + res->warn |= WARN_PARTIAL; + break; + } + + total_chunk += chunk_len; + + cur_data_off += chunk_len; + NEXT_LINE(); + + /* No newline? */ + if (!cur_line) { + if (more) return 1; + res->warn |= WARN_PARTIAL; + } + + /* All right, so that was the last, complete 0-size chunk? + Exit the loop if so. */ + + if (!chunk_len) break; + + } + + if (cur_data_off != data_len) res->warn |= WARN_TRAIL; + + } else if (pay_len == -1 && more) { + + /* If in a mode other than 'chunked', and C-L not received, but more + data might be available - try to request it. 
*/ + + ck_free(cur_line); + return 1; + + } else if (pay_len != 1) { + + if (cur_data_off + pay_len > data_len) { + + /* If C-L seen, but not nough data in the buffer, try to request more + if possible, otherwise tag the response as partial. */ + + if (more) { ck_free(cur_line); return 1; } + res->warn |= WARN_PARTIAL; + + } else if (cur_data_off + pay_len < data_len) res->warn |= WARN_TRAIL; + + } + + /* Rewind, then properly parse HTTP headers, parsing cookies. */ + + cur_data_off = 0; + + NEXT_LINE(); + + if (strlen((char*)cur_line) < 13 || + sscanf((char*)cur_line, "HTTP/1.%u %u ", &http_ver, &res->code) != 2 || + res->code < 100 || res->code > 999) { + ck_free(cur_line); + return 2; + } + + /* Some servers, when presented with 'Range' header, will return 200 on + some queries for a particular resource, and 206 on other queries (e.g., + with query string), despite returning exactly as much data. As an + ugly workaround... */ + + if (res->code == 206) res->code = 200; + + if (http_ver == 0) must_close = 1; + + res->msg = ck_strdup(cur_line + 13); + + while (1) { + u8* val; + + NEXT_LINE(); /* Next header or empty line. */ + + if (!cur_line) return 2; + if (!cur_line[0]) break; + + /* Split field name and value */ + + val = (u8*) strchr((char*)cur_line, ':'); + if (!val) { ck_free(cur_line); return 2; } + *val = 0; + while (isspace(*(++val))); + + if (!strcasecmp((char*)cur_line, "Set-Cookie") || + !strcasecmp((char*)cur_line, "Set-Cookie2")) { + + /* We could bother with a proper tokenizer here, but contrary to "teh + standards", browsers generally don't accept multiple cookies in + Set-Cookie headers, handle quoted-string encoding inconsistently, + etc. So let's just grab the first value naively and move on. 
*/ + + u8* cval; + u8* orig_val; + + cval = (u8*) strchr((char*)val, ';'); + if (cval) *cval = 0; + cval = (u8*) strchr((char*)val, '='); + if (cval) { *cval = 0; cval++; } + + /* If proper value not found, use NULL name and put whatever was + found in the value field. */ + + if (!cval) { cval = val; val = 0; } + + SET_CK(val, cval, &res->hdr); + + if (val) { + + /* New or drastically changed cookies are noteworthy. */ + + orig_val = GET_CK(val, &global_http_par); + + if (!orig_val || (strlen((char*)orig_val) != strlen((char*)cval) && + strncmp((char*)cval, (char*)orig_val, 3))) { + res->cookies_set = 1; + problem(PROB_NEW_COOKIE, req, res, val, req->pivot, 0); + } + + /* Set cookie globally, but ignore obvious attempts to delete + existing ones. */ + + if (!ignore_cookies && cval[0]) + SET_CK(val, cval, &global_http_par); + + } + + } else SET_HDR(cur_line, val, &res->hdr); + + /* Content-Type is worth mining for MIME, charset data at this point. */ + + if (!strcasecmp((char*)cur_line, "Content-Type")) { + + if (res->header_mime) { + + /* Duplicate Content-Type. Fetch previous value, if different, + complain. */ + + u8* tmp = GET_HDR((u8*)"Content-Type", &res->hdr); + if (strcasecmp((char*)tmp, (char*)val)) res->warn |= WARN_CFL_HDR; + + } else { + u8 *tmp = (u8*)strchr((char*)val, ';'), *cset; + + if (tmp) { + *tmp = 0; + if ((cset = (u8*)strchr((char*)tmp + 1, '='))) + res->header_charset = ck_strdup(cset + 1); + } + + res->header_mime = ck_strdup(val); + if (tmp) *tmp = ';'; + } + + } + + } + + /* At the beginning of the payload again! */ + + if (!chunked) { + + /* Identity. Ignore actual C-L data, use just as much as we collected. */ + + res->pay_len = data_len - cur_data_off; + res->payload = ck_alloc(res->pay_len + 1); + res->payload[res->pay_len] = 0; /* NUL-terminate for safer parsing. 
*/ + + memcpy(res->payload, data + cur_data_off, res->pay_len); + + } else { + + u32 chunk_off = 0; + + /* Chunked - we should have the authoritative length of chunk + contents in total_chunk already, and the overall structure + validated, so let's just reparse quickly. */ + + res->pay_len = total_chunk; + res->payload = ck_alloc(total_chunk + 1); + res->payload[res->pay_len] = 0; + + while (1) { + u32 chunk_len; + + NEXT_LINE(); + + if (!cur_line || sscanf((char*)cur_line, "%x", &chunk_len) != 1) break; + + if (cur_data_off + chunk_len > data_len) + chunk_len = data_len - cur_data_off; + + memcpy(res->payload + chunk_off, data + cur_data_off, chunk_len); + + chunk_off += chunk_len; + cur_data_off += chunk_len; + + NEXT_LINE(); + + if (!chunk_len) break; + } + + } + + ck_free(cur_line); + + if (compressed) { + + u8* tmp_buf; + + /* Deflate or gzip - zlib can handle both the same way. We lazily allocate + a SIZE_LIMIT output buffer, then truncate it if necessary. */ + + z_stream d; + s32 err; + + tmp_buf = ck_alloc(SIZE_LIMIT + 1); + + d.zalloc = 0; + d.zfree = 0; + d.opaque = 0; + d.next_in = res->payload; + d.avail_in = res->pay_len; + d.next_out = tmp_buf; + d.avail_out = SIZE_LIMIT; + + /* Say hello to third-party vulnerabilities! */ + + if (inflateInit2(&d, 32 + 15) != Z_OK) { + inflateEnd(&d); + ck_free(tmp_buf); + return 2; + } + + err = inflate(&d, Z_FINISH); + inflateEnd(&d); + + if (err != Z_BUF_ERROR && err != Z_OK && err != Z_STREAM_END) { + ck_free(tmp_buf); + return 2; + } + + ck_free(res->payload); + + bytes_deflated += res->pay_len; + + res->pay_len = SIZE_LIMIT - d.avail_out; + res->payload = ck_realloc(tmp_buf, res->pay_len + 1); + res->payload[res->pay_len] = 0; + + + bytes_inflated += res->pay_len; + + } + +#undef NEXT_LINE + + fprint_response(res); + + return must_close ? 
3 : 0; +} + + +/* Performs a deep free() of struct http_request */ + +void destroy_request(struct http_request* req) { + u32 i; + + for (i=0;ipar.c;i++) { + ck_free(req->par.n[i]); + ck_free(req->par.v[i]); + } + + ck_free(req->par.t); + ck_free(req->par.n); + ck_free(req->par.v); + + ck_free(req->method); + ck_free(req->host); + ck_free(req->orig_url); + ck_free(req); + +} + + +/* Performs a deep free() of struct http_response */ + +void destroy_response(struct http_response* res) { + u32 i; + + for (i=0;ihdr.c;i++) { + ck_free(res->hdr.n[i]); + ck_free(res->hdr.v[i]); + } + + ck_free(res->hdr.t); + ck_free(res->hdr.n); + ck_free(res->hdr.v); + + ck_free(res->meta_charset); + ck_free(res->header_charset); + ck_free(res->header_mime); + + ck_free(res->msg); + ck_free(res->payload); + ck_free(res); + +} + + +/* Performs a deep free(), unlinking of struct queue_entry, and the + underlying request / response pair. */ + +static void destroy_unlink_queue(struct queue_entry* q, u8 keep) { + if (!keep) { + if (q->req) destroy_request(q->req); + if (q->res) destroy_response(q->res); + } + if (!q->prev) queue = q->next; else q->prev->next = q->next; +#ifdef QUEUE_FILO + if (!q->next) q_tail = q->prev; +#endif /* QUEUE_FILO */ + if (q->next) q->next->prev = q->prev; + ck_free(q); + queue_cur--; +} + + +/* Performs a deep free(), unlinking, network shutdown for struct + conn_entry, as well as the underlying queue entry, request + and response structs. */ + +static void destroy_unlink_conn(struct conn_entry* c, u8 keep) { + if (c->q) destroy_unlink_queue(c->q, keep); + if (!c->prev) conn = c->next; else c->prev->next = c->next; + if (c->next) c->next->prev = c->prev; + if (c->srv_ssl) SSL_free(c->srv_ssl); + if (c->srv_ctx) SSL_CTX_free(c->srv_ctx); + ck_free(c->write_buf); + ck_free(c->read_buf); + close(c->fd); + ck_free(c); + conn_cur--; +} + + +/* Performs struct conn_entry for reuse following a clean shutdown. 
*/ + +static void reuse_conn(struct conn_entry* c, u8 keep) { + if (c->q) destroy_unlink_queue(c->q, keep); + c->q = 0; + ck_free(c->read_buf); + ck_free(c->write_buf); + c->read_buf = c->write_buf = NULL; + c->read_len = c->write_len = c->write_off = 0; + c->SSL_rd_w_wr = c->SSL_wr_w_rd = 0; +} + + +/* Schedules a new asynchronous request (does not make a copy of the + original http_request struct, may deallocate it immediately or + later on); req->callback() will be invoked when the request is + completed (or fails - maybe right away). */ + +void async_request(struct http_request* req) { + struct queue_entry *qe; + struct http_response *res; + + if (req->proto == PROTO_NONE || !req->callback) + FATAL("uninitialized http_request"); + + res = ck_alloc(sizeof(struct http_response)); + + req->addr = maybe_lookup_host(req->host); + + /* Don't try to issue extra requests if max_fail + consecutive failures exceeded; but still try to + wrap up the (partial) scan. */ + + if (req_errors_cur > max_fail) { + DEBUG("!!! Too many subsequent request failures!\n"); + res->state = STATE_SUPPRESS; + if (!req->callback(req, res)) { + destroy_request(req); + destroy_response(res); + } + req_dropped++; + return; + } + + /* DNS errors mean instant fail. */ + + if (!req->addr) { + DEBUG("!!! DNS error!\n"); + res->state = STATE_DNSERR; + if (!req->callback(req, res)) { + destroy_request(req); + destroy_response(res); + } + req_errors_net++; + conn_count++; + conn_failed++; + return; + } + + /* Enforce user limits. */ + + if (req_count > max_requests) { + DEBUG("!!! Total request limit exceeded!\n"); + res->state = STATE_SUPPRESS; + if (!req->callback(req, res)) { + destroy_request(req); + destroy_response(res); + } + req_dropped++; + return; + } + + /* OK, looks like we're good to go. Insert the request + into the the queue. 
*/ + +#ifdef QUEUE_FILO + + qe = q_tail; + q_tail = ck_alloc(sizeof(struct queue_entry)); + q_tail->req = req; + q_tail->res = res; + q_tail->prev = qe; + + if (q_tail->prev) q_tail->prev->next = q_tail; + + if (!queue) queue = q_tail; + +#else + + qe = queue; + + queue = ck_alloc(sizeof(struct queue_entry)); + queue->req = req; + queue->res = res; + queue->next = qe; + + if (queue->next) queue->next->prev = queue; + +#endif /* ^QUEUE_FILO */ + + queue_cur++; + req_count++; + +} + + +/* Check SSL properties, raise security alerts if necessary. We do not perform + a very thorough validation - we do not check for valid root CAs, bad ciphers, + SSLv2 support, etc - as these are covered well by network-level security + assessment tools anyway. + + We might eventually want to check aliases or support TLS SNI. */ + +static void check_ssl(struct conn_entry* c) { + X509 *p; + + p = SSL_get_peer_certificate(c->srv_ssl); + + if (p) { + u32 cur_time = time(0); + char *issuer, *host, *req_host; + + /* Check for certificate expiration... */ + + if (ASN1_UTCTIME_cmp_time_t(p->cert_info->validity->notBefore, cur_time) + != -1 || + ASN1_UTCTIME_cmp_time_t(p->cert_info->validity->notAfter, cur_time) + != 1) + problem(PROB_SSL_CERT_DATE, c->q->req, 0, 0, + host_pivot(c->q->req->pivot), 0); + + /* Check for self-signed certs or no issuer data. */ + + issuer = X509_NAME_oneline(p->cert_info->issuer,NULL,0); + + if (!issuer || !p->name || !strcmp(issuer, p->name)) + problem(PROB_SSL_SELF_CERT, c->q->req, 0, (u8*)issuer, + host_pivot(c->q->req->pivot), 0); + else + problem(PROB_SSL_CERT, c->q->req, 0, (u8*)issuer, + host_pivot(c->q->req->pivot), 0); + + free(issuer); + + /* Extract CN= from certificate name, compare to destination host. 
*/ + + host = strrchr(p->name, '='); + req_host = (char*)c->q->req->host; + + if (host) { + host++; + if (host[0] == '*' && host[1] == '.') { + host++; + if (strlen(req_host) > strlen(host)) + req_host += strlen(req_host) - strlen(host); + } + } + + if (!host || strcasecmp(host, req_host)) + problem(PROB_SSL_BAD_HOST, c->q->req, 0, (u8*)host, + host_pivot(c->q->req->pivot), 0); + + X509_free(p); + + } else problem(PROB_SSL_NO_CERT, c->q->req, 0, 0, + host_pivot(c->q->req->pivot), 0); + + c->ssl_checked = 1; +} + + +/* Associates a queue entry with an existing connection (if 'use_c' is + non-NULL), or creates a new connection to host (if 'use_c' NULL). */ + +static void conn_associate(struct conn_entry* use_c, struct queue_entry* q) { + struct conn_entry* c; + + if (use_c) { + + c = use_c; + c->reused = 1; + + } else { + + struct sockaddr_in sin; + + /* OK, we need to create a new connection list entry and connect + it to a target host. */ + + c = ck_alloc(sizeof(struct conn_entry)); + + conn_count++; + + c->proto = q->req->proto; + c->addr = q->req->addr; + c->port = q->req->port; + + c->fd = socket(PF_INET, SOCK_STREAM, 0); + + if (c->fd < 0) { + +connect_error: + + if (c->fd >=0) close(c->fd); + q->res->state = STATE_LOCALERR; + destroy_unlink_queue(q, q->req->callback(q->req, q->res)); + req_errors_net++; + req_errors_cur++; + + ck_free(c); + conn_failed++; + return; + } + + sin.sin_family = PF_INET; + sin.sin_port = htons(c->port); + + memcpy(&sin.sin_addr, &q->req->addr, 4); + + fcntl(c->fd, F_SETFL, O_NONBLOCK); + + if (connect(c->fd, (struct sockaddr*) &sin, sizeof(struct sockaddr_in)) && + (errno != EINPROGRESS)) goto connect_error; + + /* HTTPS also requires SSL state to be initialized at this point. 
*/ + + if (c->proto == PROTO_HTTPS) { + + c->srv_ctx = SSL_CTX_new(SSLv23_client_method()); + + if (!c->srv_ctx) goto connect_error; + + SSL_CTX_set_mode(c->srv_ctx, SSL_MODE_ENABLE_PARTIAL_WRITE | + SSL_MODE_ACCEPT_MOVING_WRITE_BUFFER); + + c->srv_ssl = SSL_new(c->srv_ctx); + + if (!c->srv_ssl) { + SSL_CTX_free(c->srv_ctx); + goto connect_error; + } + + SSL_set_fd(c->srv_ssl, c->fd); + SSL_set_connect_state(c->srv_ssl); + + } + + /* Make it official. */ + + c->next = conn; + conn = c; + if (c->next) c->next->prev = c; + + conn_cur++; + + } + + c->q = q; + q->c = c; + + q->res->state = STATE_CONNECT; + c->req_start = c->last_rw = time(0); + c->write_buf = build_request_data(q->req); + c->write_len = strlen((char*)c->write_buf); + +} + + +/* Processes the queue. Returns the number of queue entries remaining, + 0 if none. Will do a blocking select() to wait for socket state changes + (or timeouts) if no data available to process. This is the main + routine for the scanning loop. */ + +u32 next_from_queue(void) { + + u32 cur_time = time(0); + + if (conn_cur) { + static struct pollfd* p; + struct conn_entry* c = conn; + u32 i = 0; + + /* First, go through all connections, handle connects, SSL handshakes, data + reads and writes, and exceptions. */ + + if (p) free(p); + p = __DFL_ck_alloc(sizeof(struct pollfd) * conn_cur); + + while (c) { + p[i].fd = c->fd; + p[i].events = POLLIN | POLLERR | POLLHUP; + if (c->write_len - c->write_off || c->SSL_rd_w_wr) + p[i].events |= POLLOUT; + c = c->next; + i++; + } + + poll(p, conn_cur, 100); + + c = conn; + + for (i=0;inext; + + /* Connection closed: see if we have any pending data to write. If yes, + fail. If not, try parse_response() to see if we have all the data. + Clean up. */ + + if (p[i].revents & (POLLERR|POLLHUP)) { + + u8 keep; + +network_error: + + keep = 0; + + /* Retry requests that were sent on old keep-alive connections + and failed instantly with no data read; might be just that + the server got bored. 
*/ + + if (c->q && c->reused && !c->read_len) { + + c->q->res->state = STATE_NOTINIT; + c->q->c = 0; + c->q = 0; + + req_retried++; + + } else if (c->q) { + + if (c->write_len - c->write_off || !c->read_len) { + c->q->res->state = STATE_CONNERR; + keep = c->q->req->callback(c->q->req, c->q->res); + req_errors_net++; + req_errors_cur++; + } else { + if (parse_response(c->q->req, c->q->res, c->read_buf, + c->read_len, 0) != 2) { + c->q->res->state = STATE_OK; + keep = c->q->req->callback(c->q->req, c->q->res); + if (req_errors_cur <= max_fail) + req_errors_cur = 0; + } else { + c->q->res->state = STATE_CONNERR; + keep = c->q->req->callback(c->q->req, c->q->res); + req_errors_net++; + req_errors_cur++; + } + } + + } + + destroy_unlink_conn(c, keep); + + } else + + /* Incoming data (when SSL_write() did not request a read) or + continuation of SSL_read() possible (if SSL_read() wanted to write). + Process data, call parse_response() to see if w have all we wanted. + Update event timers. */ + + if (((p[i].revents & POLLIN) && !c->SSL_wr_w_rd) || + ((p[i].revents & POLLOUT) && c->SSL_rd_w_wr)) { + + if (c->q) { + s32 read_res; + u8 p_ret; + + c->read_buf = ck_realloc(c->read_buf, c->read_len + READ_CHUNK + 1); + + if (c->proto == PROTO_HTTPS) { + s32 ssl_err; + + c->SSL_rd_w_wr = 0; + + read_res = SSL_read(c->srv_ssl, c->read_buf + c->read_len, + READ_CHUNK); + + if (!read_res) goto network_error; + + if (read_res < 0) { + ssl_err = SSL_get_error(c->srv_ssl, read_res); + if (ssl_err == SSL_ERROR_WANT_WRITE) c->SSL_rd_w_wr = 1; + else if (ssl_err != SSL_ERROR_WANT_READ) goto network_error; + read_res = 0; + } + + } else { + read_res = read(c->fd, c->read_buf + c->read_len, READ_CHUNK); + if (read_res <= 0) goto network_error; + } + + bytes_recv += read_res; + + c->read_len += read_res; + c->read_buf = ck_realloc(c->read_buf, c->read_len + 1); + + c->read_buf[c->read_len] = 0; /* NUL-terminate for sanity. 
*/ + + /* We force final parse_response() if response length exceeded + size_limit by more than 4 kB. The assumption here is that + it is less expensive to redo the connection than it is + to continue receiving an unknown amount of extra data. */ + + p_ret = parse_response(c->q->req, c->q->res, c->read_buf, c->read_len, + (c->read_len > (size_limit + READ_CHUNK)) ? 0 : 1); + + if (!p_ret || p_ret == 3) { + + u8 keep; + + c->q->res->state = STATE_OK; + keep = c->q->req->callback(c->q->req, c->q->res); + + /* If we got all data without hitting the limit, and if + "Connection: close" is not indicated, we might want + to keep the connection for future use. */ + + if (c->read_len > (size_limit + READ_CHUNK) || p_ret) + destroy_unlink_conn(c, keep); else reuse_conn(c, keep); + + if (req_errors_cur <= max_fail) + req_errors_cur = 0; + + } else if (p_ret == 2) { + c->q->res->state = STATE_RESPERR; + destroy_unlink_conn(c, c->q->req->callback(c->q->req, c->q->res)); + req_errors_http++; + req_errors_cur++; + } else { + c->last_rw = cur_time; + c->q->res->state = STATE_RECEIVE; + } + + } else destroy_unlink_conn(c, 0); /* Unsolicited response! */ + + } else + + /* Write possible (if SSL_read() did not request a write), or + continuation of SSL_write() possible (if SSL_write() wanted to + read). Send data, update timers, etc. 
*/ + + if (((p[i].revents & POLLOUT) && !c->SSL_rd_w_wr) || + ((p[i].revents & POLLIN) && c->SSL_wr_w_rd)) { + + if (c->write_len - c->write_off) { + s32 write_res; + + if (c->proto == PROTO_HTTPS) { + s32 ssl_err; + + c->SSL_wr_w_rd = 0; + + write_res = SSL_write(c->srv_ssl, c->write_buf + c->write_off, + c->write_len - c->write_off); + + if (!write_res) goto network_error; + + if (write_res < 0) { + ssl_err = SSL_get_error(c->srv_ssl, write_res); + if (ssl_err == SSL_ERROR_WANT_READ) c->SSL_wr_w_rd = 1; + else if (ssl_err != SSL_ERROR_WANT_WRITE) goto network_error; + write_res = 0; + } else if (!c->ssl_checked) check_ssl(c); + + } else { + write_res = write(c->fd, c->write_buf + c->write_off, + c->write_len - c->write_off); + if (write_res <= 0) goto network_error; + } + + bytes_sent += write_res; + + c->write_off += write_res; + + c->q->res->state = STATE_SEND; + + c->last_rw = cur_time; + + } + + } else + + /* Nothing happened. Check timeouts, kill stale connections. + Active (c->q) connections get checked for total and last I/O + timeouts. Non-active connctions must just not exceed + idle_tmout. */ + + if (!p[i].revents) { + + u8 keep = 0; + + if ((c->q && (cur_time - c->last_rw > rw_tmout || + cur_time - c->req_start > resp_tmout)) || + (!c->q && (cur_time - c->last_rw > idle_tmout)) || + (!c->q && tear_down_idle)) { + + if (c->q) { + c->q->res->state = STATE_CONNERR; + keep = c->q->req->callback(c->q->req, c->q->res); + req_errors_net++; + req_errors_cur++; + conn_busy_tmout++; + } else { + conn_idle_tmout++; + tear_down_idle = 0; + } + + destroy_unlink_conn(c, keep); + + } + + } + + c = next; + + } + + } + + /* OK, connection-handling affairs taken care of! Next, let's go through all + queue entries NOT currently associated with a connection, and try to + pair them up with something. 
*/ + + if (queue_cur) { + struct queue_entry *q = queue; + + while (q) { + struct queue_entry* next = q->next; + u32 to_host = 0; + + if (!q->c) { + + struct conn_entry* c = conn; + + /* Let's try to find a matching, idle connection first. */ + + while (c) { + struct conn_entry* cnext = c->next; + + if (c->addr == q->req->addr && (++to_host) && + c->port == q->req->port && + c->proto == q->req->proto && !c->q) { + conn_associate(c, q); + goto next_q_entry; + } + + c = cnext; + } + + /* No match. If we are out of slots, request some other idle + connection to be nuked soon. */ + + if (to_host < max_conn_host && conn_cur < max_connections) { + conn_associate(0, q); + goto next_q_entry; + } else tear_down_idle = 1; + + } + +next_q_entry: + + q = next; + + } + + } + + return queue_cur; +} + + +/* Helper function for request / response dumpers: */ +static void dump_params(struct param_array* par) { + u32 i; + + for (i=0;ic;i++) { + + switch (par->t[i]) { + case PARAM_NONE: SAY(" <<<<"); break; + case PARAM_PATH: SAY(" PATH"); break; + case PARAM_PATH_S: SAY(" PT_S"); break; + case PARAM_PATH_C: SAY(" PT_C"); break; + case PARAM_PATH_E: SAY(" PT_E"); break; + case PARAM_PATH_D: SAY(" PT_D"); break; + case PARAM_QUERY: SAY(" QUER"); break; + case PARAM_QUERY_S: SAY(" QR_S"); break; + case PARAM_QUERY_C: SAY(" QR_C"); break; + case PARAM_QUERY_E: SAY(" QR_E"); break; + case PARAM_QUERY_D: SAY(" QR_D"); break; + case PARAM_POST: SAY(" POST"); break; + case PARAM_POST_F: SAY(" FILE"); break; + case PARAM_POST_O: SAY(" OPAQ"); break; + case PARAM_HEADER: SAY(" head"); break; + case PARAM_COOKIE: SAY(" cook"); break; + default: SAY(" ????"); + } + + SAY(":%-20s = '%s'\n", + par->n[i] ? par->n[i] : (u8*)"-", + par->v[i] ? par->v[i] : (u8*)"-"); + + } +} + + +/* Creates a working copy of a request. If all is 0, does not copy + path, query parameters, or POST data (but still copies headers). 
*/ + +struct http_request* req_copy(struct http_request* req, struct pivot_desc* pv, + u8 all) { + struct http_request* ret; + u32 i; + + if (!req) return NULL; + + ret = ck_alloc(sizeof(struct http_request)); + + ret->proto = req->proto; + + if (all) + ret->method = ck_strdup(req->method); + else + ret->method = ck_strdup((u8*)"GET"); + + ret->host = ck_strdup(req->host); + ret->addr = req->addr; + ret->port = req->port; + ret->pivot = pv; + ret->user_val = req->user_val; + + /* Copy all the requested data. */ + + for (i=0;ipar.c;i++) + if (all || HEADER_SUBTYPE(req->par.t[i])) + set_value(req->par.t[i], req->par.n[i], req->par.v[i], -1, + &ret->par); + + memcpy(&ret->same_sig, &req->same_sig, sizeof(struct http_sig)); + + return ret; + +} + + +/* Creates a copy of a response. */ + +struct http_response* res_copy(struct http_response* res) { + struct http_response* ret; + u32 i; + + if (!res) return NULL; + + ret = ck_alloc(sizeof(struct http_response)); + + ret->state = res->state; + ret->code = res->code; + ret->msg = res->msg ? 
ck_strdup(res->msg) : NULL; + ret->warn = res->warn; + + for (i=0;ihdr.c;i++) + set_value(res->hdr.t[i], res->hdr.n[i], res->hdr.v[i], -1, &ret->hdr); + + ret->pay_len = res->pay_len; + + if (res->pay_len) { + ret->payload = ck_alloc(res->pay_len); + memcpy(ret->payload, res->payload, res->pay_len); + } + + memcpy(&ret->sig, &res->sig, sizeof(struct http_sig)); + + ret->sniff_mime_id = res->sniff_mime_id; + ret->decl_mime_id = res->decl_mime_id; + ret->doc_type = res->doc_type; + ret->css_type = res->css_type; + ret->js_type = res->js_type; + ret->json_safe = res->json_safe; + ret->stuff_checked = res->stuff_checked; + ret->scraped = res->scraped; + + if (res->meta_charset) + ret->meta_charset = ck_strdup(res->meta_charset); + + if (res->header_charset) + ret->header_charset = ck_strdup(res->header_charset); + + if (res->header_mime) + ret->header_mime = ck_strdup(res->header_mime); + + ret->sniffed_mime = res->sniffed_mime; + + return ret; + +} + + +/* Dumps HTTP request data, for diagnostic purposes: */ + +void dump_http_request(struct http_request* r) { + + u8 *new_url, *tmp; + + SAY("\n== HTTP REQUEST %p ==\n\nBasic values:\n", r); + + SAY(" Proto = %u\n", r->proto); + SAY(" Method = %s\n", r->method ? r->method : (u8*)"(GET)"); + SAY(" Host = %s\n", r->host); + SAY(" Addr = %u.%u.%u.%u\n", ((u8*)&r->addr)[0], ((u8*)&r->addr)[1], + ((u8*)&r->addr)[2], ((u8*)&r->addr)[3]); + SAY(" Port = %d\n", r->port); + SAY(" Xrefs = pivot %p, handler %p, user %d\n", r->pivot, + r->callback, r->user_val); + + new_url = serialize_path(r, 1, 0); + + SAY("\nURLs:\n Original = %s\n" + " Synthetic = %s\n", r->orig_url ? 
r->orig_url : (u8*)"[none]", + new_url); + + ck_free(new_url); + + SAY("\nParameter array:\n"); + + dump_params(&r->par); + + SAY("\nRaw request data:\n\n"); + + tmp = build_request_data(r); + SAY("%s\n",tmp); + ck_free(tmp); + + SAY("\n== END OF REQUEST ==\n"); + +} + + +/* Dumps HTTP response data, likewise: */ + +void dump_http_response(struct http_response* r) { + + SAY("\n== HTTP RESPONSE %p ==\n\nBasic values:\n", r); + + SAY(" State = %u\n", r->state); + SAY(" Response = %u ('%s')\n", r->code, r->msg); + SAY(" Flags = %08x\n", r->warn); + SAY(" Data len = %u\n", r->pay_len); + + SAY("\nParameter array:\n"); + + dump_params(&r->hdr); + + if (r->payload) SAY("\nPayload data (%u):\n\n%s\n", r->pay_len, r->payload); + + SAY("\n== END OF RESPONSE ==\n"); + +} + +/* Destroys http state information, for memory profiling. */ + +void destroy_http() { + u32 i; + struct dns_entry* cur; + + for (i=0;inext; + ck_free(cur->name); + ck_free(cur); + cur = next; + } + +} + + +/* Shows some pretty statistics. 
*/ + +void http_stats(u64 st_time) { + u64 en_time; + struct timeval tv; + + gettimeofday(&tv, NULL); + en_time = tv.tv_sec * 1000 + tv.tv_usec / 1000; + + SAY("Scan statistics\n" + "---------------\n\n" + cGRA " Scan time : " cNOR "%u:%02u:%02u.%04u\n" + cGRA " HTTP requests : " cNOR "%u sent (%.02f/s), %.02f kB in, " + "%.02f kB out (%.02f kB/s) \n" + cGRA " Compression : " cNOR "%.02f kB in, %.02f kB out " + "(%.02f%% gain) \n" + cGRA " HTTP exceptions : " cNOR "%u net errors, %u proto errors, " + "%u retried, %u drops\n" + cGRA " TCP connections : " cNOR "%u total (%.02f req/conn) \n" + cGRA " TCP exceptions : " cNOR "%u failures, %u timeouts, %u purged\n" + cGRA " External links : " cNOR "%u skipped\n" + cGRA " Reqs pending : " cNOR "%u \n", + + /* hrs */ (u32)((en_time - st_time) / 1000 / 60 / 60), + /* min */ (u32)((en_time - st_time) / 1000 / 60) % 60, + /* sec */ (u32)((en_time - st_time) / 1000) % 60, + /* ms */ (u32)((en_time - st_time) % 1000), + + req_count - queue_cur, + (float) (req_count - queue_cur / 1.15) * 1000 / (en_time - st_time + 1), + (float) bytes_recv / 1024, (float) bytes_sent / 1024, + (float) (bytes_recv + bytes_sent) / 1.024 / (en_time - st_time + 1), + + (float) bytes_deflated / 1024, (float) bytes_inflated / 1024, + ((float) bytes_inflated - bytes_deflated) / (bytes_inflated + + bytes_deflated + 1) * 100, + + req_errors_net, req_errors_http, req_retried, req_dropped, + + conn_count, (float) req_count / conn_count, + conn_failed, conn_busy_tmout, conn_idle_tmout, + url_scope, queue_cur); +} diff --git a/http_client.h b/http_client.h new file mode 100644 index 0000000..e2fb405 --- /dev/null +++ b/http_client.h @@ -0,0 +1,418 @@ +/* + skipfish - high-performance, single-process asynchronous HTTP client + -------------------------------------------------------------------- + + Author: Michal Zalewski + + Copyright 2009, 2010 by Google Inc. All Rights Reserved. 
+ + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + */ + +#ifndef _HAVE_HTTP_CLIENT_H +#define _HAVE_HTTP_CLIENT_H + +#include + +#include "config.h" +#include "types.h" +#include "alloc-inl.h" +#include "string-inl.h" + +/* Generic type-name-value array, used for HTTP headers, etc: */ + +struct param_array { + u8* t; /* Type */ + u8** n; /* Name */ + u8** v; /* Value */ + u32 c; /* Count */ +}; + +/* Flags for http_request protocol: */ + +#define PROTO_NONE 0 /* Illegal value */ +#define PROTO_HTTP 1 /* Plain-text HTTP */ +#define PROTO_HTTPS 2 /* TLS/SSL wrapper */ + +/* Flags for http_request parameter list entries: */ + +#define PARAM_NONE 0 /* Empty parameter slot */ + +#define PARAM_PATH 10 /* Path or parametrized path */ +#define PARAM_PATH_S 11 /* - Semicolon element */ +#define PARAM_PATH_C 12 /* - Comma element */ +#define PARAM_PATH_E 13 /* - Exclamation mark element */ +#define PARAM_PATH_D 14 /* - Dollar sign element */ + +#define PATH_SUBTYPE(_x) ((_x) >= PARAM_PATH && (_x) < PARAM_QUERY) + +#define PARAM_QUERY 20 /* Query parameter */ +#define PARAM_QUERY_S 21 /* - Semicolon element */ +#define PARAM_QUERY_C 22 /* - Comma element */ +#define PARAM_QUERY_E 23 /* - Exclamation mark element */ +#define PARAM_QUERY_D 24 /* - Dollar sign element */ + +#define QUERY_SUBTYPE(_x) ((_x) >= PARAM_QUERY && (_x) < PARAM_POST) + +#define PARAM_POST 50 /* Post parameter */ +#define PARAM_POST_F 51 /* - File field */ +#define PARAM_POST_O 52 /* - Non-standard (e.g., JSON) */ + 
+#define POST_SUBTYPE(_x) ((_x) >= PARAM_POST && (_x) < PARAM_HEADER) + +#define PARAM_HEADER 100 /* Generic HTTP header */ +#define PARAM_COOKIE 101 /* - HTTP cookie */ + +#define HEADER_SUBTYPE(_x) ((_x) >= PARAM_HEADER) + +struct http_response; +struct queue_entry; + +/* HTTP response signature. */ + +struct http_sig { + u32 code; /* HTTP response code */ + u32 data[FP_SIZE]; /* Response fingerprint data */ +}; + +/* HTTP request descriptor: */ + +struct http_request { + + u8 proto; /* Protocol (PROTO_*) */ + u8* method; /* HTTP method (GET, POST, ...) */ + u8* host; /* Host name */ + u32 addr; /* Resolved IP address */ + u16 port; /* Port number to connect to */ + + u8* orig_url; /* Copy of the original URL */ + + struct param_array par; /* Parameters, headers, cookies */ + + struct pivot_desc *pivot; /* Pivot descriptor */ + + u32 user_val; /* Can be used freely */ + + u8 (*callback)(struct http_request*, struct http_response*); + /* Callback to invoke when done */ + + struct http_sig same_sig; /* Used by secondary ext fuzz. */ + +}; + +/* Flags for http_response completion state: */ + +#define STATE_NOTINIT 0 /* Request not sent */ +#define STATE_CONNECT 1 /* Connecting... 
*/ +#define STATE_SEND 2 /* Sending request */ +#define STATE_RECEIVE 3 /* Waiting for response */ + +#define STATE_OK 100 /* Proper fetch */ +#define STATE_DNSERR 101 /* DNS error */ +#define STATE_LOCALERR 102 /* Socket or routing error */ +#define STATE_CONNERR 103 /* Connection failed */ +#define STATE_RESPERR 104 /* Response not valid */ +#define STATE_SUPPRESS 200 /* Dropped (limits / errors) */ + +/* Flags for http_response warnings: */ + +#define WARN_NONE 0 /* No warnings */ +#define WARN_PARTIAL 1 /* Incomplete read */ +#define WARN_TRAIL 2 /* Trailing request garbage */ +#define WARN_CFL_HDR 4 /* Conflicting headers */ + +/* HTTP response descriptor: */ + +struct http_response { + + u32 state; /* HTTP convo state (STATE_*) */ + u32 code; /* HTTP response code */ + u8* msg; /* HTTP response message */ + u32 warn; /* Warning flags */ + + u8 cookies_set; /* Sets cookies? */ + + struct param_array hdr; /* Server header, cookie list */ + + u32 pay_len; /* Response payload length */ + u8* payload; /* Response payload data */ + + struct http_sig sig; /* Response signature data */ + + /* Various information populated by content checks: */ + + u8 sniff_mime_id; /* Sniffed MIME (MIME_*) */ + u8 decl_mime_id; /* Declared MIME (MIME_*) */ + + u8* meta_charset; /* META tag charset value */ + u8* header_charset; /* Content-Type charset value */ + u8* header_mime; /* Content-Type MIME type */ + u8* sniffed_mime; /* Detected MIME type (ref) */ + + /* Everything below is of interest to scrape_response() only: */ + + u8 doc_type; /* 0 - tbd, 1 - bin, 2 - ascii */ + u8 css_type; /* 0 - tbd, 1 - other, 2 - css */ + u8 js_type; /* 0 - tbd, 1 - other, 2 - js */ + u8 json_safe; /* 0 - no, 1 - yes */ + u8 stuff_checked; /* check_stuff() called? */ + u8 scraped; /* scrape_response() called? 
*/ + +}; + +/* Open keep-alive connection descriptor: */ + +struct conn_entry { + + s32 fd; /* The actual file descriptor */ + + u8 proto; /* Protocol (PROTO_*) */ + u32 addr; /* Destination IP */ + u32 port; /* Destination port */ + + u8 reused; /* Used for earier requests? */ + + u32 req_start; /* Unix time: request start */ + u32 last_rw; /* Unix time: last read / write */ + + SSL_CTX *srv_ctx; /* SSL context */ + SSL *srv_ssl; + u8 SSL_rd_w_wr; /* SSL_read() wants to write? */ + u8 SSL_wr_w_rd; /* SSL_write() wants to read? */ + u8 ssl_checked; /* SSL state checked? */ + + u8* read_buf; /* Current read buffer */ + u32 read_len; + u8* write_buf; /* Pending write buffer */ + u32 write_off; /* Current write offset */ + u32 write_len; + + struct queue_entry* q; /* Current queue entry */ + + struct conn_entry* prev; /* Previous connection entry */ + struct conn_entry* next; /* Next connection entry */ + +}; + +/* Request queue descriptor: */ + +struct queue_entry { + struct http_request* req; /* Request descriptor */ + struct http_response* res; /* Response descriptor */ + struct conn_entry* c; /* Connection currently used */ + struct queue_entry* prev; /* Previous queue entry */ + struct queue_entry* next; /* Next queue entry */ +}; + +/* DNS cache item: */ + +struct dns_entry { + u8* name; /* Name requested */ + u32 addr; /* IP address (0 = bad host) */ + struct dns_entry* next; /* Next cache entry */ +}; + + +/* Simplified macros to manipulate param_arrays: */ + +#define ADD(_ar,_t,_n,_v) do { \ + u32 _cur = (_ar)->c++; \ + (_ar)->t = ck_realloc((_ar)->t, (_ar)->c); \ + (_ar)->n = ck_realloc((_ar)->n, (_ar)->c * sizeof(u8*)); \ + (_ar)->v = ck_realloc((_ar)->v, (_ar)->c * sizeof(u8*)); \ + (_ar)->t[cur] = _t; \ + (_ar)->n[cur] = (_n) ? ck_strdup(_n) : 0; \ + (_ar)->v[cur] = (_v) ? 
ck_strdup(_v) : 0; \ + } while (0) + +#define FREE(_ar) do { \ + while ((_ar)->c--) { \ + free((_ar)->n[(_ar)->c]); \ + free((_ar)->v[(_ar)->c]); \ + } \ + free((_ar)->t); \ + free((_ar)->n); \ + free((_ar)->v); \ + } while (0) + + +/* Extracts parameter value from param_array. Name is matched if + non-NULL. Returns pointer to value data, not a duplicate string; + NULL if no match found. */ + +u8* get_value(u8 type, u8* name, u32 offset, struct param_array* par); + +/* Inserts or overwrites parameter value in param_array. If offset + == -1, will append parameter to list. Duplicates strings, + name and val can be NULL. */ + +void set_value(u8 type, u8* name, u8* val, s32 offset, struct param_array* par); + +/* Simplified macros for value table access: */ + +#define GET_HDR(_name, _p) get_value(PARAM_HEADER, _name, 0, _p) +#define SET_HDR(_name, _val, _p) set_value(PARAM_HEADER, _name, _val, -1, _p) +#define GET_CK(_name, _p) get_value(PARAM_COOKIE, _name, 0, _p) +#define SET_CK(_name, _val, _p) set_value(PARAM_COOKIE, _name, _val, 0, _p) + +void tokenize_path(u8* str, struct http_request* req, u8 add_slash); + +/* Convert a fully-qualified or relative URL string to a proper http_request + representation. Returns 0 on success, 1 on format error. */ + +u8 parse_url(u8* url, struct http_request* req, struct http_request* ref); + +/* URL-decodes a string. 'Plus' parameter governs the behavior on + + signs (as they have a special meaning only in query params, not in path). */ + +u8* url_decode_token(u8* str, u32 len, u8 plus); + +/* URL-encodes a string according to custom rules. The assumption here is that + the data is already tokenized as "special" boundaries such as ?, =, &, /, + ;, so these characters must always be escaped if present in tokens. We + otherwise let pretty much everything else go through, as it may help with + the exploitation of certain vulnerabilities. */ + +u8* url_encode_token(u8* str, u32 len); + +/* Reconstructs URI from http_request data. 
Includes protocol and host + if with_host is non-zero. */ + +u8* serialize_path(struct http_request* req, u8 with_host, u8 with_post); + +/* Looks up IP for a particular host, returns data in network order. + Uses standard resolver, so it is slow and blocking, but we only + expect to call it a couple of times. */ + +u32 maybe_lookup_host(u8* name); + +/* Creates an ad hoc DNS cache entry, to override NS lookups. */ + +void fake_host(u8* name, u32 addr); + +/* Schedules a new asynchronous request; req->callback() will be invoked when + the request is completed. */ + +void async_request(struct http_request* req); + +/* Prepares a serialized HTTP buffer to be sent over the network. */ + +u8* build_request_data(struct http_request* req); + +/* Parses a network buffer containing raw HTTP response received over the + network ('more' == the socket is still available for reading). Returns 0 + if response parses OK, 1 if more data should be read from the socket, + 2 if the response seems invalid. */ + +u8 parse_response(struct http_request* req, struct http_response* res, u8* data, + u32 data_len, u8 more); + +/* Processes the queue. Returns the number of queue entries remaining, + 0 if none. Will do a blocking select() to wait for socket state changes + (or timeouts) if no data available to process. This is the main + routine for the scanning loop. */ + +u32 next_from_queue(void); + +/* Dumps HTTP request stats, for debugging purposes: */ + +void dump_http_request(struct http_request* r); + +/* Dumps HTTP response stats, for debugging purposes: */ + +void dump_http_response(struct http_response* r); + +/* Fingerprints a response: */ + +void fprint_response(struct http_response* res); + +/* Performs a deep free() of sturct http_request */ + +void destroy_request(struct http_request* req); + +/* Performs a deep free() of sturct http_response */ + +void destroy_response(struct http_response* res); + +/* Creates a working copy of a request. 
If all is 0, does not copy + path, query parameters, or POST data (but still copies headers). */ + +struct http_request* req_copy(struct http_request* req, struct pivot_desc* pv, + u8 all); + +/* Creates a copy of a response. */ + +struct http_response* res_copy(struct http_response* res); + +/* Various settings and counters exported to other modules: */ + +extern u32 max_connections, + max_conn_host, + max_requests, + max_fail, + idle_tmout, + resp_tmout, + rw_tmout, + size_limit, + req_errors_net, + req_errors_http, + req_errors_cur, + req_count, + req_dropped, + req_retried, + url_scope, + conn_count, + conn_idle_tmout, + conn_busy_tmout, + conn_failed, + queue_cur; + +extern u64 bytes_sent, + bytes_recv, + bytes_deflated, + bytes_inflated; + +extern u8 ignore_cookies; + +/* Flags for browser type: */ + +#define BROWSER_FAST 0 /* Minimimal HTTP headers */ +#define BROWSER_MSIE 1 /* Try to mimic MSIE */ +#define BROWSER_FFOX 2 /* Try to mimic Firefox */ + +extern u8 browser_type; + +/* Flags for authentication type: */ + +#define AUTH_NONE 0 /* No authentication */ +#define AUTH_BASIC 1 /* 'Basic' HTTP auth */ + +extern u8 auth_type; + +extern u8 *auth_user, + *auth_pass; + +/* Global HTTP cookies, extra headers: */ + +extern struct param_array global_http_par; + +/* Destroys http state information, for memory profiling. */ + +void destroy_http(); + +/* Shows some pretty statistics. */ + +void http_stats(u64 st_time); + +#endif /* !_HAVE_HTTP_CLIENT_H */ diff --git a/report.c b/report.c new file mode 100644 index 0000000..3698706 --- /dev/null +++ b/report.c @@ -0,0 +1,779 @@ +/* + skipfish - post-processing and reporting + ---------------------------------------- + + Author: Michal Zalewski + + Copyright 2009, 2010 by Google Inc. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + */ + +#include +#include +#include +#include +#include +#include + +#include "debug.h" +#include "config.h" +#include "types.h" +#include "http_client.h" +#include "database.h" +#include "crawler.h" +#include "analysis.h" + +/* Pivot and issue signature data. */ + +struct p_sig_desc { + u8 type; /* Pivot type */ + struct http_sig* res_sig; /* Response signature */ + u32 issue_sig; /* Issues fingerprint */ + u32 child_sig; /* Children fingerprint */ +}; + + +static struct p_sig_desc* p_sig; +static u32 p_sig_cnt; +u8 suppress_dupes; + + +/* Response, issue sample data. */ + +struct mime_sample_desc { + u8* det_mime; + struct http_request** req; + struct http_response** res; + u32 sample_cnt; +}; + + +struct issue_sample_desc { + u32 type; + struct issue_desc** i; + u32 sample_cnt; +}; + +static struct mime_sample_desc* m_samp; +static struct issue_sample_desc* i_samp; +static u32 m_samp_cnt, i_samp_cnt; + + +/* qsort() helper for sort_annotate_pivot(). */ + +static int pivot_compar(const void* par1, const void* par2) { + const struct pivot_desc *p1 = *(struct pivot_desc**)par1, + *p2 = *(struct pivot_desc**)par2; + return strcasecmp((char*)p1->name, (char*)p2->name); +} + +static int issue_compar(const void* par1, const void* par2) { + const struct issue_desc *i1 = par1, *i2 = par2; + return i2->type - i1->type; +} + + +/* Recursively annotates and sorts pivots. */ + +static void sort_annotate_pivot(struct pivot_desc* pv) { + u32 i, path_child = 0; + static u32 proc_cnt; + u8 *q1, *q2; + + /* Add notes to all non-dir nodes with dir or file children... 
*/ + + for (i=0;ichild_cnt;i++) { + if (pv->child[i]->type == PIVOT_FILE || pv->child[i]->type == PIVOT_DIR) path_child = 1; + sort_annotate_pivot(pv->child[i]); + } + + if (pv->type != PIVOT_DIR && pv->type != PIVOT_SERV && + pv->type != PIVOT_ROOT && path_child) + problem(PROB_NOT_DIR, pv->req, pv->res, 0, pv, 0); + + /* Non-parametric nodes with digits in the name were not brute-forced, + but the user might be interested in doing so. Skip images here. */ + + if (pv->fuzz_par == -1 && pv->res && + (pv->res->sniff_mime_id < MIME_IMG_JPEG || + pv->res->sniff_mime_id > MIME_AV_WMEDIA) && + (pv->type == PIVOT_DIR || pv->type == PIVOT_FILE || + pv->type == PIVOT_PATHINFO) && !pv->missing) { + i = strlen((char*)pv->name); + while (i--) + if (isdigit(pv->name[i])) { + problem(PROB_FUZZ_DIGIT, pv->req, pv->res, 0, pv, 0); + break; + } + } + + /* Parametric nodes that seem to contain queries in parameters, and are not + marked as bogus_par, should be marked as dangerous. */ + + if (pv->fuzz_par != -1 && !pv->bogus_par && + (((q1 = (u8*)strchr((char*)pv->req->par.v[pv->fuzz_par], '(')) && + (q2 = (u8*)strchr((char*)pv->req->par.v[pv->fuzz_par], ')')) && q1 < q2) + || + ((inl_strcasestr(pv->req->par.v[pv->fuzz_par], (u8*)"SELECT ") || + inl_strcasestr(pv->req->par.v[pv->fuzz_par], (u8*)"DELETE ") ) && + inl_strcasestr(pv->req->par.v[pv->fuzz_par], (u8*)" FROM ")) || + (inl_strcasestr(pv->req->par.v[pv->fuzz_par], (u8*)"UPDATE ") || + inl_strcasestr(pv->req->par.v[pv->fuzz_par], (u8*)" WHERE ")) || + inl_strcasestr(pv->req->par.v[pv->fuzz_par], (u8*)"DROP TABLE ") || + inl_strcasestr(pv->req->par.v[pv->fuzz_par], (u8*)" ORDER BY "))) + problem(PROB_SQL_PARAM, pv->req, pv->res, 0, pv, 0); + + /* Sort children nodes and issues as appropriate. 
*/ + + if (pv->child_cnt > 1) + qsort(pv->child, pv->child_cnt, sizeof(struct pivot_desc*), pivot_compar); + + if (pv->issue_cnt > 1) + qsort(pv->issue, pv->issue_cnt, sizeof(struct issue_desc), issue_compar); + + if ((!(proc_cnt++ % 50)) || pv->type == PIVOT_ROOT) { + SAY(cLGN "\r[+] " cNOR "Sorting and annotating crawl nodes: %u", proc_cnt); + fflush(0); + } + +} + + +/* Issue extra hashing helper. */ + +static inline u32 hash_extra(u8* str) { + register u32 ret = 0; + register u8 cur; + + if (str) + while ((cur=*str)) { + ret = ~ret ^ (cur) ^ + (cur << 5) ^ (~cur >> 5) ^ + (cur << 10) ^ (~cur << 15) ^ + (cur << 20) ^ (~cur << 25) ^ + (cur << 30); + str++; + } + + return ret; +} + + + +/* Registers a new pivot signature, or updates an existing one. */ + +static void maybe_add_sig(struct pivot_desc* pv) { + u32 i, issue_sig = ~pv->issue_cnt, + child_sig = ~pv->child_cnt; + + if (!pv->res) return; + + /* Compute a rough children node signature based on children types. */ + + for (i=0;ichild_cnt;i++) + child_sig ^= (hash_extra(pv->child[i]->name) ^ + pv->child[i]->type) << (i % 16); + + /* Do the same for all recorded issues. */ + + for (i=0;iissue_cnt;i++) + issue_sig ^= (hash_extra(pv->issue[i].extra) ^ + pv->issue[i].type) << (i % 16); + + /* Assign a simplified signature to the pivot. */ + + pv->pv_sig = (pv->type << 16) ^ ~child_sig ^ issue_sig; + + /* See if a matching signature already exists. */ + + for (i=0;itype && p_sig[i].issue_sig == issue_sig && + p_sig[i].child_sig == child_sig && + same_page(p_sig[i].res_sig, &pv->res->sig)) { + + pv->dupe = 1; + return; + + } + + /* No match - create a new one. */ + + p_sig = ck_realloc(p_sig, (p_sig_cnt + 1) * sizeof(struct p_sig_desc)); + + p_sig[p_sig_cnt].type = pv->type; + p_sig[p_sig_cnt].res_sig = &pv->res->sig; + p_sig[p_sig_cnt].issue_sig = issue_sig; + p_sig[p_sig_cnt].child_sig = child_sig; + p_sig_cnt++; + +} + + + + +/* Recursively collects unique signatures for pivots. 
*/ + +static void collect_signatures(struct pivot_desc* pv) { + u32 i; + static u32 proc_cnt; + + maybe_add_sig(pv); + for (i=0;ichild_cnt;i++) collect_signatures(pv->child[i]); + + if ((!(proc_cnt++ % 50)) || pv->type == PIVOT_ROOT) { + SAY(cLGN "\r[+] " cNOR "Looking for duplicate entries: %u", proc_cnt); + fflush(0); + } + +} + + +/* Destroys signature data (for memory profiling purposes). */ + +void destroy_signatures(void) { + u32 i; + + ck_free(p_sig); + + for (i=0;iparent; + static u32 proc_cnt; + + for (i=0;ichild_cnt;i++) compute_counts(pv->child[i]); + + if (pv->dupe) return; + + while (tmp) { + tmp->total_child_cnt++; + tmp = tmp->parent; + } + + for (i=0;iissue_cnt;i++) { + u8 sev = PSEV(pv->issue[i].type); + tmp = pv; + while (tmp) { + tmp->total_issues[sev]++; + tmp = tmp->parent; + } + } + + if ((!(proc_cnt++ % 50)) || pv->type == PIVOT_ROOT) { + SAY(cLGN "\r[+] " cNOR "Counting unique issues: %u", proc_cnt); + fflush(0); + } + +} + + +/* Helper to JS-escape data. Static buffer, will be destroyed on + subsequent calls. 
*/ + +static inline u8* js_escape(u8* str) { + u32 len; + static u8* ret; + u8* opos; + + if (!str) return (u8*)"[none]"; + + len = strlen((char*)str); + + if (ret) free(ret); + opos = ret = __DFL_ck_alloc(len * 4 + 1); + + while (len--) { + if (*str > 0x1f && *str < 0x80 && !strchr("<>\\'\"", *str)) { + *(opos++) = *(str++); + } else { + sprintf((char*)opos, "\\x%02x", *(str++)); + opos += 4; + } + } + + *opos = 0; + + return ret; + +} + + +static void output_scan_info(u64 scan_time, u32 seed) { + FILE* f; + time_t t = time(NULL); + u8* ct = (u8*)ctime(&t); + + if (isspace(ct[strlen((char*)ct)-1])) + ct[strlen((char*)ct)-1] = 0; + + f = fopen("summary.js", "w"); + if (!f) PFATAL("Cannot open 'summary.js'"); + + fprintf(f, "var sf_version = '%s';\n", VERSION); + fprintf(f, "var scan_date = '%s';\n", js_escape(ct)); + fprintf(f, "var scan_seed = '0x%08x';\n", seed); + fprintf(f, "var scan_ms = %llu;\n", (long long)scan_time); + + fclose(f); + +} + + +/* Helper to save request, response data. */ + +static void describe_res(FILE* f, struct http_response* res) { + + if (!res) { + fprintf(f, "'fetched': false, 'error': 'Content not fetched'"); + return; + } + + switch (res->state) { + + case 0 ... STATE_OK - 1: + fprintf(f, "'fetched': false, 'error': '(Reported while fetch in progress)'"); + break; + + case STATE_OK: + fprintf(f, "'fetched': true, 'code': %u, 'len': %u, 'decl_mime': '%s', ", + res->code, res->pay_len, + js_escape(res->header_mime)); + + fprintf(f, "'sniff_mime': '%s', 'cset': '%s'", + res->sniffed_mime ? res->sniffed_mime : (u8*)"[none]", + js_escape(res->header_charset ? 
res->header_charset + : res->meta_charset)); + break; + + case STATE_DNSERR: + fprintf(f, "'fetched': false, 'error': 'DNS error'"); + break; + + case STATE_LOCALERR: + fprintf(f, "'fetched': false, 'error': 'Local network error'"); + break; + + case STATE_CONNERR: + fprintf(f, "'fetched': false, 'error': 'Connection error'"); + break; + + case STATE_RESPERR: + fprintf(f, "'fetched': false, 'error': 'Malformed HTTP response'"); + break; + + case STATE_SUPPRESS: + fprintf(f, "'fetched': false, 'error': 'Limits exceeded'"); + break; + + + default: + fprintf(f, "'fetched': false, 'error': 'Unknown error'"); + + } + +} + + +/* Helper to save request, response data. */ + +static void save_req_res(struct http_request* req, struct http_response* res, u8 sample) { + FILE* f; + + if (req) { + u8* rd = build_request_data(req); + f = fopen("request.dat", "w"); + if (!f) PFATAL("Cannot create 'request.dat'"); + fwrite(rd, strlen((char*)rd), 1, f); + fclose(f); + ck_free(rd); + } + + if (res && res->state == STATE_OK) { + u32 i; + f = fopen("response.dat", "w"); + if (!f) PFATAL("Cannot create 'response.dat'"); + fprintf(f, "HTTP/1.1 %u %s\n", res->code, res->msg); + + for (i=0;ihdr.c;i++) + if (res->hdr.t[i] == PARAM_HEADER) + fprintf(f, "%s: %s\n", res->hdr.n[i], res->hdr.v[i]); + else + fprintf(f, "Set-Cookie: %s=%s\n", res->hdr.n[i], res->hdr.v[i]); + + fprintf(f, "\n"); + fwrite(res->payload, res->pay_len, 1, f); + fclose(f); + + /* Also collect MIME samples at this point. */ + + if (!req->pivot->dupe && res->sniffed_mime && sample) { + + for (i=0;isniffed_mime)) break; + + if (i == m_samp_cnt) { + m_samp = ck_realloc(m_samp, (i + 1) * sizeof(struct mime_sample_desc)); + m_samp[i].det_mime = res->sniffed_mime; + m_samp_cnt++; + } else { + u32 c; + + /* If we already have something that looks very much the same on the + list, don't bother reporting it again. 
*/ + + for (c=0;csig, &res->sig)) return; + } + + m_samp[i].req = ck_realloc(m_samp[i].req, (m_samp[i].sample_cnt + 1) * + sizeof(struct http_request*)); + m_samp[i].res = ck_realloc(m_samp[i].res, (m_samp[i].sample_cnt + 1) * + sizeof(struct http_response*)); + m_samp[i].req[m_samp[i].sample_cnt] = req; + m_samp[i].res[m_samp[i].sample_cnt] = res; + m_samp[i].sample_cnt++; + + } + + } + +} + + +/* Dumps the actual crawl data. */ + +static void output_crawl_tree(struct pivot_desc* pv) { + u32 i; + FILE* f; + static u32 proc_cnt; + + /* Save request, response. */ + + save_req_res(pv->req, pv->res, 1); + + /* Write children information. Don't crawl children just yet, + because we could run out of file descriptors on a particularly + deep tree if we keep one open and recurse. */ + + f = fopen("child_index.js", "w"); + if (!f) PFATAL("Cannot create 'child_index.js'."); + + fprintf(f, "var child = [\n"); + + for (i=0;ichild_cnt;i++) { + u8 tmp[32]; + u8* p; + + if (suppress_dupes && pv->child[i]->dupe && + !pv->child[i]->total_child_cnt) continue; + + /* Also completely suppress nodes that seem identical to the + previous one, and have a common prefix (as this implies + a mod_rewrite or htaccess filter). */ + + if (i && pv->child[i-1]->pv_sig == pv->child[i]->pv_sig) { + u8 *pn = pv->child[i-1]->name, *cn = pv->child[i]->name; + u32 pnd = strcspn((char*)pn, "."); + if (!strncasecmp((char*)pn, (char*)cn, pnd)) continue; + } + + sprintf((char*)tmp, "c%u", i); + + fprintf(f, " { 'dupe': %s, 'type': %u, 'name': '%s%s", + pv->child[i]->dupe ? "true" : "false", + pv->child[i]->type, js_escape(pv->child[i]->name), + (pv->child[i]->fuzz_par == -1 || pv->child[i]->type == PIVOT_VALUE) + ? (u8*)"" : (u8*)"="); + + fprintf(f, "%s', 'dir': '%s', 'linked': %d, ", + (pv->child[i]->fuzz_par == -1 || pv->child[i]->type == PIVOT_VALUE) + ? 
(u8*)"" : + js_escape(pv->child[i]->req->par.v[pv->child[i]->fuzz_par]), + tmp, pv->child[i]->linked); + + p = serialize_path(pv->child[i]->req, 1, 1); + fprintf(f, "'url': '%s', ", js_escape(p)); + ck_free(p); + + describe_res(f, pv->child[i]->res); + + fprintf(f,", 'missing': %s, 'csens': %s, 'child_cnt': %u, " + "'issue_cnt': [ %u, %u, %u, %u, %u ] }%s\n", + pv->child[i]->missing ? "true" : "false", + pv->child[i]->csens ? "true" : "false", + pv->child[i]->total_child_cnt, pv->child[i]->total_issues[1], + pv->child[i]->total_issues[2], pv->child[i]->total_issues[3], + pv->child[i]->total_issues[4], pv->child[i]->total_issues[5], + (i == pv->child_cnt - 1) ? "" : ","); + } + + fprintf(f, "];\n"); + fclose(f); + + /* Write issue index, issue dumps. */ + + f = fopen("issue_index.js", "w"); + if (!f) PFATAL("Cannot create 'issue_index.js'."); + + fprintf(f, "var issue = [\n"); + + for (i=0;iissue_cnt;i++) { + u8 tmp[32]; + sprintf((char*)tmp, "i%u", i); + + fprintf(f, " { 'severity': %u, 'type': %u, 'extra': '%s', ", + PSEV(pv->issue[i].type) - 1, pv->issue[i].type, + pv->issue[i].extra ? js_escape(pv->issue[i].extra) : (u8*)""); + + describe_res(f, pv->issue[i].res); + + fprintf(f, ", 'dir': '%s' }%s\n", + tmp, (i == pv->issue_cnt - 1) ? "" : ","); + + if (mkdir((char*)tmp, 0755)) PFATAL("Cannot create '%s'.", tmp); + chdir((char*)tmp); + save_req_res(pv->issue[i].req, pv->issue[i].res, 1); + chdir((char*)".."); + + /* Issue samples next! */ + + if (!pv->dupe) { + u32 c; + for (c=0;cissue[i].type) break; + + if (c == i_samp_cnt) { + i_samp = ck_realloc(i_samp, (c + 1) * sizeof(struct issue_sample_desc)); + i_samp_cnt++; + i_samp[c].type = pv->issue[i].type; + } + + i_samp[c].i = ck_realloc(i_samp[c].i, (i_samp[c].sample_cnt + 1) * + sizeof(struct issue_desc*)); + i_samp[c].i[i_samp[c].sample_cnt] = &pv->issue[i]; + i_samp[c].sample_cnt++; + } + + } + + fprintf(f, "];\n"); + fclose(f); + + /* Actually crawl children. 
*/ + + for (i=0;ichild_cnt;i++) { + u8 tmp[32]; + sprintf((char*)tmp, "c%u", i); + if (mkdir((char*)tmp, 0755)) PFATAL("Cannot create '%s'.", tmp); + chdir((char*)tmp); + output_crawl_tree(pv->child[i]); + chdir((char*)".."); + } + + if ((!(proc_cnt++ % 50)) || pv->type == PIVOT_ROOT) { + SAY(cLGN "\r[+] " cNOR "Counting unique issues: %u", proc_cnt); + fflush(0); + } + +} + + +/* Writes previews of MIME types, issues. */ + +static int m_samp_qsort(const void* ptr1, const void* ptr2) { + const struct mime_sample_desc *p1 = ptr1, *p2 = ptr2; + return strcasecmp((char*)p1->det_mime, (char*)p2->det_mime); +} + +static int i_samp_qsort(const void* ptr1, const void* ptr2) { + const struct issue_sample_desc *p1 = ptr1, *p2 = ptr2; + return p2->type - p1->type; +} + + +static void output_summary_views() { + u32 i; + FILE* f; + + f = fopen("samples.js", "w"); + if (!f) PFATAL("Cannot create 'samples.js'."); + + qsort(m_samp, m_samp_cnt, sizeof(struct mime_sample_desc), m_samp_qsort); + qsort(i_samp, i_samp_cnt, sizeof(struct issue_sample_desc), i_samp_qsort); + + fprintf(f, "var mime_samples = [\n"); + + for (i=0;i MAX_SAMPLES ? MAX_SAMPLES : + m_samp[i].sample_cnt); + + sprintf((char*)tmp, "_m%u", i); + if (mkdir((char*)tmp, 0755)) PFATAL("Cannot create '%s'.", tmp); + chdir((char*)tmp); + + fprintf(f, " { 'mime': '%s', 'samples': [\n", m_samp[i].det_mime); + + for (c=0;cpivot->linked, m_samp[i].res[c]->pay_len, + (c == use_samp - 1) ? " ]" : ","); + ck_free(p); + } + + fprintf(f, " }%s\n", (i == m_samp_cnt - 1) ? "" : ","); + chdir(".."); + } + + fprintf(f, "];\n\n"); + + fprintf(f, "var issue_samples = [\n"); + + for (i=0;i MAX_SAMPLES ? 
MAX_SAMPLES : + i_samp[i].sample_cnt); + + sprintf((char*)tmp, "_i%u", i); + if (mkdir((char*)tmp, 0755)) PFATAL("Cannot create '%s'.", tmp); + chdir((char*)tmp); + + fprintf(f, " { 'severity': %d, 'type': %d, 'samples': [\n", + PSEV(i_samp[i].type) - 1, i_samp[i].type); + + for (c=0;creq, 1, 0); + sprintf((char*)tmp2, "%u", c); + if (mkdir((char*)tmp2, 0755)) PFATAL("Cannot create '%s'.", tmp2); + chdir((char*)tmp2); + save_req_res(i_samp[i].i[c]->req, i_samp[i].i[c]->res, 0); + chdir(".."); + fprintf(f, " { 'url': '%s', ", js_escape(p)); + fprintf(f, "'extra': '%s', 'dir': '%s/%s' }%s\n", + i_samp[i].i[c]->extra ? js_escape(i_samp[i].i[c]->extra) : + (u8*)"", tmp, tmp2, + (c == use_samp - 1) ? " ]" : ","); + ck_free(p); + } + + fprintf(f, " }%s\n", (i == i_samp_cnt - 1) ? "" : ","); + chdir(".."); + } + + fprintf(f, "];\n\n"); + fclose(f); + +} + + +/* Copies over assets/... to target directory. */ + +static u8* ca_out_dir; + +static int copy_asset(const struct dirent* d) { + u8 *itmp, *otmp, buf[1024]; + s32 i, o; + + if (d->d_name[0] == '.' || !strcmp(d->d_name, "COPYING")) return 0; + + itmp = ck_alloc(6 + strlen(d->d_name) + 2); + sprintf((char*)itmp, "assets/%s", d->d_name); + i = open((char*)itmp, O_RDONLY); + + otmp = ck_alloc(strlen((char*)ca_out_dir) + strlen(d->d_name) + 2); + sprintf((char*)otmp, "%s/%s", ca_out_dir, d->d_name); + o = open((char*)otmp, O_WRONLY | O_CREAT | O_EXCL, 0644); + + if (i >= 0 && o >= 0) { + s32 c; + while ((c = read(i, buf, 1024)) > 0) write(o, buf, c); + } + + close(i); + close(o); + + ck_free(itmp); + ck_free(otmp); + + return 0; + +} + + +static void copy_static_code(u8* out_dir) { + struct dirent** d; + ca_out_dir = out_dir; + scandir("assets", &d, copy_asset, NULL); +} + + +/* Writes report to index.html in the current directory. Will create + subdirectories, helper files, etc. 
*/ + +void write_report(u8* out_dir, u64 scan_time, u32 seed) { + + SAY(cLGN "[+] " cNOR "Copying static resources...\n"); + copy_static_code(out_dir); + + if (chdir((char*)out_dir)) PFATAL("Cannot chdir to '%s'", out_dir); + + sort_annotate_pivot(&root_pivot); + SAY("\n"); + + collect_signatures(&root_pivot); + SAY("\n"); + + compute_counts(&root_pivot); + SAY("\n"); + + SAY(cLGN "[+] " cNOR "Writing scan description...\n"); + output_scan_info(scan_time, seed); + + output_crawl_tree(&root_pivot); + SAY("\n"); + + SAY(cLGN "[+] " cNOR "Generating summary views...\n"); + output_summary_views(); + + SAY(cLGN "[+] " cNOR "Report saved to '" cLBL "%s/index.html" cNOR "' [" + cLBL "0x%08x" cNOR "].\n", out_dir, seed); + +} diff --git a/report.h b/report.h new file mode 100644 index 0000000..291ed5d --- /dev/null +++ b/report.h @@ -0,0 +1,38 @@ +/* + skipfish - post-processing and reporting + ---------------------------------------- + + Author: Michal Zalewski + + Copyright 2009, 2010 by Google Inc. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + */ + +#ifndef _HAVE_REPORT_H + +#include "types.h" + +extern u8 suppress_dupes; + +/* Writes report to index.html in the current directory. Will create + subdirectories, helper files, etc. */ + +void write_report(u8* out_dir, u64 scan_time, u32 seed); + +/* Destroys all signatures created for pivot and issue clustering purposes. 
*/ + +void destroy_signatures(void); + +#endif /* !_HAVE_REPORT_H */ diff --git a/same_test.c b/same_test.c new file mode 100644 index 0000000..946d804 --- /dev/null +++ b/same_test.c @@ -0,0 +1,84 @@ +/* + skipfish - same_page() test utility + ----------------------------------- + + Author: Michal Zalewski + + Copyright 2009, 2010 by Google Inc. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "types.h" +#include "alloc-inl.h" +#include "string-inl.h" + +#include "crawler.h" +#include "analysis.h" +#include "database.h" +#include "http_client.h" +#include "report.h" + +#ifdef DEBUG_ALLOCATOR +struct __AD_trk_obj* __AD_trk[ALLOC_BUCKETS]; +u32 __AD_trk_cnt[ALLOC_BUCKETS]; +#endif /* DEBUG_ALLOCATOR */ + +#define MAX_LEN (1024*1024) + +u8 p1[MAX_LEN], p2[MAX_LEN]; + +int main(int argc, char** argv) { + static struct http_response r1, r2; + s32 l1, l2; + + l1 = read(8, p1, MAX_LEN); + l2 = read(9, p2, MAX_LEN); + + if (l1 < 0 || l2 < 0) + FATAL("Usage: ./same_test 8 + + Copyright 2009, 2010 by Google Inc. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "types.h" +#include "alloc-inl.h" +#include "string-inl.h" + +#include "crawler.h" +#include "analysis.h" +#include "database.h" +#include "http_client.h" +#include "report.h" + +#ifdef DEBUG_ALLOCATOR +struct __AD_trk_obj* __AD_trk[ALLOC_BUCKETS]; +u32 __AD_trk_cnt[ALLOC_BUCKETS]; +#endif /* DEBUG_ALLOCATOR */ + + +void usage(char* argv0) { + SAY("Usage: %s [ options ... ] -o output_dir start_url [ start_url2 ... ]\n\n" + + "Authentication and access options:\n\n" + + " -A user:pass - use specified HTTP authentication credentials\n" + " -F host:IP - pretend that 'host' resolves to 'IP'\n" + " -C name=val - append a custom cookie to all requests\n" + " -H name=val - append a custom HTTP header to all requests\n" + " -b (i|f) - use headers consistent with MSIE / Firefox\n" + " -N - do not accept any new cookies\n\n" + + "Crawl scope options:\n\n" + + " -d max_depth - maximum crawl tree depth (%u)\n" + " -c max_child - maximum children to index per node (%u)\n" + " -r r_limit - max total number of requests to send (%u)\n" + " -p crawl%% - node and link crawl probability (100%%)\n" + " -q hex - repeat probabilistic scan with given seed\n" + " -I string - only follow URLs matching 'string'\n" + " -X string - exclude URLs matching 'string'\n" + " -S string - exclude pages containing 'string'\n" + " -D domain - crawl cross-site links to another domain\n" + " -B domain - trust, but do not crawl, another domain\n" + " -O - do not submit any 
forms\n" + " -P - do not parse HTML, etc, to find new links\n\n" + + "Reporting options:\n\n" + + " -o dir - write output to specified directory (required)\n" + " -J - be less noisy about MIME / charset mismatches\n" + " -M - log warnings about mixed content\n" + " -E - log all HTTP/1.0 / HTTP/1.1 caching intent mismatches\n" + " -U - log all external URLs and e-mails seen\n" + " -Q - completely suppress duplicate nodes in reports\n\n" + + "Dictionary management options:\n\n" + + " -W wordlist - load an alternative wordlist (%s)\n" + " -L - do not auto-learn new keywords for the site\n" + " -V - do not update wordlist based on scan results\n" + " -Y - do not fuzz extensions in directory brute-force\n" + " -R age - purge words hit more than 'age' scans ago\n" + " -T name=val - add new form auto-fill rule\n" + " -G max_guess - maximum number of keyword guesses to keep (%d)\n\n" + + "Performance settings:\n\n" + + " -g max_conn - max simultaneous TCP connections, global (%u)\n" + " -m host_conn - max simultaneous connections, per target IP (%u)\n" + " -f max_fail - max number of consecutive HTTP errors (%u)\n" + " -t req_tmout - total request response timeout (%u s)\n" + " -w rw_tmout - individual network I/O timeout (%u s)\n" + " -i idle_tmout - timeout on idle HTTP connections (%u s)\n" + " -s s_limit - response size limit (%u B)\n\n" + + "Send comments and complaints to .\n", argv0, + max_depth, max_children, max_requests, DEF_WORDLIST, MAX_GUESSES, + max_connections, max_conn_host, max_fail, resp_tmout, rw_tmout, + idle_tmout, size_limit); + + exit(1); +} + + +/* Ctrl-C handler... 
*/ + +static u8 stop_soon; + +static void ctrlc_handler(int sig) { + stop_soon = 1; +} + + +/* Main entry point */ + +int main(int argc, char** argv) { + s32 opt; + u32 loop_cnt = 0, purge_age = 0, seed; + u8 dont_save_words = 0, show_once = 0; + u8 *wordlist = (u8*)DEF_WORDLIST, *output_dir = NULL; + + struct timeval tv; + u64 st_time, en_time; + + signal(SIGINT, ctrlc_handler); + signal(SIGPIPE, SIG_IGN); + SSL_library_init(); + + /* Come up with a quasi-decent random seed. */ + + gettimeofday(&tv, NULL); + seed = tv.tv_usec ^ (tv.tv_sec << 16) ^ getpid(); + + SAY("skipfish version " VERSION " by \n"); + + while ((opt = getopt(argc, argv, + "+A:F:C:H:b:Nd:c:r:p:I:X:S:D:PJOYQMUEW:LVT:G:R:B:q:g:m:f:t:w:i:s:o:")) > 0) + + switch (opt) { + + case 'A': { + u8* x = (u8*)strchr(optarg, ':'); + if (!x) FATAL("Credentials must be in 'user:pass' form."); + *(x++) = 0; + auth_user = (u8*)optarg; + auth_pass = x; + auth_type = AUTH_BASIC; + break; + } + + case 'F': { + u8* x = (u8*)strchr(optarg, '='); + u32 fake_addr; + if (!x) FATAL("Fake mappings must be in 'host=IP' form."); + *x = 0; + fake_addr = inet_addr((char*)x + 1); + if (fake_addr == (u32)-1) + FATAL("Could not parse IP address '%s'.", x + 1); + fake_host((u8*)optarg, fake_addr); + break; + } + + case 'H': { + u8* x = (u8*)strchr(optarg, '='); + if (!x) FATAL("Extra headers must be in 'name=value' form."); + *x = 0; + if (!strcasecmp(optarg, "Cookie")) + FATAL("Do not use -H to set cookies (try -C instead)."); + SET_HDR((u8*)optarg, x + 1, &global_http_par); + break; + } + + case 'C': { + u8* x = (u8*)strchr(optarg, '='); + if (!x) FATAL("Cookies must be in 'name=value' form."); + if (strchr(optarg, ';')) + FATAL("Split multiple cookies into separate -C options."); + *x = 0; + SET_CK((u8*)optarg, x + 1, &global_http_par); + break; + } + + case 'D': + if (*optarg == '*') optarg++; + APPEND_FILTER(allow_domains, num_allow_domains, optarg); + break; + + case 'B': + if (*optarg == '*') optarg++; + 
APPEND_FILTER(trust_domains, num_trust_domains, optarg); + break; + + case 'I': + if (*optarg == '*') optarg++; + APPEND_FILTER(allow_urls, num_allow_urls, optarg); + break; + + case 'X': + if (*optarg == '*') optarg++; + APPEND_FILTER(deny_urls, num_deny_urls, optarg); + break; + + case 'J': + relaxed_mime = 1; + break; + + case 'S': + if (*optarg == '*') optarg++; + APPEND_FILTER(deny_strings, num_deny_strings, optarg); + break; + + case 'T': { + u8* x = (u8*)strchr(optarg, '='); + if (!x) FATAL("Rules must be in 'name=value' form."); + *x = 0; + add_form_hint((u8*)optarg, x + 1); + break; + } + + case 'N': + ignore_cookies = 1; + break; + + case 'Y': + no_fuzz_ext = 1; + break; + + case 'q': + if (sscanf(optarg, "0x%08x", &seed) != 1) + FATAL("Invalid seed format."); + srandom(seed); + break; + + case 'Q': + suppress_dupes = 1; + break; + + case 'P': + no_parse = 1; + break; + + case 'V': + dont_save_words = 1; + break; + + case 'M': + warn_mixed = 1; + break; + + case 'U': + log_ext_urls = 1; + break; + + case 'L': + dont_add_words = 1; + break; + + case 'E': + pedantic_cache = 1; + break; + + case 'O': + no_forms = 1; + break; + + case 'R': + purge_age = atoi(optarg); + if (purge_age < 3) FATAL("Purge age invalid or too low (min 3)."); + break; + + case 'd': + max_depth = atoi(optarg); + if (max_depth < 2) FATAL("Invalid value '%s'.", optarg); + break; + + case 'c': + max_children = atoi(optarg); + if (!max_children) FATAL("Invalid value '%s'.", optarg); + break; + + case 'p': + crawl_prob = atoi(optarg); + if (!crawl_prob) FATAL("Invalid value '%s'.", optarg); + break; + + case 'W': + wordlist = (u8*)optarg; + break; + + case 'b': + if (optarg[0] == 'i') browser_type = BROWSER_MSIE; else + if (optarg[0] == 'f') browser_type = BROWSER_FFOX; else + usage(argv[0]); + break; + + case 'g': + max_connections = atoi(optarg); + if (!max_connections) FATAL("Invalid value '%s'.", optarg); + break; + + case 'm': + max_conn_host = atoi(optarg); + if (!max_conn_host) 
FATAL("Invalid value '%s'.", optarg); + break; + + case 'G': + max_guesses = atoi(optarg); + if (!max_guesses) FATAL("Invalid value '%s'.", optarg); + break; + + case 'r': + max_requests = atoi(optarg); + if (!max_requests) FATAL("Invalid value '%s'.", optarg); + break; + + case 'f': + max_fail = atoi(optarg); + if (!max_fail) FATAL("Invalid value '%s'.", optarg); + break; + + case 't': + resp_tmout = atoi(optarg); + if (!resp_tmout) FATAL("Invalid value '%s'.", optarg); + break; + + case 'w': + rw_tmout = atoi(optarg); + if (!rw_tmout) FATAL("Invalid value '%s'.", optarg); + break; + + case 'i': + idle_tmout = atoi(optarg); + if (!idle_tmout) FATAL("Invalid value '%s'.", optarg); + break; + + case 's': + size_limit = atoi(optarg); + if (!size_limit) FATAL("Invalid value '%s'.", optarg); + break; + + case 'o': + if (output_dir) FATAL("Multiple -o options not allowed."); + output_dir = (u8*)optarg; + + rmdir(optarg); + + if (mkdir(optarg, 0755)) + PFATAL("Unable to create '%s'.", output_dir); + + break; + + default: + usage(argv[0]); + + } + + if (access("assets/index.html", R_OK)) + PFATAL("Unable to access 'assets/index.html' - wrong directory?"); + + srandom(seed); + + if (optind == argc) + FATAL("Scan target not specified (try -h for help)."); + + if (!output_dir) + FATAL("Output directory not specified (try -h for help)."); + + if (resp_tmout < rw_tmout) + resp_tmout = rw_tmout; + + if (max_connections < max_conn_host) + max_connections = max_conn_host; + + load_keywords((u8*)wordlist, purge_age); + + /* Schedule all URLs in the command line for scanning */ + + while (optind < argc) { + + struct http_request *req = ck_alloc(sizeof(struct http_request)); + + if (parse_url((u8*)argv[optind], req, NULL)) + FATAL("One of specified scan targets is not a valid absolute URL."); + + if (!url_allowed_host(req)) + APPEND_FILTER(allow_domains, num_allow_domains, + __DFL_ck_strdup(req->host)); + + if (!url_allowed(req)) + FATAL("URL '%s' explicitly excluded by -I / -X 
rules.", argv[optind]); + + maybe_add_pivot(req, NULL, 2); + destroy_request(req); + + optind++; + } + + gettimeofday(&tv, NULL); + st_time = tv.tv_sec * 1000 + tv.tv_usec / 1000; + + SAY("\x1b[H\x1b[J"); + + while ((next_from_queue() && !stop_soon) || (!show_once++)) { + + if ((loop_cnt++ % 20) && !show_once) continue; + + SAY(cYEL "\x1b[H" + "skipfish version " VERSION " by \n\n" cNOR); + + http_stats(st_time); + SAY("\n"); + database_stats(); + SAY("\n \r"); + + } + + gettimeofday(&tv, NULL); + en_time = tv.tv_sec * 1000 + tv.tv_usec / 1000; + + if (stop_soon) + SAY(cYEL "[!] " cBRI "Scan aborted by user, bailing out!" cNOR "\n"); + + if (!dont_save_words) save_keywords((u8*)wordlist); + + write_report(output_dir, en_time - st_time, seed); + +#ifdef LOG_STDERR + SAY("\n== PIVOT DEBUG ==\n"); + dump_pivots(0, 0); + SAY("\n== END OF DUMP ==\n\n"); +#endif /* LOG_STDERR */ + + SAY(cLGN "[+] " cBRI "This was a great day for science!" cNOR "\n\n"); + +#ifdef DEBUG_ALLOCATOR + if (!stop_soon) { + destroy_database(); + destroy_http(); + destroy_signatures(); + __AD_report(); + } +#endif /* DEBUG_ALLOCATOR */ + + return 0; + +} diff --git a/string-inl.h b/string-inl.h new file mode 100644 index 0000000..82bfab2 --- /dev/null +++ b/string-inl.h @@ -0,0 +1,182 @@ +/* + + skipfish - various string manipulation helpers + ---------------------------------------------- + + Some modern operating systems still ship with no strcasestr() or memmem() + implementations in place, for reasons beyond comprehension. This file + includes a simplified version of these routines, copied from NetBSD, plus + several minor, custom string manipulation macros and inline functions. + + The original NetBSD code is licensed under a BSD license, as follows: + + Copyright (c) 1990, 1993 + The Regents of the University of California. All rights reserved. + + This code is derived from software contributed to Berkeley by + Chris Torek. 
+ + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. Neither the name of the University nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + SUCH DAMAGE. + + */ + +#ifndef _HAVE_STRING_INL_H +#define _HAVE_STRING_INL_H + +#include +#include + +#include "types.h" + + +/* Modified NetBSD strcasestr() implementation (rolling strncasecmp). 
*/ + +static inline u8* inl_strcasestr(const u8* haystack, const u8* needle) { + register u8 c, sc; + register u32 len; + + if (!haystack || !needle) return 0; + + if ((c = *needle++)) { + + c = tolower(c); + len = strlen((char*)needle); + + do { + do { + if (!(sc = *haystack++)) return 0; + } while (tolower(sc) != c); + } while (strncasecmp((char*)haystack, (char*)needle, len)); + + haystack--; + + } + + return (u8*)haystack; + +} + + +/* Modified NetBSD memmem() implementation (rolling memcmp). */ + +static inline void* inl_memmem(const void* haystack, u32 h_len, + const void* needle, u32 n_len) { + register u8* sp = (u8*)haystack; + register u8* pp = (u8*)needle; + register u8* eos = sp + h_len - n_len; + + if (!(haystack && needle && h_len && n_len)) return 0; + + while (sp <= eos) { + if (*sp == *pp) + if (memcmp(sp, pp, n_len) == 0) return sp; + sp++; + } + + return 0; + +} + + +/* String manipulation macros for operating on a dynamic buffer. */ + +#define NEW_STR(_buf_ptr, _buf_len) do { \ + (_buf_ptr) = ck_alloc(1024); \ + (_buf_len) = 0; \ + } while (0) + +#define ADD_STR_DATA(_buf_ptr, _buf_len, _str) do { \ + u32 _sl = strlen((char*)_str); \ + if ((_buf_len) + (_sl) + 1 > malloc_usable_size(_buf_ptr)) { \ + u32 _nsiz = ((_buf_len) + _sl + 1024) >> 10 << 10; \ + (_buf_ptr) = ck_realloc(_buf_ptr, _nsiz); \ + } \ + memcpy((_buf_ptr) + (_buf_len), _str, _sl + 1); \ + (_buf_len) += _sl; \ + } while (0) + +#define TRIM_STR(_buf_ptr, _buf_len) do { \ + (_buf_ptr) = ck_realloc(_buf_ptr, _buf_len + 1); \ + (_buf_ptr)[_buf_len] = 0; \ + } while (0) + + +/* Simple base64 encoder */ + +static inline u8* b64_encode(u8* str, u32 len) { + + const u8 b64[64] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + "0123456789+/"; + + u8 *ret, *cur; + + ret = cur = ck_alloc((len + 3) * 4 / 3 + 1); + + while (len > 0) { + + if (len >= 3) { + u32 comp = (str[0] << 16) | (str[1] << 8) | str[2]; + + *(cur++) = b64[comp >> 18]; + *(cur++) = b64[(comp >> 12) & 0x3F]; + 
*(cur++) = b64[(comp >> 6) & 0x3F]; + *(cur++) = b64[comp & 0x3F]; + + len -= 3; + str += 3; + + } else if (len == 2) { + u32 comp = (str[0] << 16) | (str[1] << 8); + + *(cur++) = b64[comp >> 18]; + *(cur++) = b64[(comp >> 12) & 0x3F]; + *(cur++) = b64[(comp >> 6) & 0x3D]; + *(cur++) = '='; + + len -= 2; + str += 2; + + } else { + u32 comp = (str[0] << 16);; + + *(cur++) = b64[comp >> 18]; + *(cur++) = b64[(comp >> 12) & 0x3F]; + *(cur++) = '='; + *(cur++) = '='; + + len--; + str++; + + } + + } + + *cur = 0; + return ret; + +} + +#endif /* !_HAVE_STRING_INL_H */ diff --git a/types.h b/types.h new file mode 100644 index 0000000..fdf05d3 --- /dev/null +++ b/types.h @@ -0,0 +1,42 @@ +/* + skipfish - type definitions + --------------------------- + + Author: Michal Zalewski + + Copyright 2009, 2010 by Google Inc. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + */ + +#ifndef _HAVE_TYPES_H +#define _HAVE_TYPES_H + +#include + +typedef uint8_t u8; +typedef uint16_t u16; +typedef uint32_t u32; +typedef uint64_t u64; + +typedef int8_t s8; +typedef int16_t s16; +typedef int32_t s32; +typedef int64_t s64; + +/* PRNG wrapper, of no better place to put it. */ + +#define R(_ceil) ((u32)(random() % (_ceil))) + +#endif /* ! _HAVE_TYPES_H */