initial import from other repo
authorapache <apache@ae879524-a8bd-4c4c-a5ea-74d2e5fc5a2c>
Sat, 20 Sep 2008 03:53:32 +0000 (03:53 +0000)
committerapache <apache@ae879524-a8bd-4c4c-a5ea-74d2e5fc5a2c>
Sat, 20 Sep 2008 03:53:32 +0000 (03:53 +0000)
git-svn-id: svn://trac.whoisi.com/whoisi/trunk@1 ae879524-a8bd-4c4c-a5ea-74d2e5fc5a2c

259 files changed:
ChangeLog [new file with mode: 0644]
README.txt [new file with mode: 0644]
TODO.txt [new file with mode: 0644]
blacklist_rss.txt [new file with mode: 0644]
controller-1.cfg [new file with mode: 0644]
controller-service [new file with mode: 0755]
dev.cfg [new file with mode: 0644]
devdata.sqlite [new file with mode: 0644]
feed-parse-service [new file with mode: 0755]
firehose-client [new file with mode: 0755]
html-feed-scrape-service [new file with mode: 0755]
lib/__init__.py [new file with mode: 0644]
lib/feedparser.py [new file with mode: 0644]
master-dev.cfg [new file with mode: 0644]
master-service [new file with mode: 0755]
patches/README [new file with mode: 0644]
patches/feedparser-title.patch [new file with mode: 0644]
picasa-poll-service [new file with mode: 0755]
prod.cfg [new file with mode: 0644]
publisher-1.cfg [new file with mode: 0644]
publisher-service [new file with mode: 0755]
runtests.sh [new file with mode: 0755]
sample-prod.cfg [new file with mode: 0644]
services/__init__.py [new file with mode: 0644]
services/command/__init__.py [new file with mode: 0644]
services/command/base.py [new file with mode: 0644]
services/command/controller.py [new file with mode: 0644]
services/command/database.py [new file with mode: 0644]
services/command/delicious.py [new file with mode: 0644]
services/command/download.py [new file with mode: 0644]
services/command/exceptions.py [new file with mode: 0644]
services/command/feedparse.py [new file with mode: 0644]
services/command/flickr.py [new file with mode: 0644]
services/command/htmlscrape.py [new file with mode: 0644]
services/command/identica.py [new file with mode: 0644]
services/command/linkedin.py [new file with mode: 0644]
services/command/newsite.py [new file with mode: 0644]
services/command/newsite.txt [new file with mode: 0644]
services/command/picasa.py [new file with mode: 0644]
services/command/previewsite.py [new file with mode: 0644]
services/command/service.py [new file with mode: 0644]
services/command/setup.py [new file with mode: 0644]
services/command/siterefresh.py [new file with mode: 0644]
services/command/twitter.py [new file with mode: 0644]
services/command/utils.py [new file with mode: 0644]
services/command/xmlnode.py [new file with mode: 0644]
services/config/__init__.py [new file with mode: 0644]
services/master/__init__.py [new file with mode: 0644]
services/master/database.py [new file with mode: 0644]
services/master/feedrefresh.py [new file with mode: 0644]
services/master/flickr.py [new file with mode: 0644]
services/master/linkedin.py [new file with mode: 0644]
services/master/newsite.py [new file with mode: 0644]
services/master/picasa.py [new file with mode: 0644]
services/master/previewsite.py [new file with mode: 0644]
services/master/publisher.py [new file with mode: 0644]
services/master/refreshmanager.py [new file with mode: 0644]
services/master/sitelock.py [new file with mode: 0644]
services/master/states.txt [new file with mode: 0644]
services/master/worker.py [new file with mode: 0644]
services/protocol/__init__.py [new file with mode: 0644]
services/protocol/childlistener.py [new file with mode: 0644]
services/protocols.txt [new file with mode: 0644]
services/publisher/__init__.py [new file with mode: 0644]
services/publisher/lookup.py [new file with mode: 0644]
services/publisher/protocol.py [new file with mode: 0644]
services/publisher/server.py [new file with mode: 0644]
setup.py [new file with mode: 0644]
smoketest.txt [new file with mode: 0644]
sources/apple-touch-icon.png [new file with mode: 0644]
sources/favicon.ico [new file with mode: 0644]
sources/favicon.png [new file with mode: 0644]
sources/whoisi-100.png [new file with mode: 0644]
sources/whoisi-150.png [new file with mode: 0644]
sources/whoisi-200.png [new file with mode: 0644]
sources/whoisi-icon-mini.svg [new file with mode: 0644]
sources/whoisi-icon.svg [new file with mode: 0644]
start-test-db.py [new file with mode: 0755]
start-test-whoisi.sh [new file with mode: 0755]
start-whoisi.py [new file with mode: 0755]
test-ws.cfg [new file with mode: 0644]
test.cfg [new file with mode: 0644]
tests/__init__.py [new file with mode: 0644]
tests/nose/__init__.py [new file with mode: 0644]
tests/nose/data/linkedin/christopherblizzard [new file with mode: 0644]
tests/nose/data/linkedin/clarkbw [new file with mode: 0644]
tests/nose/data/linkedin/johnath [new file with mode: 0644]
tests/nose/data/linkedin/johnlilly [new file with mode: 0644]
tests/nose/data/linkedin/reidhoffman [new file with mode: 0644]
tests/nose/data/linkedin/reidhoffman_added [new file with mode: 0644]
tests/nose/data/linkedin/reidhoffman_empty [new file with mode: 0644]
tests/nose/data/linkedin/reidhoffman_removed [new file with mode: 0644]
tests/nose/data/linkedin/unknown [new file with mode: 0644]
tests/nose/test_linkedin.py [new file with mode: 0644]
tests/nose/test_newsite.py [new file with mode: 0644]
tests/twisted/__init__.py [new file with mode: 0644]
tests/twisted/database.py [new file with mode: 0644]
tests/twisted/local/__init__.py [new file with mode: 0644]
tests/twisted/local/data/GasteroProd [new file with mode: 0644]
tests/twisted/local/data/beef-2.rss2 [new file with mode: 0644]
tests/twisted/local/data/beef-no-ids-2.rss2 [new file with mode: 0644]
tests/twisted/local/data/beef-no-ids.rss2 [new file with mode: 0644]
tests/twisted/local/data/beef.rss2 [new file with mode: 0644]
tests/twisted/local/data/no-link.atom [new file with mode: 0644]
tests/twisted/local/data/relative-links.atom [new file with mode: 0644]
tests/twisted/local/test_commandmanager.py [new file with mode: 0644]
tests/twisted/local/test_feedparse.py [new file with mode: 0644]
tests/twisted/local/test_feedparse_perf.py [new file with mode: 0644]
tests/twisted/local/test_newsite.py [new file with mode: 0644]
tests/twisted/network/__init__.py [new file with mode: 0644]
tests/twisted/network/test_download.py [new file with mode: 0644]
tests/twisted/network/test_feedparse.py [new file with mode: 0644]
tests/twisted/network/test_feedrefresh.py [new file with mode: 0644]
tests/twisted/network/test_flickr.py [new file with mode: 0644]
tests/twisted/network/test_linkedin.py [new file with mode: 0644]
tests/twisted/network/test_linkedin_refresh.py [new file with mode: 0644]
tests/twisted/network/test_newsite.py [new file with mode: 0644]
tests/twisted/network/test_picasa.py [new file with mode: 0644]
tests/twisted/network/test_picasa_preview.py [new file with mode: 0644]
tests/twisted/network/test_picasa_refresh.py [new file with mode: 0644]
tests/twisted/network/test_previewsite.py [new file with mode: 0644]
utils/archive-site-history.py [new file with mode: 0755]
utils/clean_site_history_dups.py [new file with mode: 0755]
utils/clean_site_refresh.py [new file with mode: 0755]
utils/clean_tmp.sh [new file with mode: 0755]
utils/convert-display-cache.py [new file with mode: 0755]
utils/delete_user.py [new file with mode: 0755]
utils/follower_stats.py [new file with mode: 0755]
utils/query_everyone_perf.py [new file with mode: 0755]
utils/utils.cfg [new file with mode: 0644]
whoisi.egg-info/PKG-INFO [new file with mode: 0644]
whoisi.egg-info/SOURCES.txt [new file with mode: 0644]
whoisi.egg-info/dependency_links.txt [new file with mode: 0644]
whoisi.egg-info/not-zip-safe [new file with mode: 0644]
whoisi.egg-info/paster_plugins.txt [new file with mode: 0644]
whoisi.egg-info/requires.txt [new file with mode: 0644]
whoisi.egg-info/sqlobject.txt [new file with mode: 0644]
whoisi.egg-info/top_level.txt [new file with mode: 0644]
whoisi/__init__.py [new file with mode: 0644]
whoisi/api.py [new file with mode: 0644]
whoisi/config/__init__.py [new file with mode: 0644]
whoisi/config/app.cfg [new file with mode: 0644]
whoisi/config/log.cfg [new file with mode: 0644]
whoisi/controllers.py [new file with mode: 0644]
whoisi/json.py [new file with mode: 0644]
whoisi/model.py [new file with mode: 0644]
whoisi/release.py [new file with mode: 0644]
whoisi/search.py [new file with mode: 0644]
whoisi/source/flickr-blank-75x75.svg [new file with mode: 0644]
whoisi/static/css/style.css [new file with mode: 0644]
whoisi/static/css/style.css.orig [new file with mode: 0644]
whoisi/static/images/apple-touch-icon.png [new file with mode: 0644]
whoisi/static/images/event/add-tag-arrow.png [new file with mode: 0644]
whoisi/static/images/event/alias-link-arrow.png [new file with mode: 0644]
whoisi/static/images/event/edit-link-arrow.png [new file with mode: 0644]
whoisi/static/images/favicon.ico [new file with mode: 0644]
whoisi/static/images/header_inner.png [new file with mode: 0644]
whoisi/static/images/info.png [new file with mode: 0644]
whoisi/static/images/ok.png [new file with mode: 0644]
whoisi/static/images/sites/blogger16x16.gif [new file with mode: 0644]
whoisi/static/images/sites/delicious.png [new file with mode: 0644]
whoisi/static/images/sites/feed-icon-16x16.png [new file with mode: 0755]
whoisi/static/images/sites/flickr-blank-75x75.png [new file with mode: 0644]
whoisi/static/images/sites/flickr-favicon.gif [new file with mode: 0644]
whoisi/static/images/sites/home.png [new file with mode: 0644]
whoisi/static/images/sites/identica.png [new file with mode: 0644]
whoisi/static/images/sites/linkedin.gif [new file with mode: 0644]
whoisi/static/images/sites/picasa-favicon.png [new file with mode: 0644]
whoisi/static/images/sites/twitter.png [new file with mode: 0644]
whoisi/static/images/sites/white-16x16.jpg [new file with mode: 0644]
whoisi/static/images/sites/wikipedia.png [new file with mode: 0644]
whoisi/static/images/tg_under_the_hood.png [new file with mode: 0644]
whoisi/static/images/under_the_hood_blue.png [new file with mode: 0644]
whoisi/static/images/whoisi-100.png [new file with mode: 0644]
whoisi/static/images/whoisi-200.png [new file with mode: 0644]
whoisi/static/javascript/addform.js [new file with mode: 0644]
whoisi/static/javascript/follow.js [new file with mode: 0644]
whoisi/static/javascript/jquery.js [new file with mode: 0644]
whoisi/static/javascript/keys.js.in [new file with mode: 0644]
whoisi/static/javascript/person.js [new file with mode: 0644]
whoisi/static/tests/empty.html [new file with mode: 0644]
whoisi/static/tests/empty_feed.html [new file with mode: 0644]
whoisi/static/tests/empty_file.atom [new file with mode: 0644]
whoisi/static/tests/multiple_feeds.html [new file with mode: 0644]
whoisi/static/tests/no-feed-relative-links.atom [new file with mode: 0644]
whoisi/static/tests/no-feed-relative-links.html [new file with mode: 0644]
whoisi/static/tests/no-link.atom [new file with mode: 0644]
whoisi/static/tests/no-link.html [new file with mode: 0644]
whoisi/static/tests/one_entry.atom [new file with mode: 0644]
whoisi/static/tests/relative-feed-relative-links.atom [new file with mode: 0644]
whoisi/static/tests/relative-feed-relative-links.html [new file with mode: 0644]
whoisi/static/tests/relative-links.atom [new file with mode: 0644]
whoisi/static/tests/relative-links.html [new file with mode: 0644]
whoisi/static/tests/relative_feed.atom [new file with mode: 0644]
whoisi/static/tests/relative_feed.html [new file with mode: 0644]
whoisi/static/txt/robots.txt [new file with mode: 0644]
whoisi/summary.py [new file with mode: 0644]
whoisi/templates/__init__.py [new file with mode: 0644]
whoisi/templates/about.mak [new file with mode: 0644]
whoisi/templates/aliases-widget.mak [new file with mode: 0644]
whoisi/templates/api-top-doc.mak [new file with mode: 0644]
whoisi/templates/contact.mak [new file with mode: 0644]
whoisi/templates/delicious-widget.mak [new file with mode: 0644]
whoisi/templates/event.mak [new file with mode: 0644]
whoisi/templates/events.mak [new file with mode: 0644]
whoisi/templates/everyone.mak [new file with mode: 0644]
whoisi/templates/flickr-widget.mak [new file with mode: 0644]
whoisi/templates/follow-byname.mak [new file with mode: 0644]
whoisi/templates/follow-no-entries.mak [new file with mode: 0644]
whoisi/templates/follow.mak [new file with mode: 0644]
whoisi/templates/identica-widget.mak [new file with mode: 0644]
whoisi/templates/index.mak [new file with mode: 0644]
whoisi/templates/linkedin-widget.mak [new file with mode: 0644]
whoisi/templates/login-info.mak [new file with mode: 0644]
whoisi/templates/login-not-found.mak [new file with mode: 0644]
whoisi/templates/master.mak [new file with mode: 0644]
whoisi/templates/name-add-widget.mak [new file with mode: 0644]
whoisi/templates/name-remove-widget.mak [new file with mode: 0644]
whoisi/templates/name-update-widget.mak [new file with mode: 0644]
whoisi/templates/nofollow.mak [new file with mode: 0644]
whoisi/templates/person-add-confirm.mak [new file with mode: 0644]
whoisi/templates/person-add-pick-widget.mak [new file with mode: 0644]
whoisi/templates/person-add.mak [new file with mode: 0644]
whoisi/templates/person-widget.mak [new file with mode: 0644]
whoisi/templates/person.mak [new file with mode: 0644]
whoisi/templates/picasa-widget.mak [new file with mode: 0644]
whoisi/templates/recommendations.mak [new file with mode: 0644]
whoisi/templates/search-widget.mak [new file with mode: 0644]
whoisi/templates/search.mak [new file with mode: 0644]
whoisi/templates/site-add-error-widget.mak [new file with mode: 0644]
whoisi/templates/site-add-pick-widget.mak [new file with mode: 0644]
whoisi/templates/site-add-status-widget.mak [new file with mode: 0644]
whoisi/templates/site-add-widget.mak [new file with mode: 0644]
whoisi/templates/site-remove-widget.mak [new file with mode: 0644]
whoisi/templates/twitter-widget.mak [new file with mode: 0644]
whoisi/templates/unseen-no-entries.mak [new file with mode: 0644]
whoisi/templates/unseen.mak [new file with mode: 0644]
whoisi/templates/weblog-widget.mak [new file with mode: 0644]
whoisi/templates/welcome.kid [new file with mode: 0644]
whoisi/tests/__init__.py [new file with mode: 0644]
whoisi/tests/test_controllers.py [new file with mode: 0644]
whoisi/tests/test_model.py [new file with mode: 0644]
whoisi/utils/__init__.py [new file with mode: 0644]
whoisi/utils/display.py [new file with mode: 0644]
whoisi/utils/fast_api.py [new file with mode: 0644]
whoisi/utils/fast_follow.py [new file with mode: 0644]
whoisi/utils/fast_history.py [new file with mode: 0644]
whoisi/utils/flickr.py [new file with mode: 0644]
whoisi/utils/follow.py [new file with mode: 0644]
whoisi/utils/names.py [new file with mode: 0644]
whoisi/utils/picasa.py [new file with mode: 0644]
whoisi/utils/preview_site.py [new file with mode: 0644]
whoisi/utils/recaptcha.py [new file with mode: 0644]
whoisi/utils/recommendations.py [new file with mode: 0644]
whoisi/utils/site_history.py [new file with mode: 0644]
whoisi/utils/sites.py [new file with mode: 0644]
whoisi/utils/track.py [new file with mode: 0644]
whoisi/utils/twitter.py [new file with mode: 0644]
whoisi/utils/url_lookup.py [new file with mode: 0644]

diff --git a/ChangeLog b/ChangeLog
new file mode 100644 (file)
index 0000000..204eabf
--- /dev/null
+++ b/ChangeLog
@@ -0,0 +1,3944 @@
+2008-09-19  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * utils/utils.cfg: New file for utils that includes database
+       config.
+
+       * Update all the config files to have bogus usernames and
+       passwords for final source release.
+
+2008-09-18  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * services/command/xmlnode.py: Add proper license information for
+       this file which was taken from the flickrapi code.  It's
+       MIT/python 2.5.
+
+2008-09-17  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * Add an MIT license to everything.
+
+2008-09-05  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * dev.cfg: Include a recaptcha private key placeholder.
+
+       * test-ws.cfg: Include a recaptcha private key placeholder.
+
+       * whoisi/utils/recaptcha.py (recaptcha_check_fail): Use the
+       recaptcha private key defined in the config.
+
+       * whoisi/templates/person.mak: Include keys.js.
+
+       * whoisi/templates/person-add.mak: Include keys.js.
+
+       * whoisi/templates/recommendations.mak: Include keys.js.
+
+       * whoisi/templates/search.mak: Make sure to include keys.js before
+       person.js.
+
+       * whoisi/static/javascript/keys.js.in: File to rename to keys.js
+       where you include your public key for recaptcha.
+
+       * whoisi/static/javascript/addform.js: Use the recaptcha public
+       key defined in keys.js.
+
+       * whoisi/static/javascript/person.js: Use the recaptcha public key
+       defined in keys.js.
+
+       * prod.cfg: Placeholder for recaptcha private key.
+
+       * start-whoisi.py: Warn about missing recaptcha private key.
+
+       * controller-1.cfg: Add placeholders for twitter + flickr account
+       info.
+
+       * tests/twisted/network/test_flickr.py (TestFlickr.test_NewFlickrCache):
+       This test is skipped right now because we don't have a way to pull
+       in the api key.
+
+       * services/command/flickr.py: Get the flickr api key from the
+       config file.
+
+       * services/command/download.py: Get the twitter username and
+       password from the config for the twitter download hack.  Also,
+       bonus bug fix - call clear_cache after urlparse.
+
+       * controller-service: Die if the config doesn't include api keys
+       for flickr or twitter.
+
+2008-09-04  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * utils/archive-site-history.py: Fix the other three bugs that
+       were moving and deleting the wrong records.
+
+2008-09-04  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * utils/archive-site-history.py (migrate_records): Argh, it
+       migrated all new records, not all old records.  Fail.
+
+2008-09-04  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/model.py (SiteHistoryArchive): Dummy entry for the
+       archive table.
+
+       * whoisi/controllers.py (Root.l): Pull a url out of the archive if
+       we have to - urls must live forever!
+
+2008-09-04  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * README.txt: Add site_type_idx index to the site table in a
+       pathetic attempt to get the initial flickr query to go faster.
+
+       * whoisi/utils/fast_history.py: All of these methods now pull out
+       the list of ids and then generate a custom query based on them.
+       Why?  Because mysql's query optimizer just can't get it right and
+       this is all primary key driven - it's _much_ faster.
+
+       * whoisi/templates/unseen.mak: Add the "Caught Up!" button.
+
+       * whoisi/controllers.py (Root.caughtup): Little method that
+       updates the last seen id when we're caught up.
+
+       * whoisi/controllers.py (Root.unseen): Go back to the old
+       behaviour of having a "Caught Up" button.
+
+       * utils/archive-site-history.py: Utility that archives old
+       site_history items so that we have a max of 100 items in the
+       database for any site.
+
+       * tests/twisted/network/test_newsite.py (TestNewSite.confirmRelativeLinksReddit):
+       The reddit urls keep changing - update the test to make it
+       generic.
+
+       * services/command/feedparse.py (FeedUpdateDatabaseCommand.gotEntries):
+       Limit the number of entries to 99.
+
+       * services/command/picasa.py (Picasa.photoFeedForUser): Limit the
+       number of picasa results to 99.
+
+2008-08-31  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * README.txt: Add a bunch of new indexes required to make many of
+       the queries go fast.  Like, 4 mins to 0.05 seconds fast.
+
+       * whoisi/utils/fast_history.py: Update every call that uses
+       subqueries to use standard joins instead now that we have proper
+       indexes in place.
+
+       * whoisi/templates/index.mak: Remove beta-quality warning.  We're
+       doing fine.  Mostly.
+
+2008-08-30  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * utils/clean_site_history_dups.py: Utility to scan the entire
+       site_history database and clean out duplicate entries for a site.
+       Takes a long time to run.
+
+2008-08-28  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/utils/fast_history.py (fast_recent_changes_for_follower):
+       Add a check to make sure we return None if there's no follower.
+       (fast_count_items_for_follower): Return None if nothing is
+       returned.
+       (fast_max_item_for_follower): Return None if nothing is returned.
+
+       * whoisi/templates/follow.mak: Remove the caught up link.
+
+       * whoisi/templates/login-info.mak: Limit width of the login
+       message to 60% wide as below.
+
+       * whoisi/templates/unseen.mak: Page for the unseen method.
+
+       * whoisi/templates/master.mak: For every page that is loaded
+       update the unread count.  Shouldn't be here, but it's fine for
+       now.
+
+       * whoisi/templates/unseen-no-entries.mak: New template for the
+       unseen page when there's nothing to show.
+
+       * whoisi/templates/follow-no-entries.mak: Hold the headline to 60%
+       wide to make sure it doesn't overrun the right hand nav info.
+
+       * whoisi/controllers.py (Root.follow): Remove the code from this
+       method that displays unseen bits and updates the counts.
+       (Root.unseen): Independent screen that shows the unseen items.
+
+2008-08-25  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/controllers.py (Root.follow): When updating the
+       last_history value, make sure it's greater than the current value
+       to keep people from going backwards in time.
+
+2008-08-24  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/utils/fast_history.py (fast_recent_changes_for_follower):
+       Change this method so it can handle either recent changes or
+       unseen changes with different where clauses.
+       (fast_count_items_for_follower): Add this method to get the recent
+       number of changes for a particular person.
+       (fast_max_item_for_follower): Add this method to get the last item
+       from the database.  Used for follower initialization.
+
+       * whoisi/utils/follow.py: Add count_history() and last_history()
+       methods to get the values from the current follow object.
+
+       * whoisi/templates/follow.mak: Add a hook to display the caught up
+       link on the follow page.
+
+       * whoisi/templates/master.mak: On the sidebar show the unread
+       count.  Add a hook to the sidebar so on the follow page we can
+       show a "caught up" link.  Split various types of actions in the
+       sidebar into their own sections.
+
+       * whoisi/controllers.py (Root.follow): When loading the follow
+       page make sure that we set default values for unread + last item
+       seen.  If someone passes in caught_up and history_id set the
+       values in the database.  Update the unread count on each
+       load (need to fix this later.)  Also pass down the had_start value
+       to indicate if this was the main follow page or looking at old
+       history.  On old history pages we don't show the "caught up" link.
+
+       * whoisi/model.py (Follower): Add last_history and count_history
+       items for unread and last item seen.
+
+       * master-service (print_usage): Add the -p option for publishing
+       updates.
+
+2008-08-21  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * services/command/feedparse.py (FeedUpdateDatabaseCommand.gotEntries):
+       When inserting a new entry make sure it's not already in the
+       database under a different entry_id.
+
+       * tests/twisted/network/test_newsite.py (TestNewSite.confirmRelativeLinksGitHub):
+       github now uses www.github.com in its feeds.  Update test.
+       (TestNewSite.confirmRelativeLinksReddit): reddit now uses
+       /comments/ for the top of the comments url in feeds.  Update test.
+
+2008-08-18  Joe Shaw <joe@joeshaw.org>
+
+       * whoisi/utils/recommendations.py (get_last_activity): Function to
+       get the last activity for a particular follower.
+
+       * whoisi/utils/recommendations.py (get_recommendations): Decay the
+       value of a particular follower if they haven't visited the site
+       recently.  Anything < 14 days is considered active.  Beyond that
+       there's a 45 day half-life.
+
+2008-08-09  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * services/publisher/protocol.py (PublisherProtocol.dataReceived):
+       Check the buffer for the buffer length check, not the line.  Also
+       always return if the header isn't found.  While we're here fix up
+       a couple of error messages to give more relevant information to
+       the other end.
+
+2008-08-09  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/api.py (ApiController.startRefresh): Add a new
+       startRefresh api that lets you start a site refresh from the
+       outside world.  Not public yet because it doesn't contain
+       protections against starting a billion refreshes.
+       
+2008-08-09  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/templates/master.mak: Add an API link at the bottom of
+       every page.
+
+       * whoisi/templates/api-top-doc.mak: Add example scripts and clean
+       up a lot of the docs.  Add a table of contents at the top.
+
+2008-08-08  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/api.py (ApiController.getURLForTinyLink): Add a "title"
+       to the return dictionary for getURLForTinyLink().  Also return
+       url=None if the url isn't found.
+
+2008-08-08  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * services/publisher/protocol.py (PublisherProtocol.dataReceived):
+       Add some more useful error messages.
+
+2008-08-08  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * firehose-client: Add host + port arguments on the command line.
+
+2008-08-08  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * publisher-service (PublisherService): Deliver the returned
+       object to the client as-is.
+
+       * services/publisher/lookup.py (MasterLookupQueue): Select the
+       site history information we need from all the various tables and
+       put it into a big dictionary for delivery to clients.
+
+       * firehose-client (ClientProtocol.handleMessage): Decode the new
+       weblog message.
+
+2008-08-07  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * master-dev.cfg: Add entries for a publisher.
+
+       * publisher-service: Very simple publisher server that handles
+       multiple clients connected and will publish updates.  Should scale
+       a bit, but not too much.  Connects to the new lookup code, the new
+       publisher protocol and hooks it all together to publish simple
+       updates.
+
+       * master-service: Add a new publish argument to start and handle
+       -p on the command line to publish updates.
+
+       * publisher-1.cfg: Config file for a sample publisher.
+
+       * services/publisher/server.py: Some of the classes required to
+       run a publisher server.  Includes the server protocol code that
+       connects to the main PublisherService class provided by the
+       server.
+
+       * services/publisher/protocol.py: First pass at a protocol class
+       that's used by both client and server.  Handles most of the
+       control code and publishes state information when it changes.
+       Client and server should only have to override a subset of
+       methods to get something that works pretty well.  (Messages are
+       limited to 128kb each for now.)
+
+       * services/publisher/lookup.py: Code for the publisher that looks
+       up database entries based on ID.  (Master just publishes an ID and
+       it's up to the publisher to turn that into a full message.)
+       Includes an incoming queue that is processed one entry at a time.
+
+       * services/master/database.py (DatabaseManager.getFlickrImages):
+       Only get images for flickr sites that haven't been removed.
+
+       * services/master/feedrefresh.py (FeedRefresh.done): Make sure to
+       return new site history items when we're done with a picasa
+       refresh.
+
+       * services/master/picasa.py (PicasaRefresh.done): Make sure to
+       return new site history items when we're done with a picasa
+       refresh.
+
+       * services/master/publisher.py: Very simple first pass at code
+       that connects and reconnects to publishing services.  Will publish
+       information about new site history and new site items.
+
+       * services/master/worker.py (get_work_hosts): Print out if we're
+       adding a controller host.
+       (WorkManager): Remove TODO information.
+
+       * controller-service: Print out a message on startup that says
+       which port the controller is listening on.
+
+       * firehose-client: First pass at a very simple firehose client
+       that just prints out messages that it gets from the server.  Needs
+       a huge amount of work.
+
+2008-07-27  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * master-dev.cfg: Defaults for master process.
+
+       * master-service: Use config file for startup.
+
+       * controller-1.cfg: Config file for controller.
+
+       * services/master/database.py: Get database info from config file.
+
+       * services/master/worker.py: Get work hosts from a config file.
+
+       * services/master/refreshmanager.py: Use config file for getting
+       refresh interval.
+
+       * services/config/__init__.py: Global config option.
+
+       * controller-service: Use a config file for config options.
+
+2008-07-25  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * services/command/controller.py (ProtoManager.start): We don't
+       need the command as an argument anymore when we start.  (It wasn't
+       used anyway.)
+
+       * services/command/siterefresh.py (RefreshSiteDone.done): Return
+       the site_history_new_ids if they are in the state.  Getting ready
+       for live updates.
+
+       * services/command/newsite.py (NewSiteDone.done): Return the
+       site_history_new_ids if they are in the state.  Getting ready for
+       live updates.
+
+       * services/command/feedparse.py (FeedUpdateDatabaseCommand): Track
+       the ids that we insert into the database.  They are put in the
+       state as "site_history_new_ids".
+
+       * services/master/previewsite.py (PreviewSite.startProcess):
+       preview-site -> previewSite, preview-linkedin -> previewLinkedIn,
+       preview-picasa -> previewPicasa.
+
+       * services/master/feedrefresh.py (FeedRefresh): feed-refresh ->
+       feedRefresh.
+
+       * services/master/picasa.py (PicasaRefresh): picasa-refresh ->
+       picasaRefresh.
+
+       * services/master/newsite.py (NewSite.startProcess): new-site ->
+       newSite, new-linkedin -> newLinkedIn, new-picasa -> newPicasa.
+
+       * services/master/linkedin.py (LinkedInRefresh): Use
+       linkedInRefresh instead of linkedin-refresh.
+
+       * services/master/flickr.py (FlickrCache): Use flickrCache instead
+       of flickr-cache.
+
+       * services/master/worker.py (Worker.dispatchCommand): Use the name
+       of the method, pass the uuid and pass the command arguments
+       directly instead of serializing them into a string to be
+       re-parsed.  Much cleaner.
+
+       * controller-service (Controller): Change the doCommand + dispatch
+       to use individual methods by name.  This should make it possible
+       to use named arguments in the future and have per-method return
+       values.  Been meaning to do this for months.
+
+2008-07-23  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/static/txt/robots.txt: Don't allow robots to access the
+       api.
+
+2008-07-19  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * services/command/download.py (localDownloadPage): Use the whoisi
+       user, not the chrisblizzard user for twitter.
+
+2008-07-19  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/static/txt/robots.txt: Don't let robots go to
+       recommendations or genrecommendations.
+
+2008-07-19  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/utils/recommendations.py: Code from Joe Shaw!  Generates
+       a nice list of recommendations based on the people you're
+       following.  Also contains the unused "most popular" code.
+
+       * whoisi/templates/recommendations.mak: Adapted from Joe's
+       original template.  Gives instructions if you're not following
+       anyone.  Offers to generate a list of recommendations if you are
+       following someone.  Once you have a list it will display it.
+
+       * whoisi/templates/master.mak: Add Recommendations to the list of
+       items on the right hand side.
+
+       * whoisi/controllers.py (Root.recommendations): Page that shows
+       recommendations (paged like the search page.)
+       (Root.genrecommendations): Page that generates recommendations and
+       stuffs it into the database.
+
+       * whoisi/model.py (FollowerRecommendations): Add
+       FollowerRecommendations to keep track of recommendations.
+
+2008-07-18  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/templates/aliases-widget.mak: Move the coding comment to
+       the top of the file so it doesn't end up in the output sent to the
+       client.  Oops.
+
+2008-07-18  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/templates/aliases-widget.mak: Expand group and event
+       aliases to point to search and/or event pages.
+
+       * whoisi/utils/display.py (is_event_alias): Add is_event_alias and
+       is_group_alias which hand back the search or event string if it's
+       one of those kinds of aliases.
+
+2008-07-17  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/model.py (PeopleEvent): Add a banner item.
+
+       * whoisi/controllers.py (Root.e): Add a banner if it's set.
+
+       * whoisi/templates/event.mak: Add a banner to the top of the page
+       if it's set.
+
+2008-07-17  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * services/command/download.py (localDownloadPage): Jam my
+       personal username and password into a request if it is for a
+       twitter.com account.  I have never felt so dirty in all of my
+       life.  Except for that time with the Nun.  But nevermind about
+       that.
+
+2008-07-16  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/templates/follow-no-entries.mak: Limit width to 60%.
+
+       * README.txt: Add an index to the name table.
+
+       * whoisi/utils/fast_history.py (fast_recent_changes_for_event):
+       Timeline for an event.
+
+       * whoisi/templates/person-add.mak: Limit some of the text to 60%
+       width.
+
+       * whoisi/templates/master.mak: Add a link to the events page.
+
+       * whoisi/templates/event.mak: New template for events!
+
+       * whoisi/templates/search.mak: Don't show the "add someone to the
+       site" for people searching for groups or an event.  Also limit
+       some of the text to 60% width.
+
+       * whoisi/templates/events.mak: New events template that describes
+       what events are happening and how you add yourself to one of them.
+
+       * whoisi/templates/nofollow.mak: Set the width for some text to
+       60%.
+
+       * whoisi/controllers.py (Root.everyone): Remove call to
+       datetime.utcnow() that wasn't needed anymore.
+       (Root.e): New method for events!  Uses
+       fast_recent_changes_for_event()
+
+       * whoisi/model.py (PeopleEvent): Add a PeopleEvent item that
+       contains a list of events and if they are active.
+
+2008-07-14  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/controllers.py (Root.peopleListToFullDisplay): Refactored
+       function that is used from search and the follow display to gather
+       the data for display.
+       (Root.search): Use the refactored display function.
+       (Root.follow): Use the refactored display function.
+
+2008-07-13  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * tests/twisted/local/test_feedparse.py (TestFeedParse.test_feedParse):
+       We need to set feed_url in the state.
+       (TestFeedParse.test_stupidFeedParse): Same.
+
+       * tests/twisted/network/test_newsite.py (TestNewSite.confirmRelativeFeed):
+       Actually test to make sure we got the right relative urls resolved
+       to full urls.
+
+       * whoisi/static/tests/relative_feed.html: Use relative_feed.atom.
+
+       * whoisi/static/tests/relative_feed.atom: Test case that includes
+       a <link> that is relative.
+
+       * services/command/newsite.py (NewSiteCreate.createSite): Some
+       debug spew when we're creating the site.
+
+       * services/command/feedparse.py (FeedUpdateDatabaseCommand.updateSite):
+       Resolve relative urls when updating the url in the site.
+
+2008-07-12  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * services/master/refreshmanager.py (RefreshManager.getRandomRefreshTime):
+       Change the default time from 30 minutes to 60.
+
+       * services/master/worker.py (Worker.acceptingWork): Change the
+       default depth from 30 to 80 items in the work queue at once.
+
+       * services/command/feedparse.py (FeedUpdateDatabaseCommand.gotEntries):
+       Make sure to check if a link is null before trying to see if it's
+       relative.
+
+2008-07-12  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * blacklist_rss.txt: Add some more blacklist rss feeds.
+
+       * tests/twisted/local/test_feedparse.py (TestFeedParse.test_stupidFeedParse):
+       We now need the url set in the state to resolve relative urls.
+       (TestFeedParse.test_feedParse): Same.
+
+       * tests/twisted/network/test_newsite.py (TestNewSite.test_NewSiteRelativeEntries):
+       New test that tests what happens when we end up with relative
+       entries.
+       (TestNewSite.test_NewSiteRelativeEntriesReddit): Live test
+       vs. Reddit
+       (TestNewSite.test_NewSiteRelativeEntriesGitHub): Live test
+       vs. GitHub
+       (TestNewSite.test_NewSiteRelativeEntriesNoLink): Relative entries
+       with no link in the rss.
+       (TestNewSite.test_NewSiteRelativeEntriesRelativeLink): Testing
+       relative entries with a relative link in the feed.
+
+       * services/command/newsite.py (NewSiteTryURL.loadDone): Use the
+       resolve_relative_url function to resolve a relative url.
+       (NewSiteTryURL.feedLoadDone): If you see a link in the feed make
+       sure to resolve the relative url.
+
+       * services/command/previewsite.py (PreviewSiteDone.doCommand):
+       Resolve any relative urls that might be in the entries.
+
+       * services/command/picasa.py (PicasaSetup.gotNewSite): Get the url
+       as well as the feed so we can resolve relative urls.
+       (PicasaSetup.gotSite): Set the original url in the environment.
+
+       * services/command/utils.py (resolve_relative_url): Utility
+       function to resolve relative urls.
+
+       * services/command/feedparse.py (FeedRefreshSetup.gotNewSite):
+       Make sure to pull the base url for the refresh so we can resolve
+       relative urls.
+       (FeedRefreshSetup.gotFeed): Set the url that we get back and add
+       it to the debugging output.
+       (FeedUpdateDatabaseCommand.doCommand): Add the site id to the
+       debugging spew so we know what site id we're getting updates for.
+       (FeedUpdateDatabaseCommand.gotEntries): Fix up entries that might
+       be relative before we insert or compare them.
+
+2008-07-04  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * blacklist_rss.txt: Start a list of rss feeds we need to f-ing
+       ban.
+
+       * tests/nose/test_newsite.py (TestNewSite.test_delicious): Tests
+       for delicious url detection.
+       (TestNewSite.test_delicious_preferred): Tests for picking the
+       right delicious feed from the list.
+
+       * whoisi/utils/sites.py (site_value): Add delicious to the sort
+       list.
+
+       * whoisi/templates/delicious-widget.mak: Delicious widget derived
+       from the weblog widget.
+
+       * whoisi/templates/follow.mak: Add delicious.
+
+       * whoisi/templates/everyone.mak: Add delicious.
+
+       * whoisi/templates/person-widget.mak: Add delicious.
+
+       * whoisi/controllers.py (Root.getDisplayDepth): Add delicious.
+       (Root.rendersite): Add delicious.
+
+       * whoisi/static/css/style.css: Delicious entries.
+
+       * whoisi/static/images/sites/delicious.png: Delicious image. 
+
+       * services/command/newsite.py: Add support for delicious.
+
+       * services/command/flickr.py (Flickr.getPreferredFeed): Little bug
+       fix.  Return None if there's no match.
+
+2008-07-03  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * tests/twisted/network/test_previewsite.py: Some tests for the
+       previewsite code.  Right now we just test a common case and a
+       preview with no link in the rss to make sure that the feed link is
+       properly updated.
+
+       * tests/twisted/network/test_newsite.py: Add tests (one for
+       relative <link> in a feed, one for relative entries in a feed, one
+       for a combination of the two, one for relative entries with no
+       <link>) that will get filled in when I fix those bugs.
+
+       * whoisi/static/tests/no-link.atom: Test feed without a <link>.
+
+       * whoisi/static/tests/no-link.html: Test page for testing an rss
+       feed with no <link>.
+
+       * smoketest.txt: The site that I was using for testing flickr went
+       away!
+
+       * services/command/newsite.py (NewSiteTryURL.feedLoadDone): Add
+       some more debugging spew so we can diagnose problems later.
+
+       * services/command/previewsite.py (PreviewSiteDone.doCommand):
+       Some feeds don't include a <link> tag so we need to make sure we
+       get the link from the state url instead of strictly out of the
+       feed.
+
+2008-07-02  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * clean_site_refresh.py: Script that cleans out done and error
+       status from the site_refresh table.
+
+2008-07-02  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/templates/identica-widget.mak: They changed the text to
+       the title instead of in the summary.  Oops.
+
+2008-07-02  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * tests/nose/test_newsite.py (TestNewSite.test_identica): Code to
+       test the identi.ca url detection.
+
+       * services/command/newsite.py (NewSiteTryURL.getFeedType): Set the
+       type for identi.ca urls.
+
+       * services/command/identica.py (Identica.isIdentica): New code to
+       detect a identi.ca url.
+
+       * whoisi/utils/sites.py (site_value): Sort identi.ca right after
+       twitter.
+
+       * whoisi/templates/follow.mak: Add identi.ca to the switch.
+
+       * whoisi/templates/identica-widget.mak: Direct copy of the twitter
+       code used to display identi.ca instead.
+
+       * whoisi/templates/twitter-widget.mak: When calling
+       expand_user_ref() make sure to pass in the twitter base url since
+       identi.ca uses it now.
+
+       * whoisi/templates/everyone.mak: Add the identi.ca widget to the
+       switch.
+
+       * whoisi/templates/person-widget.mak: Add the identi.ca widget to
+       the switch.
+
+       * whoisi/controllers.py (Root.getDisplayDepth): Add identi.ca to
+       the switch for display.
+       (Root.rendersite): Add identi.ca to the switch.
+
+       * whoisi/static/images/sites/identica.png: Image for identi.ca
+
+       * whoisi/utils/twitter.py (expand_user_ref): Add a "base_site" so
+       that we can render both twitter and identi.ca messages.
+
+2008-07-02  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * tests/twisted/local/data/no-link.atom: New test feed that
+       doesn't include a <link> in the <feed> section that found a bug in
+       the preview code.
+
+       * whoisi/utils/url_lookup.py (run_db_check): Don't include
+       SiteHistory in the search.
+
+2008-07-02  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/templates/index.mak: Add a little beta-quality warning.
+
+2008-07-02  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/templates/contact.mak: New contact page.
+
+       * whoisi/templates/about.mak: More edits that taste like community
+       guidelines.
+
+       
+2008-07-02  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/templates/about.mak: Add a nice about page instead of the
+       angry one of old.
+
+2008-06-30  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/controllers.py (Root.addpersonstatus): Don't try to
+       search all of the links in the feed for a duplicate.  Turns out
+       that links in google reader, delicious, etc, all point to the same
+       stuff.  So we just look at the link instead of all of the links in
+       the feed.
+
+2008-06-29  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * tests/twisted/local/data/GasteroProd: A feed that I found that
+       totally busts the parser.  Good for a later test case.
+
+       * tests/twisted/local/test_feedparse_perf.py (TestFeedParsePerf):
+       Woo, something that iterates with the parser and sees how fast it
+       can go!
+
+       * master-service: Debugging spew for now.
+
+       * services/command/htmlscrape.py (ScrapeLinkCommand): Use the new
+       sm.serviceFailed call.
+
+       * services/command/picasa.py (PicasaPollFeed): Use the new
+       sm.serviceFailed call.
+
+       * services/command/service.py (ParseProcess): Lots of changes here
+       to move from a parse-per-process-start to one where the process
+       sits around and parses over and over again.  The SubService now
+       gets information about when this class is done starting (via
+       getStarted()) and when the process has exited (via getGone()).
+       There's an explicit assert in here that makes sure you can't start
+       a parse when one parse is already in progress.  This already found
+       one bug so it's staying.  When we're done parsing we carefully
+       save the deferred, reset our state, and call the callback.  We do
+       this because the callback can cause another parse on this process
+       to start so we need to be ready for that re-entrant case.
+       (SubService): Minor changes here.  Just accessors to the various
+       functions in ParseProcess.  Note the shutdown() accessor which
+       helps with shutdown.
+       (ServicePool): New class that manages a pool of processes.  Right
+       now it's hard coded at 2 because that's the smallest number of
+       processes that seems to get a decent perf boost.  Adding more
+       didn't help and it was a good bit faster than one process.
+       Processes move through various states - starting, idle, working
+       and shutting down.  It also has support for checking out a process
+       and checking it back in.  You need to tell it if the process has
+       failed.  So there's some fragility here.  It also keeps stats on
+       how often something has been used.  Note that it supports a
+       shutdown process as well.
+       (ServiceManager): Use the ServicePools instead of just creating a
+       new process every time someone wants access to the service.  It
+       creates the pools on demand.  New callback is serviceFailed()
+       instead of releaseService() when a process fails.  It has a
+       shutdown() call as well and it works!  Yay!
+
+       * services/command/feedparse.py (FeedParseCommand): Call
+       sm.serviceFailed if the parse service fails instead of just
+       returning it to the good queue.
+
+       * services/master/refreshmanager.py (RefreshManager.checkForNewSites):
+       Don't poll sites that have been removed.
+
+       * Random files: Debugging spew everywhere until I feel like the
+       master + controllers are stable and well-tested.
+
+       * services/master/worker.py (WorkManager.dispatchCommands): Fix
+       big performance problem when the command queue is very deep.  We
+       walk the queue based on the available controllers instead of the
+       depth of the queue and go until the controllers are full, not the
+       other way around.  Trying to poll 5000 sites the early way
+       completely locked the master process.  It does fine now.
+
+2008-06-29  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/templates/about.mak: Remove the word stupid.  I felt like
+       it was ruining the entire page.
+
+2008-06-28  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * Touch nearly every template and make sure that things are
+       escaped to avoid XSS problems.  Too many changes to list here.
+       Thanks to Shawn Lauriat <shawn@frozen-o.com> for the great bug
+       reports.
+
+2008-06-28  Joe Shaw  <joe@joeshaw.org>
+
+       * utils/follower_stats.py: Print out the 10 most followed people.
+
+2008-06-28  Joe Shaw  <joe@joeshaw.org>
+
+       * utils/follower_stats.py: Print out some date usage histograms
+       as well.  Clean up the code a bit.
+
+2008-06-28  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/templates/flickr-widget.mak: Missing closing </div> tag.
+
+       * whoisi/templates/twitter-widget.mak: Missing closing </div> tag.
+       Amazed the site rendered at all.
+
+2008-06-28  Joe Shaw  <joe@joeshaw.org>
+
+       * utils/follower_stats.py: Print out some statistics about the
+       userbase and how many people they follow.
+
+2008-06-27  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/templates/search.mak: Escape unsafe data.
+
+       * whoisi/templates/search-widget.mak: Escape unsafe data.
+
+2008-06-26  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * utils/delete_user.py: Utility that deletes a user and all data
+       associated with it.
+
+2008-06-26  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/static/css/style.css: Add a background-color to the
+       style for the body.
+
+2008-06-26  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/controllers.py (Root.follow): Catch the IndexError that's
+       generated by history_to_clusters when there's nothing new found
+       and redirect to the follow-no-entries template.
+
+       * whoisi/templates/follow-no-entries.mak: New template for when
+       people are following someone but they haven't posted anything new.
+
+2008-06-26  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * services/master/worker.py (Worker.acceptingWork): Bump the
+       number of possible commands in progress to 30 instead of 15.
+
+       * whoisi/templates/about.mak: Fix some spacing.
+
+2008-06-26  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/templates/master.mak: Add a footer.
+
+       * whoisi/controllers.py (Root.about): About method.
+
+       * whoisi/templates/about.mak: Silly about page.
+
+2008-06-25  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/templates/nofollow.mak: Add some text to the follow page
+       that teaches people how to follow others.
+
+2008-06-25  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/templates/master.mak: If you're following anyone display
+       a "login later" link.
+
+       * whoisi/templates/login-info.mak: Template that displays login
+       info.
+
+       * whoisi/controllers.py (Root.logininfo): New method that gives
+       you a link to log in later.
+
+2008-06-23  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/api.py: Add getPersonForURL and getURLForTinyLink.
+
+       * Rename whoisi.utils.addperson.py to whoisi.utils.url_lookup.
+       
+       * whoisi/controllers.py: Rename addperson to url_lookup.
+
+2008-06-23  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * Remove all of widgets/ and all of the .kid files from
+       whoisi/templates.
+
+2008-06-22  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/utils/preview_site.py (convert_feed_to_fake_site): Make
+       sure to clamp the max_depth that's passed in to the size that's
+       actually in the feed.
+
+       * whoisi/controllers.py (Root.addpersonstatus): Make the depth
+       that we get for previews the "max depth" instead of the final
+       depth.
+
+2008-06-22  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/config/app.cfg: Turn tg.empty_flash to False so we don't
+       return a tg_flash on every json call, even if we didn't set it.
+
+       * whoisi/utils/fast_api.py: Add fast api sql calls.
+
+       * whoisi/api.py: Add an api controller.  Include getMaxPersonID,
+       getPeople and getPerson.
+
+       * whoisi/controllers.py: Add an api controller to the main
+       controller.
+       (Root.nameadd): Fix bug where we were passing newname instead of
+       name and it was failing with an unknown variable.
+
+2008-06-22  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/templates/everyone.mak: Point to the everyone page
+       instead of the follow page for the More... link (thanks, Joe!)
+
+2008-06-22  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/templates/twitter-widget.mak: Make a short link when
+       we're not doing a preview and use the full link when we are doing
+       a preview.  Had them reversed.  Oops.
+       
+2008-06-22  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/utils/fast_follow.py (fast_people_ids_by_name_for_follower):
+       Quick search that sorts by name for a particular follower.
+
+       * whoisi/templates/follow.mak: Add a way to sort by person instead
+       of by time.
+
+       * whoisi/templates/follow-byname.mak: New template to show a list
+       of people sorted by their name instead of entries by time.
+
+       * whoisi/controllers.py (Root.follow): Add new sort by name mode
+       for the follow page.  Use a somewhat fast lookup to get the people
+       ids for this follower, sorted by name.  Use the same code as
+       search to generate a list of paged results and pass it down to the
+       follow-byname template.
+
+       * whoisi/model.py (Follower.get_person_cache): Get at the person
+       cache.  Not used by anything, but it's fine leaving it in here.
+
+2008-06-22  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/templates/weblog-widget.mak: Fix problem where untitled
+       topics were escaped so you saw the HTML markup.
+
+2008-06-22  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/config/app.cfg: Default to Mako for templating.  Set
+       default encoding and output encoding to utf-8.  Also turn off
+       visit and identity tracking since we're not using it for anything.
+
+       * whoisi/controllers.py: Change everything to use Mako templating
+       instead of crappy Kid templating.  Too many changes to document
+       but most of the widget rendering is driven by render_template
+       instead of the random widget rendering code.
+
+       * Tons and tons of changes to templates.  Look in
+       whoisi/templates/*.mak for all the various templates.
+
+2008-06-17  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/utils/addperson.py (run_db_check): Get rid of the lower()
+       checks in the database query - they didn't add much and were slow
+       as fuck.
+
+2008-06-17  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/controllers.py (Root.unfollowperson): Make sure to
+       unfollow once per follow/person match in the database.  There can
+       be multiples.
+
+2008-06-17  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/controllers.py (Root.nameadd): Should be using the alias
+       id for the audit trail, not the person id.
+
+2008-06-17  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/utils/recaptcha.py (recaptcha_check_fail): Don't check
+       the recaptcha if recaptcha.enabled is set to False in the config
+       file.
+
+       * whoisi/controllers.py (Root.addperson): Add track info to the
+       new site request.
+       (Root.addpersonpick): Make sure to pass along the old track info
+       with the new site request.
+       (Root.addpersonconfirm): Audit when we add a new person and also
+       pass track_info from the old site request.
+       (Root.l): Use function to get tracking info.
+       (Root.siteaddpost): Add track info when adding a site.
+       (Root.siteremove): Add audit trail for removing a site.
+       (Root.nameupdate): Add audit trail for a name change.
+       (Root.nameremove): Add audit trail for removing an alias.
+       (Root.nameadd): Add audit trail for adding an alias.
+       (Root.followperson): Add audit trail for when following someone.
+       (Root.unfollowperson): Add audit trail for when unfollowing
+       someone.
+
+       * whoisi/model.py (NewSite): Add a track_info field to the new
+       site request.  We need this so that we can track the original data
+       that was available when someone wanted to create the new site.
+       Used by NewSiteAudit, eventually.
+       (ChangeAudit): New class that holds auditing data.
+
+       * services/command/controller.py (NewSiteManager): Add
+       NewSiteAudit after NewSiteCreate.
+       (NewLinkedInManager): Same.
+       (NewPicasaManager): Same.
+
+       * services/command/newsite.py (NewSiteSetup): Add the track_info
+       field to what we pull from the database.  Save it in the new_site
+       object saved in the state.
+       (NewSiteAudit): New object that drops an audit item into the audit
+       table for when we make new sites.  Takes info from the state that
+       was added when the new site was pulled from the database.
+
+       * services/command/linkedin.py (NewLinkedInSetup): Add the
+       track_info field to what we pull from the database.  Save it in
+       the new_site object saved in the state.
+
+       * whoisi/utils/track.py (get_request_tracking): Make the tracking
+       code a function.  Returns a tuple of remoteip, proxy + ua that's
+       available in the request.
+
+2008-06-15  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/tests/test_controllers.py: Add a quick and dirty test for
+       measuring performance.
+
+       * test-ws.cfg: New cfg file for running the webserver for tests.
+
+       * start-test-whoisi.sh: Use the test-ws.cfg script to start up the
+       test server.  We're using the test.cfg config for actual testing.
+
+2008-06-15  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * utils/query_everyone_perf.py: Simple test that runs the everyone
+       query as fast as possible in a loop.
+
+2008-06-15  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/controllers.py (Root.p): Add an f=1 optional argument to
+       the /p method to follow someone in one click.
+
+2008-06-14  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/utils/sites.py (fast_sites_for_person): Ignore sites that
+       have been removed when getting sites for a person.
+
+       * whoisi/utils/addperson.py (run_db_check): When doing a db check
+       for a dup site ignore sites that have been removed.
+
+       * whoisi/utils/fast_history.py: Don't show sites that have been
+       removed for everyone or follow queries.
+
+       * whoisi/controllers.py (Root.siteremove): When removing a site
+       just set the removed flag and set the removed time.
+
+       * whoisi/model.py (Site): Add isRemoved flag and removed date to
+       the site column.  We need this so that we can mark things as
+       removed, but don't actually remove them.  (This is so we can
+       recover later.)
+
+2008-06-14  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/utils/fast_history.py: For everyone and follow ignore
+       site_history entries that have on_new set to 1.
+
+2008-06-14  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * services/command/feedparse.py (FeedUpdateDatabaseCommand.insertEntry):
+       When inserting a new entry look to see if there's a new_site flag
+       set in the state.  If there is, we're adding a new site and set
+       the on_new flag for this particular entry.
+
+       * whoisi/model.py (SiteHistory): Add an "on_new" bool flag that
+       tells us if this item was added when the site was first added.
+
+2008-06-14  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/utils/fast_history.py: Don't bother trying to re-order
+       entries as they come out of the database because the id of the
+       items should be the rough living order of them from here on out.
+
+       * services/command/feedparse.py (FeedUpdateDatabaseCommand): Add
+       an "inserts" array member that keeps track of the entry items we
+       need to insert into the database.
+       (FeedUpdateDatabaseCommand.gotEntries): Change the way that we
+       figure out how to insert items into the database.  Updated items
+       run first and inserts are done one at a time, iterating through
+       the feed list from oldest to newest.  We also force the code to
+       get the id of the new item to make sure it's complete before we
+       move on to the next item.  This gives us a rough approximation of
+       oldest-to-newest in the database based only on the id of the site
+       entry.
+
+2008-06-13  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/widgets/templates/person.kid: Make follow + unfollow
+       different.
+
+       * whoisi/widgets/templates/twitter.kid: Make follow + unfollow
+       different.
+
+       * whoisi/widgets/templates/picasa.kid: Make follow + unfollow
+       different.
+
+       * whoisi/widgets/templates/weblog.kid: Make follow + unfollow
+       different.
+
+       * whoisi/widgets/templates/flickr.kid: Make follow + unfollow
+       different.
+
+       * whoisi/controllers.py (Root.followperson): Method now only adds
+       a person to follow, not a toggle.
+       (Root.unfollowperson): New method to stop following a person.
+       (Root.get_follow_text): New method that's used by the follow and
+       unfollow code to tell you how many people you're following.
+
+       * whoisi/model.py: Change set existence testing from if foo to if
+       foo is None.  That was a bug.  Also make add_person a little more
+       resistant to race conditions and make remove_person lossy.
+
+       * whoisi/static/javascript/follow.js: Re-do the way that we attach
+       and update following status.  We now have explicit follow and
+       unfollow classes instead of just a toggle.  We also update the
+       "Follow Person" with a "Working..." while it's working and update
+       everything on the page at once.
+
+       * whoisi/model.py (Follower.cache_people): Use a set constructor
+       with a list comprehension - might go faster this way.
+       (Follower.add_person): Only add a person to this follower if they
+       aren't already in the database.  This isn't completely free of
+       race conditions, but it will help.  If we're already following try
+       to return the current one.
+       (Follower.remove_person): Use .discard instead of .remove in case
+       we try and stop following more than once.  This won't generate an
+       exception.  Lossy, and that's fine.
+
+       * whoisi/utils/follow.py: Add a pile of documentation to the
+       follow filter and follow manager code.  But no code changes.
+
+2008-06-12  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/widgets/templates/search.kid: Set up search template to
+       be used as a header at the top of pages.
+
+       * whoisi/templates/index.kid: Use a hand-created search widget
+       instead of the standard one.  Stick a big logo in there.
+
+       * whoisi/templates/person.kid: Remove std header.
+
+       * whoisi/templates/search.kid: Remove std header.  Add a nice
+       css-styled area that gives result info.
+
+       * whoisi/templates/follow.kid: Remove std header.
+
+       * whoisi/templates/nofollow.kid: Remove std header.
+
+       * whoisi/templates/addform.kid: Remove std header and use search
+       widget instead.  Add a description in bold.
+
+       * whoisi/templates/everyone.kid: Remove std header.
+
+       * whoisi/static/images/*.png: New logos and images for headers.
+
+       * whoisi/templates/master.kid: Get rid of the visibility stuff.
+       Return "no one" if we're not following anyone for the
+       num_friends_text page.  Change the friendslink sidebar to an
+       always visible sidebar.
+
+       * whoisi/controllers.py (Root.followperson): Return "no one" if
+       we're not following anyone.  Also don't bother returning
+       "still_following" to the calling script.  We're not using that
+       flag anymore.
+
+       * whoisi/static/css/style.css: Change the friendslink to
+       nav-sidebar since it contains a pile of stuff now.  Make sure the
+       logo-header is aligned to the bottom.  Add a search-results-info
+       blue background header to separate it from the content and the
+       header at the top of the page.
+
+       * whoisi/static/javascript/follow.js: Change to follownum to
+       update.  Don't bother hiding and showing since we're always
+       showing now.
+
+2008-06-12  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/templates/search.kid: Clean up titles and naming.
+
+       * whoisi/templates/follow.kid: Clean up titles and naming.
+
+       * whoisi/templates/addform.kid: Clean up titles and naming.
+
+       * whoisi/templates/everyone.kid: Clean up titles and naming.
+
+       * whoisi/templates/nofollow.kid: Clean up titles and naming.
+
+       * whoisi/static/txt/robots.txt (Disallow): Add /addform and
+       /search to the disallow list.
+
+       * whoisi/templates/search.kid: Put search results in the title.
+
+       * whoisi/templates/person.kid: Use the person's name in the title
+       of the page.
+
+2008-06-12  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/static/icons/favicon.ico: Updated favicon.
+
+       * sources/favicon.png: Mini favicon in png form.
+
+       * sources/favicon.ico: Mini favicon.
+
+       * sources/whoisi-icon.svg: Full logo.
+
+       * sources/whoisi-icon-mini.svg: Mini-icon - used for the favicon
+       and eventually for the extension.
+
+2008-06-12  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/config/app.cfg: Add static pointer to robots.txt.
+
+       * whoisi/static/txt/robots.txt: Add robots.txt.
+
+2008-06-12  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/utils/preview_site.py (SiteHistoryFakePreview): Remove
+       some debugging spew.
+
+2008-06-11  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/utils/follow.py: Remove debugging spew.
+       
+2008-06-11  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * prod.cfg: Make server.environment="production" so people don't
+       get random stack traces.
+
+       * whoisi/templates/nofollow.kid: Simple "you're not following
+       anyone" page.
+
+       * whoisi/controllers.py (Root.p): If looking up a person doesn't
+       work raise cherrypy.NotFound (a 404.)
+       (Root.l): If looking up a short link fails, return a 404.
+       (Root.follow): If we're not following anyone return a graceful not
+       following anyone error page.
+
+2008-06-11  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/templates/search.kid: Get rid of the number on the next
+       and previous links - it wasn't accurate.
+
+2008-06-11  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/templates/search.kid: Show the full number of results
+       when displaying search results.  Also make sure to add a link to
+       the add page when there are no results.
+
+       * whoisi/controllers.py (Root.search): Pass down how many search
+       results there were and where we are in that process.
+
+       * whoisi/static/css/style.css: Add a small text subheader for
+       under the search results.
+       
+2008-06-11  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/static/css/style.css: Add a b.search-result-header for
+       the search results page.
+
+       * whoisi/controllers.py (Root.search): Add page handling to the
+       search results.
+
+       * whoisi/templates/search.kid: Add page handling to the search
+       results page.
+
+       * whoisi/widgets/templates/person.kid: Remove the "this is me" and
+       "spam" until they are done.
+
+2008-06-11  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/controllers.py (Root.addpersonconfirm): Make sure that
+       before allowing a person to be added that the old new_site is in a
+       "preview_done" state.  This should at least prevent someone from adding
+       a person without having looked at a preview.
+
+       * smoketest.txt: Add a set of simple smoketests.
+
+2008-06-10  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/widgets/widgets.py (AliasesWidget): Add "other_names"
+       argument to the aliases widget.
+
+       * whoisi/controllers.py (Root.addpersonstatus): Add a
+       "feed_not_found" error handler.
+       (Root.nameremove): Use fast_names_for_person to get the other
+       names and pass them down into the widget to render.
+       (Root.nameadd): Same.
+
+2008-06-09  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/widgets/templates/person.kid: Use all the passed in data
+       to render site objects.
+
+       * whoisi/widgets/templates/twitter.kid: Use the passed in
+       other_names instead of generating it locally.
+
+       * whoisi/widgets/templates/picasa.kid: Use the passed in
+       other_names instead of generating it locally.
+
+       * whoisi/widgets/templates/personaddconfirm.kid: New confirm
+       widget to add a person.
+
+       * whoisi/widgets/templates/linkedin.kid: Support previews.
+
+       * whoisi/widgets/templates/aliases.kid: Use the passed in
+       other_names instead of generating it locally.
+
+       * whoisi/widgets/templates/weblog.kid: Passing down site history
+       means we don't have to figure it out here.  Support previews.
+
+       * whoisi/widgets/templates/flickr.kid: Passing down site history
+       means we don't have to figure it out here.  Support previews.
+
+       * whoisi/widgets/templates/personaddpickfeed.kid: New code to pick
+       a feed.  Stolen from the new site pick code.
+
+       * whoisi/widgets/widgets.py (PersonWidget): Pass down all the new
+       items by name.
+       (PersonAddPickFeedWidget): New widget to show a feed pick in the
+       preview screen.
+       (PersonAddConfirm): New widget to show a confirm question when
+       someone is ready to add a new person.
+
+       * whoisi/utils/sites.py (fast_sites_for_person): Fast query to get
+       sites for a person id.
+       (SiteFake): New fake site object.
+
+       * whoisi/utils/preview_site.py (convert_feed_to_fake_site):
+       Converts a feed from a preview into a fake site and site_history
+       objects.
+       (convert_linkedin_to_fake_site): Converts a linkedin preview query
+       to a simple fake site object.
+
+       * whoisi/utils/site_history.py (history_to_clusters): Remove some
+       debugging spew.
+
+       * whoisi/utils/names.py: New classes (NameFake) which is a fake
+       name object for a fast name lookup query.
+
+       * whoisi/utils/addperson.py: New code to look up either a simple
+       url or a complete feed and see if it's already in the database.
+       Tries to resolve / vs. no / issues.  Is case-insensitive which is
+       probably wrong.
+
+       * whoisi/utils/fast_history.py (fast_recent_changes_for_follower):
+       Use SiteHistoryFakeFollower instead of SiteHistoryFake (which is
+       now a root class that's used by a couple of other classes.)
+       (fast_recent_changes_for_everyone): Same.
+       (fast_site_history_for_site): Get site history for a specific site
+       except faster.
+       (PersonFake): Make the lookups here flat instead of using
+       sub-objects.
+       (SiteFake): Same.
+       (SiteHistoryFake): Same.
+       (SiteHistoryFakeBySite): Class that knows how to look up site
+       history info by name from a query.
+
+       * whoisi/utils/flickr.py (flickr_fill_thumbnails): Use entries
+       directly instead of passing in the entries and length to render.
+
+       * whoisi/templates/person.kid: New code to pass down all of the
+       various vars that are now passed in instead of rendered by the
+       site templates directly.
+
+       * whoisi/templates/search.kid: Don't include the new person form
+       directly, instead jump to the add person page.
+
+       * whoisi/templates/follow.kid: New code that passes down the
+       site_history object.
+
+       * whoisi/templates/addform.kid: New person add form.  Hard codes
+       some js files, probably shouldn't, but it works for now.
+
+       * whoisi/templates/everyone.kid: New code that passes down the
+       site_history object.
+
+       * whoisi/controllers.py (Root.search): Use fast search methods to
+       get people, names, sites, site_history and anything else we can.
+       (Root.addform): New method that returns a person add form.
+       (Root.addperson): First call to create a new person.  Has a
+       captcha that comes into it and will return state to kick off the
+       add person cycle.  Checks the URL to see if it's valid and also
+       checks the db for possible matches to the url that's passed in.
+       (Root.addpersonstatus): The heart of the cycle to add a new
+       person.  Checks for error state, if a preview is still in
+       progress, if someone needs to pick a url, if a preview is
+       complete, and will render a preview.
+       (Root.addpersonpick): Code that lets someone pick a url.  Creates
+       a new new_site request from the old one to get the master service
+       to pick it up.
+       (Root.addpersonconfirm): End of the add person progress.  Creates
+       a new person and sends back a message to redirect.
+       (Root.p): Clean up the new_site vs. site race condition.  Use fast
+       queries to render a person.
+       (Root.siterefresh): Use the new fast site history code.
+       (Root.getDisplayDepth): Add a fast lookup path for getting the
+       right depth to look up site history.  This is better than hard
+       coding it everywhere.
+       (Root.rendersite): New code that uses the separate site_history
+       instead of letting the actual site widget figure it out.  Much
+       cleaner.
+
+       * whoisi/search.py (fast_people_ids_by_name): Fast query that will
+       try and return a set of person_ids for a name query.  Not that
+       much faster than before, but somewhat.
+       (SearchService.prettifyName): Don't touch queries that include '@'
+       or ':'
+
+       * whoisi/static/css/style.css: Move out the width of the edit
+       wrappers to 540px to handle the personadd page.
+
+       * whoisi/static/javascript/addform.js: New JS code to handle the
+       person add form.  Basically includes an inner loop to cycle
+       through the process of adding a new person.
+
+       * services/command/controller.py: New classes for preview for
+       linkedin, picasa and feeds.
+
+       * services/command/newsite.py (NewSiteSetup.gotNewSite): New code
+       to handle getting urls for previews as well as for a new state.
+       This just kicks off the state process for a new site.
+       (NewSiteTryURL.getPreferredFeed): Support getting Flickr preferred
+       feeds.  Yay!
+       (NewSiteTryURL.feedLoadDone): Make sure to get the feed type after
+       the feed load is done.  We use it later in both the preview done
+       code and the new site done code.
+       (NewSiteTryURL.getFeedType): Moved here from the createsite code.
+       (NewSiteCreate.createSite): Use the site type from the state
+       instead of calling getFeedType().
+
+       * services/command/flickr.py: Change classes to allow us to pass
+       in a photo id into the class that will get the flickr thumbnail
+       address.  This lets us call it a few times from the preview code
+       instead of having to get it from the database and put it in the
+       state.
+       (FlickrPreviewThumbnails): This class will take a parsed feed and
+       get the thumbnails for every one that's in it.
+       (Flickr.isFlickrURL): New class that will return if a particular
+       url is a flickr url.
+       (Flickr.getPreferredFeed): Returns a preferred feed for flickr.
+
+       * services/command/previewsite.py (PreviewSiteDone): Class that
+       takes the data out of the state and shoves into the new_site
+       table.  Basically stores a feed, the type of feed and/or the
+       current state and saves it.  It tries to work around the mysql 65k
+       rowsize limit by limiting the number of entries to 6 in total.
+
+       * services/command/twitter.py (Twitter.isTwitterURL): Make sure to
+       return False if it's not a twitter feed.
+       (Twitter.getPreferredFeed): Spit out some debugging spew if a
+       preferred feed is detected.
+
+       * services/command/picasa.py (PicasaPreviewLoadFeed): A command
+       that takes the filename of the parsed picasa feed and shoves into
+       the state variable.
+
+       * services/command/linkedin.py (LinkedInPreviewSave): New code to
+       save a linkedin preview to the new_site table.
+
+       * services/master/previewsite.py: New master classes that handle
+       previewing sites.  Basically a copy of the newsite class for
+       masters as well.  Knows a little bit about how to turn some URLs
+       into specific requests.  (LinkedIn and Picasa, basically.)  Copies
+       lots of detection code from newsite.py and should probably be
+       merged into a single class at some point.
+       
+       * services/master/database.py (DatabaseManager.getPreviewSites):
+       New methods to handle getting preview sites out of the database
+       and dispatch them.
+
+       * controller-service: Implement the new preview commands.
+
+2008-05-19  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/controllers.py (Root.l): Track the useragent field for a
+       clickthrough.
+
+       * whoisi/model.py (ClickThrough): Add a useragent field to the
+       click through.
+
+2008-05-19  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/controllers.py (Root.l): Make sure that we look at the
+       X-Forwarded-For header for the remote ip because of the proxy
+       server.
+
+2008-05-18  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/controllers.py (Root.l): Insert a record when someone
+       clicks to another site using our tinyurl-like scheme.
+
+       * whoisi/model.py (ClickThrough): New ClickThrough object/table
+       that tracks when people click through a tinyurl-style url.  Tracks
+       who did it, when they did it, what their IP was and if there was a
+       referer.
+
+2008-05-18  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+       
+       * whoisi/templates/master.kid: Add google analytics tracking code
+       to the master template.
+
+2008-05-14  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/utils/fast_history.py: For the everyone and follower
+       functions sort the entries by time and then reverse them so that
+       we return a time-stable list of links.
+
+       * whoisi/templates/follow.kid: Pass the minimum number to the
+       start for the next page by walking the clusters.
+
+       * whoisi/templates/everyone.kid: Pass the minimum number to the
+       start for the next page by walking the clusters.
+
+2008-05-13  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/widgets/templates/picasa.kid: Change _new to _blank.
+
+       * whoisi/widgets/templates/weblog.kid: Change _new to _blank.
+
+       * whoisi/widgets/templates/flickr.kid): Change _new to _blank.
+
+       * whoisi/utils/display.py (expand_href): Change _new to _blank.
+
+       * whoisi/utils/twitter.py (expand_user_ref): Change _new to _blank.
+
+2008-05-12  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/controllers.py: Add optional start arg to the follow
+       item.  Use the fast recent changes method to get everything in one
+       call.
+
+       * whoisi/utils/fast_history.py: Add a fast history query for
+       people to load their history quickly.
+
+       * whoisi/templates/follow.kid: Add a More... link at the bottom.
+
+2008-05-12  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/utils/fast_history.py: Some deep hacks that do a single
+       query with fake classes to get history for everyone.  Basically
+       loads everything we might need to display that data into fake
+       classes that resolve to array offsets in the data we loaded.
+
+       * whoisi/templates/everyone.kid: Include a More... link at the
+       bottom of the page to see more history for everyone.
+
+       * whoisi/controllers.py: Change everyone method to include an
+       optional start argument.  Use the new
+       fast_recent_changes_for_everyone call to get the history for
+       everyone.
+
+2008-05-12  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * services/command/download.py (localDownloadPage): Add user agent
+       for whoisi.
+
+2008-05-07  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/utils/follow.py (add_person): Fix problem where the first
+       time you followed someone the link didn't show up in the browser.
+       Make sure to set the current follow in the cherrypy session after
+       we create the follower for the first time.
+
+       * whoisi/widgets/templates/twitter.kid: Use a short link for the
+       url to a twitter entry on the time display.
+
+       * whoisi/widgets/templates/picasa.kid: Use the short link for the
+       url to a picasa photo.
+
+       * whoisi/widgets/templates/weblog.kid: Use the short link for the
+       url to a weblog entry on the time display.
+
+       * whoisi/widgets/templates/flickr.kid: Use the short link for the
+       url to a flickr photo.
+
+       * whoisi/utils/display.py (short_link_ref): New function that
+       takes an id and returns a link to it.
+
+       * whoisi/utils/flickr.py (flickr_fill_thumbnails): Add the id to
+       what we return for a set of flickr items.
+
+       * whoisi/controllers.py: Add an 'l' method that takes a
+       hex-encoded link and redirects to an actual site.  How we drive
+       our tinyurl-like functionality.
+
+       * whoisi/static/css/style.css: Add short-link rules so that we can
+       make short links show as black, not blue.
+
+2008-05-06  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/controllers.py: Sigh.  Add unicode() calls to any widgets
+       that are rendered and passed back as part of a json result.  Work
+       around problems where utf-8 encoded byte strings were being
+       re-encoded as utf-8 and getting corrupted.
+
+2008-05-05  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * prod.cfg: server.webpath should not have been set to the full
+       website - just disable it now since our paths all start with '/'
+       anyway.
+
+2008-05-04  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/widgets/templates/person.kid: Change /person refs to /p.
+
+       * whoisi/widgets/templates/twitter.kid: Change /person refs to /p.
+
+       * whoisi/widgets/templates/picasa.kid: Change /person refs to /p.
+
+       * whoisi/widgets/templates/weblog.kid: Change /person refs to /p.
+
+       * whoisi/widgets/templates/flickr.kid: Change /person refs to /p.
+
+       * whoisi/controllers.py: Change /person refs to /p.
+       
+2008-05-04  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+        * prod.cfg: Woot.  Config file that handles the proxy setup stuff
+       for running behind an apache reverse proxy.
+
+2008-05-04  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * html-feed-scrape-service (ScrapeProtocol.runCommand): Remove
+       finally: block that only closed a file that would automatically be
+       closed.  Needed for python 2.4.
+
+       * feed-parse-service (FeedParseProtocol.runCommand): Remove
+       finally: block that only closed a file that would automatically be
+       closed.  Needed for python 2.4.
+
+       * picasa-poll-service (PicasaProtocol.runCommand): Remove finally:
+       block that only closed a file that would automatically be closed.
+       Needed for python 2.4.
+
+       * services/command/setup.py (FileToStateCommand.doCommand): Remove
+       exception block that we don't need.  File objects clean themselves
+       up.
+
+2008-05-03  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * services/command/flickr.py (FlickrUpdateDatabase.doCommand):
+       Oops, forgot to json encode the actual thumbnail location.
+
+2008-05-03  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * utils/clean_tmp.sh: Simple shell script that cleans up /tmp -
+       run it if you're refreshing websites or you'll run out of disk
+       pretty quickly.
+
+2008-05-03  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * utils/convert-display-cache.py: Utility here for the sake of
+       history.  Converts the old flickr display_cache to the new
+       json-enabled one.  DO NOT RUN AGAINST A PRODUCTION DATABASE.
+
+2008-05-03  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * services/command/controller.py (FlickrCacheManager): Attach the
+       flickr cache error handler.
+
+       * services/command/flickr.py (FlickrCacheError): New class to
+       handle flickr errors when getting thumbnails. It now catches the
+       xmlrpclib.Fault that's generated when there's an error.  If it's
+       the "photo not found" or "permission denied" error then we save
+       that to the database for later processing.
+
+       * whoisi/utils/flickr.py (flickr_fill_thumbnails): Use the new
+       format for the display_cache - serialized json data.  If there's a
+       thumb var, set it.  If not, it's probably an error and just show
+       the grey box but don't indicate that it needs a refresh.  NULL
+       still means refresh later.
+
+2008-05-03  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * services/master/worker.py (Worker.commandComplete): Get rid of a
+       pile of latency - start a new command after the last one completes
+       until waiting for the next tick.  Should process commands a lot
+       faster now.
+
+2008-05-03  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * services/master/sitelock.py: Remove debug spew.
+
+       * services/master/worker.py (WorkManager.reviveDeadWorkers): Fix
+       bug where connecting workers would end up in limbo because they
+       would be left off both the dead and connecting worker lists.  We
+       fix this by making sure that we put everything on the connecting
+       list before trying to process any of them and make sure the dead
+       pool is actually empty before processing.
+
+2008-04-28  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * services/command/newsite.py (NewSiteTryURL.feedLoadDone): Call
+       the done callback directly instead of creating the site object.
+       (NewSiteCreate): New class that does what the end of NewSiteTryURL
+       used to do.  It takes all the state from TryURL and creates the
+       site object.  No changes to the functional code, just wrap it in a
+       new command.  We're doing this to get ready for being able to
+       "preview" a URL when we add a new person.  We don't want to
+       actually create the site object in that case.
+
+2008-04-27  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/utils/follow.py (FollowManager.follow_for_id): Make sure
+       to update the last_visit time for a user when getting the cookie.
+
+2008-04-27  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/static/javascript/person.js: Make sure that we don't load
+       a new captcha when removing a name or changing a primary name if
+       there's already one in progress.  Also make sure to return false;
+       from all of the handlers if we don't load the captchas otherwise
+       it tries to load urls.
+
+2008-04-27  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/widgets/templates/person.kid: Changes for new name
+       editing code.  Get rid of the old text areas and stick with a
+       simple link that loads the form.
+
+       * whoisi/widgets/templates/nameupdate.kid: New widget for name
+       update.
+
+       * whoisi/widgets/widgets.py (NameUpdateWidget): New name update
+       widget.
+
+       * whoisi/controllers.py (Root.nameupdate): Handle captcha
+       failures.  Also return the new name when we're done.  Used by the
+       JS.
+       (Root.nameupdateform): Code to return an empty form to update the
+       name.
+
+       * whoisi/static/javascript/person.js: Add hooks to change name
+       based on a captcha + loaded form.  Remove code for old name update
+       system.
+
+2008-04-27  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/widgets/templates/siteremove.kid: New form for removing a
+       site.
+
+       * whoisi/widgets/widgets.py (SiteRemoveWidget): Add
+       site_remove_widget.
+
+       * whoisi/controllers.py (Root.siteremove): Return an error if the
+       captcha didn't verify.
+       (Root.siteremoveform): New method that returns a form + captcha.
+
+       * whoisi/static/javascript/person.js: Add hooks to get a captcha
+       when removing a site from a person.
+
+2008-04-27  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/widgets/templates/nameremove.kid: New form for removing a
+       name.
+
+       * whoisi/widgets/widgets.py (NameRemoveWidget): New name remove
+       widget.
+
+       * whoisi/controllers.py: Add new "nameremoveform" method that
+       returns a form for removing a name and also change "nameremove" so
+       that it checks for captcha validity.
+
+       * whoisi/static/javascript/person.js: Add hooks to the name remove
+       code so that it loads a form to load a name instead of just
+       removing it directly.
+
+2008-04-26  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/widgets/templates/nameadd.kid: Don't use spans and add a
+       recaptcha widget.  Wrap with the nice person-edit-wrapper div so
+       it has a nice look.  Add error text and re-render newname if it's
+       there.
+
+       * whoisi/widgets/widgets.py (NameAddWidget): Add "error_text" and
+       "newname" as arguments to the nameadd widget.
+
+       * whoisi/controllers.py (Root.nameadd): Check captcha results
+       against service and return the form if there's an error.
+
+       * whoisi/static/css/style.css: Wrap name adds with a nice editing
+       div.
+
+       * whoisi/static/javascript/person.js: Add hooks to pass captcha
+       data when adding a new name.  Also support handling a non-done
+       status when adding a name and re-show the widget if there was a
+       captcha error.
+
+2008-04-26  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/widgets/templates/siteadd.kid: Change the size from 50 to
+       45 so it doesn't overrun on ff3/linux.
+
+2008-04-26  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/controllers.py (Root.followperson): Fix bug where
+       stopping following a person wasn't working.  Need to pass the
+       person id to follow.is_following_person() not the whole person
+       object.
+
+2008-04-25  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/widgets/templates/siteaddpickfeed.kid: Use
+       site-add-wrapper div so it has a nice look.
+
+       * whoisi/widgets/templates/recaptcha.kid: New widget to display a
+       recaptcha.  Not used yet.
+
+       * whoisi/widgets/templates/siteadderror.kid: Use the
+       site-add-wrapper div so it has a nice look.
+
+       * whoisi/widgets/templates/siteaddstatus.kid: Use the
+       site-add-wrapper div so it has a nice look.
+
+       * whoisi/widgets/templates/siteadd.kid: Re-do a lot of this so
+       that it uses the new yellow look, handles error text and includes
+       a target for a recaptcha ajax load.  Add a cancel button.
+
+       * whoisi/widgets/widgets.py (ExtJSLink): New class used for
+       external JS locations (instead of just local ones.)
+       (PersonWidget): Person now requires the recaptcha widget.
+       (SiteAddWidget): New param for this widget: error_text.
+       (RecaptchaWidget): New widget (not used yet.)
+
+       * whoisi/utils/recaptcha.py (recaptcha_check_fail): New function
+       to check if a recaptcha failed.  Returns None if there was no
+       error.
+
+       * whoisi/controllers.py (Root.siteaddpost): Check adding a site
+       vs. a recaptcha and return an error if it doesn't pass.
+
+       * whoisi/static/css/style.css: New div.site-add-wrapper class that
+       has a yellowish background with a border so we can tell where the
+       edges of the form are.  Don't use div.url-pick-list anymore so we
+       remove it from here.
+
+       * whoisi/static/javascript/person.js: Add onCaptchaError callback
+       to handle a "captcha_error" message from the server when getting a
+       status update.  Break out attaching add site form events into its
+       own function so we can do it from more than one place.  New
+       function "request_recaptcha" to load a recaptcha widget when we
+       add a new site form.  Add a cancel button so that we can stop
+       adding a new account.  Pass recaptcha info to the server when we
+       submit the form.  Make sure that if a captcha is in progress we
+       don't try and add more than one site at a time - the recaptcha
+       interfaces don't allow it.
+
+2008-04-23  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/utils/display.py (confirm_escape): Deep hack that needs
+       to be fixed.  URLs are everything until a space or the end of the
+       line.  Fixed a twitter entry like http://google.com&lt; that would
+       end up generating invalid HTML.
+
+2008-04-22  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/utils/twitter.py (expand_user_ref): Add the '-' symbol to
+       the list of characters we expand for urls.
+
+2008-04-21  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/search.py (SearchService.peopleByName): Support searching
+       aliases.  This includes looking for exact and partial matches in
+       aliases and searching on the right hand side of a group alias.
+       You can also do something like mozilla: and see an entire group.
+
+2008-04-20  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * tests/twisted/network/test_picasa.py: Make sure to test that the
+       "page_not_found" error is set when we fail to find a site.
+
+       * tests/twisted/network/test_newsite.py: Add new conditions to
+       tests to make sure that we can tell the difference between a page
+       not found or an invalid feed or a feed not found on a page.
+
+       * tests/twisted/network/test_linkedin.py: Add new conditions to
+       tests to make sure that error codes are properly set when we fail
+       to add a new linkedin page.
+
+       * whoisi/widgets/widgets.py (SiteAddErrorWidget): New site add
+       error widget.
+
+       * whoisi/controllers.py (Root.person): Make sure that we don't
+       load errors or completed new sites when we look for sites in progress.
+       (Root.siteaddstatus): Return errors when we fail to add a new
+       site!
+
+       * whoisi/static/css/style.css: Add new div.error type that draws
+       text as red.
+
+       * whoisi/static/javascript/person.js: Add a handler when we get an
+       "error" status so that we log something reasonable.
+
+       * services/command/exceptions.py (FeedNotFoundError): Add another
+       error type that we'll use to distinguish between not finding a
+       feed and not being able to parse a feed.
+
+       * services/command/newsite.py (NewSiteError.handleError): Handle
+       various exceptions and try and put a code in the error field that
+       makes sense.
+
+       * services/command/picasa.py (PicasaPollFeed.parseError): Return a
+       PageNotFoundError() when there's an error so we can return it to
+       the user.
+
+2008-04-20  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/utils/twitter.py (get_text): Add exception handler in
+       case twitter hands us an empty string and we try to parse it.
+
+2008-04-18  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/templates/master.kid: Remove the auto-focus code from the
+       master template and move it into the index page - which is the
+       only place we want it.
+
+       * whoisi/templates/index.kid: See above.
+
+2008-04-05  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/widgets/templates/flickr.kid: Use personID instead of
+       person.id.  Also pass the id to is_following_person() instead of
+       the full person object.
+
+       * whoisi/widgets/templates/weblog.kid: Use personID instead of
+       person.id.  Also pass the id to is_following_person() instead of
+       the full person object.
+
+       * whoisi/widgets/templates/picasa.kid: Use personID instead of
+       person.id.  Also pass the id to is_following_person() instead of
+       the full person object.
+
+       * whoisi/widgets/templates/twitter.kid: Use personID instead of
+       person.id.  Also pass the id to is_following_person() instead of
+       the full person object.
+
+       * whoisi/widgets/templates/person.kid: Use personID instead of
+       person.id.  Also pass the id to is_following_person() instead of
+       the full person object.
+
+       * whoisi/utils/site_history.py:
+       (get_recently_changed_site_history_for_follower): Vastly improve
+       the queries that we do by doing a single query to collect all the
+       data instead of lots of little ones.  Also use SiteHistory.siteID
+       instead of SiteHistory.site.id which was triggering lots of extra
+       queries.
+
+       * whoisi/utils/follow.py: Updates to use the new Follower
+       interfaces in the model - pass more through to the model.
+
+       * whoisi/model.py: Add code to the Follower object to cache the
+       people who are associated with the follower.  Also include
+       interfaces to add and remove people to the follower - use these so
+       that the cache is coherent.
+
+2008-04-05  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/config/app.cfg: Change the identify failure url to
+       '/userlogin' instead of '/login'.
+
+       * whoisi/utils/follow.py (login): New method that sets the current
+       follower on the request method and sets the cookie.
+
+       * whoisi/templates/login_not_found.kid: New error page if a login
+       isn't found.
+
+       * whoisi/controllers.py: Move 'login' and 'logout' methods to
+       'userlogin' and 'userlogout' since we're not using the identity
+       methods for the main login.  Add new 'login' method that looks for
+       a private key on the command line and calls the follower login
+       method if it's connected to a method.  From there it redirects to
+       the '/follow' page.  If the follower isn't found it falls back to
+       an error page.
+
+       * whoisi/static/css/style.css: Add a style for h2.error - make it
+       red.
+
+2008-04-03  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/widgets/templates/person.kid: Use the <span> tag for the
+       link-action links instead of putting the class on the links
+       themselves.
+
+       * whoisi/static/javascript/person.js: Change add site link to
+       include the span that includes it now when loading the new form.
+       Also the same when we're done loading the new site and also when
+       we're editing aliases on the person edit page.
+
+       * whoisi/templates/person.kid: Don't show the search form on the
+       person edit page.  It's just confusing.
+
+2008-04-03  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/widgets/templates/twitter.kid: Fix how we get entries.
+
+       * whoisi/widgets/templates/picasa.kid: Fix how we get entries.
+
+       * whoisi/widgets/templates/weblog.kid: Fix how we get entries.
+
+       * whoisi/widgets/templates/flickr.kid: Fix how we get entries.
+
+2008-04-02  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/widgets/templates/twitter.kid: Support follow display
+       type.
+
+       * whoisi/widgets/templates/picasa.kid: Support follow display
+       type.
+
+       * whoisi/widgets/templates/weblog.kid: Support follow display
+       type.
+
+       * whoisi/widgets/templates/flickr.kid: Support follow display
+       type.
+
+       * whoisi/templates/follow.kid: Pass in the "follow" display type
+       instead of time.
+
+2008-04-02  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/widgets/templates/person.kid: Add code to show "stop
+       following" if you're following someone.
+
+       * whoisi/widgets/templates/weblog.kid: Make the follow
+       functionality work for weblogs.
+
+       * whoisi/widgets/widgets.py (PersonWidget): Pull in the new
+       follow.js code for the person widget.
+
+       * whoisi/utils/site_history.py:
+       (get_recently_changed_site_history_for_follower): Crazy code that
+       starts with a follower, gets the people they are following, gets
+       the sites owned by those people and then the recent site history
+       for each one of those sites.
+       (get_recently_changed_site_history): Change the call to get the
+       recent site history for everyone to just use the added field
+       instead of the publish and update dates - much more reliable if
+       we're running constant refreshes.
+
+       * whoisi/utils/follow.py: The heart of the new follow code.  Most
+       of this is the FollowManager class which is a singleton class and
+       is called from multiple threads in the server.  It knows how to
+       look up cookie IDs and set cookies.  It also sets the per-thread
+       cherrypy.request.follow object that's used from inside of the web
+       server.  Access to follow information is all done through this
+       util class instead of through the web server.  A lot of this code
+       is borrowed from the turbogears visit code and even starts up a
+       thread to do caching.  We don't do that caching yet but we can if
+       it becomes an issue.
+
+       * whoisi/templates/follow.kid: New template for following.
+       Basically the same as the everyone page until we add person-based
+       lists as well as date-driven lists.
+
+       * whoisi/templates/everyone.kid: Just jam the follow.js script in
+       here for now.
+
+       * whoisi/templates/master.kid: Show the friendslink bit if the
+       person is following anyone.
+
+       * whoisi/controllers.py (follow): New method that is the landing
+       point for how you follow a set of people.  It uses the new
+       site_history methods and then collects them into clusters.  Same
+       as with the everyone page.
+       (Root.followperson): Method that lets you toggle if you want to
+       follow a person or not.  Only exposed through json.  Returns new
+       content and the number of people that are being followed.
+
+       * whoisi/model.py (Visit): Make sure that we're using utcnow() for
+       the created date.
+       (Group): utcnow for created.
+       (User): utcnow for created.
+       (Follower): New class that describes someone who follows someone
+       else.  Includes hashes for the cookie, a private and public key,
+       the ability to store an email address and if they are associated
+       with a particular person on the site.  Also track an expires date
+       and when they were created.  Also includes some classmethods for
+       looking up by the various key types.
+       (FollowPerson): New class that maps a follower to people they are
+       following.
+
+       * whoisi/static/javascript/follow.js: Javascript file to handle
+       following functionality.  Simple classes and ajax.
+
+       * start-whoisi.py: Add hooks to the startup and shutdown points in
+       the server so we can register our global cherrypy filter to catch
+       and set the following cookie.
+
+2008-03-27  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/controllers.py (Root.everyone): Pass the search_widget in
+       as a param to trigger jquery getting included.  Ugh.
+
+       * whoisi/templates/everyone.kid: Don't need to include the search
+       widget from here since it's passed in from the controller now.
+
+2008-03-27  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/utils/twitter.py (expand_user_ref): Add _ to the
+       characters to expand for a twitter user.
+
+2008-03-27  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/summary.py (SummaryCreator.output): Call confirm_escape()
+       on text nodes we output.  Fix val's blog and havoc's blog that
+       included & characters.
+
+2008-03-26  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * services/master/newsite.py (NewSite.startProcess): Bug fix.
+       Adding new linkedin sites wasn't working because we were passing
+       the wrong command to the controller process.  The controller
+       process wasn't returning an error when we did that and was
+       silently returning success.
+
+       * services/command/exceptions.py (BadCommandException): Exception
+       that's derived from pb.Error so we can throw it across processes.
+
+       * controller-service (Controller.remote_doCommand): Make things a
+       little more robust by using if/elif instead of just if and fall
+       through to throwing an exception if the master passes down a bad
+       command.
+
+2008-03-26  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/model.py (SiteHistory): Change getLastTouched to check
+       the time reported by a feed against the added time in case clock
+       skew pushes entries into the future.  This happens in the real
+       world.  I AM LOOKING RIGHT AT YOU ZE FRANK.
+
+2008-03-25  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/widgets/templates/twitter.kid: Add code that expands href
+       and @something style references.
+
+       * whoisi/utils/display.py (expand_href): Code that expands
+       http://foo urls for use on twitter.
+
+       * whoisi/utils/twitter.py (expand_user_ref): Utility that expands
+       @something references for twitter pages.
+
+2008-03-24  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * master-service: Code to do automatic refreshes is checked in.
+       Add a -r or --refresh to schedule refreshing sites.
+
+       * services/master/refreshmanager.py: New file that manages when we
+       refresh sites.  It will schedule any site that hasn't been
+       refreshed on startup for immediate refresh.  Any sites that have
+       been refreshed in the last half an hour will be scheduled for a
+       refresh at a random time in the next half an hour.  Not the most
+       efficient code in the universe but it should work well for the
+       time being.
+
+2008-03-24  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/widgets/templates/twitter.kid: Everything I just said is
+       a lie.  Sometimes twitter data is escaped, sometimes it is not.
+       Yay for the intarweb.
+
+       * whoisi/utils/display.py (confirm_escape): Add gt + lt to the
+       list of things we look for before escaping.
+
+2008-03-24  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/widgets/templates/twitter.kid: Twitter entries are
+       already escaped so don't escape them again.  Fixes things like
+       '<8' and '->' in twitter displays.
+
+2008-03-23  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/widgets/templates/search.kid: Add a nice "everyone" and
+       "random" link to the search widget.
+
+       * whoisi/widgets/templates/twitter.kid: Code to support time-based
+       display based on entries that are passed in.
+
+       * whoisi/widgets/templates/picasa.kid: Code to support time-based
+       display based on entries that are passed in.
+
+       * whoisi/widgets/templates/weblog.kid: Code to support time-based
+       display based on entries that are passed in.
+
+       * whoisi/widgets/templates/flickr.kid: Code to support time-based
+       display based on entries that are passed in.
+
+       * whoisi/widgets/widgets.py: Pass in 'display_entries' to each of
+       the site widgets.
+
+       * whoisi/utils/site_history.py: New file to help generate site
+       history.  Code that will generate a list of site history newer
+       than a passed in date.  Also code that will take a list of site
+       history objects, order them based on their age and cluster them to
+       the site they belong.
+
+       * whoisi/utils/flickr.py (flickr_fill_thumbnails): Pass in the len
+       and the entries so that we can treat the list as an array.
+
+       * whoisi/templates/person.kid: Use the search widget so we get the
+       nice random/everyone links under the box.
+
+       * whoisi/templates/everyone.kid: New template that returns an
+       "everyone" page.  Right now it's pretty brute force, doesn't
+       paginate.  But it's a start.
+
+       * whoisi/controllers.py (Root.everyone): New everyone url that
+       will return all the activity for the last day.  Not very fast or
+       complete, but it works reasonably well.
+       (Root.random): New method that returns a random person.  Fun!
+
+       * whoisi/model.py (Person.getRandom): New method that returns a
+       random person id.
+       (SiteHistory.getLastTouched): Better way of calculating the last
+       touched value.  Should be faster, too.
+
+       * services/master/newsite.py (NewSite.pollDone): Fix bug where a
+       new flickr add wasn't triggering a flickr thumbnail refresh.
+
+2008-03-22  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * services/command/linkedin.py (LinkedInCreateCommand.doCommand):
+       Fix a bug test cases found!  Make sure to set the "type" in the
+       state so that we can return it to the master service once a new
+       site is added.
+
+       * services/command/picasa.py (PicasaCreateCommand.doCommand): Fix
+       the same bug in the picasa code.
+
+2008-03-21  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * master-service (MasterService.start): Create a
+       SiteLock() (site_lock) object during startup.
+
+       * services/command/controller.py (ProtoManager.succeeded): Make
+       sure to pass through any return values that might be returned by a
+       command.
+
+       * services/command/newsite.py: Return the new feed type and
+       site_id when returning from a new site addition.
+
+       * services/command/feedparse.py (FeedUpdateDatabaseCommand.done):
+       Return the site_id when we're done.
+
+       * services/master/database.py: Only do a full scan for unfinished
+       flickr photos once at startup.  Pass down the site_id for
+       refreshes.
+
+       * services/master/feedrefresh.py: Subclass from Command.  Also
+       refresh flickr images for a flickr site if there was a change.
+
+       * services/master/picasa.py: Subclass from Command.
+
+       * services/master/newsite.py: Subclass from Command.  Also, if we
+       add a flickr site kick off a new flickr refresh command instead of
+       expecting the master process to find it by polling everything.
+       Also support return values from the controller that's running the
+       job thanks to the change to perspective broker (yay, return
+       values!)
+
+       * services/master/sitelock.py: New code that maintains a global
+       lock on which site has work being done in it.  It's attached to
+       the master process + object.  Uses a simple set to maintain which
+       sites currently have jobs attached to them.
+
+       * services/master/linkedin.py: Subclass from Command.
+
+       * services/master/flickr.py: Subclass from Command.
+
+       * services/master/worker.py: Lots of changes to allow the Command
+       class to be a base class for commands on the backend.  Also
+       supports locking for sites so that we don't run more than one
+       command per site at the same time.  Also support return values to
+       the caller for a command.  We also support losing connections from
+       the perspective broker code and restarting jobs.  Important note:
+       re-adding to the worker queue happens from the lost connection
+       handler, not from the error code for an individual job where an
+       error is ignored.  Otherwise we can end up with more than one copy
+       of a job in the queue.
+
+2008-03-18  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * services/command/controller.py: Simple changes to support using
+       deferreds instead of a line-based protocol.
+
+       * services/master/worker.py: Tons of work here to use perspective
+       broker instead of line-based protocols.  Get rid of the
+       WorkerProtocol and use standard deferreds instead.  Gets the root
+       object from the worker and makes calls to a remote doCommand
+       method.  No change to the arguments or formats yet - that will
+       come later when we want to restructure a bit.  (Should move to a
+       method per type at some point.)  Support the worker going away by
+       catching pb.PBConnectionLost when a command returns an error.
+       Also catch bp.DeadReferenceError when we callRemote() to the
+       remote end (untested.)
+
+       * controller-service: Refactor code to return deferreds directly
+       and be a perspective broker service instead of having a custom
+       line-based service.  Much more sane now.
+
+       * services/protocol/controller.py: Removed because we replaced it
+       with the perspective broker code.
+
+2008-03-15  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * services/command/newsite.py (NewSiteTryURL.doCommand): Bug fix -
+       make sure that we pass the url as a str(), not unicode to make
+       sure that the DownloadCommand doesn't freak out.
+       (NewSiteTryURL.startSecondDownload): Same with not passing
+       unicode.
+       (NewSiteTryURL.getFeedType): Call urlparse.clear_cache() after
+       parsing the url - workaround as usual.
+
+       * services/command/twitter.py (Twitter.isTwitterURL): Bug fix -
+       call urlparse.clear_cache() to work around bugs in urlparse
+       module.
+
+       * whoisi/model.py (Person): Change the other_names to a
+       MultipleJoin() instead of being a RelatedJoin().  Just easier to
+       work with, even if you end up with more data.
+       (Name): Same as with Person - change to a ForeignKey() instead of
+       a RelatedJoin().
+
+       * whoisi/static/javascript/person.js: New support for js methods
+       to support name editing and alias addition and removal.
+
+       * whoisi/static/css/style.css: New style for div.other-names that
+       is for the alias display.
+
+       * whoisi/controllers.py: Support an optional 'mode' argument for
+       the person display method.  Right now it's either 'edit' or
+       defaults to 'full'.  New siteremove method.  New nameupdate
+       method.  New nameremove method.  New nameaddform method.  New
+       namedadd method.  All for supporting name editing and aliases.
+
+       * whoisi/templates/person.kid: Pass down the display value from
+       the controller so we know if we're editing or not.  Also don't
+       show the search form if we're editing a person.
+
+       * whoisi/widgets/widgets.py: New name_add_widget and
+       aliases_widget.
+
+       * whoisi/widgets/templates/person.kid: Lots of changes to support
+       editing of a person entry.  Remove the wikipedia entry that was
+       commented out.
+
+       * whoisi/widgets/templates/nameadd.kid: New form for editing a
+       person's primary name.  Only used during editing.
+
+       * whoisi/widgets/templates/aliases.kid: New widget to display
+       aliases - supports editing or display.
+
+       * whoisi/widgets/templates/twitter.kid: Support for 'edit'
+       display.
+
+       * whoisi/widgets/templates/picasa.kid: Support for 'edit' display.
+
+       * whoisi/widgets/templates/linkedin.kid: Support for 'edit'
+       display.
+
+       * whoisi/widgets/templates/weblog.kid: Support for 'edit' display.
+
+       * whoisi/widgets/templates/flickr.kid: Support for 'edit' display.
+
+2008-03-11  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * lib/feedparser.py: Patch that fixes <media:title> entries
+       overwriting global <title> setting.  Also see
+       http://code.google.com/p/feedparser/issues/detail?id=18
+
+2008-03-11  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * patches/feedparser-title.patch: Patch that fixes feeds that
+       include <media:title> (like stuart.)
+
+       * patches/README: Description of patches.
+
+       * lib/feedparser.py: Add a local copy of feedparser 4.1 so we can
+       add some patches.  Sigh.
+
+       * feed-parse-service: Use the local copy of feedparser.
+
+2008-03-09  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/static/tests/one_entry.atom: New test feed with one item
+       in it - cribbed right from Wikipedia (so we know it's valid -
+       right?)
+
+       * whoisi/static/tests/relative_feed.html: New test page with a
+       relative URL to a feed.
+
+       * tests/twisted/network/test_newsite.py: New test that adds a feed
+       with a relative URL.
+
+       * services/command/newsite.py: Relative URL handling for feeds!
+       Yay!  Found an awesome bug in the urlparse code in python related
+       to the cache (basically it would return unicode typed strings when
+       parsing a non-unicode string in some cases.)  Also clean up the
+       way that we detect if a page is a valid html file and how we
+       deal with it if it's not.
+
+       * services/command/download.py: Print out the url being downloaded
+       - useful for debugging!
+
+2008-03-09  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * html-feed-scrape-service: Only pull out link information if
+       we're inside the <head> element.  This keeps us from accidentally
+       picking up link information from other parts of the
+       document (especially if it happens to be an rss feed!) and leaving
+       us with false elements.  Frank Hecker's blog triggered this one. [
+       NEEDS TEST CASE. ]
+
+2008-03-09  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/summary.py: Code that probably needs test cases more than
+       anything else in the entire universe.  When calling output make
+       sure to call end_block() so that any leftover pieces are shoved
+       into a block.  This fixes display problems on spot's blog and
+       doesn't seem to affect other sites that I looked at (about 60 of
+       them in my personal database.)
+
+2008-03-08  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * services/master/worker.py: Add some cheap timings to each
+       command so we can start to get a sense of how long it takes to
+       process a job.
+
+2008-03-08  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * services/command/controller.py (RefreshManager): Convert to
+       using DownloadCommand instead of FeedDownloadCommand.
+
+       * services/command/feedparse.py: Get rid of the
+       FeedDownloadCommand command and convert the one caller to use the
+       standard DownloadCommand.  It had awful error handling and wasn't
+       needed anyway.
+
+       * services/master/worker.py: Add very simple rate limiting to how
+       many jobs we send to a worker.  Limited to 10 jobs at once for
+       now.
+
+       * tests/twisted/network/test_feedrefresh.py: Add two more tests -
+       timeout and a 404 test.
+
+2008-03-08  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * services/command/linkedin.py (LinkedInUpdateCommand.insertDone):
+       Make sure to update the last_update field in the database whenever
+       there's a change in the data.
+
+       * tests/twisted/network/test_linkedin_refresh.py: Add checking for
+       update of lastUpdate on the model whenever there's an update of
+       the linkedin current data.  This should also fix the spurious
+       errors that happened when running some of the linkedin tests as
+       well.
+
+2008-03-08  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * services/command/controller.py (PicasaRefreshManager): Add the
+       RefreshSiteError error handler.
+       
+       * tests/twisted/network/test_picasa_refresh.py: Add two test cases
+       for refreshing picasa - a success and a failure.
+
+2008-03-08  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * services/command/controller.py (RefreshManager): Add the
+       RefreshSiteError error handler.
+
+       * tests/twisted/network/test_feedrefresh.py: Two tests added for a
+       feed refresh: one success and one failure.
+
+2008-03-08  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/static/javascript/person.js: Changes to support adding
+       the person from the header instead of from the bottom of a
+       person's display.  This actually simplifies the code a bit.  Also
+       fix a bonus bug I ran across where the site-add-pick code was
+       being run because I was checking for non-null instead of non-zero
+       array len.
+
+       * whoisi/controllers.py (Root.search): Pass down the person_widget
+       as one of the arguments.  (Can probably do this directly from the
+       person widget, but we'll fix all that up later.)
+
+       * whoisi/templates/search.kid: Instead of using our own home-grown
+       person display use the person widget and just pass down the
+       display type.  This also means we can add new sites directly from
+       the search page.
+
+       * whoisi/templates/person.kid: Add a display="full" argument to
+       the person widget when we're displaying the person.
+
+       * whoisi/widgets/widgets.py (PersonWidget): Add a "display"
+       argument to the person widget.
+
+       * whoisi/widgets/templates/person.kid: Move the "Add a New Site"
+       link to the top of the person display widget so that it's more
+       visible.  This also means that when you're adding a new site that
+       the editing is done at the top of the entry instead of the bottom
+       which feels much more natural.  Also add support for passing in
+       the display type so we can re-use this template from the search
+       page.  Fix the display of the person so it links to the actual
+       person's page instead of to nothing.
+
+2008-03-07  Bryan W Clark  <clarkbw@gnome.org>
+
+       * services/command/service.py (SubService): Removed the hardcoded
+       home directory "/home/blizzard/src/whoisi/" and did a path lookup
+       using the current file.  We need the root directory so we have to
+       join the current file with 2 ".."'s.  Also needed to use the
+       os.path.join in the start function, replacing the self.dir +
+       self.name since now the self.dir doesn't have a needed trailing
+       slash for directory addition.
+
+2008-03-07  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * services/command/newsite.py (NewSiteTryURL): Change the way that
+       we finish loading a feed.  In the case where someone gave us a url
+       that turned out to be html and we scraped out a feed we now handle
+       downloading that feed from NewSiteTryURL directly.  We used to
+       pass that off to another command, but that meant we created a
+       half-finished Site object which could leave a mess behind.  Now we
+       don't create the Site object unless we know that we got a valid
+       URL and a valid RSS feed.  Also clean up the way that we set "url"
+       and "feed_url" so that it's consistent before we hit tryFeed
+       and/or createSite.  This fixes not only the site creation bug but
+       also means that we properly return errors whenever someone passes
+       us an invalid URL or an invalid/unreadable feed.  Yay!
+
+       * services/command/controller.py (NewSiteManager): Remove the
+       FeedDownloadCommand - we do that from inside of NewSiteTryURL
+       instead.
+
+2008-03-06  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/model.py (Site.getOrderedHistory): Add a second sort that
+       will sort by the 'touched' column.  This is for blogs (I AM
+       LOOKING AT YOU LUKE MACKEN) that don't have any date information
+       in their feeds.
+
+2008-03-06  Christopher Blizzard  <blizzard@0xdeadbeef.com>    
+
+       * services/command/newsite.py (NewSiteTryURL.loadDone): Use an
+       exception to generate the need_pick signal when adding a new site.
+       This will use the new exception and pass the feed data up to the
+       handler through the exception's data member.  Remove the
+       needPick() method and handlers as they are not needed anymore.
+       (NewSiteError.handleError): Add code to handle the need pick
+       exception and update the new_site table with the right data.  Also
+       add a different callback that will return success from the error
+       handler.  (Confused yet?)
+
+       * services/command/base.py (CommandManager.subSuccess): Sweep away
+       the final remnants of the Old Republic by removing the state
+       "stop" and "error" checks.  If we want to stop or generate an
+       error we use exceptions.  Like adults.
+
+       * tests/twisted/network/test_newsite.py: New test for returning
+       multiple feeds and making sure they are valid.
+
+2008-03-05  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/widgets/templates/flickr.kid: Add size information to the
+       image so that there's some feedback before images finish loading
+       and it doesn't just look like a blank empty space.
+
+       * whoisi/widgets/templates/picasa.kid: Same.
+
+2008-03-05  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/static/tests/*: Add some files for testing errors in the
+       new site code.
+
+       * services/command/controller.py (NewSiteManager): Add the error
+       handler to the new site code.  Yay!
+
+       * services/command/exceptions.py (InvalidFeedError): New exception
+       for an invalid feed.
+
+       * services/command/newsite.py: Convert old state["error"] style
+       error handling to the new twisted errback handling.  In the
+       process fix a couple of bugs.  Some errors like an invalid url
+       actually return an error now.  Invalid RSS needs work as the test
+       cases reveal.
+
+       * tests/twisted/network/test_newsite.py: Add some tests for
+       loading a page not found, a test for loading a page with no feed
+       in it, a test for a link to a feed that's invalid and a direct
+       link to an invalid feed.  Right now the html page -> invalid test
+       still fails because it turns out the code is broken (and it's in
+       the todo list.)
+
+2008-03-04  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/widgets/templates/twitter.kid: Change the order of
+       display so that the "7 minutes ago" comes after the message, just
+       like every other type of element.
+
+2008-03-04  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/utils/display.py (confirm_escape): Add html entities to
+       the possible escape list - see shaver's blog.
+
+2008-03-04  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * services/command/newsite.py (NewSiteTryURL.getFeedType): Fix bug
+       where I wasn't passing the url to isTwitterURL - breaking adding
+       new sites.
+
+2008-03-03  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * tests/nose/test_newsite.py: Add simple tests that test the
+       preferredURL handling for twitter.
+
+       * services/command/newsite.py (NewSiteTryURL.loadDone): Add hooks
+       to get the preferred feed when there's a list of feeds.
+       (NewSiteTryURL.getPreferredFeed): Actual code that picks the right
+       feed.  This is the hook where we should add version stuff too.
+       Most feeds have common names (Atom, Atom 1.0, RSS, RSS 0.9) and
+       they are all very regular - should be easy to figure out the feed
+       we should be using automatically.  We could also just drop
+       anything with the word "comment" in it.
+
+       * services/command/twitter.py: New file for twitter-related
+       command stuff.  Right now it just contains a helper class that
+       lets you detect if you're looking at a twitter url and a class
+       that will pick out the preferred feed from a list of twitter
+       feeds.
+
+2008-03-03  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/utils/display.py (confirm_escape): New function that
+       looks at a string and makes sure it's escaped.  Yes, this should
+       go in on the backend but a python stack trace is always sad-making
+       no matter what.
+
+       * whoisi/widgets/templates/weblog.kid: Make sure that urls and
+       titles are escaped.
+
+2008-03-03  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * tests/twisted/network/test_download.py: New 307 test case.
+
+       * services/command/feedparse.py (FeedDownloadCommand.doCommand):
+       Use localDownloadPage instead of the one in the twisted web
+       client.
+
+       * services/command/download.py: Make a localDownloadPage method
+       that's a copy of downloadPage found in the twisted web client api.
+       We need to do this because we need to add a 307 handler.  Phik's
+       blog for some reason returns a 307 for the feed and it was just
+       erroring out.  As Joe points out, that's probably not valid for a
+       HTTP/1.0 request like twisted makes, but whatever.  We should
+       handle it.
+
+2008-03-03  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * services/command/newsite.py (NewSiteTryURL.getFeedType): Fix bug
+       where urls like 'flickr.com/photos/...' weren't being recognized
+       as flickr feeds.
+
+2008-03-03  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/widgets/templates/person.kid: Make sure to pass the
+       display="full" to the widgets.
+
+       * whoisi/widgets/templates/twitter.kid: Display param.
+
+       * whoisi/widgets/templates/picasa.kid: Display param.
+
+       * whoisi/widgets/templates/weblog.kid: Display param.
+
+       * whoisi/widgets/templates/linkedin.kid: Display param.
+
+       * whoisi/widgets/templates/flickr.kid: Display param.
+
+       * whoisi/widgets/widgets.py: Change all the widgets to support the
+       "display" param.  (Probably not needed, but it's here anyway.)
+
+       * whoisi/widgets/templates/personadd.kid: Move code to render the
+       person add text from this widget as it's used in the search with
+       results as well as without now.
+
+       * whoisi/templates/index.kid: -> whoisi.com
+
+       * whoisi/templates/search.kid: Lots of code and look changes so
+       that the search results are pleasing.  We render sites for people
+       now using the new display="search" param to a site render widget.
+       I also added code that toggles the "Add X not found here" add
+       field so we can add people from the search page.
+
+       * whoisi/controllers.py: Add code that passes down a
+       "display=full" to sites when they are being rendered from a new
+       site result.  Otherwise we just get the summaries when we've added
+       a new site.
+
+2008-03-02  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * tests/twisted/network/test_feedparse.py: Add a pile of notes
+       about what we need to add here for tests.
+
+       * tests/twisted/local/test_feedparse.py: Two tests that test at a
+       very high level parsing and updating the database.  Right now
+       there are some simple assertions to make sure that data is being
+       put into the database and that the right number of entries is
+       there.  Not comprehensive at all.  Also contains a test with a
+       feed without IDs in it to test that code in the feedparser.py
+       module.  Similarly incomplete, but it did find some bugs in the
+       code I wrote.  Data for this test in the data/ dir.
+       
+       * whoisi/model.py: Add a note about what lastUpdate means to the
+       Site object.  Add a 'touched' value to the SiteHistory object.
+
+       * services/command/database.py: We no longer catch database error
+       when they happen and try to reconnect to the database.  The
+       underlying bindings seem to do this whenever the server goes away
+       anyway.  And it didn't work.
+
+       * services/command/feedparse.py: Lots of changes here with a few
+       goals: we only update items when there's an actual change.  We
+       also set a 'touched' timestamp on a site_history item when we make
+       an insert or update on it.  We also support feeds that don't
+       include IDs (which would have broken us _badly_.)  Also should be
+       a lot more robust in a lot of areas.  Also, when we're doing a set
+       of inserts + updates we also handle errors at the database level a
+       lot more gracefully.  Had to re-read the docs on DeferredList a
+       few times.  In the past last_update on the site object used to be
+       compared to the feed last update and we would use it to skip the
+       entire update process.  I don't trust feeds to do that right, so
+       we're using it instead as a "last time something changed on the
+       site" timestamp.  We only update it when we do an insert or update
+       on the site_history.
+
+2008-02-27  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * tests/twisted/network/test_picasa.py: New tests for picasa
+       additions.  Just a simple success + failure test for now.
+
+       * services/command/feedparse.py: Just some formatting cleanups.
+
+       * services/command/service.py (ParseProcess.lineReceived): Make
+       sure to return twisted failure objects instead of strings.  If we
+       got a string that we recognize from the subprocess, don't keep
+       looking for matches.  If an error returns an arg, pass it along
+       with the exception.
+
+       * services/command/picasa.py (Picasa.userForPath): Remove dead
+       code that had a return before it - it was never called.
+
+       * services/command/newsite.py (NewSiteError.handleError): Fix bug
+       where we were adding the callback to the wrong deferred.  (How did
+       this ever work?)
+
+       * services/command/exceptions.py (ServiceSubprocessError): New
+       exception that's thrown when we have a subprocess failure of some
+       kind.
+
+       * services/command/controller.py (NewPicasaManager): Add the
+       NewSiteError error handler.
+
+2008-02-25  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * services/command/feedparse.py (FeedUpdateDatabaseCommand): Bug
+       fix.  We added some columns to the site_history table and we do
+       have a bit of code that does select * and then picks off data by
+       offset.  So flickr photos were coming up with bogus data in their
+       display cache.
+
+2008-02-25  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * tests/twisted/network/test_linkedin_refresh.py: Add a check to
+       make sure that the lastPoll value in the model is set after we do
+       a refresh.
+
+       * services/command/siterefresh.py (RefreshSiteDone): Add code so
+       that we update last_poll in the database after we're done with a
+       refresh.  Needs site_id set in the state, which all of the refresh
+       commands do.
+
+2008-02-25  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * services/command/linkedin.py: Convert our update command to use
+       "site_id" instead of "id" in the state so we can use it later to
+       update the database.
+
+2008-02-24  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * services/command/feedparse.py (FeedRefreshSetup): Take the site
+       refresh id and get the site id before finishing.  Also save the
+       site refresh id so that we can use it to update the database
+       later.
+
+       * services/master/database.py (DatabaseManager.startRefresh):
+       Change calling convention to just pass down the id, not the
+       site_id.
+
+       * services/master/feedrefresh.py (FeedRefresh.startProcess): Don't
+       update the database when we're done with a refresh - that's up to
+       the controller now.
+
+       * services/command/controller.py (RefreshManager): Use the
+       RefreshSiteDone() call to update our status when we're done with a
+       refresh.
+
+2008-02-24  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * services/command/newsite.py (NewSiteTryURL.createSite): Set the
+       "created" time when adding a site.
+
+       * services/command/picasa.py (PicasaCreateCommand.doCommand): Set
+       the "created" time when adding a site.
+
+       * services/command/feedparse.py (FeedUpdateDatabaseCommand): Set
+       the time for the "added" field when adding a site history entry.
+
+       * whoisi/model.py (Site): Add a "created" field to the site so we
+       know when it was added to the database.
+
+2008-02-23  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * services/command/controller.py (PicasaRefreshManager): Add the
+       RefreshSiteDone() command to the end of the chain - it updates the
+       state in the database.
+
+       * services/command/picasa.py: Use the site_refresh id instead of
+       the site id.  Requires looking up the site.  Store that id in the
+       state as site_refresh_id which is used when we're done with a
+       refresh to set the done or error state.
+
+       * services/master/database.py: Pass down the id of the
+       site_refresh, not the id of the site.
+
+       * services/master/picasa.py: Use site_refresh id instead of the
+       site id when telling the controller to start a refresh for picasa.
+
+2008-02-23  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * services/command/siterefresh.py: New file that contains some
+       commands related to refreshing sites.  RefreshSiteDone() and
+       RefreshSiteError() know how to update the site_refresh table with
+       the right status.
+
+       * services/command/controller.py (LinkedInRefreshManager): Add an
+       error handler.  Call RefreshSiteDone() once we're done with a
+       refresh to update the database with a "done" flag.
+
+       * services/command/linkedin.py (RefreshLinkedInSetup): Add another
+       call to the database to translate the site_refresh_id to the
+       site_id.  We save the site_refresh_id in the state because it's
+       used in the error and complete case once the linkedin refresh is
+       done.
+       (LinkedInScrapeCommand.doCommand): Testing hooks to test
+       add/change/delete when we parse linkedin entries.
+       
+       * services/master/database.py (DatabaseManager.startRefresh): Only
+       pass the id of the refresh, not the site id.
+
+       * services/master/linkedin.py: Change the way we start a linkedin
+       refresh to use the refresh id instead of the site id.  This way
+       the controller can update the status once it's done with something
+       useful (an error, for example) instead of it being done in the
+       master code.  Stop updating the site_refresh table from the
+       linkedin code.  Will do the other refreshes later.
+
+       * whoisi/model.py (SiteRefresh): Add an 'error' field so we can
+       save what kind of error we had.  Will be used later.
+
+       * tests/twisted/network/test_linkedin.py: Bug fix in
+       confirmCreateTimes() - make sure to assert that we have a site
+       before calling sync() on it.
+       
+       * tests/twisted/network/test_linkedin_refresh.py: Set of tests for
+       testing linkedin refreshes.  Simple tests so far that test a
+       simple success, a failure, no changes, an addition, and a
+       deletion.  Also checks to make sure that the current entry has not
+       changed and that the changelog that's created contains accurate
+       information.
+
+2008-02-21  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * tests/twisted/local/test_newsite.py: New site test moved to the
+       network tests.
+
+       * tests/twisted/local/test_commandmanager.py: Tests for the
+       command manager.  Tests what happens if you raise an exception
+       from a command or throw an errback.  Has success tests as well.
+
+       * tests/twisted/network/test_download.py: Add a check for
+       RUN_LONG_TESTS before running the connection timeout test.  Will
+       display [SKIPPED] if it's being skipped.
+
+       * tests/twisted/network/test_newsite.py: One simple test that adds
+       a new site and tries to run it to completion.  Needs tons and tons
+       more tests.
+
+       * tests/twisted/network/test_linkedin.py: New tests that test
+       adding a new linkedin site.  Includes pretty good coverage, but
+       far from complete.  Tests a lot of error conditions as well by
+       poking errors into the command manager.
+
+       * runtests.sh: Set a variable RUN_LONG_TESTS=1 if you want a long
+       test to run (connection timeouts, etc.)
+
+       * services/command/controller.py (NewLinkedInManager): Add
+       NewSiteError as the error_handler on the manager.
+
+       * services/command/newsite.py (NewSiteError): New class that is
+       the error handler for newsite style commands.  Only used by the
+       linkedin new code so far.  Will set the "error" flag for a new
+       site based on the original id that was passed into the command
+       manager.
+
+       * services/command/download.py: the DownloadCommand now handles a
+       couple of test states that will cause test failures.  It will also
+       return a proper Failure object if a call or download fails.
+
+       * services/command/linkedin.py: Adding a new linkedin site now has
+       proper error checking and test cases.  Yay!  Lots of use of the
+       getTest() call to generate errors in a lot of places.  Inserting a
+       new site now properly sets the dates on the site object.
+
+       * services/command/exceptions.py: New file that contains
+       exceptions used by commands.  Just contains a PageNotFoundError
+       exception so far.
+
+       * services/command/base.py: Solid error handling for the
+       CommandManager class.  You can now set a error_handler on the
+       command manager that will be called when there's an exception or
+       failure.  We now properly handle direct exceptions from doCommand
+       calls to subcommands and properly handle errbacks and callbacks
+       from subcommands.  Subcommands are now required to return proper
+       Failure objects via twisted.  You can also poke a value into the
+       "testfail" state in order to generate failures in commands.  Also
+       the BaseCommand class now has a getTest() method that will return
+       the testfail value.
+
+       * whoisi/model.py (Site): Add 'created' column so we can know when
+       a site was added.
+       
+2008-02-17  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * tests/twisted/local/test_newsite.py: First unit test for the
+       entire NewSiteManager code.  Many more to follow.
+
+       * tests/twisted/network/test_download.py: Unit tests for the
+       download code.  Test some various connection failures, DNS lookup
+       failures and a commented out connection timed out error.  Really
+       just testing the framework more than the code itself.
+
+       * start-test-db.py: Startup script for creating a new, clean test
+       database and starting it up.  Need this for automated tests.
+
+       * start-test-whoisi.sh: Startup script for starting a test
+       instance of the web server.  Used for running automated tests.
+
+       * services/command/newsite.py: Minor changes - move the deferred
+       creation to __init__ and comment out some debug spew.
+
+       * services/command/base.py (BaseCommand.doCommand): Change the
+       signature to require a state that's passed in.  Everyone uses it,
+       might as well change the base signature.
+
+       * services/command/download.py: Change the download command to be
+       a little more like the other commands.  If there's an error in
+       doCommand() directly try to catch it and still pass it as part of
+       an errback so we don't need any special handling.  Use callbacks
+       that are member methods instead of global functions so we can use
+       the right errback as well.
+
+       * services/command/database.py: Add an arg to the
+       DatabaseCommandManager startup code that takes the connection
+       parameters to be used.
+
+       * controller-service: Add code to pass in the connection type to
+       the DatabaseCommandManager - needed to connect to test databases.
+
+2008-02-16  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * controller-service: Moved code to controller.py.
+
+       * services/command/controller.py: Move code from
+       controller-service to this file so that we can build tests around
+       it.  controller-service is a very simple chunk of code now.
+
+       * services/command/download.py (DownloadCommand.doCommand): If
+       someone doesn't pass in the url as an argument then try and get it
+       from the state instead.  Need to fix this later and have one way
+       to do it.
+
+       * services/master/newsite.py (NewSite.normalize): Add code to
+       recognize a linkedin.com/pub/ style public url.
+
+       * whoisi/utils/flickr.py: Make sure to escape the title for an
+       image - it can contain html and make the xhtml stuff in tg freak
+       out.
+
+       * tests/nose/test_newsite.py (TestNewSite.test_linkedin): Add a
+       test for the linkedin.com/pub/* style url.
+
+2008-02-15  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * feed-parse-service (FeedParseProtocol.runCommand): Make sure to
+       set an empty "display_cache" item on an entry item.
+
+       * whoisi/widgets/templates/person.kid: Add support for picasa.
+
+       * whoisi/widgets/templates/picasa.kid: Template for picasa
+       widgets.  Pretty similar to the flickr widget.
+
+       * whoisi/widgets/templates/flickr.kid: Render 10 images (two rows)
+       instead of one.
+
+       * whoisi/widgets/widgets.py: New code to support the picasa
+       widgets.
+
+       * whoisi/utils/sites.py (site_value): Add picasa after flickr in
+       the order of sites as they are rendered.
+
+       * whoisi/utils/picasa.py (picasa_get_summary): Not sure we need
+       this.  Return a summary or "" if there isn't one.
+
+       * whoisi/controllers.py: Render the picasa widget when someone has
+       it in their feed.
+
+       * tests/nose/test_newsite.py: Code to test linkedin and picasa new
+       sites.  Make sure that picasa urls are parsed into usernames
+       properly.
+
+       * picasa-poll-service: New service that will get the picasa data
+       feeds and parse out the updates.  Largely a wrapper for code in
+       the picasa command.
+
+       * services/command/picasa.py: New picasa commands and support.
+       Parse usernames into urls.  Code that will also parse the output
+       from a google data feed and output it into the same format that
+       the feedupdate command will suck into the database.
+
+       * services/command/feedparse.py (FeedUpdateDatabaseCommand.doCommand):
+       
+       Add a new state "feed_parsed_filename" that lets us pass in a
+       filename instead of just passing it in as an arg to the command.
+       Fix issues where an update will remove cached display_info that's
+       stored in the row.
+
+       * services/master/database.py: New code to support picasa.
+
+       * services/master/picasa.py: New class to handle picasa refreshes.
+
+       * services/master/newsite.py: Remove bogus docs.  Delete code that
+       called startPoll() - that code hasn't existed for a while.  Add
+       support for picasa.
+
+       * controller-service: Support for picasa feeds.
+
+2008-02-06  Christopher Blizzard <blizzard@0xdeadbeef.com>
+
+       * Add linkedin widget for display.
+
+       * Add support to all the services to refresh linkedin
+       pages (different than feeds.)
+
+2008-02-05  Christopher Blizzard <blizzard@0xdeadbeef.com>
+
+       * Add support for twitter to the feed and display code.
+
+       * Start adding code to support linkedin.
+
+       * Start adding some simple test cases to start testing the
+       linkedin code which is not done yet.  This is a huge chunk of
+       code.  Most everything is in tests/ - need to support nose-style
+       and trial-style tests.
+
+2008-01-25  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * services/command/newsite.py (NewSiteTryURL.feedLoadDone): Fix
+       bug where the link that's included in a feed isn't used to update
+       the URL in the database.
+
+       * whoisi/widgets/templates/weblog.kid: Move the widget into its
+       own div so we can put properties on the link-collection-item div.
+       We'll use this at some point in the future once weblog items
+       contain images.
+
+       * whoisi/widgets/templates/flickr.kid: Put the widget in its own
+       <div> that is removed so we can put properties on the
+       link-collection-item div.  Use the new thumbnails helper function
+       to fill in known thumbnails.
+
+       * whoisi/utils/flickr.py (flickr_fill_thumbnails): Helper function
+       for the flickr widget that's used to figure out which thumbnails
+       are found and which ones aren't.
+
+       * whoisi/source/flickr-blank-75x75.svg: Source image for the
+       "blank" flickr image that's used when we don't have the thumbnail
+       location yet.
+
+       * whoisi/controllers.py (Root.rendersite): Add new method that
+       takes a site and finds the right widget to render it.
+       (Root.siteaddstatus): Use the rendersite method to pick the right
+       kind of render widget so we can support more than just weblogs.
+       (Root.siterefresh): Super-simple method that just returns a
+       link-collection-item based on the id.  Used by the JS refresh code.
+
+       * whoisi/static/css/style.css: Fix long-standing typo
+       "margin-botton" -> "margin-bottom."
+
+       * whoisi/static/javascript/person.js: Add code so that we can use
+       console.log in production code.  When we're adding a new site and
+       we've finished loading look to see if we need to refresh it after
+       the fact for images, etc.  Needed for the new flickr code and will
+       eventually be needed when we add images to the blog entries.  New
+       prototype class "RefreshSite" that handles updating one
+       link-collection-item on a person page.  Automatically add it
+       during page load to any l-c-i that contains needs-refresh="True"
+       on the item.  Also needs the site-id set as a property on the
+       l-c-i.
+
+2008-01-22  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * services/command/newsite.py (NewSiteTryURL.feedLoadDone): Update
+       the site url from the feed link.  This was a bug that I found when
+       reading the code.
+       (NewSiteTryURL.createSite): When creating the site make sure that
+       we get the feed type.  Default is "feed".
+       (NewSiteTryURL.getFeedType): New method that looks at the urls
+       that are being passed in and returns a feed type.
+
+       * services/command/flickr.py: New file that has code that caches
+       thumbnail urls for flickr images.
+
+       * services/master/database.py (DatabaseManager): New support for
+       polling flickr images to get thumbnail urls.
+
+       * services/master/newsite.py (NewSite.normalize): Dead code that
+       identifies flickr - I'm leaving here for historical purposes.
+
+       * LEGAL.txt (flickr): Notes about flickr terms of service and API
+       key.
+
+       * whoisi/widgets/templates/flickr.kid: Add first pass flickr
+       widget.  It needs a lot of work but it will display flickr
+       photos in the database.
+
+       * whoisi/widgets/widgets.py (SiteFlickrWidget): Support for the
+       flickr widget.
+
+       * whoisi/widgets/templates/person.kid: Support for the new flickr
+       widget.  Also pull out the flickr demo html.
+
+       * whoisi/model.py (SiteHistory): Add a "display_cache" member that
+       caches information that we need for display.  For example,
+       thumbnail urls for images from flickr.
+
+       * services/controller-service: Changes to support flickr.
+
+       * services/controller-service (FlickrCacheManager): New method
+       that knows how to go out and find the thumbnail url for a new
+       image.
+
+       * services/controller-service (LocalControllerProtocol.doCommand):
+       New "flickr-cache" command that gets the flickr thumbnail url for
+       an image id.
+
+       * services/master/flickr.py (FlickrCache.startProcess): New class
+       that will start a flickr cache action.
+
+       * services/command/xmlnode.py: Convenience class that comes from
+       the flickrapi code to convert flickr responses into a walkable
+       tree.
+
+2008-01-21  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * services/command/feedparse.py (FeedRefreshSetup): New class that
+       sets up the state for a feed refresh from a command and the
+       database.
+       (FeedUpdateDatabaseCommand.doCommand): Save the state when
+       refreshing a feed.
+       (FeedUpdateDatabaseCommand.start): Add option to the state to
+       force an update and make sure that we return after kicking off
+       updating all entries.
+       (FeedUpdateDatabaseCommand.getBestContent): Add method to get the
+       best content off of a content node.  Right now it just looks for a
+       text/html entry, which is totally bogus.  Needs work.
+       (FeedUpdateDatabaseCommand.insertEntry): Make sure to add the
+       content to a database entry.
+       (FeedUpdateDatabaseCommand.updateEntry): Make sure to add the
+       content to a database entry.
+
+       * services/master/database.py (DatabaseManager.getRefreshSites):
+       Just get the refresh information from the refresh table directly.
+       (DatabaseManager.startRefresh): Don't include the feed in the
+       argument list (it comes from the database now.)
+
+       * services/master/feedrefresh.py (FeedRefresh.startProcess): Don't
+       include the feed in the command (we get it from the database.)
+
+       * services/controller-service (RefreshManager.__init__): Fixup to
+       support refreshing feeds with new setup, download and parse
+       commands.
+       
+       * whoisi/model.py (SiteHistory.getText): Helper function that
+       returns the content or the summary, whichever one is available.
+
+       * whoisi/static/css/style.css: Add div.weblog-summary to the list
+       of block elements that are indented.  It just wasn't visually
+       clear enough if there wasn't an indentation on the summary for a
+       weblog entry.
+
+       * whoisi/widgets/templates/weblog.kid: Changes to support summary
+       code.  Assumes that the summary will always generate valid XML!
+
+       * whoisi/summary.py: New class that does a quick summary of a
+       weblog.  Tries to limit the size of it to under 150 words or so
+       and will tell you when there's undisplayed data.  Very simple and
+       needs work.
+
+2008-01-16  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * README.txt: Add a note about setting up the my.cnf file properly
+       before creating tables.
+
+2008-01-15  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * dev.cfg: Add ?charset=utf8 to the dburi so that we connect in
+       utf8 mode and things come back in unicode.
+
+       * services/command/download.py (DownloadCommand.doCommand): Force
+       the URL to be in ascii, not unicode.  The URL downloader can't
+       cope.
+
+       * services/command/database.py (DatabaseCommandManager.start):
+       Make sure to connect to the database with utf8 as the charset.
+
+       * services/command/feedparse.py (FeedDownloadCommand.doCommand):
+       Force the url to be in ascii, not unicode.  The URL downloader
+       can't cope.
+
+       * whoisi/model.py: Comment out the wrapper functions that we're
+       not going to use since the data in the database is in utf-8 and is
+       trusted.
+       (Site.getOrderedHistory): New function that will
+       return a set of site entries in reverse order.
+       (SiteHistory.getAge): New function that will return the age for an
+       entry in a friendly text format.
+       (SiteHistory.getLastTouched): New function that will return the
+       last time an entry was touched based on the updated + published
+       entries.
+
+       * whoisi/static/images/sites/feed-icon-16x16.png: New icon for a
+       feed.
+
+       * whoisi/widgets/templates/person.kid: Remove mockup text.  Only
+       use the weblog widget if the type for a site is a feed.
+
+       * whoisi/widgets/templates/weblog.kid: Set up to be able to show
+       entries without a title or without content.  Use the feed icon
+       instead of the blogger.com icon.  Remove mockup static text.  Use
+       the new site.getOrderedHistory() to get the history for a site.
+       Also display the age in a friendly manner.
+
+2008-01-09  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/templates/person.kid: Move everything over to the
+       dedicated .js file in static/javascript/person.js.  Fix typo on
+       target URL: should have been "/search", not "search".  Add
+       new_sites object to the person widget during rendering.
+
+       * whoisi/templates/search.kid: Use pretty_search instead of just
+       the search name.  Use a widget instead of a hand-coded search
+       form.
+
+       * whoisi/controllers.py (Root.search): Use the pretty names when
+       displaying the results of a search.
+       (Root.personadd): New method that adds a new person to the
+       database and then redirects them to the new URL.
+       (Root.person): New code that pulls new sites out of the database
+       and displays them along with the rest of the normal sites that a
+       person has listed.  Note that the ordering is important because in
+       progress sites can be added to the site list in between queries.
+       (Root.siteaddpost): Add a "status" flag to the return json
+       objects.  It's used by the JS to figure out what the next step is.
+       Also let the JS in the page handle the hiding/showing of the add
+       link.
+
+       * whoisi/search.py (SearchService.prettifyName): New method that
+       will take a search name and make a "pretty" version of it.  For
+       example, "chris blizzard" becomes "Chris Blizzard".  We use it for
+       display and when we jam things in the database for the first time.
+       Try and make things look nice.
+
+       * whoisi/model.py (NewSite): Add some dead code here.  It's a
+       decent example of how to do some complex stuff so I'm leaving it
+       in even though I never ended up using it.
+
+       * whoisi/widgets/templates/siteaddpickfeed.kid: Change the classes
+       here to use url-pick and url-pick-list so that we don't step on
+       the item collections used for a lot of traversal in the JS for
+       the page.
+
+       * whoisi/widgets/templates/person.kid: Support for rendering
+       in-progress loads (because this is the page you land on when you
+       add a person for the first time!)  Also add explicit support for
+       sites that are in the "pick_url" state and display that widget
+       instead of just the loading status widget.
+
+       * whoisi/widgets/widgets.py (PersonWidget): Require the person.js
+       static file.  Also pass in the new_sites into the widget so we can
+       render in-progress stuff.
+       (SiteWeblogWidget): Don't pass in a Widget object here.
+       (PersonAddWidget): New widget that lets you add a person based on
+       a site and name.
+
+       * whoisi/widgets/templates/personadd.kid: New widget for
+       displaying a form to add a person based on a URL.
+       
+       * whoisi/static/javascript/person.js: New file that contains all
+       the JS for the person widget.  Uses class-like things, or at least
+       as well as JS supports such things.  Lots of stuff taken from the
+       person.kid file but object-i-fied.
+
+       * whoisi/static/css/style.css: add div.url-pick-list and
+       div.url-pick classes so that link-collection-item classes can be
+       identified from JS and jQuery.
+
+2008-01-02  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * services/master/states.txt: New file that talks about the states
+       that are managed by the master process.  Only one in here so far
+       is the new_site table.
+
+2007-12-31  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * services/feed-parse-service (FeedParseProtocol.runCommand): Add
+       the feed's "link" field to the list of things that we catch.  This
+       is often the high level URL that's associated with a site and the
+       one that we will display to users.
+
+       * services/controller-service: Big change.  Move most of the new
+       site handling down into the controller service.  The master now
+       just tells the controller about a new site and it does most of the
+       hard work.
+       (NewSiteManager): New class that handles the new site handling.
+       (LocalControllerProtocol.doCommand): Support "new-site" as a
+       command.
+
+       * services/command/newsite.py (NewSiteSetup): New command and new
+       file to support getting the new site out of the database and
+       processing it.  Much improved vs. the master method since it does
+       most of its processing in a single class and can make
+       decisions.  (No round trips and changes that have to go back to
+       the master process.)
+
+       * services/command/newsite.txt: Text file that describes some
+       strategies for discovering feeds on text.  We don't do a lot of
+       these things yet, but we do implement some of them.
+       * services/command/base.py (CommandManager.subSuccess): Important
+       change here.  When a subcommand returns a string in its state
+       called "error" stop processing.  Eventually we'll add an error
+       handler for each command (not done yet) which we'll call when we
+       get an error.  This is important because it means that in general
+       errors should be reported here and that errbacks should only be
+       used when there's an internal error of some kind.  Uncaught
+       exception, etc.
+
+       In addition we support a "stop" flag set in the state which will
+       just stop processing without an error.  The only code that uses
+       this so far is the new-site command can stop processing because it
+       needs the user to pick a url from a list of feeds that are
+       available.
+
+       * services/command/feedparse.py (FeedDownloadCommand): New command
+       that will download a url.  Same as the download service except it
+       will look in the state for a key that points to a filename for an
+       already downloaded file.  This happens during new-site when we
+       download a url and it's detected as a feed.  We don't want to
+       re-download it.
+
+       * services/command/feedparse.py (FeedParseCommand): Change this
+       command to look in the state for "try_url_parsed_feed_filename".
+       Same as in the download command we might have already parsed this
+       download and we should just use that instead of re-parsing.
+
+       * services/command/feedparse.py (FeedUpdateDatabaseCommand.doCommand):
+       Look in the state for "site_id" instead of just "id"
+
+       * services/command/feedparse.py (FeedUpdateDatabaseCommand.start):
+       Pull out the url as well as the last update when checking to see
+       if the feed needs to be updated.  We'll need this later to update
+       the "url" field in the site description.
+
+       * services/command/feedparse.py (FeedUpdateDatabaseCommand.gotUpdate):
+       Support changes for updating the URL.
+
+       * services/command/feedparse.py (FeedUpdateDatabaseCommand.updateSite): 
+       Update the url field in the site object with the "link" object
+       from the feed, if it happens to be set.
+
+       * services/html-feed-scrape-service: Change the way that we search
+       for links in HTML in an attempt to make it easier to detect an RSS
+       url vs. an HTML URL that's passed in when adding a site.  Now we
+       return a flag called "looks_like_html" that is set when someone
+       passes us an html page that includes the <html> and <head> tags.
+       Also handle a parse exception which is what we get when we try to
+       parse an RSS feed.  Generally if looks_like_html is False and
+       feed_url is also empty then it's not HTML.
+
+       * services/master/newsite.py: Rip out most of the new site
+       handling and put it into the controller service.  The only thing
+       that this does now is poll for new site requests and pass them off
+       to one of the controllers.
+
+       * services/master/database.py: Clean up the naming of some of the
+       variables.  Also poll for sites in the new_site table that have
+       "url_picked" in them and hand them off to the new-site handler
+       since it knows how to handle those.  The url_picked state on the
+       new_site handler is what is put in there when a user picks a feed
+       from a list.
+
+2007-12-29  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * services/master/feedrefresh.py (FeedRefresh.startProcess): New
+       file and class that turns requests in the database into work for
+       the master process and dispatches them.
+
+       * services/command/setup.py (FileToStateCommand.doCommand): Only
+       close the file if we opened it.
+
+       * services/command/feedparse.py (FeedUpdateDatabaseCommand): New
+       class that handles updating entries in the database.  This thing
+       is brute fucking force right now.  Just looks at what we currently
+       have in the database (yay, select *) and updates everything it can
+       or inserts new entries.  If we get a feed that's missing ids (they
+       apparently exist) this code will crap itself.  Needs a lot of work.
+
+       * services/master/database.py (DatabaseManager.__init__): Add
+       members to track when we're doing a refresh.
+       (DatabaseManager.getNewWork): Query the database for new sites to
+       refresh when getting new work.
+       (DatabaseManager.getRefreshSites): Call to query the database for
+       refreshing sites.
+       (DatabaseManager.gotRefresh): Same.
+       (DatabaseManager.startRefresh): Same.
+
+       * services/master/newsite.py (NewSite.linkResult): Add explicit
+       values to the last_update and last_poll params when we're
+       inserting a new site into the database.
+
+       * services/feed-parse-service (FeedParseProtocol.parsedTimeToSeconds):
+       Change this to return a simple array of 6 values that we care
+       about.  Used to return seconds since the epoch but this handles a
+       wider range and is easier to work with.  Confirmed that this is in
+       UTC.
+
+       * services/controller-service (RefreshManager.__init__): Call
+       FeedUpdateDatabaseCommand() once we're done downloading a feed.
+       Yay!
+       (LocalControllerProtocol.doCommand): Add the database manager to
+       the call to the refresh manager.
+
+       * whoisi/templates/person.kid (pick_site): Fix typo.
+
+       * whoisi/model.py (SiteHistory): Add note about having to add a
+       hash for a site to make things faster.
+       (SiteRefresh): New class that is a database table to trigger
+       refreshes.
+
+2007-12-28  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/templates/person.kid: Tons of javascript changes here.
+       Fix all the variables to be local instead of defaulting to
+       global.  (Bleh.  Too much python.)  Change setup_add_handlers() to
+       just setup_handlers because we do more than just set up the add
+       handlers in here.  Add support for the timeout on the status
+       widget or if the user clicks on the link to update it sooner.
+       Also add a handler for the picker.  Factor out some of the calls
+       from clicks into their own functions (status_update() and
+       pick_site()).
+
+       * whoisi/controllers.py (Root.siteaddform): siteaddform replaces
+       siteaddpre.  This is the form that's loaded when someone wants to
+       add a new site to a person.
+       (Root.siteaddpost): Create a NewSite object to request that a new
+       site be added to the database.  Also return the new status widget.
+       (Root.siteaddstatus): New method to handle queries from a page to
+       get status on a new feed that was just added.  This will either
+       return the new entry + another "Add Another Site" or it will ask
+       the user to pick a feed if there are more than one of them.
+       (Root.siteaddpick): New method to handle the result of someone
+       picking from more than one feed.  It updates the status for the
+       site request and cycles back to the status widget.
+
+       * whoisi/widgets/templates/siteaddpickfeed.kid: New widget
+       template that asks you to pick from multiple feeds for a site.
+
+       * whoisi/widgets/templates/siteaddstatus.kid: New widget template
+       that shows status as a new site is loaded.
+
+       * whoisi/widgets/widgets.py (SiteAddStatusWidget): New widget for
+       a "Loading..." indicator after someone adds a new site.
+       (SiteAddPickFeedWidget): Widget that lets you pick from multiple
+       feeds when more than one is offered.
+
+2007-12-26  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * services/master/database.py (DatabaseManager.runInteraction):
+       New method that takes a callable and a query to run in twisted's
+       interaction code for databases.  Function will be called from the
+       db's thread and it's there to do advanced stuff with the cursor
+       object.
+       (DatabaseManager.getNewSites): Add the person_id to the list of
+       things we want back from the new_site object.  We'll need it to
+       make the NewSite object later and attach the new site to the
+       person.
+       (DatabaseManager.gotNewSites): Same with the new person attribute.
+       (DatabaseManager.newSite): Same with the new person attribute
+
+       * services/master/newsite.py (NewSite.__init__): Add title, person
+       and site_id to the things we track.
+       (NewSite.startProcess): We don't need to pass in a data= argument.
+       That shouldn't have been checked in.
+       (NewSite.startProcess): Track the person that was passed into the
+       new site object.
+       (NewSite.linkResult): When we get a link we have to make sure
+       there's only one listed.  If there's more than one listed ask the
+       user to pick it (code not done yet, but it does update the
+       database.)  If we have one feed then create a new Site object and
+       attach the person to it.  We wait for notification that the site
+       object has been created.
+       (NewSite.newSiteInteraction): New callback for the database.  We
+       need this so that we can pick out the cursor's .lastrowid property
+       that mysql hands back.  We then update the new_site entry in the
+       database with that id when we update it with the feed refresh.
+       (NewSite.needPick): New function that updates the database with an
+       indication that the user needs to pick which feed to use.
+       Callback code to support all of this isn't done yet.
+       (NewSite.pickResult): Callback to let us know that the database is
+       updated with the need for picking.  This is where new code will
+       go.
+       (NewSite.pickResultFailed): Failed callback for updating the
+       new_site object.
+       (NewSite.siteSetupDone): Callback for when the site object has
+       been created.  Once we have that, and its ID, we kick off the feed
+       refresh command.
+       (NewSite.siteSetupFailed): Failure callback for a new site being
+       setup.
+       (NewSite.refreshDone): Callback to let us know that a feed refresh
+       has taken place.  Also updates the new_site table with a "done"
+       status and adds the site_id so that the new site can be shown to
+       the user.
+       (NewSite.refreshFailed): Failure callback for a refresh.  This
+       will need a bunch of code.
+       (NewSite.doneFinished): Final callback for finishing a site update
+       with state and site id information.
+       (NewSite.doneFailed): Failure callback for final update.
+
+       * services/controller-service (RefreshManager.__init__): Pass in
+       the service manager when initializing the refresh object and pass
+       it down to the feed parser.  It runs as a different program.
+       (LocalControllerProtocol.doCommand): Pass in the service manager
+       to the RefreshManager object.
+
+       * whoisi/model.py (NewSite): Add a ForeignKey('Person') to the new
+       site object.  We need to know which person a new site has to be
+       attached to.  Also add a ForeignKey('Site') that we add
+       later (default=None) once we know that the site has a feed.
+
+2007-12-26  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * services/command/download.py (DownloadCommand.__init__): Set a
+       name so that debugging code will work well.
+       (DownloadCommand.doCommand): Check that an incoming URL is
+       actually a url using formencode.  Also handle the state var as the
+       first argument to the command.
+
+       * services/command/database.py (DatabaseCommandManager): New code
+       that will run queries on behalf of commands.
+
+       * services/command/htmlscrape.py (ScrapeLinkCommand.__init__)
+       Support the new debugging name.
+       (ScrapeLinkCommand.doCommand): Remove some debugging output.
+       (StateFeedToDatabaseCommand): New class that takes information out
+       of the state about feeds that were found and puts them into the
+       database (the new_site table.)
+
+       * services/command/setup.py: A couple of simple setup commands.
+       IDURLSetupCommand takes an id and url as input and puts them into
+       the state so we can use them much later.  FileToStateCommand takes
+       json info out of a file and stuffs it directly into the state.
+       Useful for external commands that only drop a file and makes it
+       easy to pass that info back to another command.
+
+       * services/command/service.py: Lots of random debug info changes.
+       (ParseProcess.errReceived): Getting data on stderr isn't a fatal
+       error, but re-print it.
+
+       * services/command/feedparse.py: This is the protocol side of the
+       feed parser service.  Just proxies to an external process using
+       the ServiceManager.
+
+       * services/command/base.py (CommandManager.__init__): Add a new
+       "state" variable that we can use across commands.
+       (CommandManager.processCommands): Output interesting debug data as
+       we process commands.  Catch + report exceptions that are sent back
+       when we try and run various commands.  Re-raise them when we do so
+       that the protocol handler can report an error.  Also add the state
+       variable to the calls to the various subcommands.  They can use
+       state to pass information from one command to the next, or across
+       commands.
+       (CommandManager.subSuccess): Add code that outputs success
+       information and return values.
+       (BaseCommand): In __init__() always set a name to support the
+       debug code.
+       (BaseCommand.doCommand): Add the state variable to any calls to
+       doCommand()
+
+       * services/html-feed-scrape-service (ScrapeParser.handle_starttag):
+       Fix the tag handling so that we can keep track of generator and
+       pingback tags. We'll want those at some point for stat gathering.
+       (ScrapeProtocol.runCommand): Output the feed_url, pingback +
+       generator fields.
+       (ScrapeProtocol.runCommand): Make sure to return if someone sends
+       a bad command.
+
+       * services/master/database.py: New service that will connect to a
+       database and runs queries on behalf of other classes in a twisted
+       way (i.e. with deferreds.)  It's also where new work is generated
+       from the database.  i.e. it polls the new_site table looking for
+       jobs that need to be run.  At some point it should also handle
+       disconnects and then reconnect when things go bad.  I need to
+       figure out error chaining to make that possible so it doesn't do
+       it right now.
+
+       * services/master/newsite.py: Newsite is a class that's created
+       whenever someone adds a new site to a person on the web site.
+       Code in the database service in the master will create one of
+       these new objects if it runs into a new site in the database.
+       This class keeps track of the various jobs that need to be
+       executed in order to turn a simple URL into a site that we can
+       display on the site.  This is somewhat fragile right now: if the
+       master service is restarted or stops running, or a controller
+       service dies, it's likely we're going to lose this object and it
+       won't be finished.  Probably need to have something that looks for
+       old stuff during startup and re-creates all these objects to drive
+       state.  Or just resets everything and assumes it is new.  Much
+       refactoring to come to this, I think.
+
+       * services/master/worker.py: Most of the code that master drives
+       from the master-service.  Includes classes for each worker that we
+       connect to (each controller-service, really), includes code that
+       will try and farm out jobs to them and handle errors.  Also
+       includes a class for each command that's executed.
+
+       * services/feed-parse-service: First pass at a feed parser.  Takes
+       a file that's been downloaded as input, uses python-feedparser to
+       parse it and then dumps the interesting stuff to a file that's a
+       big json structure.  Seems to work reasonably well.  Doesn't do
+       any updating of the database or anything like that.  A different
+       command will do that.
+
+       * services/master-service: New service that listens for things to
+       do and then farms them out to controllers.  Will check for new
+       work every few seconds, reconnect to controllers when they go away
+       and try to keep track of who is handling what jobs.  Should also
+       try and restart jobs that fail on a particular controller and
+       resubmit them later.  Needs lots of love.
+
+       * services/protocols.txt: Updated to reflect very simple master <>
+       controller protocol.
+
+       * services/controller-service: Lots of work here.  Include support
+       for lots of new commands - html scraping, feed parsing, some
+       simple setup commands and database requests.
+       (ProtoManager.start): Add code that supports the uuid, command and
+       arguments to the start code for the base class for managers that
+       also report through the protocol handler to the master.  This is
+       used by classes that derive from ProtoManager and will report that
+       a job has started.
+       (ProtoManager.succeeded): Report command + uuid instead of just a
+       url.
+       (ProtoManager.failed): Report command + uuid instead of just a
+       url.
+       (LinkScrapeManager.__init__): The link scrape manager now has a
+       bunch of different subcommands that together will scrape the links
+       out of a particular URL.
+       (RefreshManager.__init__): Start of a feed refresh.  Just does set
+       up + download right now.
+       (LocalControllerProtocol.doCommand): When starting up the
+       LinkScrapeManager pass in the service manager (to manage
+       processes) and the DatabaseCommandManager (to manage database
+       requests that might be done.)
+       (LocalControllerProtocol.doCommand): Pass in the command + uuid to
+       the RefreshManager to start it.  Code not done yet at all.
+       (ControllerServerFactory.__init__): During init fire up the
+       database manager to establish connections to the database.
+
+       * services/protocol/controller.py (ControllerProtocol.lineReceived):
+       Add a "shutdown" command to the protocol to let someone shut down
+       the controller completely.  "quit" now closes that connection but
+       leaves the controller running.  Protocol now comes in the form of
+       <command> <uuid> <args> so we need code to track that.  Arguments
+       don't have to include a url so we don't validate against it.  We
+       pass all that down to the doCommand() method that keeps track of
+       everything as it is running.  Also try and put in some error
+       handling that reports errors we can follow up on.
+
+       * whoisi/model.py (Site): Remove bogus comment.
+       (SiteHistory): Add some more members to try and get a decent DB
+       model for a site's entries.  Includes title, link, entry_id (id
+       field in many feeds), the published and updated dates, summary and
+       content.  Also make sure that summary and content both go through
+       accessors that encode as base64.
+       (NewSite): Add the site_id to associate a new site request with a
+       site lazily (we don't have a foreign key relationship anymore.)
+       Also add the url, the status (really the state), data that might
+       be included as part of the request processing and any errors
+       generated.  Nothing uses the error field yet because we don't have
+       good error processing.
+
+2007-12-17  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/model.py (NewSite): Remove the owner.  That will be
+       managed by an external process, not in the database with the new
+       services.
+
+       * services/ First pass at services.  Written in Twisted, lots of
+       new classes and a couple of commands.  Still very much a work in
+       progress.
+
+2007-12-09  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/templates/person.kid (site_add_post_submit): Jump up to
+       the div that includes the link-collection-item class and use that
+       as the base to find the input elements instead of just walking up
+       a random number of parents.  Should work better into the future.
+       (site_add_pre_loaded): Same.  Also when rendering the person
+       widget don't pass in all the various widgets needed to render the
+       person.  The person widget will pull in as needed.  (See below.)
+
+       * whoisi/controllers.py (Root.person): Don't pass in all of the
+       different widgets needed to render a person.  The person widget
+       pulls them in by itself.
+
+       * whoisi/model.py (Site): Add a lastPoll member that indicates the
+       last time that we actually polled a site.
+       (NewSite): Add NewSite model item that kicks off an initial poll
+       of a web site once it's added to the database.  I suspect that
+       this will go away or change quite a bit.
+
+       * whoisi/static/css/style.css: Add a div.weblog-entry item that
+       has the same layout as the link-collection but lets us use classes
+       from jQuery later.
+       
+       * whoisi/widgets/templates/person.kid: Add the widgets directly to
+       an import statement for the person widget.  We shouldn't have to
+       tell code that calls this widget that they need to pass in a pile
+       of widgets since those other widgets all render parts of the
+       person object anyway.  Also change the link-collection-item div so
+       that it's rendered by the widget, not in the html in the template.
+
+       * whoisi/widgets/templates/search.kid: Use ${search or ''} form to
+       make sure that the value attribute ends up on the input element,
+       even if it's empty.
+
+       * whoisi/widgets/templates/weblog.kid: Weblog stub that will
+       eventually be the stub for a weblog entry.
+
+       * whoisi/widgets/templates/siteaddlink.kid: Change the site add
+       link to include the entire link-collection-item div instead of
+       just the stuff inside the div.  Easier to handle updates this way
+       in the long run.
+
+       * whoisi/widgets/templates/siteadd.kid: link-collection-item
+       change.
+
+       * whoisi/widgets/widgets.py (SiteWeblogWidget): New stub widget
+       that will be the template for web log entries.
+
+2007-12-05  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/model.py (Site): Add 'type' column that is the type of
+       site it is (flickr, feed, linkedin) as opposed to the type of
+       feed (atom, rss, scrape.)
+
+       * whoisi/templates/person.kid: Pass in the site_add_link when
+       displaying the person widget when it's passed from the controller.
+       (site_add_post_loaded): Callback for when the URL is submitted to
+       server for addition to a person.
+       (site_add_post_submit): Callback for when we want to submit the
+       URL to the server.
+       (site_add_pre_loaded): Callback for when the URL add form is
+       loaded.
+       (setup_add_handlers): Called when the document is first loaded and
+       also after a URL is submitted to the servers.  Sets up callbacks
+       on all the "add site" links.
+       ($(document).ready): Don't set up handlers directly, use
+       setup_add_handlers() helper.
+
+       * whoisi/controllers.py (Root.person): Add the
+       site_add_link_widget to the list of widgets that we pass into the
+       person template.
+       (Root.siteaddpre): Move to a JSON method instead of a method that
+       just returns a fragment of HTML.
+       (Root.siteaddpost): Set up as a JSON method that returns the HTML
+       fragment for the link for now and will eventually add a link to a
+       person.  UI flow kind of works now, though.
+
+       * whoisi/templates/person.kid: Change the icons to point to
+       /static locations so they start showing up.
+
+       * whoisi/widgets/templates/siteadd.kid: Update the widget to use a
+       button type, space properly, add some text and pass along the url
+       and the person who is being added.
+
+       * whoisi/widgets/templates/siteaddlink.kid: New widget template.
+
+       * whoisi/widgets/widgets.py (SiteAddLinkWidget): New widget that
+       returns a "add new site" widget.  We'll be using it in both the
+       form and returned as part of an ajax query.
+
+2007-11-26  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/templates/index.kid: Use the search widget, not hand
+       coded HTML.
+
+       * whoisi/templates/person.kid: Template to display a single
+       person.  Playing with some ajax site adding code but not done yet.
+       Uses the person widget to display a person.
+
+       * whoisi/templates/search.kid: Change search form to add a new
+       site if no matches are found.  (Just a stub for now.)
+
+       * whoisi/templates/master.kid: Change initial focus to find the
+       first form on a page.
+
+       * whoisi/controllers.py (Root.index): Change the index page to use
+       the search widget instead of a hard coded HTML form.
+       (Root.search): Change the search to use the search widget.
+       (Root.person): Simple method to get a person and hand it off to
+       the right widget + form.
+       (Root.siteaddpre): Method to get HTML to add a site in a form.
+       Needed so that we can eventually add some anti-spam measures here.
+       (Root.siteaddpost): Method that will add a site.  Stub for now.
+
+       * whoisi/search.py (SearchService.peopleByName): Change the search
+       service so it returns ordered results - first by exact match and
+       then other possible matches.
+
+       * whoisi/static/javascript/jquery.js: Add jquery 1.2.1 for use in
+       pages.
+
+       * whoisi/widgets/templates/person.kid: Widget that displays a
+       person.  Heavily in progress and will need a lot of changes.
+       Taken directly from the mockups for now.
+
+       * whoisi/widgets/templates/search.kid: Widget for the search box
+       that shows up on a lot of pages.
+
+       * whoisi/widgets/templates/siteadd.kid: Widget for adding a
+       site (in progress.)
+
+       * whoisi/widgets/widgets.py: Start adding widgets that we'll use
+       to build the site.
+
+2007-11-22  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/templates/index.kid: Name the search fields.
+
+       * whoisi/templates/search.kid: Dump result names.  Not even
+       slightly done yet.
+
+       * whoisi/templates/master.kid: Javascript function to focus the
+       first form element.  Needs cleanup to add hooks for onload()
+       functions so that it's extensible.
+
+       * whoisi/controllers.py (Root): Start on the search method.
+       Redirects back to '/' if there's no query.  Calls into the search
+       service to get a set of results.
+
+       * whoisi/search.py: Add simple sql-driven search service until we
+       bring something more awesome online.
+
+2007-11-21  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/model.py (Person): Move aliases -> other names.  Fix
+       sites to be a MultipleJoin, not a RelatedJoin since it's
+       one-to-many, not many-to-many.
+       (Name): Add a RelatedJoin back to Person so that we can go in
+       either direction.
+       (Site): Fix url to be notNone=true.  Add default=None to other
+       parts so they can be populated.
+
+       * whoisi/controllers.py (Root.search): New search method exposed
+       to the world.
+
+       * whoisi/templates/search.kid: New search result page.
+       
+       * whoisi/static/css/style.css: Whack this file completely and move
+       it to the new look.
+
+       * whoisi/controllers.py (Root.index): Move from the classic
+       welcome page to the search page.
+
+       * whoisi/templates/index.kid: New index file to start searching.
+       Very simple.
+
+2007-11-21  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/templates/master.kid: Rip out everything we don't need.
+
+2007-11-21  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * whoisi/model.py: Remove unused template from the model file.
+
+2007-11-16  Christopher Blizzard  <blizzard@0xdeadbeef.com>
+
+       * Add basic models and try and get the database initialized.
+
diff --git a/README.txt b/README.txt
new file mode 100644 (file)
index 0000000..bc11a82
--- /dev/null
@@ -0,0 +1,39 @@
+
+whoisi
+
+This is a TurboGears (http://www.turbogears.org) project. It can be
+started by running the start-whoisi.py script.
+
+database setup
+
+Include the following lines in your /etc/my.cnf before setting up the database:
+
+[client]
+default-character-set=utf8
+
+[mysqld]
+default-character-set=utf8
+
+Extra packages:
+gdata from source http://ideasuite.com/~blizzard/whoisi/tgz/
+sqlobject
+turbogears
+MySQL-python (at least version 1.2.2 to avoid problems with inserting unicode strings)
+twisted (2.5) - fight with the twisted.web program - installs to the wrong libdir on 64 bit systems?
+
+Create indexes by hand:
+
+create index site_feed_idx on site (feed(128));
+create index site_url_idx on site (url(128));
+create index name_name_idx on name (name(32));
+create index site_history_new on site_history (on_new);
+create index name_person_idx on name (person_id);
+
+create index follow_person_follower_id_person_id_idx on follow_person (person_id, follower_id);
+create index follow_person_follower_id_person_id_idx2 on follow_person (follower_id, person_id);
+
+create index site_current_idx on site (person_id, is_removed, id);
+create index site_history_current_idx on site_history (site_id, on_new, id);
+
+create index site_type_idx on site (type, is_removed);
+
diff --git a/TODO.txt b/TODO.txt
new file mode 100644 (file)
index 0000000..a7d656e
--- /dev/null
+++ b/TODO.txt
@@ -0,0 +1,174 @@
+REQUIRED FOR SOURCE RELEASE:
+
+1. Put license headers on all the files.
+2. Check for licenses for feedparser and move it to a feedparser + patch model.
+3. Check the mini-dom stuff - it's imported code - use BeautifulSoup instead?
+X. Move password and key stuff to configs.
+5. Set up trac.
+6. Write up a doc on what's required to run it.
+7. Write up a doc on how to add a new site.
+8. Write up a doc that describes the overall architecture.
+
+REQUIRED TO LAUNCH:
+
+X. Add "Follow" links to all the various types of sites
+X. Finish following
+X. Email follow info (so people can log back in.)
+X. CAPTCHAs
+X. Error reporting for new site adds
+X. Smarter about feeds (flickr, dropping comments and relative ATOM vs. RSS)
+X. Add "this is me" functionality
+X. Add pagination to search results page
+X. Search in aliases as well as names
+X. "Following" page should have sort by name and by date
+XX. Error page for person not found
+XX. Exception error handler for all pages
+XX. Error page for /follow if you aren't following anyone
+X.  Change /person to /p
+X.  Add tg.url() support for everything so we can jam apache in there
+X.  Fix display code so that the everyone page and the follow page
+don't make a billion queries.
+X.  Fix bug where you can't stop following someone
+X.  Make sure that cookie dates are updated when someone accesses the
+site.
+XX. Change person add procedure to it checks for dups and doesn't add
+a person before the site is loaded and also has a CAPTCHA.
+XX. First time you follow someone the friendslink doesn't show up.
+XX: Fix unicode characters not working in names when you add a person
+(http://www.flickr.com/photos/lesec/)
+XX: Fix new sites so that they have a flag for first entries added to
+the database - otherwise it floods the everyone and follow page
+XX: Fix ordering for inserts for entries so they are sorted by date
+and ditch the code in the front end to re-order entries by date per
+page.
+XX: Search page needs to not make a crapload of queries.
+XX: Add simple auditing.
+XX: Make delete a flag, don't actually delete
+XX: Fix ordering problem when adding site entries and get rid of
+re-ordering in follow + everyone page.
+XX: Fix the add-site overwhelms the everybody page by adding a flag
+to the site_history to indicate if it's first run or not.
+XX: If you click on one person's entry in more than one place on the
+/everyone page it will add them and then remove them.  Needs to not be
+a toggle.  This is a nasty race condition in the follow code.
+XX: Person 446 - unicode names for people don't work
+XX: Add a favicon and icons on the master template
+XX: Add a footer with an about page
+XX: Add robots.txt
+XX: Add one-click following
+XX: Add a super-simple API.
+XX: Switch to Mako.
+
+o Not fully-qualifying urls like http://whoisi.com/p/1114
+
+o Add the f=1 to a person page that lets you follow them easily
+
+o Add an addsite + url argument to a person so it's easy to add
+  a new site.
+
+o Button from John
+
+o Sitemap for google
+
+o Can't add sites that don't include a <link> in the <feed> section.
+Or at least the preview code fails.
+
+o Fix getPersonForURL() - removing the sitehistory from run_db_check
+will make it a lot less useful.
+
+o Add a hash to the add person stuff - too easy to break out of that
+with just an ID.
+
+o http://www.justanotherjen.com/ - looks like it gives back HTML
+unless you specify that you want rss content types?!
+
+o Look at steven garrity's entry about "Overheard at Canadian Tire" -
+has bad unicode rendering
+
+o email hashing thing for users who do "this is me"
+
+o Time-based picasa photos don't show up with the name in front of the
+picasa link
+
+o Paul Graham's website/weblog puts the <link> tag down in the body.  Fail.
+
+o add refresh coalescing to the master service for sites that are down
+
+o Also check the url field for "comment" when doing the feed detection
+- venky's blog contains bad names but does include info in the actual
+url
+
+o livejournal entries don't include a link to rss - have to strip off
+a specific entry url and just use the base hostname
+
+o Check paul frield's blog - it uses a port 8080 call and it fails
+silently?
+
+o Fix linkedin changes so they show up as part of a cluster and as
+part of the timeline
+
+o Figure out the right level of indentation (link-collection-item?)
+
+o Put names in front of all the results
+
+o Add options to the master and controller process so they will connect
+to the test db (for bryan!)
+
+o Add error handling to the front end
+
+o Add aliases to the name search
+
+o Move the various parsing processes to use pb and set them up to be
+re-used instead of started up and shut down after each job
+
+o Add captchas to everything
+
+o This url won't render entries: http://hecker.org/ - no idea why not
+
+o Fix flickr so that it doesn't ask about the feed type - probably
+want to just convert over to using the api instead of using the feed.
+API provides everything we need, anyway and it goes through the key so
+it's a lot more reliable.
+
+o Change the way that we do an initial person add - we need to verify
+that a site is valid before we do the person creation - can we do that
+from the initial add page and then redirect once the site and person
+have been created?
+
+o Move to using feedparser to download a feed - gets charsets right
+
+o Add support for content types other than text/html ()
+
+o Add the "follow" functionality
+- UI
+- cookies
+
+o Work on the timeline display code
+
+o Figure out how errors propagate to the UI
+
+o Set up automation for refreshing feeds
+
+o Make sure we're using tg.url() everywhere we're building urls so we
+will get the proper urls when we go to deploy behind apache
+
+o Start using the rb stuff for inter-process communication for the
+workers - done for the master <-> controller communications.
+
+o Add logging to the worker processes
+
+o deleted example on flickr
+
+           id: 5325
+      site_id: 186
+        title: This on goes to @thinguy
+         link: http://www.flickr.com/photos/cote/2333863644/
+     entry_id: tag:flickr.com,2005:/photo/2333863644
+        added: 2008-03-14 20:49:03
+      touched: 2008-03-14 20:49:03
+    published: 2008-03-14 20:39:41
+      updated: 2008-03-14 20:39:41
+      summary: NULL
+      content: <p><a href="http://www.flickr.com/people/cote/">cote</a> posted a photo:</p>
+<p><a href="http://www.flickr.com/photos/cote/2333863644/" title="This on goes to @thinguy"><img src="http://farm4.static.flickr.com/3218/2333863644_f18b287bbf_m.jpg" width="240" height="192" alt="This on goes to @thinguy" /></a></p>
+display_cache: NULL
diff --git a/blacklist_rss.txt b/blacklist_rss.txt
new file mode 100644 (file)
index 0000000..863b337
--- /dev/null
@@ -0,0 +1,16 @@
+http://friendfeed.com/?auth=1G3a9hvyTN1ab2WB&format=atom
+
+http://meneame.net/comments_rss2.php
+
+http://blip.tv/rss
+
+http://www.istockphoto.com/istock_rss.php
+
+http://friendfeed.com/?auth=e6ylEzZmBmpCGfpn&format=atom
+
+http://del.icio.us/rss/
+- check for "rss by tag" from del.icio.us
+
+http://en.wikipedia.org/wiki/Special:RecentChanges
+http://en.wikipedia.org/w/index.php?title=Special:RecentChanges&feed=atom
+
diff --git a/controller-1.cfg b/controller-1.cfg
new file mode 100644 (file)
index 0000000..2cd9834
--- /dev/null
@@ -0,0 +1,17 @@
+[listen]
+port=11500
+
+[db]
+host=localhost
+#user=user
+#passwd=passwd
+db=whoisi
+port=3306
+
+# Fill in these sections with your keys
+#[flickr]
+#api_key=something
+
+#[twitter]
+#username=something
+#password=something
diff --git a/controller-service b/controller-service
new file mode 100755 (executable)
index 0000000..a383824
--- /dev/null
@@ -0,0 +1,138 @@
+#!/usr/bin/python
+
+# Copyright (c) 2007-2008 Christopher Blizzard <blizzard@0xdeadbeef.com>
+#
+# Permission is hereby granted, free of charge, to any person
+# obtaining a copy of this software and associated documentation files
+# (the "Software"), to deal in the Software without restriction,
+# including without limitation the rights to use, copy, modify, merge,
+# publish, distribute, sublicense, and/or sell copies of the Software,
+# and to permit persons to whom the Software is furnished to do so,
+# subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+from services.command.controller import NewSiteManager, RefreshManager, \
+    NewLinkedInManager, LinkedInRefreshManager, PreviewLinkedInManager, \
+    NewPicasaManager, PicasaRefreshManager, PicasaPreviewManager, \
+    FlickrCacheManager, PreviewSiteManager
+
+from twisted.internet import reactor, protocol
+from twisted.spread import pb
+from services.command.service import ServiceManager
+from services.command.database import DatabaseCommandManager
+
+import sys, getopt
+import services.config as config
+
+class Controller(pb.Root):
+    """Perspective Broker root object for the controller service.
+
+    Exposes the site add/refresh/preview/cache commands to remote PB
+    callers; each remote_* method builds a fresh manager and returns
+    its start() result.
+    """
+
+    def service_setup(self):
+        # Build the MySQL connection settings from the [db] config
+        # section and start the shared service + database managers.
+        self.connection_type = "MySQLdb"
+        self.connection_dict = dict(cp_reconnect=True,
+                                    host=config.get("db", "host"),
+                                    user=config.get("db", "user"),
+                                    passwd=config.get("db", "passwd"),
+                                    db=config.get("db", "db"),
+                                    port=config.getint("db", "port"),
+                                    charset="utf8")
+
+        self.sm = ServiceManager()
+        self.dcm = DatabaseCommandManager()
+        self.dcm.start(self.connection_type, self.connection_dict)
+
+    # PB strips the "remote_" prefix: callers invoke these as
+    # "newSite", "feedRefresh", "previewSite", etc.
+
+    def remote_newSite(self, uuid, *args):
+        ns = NewSiteManager(self.sm, self.dcm)
+        return ns.start(uuid, *args)
+
+    def remote_newLinkedIn(self, uuid, *args):
+        nl = NewLinkedInManager(self.dcm)
+        return nl.start(uuid, *args)
+
+    def remote_newPicasa(self, uuid, *args):
+        np = NewPicasaManager(self.dcm, self.sm)
+        return np.start(uuid, *args)
+
+    def remote_picasaRefresh(self, uuid, *args):
+        pr = PicasaRefreshManager(self.dcm, self.sm)
+        return pr.start(uuid, *args)
+
+    def remote_feedRefresh(self, uuid, *args):
+        rm = RefreshManager(self.sm, self.dcm)
+        return rm.start(uuid, *args)
+
+    def remote_linkedInRefresh(self, uuid, *args):
+        lr = LinkedInRefreshManager(self.dcm)
+        return lr.start(uuid, *args)
+
+    def remote_previewSite(self, uuid, *args):
+        ps = PreviewSiteManager(self.sm, self.dcm)
+        return ps.start(uuid, *args)
+
+    def remote_previewLinkedIn(self, uuid, *args):
+        pl = PreviewLinkedInManager(self.dcm)
+        return pl.start(uuid, *args)
+
+    def remote_previewPicasa(self, uuid, *args):
+        pr = PicasaPreviewManager(self.dcm, self.sm)
+        return pr.start(uuid, *args)
+
+    def remote_flickrCache(self, uuid, *args):
+        fcm = FlickrCacheManager(self.dcm)
+        return fcm.start(uuid, *args)
+
+# command line handling
+def print_usage():
+    print("Usage: %s -c <configfile>" % sys.argv[0])
+    print("\t-c, --config=<configfile> - config file     (required)")
+    sys.exit(2)
+
+# Parse the command line: -c/--config <file> is required.
+try:
+    opts, args = getopt.getopt(sys.argv[1:], "c:", ["config="])
+except getopt.GetoptError:
+    print_usage()
+
+config_file = None
+
+for o, a in opts:
+    if o in ("-c", "--config"):
+        config_file = a
+
+if config_file is None:
+    print_usage()
+
+# Load the config file, reporting the failing line on error.
+# NOTE(review): this assumes the raised exception carries a .lineno
+# attribute (ConfigParser-style parsing errors) -- other exceptions
+# would themselves raise AttributeError here; confirm.
+try:
+    config.read(config_file)
+except Exception, e:
+    print("Failed to load config file %s at line %d" % (config_file, e.lineno))
+    print_usage()
+
+# Check and warn if we don't have the right api keys and/or usernames
+# and passwords
+try:
+    check_flickr = config.get("flickr", "api_key")
+    check_twitter_user = config.get("twitter", "username")
+    check_twitter_password = config.get("twitter", "password")
+except:
+    print("Make sure that you have a flickr key and twitter username and password\nset in the config.")
+    print_usage()
+
+# fire up the controller and let it go
+port = config.getint("listen", "port")
+c = Controller()
+c.service_setup()
+reactor.listenTCP(port, pb.PBServerFactory(c))
+
+print("listening for commands on port %d" % port)
+
+reactor.run()
+
diff --git a/dev.cfg b/dev.cfg
new file mode 100644 (file)
index 0000000..8301598
--- /dev/null
+++ b/dev.cfg
@@ -0,0 +1,70 @@
+[global]
+# This is where all of your settings go for your development environment
+# Settings that are the same for both development and production
+# (such as template engine, encodings, etc.) all go in 
+# whoisi/config/app.cfg
+
+# DATABASE
+
+# pick the form for your database
+# sqlobject.dburi="postgres://username@hostname/databasename"
+# sqlobject.dburi="mysql://username:password@hostname:port/databasename"
+# sqlobject.dburi="sqlite:///file_name_and_path"
+
+# If you have sqlite, here's a simple default to get you started
+# in development
+#sqlobject.dburi="sqlite://%(current_dir_uri)s/devdata.sqlite"
+sqlobject.dburi="mysql://user:passwd@localhost:3306/whoisi?charset=utf8&debug=True"
+
+
+# if you are using a database or table type without transactions
+# (MySQL default, for example), you should turn off transactions
+# by prepending notrans_ on the uri
+# sqlobject.dburi="notrans_mysql://username:password@hostname:port/databasename"
+
+# for Windows users, sqlite URIs look like:
+# sqlobject.dburi="sqlite:///drive_letter:/path/to/file"
+
+# SERVER
+
+# Some server parameters that you may want to tweak
+# server.socket_port=8080
+
+# Enable the debug output at the end on pages.
+# log_debug_info_filter.on = False
+
+server.environment="development"
+autoreload.package="whoisi"
+
+# Auto-Reload after code modification
+# autoreload.on = True
+
+# Set to True if you'd like to abort execution if a controller gets an
+# unexpected parameter. False by default
+tg.strict_parameters = True
+
+# replace this with your private recaptcha key
+# whoisi.recaptcha_private_key = ""
+
+# LOGGING
+# Logging configuration generally follows the style of the standard
+# Python logging module configuration. Note that when specifying
+# log format messages, you need to use *() for formatting variables.
+# Deployment independent log configuration is in whoisi/config/log.cfg
+[logging]
+
+[[loggers]]
+[[[whoisi]]]
+level='DEBUG'
+qualname='whoisi'
+handlers=['debug_out']
+
+[[[allinfo]]]
+level='INFO'
+handlers=['debug_out']
+
+[[[access]]]
+level='INFO'
+qualname='turbogears.access'
+handlers=['access_out']
+propagate=0
diff --git a/devdata.sqlite b/devdata.sqlite
new file mode 100644 (file)
index 0000000..7439e4b
Binary files /dev/null and b/devdata.sqlite differ
diff --git a/feed-parse-service b/feed-parse-service
new file mode 100755 (executable)
index 0000000..39fe3d5
--- /dev/null
@@ -0,0 +1,139 @@
+#!/usr/bin/python
+
+# Copyright (c) 2007-2008 Christopher Blizzard <blizzard@0xdeadbeef.com>
+#
+# Permission is hereby granted, free of charge, to any person
+# obtaining a copy of this software and associated documentation files
+# (the "Software"), to deal in the Software without restriction,
+# including without limitation the rights to use, copy, modify, merge,
+# publish, distribute, sublicense, and/or sell copies of the Software,
+# and to permit persons to whom the Software is furnished to do so,
+# subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+from twisted.internet import stdio, reactor
+from services.protocol.childlistener import ChildListener
+
+import lib.feedparser as feedparser
+import simplejson
+import tempfile
+import os
+import sys
+import traceback
+
+class FeedParseProtocol(ChildListener):
+    def runCommand(self, command, arg):
+        if command != "parse":
+            self.sendLine("bad command")
+            return
+
+        tmpfilename = None
+        tmpfd = None
+        feed = None
+
+        try:
+            tmpfd, tmpfilename = tempfile.mkstemp()
+            tmpfd = os.fdopen(tmpfd, "wb")
+        except:
+            send.sendLine("parse failed internal")
+            return
+
+        try:
+            d = feedparser.parse(arg)
+            data = {}
+
+            # Pull data from the feed.  Add defaults where it makes
+            # sense.
+            """
+            feed.version will tell you the version of the rss in question
+                http://feedparser.org/docs/version-detection.html
+            feed.title
+            feed.link
+            feed.subtitle
+            feed.updated_parsed
+            feed.id
+            feed.image ?
+            entries
+            e.title
+            e.link
+            e.id
+            e.published_parsed
+            e.updated_parsed
+            e.summary
+            e.content
+            e.enclosures (?) http://feedparser.org/docs/uncommon-rss.html
+            e.contributors (?) http://feedparser.org/docs/uncommon-atom.html
+
+            to check for existence use something like
+            feed.has_key('foo') or use feed.get('foo', <default>)
+            """
+
+            data["version"] = d.version
+            data["title"] = d.feed.get("title", None)
+            data["subtitle"] = d.feed.get("subtitle", None)
+            data["link"] = d.feed.get("link", None)
+            data["last_update"] = self.parsedTimeToSeconds(d.feed, "updated_parsed")
+            data["feed_id"] = d.feed.get("id", None)
+            data["feed_image"] = d.feed.get("image", None)
+
+            data["entries"] = []
+
+            for e in d["items"]:
+                le = {}
+                le["title"] = e.get("title", None)
+                le["link"] = e.get("link", None)
+                le["entry_id"] = e.get("id", None)
+                le["published"] = self.parsedTimeToSeconds(e, "published_parsed")
+                le["updated"] = self.parsedTimeToSeconds(e, "updated_parsed")
+                le["summary"] = e.get("summary", None)
+                le["content"] = e.get("content", None)
+                le["display_cache"] = None
+                data["entries"].append(le)
+
+            tmpfd.write(simplejson.dumps(data))
+
+        except:
+            self.sendLine("parse failed internal")
+            traceback.print_exc(file=sys.stderr)
+            return
+
+        self.sendLine("parse done %s" % tmpfilename)
+
+    def parsedTimeToSeconds(self, feed, name):
+        """
+        Feedparser has a "parsed time" that's the usual time 9 tuple
+        value that's found in python's time() module.  All values are
+        in GMT but we really want the first 6, which represent the
+        date and time in a form that the json serializer can handle.
+        """
+        x = feed.get(name, None)
+        if x is None:
+            return None
+        return x[:6]
+            
+    def connectionLost(self, reason):
+        if (reactor.running):
+            reactor.stop()
+        ChildListener.connectionLost(self, reason)
+
+    def connectionMade(self):
+        self.sendLine("ready")
+        ChildListener.connectionMade(self)
+# Wire the protocol up to stdin/stdout; the parent process drives it.
+scrapeProtocol = FeedParseProtocol()
+
+stdioWrapper = stdio.StandardIO(scrapeProtocol)
+
+# start accepting requests
+reactor.run()
+
diff --git a/firehose-client b/firehose-client
new file mode 100755 (executable)
index 0000000..e66651a
--- /dev/null
@@ -0,0 +1,81 @@
+#!/usr/bin/python
+
+# Copyright (c) 2007-2008 Christopher Blizzard <blizzard@0xdeadbeef.com>
+#
+# Permission is hereby granted, free of charge, to any person
+# obtaining a copy of this software and associated documentation files
+# (the "Software"), to deal in the Software without restriction,
+# including without limitation the rights to use, copy, modify, merge,
+# publish, distribute, sublicense, and/or sell copies of the Software,
+# and to permit persons to whom the Software is furnished to do so,
+# subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+from twisted.internet import reactor, protocol, defer
+from services.publisher.protocol import PublisherProtocol
+
+import simplejson
+import sys
+
+class ClientProtocol(PublisherProtocol):
+    """Client side of the publisher protocol.
+
+    Once the server reports the idle state, requests firehose mode and
+    then prints every message received.
+    """
+
+    def __init__(self):
+        self.state = self.STATE_START
+        PublisherProtocol.__init__(self)
+
+    def connectionMade(self):
+        print("connected")
+
+    def stateChanged(self, state):
+        if state == self.STATE_IDLE:
+            print("in idle state, asking to start the firehose")
+            # request to start firehose
+            self.sendState(self.STATE_FIREHOSE)
+        if state == self.STATE_FIREHOSE:
+            print("server now in firehose mode.")
+
+    def handleMessage(self, msg):
+        # Print id, whoisi person id, author name, entry title + link.
+        # NOTE(review): assumes every message carries the
+        # exts/whoisi.com/person_id and author/name keys -- a KeyError
+        # here would abort the handler; confirm against the publisher.
+        print("%s: (%s)\n\t%s\n\t%s\n\t%s" % (msg["id"],
+                                    msg["exts"]["whoisi.com"]["person_id"],
+                                    msg["author"]["name"],
+                                    msg["atom-entry"].get("title", None),
+                                    msg["atom-entry"].get("link", None)))
+
+
+class ClientProtocolFactory(protocol.ClientFactory):
+    """Factory that builds ClientProtocol instances and logs
+    connection loss/failure to stdout."""
+
+    protocol = ClientProtocol
+
+    def buildProtocol(self, addr):
+        p = self.protocol()
+        p.factory = self
+        return p
+
+    def clientConnectionLost(self, connector, reason):
+        print("Lost connection: %s" % reason.getErrorMessage())
+
+    def clientConnectionFailed(self, connector, reason):
+        print("Connection failed: %s" % reason.getErrorMessage())
+
+# get the host and port
+host = None
+port = None
+# A missing or non-integer argument falls through to the usage message.
+try:
+    host = sys.argv[1]
+    port = int(sys.argv[2])
+except:
+    print("Usage %s: host port" % sys.argv[0])
+    sys.exit(1)
+
+# Connect and run the reactor until the connection ends.
+reactor.connectTCP(host, port, ClientProtocolFactory())
+
+reactor.run()
diff --git a/html-feed-scrape-service b/html-feed-scrape-service
new file mode 100755 (executable)
index 0000000..4052a4e
--- /dev/null
@@ -0,0 +1,155 @@
+#!/usr/bin/python
+
+# Copyright (c) 2007-2008 Christopher Blizzard <blizzard@0xdeadbeef.com>
+#
+# Permission is hereby granted, free of charge, to any person
+# obtaining a copy of this software and associated documentation files
+# (the "Software"), to deal in the Software without restriction,
+# including without limitation the rights to use, copy, modify, merge,
+# publish, distribute, sublicense, and/or sell copies of the Software,
+# and to permit persons to whom the Software is furnished to do so,
+# subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+from twisted.internet import stdio, reactor
+from services.protocol.childlistener import ChildListener
+from HTMLParser import HTMLParser, HTMLParseError
+
+import simplejson
+import tempfile
+import os
+import sys
+import traceback
+
+class ScrapeParser(HTMLParser):
+    def reset(self):
+        self.links = []
+        self.generator = None
+        self.pingback = None
+        self.looks_like_html = False
+        self.got_html = False
+        self.in_head = False
+        HTMLParser.reset(self)
+
+    def handle_starttag(self, tag, attrs):
+        if tag == "html":
+            self.got_html = True
+
+        if tag == "head":
+            self.in_head = True
+            if self.got_html:
+                self.looks_like_html = True
+
+        if tag == "meta":
+            if not self.in_head:
+                return
+
+            generator = None
+            content = None
+            for key, value in attrs:
+                if key == "name" and value == "generator":
+                    generator = True
+                if key == "content":
+                    content = value
+            if generator and content:
+                self.generator = value
+
+        if tag == "link":
+            if not self.in_head:
+                return
+
+            type = None
+            href = None
+            alternate = False
+            pingback = None
+            title = None
+            for key, value in attrs:
+                if key == "rel" and value == "alternate":
+                    alternate = True
+                if key == "href":
+                    href = value
+                if key == "type":
+                    type = value
+                if key == "title":
+                    title = value
+                if key == "rel" and value == "pingback":
+                    pingback = True
+            if alternate is True and href:
+                self.links.append([href, type, title])
+            if pingback is True and href:
+                self.pingback = href
+
+    def handle_endtag(self, tag):
+        if tag == "head":
+            self.in_head = False
+
+class ScrapeProtocol(ChildListener):
+    def runCommand(self, command, arg):
+        if command != "parse":
+            self.sendLine("bad command")
+            return
+
+        # argument should be a file to open
+        f = None
+        tmpfilename = None
+        tmpfd = None
+
+        # open the tmpfile first
+        try:
+            tmpfd, tmpfilename = tempfile.mkstemp()
+            tmpfd = os.fdopen(tmpfd, "wb")
+        except:
+            self.sendLine("parse failed internal")
+            return
+
+        try:
+            f = open(arg, "r")
+            # ...and parse it
+
+            # XXX this should really be reading a little bit of a time
+            # instead of loading the whole file into memory
+            d = f.read()
+            s = ScrapeParser()
+            try:
+                s.feed(d)
+            except HTMLParseError:
+                # probably not an html file, but "looks_like_html"
+                # will inform the consumer that it wasn't html
+                pass
+            # just for debugging
+            output = dict(feed_url=s.links, pingback=s.pingback, generator=s.generator,
+                          looks_like_html = s.looks_like_html)
+            tmpfd.write(simplejson.dumps(output))
+        except:
+            self.sendLine("parse failed internal")
+            traceback.print_exc(file=sys.stderr)
+            return
+
+        self.sendLine("parse done %s" % tmpfilename)
+    
+    def connectionLost(self, reason):
+        if (reactor.running):
+            reactor.stop()
+        ChildListener.connectionLost(self, reason)
+
+    def connectionMade(self):
+        self.sendLine("ready")
+        ChildListener.connectionMade(self)
+
+# Wire the protocol up to stdin/stdout; the parent process drives it.
+scrapeProtocol = ScrapeProtocol()
+
+stdioWrapper = stdio.StandardIO(scrapeProtocol)
+
+# start accepting requests
+reactor.run()
diff --git a/lib/__init__.py b/lib/__init__.py
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/lib/feedparser.py b/lib/feedparser.py
new file mode 100644 (file)
index 0000000..a470df2
--- /dev/null
@@ -0,0 +1,2866 @@
+#!/usr/bin/env python
+"""Universal feed parser
+
+Handles RSS 0.9x, RSS 1.0, RSS 2.0, CDF, Atom 0.3, and Atom 1.0 feeds
+
+Visit http://feedparser.org/ for the latest version
+Visit http://feedparser.org/docs/ for the latest documentation
+
+Required: Python 2.1 or later
+Recommended: Python 2.3 or later
+Recommended: CJKCodecs and iconv_codec <http://cjkpython.i18n.org/>
+"""
+
+__version__ = "4.1"# + "$Revision: 1.92 $"[11:15] + "-cvs"
+__license__ = """Copyright (c) 2002-2006, Mark Pilgrim, All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice,
+  this list of conditions and the following disclaimer.
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE."""
+__author__ = "Mark Pilgrim <http://diveintomark.org/>"
+__contributors__ = ["Jason Diamond <http://injektilo.org/>",
+                    "John Beimler <http://john.beimler.org/>",
+                    "Fazal Majid <http://www.majid.info/mylos/weblog/>",
+                    "Aaron Swartz <http://aaronsw.com/>",
+                    "Kevin Marks <http://epeus.blogspot.com/>"]
# set to 1 to trace parser activity on stderr
_debug = 0

# HTTP "User-Agent" header to send to servers when downloading feeds.
# If you are embedding feedparser in a larger application, you should
# change this to your application name and URL.
USER_AGENT = "UniversalFeedParser/%s +http://feedparser.org/" % __version__

# HTTP "Accept" header to send to servers when downloading feeds.  If you don't
# want to send an Accept header, set this to None.
ACCEPT_HEADER = "application/atom+xml,application/rdf+xml,application/rss+xml,application/x-netcdf,application/xml;q=0.9,text/xml;q=0.2,*/*;q=0.1"

# List of preferred XML parsers, by SAX driver name.  These will be tried first,
# but if they're not installed, Python will keep searching through its own list
# of pre-installed parsers until it finds one that supports everything we need.
PREFERRED_XML_PARSERS = ["drv_libxml2"]

# If you want feedparser to automatically run HTML markup through HTML Tidy, set
# this to 1.  Requires mxTidy <http://www.egenix.com/files/python/mxTidy.html>
# or utidylib <http://utidylib.berlios.de/>.
TIDY_MARKUP = 0

# List of Python interfaces for HTML Tidy, in order of preference.  Only useful
# if TIDY_MARKUP = 1
PREFERRED_TIDY_INTERFACES = ["uTidy", "mxTidy"]
+
+# ---------- required modules (should come with any Python distribution) ----------
+import sgmllib, re, sys, copy, urlparse, time, rfc822, types, cgi, urllib, urllib2
+try:
+    from cStringIO import StringIO as _StringIO
+except:
+    from StringIO import StringIO as _StringIO
+
+# ---------- optional modules (feedparser will work without these, but with reduced functionality) ----------
+
+# gzip is included with most Python distributions, but may not be available if you compiled your own
+try:
+    import gzip
+except:
+    gzip = None
+try:
+    import zlib
+except:
+    zlib = None
+
+# If a real XML parser is available, feedparser will attempt to use it.  feedparser has
+# been tested with the built-in SAX parser, PyXML, and libxml2.  On platforms where the
+# Python distribution does not come with an XML parser (such as Mac OS X 10.2 and some
+# versions of FreeBSD), feedparser will quietly fall back on regex-based parsing.
+try:
+    import xml.sax
+    xml.sax.make_parser(PREFERRED_XML_PARSERS) # test for valid parsers
+    from xml.sax.saxutils import escape as _xmlescape
+    _XML_AVAILABLE = 1
+except:
+    _XML_AVAILABLE = 0
+    def _xmlescape(data):
+        data = data.replace('&', '&amp;')
+        data = data.replace('>', '&gt;')
+        data = data.replace('<', '&lt;')
+        return data
+
+# base64 support for Atom feeds that contain embedded binary data
+try:
+    import base64, binascii
+except:
+    base64 = binascii = None
+
+# cjkcodecs and iconv_codec provide support for more character encodings.
+# Both are available from http://cjkpython.i18n.org/
+try:
+    import cjkcodecs.aliases
+except:
+    pass
+try:
+    import iconv_codec
+except:
+    pass
+
+# chardet library auto-detects character encodings
+# Download from http://chardet.feedparser.org/
+try:
+    import chardet
+    if _debug:
+        import chardet.constants
+        chardet.constants._debug = 1
+except:
+    chardet = None
+
+# ---------- don't touch these ----------
# Internal exception types used to signal encoding and content-type
# conditions between parser layers.
class ThingsNobodyCaresAboutButMe(Exception): pass
class CharacterEncodingOverride(ThingsNobodyCaresAboutButMe): pass
class CharacterEncodingUnknown(ThingsNobodyCaresAboutButMe): pass
class NonXMLContentType(ThingsNobodyCaresAboutButMe): pass
class UndeclaredNamespace(Exception): pass

# Monkeypatch sgmllib's recognition regexes so the SGML-based fallback
# parser tolerates real-world feed markup: tag names containing ':'/'.',
# any '<!' run as "special", and hex character references ('&#x...').
sgmllib.tagfind = re.compile('[a-zA-Z][-_.:a-zA-Z0-9]*')
sgmllib.special = re.compile('<!')
sgmllib.charref = re.compile('&#(x?[0-9A-Fa-f]+)[^0-9A-Fa-f]')
+
# Feed version codes (stored in the parser's .version attribute) mapped
# to human-readable descriptions.
SUPPORTED_VERSIONS = {'': 'unknown',
                      'rss090': 'RSS 0.90',
                      'rss091n': 'RSS 0.91 (Netscape)',
                      'rss091u': 'RSS 0.91 (Userland)',
                      'rss092': 'RSS 0.92',
                      'rss093': 'RSS 0.93',
                      'rss094': 'RSS 0.94',
                      'rss20': 'RSS 2.0',
                      'rss10': 'RSS 1.0',
                      'rss': 'RSS (unknown version)',
                      'atom01': 'Atom 0.1',
                      'atom02': 'Atom 0.2',
                      'atom03': 'Atom 0.3',
                      'atom10': 'Atom 1.0',
                      'atom': 'Atom (unknown version)',
                      'cdf': 'CDF',
                      'hotrss': 'Hot RSS'
                      }
+
try:
    UserDict = dict
except NameError:
    # Python 2.1 does not have dict
    from UserDict import UserDict
    def dict(aList):
        # minimal replacement: build a mapping from a list of (key, value)
        rc = {}
        for k, v in aList:
            rc[k] = v
        return rc
+
class FeedParserDict(UserDict):
    """Dictionary that maps legacy RSS-era element names onto their
    normalized Atom-style equivalents and allows attribute-style access
    (d.title is d['title'])."""
    # legacy key -> normalized key; a list value means "first present wins"
    # on reads and "first entry" on writes
    keymap = {'channel': 'feed',
              'items': 'entries',
              'guid': 'id',
              'date': 'updated',
              'date_parsed': 'updated_parsed',
              'description': ['subtitle', 'summary'],
              'url': ['href'],
              'modified': 'updated',
              'modified_parsed': 'updated_parsed',
              'issued': 'published',
              'issued_parsed': 'published_parsed',
              'copyright': 'rights',
              'copyright_detail': 'rights_detail',
              'tagline': 'subtitle',
              'tagline_detail': 'subtitle_detail'}
    def __getitem__(self, key):
        # 'category'/'categories' are synthesized from the stored 'tags' list
        if key == 'category':
            return UserDict.__getitem__(self, 'tags')[0]['term']
        if key == 'categories':
            return [(tag['scheme'], tag['term']) for tag in UserDict.__getitem__(self, 'tags')]
        realkey = self.keymap.get(key, key)
        if type(realkey) == types.ListType:
            # alias maps to several candidates; return the first one present
            for k in realkey:
                if UserDict.has_key(self, k):
                    return UserDict.__getitem__(self, k)
        # an exactly-matching key takes precedence over its alias
        if UserDict.has_key(self, key):
            return UserDict.__getitem__(self, key)
        return UserDict.__getitem__(self, realkey)

    def __setitem__(self, key, value):
        # writes through a legacy alias land on the normalized key
        for k in self.keymap.keys():
            if key == k:
                key = self.keymap[k]
                if type(key) == types.ListType:
                    key = key[0]
        return UserDict.__setitem__(self, key, value)

    def get(self, key, default=None):
        # reimplemented so lookups go through our aliasing __getitem__
        if self.has_key(key):
            return self[key]
        else:
            return default

    def setdefault(self, key, value):
        if not self.has_key(key):
            self[key] = value
        return self[key]
        
    def has_key(self, key):
        # a real attribute also counts as a "key" so attribute-style access
        # and key-style access agree
        try:
            return hasattr(self, key) or UserDict.has_key(self, key)
        except AttributeError:
            return False
        
    def __getattr__(self, key):
        # fall back to item lookup for unknown attributes (d.title)
        try:
            return self.__dict__[key]
        except KeyError:
            pass
        try:
            assert not key.startswith('_')
            return self.__getitem__(key)
        except:
            raise AttributeError, "object has no attribute '%s'" % key

    def __setattr__(self, key, value):
        # '_'-prefixed names and 'data' (UserDict storage) stay real
        # attributes; everything else is stored as an item
        if key.startswith('_') or key == 'data':
            self.__dict__[key] = value
        else:
            return self.__setitem__(key, value)

    def __contains__(self, key):
        return self.has_key(key)
+
def zopeCompatibilityHack():
    """Replace FeedParserDict module-wide with a plain-dict factory
    (a compatibility switch for Zope, as the name suggests)."""
    global FeedParserDict
    del FeedParserDict
    def FeedParserDict(aDict=None):
        result = {}
        if aDict:
            result.update(aDict)
        return result
+
# lazily-built 256-byte translation table (see _ebcdic_to_ascii)
_ebcdic_to_ascii_map = None
def _ebcdic_to_ascii(s):
    """Translate an EBCDIC byte string to ASCII.

    The translation table is built on first use and cached in the
    module-level _ebcdic_to_ascii_map.
    """
    global _ebcdic_to_ascii_map
    if not _ebcdic_to_ascii_map:
        emap = (
            0,1,2,3,156,9,134,127,151,141,142,11,12,13,14,15,
            16,17,18,19,157,133,8,135,24,25,146,143,28,29,30,31,
            128,129,130,131,132,10,23,27,136,137,138,139,140,5,6,7,
            144,145,22,147,148,149,150,4,152,153,154,155,20,21,158,26,
            32,160,161,162,163,164,165,166,167,168,91,46,60,40,43,33,
            38,169,170,171,172,173,174,175,176,177,93,36,42,41,59,94,
            45,47,178,179,180,181,182,183,184,185,124,44,37,95,62,63,
            186,187,188,189,190,191,192,193,194,96,58,35,64,39,61,34,
            195,97,98,99,100,101,102,103,104,105,196,197,198,199,200,201,
            202,106,107,108,109,110,111,112,113,114,203,204,205,206,207,208,
            209,126,115,116,117,118,119,120,121,122,210,211,212,213,214,215,
            216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,
            123,65,66,67,68,69,70,71,72,73,232,233,234,235,236,237,
            125,74,75,76,77,78,79,80,81,82,238,239,240,241,242,243,
            92,159,83,84,85,86,87,88,89,90,244,245,246,247,248,249,
            48,49,50,51,52,53,54,55,56,57,250,251,252,253,254,255
            )
        import string
        _ebcdic_to_ascii_map = string.maketrans( \
            ''.join(map(chr, range(256))), ''.join(map(chr, emap)))
    return s.translate(_ebcdic_to_ascii_map)
+
+_urifixer = re.compile('^([A-Za-z][A-Za-z0-9+-.]*://)(/*)(.*?)')
+def _urljoin(base, uri):
+    uri = _urifixer.sub(r'\1\3', uri)
+    return urlparse.urljoin(base, uri)
+
+class _FeedParserMixin:
+    namespaces = {'': '',
+                  'http://backend.userland.com/rss': '',
+                  'http://blogs.law.harvard.edu/tech/rss': '',
+                  'http://purl.org/rss/1.0/': '',
+                  'http://my.netscape.com/rdf/simple/0.9/': '',
+                  'http://example.com/newformat#': '',
+                  'http://example.com/necho': '',
+                  'http://purl.org/echo/': '',
+                  'uri/of/echo/namespace#': '',
+                  'http://purl.org/pie/': '',
+                  'http://purl.org/atom/ns#': '',
+                  'http://www.w3.org/2005/Atom': '',
+                  'http://purl.org/rss/1.0/modules/rss091#': '',
+                  
+                  'http://webns.net/mvcb/':                               'admin',
+                  'http://purl.org/rss/1.0/modules/aggregation/':         'ag',
+                  'http://purl.org/rss/1.0/modules/annotate/':            'annotate',
+                  'http://media.tangent.org/rss/1.0/':                    'audio',
+                  'http://backend.userland.com/blogChannelModule':        'blogChannel',
+                  'http://web.resource.org/cc/':                          'cc',
+                  'http://backend.userland.com/creativeCommonsRssModule': 'creativeCommons',
+                  'http://purl.org/rss/1.0/modules/company':              'co',
+                  'http://purl.org/rss/1.0/modules/content/':             'content',
+                  'http://my.theinfo.org/changed/1.0/rss/':               'cp',
+                  'http://purl.org/dc/elements/1.1/':                     'dc',
+                  'http://purl.org/dc/terms/':                            'dcterms',
+                  'http://purl.org/rss/1.0/modules/email/':               'email',
+                  'http://purl.org/rss/1.0/modules/event/':               'ev',
+                  'http://rssnamespace.org/feedburner/ext/1.0':           'feedburner',
+                  'http://freshmeat.net/rss/fm/':                         'fm',
+                  'http://xmlns.com/foaf/0.1/':                           'foaf',
+                  'http://www.w3.org/2003/01/geo/wgs84_pos#':             'geo',
+                  'http://postneo.com/icbm/':                             'icbm',
+                  'http://purl.org/rss/1.0/modules/image/':               'image',
+                  'http://www.itunes.com/DTDs/PodCast-1.0.dtd':           'itunes',
+                  'http://example.com/DTDs/PodCast-1.0.dtd':              'itunes',
+                  'http://purl.org/rss/1.0/modules/link/':                'l',
+                  'http://search.yahoo.com/mrss':                         'media',
+                  'http://madskills.com/public/xml/rss/module/pingback/': 'pingback',
+                  'http://prismstandard.org/namespaces/1.2/basic/':       'prism',
+                  'http://www.w3.org/1999/02/22-rdf-syntax-ns#':          'rdf',
+                  'http://www.w3.org/2000/01/rdf-schema#':                'rdfs',
+                  'http://purl.org/rss/1.0/modules/reference/':           'ref',
+                  'http://purl.org/rss/1.0/modules/richequiv/':           'reqv',
+                  'http://purl.org/rss/1.0/modules/search/':              'search',
+                  'http://purl.org/rss/1.0/modules/slash/':               'slash',
+                  'http://schemas.xmlsoap.org/soap/envelope/':            'soap',
+                  'http://purl.org/rss/1.0/modules/servicestatus/':       'ss',
+                  'http://hacks.benhammersley.com/rss/streaming/':        'str',
+                  'http://purl.org/rss/1.0/modules/subscription/':        'sub',
+                  'http://purl.org/rss/1.0/modules/syndication/':         'sy',
+                  'http://purl.org/rss/1.0/modules/taxonomy/':            'taxo',
+                  'http://purl.org/rss/1.0/modules/threading/':           'thr',
+                  'http://purl.org/rss/1.0/modules/textinput/':           'ti',
+                  'http://madskills.com/public/xml/rss/module/trackback/':'trackback',
+                  'http://wellformedweb.org/commentAPI/':                 'wfw',
+                  'http://purl.org/rss/1.0/modules/wiki/':                'wiki',
+                  'http://www.w3.org/1999/xhtml':                         'xhtml',
+                  'http://www.w3.org/XML/1998/namespace':                 'xml',
+                  'http://schemas.pocketsoap.com/rss/myDescModule/':      'szf'
+}
    # lazily populated in __init__: lowercased namespace URI -> prefix
    _matchnamespaces = {}

    # elements whose text content is a URI that may need resolving
    # against the current xml:base
    can_be_relative_uri = ['link', 'id', 'wfw_comment', 'wfw_commentrss', 'docs', 'url', 'href', 'comments', 'license', 'icon', 'logo']
    # elements whose embedded markup may itself contain relative URIs
    can_contain_relative_uris = ['content', 'title', 'summary', 'info', 'tagline', 'subtitle', 'copyright', 'rights', 'description']
    # elements whose embedded markup is run through _sanitizeHTML in pop()
    can_contain_dangerous_markup = ['content', 'title', 'summary', 'info', 'tagline', 'subtitle', 'copyright', 'rights', 'description']
    html_types = ['text/html', 'application/xhtml+xml']
+    
    def __init__(self, baseuri=None, baselang=None, encoding='utf-8'):
        """Set up per-parse state.

        baseuri  -- document base for resolving relative URIs
        baselang -- default language, recorded as feed['language']
        encoding -- character encoding used when decoding element values
        """
        if _debug: sys.stderr.write('initializing FeedParser\n')
        if not self._matchnamespaces:
            # build the lowercased namespace lookup table once (class-level,
            # so it is shared by all subsequent instances)
            for k, v in self.namespaces.items():
                self._matchnamespaces[k.lower()] = v
        self.feeddata = FeedParserDict() # feed-level data
        self.encoding = encoding # character encoding
        self.entries = [] # list of entry-level data
        self.version = '' # feed type/version, see SUPPORTED_VERSIONS
        self.namespacesInUse = {} # dictionary of namespaces defined by the feed

        # the following are used internally to track state;
        # this is really out of control and should be refactored
        self.infeed = 0
        self.inentry = 0
        self.incontent = 0
        self.intextinput = 0
        self.inimage = 0
        self.inauthor = 0
        self.incontributor = 0
        self.inpublisher = 0
        self.insource = 0
        self.sourcedata = FeedParserDict()
        self.contentparams = FeedParserDict()
        self._summaryKey = None
        self.namespacemap = {}
        self.elementstack = []
        self.basestack = []
        self.langstack = []
        self.baseuri = baseuri or ''
        self.lang = baselang or None
        if baselang:
            self.feeddata['language'] = baselang
+
    def unknown_starttag(self, tag, attrs):
        """Dispatch an opening tag: track xml:base/xml:lang and namespace
        declarations, echo inline XHTML content through handle_data, and
        otherwise route to a _start_<prefix><name> handler if one exists
        (falling back to push())."""
        if _debug: sys.stderr.write('start %s with %s\n' % (tag, attrs))
        # normalize attrs
        attrs = [(k.lower(), v) for k, v in attrs]
        attrs = [(k, k in ('rel', 'type') and v.lower() or v) for k, v in attrs]
        
        # track xml:base and xml:lang
        attrsD = dict(attrs)
        baseuri = attrsD.get('xml:base', attrsD.get('base')) or self.baseuri
        self.baseuri = _urljoin(self.baseuri, baseuri)
        lang = attrsD.get('xml:lang', attrsD.get('lang'))
        if lang == '':
            # xml:lang could be explicitly set to '', we need to capture that
            lang = None
        elif lang is None:
            # if no xml:lang is specified, use parent lang
            lang = self.lang
        if lang:
            if tag in ('feed', 'rss', 'rdf:RDF'):
                self.feeddata['language'] = lang
        self.lang = lang
        self.basestack.append(self.baseuri)
        self.langstack.append(lang)
        
        # track namespaces
        for prefix, uri in attrs:
            if prefix.startswith('xmlns:'):
                self.trackNamespace(prefix[6:], uri)
            elif prefix == 'xmlns':
                self.trackNamespace(None, uri)

        # track inline content
        if self.incontent and self.contentparams.has_key('type') and not self.contentparams.get('type', 'xml').endswith('xml'):
            # element declared itself as escaped markup, but it isn't really
            self.contentparams['type'] = 'application/xhtml+xml'
        if self.incontent and self.contentparams.get('type') == 'application/xhtml+xml':
            # Note: probably shouldn't simply recreate localname here, but
            # our namespace handling isn't actually 100% correct in cases where
            # the feed redefines the default namespace (which is actually
            # the usual case for inline content, thanks Sam), so here we
            # cheat and just reconstruct the element based on localname
            # because that compensates for the bugs in our namespace handling.
            # This will horribly munge inline content with non-empty qnames,
            # but nobody actually does that, so I'm not fixing it.
            tag = tag.split(':')[-1]
            return self.handle_data('<%s%s>' % (tag, ''.join([' %s="%s"' % t for t in attrs])), escape=0)

        # match namespaces
        if tag.find(':') <> -1:
            prefix, suffix = tag.split(':', 1)
        else:
            prefix, suffix = '', tag
        prefix = self.namespacemap.get(prefix, prefix)
        if prefix:
            prefix = prefix + '_'

        # special hack for better tracking of empty textinput/image elements in illformed feeds
        if (not prefix) and tag not in ('title', 'link', 'description', 'name'):
            self.intextinput = 0
        if (not prefix) and tag not in ('title', 'link', 'description', 'url', 'href', 'width', 'height'):
            self.inimage = 0
        
        # call special handler (if defined) or default handler
        methodname = '_start_' + prefix + suffix
        try:
            method = getattr(self, methodname)
            return method(attrsD)
        except AttributeError:
            return self.push(prefix + suffix, 1)
+
    def unknown_endtag(self, tag):
        """Dispatch a closing tag: route to a _end_<prefix><name> handler
        (falling back to pop()), echo inline XHTML close tags, and restore
        the enclosing xml:base/xml:lang scopes."""
        if _debug: sys.stderr.write('end %s\n' % tag)
        # match namespaces
        if tag.find(':') <> -1:
            prefix, suffix = tag.split(':', 1)
        else:
            prefix, suffix = '', tag
        prefix = self.namespacemap.get(prefix, prefix)
        if prefix:
            prefix = prefix + '_'

        # call special handler (if defined) or default handler
        methodname = '_end_' + prefix + suffix
        try:
            method = getattr(self, methodname)
            method()
        except AttributeError:
            self.pop(prefix + suffix)

        # track inline content
        if self.incontent and self.contentparams.has_key('type') and not self.contentparams.get('type', 'xml').endswith('xml'):
            # element declared itself as escaped markup, but it isn't really
            self.contentparams['type'] = 'application/xhtml+xml'
        if self.incontent and self.contentparams.get('type') == 'application/xhtml+xml':
            tag = tag.split(':')[-1]
            self.handle_data('</%s>' % tag, escape=0)

        # track xml:base and xml:lang going out of scope
        if self.basestack:
            self.basestack.pop()
            if self.basestack and self.basestack[-1]:
                self.baseuri = self.basestack[-1]
        if self.langstack:
            self.langstack.pop()
            if self.langstack: # and (self.langstack[-1] is not None):
                self.lang = self.langstack[-1]
+
+    def handle_charref(self, ref):
+        # called for each character reference, e.g. for '&#160;', ref will be '160'
+        if not self.elementstack: return
+        ref = ref.lower()
+        if ref in ('34', '38', '39', '60', '62', 'x22', 'x26', 'x27', 'x3c', 'x3e'):
+            text = '&#%s;' % ref
+        else:
+            if ref[0] == 'x':
+                c = int(ref[1:], 16)
+            else:
+                c = int(ref)
+            text = unichr(c).encode('utf-8')
+        self.elementstack[-1][2].append(text)
+
    def handle_entityref(self, ref):
        """Append a named entity reference (ref is e.g. 'copy' for
        '&copy;') to the innermost open element, resolving it to UTF-8
        where the name is known."""
        # called for each entity reference, e.g. for '&copy;', ref will be 'copy'
        if not self.elementstack: return
        if _debug: sys.stderr.write('entering handle_entityref with %s\n' % ref)
        if ref in ('lt', 'gt', 'quot', 'amp', 'apos'):
            # markup-significant entities stay escaped
            text = '&%s;' % ref
        else:
            # entity resolution graciously donated by Aaron Swartz
            def name2cp(k):
                import htmlentitydefs
                if hasattr(htmlentitydefs, 'name2codepoint'): # requires Python 2.3
                    return htmlentitydefs.name2codepoint[k]
                k = htmlentitydefs.entitydefs[k]
                if k.startswith('&#') and k.endswith(';'):
                    return int(k[2:-1]) # not in latin-1
                return ord(k)
            # unknown names pass through unexpanded
            try: name2cp(ref)
            except KeyError: text = '&%s;' % ref
            else: text = unichr(name2cp(ref)).encode('utf-8')
        self.elementstack[-1][2].append(text)
+
+    def handle_data(self, text, escape=1):
+        # called for each block of plain text, i.e. outside of any tag and
+        # not containing any character or entity references
+        if not self.elementstack: return
+        if escape and self.contentparams.get('type') == 'application/xhtml+xml':
+            text = _xmlescape(text)
+        self.elementstack[-1][2].append(text)
+
    def handle_comment(self, text):
        # called for each comment, e.g. <!-- insert message here -->;
        # comments are ignored
        pass

    def handle_pi(self, text):
        # called for each processing instruction, e.g. <?instruction>;
        # processing instructions are ignored
        pass

    def handle_decl(self, text):
        # declarations (e.g. DOCTYPE) are ignored
        pass
+
+    def parse_declaration(self, i):
+        # override internal declaration handler to handle CDATA blocks
+        if _debug: sys.stderr.write('entering parse_declaration\n')
+        if self.rawdata[i:i+9] == '<![CDATA[':
+            k = self.rawdata.find(']]>', i)
+            if k == -1: k = len(self.rawdata)
+            self.handle_data(_xmlescape(self.rawdata[i+9:k]), 0)
+            return k+3
+        else:
+            k = self.rawdata.find('>', i)
+            return k+1
+
+    def mapContentType(self, contentType):
+        contentType = contentType.lower()
+        if contentType == 'text':
+            contentType = 'text/plain'
+        elif contentType == 'html':
+            contentType = 'text/html'
+        elif contentType == 'xhtml':
+            contentType = 'application/xhtml+xml'
+        return contentType
+    
    def trackNamespace(self, prefix, uri):
        """Record a namespace declaration, inferring the feed version from
        well-known namespace URIs and mapping the feed's prefix to our
        canonical prefix where the URI is recognized."""
        loweruri = uri.lower()
        if (prefix, loweruri) == (None, 'http://my.netscape.com/rdf/simple/0.9/') and not self.version:
            self.version = 'rss090'
        if loweruri == 'http://purl.org/rss/1.0/' and not self.version:
            self.version = 'rss10'
        if loweruri == 'http://www.w3.org/2005/atom' and not self.version:
            self.version = 'atom10'
        if loweruri.find('backend.userland.com/rss') <> -1:
            # match any backend.userland.com namespace
            uri = 'http://backend.userland.com/rss'
            loweruri = uri
        if self._matchnamespaces.has_key(loweruri):
            self.namespacemap[prefix] = self._matchnamespaces[loweruri]
            self.namespacesInUse[self._matchnamespaces[loweruri]] = uri
        else:
            self.namespacesInUse[prefix or ''] = uri
+
+    def resolveURI(self, uri):
+        return _urljoin(self.baseuri or '', uri)
+    
    def decodeEntities(self, element, data):
        # hook for subclasses to undo entity-encoding quirks; the base
        # implementation returns the data unchanged
        return data
+
    def push(self, element, expectingText):
        # open a new element frame: [name, whether text output is
        # expected, list of accumulated text pieces]
        self.elementstack.append([element, expectingText, []])
+
    def pop(self, element, stripWhitespace=1):
        """Close the innermost element frame (only if it matches *element*),
        post-process its accumulated text (base64 decode, relative-URI
        resolution, entity decoding, HTML sanitizing, unicode conversion —
        in that order) and store the result in the current feed or entry
        context.  Returns the processed text."""
        if not self.elementstack: return
        if self.elementstack[-1][0] != element: return
        
        element, expectingText, pieces = self.elementstack.pop()
        output = ''.join(pieces)
        if stripWhitespace:
            output = output.strip()
        if not expectingText: return output

        # decode base64 content
        if base64 and self.contentparams.get('base64', 0):
            try:
                output = base64.decodestring(output)
            except binascii.Error:
                pass
            except binascii.Incomplete:
                pass
                
        # resolve relative URIs
        if (element in self.can_be_relative_uri) and output:
            output = self.resolveURI(output)
        
        # decode entities within embedded markup
        if not self.contentparams.get('base64', 0):
            output = self.decodeEntities(element, output)

        # remove temporary cruft from contentparams
        try:
            del self.contentparams['mode']
        except KeyError:
            pass
        try:
            del self.contentparams['base64']
        except KeyError:
            pass

        # resolve relative URIs within embedded markup
        if self.mapContentType(self.contentparams.get('type', 'text/html')) in self.html_types:
            if element in self.can_contain_relative_uris:
                output = _resolveRelativeURIs(output, self.baseuri, self.encoding)
        
        # sanitize embedded markup
        if self.mapContentType(self.contentparams.get('type', 'text/html')) in self.html_types:
            if element in self.can_contain_dangerous_markup:
                output = _sanitizeHTML(output, self.encoding)

        # best-effort conversion of byte strings to unicode
        if self.encoding and type(output) != type(u''):
            try:
                output = unicode(output, self.encoding)
            except:
                pass

        # categories/tags/keywords/whatever are handled in _end_category
        if element == 'category':
            return output
        
        # store output in appropriate place(s)
        if self.inentry and not self.insource:
            if element == 'content':
                # an entry may carry multiple content elements
                self.entries[-1].setdefault(element, [])
                contentparams = copy.deepcopy(self.contentparams)
                contentparams['value'] = output
                self.entries[-1][element].append(contentparams)
            elif element == 'link':
                self.entries[-1][element] = output
                if output:
                    self.entries[-1]['links'][-1]['href'] = output
            else:
                if element == 'description':
                    element = 'summary'
                self.entries[-1][element] = output
                if self.incontent:
                    contentparams = copy.deepcopy(self.contentparams)
                    contentparams['value'] = output
                    self.entries[-1][element + '_detail'] = contentparams
        elif (self.infeed or self.insource) and (not self.intextinput) and (not self.inimage):
            context = self._getContext()
            if element == 'description':
                element = 'subtitle'
            context[element] = output
            if element == 'link':
                context['links'][-1]['href'] = output
            elif self.incontent:
                contentparams = copy.deepcopy(self.contentparams)
                contentparams['value'] = output
                context[element + '_detail'] = contentparams
        return output
+
    def pushContent(self, tag, attrsD, defaultContentType, expectingText):
        """Open an element that carries inline content, recording its type,
        language and base in self.contentparams."""
        self.incontent += 1
        self.contentparams = FeedParserDict({
            'type': self.mapContentType(attrsD.get('type', defaultContentType)),
            'language': self.lang,
            'base': self.baseuri})
        # note: _isBase64 reads self.contentparams['type'], so the
        # assignment above must happen before this call
        self.contentparams['base64'] = self._isBase64(attrsD, self.contentparams)
        self.push(tag, expectingText)
+
    def popContent(self, tag):
        """Close an inline-content element and return its collected value."""
        value = self.pop(tag)
        self.incontent -= 1
        # content params only apply inside the element just closed
        self.contentparams.clear()
        return value
+        
+    def _mapToStandardPrefix(self, name):
+        colonpos = name.find(':')
+        if colonpos <> -1:
+            prefix = name[:colonpos]
+            suffix = name[colonpos+1:]
+            prefix = self.namespacemap.get(prefix, prefix)
+            name = prefix + ':' + suffix
+        return name
+        
    def _getAttribute(self, attrsD, name):
        # attribute lookup that tolerates non-canonical namespace prefixes
        return attrsD.get(self._mapToStandardPrefix(name))
+
+    def _isBase64(self, attrsD, contentparams):
+        if attrsD.get('mode', '') == 'base64':
+            return 1
+        if self.contentparams['type'].startswith('text/'):
+            return 0
+        if self.contentparams['type'].endswith('+xml'):
+            return 0
+        if self.contentparams['type'].endswith('/xml'):
+            return 0
+        return 1
+
+    def _itsAnHrefDamnIt(self, attrsD):
+        href = attrsD.get('url', attrsD.get('uri', attrsD.get('href', None)))
+        if href:
+            try:
+                del attrsD['url']
+            except KeyError:
+                pass
+            try:
+                del attrsD['uri']
+            except KeyError:
+                pass
+            attrsD['href'] = href
+        return attrsD
+    
    def _save(self, key, value):
        # Store a value in the current context only if the key is not
        # already present (first occurrence wins).
        context = self._getContext()
        context.setdefault(key, value)

    def _start_rss(self, attrsD):
        # Map the <rss version="..."> attribute onto an internal version
        # token; 2.x becomes 'rss20', anything unknown falls back to 'rss'.
        versionmap = {'0.91': 'rss091u',
                      '0.92': 'rss092',
                      '0.93': 'rss093',
                      '0.94': 'rss094'}
        if not self.version:
            attr_version = attrsD.get('version', '')
            version = versionmap.get(attr_version)
            if version:
                self.version = version
            elif attr_version.startswith('2.'):
                self.version = 'rss20'
            else:
                self.version = 'rss'

    def _start_dlhottitles(self, attrsD):
        # Netscape 'hot titles' feeds identify themselves by this element
        self.version = 'hotrss'

    def _start_channel(self, attrsD):
        self.infeed = 1
        self._cdf_common(attrsD)
    _start_feedinfo = _start_channel

    def _cdf_common(self, attrsD):
        # CDF carries the modification date and link as attributes; replay
        # them through the normal start/end handlers by injecting the
        # attribute value as if it were element text.
        if attrsD.has_key('lastmod'):
            self._start_modified({})
            self.elementstack[-1][-1] = attrsD['lastmod']
            self._end_modified()
        if attrsD.has_key('href'):
            self._start_link({})
            self.elementstack[-1][-1] = attrsD['href']
            self._end_link()

    def _start_feed(self, attrsD):
        # Map the Atom <feed version="..."> attribute onto an internal
        # version token; unknown or missing versions fall back to 'atom'.
        self.infeed = 1
        versionmap = {'0.1': 'atom01',
                      '0.2': 'atom02',
                      '0.3': 'atom03'}
        if not self.version:
            attr_version = attrsD.get('version')
            version = versionmap.get(attr_version)
            if version:
                self.version = version
            else:
                self.version = 'atom'
+
    def _end_channel(self):
        self.infeed = 0
    _end_feed = _end_channel

    def _start_image(self, attrsD):
        # While inside <image>, values are collected into context['image']
        # instead of being treated as feed-level values (see the
        # self.inimage checks in the other handlers).
        self.inimage = 1
        self.push('image', 0)
        context = self._getContext()
        context.setdefault('image', FeedParserDict())

    def _end_image(self):
        self.pop('image')
        self.inimage = 0

    def _start_textinput(self, attrsD):
        # Likewise, <textinput> values go into context['textinput'].
        self.intextinput = 1
        self.push('textinput', 0)
        context = self._getContext()
        context.setdefault('textinput', FeedParserDict())
    _start_textInput = _start_textinput

    def _end_textinput(self):
        self.pop('textinput')
        self.intextinput = 0
    _end_textInput = _end_textinput

    def _start_author(self, attrsD):
        self.inauthor = 1
        self.push('author', 1)
    _start_managingeditor = _start_author
    _start_dc_author = _start_author
    _start_dc_creator = _start_author
    _start_itunes_author = _start_author

    def _end_author(self):
        self.pop('author')
        self.inauthor = 0
        # reconcile the 'author' string and 'author_detail' dict forms
        self._sync_author_detail()
    _end_managingeditor = _end_author
    _end_dc_author = _end_author
    _end_dc_creator = _end_author
    _end_itunes_author = _end_author

    def _start_itunes_owner(self, attrsD):
        # iTunes <owner> is exposed under the 'publisher' key
        self.inpublisher = 1
        self.push('publisher', 0)

    def _end_itunes_owner(self):
        self.pop('publisher')
        self.inpublisher = 0
        self._sync_author_detail('publisher')

    def _start_contributor(self, attrsD):
        self.incontributor = 1
        context = self._getContext()
        context.setdefault('contributors', [])
        context['contributors'].append(FeedParserDict())
        self.push('contributor', 0)

    def _end_contributor(self):
        self.pop('contributor')
        self.incontributor = 0

    def _start_dc_contributor(self, attrsD):
        # dc:contributor has no sub-elements: its text is the name, so we
        # push 'name' directly instead of a 'contributor' wrapper element.
        self.incontributor = 1
        context = self._getContext()
        context.setdefault('contributors', [])
        context['contributors'].append(FeedParserDict())
        self.push('name', 0)

    def _end_dc_contributor(self):
        self._end_name()
        self.incontributor = 0

    def _start_name(self, attrsD):
        self.push('name', 0)
    _start_itunes_name = _start_name

    def _end_name(self):
        # Route the accumulated name to whichever construct we are inside;
        # publisher takes precedence, then author, contributor, textinput.
        value = self.pop('name')
        if self.inpublisher:
            self._save_author('name', value, 'publisher')
        elif self.inauthor:
            self._save_author('name', value)
        elif self.incontributor:
            self._save_contributor('name', value)
        elif self.intextinput:
            context = self._getContext()
            context['textinput']['name'] = value
    _end_itunes_name = _end_name
+
+    def _start_width(self, attrsD):
+        self.push('width', 0)
+
+    def _end_width(self):
+        value = self.pop('width')
+        try:
+            value = int(value)
+        except:
+            value = 0
+        if self.inimage:
+            context = self._getContext()
+            context['image']['width'] = value
+
+    def _start_height(self, attrsD):
+        self.push('height', 0)
+
+    def _end_height(self):
+        value = self.pop('height')
+        try:
+            value = int(value)
+        except:
+            value = 0
+        if self.inimage:
+            context = self._getContext()
+            context['image']['height'] = value
+
    def _start_url(self, attrsD):
        self.push('href', 1)
    _start_homepage = _start_url
    _start_uri = _start_url

    def _end_url(self):
        # Route the URL to the construct being parsed (author,
        # contributor, image, or textinput).
        value = self.pop('href')
        if self.inauthor:
            self._save_author('href', value)
        elif self.incontributor:
            self._save_contributor('href', value)
        elif self.inimage:
            context = self._getContext()
            context['image']['href'] = value
        elif self.intextinput:
            context = self._getContext()
            context['textinput']['link'] = value
    _end_homepage = _end_url
    _end_uri = _end_url

    def _start_email(self, attrsD):
        self.push('email', 0)
    _start_itunes_email = _start_email

    def _end_email(self):
        # Route the email address like _end_url routes the URL.
        value = self.pop('email')
        if self.inpublisher:
            self._save_author('email', value, 'publisher')
        elif self.inauthor:
            self._save_author('email', value)
        elif self.incontributor:
            self._save_contributor('email', value)
    _end_itunes_email = _end_email
+
+    def _getContext(self):
+        if self.insource:
+            context = self.sourcedata
+        elif self.inentry:
+            context = self.entries[-1]
+        else:
+            context = self.feeddata
+        return context
+
    def _save_author(self, key, value, prefix='author'):
        # Record one field of the author/publisher details, then rebuild
        # the human-readable '<prefix>' string from the detail dict.
        context = self._getContext()
        context.setdefault(prefix + '_detail', FeedParserDict())
        context[prefix + '_detail'][key] = value
        self._sync_author_detail()

    def _save_contributor(self, key, value):
        # Record one field on the most recently started contributor.
        context = self._getContext()
        context.setdefault('contributors', [FeedParserDict()])
        context['contributors'][-1][key] = value

    def _sync_author_detail(self, key='author'):
        # Keep context[key] (display string) and context[key + '_detail']
        # (structured dict) consistent, whichever was parsed first.
        context = self._getContext()
        detail = context.get('%s_detail' % key)
        if detail:
            # detail dict exists: derive the "Name (email)" display string
            name = detail.get('name')
            email = detail.get('email')
            if name and email:
                context[key] = '%s (%s)' % (name, email)
            elif name:
                context[key] = name
            elif email:
                context[key] = email
        else:
            # only the display string exists: try to split out an embedded
            # email address and synthesize the detail dict from it
            author = context.get(key)
            if not author: return
            emailmatch = re.search(r'''(([a-zA-Z0-9\_\-\.\+]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?))''', author)
            if not emailmatch: return
            email = emailmatch.group(0)
            # probably a better way to do the following, but it passes all the tests
            author = author.replace(email, '')
            author = author.replace('()', '')
            author = author.strip()
            if author and (author[0] == '('):
                author = author[1:]
            if author and (author[-1] == ')'):
                author = author[:-1]
            author = author.strip()
            context.setdefault('%s_detail' % key, FeedParserDict())
            context['%s_detail' % key]['name'] = author
            context['%s_detail' % key]['email'] = email

    def _start_subtitle(self, attrsD):
        self.pushContent('subtitle', attrsD, 'text/plain', 1)
    _start_tagline = _start_subtitle
    _start_itunes_subtitle = _start_subtitle

    def _end_subtitle(self):
        self.popContent('subtitle')
    _end_tagline = _end_subtitle
    _end_itunes_subtitle = _end_subtitle

    def _start_rights(self, attrsD):
        self.pushContent('rights', attrsD, 'text/plain', 1)
    _start_dc_rights = _start_rights
    _start_copyright = _start_rights

    def _end_rights(self):
        self.popContent('rights')
    _end_dc_rights = _end_rights
    _end_copyright = _end_rights
+
+    def _start_item(self, attrsD):
+        self.entries.append(FeedParserDict())
+        self.push('item', 0)
+        self.inentry = 1
+        self.guidislink = 0
+        id = self._getAttribute(attrsD, 'rdf:about')
+        if id:
+            context = self._getContext()
+            context['id'] = id
+        self._cdf_common(attrsD)
+    _start_entry = _start_item
+    _start_product = _start_item
+
    def _end_item(self):
        self.pop('item')
        self.inentry = 0
    _end_entry = _end_item

    def _start_dc_language(self, attrsD):
        self.push('language', 1)
    _start_language = _start_dc_language

    def _end_dc_language(self):
        # remember the feed language for subsequent content elements
        self.lang = self.pop('language')
    _end_language = _end_dc_language

    def _start_dc_publisher(self, attrsD):
        self.push('publisher', 1)
    _start_webmaster = _start_dc_publisher

    def _end_dc_publisher(self):
        self.pop('publisher')
        self._sync_author_detail('publisher')
    _end_webmaster = _end_dc_publisher

    def _start_published(self, attrsD):
        self.push('published', 1)
    _start_dcterms_issued = _start_published
    _start_issued = _start_published

    def _end_published(self):
        # store the raw string (via pop) plus a parsed '*_parsed' value
        value = self.pop('published')
        self._save('published_parsed', _parse_date(value))
    _end_dcterms_issued = _end_published
    _end_issued = _end_published

    def _start_updated(self, attrsD):
        self.push('updated', 1)
    _start_modified = _start_updated
    _start_dcterms_modified = _start_updated
    _start_pubdate = _start_updated
    _start_dc_date = _start_updated

    def _end_updated(self):
        value = self.pop('updated')
        parsed_value = _parse_date(value)
        self._save('updated_parsed', parsed_value)
    _end_modified = _end_updated
    _end_dcterms_modified = _end_updated
    _end_pubdate = _end_updated
    _end_dc_date = _end_updated

    def _start_created(self, attrsD):
        self.push('created', 1)
    _start_dcterms_created = _start_created

    def _end_created(self):
        value = self.pop('created')
        self._save('created_parsed', _parse_date(value))
    _end_dcterms_created = _end_created

    def _start_expirationdate(self, attrsD):
        self.push('expired', 1)

    def _end_expirationdate(self):
        self._save('expired_parsed', _parse_date(self.pop('expired')))

    def _start_cc_license(self, attrsD):
        # the license URL lives in rdf:resource; inject it as element text
        # and close the element immediately (so no _end handler is needed)
        self.push('license', 1)
        value = self._getAttribute(attrsD, 'rdf:resource')
        if value:
            self.elementstack[-1][2].append(value)
        self.pop('license')

    def _start_creativecommons_license(self, attrsD):
        self.push('license', 1)

    def _end_creativecommons_license(self):
        self.pop('license')
+
+    def _addTag(self, term, scheme, label):
+        context = self._getContext()
+        tags = context.setdefault('tags', [])
+        if (not term) and (not scheme) and (not label): return
+        value = FeedParserDict({'term': term, 'scheme': scheme, 'label': label})
+        if value not in tags:
+            tags.append(FeedParserDict({'term': term, 'scheme': scheme, 'label': label}))
+
    def _start_category(self, attrsD):
        if _debug: sys.stderr.write('entering _start_category with %s\n' % repr(attrsD))
        # Atom uses term/scheme/label attributes; RSS <category> uses a
        # 'domain' attribute and carries the term as element text.
        term = attrsD.get('term')
        scheme = attrsD.get('scheme', attrsD.get('domain'))
        label = attrsD.get('label')
        self._addTag(term, scheme, label)
        self.push('category', 1)
    _start_dc_subject = _start_category
    _start_keywords = _start_category

    def _end_itunes_keywords(self):
        # itunes:keywords is whitespace-separated; one tag per word
        for term in self.pop('itunes_keywords').split():
            self._addTag(term, 'http://www.itunes.com/', None)

    def _start_itunes_category(self, attrsD):
        self._addTag(attrsD.get('text'), 'http://www.itunes.com/', None)
        self.push('category', 1)

    def _end_category(self):
        value = self.pop('category')
        if not value: return
        context = self._getContext()
        tags = context['tags']
        if value and len(tags) and not tags[-1]['term']:
            # element text fills in the term of the tag started in
            # _start_category (attribute-less RSS category)
            tags[-1]['term'] = value
        else:
            self._addTag(value, None, None)
    _end_dc_subject = _end_category
    _end_keywords = _end_category
    _end_itunes_category = _end_category

    def _start_cloud(self, attrsD):
        self._getContext()['cloud'] = FeedParserDict(attrsD)

    def _start_link(self, attrsD):
        attrsD.setdefault('rel', 'alternate')
        attrsD.setdefault('type', 'text/html')
        attrsD = self._itsAnHrefDamnIt(attrsD)
        if attrsD.has_key('href'):
            attrsD['href'] = self.resolveURI(attrsD['href'])
        expectingText = self.infeed or self.inentry or self.insource
        context = self._getContext()
        context.setdefault('links', [])
        context['links'].append(FeedParserDict(attrsD))
        if attrsD['rel'] == 'enclosure':
            self._start_enclosure(attrsD)
        if attrsD.has_key('href'):
            # Atom-style link: the URL is in the attribute, no element text
            expectingText = 0
            if (attrsD.get('rel') == 'alternate') and (self.mapContentType(attrsD.get('type')) in self.html_types):
                context['link'] = attrsD['href']
        else:
            # RSS-style link: the URL arrives as element text (_end_link)
            self.push('link', expectingText)
    _start_producturl = _start_link

    def _end_link(self):
        value = self.pop('link')
        context = self._getContext()
        if self.intextinput:
            context['textinput']['link'] = value
        if self.inimage:
            context['image']['link'] = value
    _end_producturl = _end_link

    def _start_guid(self, attrsD):
        self.guidislink = (attrsD.get('ispermalink', 'true') == 'true')
        self.push('id', 1)

    def _end_guid(self):
        value = self.pop('id')
        self._save('guidislink', self.guidislink and not self._getContext().has_key('link'))
        if self.guidislink:
            # guid acts as link, but only if 'ispermalink' is not present or is 'true',
            # and only if the item doesn't already have a link element
            self._save('link', value)

    def _start_title(self, attrsD):
        self.pushContent('title', attrsD, 'text/plain', self.infeed or self.inentry or self.insource)

    def _start_title_low_pri(self, attrsD):
        # dc:title and media:title must not override an existing <title>
        if not self._getContext().has_key('title'):
            self._start_title(attrsD)
    _start_dc_title = _start_title_low_pri
    _start_media_title = _start_title_low_pri

    def _end_title(self):
        value = self.popContent('title')
        context = self._getContext()
        if self.intextinput:
            context['textinput']['title'] = value
        elif self.inimage:
            context['image']['title'] = value

    def _end_title_low_pri(self):
        if not self._getContext().has_key('title'):
            self._end_title()
    _end_dc_title = _end_title_low_pri
    _end_media_title = _end_title_low_pri
+
    def _start_description(self, attrsD):
        # A description seen after <summary> is treated as full content
        context = self._getContext()
        if context.has_key('summary'):
            self._summaryKey = 'content'
            self._start_content(attrsD)
        else:
            self.pushContent('description', attrsD, 'text/html', self.infeed or self.inentry or self.insource)

    def _start_abstract(self, attrsD):
        self.pushContent('description', attrsD, 'text/plain', self.infeed or self.inentry or self.insource)

    def _end_description(self):
        if self._summaryKey == 'content':
            self._end_content()
        else:
            value = self.popContent('description')
            context = self._getContext()
            if self.intextinput:
                context['textinput']['description'] = value
            elif self.inimage:
                context['image']['description'] = value
        self._summaryKey = None
    _end_abstract = _end_description

    def _start_info(self, attrsD):
        self.pushContent('info', attrsD, 'text/plain', 1)
    _start_feedburner_browserfriendly = _start_info

    def _end_info(self):
        self.popContent('info')
    _end_feedburner_browserfriendly = _end_info

    def _start_generator(self, attrsD):
        if attrsD:
            attrsD = self._itsAnHrefDamnIt(attrsD)
            if attrsD.has_key('href'):
                attrsD['href'] = self.resolveURI(attrsD['href'])
        self._getContext()['generator_detail'] = FeedParserDict(attrsD)
        self.push('generator', 1)

    def _end_generator(self):
        value = self.pop('generator')
        context = self._getContext()
        if context.has_key('generator_detail'):
            context['generator_detail']['name'] = value

    def _start_admin_generatoragent(self, attrsD):
        # admin:generatorAgent is attribute-only: pull rdf:resource
        # through the element stack so it is normalized like element text
        self.push('generator', 1)
        value = self._getAttribute(attrsD, 'rdf:resource')
        if value:
            self.elementstack[-1][2].append(value)
        self.pop('generator')
        self._getContext()['generator_detail'] = FeedParserDict({'href': value})

    def _start_admin_errorreportsto(self, attrsD):
        self.push('errorreportsto', 1)
        value = self._getAttribute(attrsD, 'rdf:resource')
        if value:
            self.elementstack[-1][2].append(value)
        self.pop('errorreportsto')

    def _start_summary(self, attrsD):
        # A second summary is treated as content (same trick as
        # _start_description above)
        context = self._getContext()
        if context.has_key('summary'):
            self._summaryKey = 'content'
            self._start_content(attrsD)
        else:
            self._summaryKey = 'summary'
            self.pushContent(self._summaryKey, attrsD, 'text/plain', 1)
    _start_itunes_summary = _start_summary

    def _end_summary(self):
        if self._summaryKey == 'content':
            self._end_content()
        else:
            self.popContent(self._summaryKey or 'summary')
        self._summaryKey = None
    _end_itunes_summary = _end_summary

    def _start_enclosure(self, attrsD):
        attrsD = self._itsAnHrefDamnIt(attrsD)
        self._getContext().setdefault('enclosures', []).append(FeedParserDict(attrsD))
        href = attrsD.get('href')
        if href:
            context = self._getContext()
            if not context.get('id'):
                # an enclosure URL serves as a fallback entry id
                context['id'] = href

    def _start_source(self, attrsD):
        self.insource = 1

    def _end_source(self):
        self.insource = 0
        # snapshot the accumulated <source> data into the current entry
        self._getContext()['source'] = copy.deepcopy(self.sourcedata)
        self.sourcedata.clear()

    def _start_content(self, attrsD):
        self.pushContent('content', attrsD, 'text/plain', 1)
        src = attrsD.get('src')
        if src:
            self.contentparams['src'] = src
        self.push('content', 1)

    def _start_prodlink(self, attrsD):
        self.pushContent('content', attrsD, 'text/html', 1)

    def _start_body(self, attrsD):
        self.pushContent('content', attrsD, 'application/xhtml+xml', 1)
    _start_xhtml_body = _start_body

    def _start_content_encoded(self, attrsD):
        self.pushContent('content', attrsD, 'text/html', 1)
    _start_fullitem = _start_content_encoded

    def _end_content(self):
        # plain-text and HTML content doubles as the entry description
        copyToDescription = self.mapContentType(self.contentparams.get('type')) in (['text/plain'] + self.html_types)
        value = self.popContent('content')
        if copyToDescription:
            self._save('description', value)
    _end_body = _end_content
    _end_xhtml_body = _end_content
    _end_content_encoded = _end_content
    _end_fullitem = _end_content
    _end_prodlink = _end_content

    def _start_itunes_image(self, attrsD):
        self.push('itunes_image', 0)
        self._getContext()['image'] = FeedParserDict({'href': attrsD.get('href')})
    _start_itunes_link = _start_itunes_image

    def _end_itunes_block(self):
        value = self.pop('itunes_block', 0)
        # normalize 'yes'/'no' element text to 1/0
        self._getContext()['itunes_block'] = (value == 'yes') and 1 or 0

    def _end_itunes_explicit(self):
        value = self.pop('itunes_explicit', 0)
        self._getContext()['itunes_explicit'] = (value == 'yes') and 1 or 0
+
if _XML_AVAILABLE:
    class _StrictFeedParser(_FeedParserMixin, xml.sax.handler.ContentHandler):
        # SAX-driven parser used when a real XML parser is available.
        # Sets self.bozo/self.exc on parse errors so the caller can fall
        # back to the loose (sgmllib-based) parser.
        def __init__(self, baseuri, baselang, encoding):
            if _debug: sys.stderr.write('trying StrictFeedParser\n')
            xml.sax.handler.ContentHandler.__init__(self)
            _FeedParserMixin.__init__(self, baseuri, baselang, encoding)
            self.bozo = 0
            self.exc = None
        
        def startPrefixMapping(self, prefix, uri):
            self.trackNamespace(prefix, uri)
        
        def startElementNS(self, name, qname, attrs):
            # Normalize the SAX (namespace, localname) pair into the
            # 'prefix:localname' form the mixin's handlers expect.
            namespace, localname = name
            lowernamespace = str(namespace or '').lower()
            if lowernamespace.find('backend.userland.com/rss') <> -1:
                # match any backend.userland.com namespace
                namespace = 'http://backend.userland.com/rss'
                lowernamespace = namespace
            if qname and qname.find(':') > 0:
                givenprefix = qname.split(':')[0]
            else:
                givenprefix = None
            prefix = self._matchnamespaces.get(lowernamespace, givenprefix)
            if givenprefix and (prefix == None or (prefix == '' and lowernamespace == '')) and not self.namespacesInUse.has_key(givenprefix):
                    raise UndeclaredNamespace, "'%s' is not associated with a namespace" % givenprefix
            if prefix:
                localname = prefix + ':' + localname
            localname = str(localname).lower()
            if _debug: sys.stderr.write('startElementNS: qname = %s, namespace = %s, givenprefix = %s, prefix = %s, attrs = %s, localname = %s\n' % (qname, namespace, givenprefix, prefix, attrs.items(), localname))

            # qname implementation is horribly broken in Python 2.1 (it
            # doesn't report any), and slightly broken in Python 2.2 (it
            # doesn't report the xml: namespace). So we match up namespaces
            # with a known list first, and then possibly override them with
            # the qnames the SAX parser gives us (if indeed it gives us any
            # at all).  Thanks to MatejC for helping me test this and
            # tirelessly telling me that it didn't work yet.
            attrsD = {}
            for (namespace, attrlocalname), attrvalue in attrs._attrs.items():
                lowernamespace = (namespace or '').lower()
                prefix = self._matchnamespaces.get(lowernamespace, '')
                if prefix:
                    attrlocalname = prefix + ':' + attrlocalname
                attrsD[str(attrlocalname).lower()] = attrvalue
            for qname in attrs.getQNames():
                attrsD[str(qname).lower()] = attrs.getValueByQName(qname)
            self.unknown_starttag(localname, attrsD.items())

        def characters(self, text):
            self.handle_data(text)

        def endElementNS(self, name, qname):
            # Mirror startElementNS's name normalization for end tags.
            namespace, localname = name
            lowernamespace = str(namespace or '').lower()
            if qname and qname.find(':') > 0:
                givenprefix = qname.split(':')[0]
            else:
                givenprefix = ''
            prefix = self._matchnamespaces.get(lowernamespace, givenprefix)
            if prefix:
                localname = prefix + ':' + localname
            localname = str(localname).lower()
            self.unknown_endtag(localname)

        def error(self, exc):
            # recoverable parse error: flag the feed as 'bozo'
            self.bozo = 1
            self.exc = exc
            
        def fatalError(self, exc):
            self.error(exc)
            raise exc
+
class _BaseHTMLProcessor(sgmllib.SGMLParser):
    # Pass-through SGML parser: reconstructs the document it is fed,
    # fragment by fragment, into self.pieces so that subclasses can
    # intercept and rewrite individual constructs.

    # HTML elements with no closing tag (serialized as '<tag />')
    elements_no_end_tag = ['area', 'base', 'basefont', 'br', 'col', 'frame', 'hr',
      'img', 'input', 'isindex', 'link', 'meta', 'param']
    
    def __init__(self, encoding):
        # encoding: charset used to encode unicode input before handing it
        # to sgmllib, which operates on byte strings
        self.encoding = encoding
        if _debug: sys.stderr.write('entering BaseHTMLProcessor, encoding=%s\n' % self.encoding)
        sgmllib.SGMLParser.__init__(self)
        
    def reset(self):
        # self.pieces accumulates the reconstructed output fragments
        self.pieces = []
        sgmllib.SGMLParser.reset(self)

    def _shorttag_replace(self, match):
        # regex callback: expand XML-style '<tag/>' into valid HTML
        tag = match.group(1)
        if tag in self.elements_no_end_tag:
            return '<' + tag + ' />'
        else:
            return '<' + tag + '></' + tag + '>'
        
    def feed(self, data):
        # Pre-process markup that sgmllib would otherwise mangle, then
        # encode unicode input and hand it to the SGML parser.
        data = re.compile(r'<!((?!DOCTYPE|--|\[))', re.IGNORECASE).sub(r'&lt;!\1', data)
        #data = re.sub(r'<(\S+?)\s*?/>', self._shorttag_replace, data) # bug [ 1399464 ] Bad regexp for _shorttag_replace
        data = re.sub(r'<([^<\s]+?)\s*/>', self._shorttag_replace, data) 
        data = data.replace('&#39;', "'")
        data = data.replace('&#34;', '"')
        if self.encoding and type(data) == type(u''):
            data = data.encode(self.encoding)
        sgmllib.SGMLParser.feed(self, data)

    def normalize_attrs(self, attrs):
        # utility method to be called by descendants
        attrs = [(k.lower(), v) for k, v in attrs]
        attrs = [(k, k in ('rel', 'type') and v.lower() or v) for k, v in attrs]
        return attrs

    def unknown_starttag(self, tag, attrs):
        # called for each start tag
        # attrs is a list of (attr, value) tuples
        # e.g. for <pre class='screen'>, tag='pre', attrs=[('class', 'screen')]
        if _debug: sys.stderr.write('_BaseHTMLProcessor, unknown_starttag, tag=%s\n' % tag)
        uattrs = []
        # thanks to Kevin Marks for this breathtaking hack to deal with (valid) high-bit attribute values in UTF-8 feeds
        for key, value in attrs:
            if type(value) != type(u''):
                value = unicode(value, self.encoding)
            uattrs.append((unicode(key, self.encoding), value))
        strattrs = u''.join([u' %s="%s"' % (key, value) for key, value in uattrs]).encode(self.encoding)
        if tag in self.elements_no_end_tag:
            self.pieces.append('<%(tag)s%(strattrs)s />' % locals())
        else:
            self.pieces.append('<%(tag)s%(strattrs)s>' % locals())

    def unknown_endtag(self, tag):
        # called for each end tag, e.g. for </pre>, tag will be 'pre'
        # Reconstruct the original end tag.
        if tag not in self.elements_no_end_tag:
            self.pieces.append("</%(tag)s>" % locals())

    def handle_charref(self, ref):
        # called for each character reference, e.g. for '&#160;', ref will be '160'
        # Reconstruct the original character reference.
        self.pieces.append('&#%(ref)s;' % locals())
        
    def handle_entityref(self, ref):
        # called for each entity reference, e.g. for '&copy;', ref will be 'copy'
        # Reconstruct the original entity reference.
        self.pieces.append('&%(ref)s;' % locals())

    def handle_data(self, text):
        # called for each block of plain text, i.e. outside of any tag and
        # not containing any character or entity references
        # Store the original text verbatim.
        if _debug: sys.stderr.write('_BaseHTMLProcessor, handle_text, text=%s\n' % text)
        self.pieces.append(text)
        
    def handle_comment(self, text):
        # called for each HTML comment, e.g. <!-- insert Javascript code here -->
        # Reconstruct the original comment.
        self.pieces.append('<!--%(text)s-->' % locals())
        
    def handle_pi(self, text):
        # called for each processing instruction, e.g. <?instruction>
        # Reconstruct original processing instruction.
        self.pieces.append('<?%(text)s>' % locals())

    def handle_decl(self, text):
        # called for the DOCTYPE, if present, e.g.
        # <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
        #     "http://www.w3.org/TR/html4/loose.dtd">
        # Reconstruct original DOCTYPE
        self.pieces.append('<!%(text)s>' % locals())
        
    # more permissive declaration-name matcher than sgmllib's default
    _new_declname_match = re.compile(r'[a-zA-Z][-_.a-zA-Z0-9:]*\s*').match
    def _scan_name(self, i, declstartpos):
        # Override of sgmllib's declaration-name scanner; on failure the
        # raw data is emitted as plain text instead of raising.
        rawdata = self.rawdata
        n = len(rawdata)
        if i == n:
            return None, -1
        m = self._new_declname_match(rawdata, i)
        if m:
            s = m.group()
            name = s.strip()
            if (i + len(s)) == n:
                return None, -1  # end of buffer
            return name.lower(), m.end()
        else:
            self.handle_data(rawdata)
#            self.updatepos(declstartpos, i)
            return None, -1

    def output(self):
        '''Return processed HTML as a single string'''
        return ''.join([str(p) for p in self.pieces])
+
class _LooseFeedParser(_FeedParserMixin, _BaseHTMLProcessor):
    # Feed parser used when strict XML parsing is unavailable or fails;
    # drives the sgmllib-based HTML processor instead of a SAX parser.

    def __init__(self, baseuri, baselang, encoding):
        sgmllib.SGMLParser.__init__(self)
        _FeedParserMixin.__init__(self, baseuri, baselang, encoding)

    def decodeEntities(self, element, data):
        # First undo the numeric escaping of XML special characters so the
        # loose parser sees the same named entities the strict parser would.
        for numeric, named in (('&#60;', '&lt;'), ('&#x3c;', '&lt;'),
                               ('&#62;', '&gt;'), ('&#x3e;', '&gt;'),
                               ('&#38;', '&amp;'), ('&#x26;', '&amp;'),
                               ('&#34;', '&quot;'), ('&#x22;', '&quot;'),
                               ('&#39;', '&apos;'), ('&#x27;', '&apos;')):
            data = data.replace(numeric, named)
        # Inside non-XML content the named entities are literal characters,
        # so resolve them as well (same replacement order as before).
        if self.contentparams.has_key('type') and not self.contentparams.get('type', 'xml').endswith('xml'):
            for named, literal in (('&lt;', '<'), ('&gt;', '>'),
                                   ('&amp;', '&'), ('&quot;', '"'),
                                   ('&apos;', "'")):
                data = data.replace(named, literal)
        return data
+        
class _RelativeURIResolver(_BaseHTMLProcessor):
    # HTML processor that rewrites relative URI attribute values
    # (href, src, cite, ...) against a base URI.

    # (tag, attribute) pairs whose values are URIs and may be relative
    relative_uris = [('a', 'href'),
                     ('applet', 'codebase'),
                     ('area', 'href'),
                     ('blockquote', 'cite'),
                     ('body', 'background'),
                     ('del', 'cite'),
                     ('form', 'action'),
                     ('frame', 'longdesc'),
                     ('frame', 'src'),
                     ('iframe', 'longdesc'),
                     ('iframe', 'src'),
                     ('head', 'profile'),
                     ('img', 'longdesc'),
                     ('img', 'src'),
                     ('img', 'usemap'),
                     ('input', 'src'),
                     ('input', 'usemap'),
                     ('ins', 'cite'),
                     ('link', 'href'),
                     ('object', 'classid'),
                     ('object', 'codebase'),
                     ('object', 'data'),
                     ('object', 'usemap'),
                     ('q', 'cite'),
                     ('script', 'src')]

    def __init__(self, baseuri, encoding):
        _BaseHTMLProcessor.__init__(self, encoding)
        self.baseuri = baseuri

    def resolveURI(self, uri):
        # join against the document base; absolute URIs pass through
        return _urljoin(self.baseuri, uri)
    
    def unknown_starttag(self, tag, attrs):
        attrs = self.normalize_attrs(attrs)
        # Resolve each URI-bearing attribute.  An explicit branch replaces
        # the original 'and/or' conditional expression, which silently fell
        # back to the unresolved value whenever the resolved URI was falsy
        # (e.g. resolved to the empty string).
        resolved = []
        for key, value in attrs:
            if (tag, key) in self.relative_uris:
                value = self.resolveURI(value)
            resolved.append((key, value))
        _BaseHTMLProcessor.unknown_starttag(self, tag, resolved)
+        
def _resolveRelativeURIs(htmlSource, baseURI, encoding):
    '''Return htmlSource with every relative URI rewritten against baseURI.'''
    if _debug:
        sys.stderr.write('entering _resolveRelativeURIs\n')
    resolver = _RelativeURIResolver(baseURI, encoding)
    resolver.feed(htmlSource)
    return resolver.output()
+
class _HTMLSanitizer(_BaseHTMLProcessor):
    '''Filters HTML down to a whitelist of safe elements and attributes.

    Non-whitelisted tags are dropped; the textual content of script and
    applet elements is suppressed entirely until the matching end tag.
    '''

    acceptable_elements = ['a', 'abbr', 'acronym', 'address', 'area', 'b', 'big',
      'blockquote', 'br', 'button', 'caption', 'center', 'cite', 'code', 'col',
      'colgroup', 'dd', 'del', 'dfn', 'dir', 'div', 'dl', 'dt', 'em', 'fieldset',
      'font', 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i', 'img', 'input',
      'ins', 'kbd', 'label', 'legend', 'li', 'map', 'menu', 'ol', 'optgroup',
      'option', 'p', 'pre', 'q', 's', 'samp', 'select', 'small', 'span', 'strike',
      'strong', 'sub', 'sup', 'table', 'tbody', 'td', 'textarea', 'tfoot', 'th',
      'thead', 'tr', 'tt', 'u', 'ul', 'var']

    acceptable_attributes = ['abbr', 'accept', 'accept-charset', 'accesskey',
      'action', 'align', 'alt', 'axis', 'border', 'cellpadding', 'cellspacing',
      'char', 'charoff', 'charset', 'checked', 'cite', 'class', 'clear', 'cols',
      'colspan', 'color', 'compact', 'coords', 'datetime', 'dir', 'disabled',
      'enctype', 'for', 'frame', 'headers', 'height', 'href', 'hreflang', 'hspace',
      'id', 'ismap', 'label', 'lang', 'longdesc', 'maxlength', 'media', 'method',
      'multiple', 'name', 'nohref', 'noshade', 'nowrap', 'prompt', 'readonly',
      'rel', 'rev', 'rows', 'rowspan', 'rules', 'scope', 'selected', 'shape', 'size',
      'span', 'src', 'start', 'summary', 'tabindex', 'target', 'title', 'type',
      'usemap', 'valign', 'value', 'vspace', 'width']

    # Elements whose character data must also be discarded, not just the tags.
    unacceptable_elements_with_end_tag = ['script', 'applet']

    def reset(self):
        _BaseHTMLProcessor.reset(self)
        # Nesting depth: > 0 while inside a script/applet element.
        self.unacceptablestack = 0

    def unknown_starttag(self, tag, attrs):
        if tag not in self.acceptable_elements:
            if tag in self.unacceptable_elements_with_end_tag:
                self.unacceptablestack += 1
            return
        attrs = self.normalize_attrs(attrs)
        safe_attrs = []
        for key, value in attrs:
            if key in self.acceptable_attributes:
                safe_attrs.append((key, value))
        _BaseHTMLProcessor.unknown_starttag(self, tag, safe_attrs)

    def unknown_endtag(self, tag):
        if tag not in self.acceptable_elements:
            if tag in self.unacceptable_elements_with_end_tag:
                self.unacceptablestack -= 1
            return
        _BaseHTMLProcessor.unknown_endtag(self, tag)

    def handle_pi(self, text):
        # Processing instructions are never passed through.
        pass

    def handle_decl(self, text):
        # Declarations (e.g. DOCTYPE) are never passed through.
        pass

    def handle_data(self, text):
        # Drop character data that lives inside a script/applet element.
        if not self.unacceptablestack:
            _BaseHTMLProcessor.handle_data(self, text)
+
def _sanitizeHTML(htmlSource, encoding):
    '''Strip unsafe tags/attributes from htmlSource and return the result.

    Runs the markup through _HTMLSanitizer, then -- if TIDY_MARKUP is
    enabled -- through the first available Tidy binding to clean up the
    structure, keeping only the body contents Tidy produces.
    '''
    p = _HTMLSanitizer(encoding)
    p.feed(htmlSource)
    data = p.output()
    if TIDY_MARKUP:
        # loop through list of preferred Tidy interfaces looking for one that's installed,
        # then set up a common _tidy function to wrap the interface-specific API.
        _tidy = None
        for tidy_interface in PREFERRED_TIDY_INTERFACES:
            try:
                if tidy_interface == "uTidy":
                    from tidy import parseString as _utidy
                    def _tidy(data, **kwargs):
                        return str(_utidy(data, **kwargs))
                    break
                elif tidy_interface == "mxTidy":
                    from mx.Tidy import Tidy as _mxtidy
                    def _tidy(data, **kwargs):
                        nerrors, nwarnings, data, errordata = _mxtidy.tidy(data, **kwargs)
                        return data
                    break
            except:
                # Binding not installed/importable; try the next one.
                pass
        if _tidy:
            # NOTE(review): despite the name, 'utf8' records whether data is
            # a unicode object, so the type can be restored after Tidy runs.
            utf8 = type(data) == type(u'')
            if utf8:
                data = data.encode('utf-8')
            data = _tidy(data, output_xhtml=1, numeric_entities=1, wrap=0, char_encoding="utf8")
            if utf8:
                data = unicode(data, 'utf-8')
            # Keep only the contents between <body ...> and </body>.
            if data.count('<body'):
                data = data.split('<body', 1)[1]
                if data.count('>'):
                    data = data.split('>', 1)[1]
            if data.count('</body'):
                data = data.split('</body', 1)[0]
    data = data.strip().replace('\r\n', '\n')
    return data
+
class _FeedURLHandler(urllib2.HTTPDigestAuthHandler, urllib2.HTTPRedirectHandler, urllib2.HTTPDefaultErrorHandler):
    '''urllib2 handler tailored for feed fetching.

    Follows redirects while recording the HTTP status on the returned
    object, wraps other errors as pseudo-file responses (with a .status
    attribute) instead of raising, and retries basic-auth failures with
    digest auth when the server demands it.
    '''
    def http_error_default(self, req, fp, code, msg, headers):
        # Treat any 3xx other than 304 as a redirect; everything else is
        # returned as a response object carrying the status code.
        if ((code / 100) == 3) and (code != 304):
            return self.http_error_302(req, fp, code, msg, headers)
        infourl = urllib.addinfourl(fp, headers, req.get_full_url())
        infourl.status = code
        return infourl

    def http_error_302(self, req, fp, code, msg, headers):
        # Only follow the redirect when a Location header is present;
        # otherwise hand back the response as-is with its status attached.
        if headers.dict.has_key('location'):
            infourl = urllib2.HTTPRedirectHandler.http_error_302(self, req, fp, code, msg, headers)
        else:
            infourl = urllib.addinfourl(fp, headers, req.get_full_url())
        if not hasattr(infourl, 'status'):
            infourl.status = code
        return infourl

    def http_error_301(self, req, fp, code, msg, headers):
        # Same logic as http_error_302; kept as a separate method so
        # urllib2's code-based dispatch finds it.
        if headers.dict.has_key('location'):
            infourl = urllib2.HTTPRedirectHandler.http_error_301(self, req, fp, code, msg, headers)
        else:
            infourl = urllib.addinfourl(fp, headers, req.get_full_url())
        if not hasattr(infourl, 'status'):
            infourl.status = code
        return infourl

    http_error_300 = http_error_302
    http_error_303 = http_error_302
    http_error_307 = http_error_302

    def http_error_401(self, req, fp, code, msg, headers):
        # Check if
        # - server requires digest auth, AND
        # - we tried (unsuccessfully) with basic auth, AND
        # - we're using Python 2.3.3 or later (digest auth is irreparably broken in earlier versions)
        # If all conditions hold, parse authentication information
        # out of the Authorization header we sent the first time
        # (for the username and password) and the WWW-Authenticate
        # header the server sent back (for the realm) and retry
        # the request with the appropriate digest auth headers instead.
        # This evil genius hack has been brought to you by Aaron Swartz.
        host = urlparse.urlparse(req.get_full_url())[1]
        try:
            assert sys.version.split()[0] >= '2.3.3'
            assert base64 != None
            user, passw = base64.decodestring(req.headers['Authorization'].split(' ')[1]).split(':')
            realm = re.findall('realm="([^"]*)"', headers['WWW-Authenticate'])[0]
            self.add_password(realm, host, user, passw)
            retry = self.http_error_auth_reqed('www-authenticate', host, req, headers)
            self.reset_retry_count()
            return retry
        except:
            # Any failure above means digest retry is not possible; fall
            # back to returning the 401 response itself.
            return self.http_error_default(req, fp, code, msg, headers)
+
def _open_resource(url_file_stream_or_string, etag, modified, agent, referrer, handlers):
    """URL, filename, or string --> stream

    This function lets you define parsers that take any input source
    (URL, pathname to local or network file, or actual data as a string)
    and deal with it in a uniform manner.  Returned object is guaranteed
    to have all the basic stdio read methods (read, readline, readlines).
    Just .close() the object when you're done with it.

    If the etag argument is supplied, it will be used as the value of an
    If-None-Match request header.

    If the modified argument is supplied, it must be a tuple of 9 integers
    as returned by gmtime() in the standard Python time module. This MUST
    be in GMT (Greenwich Mean Time). The formatted date/time will be used
    as the value of an If-Modified-Since request header.

    If the agent argument is supplied, it will be used as the value of a
    User-Agent request header.

    If the referrer argument is supplied, it will be used as the value of a
    Referer[sic] request header.

    If handlers is supplied, it is a list of handlers used to build a
    urllib2 opener.
    """

    # Anything file-like is used directly.
    if hasattr(url_file_stream_or_string, 'read'):
        return url_file_stream_or_string

    # '-' conventionally means standard input.
    if url_file_stream_or_string == '-':
        return sys.stdin

    if urlparse.urlparse(url_file_stream_or_string)[0] in ('http', 'https', 'ftp'):
        if not agent:
            agent = USER_AGENT
        # test for inline user:password for basic auth
        auth = None
        if base64:
            urltype, rest = urllib.splittype(url_file_stream_or_string)
            realhost, rest = urllib.splithost(rest)
            if realhost:
                user_passwd, realhost = urllib.splituser(realhost)
                if user_passwd:
                    # Strip the credentials out of the URL; they will be
                    # sent in an Authorization header instead.
                    url_file_stream_or_string = '%s://%s%s' % (urltype, realhost, rest)
                    auth = base64.encodestring(user_passwd).strip()
        # try to open with urllib2 (to use optional headers)
        request = urllib2.Request(url_file_stream_or_string)
        request.add_header('User-Agent', agent)
        if etag:
            request.add_header('If-None-Match', etag)
        if modified:
            # format into an RFC 1123-compliant timestamp. We can't use
            # time.strftime() since the %a and %b directives can be affected
            # by the current locale, but RFC 2616 states that dates must be
            # in English.
            short_weekdays = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
            months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
            request.add_header('If-Modified-Since', '%s, %02d %s %04d %02d:%02d:%02d GMT' % (short_weekdays[modified[6]], modified[2], months[modified[1] - 1], modified[0], modified[3], modified[4], modified[5]))
        if referrer:
            request.add_header('Referer', referrer)
        # Advertise only the compression schemes this process can decode.
        if gzip and zlib:
            request.add_header('Accept-encoding', 'gzip, deflate')
        elif gzip:
            request.add_header('Accept-encoding', 'gzip')
        elif zlib:
            request.add_header('Accept-encoding', 'deflate')
        else:
            request.add_header('Accept-encoding', '')
        if auth:
            request.add_header('Authorization', 'Basic %s' % auth)
        if ACCEPT_HEADER:
            request.add_header('Accept', ACCEPT_HEADER)
        request.add_header('A-IM', 'feed') # RFC 3229 support
        opener = apply(urllib2.build_opener, tuple([_FeedURLHandler()] + handlers))
        opener.addheaders = [] # RMK - must clear so we only send our custom User-Agent
        try:
            return opener.open(request)
        finally:
            opener.close() # JohnD
    
    # try to open with native open function (if url_file_stream_or_string is a filename)
    try:
        return open(url_file_stream_or_string)
    except:
        pass

    # treat url_file_stream_or_string as string
    return _StringIO(str(url_file_stream_or_string))
+
# Registered date-parsing functions, tried in order by _parse_date().
_date_handlers = []
def registerDateHandler(func):
    '''Register a date handler function (takes string, returns 9-tuple date in GMT)'''
    # New handlers go to the front, so the most recently registered
    # handler is tried first.
    _date_handlers.insert(0, func)
+    
# ISO-8601 date parsing routines written by Fazal Majid.
# The ISO 8601 standard is very convoluted and irregular - a full ISO 8601
# parser is beyond the scope of feedparser and would be a worthwhile addition
# to the Python library.
# A single regular expression cannot parse ISO 8601 date formats into groups
# as the standard is highly irregular (for instance is 030104 2003-01-04 or
# 0301-04-01), so we use templates instead.
# Please note the order in templates is significant because we need a
# greedy match.
_iso8601_tmpl = ['YYYY-?MM-?DD', 'YYYY-MM', 'YYYY-?OOO',
                'YY-?MM-?DD', 'YY-?OOO', 'YYYY', 
                '-YY-?MM', '-OOO', '-YY',
                '--MM-?DD', '--MM',
                '---DD',
                'CC', '']
# Expand each template into a real regex: YYYY/YY -> year, MM -> month,
# DD -> day, OOO -> ordinal day-of-year, CC -> century, plus an optional
# time-of-day and time zone suffix.
_iso8601_re = [
    tmpl.replace(
    'YYYY', r'(?P<year>\d{4})').replace(
    'YY', r'(?P<year>\d\d)').replace(
    'MM', r'(?P<month>[01]\d)').replace(
    'DD', r'(?P<day>[0123]\d)').replace(
    'OOO', r'(?P<ordinal>[0123]\d\d)').replace(
    'CC', r'(?P<century>\d\d$)')
    + r'(T?(?P<hour>\d{2}):(?P<minute>\d{2})'
    + r'(:(?P<second>\d{2}))?'
    + r'(?P<tz>[+-](?P<tzhour>\d{2})(:(?P<tzmin>\d{2}))?|Z)?)?'
    for tmpl in _iso8601_tmpl]
del tmpl  # don't leak the comprehension variable at module scope
_iso8601_matches = [re.compile(regex).match for regex in _iso8601_re]
del regex
def _parse_date_iso8601(dateString):
    '''Parse a variety of ISO-8601-compatible formats like 20040105'''
    # Try each precompiled template regex in order (greediest first).
    m = None
    for _iso8601_match in _iso8601_matches:
        m = _iso8601_match(dateString)
        if m: break
    if not m: return
    # The final template is the empty string, which matches anything with
    # a zero-width span; treat that as "no match".
    if m.span() == (0, 0): return
    params = m.groupdict()
    ordinal = params.get('ordinal', 0)
    if ordinal:
        ordinal = int(ordinal)
    else:
        ordinal = 0
    # Missing date parts default to "today" in GMT.
    year = params.get('year', '--')
    if not year or year == '--':
        year = time.gmtime()[0]
    elif len(year) == 2:
        # ISO 8601 assumes current century, i.e. 93 -> 2093, NOT 1993
        year = 100 * int(time.gmtime()[0] / 100) + int(year)
    else:
        year = int(year)
    month = params.get('month', '-')
    if not month or month == '-':
        # ordinals are NOT normalized by mktime, we simulate them
        # by setting month=1, day=ordinal
        if ordinal:
            month = 1
        else:
            month = time.gmtime()[1]
    month = int(month)
    day = params.get('day', 0)
    if not day:
        # see above
        if ordinal:
            day = ordinal
        elif params.get('century', 0) or \
                 params.get('year', 0) or params.get('month', 0):
            day = 1
        else:
            day = time.gmtime()[2]
    else:
        day = int(day)
    # special case of the century - is the first year of the 21st century
    # 2000 or 2001 ? The debate goes on...
    if 'century' in params.keys():
        year = (int(params['century']) - 1) * 100 + 1
    # in ISO 8601 most fields are optional
    for field in ['hour', 'minute', 'second', 'tzhour', 'tzmin']:
        if not params.get(field, None):
            params[field] = 0
    hour = int(params.get('hour', 0))
    minute = int(params.get('minute', 0))
    second = int(params.get('second', 0))
    # weekday is normalized by mktime(), we can ignore it
    weekday = 0
    # daylight savings is complex, but not needed for feedparser's purposes
    # as time zones, if specified, include mention of whether it is active
    # (e.g. PST vs. PDT, CET). Using -1 is implementation-dependent and
    # most implementations have DST bugs
    daylight_savings_flag = 0
    tm = [year, month, day, hour, minute, second, weekday,
          ordinal, daylight_savings_flag]
    # ISO 8601 time zone adjustments: shift the wall-clock fields by the
    # offset so the resulting tuple represents GMT.
    tz = params.get('tz')
    if tz and tz != 'Z':
        if tz[0] == '-':
            tm[3] += int(params.get('tzhour', 0))
            tm[4] += int(params.get('tzmin', 0))
        elif tz[0] == '+':
            tm[3] -= int(params.get('tzhour', 0))
            tm[4] -= int(params.get('tzmin', 0))
        else:
            return None
    # Python's time.mktime() is a wrapper around the ANSI C mktime(3c)
    # which is guaranteed to normalize d/m/y/h/m/s.
    # Many implementations have bugs, but we'll pretend they don't.
    return time.localtime(time.mktime(tm))
registerDateHandler(_parse_date_iso8601)
+    
# 8-bit date handling routines written by ytrewq1.
_korean_year  = u'\ub144' # b3e2 in euc-kr
_korean_month = u'\uc6d4' # bff9 in euc-kr
_korean_day   = u'\uc77c' # c0cf in euc-kr
_korean_am    = u'\uc624\uc804' # bfc0 c0fc in euc-kr
_korean_pm    = u'\uc624\ud6c4' # bfc0 c8c4 in euc-kr

# 'YYYY<year> MM<month> DD<day> HH:MM:SS' -- used by _parse_date_onblog.
_korean_onblog_date_re = \
    re.compile('(\d{4})%s\s+(\d{2})%s\s+(\d{2})%s\s+(\d{2}):(\d{2}):(\d{2})' % \
               (_korean_year, _korean_month, _korean_day))
# 'YYYY-MM-DD <AM|PM> H:M:S' (time fields may be 1-2 digits) -- used by
# _parse_date_nate.
_korean_nate_date_re = \
    re.compile(u'(\d{4})-(\d{2})-(\d{2})\s+(%s|%s)\s+(\d{,2}):(\d{,2}):(\d{,2})' % \
               (_korean_am, _korean_pm))
def _parse_date_onblog(dateString):
    '''Parse a string according to the OnBlog 8-bit date format'''
    m = _korean_onblog_date_re.match(dateString)
    if not m:
        return
    # Reassemble as W3DTF with the KST (+09:00) offset and delegate.
    w3dtfdate = '%s-%s-%sT%s:%s:%s+09:00' % m.groups()
    if _debug: sys.stderr.write('OnBlog date parsed as: %s\n' % w3dtfdate)
    return _parse_date_w3dtf(w3dtfdate)
registerDateHandler(_parse_date_onblog)
+
def _parse_date_nate(dateString):
    '''Parse a string according to the Nate 8-bit date format'''
    m = _korean_nate_date_re.match(dateString)
    if not m:
        return
    # Convert the 12-hour AM/PM clock to 24-hour.
    hour = int(m.group(5))
    if m.group(4) == _korean_pm:
        hour += 12
    # Reassemble as W3DTF with the KST (+09:00) offset and delegate.
    w3dtfdate = '%s-%s-%sT%02d:%s:%s+09:00' % (
        m.group(1), m.group(2), m.group(3), hour, m.group(6), m.group(7))
    if _debug: sys.stderr.write('Nate date parsed as: %s\n' % w3dtfdate)
    return _parse_date_w3dtf(w3dtfdate)
registerDateHandler(_parse_date_nate)
+
# 'YYYY-MM-DD HH:MM:SS[.fraction]' as emitted by MS SQL Server.
_mssql_date_re = \
    re.compile('(\d{4})-(\d{2})-(\d{2})\s+(\d{2}):(\d{2}):(\d{2})(\.\d+)?')
def _parse_date_mssql(dateString):
    '''Parse a string according to the MS SQL date format'''
    m = _mssql_date_re.match(dateString)
    if not m:
        return
    # NOTE(review): the +09:00 offset mirrors the Korean-feed parsers
    # above; presumably this format was only seen in KST feeds -- confirm
    # before changing.  The fractional-seconds group is discarded.
    w3dtfdate = '%s-%s-%sT%s:%s:%s+09:00' % m.groups()[:6]
    if _debug: sys.stderr.write('MS SQL date parsed as: %s\n' % w3dtfdate)
    return _parse_date_w3dtf(w3dtfdate)
registerDateHandler(_parse_date_mssql)
+
# Unicode strings for Greek date strings
# Maps Greek month abbreviations (several spelling variants) to English.
_greek_months = \
  { \
   u'\u0399\u03b1\u03bd': u'Jan',       # c9e1ed in iso-8859-7
   u'\u03a6\u03b5\u03b2': u'Feb',       # d6e5e2 in iso-8859-7
   u'\u039c\u03ac\u03ce': u'Mar',       # ccdcfe in iso-8859-7
   u'\u039c\u03b1\u03ce': u'Mar',       # cce1fe in iso-8859-7
   u'\u0391\u03c0\u03c1': u'Apr',       # c1f0f1 in iso-8859-7
   u'\u039c\u03ac\u03b9': u'May',       # ccdce9 in iso-8859-7
   u'\u039c\u03b1\u03ca': u'May',       # cce1fa in iso-8859-7
   u'\u039c\u03b1\u03b9': u'May',       # cce1e9 in iso-8859-7
   u'\u0399\u03bf\u03cd\u03bd': u'Jun', # c9effded in iso-8859-7
   u'\u0399\u03bf\u03bd': u'Jun',       # c9efed in iso-8859-7
   u'\u0399\u03bf\u03cd\u03bb': u'Jul', # c9effdeb in iso-8859-7
   u'\u0399\u03bf\u03bb': u'Jul',       # c9f9eb in iso-8859-7
   u'\u0391\u03cd\u03b3': u'Aug',       # c1fde3 in iso-8859-7
   u'\u0391\u03c5\u03b3': u'Aug',       # c1f5e3 in iso-8859-7
   u'\u03a3\u03b5\u03c0': u'Sep',       # d3e5f0 in iso-8859-7
   u'\u039f\u03ba\u03c4': u'Oct',       # cfeaf4 in iso-8859-7
   u'\u039d\u03bf\u03ad': u'Nov',       # cdefdd in iso-8859-7
   u'\u039d\u03bf\u03b5': u'Nov',       # cdefe5 in iso-8859-7
   u'\u0394\u03b5\u03ba': u'Dec',       # c4e5ea in iso-8859-7
  }

# Maps Greek weekday abbreviations to English.
_greek_wdays = \
  { \
   u'\u039a\u03c5\u03c1': u'Sun', # caf5f1 in iso-8859-7
   u'\u0394\u03b5\u03c5': u'Mon', # c4e5f5 in iso-8859-7
   u'\u03a4\u03c1\u03b9': u'Tue', # d4f1e9 in iso-8859-7
   u'\u03a4\u03b5\u03c4': u'Wed', # d4e5f4 in iso-8859-7
   u'\u03a0\u03b5\u03bc': u'Thu', # d0e5ec in iso-8859-7
   u'\u03a0\u03b1\u03c1': u'Fri', # d0e1f1 in iso-8859-7
   u'\u03a3\u03b1\u03b2': u'Sat', # d3e1e2 in iso-8859-7   
  }

# 'WDay, DD Month YYYY HH:MM:SS Zone' with Greek weekday/month names.
_greek_date_format_re = \
    re.compile(u'([^,]+),\s+(\d{2})\s+([^\s]+)\s+(\d{4})\s+(\d{2}):(\d{2}):(\d{2})\s+([^\s]+)')
+
def _parse_date_greek(dateString):
    '''Parse a string according to a Greek 8-bit date format.'''
    m = _greek_date_format_re.match(dateString)
    if not m: return
    try:
        wday = _greek_wdays[m.group(1)]
        month = _greek_months[m.group(3)]
    except KeyError:
        # Weekday or month name not recognized as Greek; give up so the
        # next registered date handler can try.  (Was a bare 'except:',
        # which also swallowed KeyboardInterrupt/SystemExit.)
        return
    # Rebuild the date with English names and hand off to the RFC822 parser.
    rfc822date = '%(wday)s, %(day)s %(month)s %(year)s %(hour)s:%(minute)s:%(second)s %(zonediff)s' % \
                 {'wday': wday, 'day': m.group(2), 'month': month, 'year': m.group(4),\
                  'hour': m.group(5), 'minute': m.group(6), 'second': m.group(7),\
                  'zonediff': m.group(8)}
    if _debug: sys.stderr.write('Greek date parsed as: %s\n' % rfc822date)
    return _parse_date_rfc822(rfc822date)
registerDateHandler(_parse_date_greek)
+
+# Unicode strings for Hungarian date strings
+_hungarian_months = \
+  { \
+    u'janu\u00e1r':   u'01',  # e1 in iso-8859-2
+    u'febru\u00e1ri': u'02',  # e1 in iso-8859-2
+    u'm\u00e1rcius':  u'03',  # e1 in iso-8859-2
+    u'\u00e1prilis':  u'04',  # e1 in iso-8859-2
+    u'm\u00e1ujus':   u'05',  # e1 in iso-8859-2
+    u'j\u00fanius':   u'06',  # fa in iso-8859-2
+    u'j\u00falius':   u'07',  # fa in iso-8859-2
+    u'augusztus':     u'08',
+    u'szeptember':    u'09',
+    u'okt\u00f3ber':  u'10',  # f3 in iso-8859-2
+    u'november':      u'11',
+    u'december':      u'12',
+  }
+
+_hungarian_date_format_re = \
+  re.compile(u'(\d{4})-([^-]+)-(\d{,2})T(\d{,2}):(\d{2})((\+|-)(\d{,2}:\d{2}))')
+
def _parse_date_hungarian(dateString):
    '''Parse a string according to a Hungarian 8-bit date format.'''
    m = _hungarian_date_format_re.match(dateString)
    if not m: return
    try:
        month = _hungarian_months[m.group(2)]
    except KeyError:
        # Month name not recognized as Hungarian; give up so the next
        # registered date handler can try.  (Was a bare 'except:', which
        # also swallowed KeyboardInterrupt/SystemExit.)
        return
    # Zero-pad single-digit day and hour (the regex allows 1-2 digits).
    day = m.group(3)
    if len(day) == 1:
        day = '0' + day
    hour = m.group(4)
    if len(hour) == 1:
        hour = '0' + hour
    # Rebuild as W3DTF and hand off to the W3DTF parser.
    w3dtfdate = '%(year)s-%(month)s-%(day)sT%(hour)s:%(minute)s%(zonediff)s' % \
                {'year': m.group(1), 'month': month, 'day': day,\
                 'hour': hour, 'minute': m.group(5),\
                 'zonediff': m.group(6)}
    if _debug: sys.stderr.write('Hungarian date parsed as: %s\n' % w3dtfdate)
    return _parse_date_w3dtf(w3dtfdate)
registerDateHandler(_parse_date_hungarian)
+
# W3DTF-style date parsing adapted from PyXML xml.utils.iso8601, written by
# Drake and licensed under the Python license.  Removed all range checking
# for month, day, hour, minute, and second, since mktime will normalize
# these later
def _parse_date_w3dtf(dateString):
    '''Parse a W3C Date and Time Format string into a GMT 9-tuple.'''
    def __extract_date(m):
        # Returns (year, month, day); (0, 0, 0) signals an unusable date.
        year = int(m.group('year'))
        if year < 100:
            # Two-digit years are assumed to be in the current century.
            year = 100 * int(time.gmtime()[0] / 100) + int(year)
        if year < 1000:
            return 0, 0, 0
        julian = m.group('julian')
        if julian:
            # Ordinal (day-of-year) date: start from an approximation and
            # iteratively adjust month/day until mktime's day-of-year agrees.
            julian = int(julian)
            month = julian / 30 + 1
            day = julian % 30 + 1
            jday = None
            while jday != julian:
                t = time.mktime((year, month, day, 0, 0, 0, 0, 0, 0))
                jday = time.gmtime(t)[-2]
                diff = abs(jday - julian)
                if jday > julian:
                    if diff < day:
                        day = day - diff
                    else:
                        month = month - 1
                        day = 31
                elif jday < julian:
                    if day + diff < 28:
                       day = day + diff
                    else:
                        month = month + 1
            return year, month, day
        # Calendar date; month and day are optional and default to 1.
        month = m.group('month')
        day = 1
        if month is None:
            month = 1
        else:
            month = int(month)
            day = m.group('day')
            if day:
                day = int(day)
            else:
                day = 1
        return year, month, day

    def __extract_time(m):
        # Returns (hours, minutes, seconds), defaulting to midnight.
        if not m:
            return 0, 0, 0
        hours = m.group('hours')
        if not hours:
            return 0, 0, 0
        hours = int(hours)
        minutes = int(m.group('minutes'))
        seconds = m.group('seconds')
        if seconds:
            seconds = int(seconds)
        else:
            seconds = 0
        return hours, minutes, seconds

    def __extract_tzd(m):
        '''Return the Time Zone Designator as an offset in seconds from UTC.'''
        if not m:
            return 0
        tzd = m.group('tzd')
        if not tzd:
            return 0
        if tzd == 'Z':
            return 0
        hours = int(m.group('tzdhours'))
        minutes = m.group('tzdminutes')
        if minutes:
            minutes = int(minutes)
        else:
            minutes = 0
        offset = (hours*60 + minutes) * 60
        # A positive designator means local time is ahead of UTC, so the
        # correction to reach UTC is negative (and vice versa).
        if tzd[0] == '+':
            return -offset
        return offset

    # Date: 'YYYY[-MM[-DD]]' or 'YYYY-OOO' (ordinal), separator optional.
    __date_re = ('(?P<year>\d\d\d\d)'
                 '(?:(?P<dsep>-|)'
                 '(?:(?P<julian>\d\d\d)'
                 '|(?P<month>\d\d)(?:(?P=dsep)(?P<day>\d\d))?))?')
    __tzd_re = '(?P<tzd>[-+](?P<tzdhours>\d\d)(?::?(?P<tzdminutes>\d\d))|Z)'
    __tzd_rx = re.compile(__tzd_re)
    __time_re = ('(?P<hours>\d\d)(?P<tsep>:|)(?P<minutes>\d\d)'
                 '(?:(?P=tsep)(?P<seconds>\d\d(?:[.,]\d+)?))?'
                 + __tzd_re)
    __datetime_re = '%s(?:T%s)?' % (__date_re, __time_re)
    __datetime_rx = re.compile(__datetime_re)
    m = __datetime_rx.match(dateString)
    # The whole string must match, not just a prefix.
    if (m is None) or (m.group() != dateString): return
    gmt = __extract_date(m) + __extract_time(m) + (0, 0, 0)
    if gmt[0] == 0: return
    return time.gmtime(time.mktime(gmt) + __extract_tzd(m) - time.timezone)
registerDateHandler(_parse_date_w3dtf)
+
def _parse_date_rfc822(dateString):
    '''Parse an RFC822, RFC1123, RFC2822, or asctime-style date'''
    data = dateString.split()
    # Drop a leading day-of-week token ('Thu,' / 'Thu.' / 'thu').
    if data[0][-1] in (',', '.') or data[0].lower() in rfc822._daynames:
        del data[0]
    if len(data) == 4:
        # Four tokens: the time and '+zone' may be glued together; split
        # them so parsedate_tz sees separate time and zone fields.
        s = data[3]
        i = s.find('+')
        if i > 0:
            data[3:] = [s[:i], s[i+1:]]
        else:
            data.append('')
        dateString = " ".join(data)
    # Date-only values get a midnight GMT timestamp appended.
    if len(data) < 5:
        dateString += ' 00:00:00 GMT'
    tm = rfc822.parsedate_tz(dateString)
    if tm:
        return time.gmtime(rfc822.mktime_tz(tm))
# rfc822.py defines several time zones, but we define some extra ones.
# 'ET' is equivalent to 'EST', etc.
_additional_timezones = {'AT': -400, 'ET': -500, 'CT': -600, 'MT': -700, 'PT': -800}
rfc822._timezones.update(_additional_timezones)
registerDateHandler(_parse_date_rfc822)
+
def _parse_date(dateString):
    '''Parses a variety of date formats into a 9-tuple in GMT'''
    # Try every registered handler (most recently registered first) and
    # return the first structurally valid result; None if all fail.
    for handler in _date_handlers:
        try:
            date9tuple = handler(dateString)
            if not date9tuple: continue
            if len(date9tuple) != 9:
                if _debug: sys.stderr.write('date handler function must return 9-tuple\n')
                raise ValueError
            map(int, date9tuple)  # every field must be coercible to int
            return date9tuple
        except Exception, e:
            # A failing handler just means "not my format"; move on.
            if _debug: sys.stderr.write('%s raised %s\n' % (handler.__name__, repr(e)))
            pass
    return None
+
+def _getCharacterEncoding(http_headers, xml_data):
+    '''Get the character encoding of the XML document
+
+    http_headers is a dictionary
+    xml_data is a raw string (not Unicode)
+    
+    This is so much trickier than it sounds, it's not even funny.
+    According to RFC 3023 ('XML Media Types'), if the HTTP Content-Type
+    is application/xml, application/*+xml,
+    application/xml-external-parsed-entity, or application/xml-dtd,
+    the encoding given in the charset parameter of the HTTP Content-Type
+    takes precedence over the encoding given in the XML prefix within the
+    document, and defaults to 'utf-8' if neither are specified.  But, if
+    the HTTP Content-Type is text/xml, text/*+xml, or
+    text/xml-external-parsed-entity, the encoding given in the XML prefix
+    within the document is ALWAYS IGNORED and only the encoding given in
+    the charset parameter of the HTTP Content-Type header should be
+    respected, and it defaults to 'us-ascii' if not specified.
+
+    Furthermore, discussion on the atom-syntax mailing list with the
+    author of RFC 3023 leads me to the conclusion that any document
+    served with a Content-Type of text/* and no charset parameter
+    must be treated as us-ascii.  (We now do this.)  And also that it
+    must always be flagged as non-well-formed.  (We now do this too.)
+    
+    If Content-Type is unspecified (input was local file or non-HTTP source)
+    or unrecognized (server just got it totally wrong), then go by the
+    encoding given in the XML prefix of the document and default to
+    'iso-8859-1' as per the HTTP specification (RFC 2616).
+    
+    Then, assuming we didn't find a character encoding in the HTTP headers
+    (and the HTTP Content-type allowed us to look in the body), we need
+    to sniff the first few bytes of the XML data and try to determine
+    whether the encoding is ASCII-compatible.  Section F of the XML
+    specification shows the way here:
+    http://www.w3.org/TR/REC-xml/#sec-guessing-no-ext-info
+
+    If the sniffed encoding is not ASCII-compatible, we need to make it
+    ASCII compatible so that we can sniff further into the XML declaration
+    to find the encoding attribute, which will tell us the true encoding.
+
+    Of course, none of this guarantees that we will be able to parse the
+    feed in the declared character encoding (assuming it was declared
+    correctly, which many are not).  CJKCodecs and iconv_codec help a lot;
+    you should definitely install them if you can.
+    http://cjkpython.i18n.org/
+    '''
+
+    def _parseHTTPContentType(content_type):
+        '''takes HTTP Content-Type header and returns (content type, charset)
+
+        If no charset is specified, returns (content type, '')
+        If no content type is specified, returns ('', '')
+        Both return parameters are guaranteed to be lowercase strings
+        '''
+        content_type = content_type or ''
+        content_type, params = cgi.parse_header(content_type)
+        return content_type, params.get('charset', '').replace("'", '')
+
+    sniffed_xml_encoding = ''
+    xml_encoding = ''
+    true_encoding = ''
+    # http_headers may have no 'content-type' key (local file / non-HTTP
+    # source); _parseHTTPContentType maps a missing header to ('', '').
+    http_content_type, http_encoding = _parseHTTPContentType(http_headers.get('content-type'))
+    # Must sniff for non-ASCII-compatible character encodings before
+    # searching for XML declaration.  This heuristic is defined in
+    # section F of the XML specification:
+    # http://www.w3.org/TR/REC-xml/#sec-guessing-no-ext-info
+    try:
+        if xml_data[:4] == '\x4c\x6f\xa7\x94':
+            # EBCDIC
+            xml_data = _ebcdic_to_ascii(xml_data)
+        elif xml_data[:4] == '\x00\x3c\x00\x3f':
+            # UTF-16BE
+            sniffed_xml_encoding = 'utf-16be'
+            xml_data = unicode(xml_data, 'utf-16be').encode('utf-8')
+        elif (len(xml_data) >= 4) and (xml_data[:2] == '\xfe\xff') and (xml_data[2:4] != '\x00\x00'):
+            # UTF-16BE with BOM
+            sniffed_xml_encoding = 'utf-16be'
+            xml_data = unicode(xml_data[2:], 'utf-16be').encode('utf-8')
+        elif xml_data[:4] == '\x3c\x00\x3f\x00':
+            # UTF-16LE
+            sniffed_xml_encoding = 'utf-16le'
+            xml_data = unicode(xml_data, 'utf-16le').encode('utf-8')
+        elif (len(xml_data) >= 4) and (xml_data[:2] == '\xff\xfe') and (xml_data[2:4] != '\x00\x00'):
+            # UTF-16LE with BOM
+            sniffed_xml_encoding = 'utf-16le'
+            xml_data = unicode(xml_data[2:], 'utf-16le').encode('utf-8')
+        elif xml_data[:4] == '\x00\x00\x00\x3c':
+            # UTF-32BE
+            sniffed_xml_encoding = 'utf-32be'
+            xml_data = unicode(xml_data, 'utf-32be').encode('utf-8')
+        elif xml_data[:4] == '\x3c\x00\x00\x00':
+            # UTF-32LE
+            sniffed_xml_encoding = 'utf-32le'
+            xml_data = unicode(xml_data, 'utf-32le').encode('utf-8')
+        elif xml_data[:4] == '\x00\x00\xfe\xff':
+            # UTF-32BE with BOM
+            sniffed_xml_encoding = 'utf-32be'
+            xml_data = unicode(xml_data[4:], 'utf-32be').encode('utf-8')
+        elif xml_data[:4] == '\xff\xfe\x00\x00':
+            # UTF-32LE with BOM
+            sniffed_xml_encoding = 'utf-32le'
+            xml_data = unicode(xml_data[4:], 'utf-32le').encode('utf-8')
+        elif xml_data[:3] == '\xef\xbb\xbf':
+            # UTF-8 with BOM
+            sniffed_xml_encoding = 'utf-8'
+            xml_data = unicode(xml_data[3:], 'utf-8').encode('utf-8')
+        else:
+            # ASCII-compatible
+            pass
+        # xml_data is now ASCII-compatible (recoded to UTF-8 above when it
+        # was not), so the encoding attribute of an XML declaration at the
+        # very start of the document can be read with a plain regex.
+        xml_encoding_match = re.compile('^<\?.*encoding=[\'"](.*?)[\'"].*\?>').match(xml_data)
+    except:
+        # NOTE(review): the bare except treats any decode failure above as
+        # "no usable XML declaration", but it also hides unrelated errors.
+        xml_encoding_match = None
+    if xml_encoding_match:
+        xml_encoding = xml_encoding_match.groups()[0].lower()
+        # A declared UCS-2/UCS-4/UTF-16/UTF-32 family name cannot express
+        # the byte order; prefer the BOM/pattern-sniffed variant.
+        if sniffed_xml_encoding and (xml_encoding in ('iso-10646-ucs-2', 'ucs-2', 'csunicode', 'iso-10646-ucs-4', 'ucs-4', 'csucs4', 'utf-16', 'utf-32', 'utf_16', 'utf_32', 'utf16', 'u16')):
+            xml_encoding = sniffed_xml_encoding
+    # Map the HTTP Content-Type onto the rules laid out in the docstring
+    # (RFC 3023): for application/*xml types the in-document encoding may
+    # be consulted; for text/* types the charset parameter wins and the
+    # fallback is us-ascii.
+    acceptable_content_type = 0
+    application_content_types = ('application/xml', 'application/xml-dtd', 'application/xml-external-parsed-entity')
+    text_content_types = ('text/xml', 'text/xml-external-parsed-entity')
+    if (http_content_type in application_content_types) or \
+       (http_content_type.startswith('application/') and http_content_type.endswith('+xml')):
+        acceptable_content_type = 1
+        true_encoding = http_encoding or xml_encoding or 'utf-8'
+    elif (http_content_type in text_content_types) or \
+         (http_content_type.startswith('text/')) and http_content_type.endswith('+xml'):
+        acceptable_content_type = 1
+        true_encoding = http_encoding or 'us-ascii'
+    elif http_content_type.startswith('text/'):
+        # text/* but not an XML media type: honor the charset (default
+        # us-ascii) but leave acceptable_content_type at 0.
+        true_encoding = http_encoding or 'us-ascii'
+    elif http_headers and (not http_headers.has_key('content-type')):
+        # HTTP headers present but no Content-Type at all: use the XML
+        # declaration, defaulting to iso-8859-1 per RFC 2616 (see docstring).
+        true_encoding = xml_encoding or 'iso-8859-1'
+    else:
+        # No headers at all, or an unrecognized Content-Type: trust the
+        # XML declaration, defaulting to utf-8.
+        true_encoding = xml_encoding or 'utf-8'
+    return true_encoding, http_encoding, xml_encoding, sniffed_xml_encoding, acceptable_content_type
+    
+def _toUTF8(data, encoding):
+    '''Changes an XML data stream on the fly to specify a new encoding
+
+    data is a raw sequence of bytes (not Unicode) that is presumed to be in %encoding already
+    encoding is a string recognized by encodings.aliases
+    '''
+    if _debug: sys.stderr.write('entering _toUTF8, trying encoding %s\n' % encoding)
+    # strip Byte Order Mark (if present)
+    if (len(data) >= 4) and