diff -r 4bbd2b021cb5 .hgignore
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/.hgignore	Sun Sep 05 14:20:37 2010 +0200
@@ -0,0 +1,4 @@
+.*\.pyc
+.*\.egg-info/
+.*~
+build/
diff -r 4bbd2b021cb5 MANIFEST.in
--- a/MANIFEST.in	Sat Sep 04 09:10:29 2010 +0200
+++ b/MANIFEST.in	Sun Sep 05 14:20:37 2010 +0200
@@ -2,3 +2,4 @@
 recursive-exclude doc/logo.lineform *
 include doc/api/*.*
 include doc/*.html
+recursive-include genshi/template/tests/templates *.html *.txt
diff -r 4bbd2b021cb5 README.txt
--- a/README.txt	Sat Sep 04 09:10:29 2010 +0200
+++ b/README.txt	Sun Sep 05 14:20:37 2010 +0200
@@ -10,3 +10,8 @@
 directory, and visit the Genshi web site:
 
   <http://genshi.edgewall.org/>
+
+About this repository
+=====================
+
+This is a work area for porting Genshi to Python 3.
diff -r 4bbd2b021cb5 doc/common/doctools.py
--- a/doc/common/doctools.py	Sat Sep 04 09:10:29 2010 +0200
+++ b/doc/common/doctools.py	Sun Sep 05 14:20:37 2010 +0200
@@ -62,14 +62,14 @@
             code_block.content = 1
             rst.directives.register_directive('code-block', code_block)
         except ImportError:
-            print 'Pygments not installed, syntax highlighting disabled'
+            print('Pygments not installed, syntax highlighting disabled')
 
         loader = TemplateLoader(['doc', 'doc/common'], variable_lookup='strict')
         for source in glob('doc/*.txt'):
             dest = os.path.splitext(source)[0] + '.html'
             if self.force or not os.path.exists(dest) or \
                     os.path.getmtime(dest) < os.path.getmtime(source):
-                print 'building documentation file %s' % dest
+                print('building documentation file %s' % dest)
                 publish_cmdline(writer_name='html',
                                 argv=['--config=%s' % docutils_conf, source,
                                       dest])
@@ -104,7 +104,7 @@
                 sys.argv[1:] = old_argv
 
             except ImportError:
-                print 'epydoc not installed, skipping API documentation.'
+                print('epydoc not installed, skipping API documentation.')
 
 
 class test_doc(Command):
@@ -119,5 +119,5 @@
 
     def run(self):
         for filename in glob('doc/*.txt'):
-            print 'testing documentation file %s' % filename
+            print('testing documentation file %s' % filename)
             doctest.testfile(filename, False, optionflags=doctest.ELLIPSIS)
diff -r 4bbd2b021cb5 examples_to_py3k.sh
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/examples_to_py3k.sh	Sun Sep 05 14:20:37 2010 +0200
@@ -0,0 +1,8 @@
+#!/bin/sh
+#
+# Script to run 2to3 on files not covered by setup.py (the examples/ tree).
+# NOTE(review): examples/ is a relative path -- presumably run from repo root.
+export PYTHONIOENCODING=utf8
+
+# General 2to3 run: rewrite files in place (-w) without printing diffs.
+2to3 -w --no-diffs examples/
diff -r 4bbd2b021cb5 fixes/fix_unicode_in_strings.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fixes/fix_unicode_in_strings.py	Sun Sep 05 14:20:37 2010 +0200
@@ -0,0 +1,17 @@
+"""Fixer that changes expressions inside strings literals from u"..." to "...".
+
+"""
+
+import re
+from lib2to3 import fixer_base
+
+_literal_re = re.compile(r"(.+?)\b[uU]([rR]?[\'\"])")
+
+class FixUnicodeInStrings(fixer_base.BaseFix):
+    # Applied to every STRING node; the rewrite itself is done by _literal_re.
+    PATTERN = "STRING"
+
+    def transform(self, node, results):
+        fixed = node.clone()
+        fixed.value = _literal_re.sub(r"\1\2", fixed.value)
+        return fixed
diff -r 4bbd2b021cb5 genshi/_speedups.c
--- a/genshi/_speedups.c	Sat Sep 04 09:10:29 2010 +0200
+++ b/genshi/_speedups.c	Sun Sep 05 14:20:37 2010 +0200
@@ -14,10 +14,17 @@
 #include <Python.h>
 #include <structmember.h>
 
-#if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN)
-typedef int Py_ssize_t;
-#define PY_SSIZE_T_MAX INT_MAX
-#define PY_SSIZE_T_MIN INT_MIN
+#if PY_MAJOR_VERSION > 2
+#   define IS_PY3K
+#elif PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN)
+    typedef int Py_ssize_t;
+#   define PY_SSIZE_T_MAX INT_MAX
+#   define PY_SSIZE_T_MIN INT_MIN
+#endif
+
+/* We only use Unicode Strings in this module */
+#ifndef IS_PY3K
+#   define PyObject_Str PyObject_Unicode
 #endif
 
 static PyObject *amp1, *amp2, *lt1, *lt2, *gt1, *gt2, *qt1, *qt2;
@@ -73,7 +80,7 @@
         Py_DECREF(args);
         return ret;
     }
-    in = (PyUnicodeObject *) PyObject_Unicode(text);
+    in = (PyUnicodeObject *) PyObject_Str(text);
     if (in == NULL) {
         return NULL;
     }
@@ -390,11 +397,11 @@
     PyObject *unicode, *result, *args;
 
     if (PyObject_TypeCheck(self, &MarkupType)) {
-        unicode = PyObject_Unicode(self);
+        unicode = PyObject_Str(self);
         if (unicode == NULL) return NULL;
         result = PyNumber_Multiply(unicode, num);
     } else { // __rmul__
-        unicode = PyObject_Unicode(num);
+        unicode = PyObject_Str(num);
         if (unicode == NULL) return NULL;
         result = PyNumber_Multiply(unicode, self);
     }
@@ -418,9 +425,13 @@
 {
     PyObject *format, *result, *args;
 
+#ifdef IS_PY3K
+    format = PyUnicode_FromString("<Markup %r>");
+#else
     format = PyString_FromString("<Markup %r>");
+#endif
     if (format == NULL) return NULL;
-    result = PyObject_Unicode(self);
+    result = PyObject_Str(self);
     if (result == NULL) {
         Py_DECREF(format);
         return NULL;
@@ -432,7 +443,11 @@
         return NULL;
     }
     PyTuple_SET_ITEM(args, 0, result);
+#ifdef IS_PY3K
+    result = PyUnicode_Format(format, args);
+#else
     result = PyString_Format(format, args);
+#endif
     Py_DECREF(format);
     Py_DECREF(args);
     return result;
@@ -553,13 +568,19 @@
     Markup_add, /*nb_add*/
     0, /*nb_subtract*/
     Markup_mul, /*nb_multiply*/
+#ifndef IS_PY3K
     0, /*nb_divide*/
+#endif
     Markup_mod, /*nb_remainder*/
 };
 
 PyTypeObject MarkupType = {
+#ifdef IS_PY3K
+    PyVarObject_HEAD_INIT(NULL, 0)
+#else
     PyObject_HEAD_INIT(NULL)
     0,
+#endif
     "genshi._speedups.Markup",
     sizeof(MarkupObject),
     0,
@@ -567,7 +588,11 @@
     0,          /*tp_print*/
     0,          /*tp_getattr*/
     0,          /*tp_setattr*/
+#ifdef IS_PY3K
+    0,          /*tp_reserved*/
+#else
     0,          /*tp_compare*/
+#endif
     Markup_repr, /*tp_repr*/
     &Markup_as_number, /*tp_as_number*/
     0,          /*tp_as_sequence*/
@@ -580,7 +605,14 @@
     0,          /*tp_setattro*/
     0,          /*tp_as_buffer*/
 
+#ifdef IS_PY3K
+    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_UNICODE_SUBCLASS, /*tp_flags*/
+#elif defined(Py_TPFLAGS_UNICODE_SUBCLASS)
+    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_CHECKTYPES | Py_TPFLAGS_UNICODE_SUBCLASS, /*tp_flags*/
+#else
     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_CHECKTYPES, /*tp_flags*/
+#endif
+
     Markup__doc__,/*tp_doc*/
 
     0,          /*tp_traverse*/
@@ -616,8 +648,25 @@
     0           /*tp_weaklist*/
 };
 
+#ifdef IS_PY3K
+static struct PyModuleDef module_def = { /* file-local: used only by the init function */
+    PyModuleDef_HEAD_INIT, /*m_base*/
+    "_speedups",           /*m_name*/
+    NULL,                  /*m_doc*/
+    -1,                    /*m_size: no per-module state is allocated*/
+    NULL,                  /*m_methods: no module-level functions*/
+    NULL,                  /*m_reload*/
+    NULL,                  /*m_traverse*/
+    NULL,                  /*m_clear*/
+    NULL                   /*m_free*/
+};
+
+PyObject *
+PyInit__speedups(void)
+#else
 PyMODINIT_FUNC
 init_speedups(void)
+#endif
 {
     PyObject *module;
 
@@ -626,11 +675,23 @@
     MarkupType.tp_base = &PyUnicode_Type;
 
     if (PyType_Ready(&MarkupType) < 0)
+#ifdef IS_PY3K
+        return NULL;
+#else
         return;
+#endif
 
     init_constants();
 
+#ifdef IS_PY3K
+    module = PyModule_Create(&module_def);
+#else
     module = Py_InitModule("_speedups", NULL);
+#endif
     Py_INCREF(&MarkupType);
     PyModule_AddObject(module, "Markup", (PyObject *) &MarkupType);
+
+#ifdef IS_PY3K
+    return module;
+#endif
 }
diff -r 4bbd2b021cb5 genshi/compat.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genshi/compat.py	Sun Sep 05 14:20:37 2010 +0200
@@ -0,0 +1,115 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2006-2009 Edgewall Software
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://genshi.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://genshi.edgewall.org/log/.
+
+"""Various Python version compatibility classes and functions."""
+
+import sys
+from types import CodeType
+
+
+IS_PYTHON2 = (sys.version_info[0] == 2)
+
+
+# This function should only be called in Python 2, and will fail in Python 3
+
+def stringrepr(string):
+    """Return `string` as a single-quoted ASCII literal (Python 2 only)."""
+    if not IS_PYTHON2:
+        raise RuntimeError(
+                'Python 2 compatibility function. Not usable in Python 3.')
+    encoded = string.encode('ascii', 'backslashreplace')
+    body = "'" + encoded.replace("'", "\\'") + "'"
+    # A longer encoded form means escapes were inserted: add the u prefix.
+    if len(encoded) > len(string):
+        return 'u' + body
+    return body
+
+
+# We need to differentiate between StringIO and BytesIO in places
+
+if IS_PYTHON2:
+    from StringIO import StringIO
+    try:
+        from cStringIO import StringIO as BytesIO
+    except ImportError:
+        BytesIO = StringIO
+else:
+    from io import StringIO, BytesIO
+
+
+# We want to test bytestring input to some stuff.
+
+def wrapped_bytes(bstr):
+    # Both Python versions insist on the leading ``b`` marker.
+    assert bstr.startswith('b')
+    if IS_PYTHON2:
+        # Python 2: strip the leading ``b`` marker.
+        return bstr[1:]
+    # Python 3: hand the text back unchanged.
+    return bstr
+
+
+# We do some scary stuff with CodeType() in template/eval.py
+# The two branches differ only in co_kwonlyargcount, used on Python 3 only.
+if IS_PYTHON2:
+    def get_code_params(code):
+        return (code.co_nlocals, code.co_stacksize, code.co_flags,
+                code.co_code, code.co_consts, code.co_names, code.co_varnames,
+                code.co_filename, code.co_name, code.co_firstlineno,
+                code.co_lnotab, (), ())
+    # Rebuild `code` with a leading 0, new filename/name/lineno, flag 0x0040 set
+    def build_code_chunk(code, filename, name, lineno):
+        return CodeType(0, code.co_nlocals, code.co_stacksize,
+                        code.co_flags | 0x0040, code.co_code, code.co_consts,
+                        code.co_names, code.co_varnames, filename, name,
+                        lineno, code.co_lnotab, (), ())
+else:
+    def get_code_params(code):
+        return (code.co_nlocals, code.co_kwonlyargcount, code.co_stacksize,
+                code.co_flags, code.co_code, code.co_consts, code.co_names,
+                code.co_varnames, code.co_filename, code.co_name,
+                code.co_firstlineno, code.co_lnotab, (), ())
+    # Same as the Python 2 variant; 0x0040 is presumably CO_NOFREE (confirm)
+    def build_code_chunk(code, filename, name, lineno):
+        return CodeType(0, code.co_nlocals, code.co_kwonlyargcount,
+                        code.co_stacksize, code.co_flags | 0x0040,
+                        code.co_code, code.co_consts, code.co_names,
+                        code.co_varnames, filename, name, lineno,
+                        code.co_lnotab, (), ())
+
+# Compatibility fallback implementations for Python < 2.6
+
+try:
+    next = next  # bind the builtin as a module global; NameError if missing
+except NameError:
+    def next(iterator):  # fallback: delegate to the iterator's .next() method
+        return iterator.next()
+
+# Compatibility fallback implementations for Python < 2.5
+
+try:
+    all = all
+    any = any
+except NameError:
+    def any(S):
+        """Return True as soon as one item of `S` is true, else False."""
+        for item in S:
+            if item:
+                return True
+        return False
+
+    def all(S):
+        """Return False as soon as one item of `S` is false, else True."""
+        # Every item is true exactly when no item is false.
+        return not any(not item for item in S)
+
diff -r 4bbd2b021cb5 genshi/core.py
--- a/genshi/core.py	Sat Sep 04 09:10:29 2010 +0200
+++ b/genshi/core.py	Sun Sep 05 14:20:37 2010 +0200
@@ -17,6 +17,7 @@
     reduce # builtin in Python < 3
 except NameError:
     from functools import reduce
+import sys
 from itertools import chain
 import operator
 
@@ -92,7 +93,7 @@
         Assume the following stream produced by the `HTML` function:
         
         >>> from genshi.input import HTML
-        >>> html = HTML('''<p onclick="alert('Whoa')">Hello, world!</p>''')
+        >>> html = HTML('''<p onclick="alert('Whoa')">Hello, world!</p>''', encoding='utf-8')
         >>> print(html)
         <p onclick="alert('Whoa')">Hello, world!</p>
         
@@ -153,7 +154,7 @@
         """
         return reduce(operator.or_, (self,) + filters)
 
-    def render(self, method=None, encoding='utf-8', out=None, **kwargs):
+    def render(self, method=None, encoding=None, out=None, **kwargs):
         """Return a string representation of the stream.
         
         Any additional keyword arguments are passed to the serializer, and thus
@@ -187,7 +188,7 @@
         XPath expression.
         
         >>> from genshi import HTML
-        >>> stream = HTML('<doc><elem>foo</elem><elem>bar</elem></doc>')
+        >>> stream = HTML('<doc><elem>foo</elem><elem>bar</elem></doc>', encoding='utf-8')
         >>> print(stream.select('elem'))
         <elem>foo</elem><elem>bar</elem>
         >>> print(stream.select('elem/text()'))
@@ -667,8 +668,13 @@
     def __hash__(self):
         return hash(self.uri)
 
-    def __repr__(self):
-        return '%s(%s)' % (type(self).__name__, stringrepr(self.uri))
+    def __repr__(self):
+        cls_name = type(self).__name__
+        if sys.version_info[0] == 2:
+            # Only use stringrepr in python 2
+            return '%s(%s)' % (cls_name, stringrepr(self.uri))
+        # Python 3: plain %r formatting of the uri is used instead
+        return '%s(%r)' % (cls_name, self.uri)
 
     def __str__(self):
         return self.uri.encode('utf-8')
@@ -728,5 +734,10 @@
     def __getnewargs__(self):
         return (self.lstrip('{'),)
 
-    def __repr__(self):
-        return '%s(%s)' % (type(self).__name__, stringrepr(self.lstrip('{')))
+    def __repr__(self):
+        cls_name = type(self).__name__
+        if sys.version_info[0] == 2:
+            # Only use stringrepr in python 2
+            return '%s(%s)' % (cls_name, stringrepr(self.lstrip('{')))
+        # Python 3: plain %r formatting of the stripped name is used instead
+        return '%s(%r)' % (cls_name, self.lstrip('{'))
diff -r 4bbd2b021cb5 genshi/filters/html.py
--- a/genshi/filters/html.py	Sat Sep 04 09:10:29 2010 +0200
+++ b/genshi/filters/html.py	Sun Sep 05 14:20:37 2010 +0200
@@ -32,7 +32,7 @@
     >>> from genshi.input import HTML
     >>> html = HTML('''<form>
     ...   <p><input type="text" name="foo" /></p>
-    ... </form>''')
+    ... </form>''', encoding='utf-8')
     >>> filler = HTMLFormFiller(data={'foo': 'bar'})
     >>> print(html | filler)
     <form>
@@ -199,7 +199,7 @@
     from the stream.
     
     >>> from genshi import HTML
-    >>> html = HTML('<div><script>alert(document.cookie)</script></div>')
+    >>> html = HTML('<div><script>alert(document.cookie)</script></div>', encoding='utf-8')
     >>> print(html | HTMLSanitizer())
     <div/>
     
@@ -207,7 +207,7 @@
     is instantiated. For example, to allow inline ``style`` attributes, the
     following instantation would work:
     
-    >>> html = HTML('<div style="background: #000"></div>')
+    >>> html = HTML('<div style="background: #000"></div>', encoding='utf-8')
     >>> sanitizer = HTMLSanitizer(safe_attrs=HTMLSanitizer.SAFE_ATTRS | set(['style']))
     >>> print(html | sanitizer)
     <div style="background: #000"/>
@@ -215,7 +215,7 @@
     Note that even in this case, the filter *does* attempt to remove dangerous
     constructs from style attributes:
 
-    >>> html = HTML('<div style="background: url(javascript:void); color: #000"></div>')
+    >>> html = HTML('<div style="background: url(javascript:void); color: #000"></div>', encoding='utf-8')
     >>> print(html | sanitizer)
     <div style="color: #000"/>
     
diff -r 4bbd2b021cb5 genshi/filters/i18n.py
--- a/genshi/filters/i18n.py	Sat Sep 04 09:10:29 2010 +0200
+++ b/genshi/filters/i18n.py	Sun Sep 05 14:20:37 2010 +0200
@@ -33,6 +33,7 @@
 from genshi.template.base import DirectiveFactory, EXPR, SUB, _apply_directives
 from genshi.template.directives import Directive, StripDirective
 from genshi.template.markup import MarkupTemplate, EXEC
+from genshi.compat import IS_PYTHON2
 
 __all__ = ['Translator', 'extract']
 __docformat__ = 'restructuredtext en'
@@ -288,8 +289,7 @@
     also need to pass a name for those parameters. Consider the following
     examples:
     
-    >>> tmpl = MarkupTemplate('''\
-        <html xmlns:i18n="http://genshi.edgewall.org/i18n">
+    >>> tmpl = MarkupTemplate('''<html xmlns:i18n="http://genshi.edgewall.org/i18n">
     ...   <div i18n:choose="num; num">
     ...     <p i18n:singular="">There is $num coin</p>
     ...     <p i18n:plural="">There are $num coins</p>
@@ -301,8 +301,7 @@
     [(2, 'ngettext', (u'There is %(num)s coin',
                       u'There are %(num)s coins'), [])]
 
-    >>> tmpl = MarkupTemplate('''\
-        <html xmlns:i18n="http://genshi.edgewall.org/i18n">
+    >>> tmpl = MarkupTemplate('''<html xmlns:i18n="http://genshi.edgewall.org/i18n">
     ...   <div i18n:choose="num; num">
     ...     <p i18n:singular="">There is $num coin</p>
     ...     <p i18n:plural="">There are $num coins</p>
@@ -324,8 +323,7 @@
 
     When used as a element and not as an attribute:
 
-    >>> tmpl = MarkupTemplate('''\
-        <html xmlns:i18n="http://genshi.edgewall.org/i18n">
+    >>> tmpl = MarkupTemplate('''<html xmlns:i18n="http://genshi.edgewall.org/i18n">
     ...   <i18n:choose numeral="num" params="num">
     ...     <p i18n:singular="">There is $num coin</p>
     ...     <p i18n:plural="">There are $num coins</p>
@@ -492,8 +490,7 @@
     another i18n domain(catalog) to translate from.
     
     >>> from genshi.filters.tests.i18n import DummyTranslations
-    >>> tmpl = MarkupTemplate('''\
-        <html xmlns:i18n="http://genshi.edgewall.org/i18n">
+    >>> tmpl = MarkupTemplate('''<html xmlns:i18n="http://genshi.edgewall.org/i18n">
     ...   <p i18n:msg="">Bar</p>
     ...   <div i18n:domain="foo">
     ...     <p i18n:msg="">FooBar</p>
@@ -663,11 +660,19 @@
             if ctxt:
                 ctxt['_i18n.gettext'] = gettext
         else:
-            gettext = self.translate.ugettext
-            ngettext = self.translate.ungettext
+            if IS_PYTHON2:
+                gettext = self.translate.ugettext
+                ngettext = self.translate.ungettext
+            else:
+                gettext = self.translate.gettext
+                ngettext = self.translate.ngettext
             try:
-                dgettext = self.translate.dugettext
-                dngettext = self.translate.dungettext
+                if IS_PYTHON2:
+                    dgettext = self.translate.dugettext
+                    dngettext = self.translate.dungettext
+                else:
+                    dgettext = self.translate.dgettext
+                    dngettext = self.translate.dngettext
             except AttributeError:
                 dgettext = lambda _, y: gettext(y)
                 dngettext = lambda _, s, p, n: ngettext(s, p, n)
@@ -678,6 +683,8 @@
                 ctxt['_i18n.dngettext'] = dngettext
 
         if ctxt and ctxt.get('_i18n.domain'):
+            # TODO: This can cause infinite recursion if dgettext is defined
+            #       via the AttributeError case above!
             gettext = lambda msg: dgettext(ctxt.get('_i18n.domain'), msg)
 
         for kind, data, pos in stream:
@@ -1168,7 +1175,9 @@
                 and node.func.id in gettext_functions:
             strings = []
             def _add(arg):
-                if isinstance(arg, _ast.Str) and isinstance(arg.s, basestring):
+                if isinstance(arg, _ast.Str) and isinstance(arg.s, unicode):
+                    strings.append(arg.s)
+                elif isinstance(arg, _ast.Str):
                     strings.append(unicode(arg.s, 'utf-8'))
                 elif arg:
                     strings.append(None)
diff -r 4bbd2b021cb5 genshi/filters/tests/__init__.py
--- a/genshi/filters/tests/__init__.py	Sat Sep 04 09:10:29 2010 +0200
+++ b/genshi/filters/tests/__init__.py	Sun Sep 05 14:20:37 2010 +0200
@@ -15,9 +15,9 @@
 import unittest
 
 def suite():
-    from genshi.filters.tests import html, i18n, transform
+    from genshi.filters.tests import test_html, i18n, transform
     suite = unittest.TestSuite()
-    suite.addTest(html.suite())
+    suite.addTest(test_html.suite())
     suite.addTest(i18n.suite())
     if hasattr(doctest, 'NORMALIZE_WHITESPACE'):
         suite.addTest(transform.suite())
diff -r 4bbd2b021cb5 genshi/filters/tests/html.py
--- a/genshi/filters/tests/html.py	Sat Sep 04 09:10:29 2010 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,513 +0,0 @@
-# -*- coding: utf-8 -*-
-#
-# Copyright (C) 2006-2009 Edgewall Software
-# All rights reserved.
-#
-# This software is licensed as described in the file COPYING, which
-# you should have received as part of this distribution. The terms
-# are also available at http://genshi.edgewall.org/wiki/License.
-#
-# This software consists of voluntary contributions made by many
-# individuals. For the exact contribution history, see the revision
-# history and logs, available at http://genshi.edgewall.org/log/.
-
-import doctest
-import unittest
-
-from genshi.input import HTML, ParseError
-from genshi.filters.html import HTMLFormFiller, HTMLSanitizer
-from genshi.template import MarkupTemplate
-
-class HTMLFormFillerTestCase(unittest.TestCase):
-
-    def test_fill_input_text_no_value(self):
-        html = HTML("""<form><p>
-          <input type="text" name="foo" />
-        </p></form>""") | HTMLFormFiller()
-        self.assertEquals("""<form><p>
-          <input type="text" name="foo"/>
-        </p></form>""", html.render())
-
-    def test_fill_input_text_single_value(self):
-        html = HTML("""<form><p>
-          <input type="text" name="foo" />
-        </p></form>""") | HTMLFormFiller(data={'foo': 'bar'})
-        self.assertEquals("""<form><p>
-          <input type="text" name="foo" value="bar"/>
-        </p></form>""", html.render())
-
-    def test_fill_input_text_multi_value(self):
-        html = HTML("""<form><p>
-          <input type="text" name="foo" />
-        </p></form>""") | HTMLFormFiller(data={'foo': ['bar']})
-        self.assertEquals("""<form><p>
-          <input type="text" name="foo" value="bar"/>
-        </p></form>""", html.render())
-
-    def test_fill_input_hidden_no_value(self):
-        html = HTML("""<form><p>
-          <input type="hidden" name="foo" />
-        </p></form>""") | HTMLFormFiller()
-        self.assertEquals("""<form><p>
-          <input type="hidden" name="foo"/>
-        </p></form>""", html.render())
-
-    def test_fill_input_hidden_single_value(self):
-        html = HTML("""<form><p>
-          <input type="hidden" name="foo" />
-        </p></form>""") | HTMLFormFiller(data={'foo': 'bar'})
-        self.assertEquals("""<form><p>
-          <input type="hidden" name="foo" value="bar"/>
-        </p></form>""", html.render())
-
-    def test_fill_input_hidden_multi_value(self):
-        html = HTML("""<form><p>
-          <input type="hidden" name="foo" />
-        </p></form>""") | HTMLFormFiller(data={'foo': ['bar']})
-        self.assertEquals("""<form><p>
-          <input type="hidden" name="foo" value="bar"/>
-        </p></form>""", html.render())
-
-    def test_fill_textarea_no_value(self):
-        html = HTML("""<form><p>
-          <textarea name="foo"></textarea>
-        </p></form>""") | HTMLFormFiller()
-        self.assertEquals("""<form><p>
-          <textarea name="foo"/>
-        </p></form>""", html.render())
-
-    def test_fill_textarea_single_value(self):
-        html = HTML("""<form><p>
-          <textarea name="foo"></textarea>
-        </p></form>""") | HTMLFormFiller(data={'foo': 'bar'})
-        self.assertEquals("""<form><p>
-          <textarea name="foo">bar</textarea>
-        </p></form>""", html.render())
-
-    def test_fill_textarea_multi_value(self):
-        html = HTML("""<form><p>
-          <textarea name="foo"></textarea>
-        </p></form>""") | HTMLFormFiller(data={'foo': ['bar']})
-        self.assertEquals("""<form><p>
-          <textarea name="foo">bar</textarea>
-        </p></form>""", html.render())
-
-    def test_fill_textarea_multiple(self):
-        # Ensure that the subsequent textarea doesn't get the data from the
-        # first
-        html = HTML("""<form><p>
-          <textarea name="foo"></textarea>
-          <textarea name="bar"></textarea>
-        </p></form>""") | HTMLFormFiller(data={'foo': 'Some text'})
-        self.assertEquals("""<form><p>
-          <textarea name="foo">Some text</textarea>
-          <textarea name="bar"/>
-        </p></form>""", html.render())
-
-    def test_fill_textarea_preserve_original(self):
-        html = HTML("""<form><p>
-          <textarea name="foo"></textarea>
-          <textarea name="bar">Original value</textarea>
-        </p></form>""") | HTMLFormFiller(data={'foo': 'Some text'})
-        self.assertEquals("""<form><p>
-          <textarea name="foo">Some text</textarea>
-          <textarea name="bar">Original value</textarea>
-        </p></form>""", html.render())
-
-    def test_fill_input_checkbox_single_value_auto_no_value(self):
-        html = HTML("""<form><p>
-          <input type="checkbox" name="foo" />
-        </p></form>""") | HTMLFormFiller()
-        self.assertEquals("""<form><p>
-          <input type="checkbox" name="foo"/>
-        </p></form>""", html.render())
-
-    def test_fill_input_checkbox_single_value_auto(self):
-        html = HTML("""<form><p>
-          <input type="checkbox" name="foo" />
-        </p></form>""")
-        self.assertEquals("""<form><p>
-          <input type="checkbox" name="foo"/>
-        </p></form>""", (html | HTMLFormFiller(data={'foo': ''})).render())
-        self.assertEquals("""<form><p>
-          <input type="checkbox" name="foo" checked="checked"/>
-        </p></form>""", (html | HTMLFormFiller(data={'foo': 'on'})).render())
-
-    def test_fill_input_checkbox_single_value_defined(self):
-        html = HTML("""<form><p>
-          <input type="checkbox" name="foo" value="1" />
-        </p></form>""")
-        self.assertEquals("""<form><p>
-          <input type="checkbox" name="foo" value="1" checked="checked"/>
-        </p></form>""", (html | HTMLFormFiller(data={'foo': '1'})).render())
-        self.assertEquals("""<form><p>
-          <input type="checkbox" name="foo" value="1"/>
-        </p></form>""", (html | HTMLFormFiller(data={'foo': '2'})).render())
-
-    def test_fill_input_checkbox_multi_value_auto(self):
-        html = HTML("""<form><p>
-          <input type="checkbox" name="foo" />
-        </p></form>""")
-        self.assertEquals("""<form><p>
-          <input type="checkbox" name="foo"/>
-        </p></form>""", (html | HTMLFormFiller(data={'foo': []})).render())
-        self.assertEquals("""<form><p>
-          <input type="checkbox" name="foo" checked="checked"/>
-        </p></form>""", (html | HTMLFormFiller(data={'foo': ['on']})).render())
-
-    def test_fill_input_checkbox_multi_value_defined(self):
-        html = HTML("""<form><p>
-          <input type="checkbox" name="foo" value="1" />
-        </p></form>""")
-        self.assertEquals("""<form><p>
-          <input type="checkbox" name="foo" value="1" checked="checked"/>
-        </p></form>""", (html | HTMLFormFiller(data={'foo': ['1']})).render())
-        self.assertEquals("""<form><p>
-          <input type="checkbox" name="foo" value="1"/>
-        </p></form>""", (html | HTMLFormFiller(data={'foo': ['2']})).render())
-
-    def test_fill_input_radio_no_value(self):
-        html = HTML("""<form><p>
-          <input type="radio" name="foo" />
-        </p></form>""") | HTMLFormFiller()
-        self.assertEquals("""<form><p>
-          <input type="radio" name="foo"/>
-        </p></form>""", html.render())
-
-    def test_fill_input_radio_single_value(self):
-        html = HTML("""<form><p>
-          <input type="radio" name="foo" value="1" />
-        </p></form>""")
-        self.assertEquals("""<form><p>
-          <input type="radio" name="foo" value="1" checked="checked"/>
-        </p></form>""", (html | HTMLFormFiller(data={'foo': '1'})).render())
-        self.assertEquals("""<form><p>
-          <input type="radio" name="foo" value="1"/>
-        </p></form>""", (html | HTMLFormFiller(data={'foo': '2'})).render())
-
-    def test_fill_input_radio_multi_value(self):
-        html = HTML("""<form><p>
-          <input type="radio" name="foo" value="1" />
-        </p></form>""")
-        self.assertEquals("""<form><p>
-          <input type="radio" name="foo" value="1" checked="checked"/>
-        </p></form>""", (html | HTMLFormFiller(data={'foo': ['1']})).render())
-        self.assertEquals("""<form><p>
-          <input type="radio" name="foo" value="1"/>
-        </p></form>""", (html | HTMLFormFiller(data={'foo': ['2']})).render())
-
-    def test_fill_input_radio_empty_string(self):
-        html = HTML("""<form><p>
-          <input type="radio" name="foo" value="" />
-        </p></form>""")
-        self.assertEquals("""<form><p>
-          <input type="radio" name="foo" value="" checked="checked"/>
-        </p></form>""", (html | HTMLFormFiller(data={'foo': ''})).render())
-
-    def test_fill_input_radio_multi_empty_string(self):
-        html = HTML("""<form><p>
-          <input type="radio" name="foo" value="" />
-        </p></form>""")
-        self.assertEquals("""<form><p>
-          <input type="radio" name="foo" value="" checked="checked"/>
-        </p></form>""", (html | HTMLFormFiller(data={'foo': ['']})).render())
-
-    def test_fill_select_no_value_auto(self):
-        html = HTML("""<form><p>
-          <select name="foo">
-            <option>1</option>
-            <option>2</option>
-            <option>3</option>
-          </select>
-        </p></form>""") | HTMLFormFiller()
-        self.assertEquals("""<form><p>
-          <select name="foo">
-            <option>1</option>
-            <option>2</option>
-            <option>3</option>
-          </select>
-        </p></form>""", html.render())
-
-    def test_fill_select_no_value_defined(self):
-        html = HTML("""<form><p>
-          <select name="foo">
-            <option value="1">1</option>
-            <option value="2">2</option>
-            <option value="3">3</option>
-          </select>
-        </p></form>""") | HTMLFormFiller()
-        self.assertEquals("""<form><p>
-          <select name="foo">
-            <option value="1">1</option>
-            <option value="2">2</option>
-            <option value="3">3</option>
-          </select>
-        </p></form>""", html.render())
-
-    def test_fill_select_single_value_auto(self):
-        html = HTML("""<form><p>
-          <select name="foo">
-            <option>1</option>
-            <option>2</option>
-            <option>3</option>
-          </select>
-        </p></form>""") | HTMLFormFiller(data={'foo': '1'})
-        self.assertEquals("""<form><p>
-          <select name="foo">
-            <option selected="selected">1</option>
-            <option>2</option>
-            <option>3</option>
-          </select>
-        </p></form>""", html.render())
-
-    def test_fill_select_single_value_defined(self):
-        html = HTML("""<form><p>
-          <select name="foo">
-            <option value="1">1</option>
-            <option value="2">2</option>
-            <option value="3">3</option>
-          </select>
-        </p></form>""") | HTMLFormFiller(data={'foo': '1'})
-        self.assertEquals("""<form><p>
-          <select name="foo">
-            <option value="1" selected="selected">1</option>
-            <option value="2">2</option>
-            <option value="3">3</option>
-          </select>
-        </p></form>""", html.render())
-
-    def test_fill_select_multi_value_auto(self):
-        html = HTML("""<form><p>
-          <select name="foo" multiple>
-            <option>1</option>
-            <option>2</option>
-            <option>3</option>
-          </select>
-        </p></form>""") | HTMLFormFiller(data={'foo': ['1', '3']})
-        self.assertEquals("""<form><p>
-          <select name="foo" multiple="multiple">
-            <option selected="selected">1</option>
-            <option>2</option>
-            <option selected="selected">3</option>
-          </select>
-        </p></form>""", html.render())
-
-    def test_fill_select_multi_value_defined(self):
-        html = HTML("""<form><p>
-          <select name="foo" multiple>
-            <option value="1">1</option>
-            <option value="2">2</option>
-            <option value="3">3</option>
-          </select>
-        </p></form>""") | HTMLFormFiller(data={'foo': ['1', '3']})
-        self.assertEquals("""<form><p>
-          <select name="foo" multiple="multiple">
-            <option value="1" selected="selected">1</option>
-            <option value="2">2</option>
-            <option value="3" selected="selected">3</option>
-          </select>
-        </p></form>""", html.render())
-
-    def test_fill_option_segmented_text(self):
-        html = MarkupTemplate("""<form>
-          <select name="foo">
-            <option value="1">foo $x</option>
-          </select>
-        </form>""").generate(x=1) | HTMLFormFiller(data={'foo': '1'})
-        self.assertEquals("""<form>
-          <select name="foo">
-            <option value="1" selected="selected">foo 1</option>
-          </select>
-        </form>""", html.render())
-
-    def test_fill_option_segmented_text_no_value(self):
-        html = MarkupTemplate("""<form>
-          <select name="foo">
-            <option>foo $x bar</option>
-          </select>
-        </form>""").generate(x=1) | HTMLFormFiller(data={'foo': 'foo 1 bar'})
-        self.assertEquals("""<form>
-          <select name="foo">
-            <option selected="selected">foo 1 bar</option>
-          </select>
-        </form>""", html.render())
-
-    def test_fill_option_unicode_value(self):
-        html = HTML("""<form>
-          <select name="foo">
-            <option value="&ouml;">foo</option>
-          </select>
-        </form>""") | HTMLFormFiller(data={'foo': u'ö'})
-        self.assertEquals(u"""<form>
-          <select name="foo">
-            <option value="ö" selected="selected">foo</option>
-          </select>
-        </form>""", html.render(encoding=None))
-
-    def test_fill_input_password_disabled(self):
-        html = HTML("""<form><p>
-          <input type="password" name="pass" />
-        </p></form>""") | HTMLFormFiller(data={'pass': 'bar'})
-        self.assertEquals("""<form><p>
-          <input type="password" name="pass"/>
-        </p></form>""", html.render())
-
-    def test_fill_input_password_enabled(self):
-        html = HTML("""<form><p>
-          <input type="password" name="pass" />
-        </p></form>""") | HTMLFormFiller(data={'pass': '1234'}, passwords=True)
-        self.assertEquals("""<form><p>
-          <input type="password" name="pass" value="1234"/>
-        </p></form>""", html.render())
-
-
-class HTMLSanitizerTestCase(unittest.TestCase):
-
-    def test_sanitize_unchanged(self):
-        html = HTML('<a href="#">fo<br />o</a>')
-        self.assertEquals('<a href="#">fo<br/>o</a>',
-                          (html | HTMLSanitizer()).render())
-        html = HTML('<a href="#with:colon">foo</a>')
-        self.assertEquals('<a href="#with:colon">foo</a>',
-                          (html | HTMLSanitizer()).render())
-
-    def test_sanitize_escape_text(self):
-        html = HTML('<a href="#">fo&amp;</a>')
-        self.assertEquals('<a href="#">fo&amp;</a>',
-                          (html | HTMLSanitizer()).render())
-        html = HTML('<a href="#">&lt;foo&gt;</a>')
-        self.assertEquals('<a href="#">&lt;foo&gt;</a>',
-                          (html | HTMLSanitizer()).render())
-
-    def test_sanitize_entityref_text(self):
-        html = HTML('<a href="#">fo&ouml;</a>')
-        self.assertEquals(u'<a href="#">foö</a>',
-                          (html | HTMLSanitizer()).render(encoding=None))
-
-    def test_sanitize_escape_attr(self):
-        html = HTML('<div title="&lt;foo&gt;"></div>')
-        self.assertEquals('<div title="&lt;foo&gt;"/>',
-                          (html | HTMLSanitizer()).render())
-
-    def test_sanitize_close_empty_tag(self):
-        html = HTML('<a href="#">fo<br>o</a>')
-        self.assertEquals('<a href="#">fo<br/>o</a>',
-                          (html | HTMLSanitizer()).render())
-
-    def test_sanitize_invalid_entity(self):
-        html = HTML('&junk;')
-        self.assertEquals('&amp;junk;', (html | HTMLSanitizer()).render())
-
-    def test_sanitize_remove_script_elem(self):
-        html = HTML('<script>alert("Foo")</script>')
-        self.assertEquals('', (html | HTMLSanitizer()).render())
-        html = HTML('<SCRIPT SRC="http://example.com/"></SCRIPT>')
-        self.assertEquals('', (html | HTMLSanitizer()).render())
-        self.assertRaises(ParseError, HTML, '<SCR\0IPT>alert("foo")</SCR\0IPT>')
-        self.assertRaises(ParseError, HTML,
-                          '<SCRIPT&XYZ SRC="http://example.com/"></SCRIPT>')
-
-    def test_sanitize_remove_onclick_attr(self):
-        html = HTML('<div onclick=\'alert("foo")\' />')
-        self.assertEquals('<div/>', (html | HTMLSanitizer()).render())
-
-    def test_sanitize_remove_input_password(self):
-        html = HTML('<form><input type="password" /></form>')
-        self.assertEquals('<form/>', (html | HTMLSanitizer()).render())
-
-    def test_sanitize_remove_comments(self):
-        html = HTML('''<div><!-- conditional comment crap --></div>''')
-        self.assertEquals('<div/>', (html | HTMLSanitizer()).render())
-
-    def test_sanitize_remove_style_scripts(self):
-        sanitizer = HTMLSanitizer(safe_attrs=HTMLSanitizer.SAFE_ATTRS | set(['style']))
-        # Inline style with url() using javascript: scheme
-        html = HTML('<DIV STYLE=\'background: url(javascript:alert("foo"))\'>')
-        self.assertEquals('<div/>', (html | sanitizer).render())
-        # Inline style with url() using javascript: scheme, using control char
-        html = HTML('<DIV STYLE=\'background: url(&#1;javascript:alert("foo"))\'>')
-        self.assertEquals('<div/>', (html | sanitizer).render())
-        # Inline style with url() using javascript: scheme, in quotes
-        html = HTML('<DIV STYLE=\'background: url("javascript:alert(foo)")\'>')
-        self.assertEquals('<div/>', (html | sanitizer).render())
-        # IE expressions in CSS not allowed
-        html = HTML('<DIV STYLE=\'width: expression(alert("foo"));\'>')
-        self.assertEquals('<div/>', (html | sanitizer).render())
-        html = HTML('<DIV STYLE=\'width: e/**/xpression(alert("foo"));\'>')
-        self.assertEquals('<div/>', (html | sanitizer).render())
-        html = HTML('<DIV STYLE=\'background: url(javascript:alert("foo"));'
-                                 'color: #fff\'>')
-        self.assertEquals('<div style="color: #fff"/>',
-                          (html | sanitizer).render())
-        # Inline style with url() using javascript: scheme, using unicode
-        # escapes
-        html = HTML('<DIV STYLE=\'background: \\75rl(javascript:alert("foo"))\'>')
-        self.assertEquals('<div/>', (html | sanitizer).render())
-        html = HTML('<DIV STYLE=\'background: \\000075rl(javascript:alert("foo"))\'>')
-        self.assertEquals('<div/>', (html | sanitizer).render())
-        html = HTML('<DIV STYLE=\'background: \\75 rl(javascript:alert("foo"))\'>')
-        self.assertEquals('<div/>', (html | sanitizer).render())
-        html = HTML('<DIV STYLE=\'background: \\000075 rl(javascript:alert("foo"))\'>')
-        self.assertEquals('<div/>', (html | sanitizer).render())
-        html = HTML('<DIV STYLE=\'background: \\000075\r\nrl(javascript:alert("foo"))\'>')
-        self.assertEquals('<div/>', (html | sanitizer).render())
-
-    def test_sanitize_remove_style_phishing(self):
-        sanitizer = HTMLSanitizer(safe_attrs=HTMLSanitizer.SAFE_ATTRS | set(['style']))
-        # The position property is not allowed
-        html = HTML('<div style="position:absolute;top:0"></div>')
-        self.assertEquals('<div style="top:0"/>', (html | sanitizer).render())
-        # Normal margins get passed through
-        html = HTML('<div style="margin:10px 20px"></div>')
-        self.assertEquals('<div style="margin:10px 20px"/>',
-                          (html | sanitizer).render())
-        # But not negative margins
-        html = HTML('<div style="margin:-1000px 0 0"></div>')
-        self.assertEquals('<div/>', (html | sanitizer).render())
-        html = HTML('<div style="margin-left:-2000px 0 0"></div>')
-        self.assertEquals('<div/>', (html | sanitizer).render())
-        html = HTML('<div style="margin-left:1em 1em 1em -4000px"></div>')
-        self.assertEquals('<div/>', (html | sanitizer).render())
-
-    def test_sanitize_remove_src_javascript(self):
-        html = HTML('<img src=\'javascript:alert("foo")\'>')
-        self.assertEquals('<img/>', (html | HTMLSanitizer()).render())
-        # Case-insensitive protocol matching
-        html = HTML('<IMG SRC=\'JaVaScRiPt:alert("foo")\'>')
-        self.assertEquals('<img/>', (html | HTMLSanitizer()).render())
-        # Grave accents (not parsed)
-        self.assertRaises(ParseError, HTML,
-                          '<IMG SRC=`javascript:alert("RSnake says, \'foo\'")`>')
-        # Protocol encoded using UTF-8 numeric entities
-        html = HTML('<IMG SRC=\'&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;'
-                    '&#112;&#116;&#58;alert("foo")\'>')
-        self.assertEquals('<img/>', (html | HTMLSanitizer()).render())
-        # Protocol encoded using UTF-8 numeric entities without a semicolon
-        # (which is allowed because the max number of digits is used)
-        html = HTML('<IMG SRC=\'&#0000106&#0000097&#0000118&#0000097'
-                    '&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116'
-                    '&#0000058alert("foo")\'>')
-        self.assertEquals('<img/>', (html | HTMLSanitizer()).render())
-        # Protocol encoded using UTF-8 numeric hex entities without a semicolon
-        # (which is allowed because the max number of digits is used)
-        html = HTML('<IMG SRC=\'&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69'
-                    '&#x70&#x74&#x3A;alert("foo")\'>')
-        self.assertEquals('<img/>', (html | HTMLSanitizer()).render())
-        # Embedded tab character in protocol
-        html = HTML('<IMG SRC=\'jav\tascript:alert("foo");\'>')
-        self.assertEquals('<img/>', (html | HTMLSanitizer()).render())
-        # Embedded tab character in protocol, but encoded this time
-        html = HTML('<IMG SRC=\'jav&#x09;ascript:alert("foo");\'>')
-        self.assertEquals('<img/>', (html | HTMLSanitizer()).render())
-
-
-def suite():
-    suite = unittest.TestSuite()
-    suite.addTest(doctest.DocTestSuite(HTMLFormFiller.__module__))
-    suite.addTest(unittest.makeSuite(HTMLFormFillerTestCase, 'test'))
-    suite.addTest(unittest.makeSuite(HTMLSanitizerTestCase, 'test'))
-    return suite
-
-
-if __name__ == '__main__':
-    unittest.main(defaultTest='suite')
diff -r 4bbd2b021cb5 genshi/filters/tests/i18n.py
--- a/genshi/filters/tests/i18n.py	Sat Sep 04 09:10:29 2010 +0200
+++ b/genshi/filters/tests/i18n.py	Sun Sep 05 14:20:37 2010 +0200
@@ -14,13 +14,13 @@
 from datetime import datetime
 import doctest
 from gettext import NullTranslations
-from StringIO import StringIO
 import unittest
 
 from genshi.core import Attrs
 from genshi.template import MarkupTemplate, Context
 from genshi.filters.i18n import Translator, extract
 from genshi.input import HTML
+from genshi.compat import IS_PYTHON2, StringIO
 
 
 class DummyTranslations(NullTranslations):
@@ -39,17 +39,31 @@
     def _domain_call(self, func, domain, *args, **kwargs):
         return getattr(self._domains.get(domain, self), func)(*args, **kwargs)
 
-    def ugettext(self, message):
-        missing = object()
-        tmsg = self._catalog.get(message, missing)
-        if tmsg is missing:
-            if self._fallback:
-                return self._fallback.ugettext(message)
-            return unicode(message)
-        return tmsg
+    if IS_PYTHON2:  # Python 2: the catalog lookup is exposed as ugettext()
+        def ugettext(self, message):
+            missing = object()  # sentinel: tells "no entry" from any stored value
+            tmsg = self._catalog.get(message, missing)
+            if tmsg is missing:
+                if self._fallback:
+                    return self._fallback.ugettext(message)
+                return unicode(message)
+            return tmsg
+    else:  # otherwise the same lookup is exposed as gettext()
+        def gettext(self, message):
+            missing = object()  # sentinel: tells "no entry" from any stored value
+            tmsg = self._catalog.get(message, missing)
+            if tmsg is missing:
+                if self._fallback:
+                    return self._fallback.gettext(message)
+                return str(message)  # no ``unicode`` builtin outside Python 2
+            return tmsg
 
-    def dugettext(self, domain, message):
-        return self._domain_call('ugettext', domain, message)
+    if IS_PYTHON2:  # Python 2 name: dispatch 'ugettext' through _domain_call
+        def dugettext(self, domain, message):
+            return self._domain_call('ugettext', domain, message)
+    else:  # otherwise dispatch 'gettext' through _domain_call
+        def dgettext(self, domain, message):
+            return self._domain_call('gettext', domain, message)
 
     def ungettext(self, msgid1, msgid2, n):
         try:
@@ -62,8 +76,16 @@
             else:
                 return msgid2
 
-    def dungettext(self, domain, singular, plural, numeral):
-        return self._domain_call('ungettext', domain, singular, plural, numeral)
+    if not IS_PYTHON2:  # outside Python 2, expose ungettext under the name ngettext
+        ngettext = ungettext
+        del ungettext  # remove the old name from the class namespace
+
+    if IS_PYTHON2:  # Python 2 name: dispatch 'ungettext' through _domain_call
+        def dungettext(self, domain, singular, plural, numeral):
+            return self._domain_call('ungettext', domain, singular, plural, numeral)
+    else:  # otherwise dispatch 'ngettext' through _domain_call
+        def dngettext(self, domain, singular, plural, numeral):
+            return self._domain_call('ngettext', domain, singular, plural, numeral)
 
 
 class TranslatorTestCase(unittest.TestCase):
@@ -72,7 +94,7 @@
         """
         Verify that translated attributes end up in a proper `Attrs` instance.
         """
-        html = HTML("""<html>
+        html = HTML(u"""<html>
           <span title="Foo"></span>
         </html>""")
         translator = Translator(lambda s: u"Voh")
@@ -218,9 +240,9 @@
         gettext = lambda s: u"Für Details siehe bitte [1:Hilfe]."
         translator = Translator(gettext)
         translator.setup(tmpl)
-        self.assertEqual("""<html>
+        self.assertEqual(u"""<html>
           <p>Für Details siehe bitte <a href="help.html">Hilfe</a>.</p>
-        </html>""", tmpl.generate().render())
+        </html>""".encode('utf-8'), tmpl.generate().render(encoding='utf-8'))
 
     def test_extract_i18n_msg_nonewline(self):
         tmpl = MarkupTemplate("""<html xmlns:py="http://genshi.edgewall.org/"
@@ -241,7 +263,7 @@
         gettext = lambda s: u"Für Details siehe bitte [1:Hilfe]"
         translator = Translator(gettext)
         translator.setup(tmpl)
-        self.assertEqual("""<html>
+        self.assertEqual(u"""<html>
           <p>Für Details siehe bitte <a href="help.html">Hilfe</a></p>
         </html>""", tmpl.generate().render())
 
@@ -264,9 +286,9 @@
         gettext = lambda s: u"Für Details siehe bitte [1:Hilfe]"
         translator = Translator(gettext)
         translator.setup(tmpl)
-        self.assertEqual("""<html>
+        self.assertEqual(u"""<html>
           Für Details siehe bitte <a href="help.html">Hilfe</a>
-        </html>""", tmpl.generate().render())
+        </html>""".encode('utf-8'), tmpl.generate().render(encoding='utf-8'))
 
     def test_extract_i18n_msg_with_attributes(self):
         tmpl = MarkupTemplate("""<html xmlns:py="http://genshi.edgewall.org/"
@@ -394,7 +416,7 @@
         gettext = lambda s: u"Für Details siehe bitte [1:[2:Hilfeseite]]."
         translator = Translator(gettext)
         translator.setup(tmpl)
-        self.assertEqual("""<html>
+        self.assertEqual(u"""<html>
           <p>Für Details siehe bitte <a href="help.html"><em>Hilfeseite</em></a>.</p>
         </html>""", tmpl.generate().render())
 
@@ -449,7 +471,7 @@
         gettext = lambda s: u"[1:] Einträge pro Seite anzeigen."
         translator = Translator(gettext)
         translator.setup(tmpl)
-        self.assertEqual("""<html>
+        self.assertEqual(u"""<html>
           <p><input type="text" name="num"/> Einträge pro Seite anzeigen.</p>
         </html>""", tmpl.generate().render())
 
@@ -476,7 +498,7 @@
         gettext = lambda s: u"Für [2:Details] siehe bitte [1:Hilfe]."
         translator = Translator(gettext)
         translator.setup(tmpl)
-        self.assertEqual("""<html>
+        self.assertEqual(u"""<html>
           <p>Für <em>Details</em> siehe bitte <a href="help.html">Hilfe</a>.</p>
         </html>""", tmpl.generate().render())
 
@@ -500,13 +522,13 @@
           <p i18n:msg="">
             Show me <input type="text" name="num" /> entries per page, starting at page <input type="text" name="num" />.
           </p>
-        </html>""")
+        </html>""", encoding='utf-8')
         gettext = lambda s: u"[1:] Einträge pro Seite, beginnend auf Seite [2:]."
         translator = Translator(gettext)
         translator.setup(tmpl)
-        self.assertEqual("""<html>
-          <p><input type="text" name="num"/> Eintr\xc3\xa4ge pro Seite, beginnend auf Seite <input type="text" name="num"/>.</p>
-        </html>""", tmpl.generate().render())
+        self.assertEqual(u"""<html>
+          <p><input type="text" name="num"/> Eintr\u00E4ge pro Seite, beginnend auf Seite <input type="text" name="num"/>.</p>
+        </html>""".encode('utf-8'), tmpl.generate().render(encoding='utf-8'))
 
     def test_extract_i18n_msg_with_param(self):
         tmpl = MarkupTemplate("""<html xmlns:py="http://genshi.edgewall.org/"
@@ -545,7 +567,7 @@
         gettext = lambda s: u"%(name)s, sei gegrüßt!"
         translator = Translator(gettext)
         translator.setup(tmpl)
-        self.assertEqual("""<html>
+        self.assertEqual(u"""<html>
           <p>Jim, sei gegrüßt!</p>
         </html>""", tmpl.generate(user=dict(name='Jim')).render())
 
@@ -559,7 +581,7 @@
         gettext = lambda s: u"Sei gegrüßt, [1:Alter]!"
         translator = Translator(gettext)
         translator.setup(tmpl)
-        self.assertEqual("""<html>
+        self.assertEqual(u"""<html>
           <p>Sei gegrüßt, <a href="#42">Alter</a>!</p>
         </html>""", tmpl.generate(anchor='42').render())
 
@@ -617,7 +639,7 @@
         gettext = lambda s: u"[1:] Einträge pro Seite anzeigen."
         translator = Translator(gettext)
         translator.setup(tmpl)
-        self.assertEqual("""<html>
+        self.assertEqual(u"""<html>
           <p><input type="text" name="num" value="x"/> Einträge pro Seite anzeigen.</p>
         </html>""", tmpl.generate().render())
 
@@ -676,7 +698,7 @@
         }))
         tmpl.filters.insert(0, translator)
         tmpl.add_directives(Translator.NAMESPACE, translator)
-        self.assertEqual("""<html>
+        self.assertEqual(u"""<html>
           <p title="Voh bär">Voh</p>
         </html>""", tmpl.generate().render())
 
@@ -720,9 +742,9 @@
         })
         translator = Translator(translations)
         translator.setup(tmpl)
-        self.assertEqual("""<html>
+        self.assertEqual(u"""<html>
           Modificado à um dia por Pedro
-        </html>""", tmpl.generate(date='um dia', author="Pedro").render())
+        </html>""".encode('utf-8'), tmpl.generate(date='um dia', author="Pedro").render(encoding='utf-8'))
 
 
     def test_i18n_msg_ticket_251_extract(self):
@@ -749,9 +771,9 @@
         })
         translator = Translator(translations)
         translator.setup(tmpl)
-        self.assertEqual("""<html>
+        self.assertEqual(u"""<html>
           <p><tt><b>Trandução[ 0 ]</b>: <em>Uma moeda</em></tt></p>
-        </html>""", tmpl.generate().render())
+        </html>""".encode('utf-8'), tmpl.generate().render(encoding='utf-8'))
 
     def test_extract_i18n_msg_with_other_directives_nested(self):
         tmpl = MarkupTemplate("""<html xmlns:py="http://genshi.edgewall.org/"
@@ -811,7 +833,7 @@
         self.assertEqual(1, len(messages))
         ctx = Context()
         ctx.push({'trac': {'homepage': 'http://trac.edgewall.org/'}})
-        self.assertEqual("""<html>
+        self.assertEqual(u"""<html>
           <p>Antes de o fazer, porém,
             <strong>por favor tente <a href="http://trac.edgewall.org/search?ticket=yes&amp;noquickjump=1&amp;q=q">procurar</a>
             por problemas semelhantes</strong>, uma vez que é muito provável que este problema
@@ -846,11 +868,11 @@
             '[2:[3:trac.ini]]\n            and cannot be edited on this page.',
             messages[0][2]
         )
-        self.assertEqual("""<html>
+        self.assertEqual(u"""<html>
           <p class="hint"><strong>Nota:</strong> Este repositório está definido em
            <code><a href="href.wiki(TracIni)">trac.ini</a></code>
             e não pode ser editado nesta página.</p>
-        </html>""", tmpl.generate(editable=False).render())
+        </html>""".encode('utf-8'), tmpl.generate(editable=False).render(encoding='utf-8'))
 
     def test_extract_i18n_msg_with_py_strip(self):
         tmpl = MarkupTemplate("""<html xmlns:py="http://genshi.edgewall.org/"
@@ -1771,6 +1793,11 @@
             loader = TemplateLoader([dirname], callback=callback)
             tmpl = loader.load('tmpl10.html')
 
+            if IS_PYTHON2:
+                dgettext = translations.dugettext
+            else:
+                dgettext = translations.dgettext
+
             self.assertEqual("""<html>
                         <div>Included tmpl0</div>
                         <p title="foo_Bar 0">foo_Bar 0</p>
@@ -1797,7 +1824,7 @@
                         <p title="Voh">Voh 3</p>
                         <p title="Voh">Voh 3</p>
                 </html>""", tmpl.generate(idx=-1,
-                                          dg=translations.dugettext).render())
+                                          dg=dgettext).render())
         finally:
             shutil.rmtree(dirname)
 
diff -r 4bbd2b021cb5 genshi/filters/tests/test_html.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genshi/filters/tests/test_html.py	Sun Sep 05 14:20:37 2010 +0200
@@ -0,0 +1,513 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2006-2009 Edgewall Software
+# All rights reserved.
+#
+# This software is licensed as described in the file COPYING, which
+# you should have received as part of this distribution. The terms
+# are also available at http://genshi.edgewall.org/wiki/License.
+#
+# This software consists of voluntary contributions made by many
+# individuals. For the exact contribution history, see the revision
+# history and logs, available at http://genshi.edgewall.org/log/.
+
+import doctest
+import unittest
+
+from genshi.input import HTML, ParseError
+from genshi.filters.html import HTMLFormFiller, HTMLSanitizer
+from genshi.template import MarkupTemplate
+
+class HTMLFormFillerTestCase(unittest.TestCase):
+
+    def test_fill_input_text_no_value(self):  # no data: no value attribute added
+        html = HTML(u"""<form><p>
+          <input type="text" name="foo" />
+        </p></form>""") | HTMLFormFiller()
+        self.assertEqual("""<form><p>
+          <input type="text" name="foo"/>
+        </p></form>""", html.render())
+
+    def test_fill_input_text_single_value(self):  # scalar data fills value="bar"
+        html = HTML(u"""<form><p>
+          <input type="text" name="foo" />
+        </p></form>""") | HTMLFormFiller(data={'foo': 'bar'})
+        self.assertEqual("""<form><p>
+          <input type="text" name="foo" value="bar"/>
+        </p></form>""", html.render())
+
+    def test_fill_input_text_multi_value(self):  # one-item list fills value="bar"
+        html = HTML(u"""<form><p>
+          <input type="text" name="foo" />
+        </p></form>""") | HTMLFormFiller(data={'foo': ['bar']})
+        self.assertEqual("""<form><p>
+          <input type="text" name="foo" value="bar"/>
+        </p></form>""", html.render())
+
+    def test_fill_input_hidden_no_value(self):  # no data: no value attribute added
+        html = HTML(u"""<form><p>
+          <input type="hidden" name="foo" />
+        </p></form>""") | HTMLFormFiller()
+        self.assertEqual("""<form><p>
+          <input type="hidden" name="foo"/>
+        </p></form>""", html.render())
+
+    def test_fill_input_hidden_single_value(self):  # scalar data fills value="bar"
+        html = HTML(u"""<form><p>
+          <input type="hidden" name="foo" />
+        </p></form>""") | HTMLFormFiller(data={'foo': 'bar'})
+        self.assertEqual("""<form><p>
+          <input type="hidden" name="foo" value="bar"/>
+        </p></form>""", html.render())
+
+    def test_fill_input_hidden_multi_value(self):  # one-item list fills value="bar"
+        html = HTML(u"""<form><p>
+          <input type="hidden" name="foo" />
+        </p></form>""") | HTMLFormFiller(data={'foo': ['bar']})
+        self.assertEqual("""<form><p>
+          <input type="hidden" name="foo" value="bar"/>
+        </p></form>""", html.render())
+
+    def test_fill_textarea_no_value(self):  # no data: textarea stays empty
+        html = HTML(u"""<form><p>
+          <textarea name="foo"></textarea>
+        </p></form>""") | HTMLFormFiller()
+        self.assertEqual("""<form><p>
+          <textarea name="foo"/>
+        </p></form>""", html.render())
+
+    def test_fill_textarea_single_value(self):  # scalar data becomes the body text
+        html = HTML(u"""<form><p>
+          <textarea name="foo"></textarea>
+        </p></form>""") | HTMLFormFiller(data={'foo': 'bar'})
+        self.assertEqual("""<form><p>
+          <textarea name="foo">bar</textarea>
+        </p></form>""", html.render())
+
+    def test_fill_textarea_multi_value(self):  # one-item list becomes the body text
+        html = HTML(u"""<form><p>
+          <textarea name="foo"></textarea>
+        </p></form>""") | HTMLFormFiller(data={'foo': ['bar']})
+        self.assertEqual("""<form><p>
+          <textarea name="foo">bar</textarea>
+        </p></form>""", html.render())
+
+    def test_fill_textarea_multiple(self):
+        # Ensure that the subsequent textarea doesn't get the data from the
+        # first
+        html = HTML(u"""<form><p>
+          <textarea name="foo"></textarea>
+          <textarea name="bar"></textarea>
+        </p></form>""") | HTMLFormFiller(data={'foo': 'Some text'})
+        self.assertEqual("""<form><p>
+          <textarea name="foo">Some text</textarea>
+          <textarea name="bar"/>
+        </p></form>""", html.render())
+
+    def test_fill_textarea_preserve_original(self):  # unfilled "bar" keeps its text
+        html = HTML(u"""<form><p>
+          <textarea name="foo"></textarea>
+          <textarea name="bar">Original value</textarea>
+        </p></form>""") | HTMLFormFiller(data={'foo': 'Some text'})
+        self.assertEqual("""<form><p>
+          <textarea name="foo">Some text</textarea>
+          <textarea name="bar">Original value</textarea>
+        </p></form>""", html.render())
+
+    def test_fill_input_checkbox_single_value_auto_no_value(self):  # no data: unchecked
+        html = HTML(u"""<form><p>
+          <input type="checkbox" name="foo" />
+        </p></form>""") | HTMLFormFiller()
+        self.assertEqual("""<form><p>
+          <input type="checkbox" name="foo"/>
+        </p></form>""", html.render())
+
+    def test_fill_input_checkbox_single_value_auto(self):  # '' unchecked, 'on' checked
+        html = HTML(u"""<form><p>
+          <input type="checkbox" name="foo" />
+        </p></form>""")
+        self.assertEqual("""<form><p>
+          <input type="checkbox" name="foo"/>
+        </p></form>""", (html | HTMLFormFiller(data={'foo': ''})).render())
+        self.assertEqual("""<form><p>
+          <input type="checkbox" name="foo" checked="checked"/>
+        </p></form>""", (html | HTMLFormFiller(data={'foo': 'on'})).render())
+
+    def test_fill_input_checkbox_single_value_defined(self):  # checked only for '1'
+        html = HTML("""<form><p>
+          <input type="checkbox" name="foo" value="1" />
+        </p></form>""", encoding='ascii')
+        self.assertEqual("""<form><p>
+          <input type="checkbox" name="foo" value="1" checked="checked"/>
+        </p></form>""", (html | HTMLFormFiller(data={'foo': '1'})).render())
+        self.assertEqual("""<form><p>
+          <input type="checkbox" name="foo" value="1"/>
+        </p></form>""", (html | HTMLFormFiller(data={'foo': '2'})).render())
+
+    def test_fill_input_checkbox_multi_value_auto(self):  # [] unchecked, ['on'] checked
+        html = HTML("""<form><p>
+          <input type="checkbox" name="foo" />
+        </p></form>""", encoding='ascii')
+        self.assertEqual("""<form><p>
+          <input type="checkbox" name="foo"/>
+        </p></form>""", (html | HTMLFormFiller(data={'foo': []})).render())
+        self.assertEqual("""<form><p>
+          <input type="checkbox" name="foo" checked="checked"/>
+        </p></form>""", (html | HTMLFormFiller(data={'foo': ['on']})).render())
+
+    def test_fill_input_checkbox_multi_value_defined(self):  # checked only for ['1']
+        html = HTML(u"""<form><p>
+          <input type="checkbox" name="foo" value="1" />
+        </p></form>""")
+        self.assertEqual("""<form><p>
+          <input type="checkbox" name="foo" value="1" checked="checked"/>
+        </p></form>""", (html | HTMLFormFiller(data={'foo': ['1']})).render())
+        self.assertEqual("""<form><p>
+          <input type="checkbox" name="foo" value="1"/>
+        </p></form>""", (html | HTMLFormFiller(data={'foo': ['2']})).render())
+
+    def test_fill_input_radio_no_value(self):  # no data: radio stays unchecked
+        html = HTML(u"""<form><p>
+          <input type="radio" name="foo" />
+        </p></form>""") | HTMLFormFiller()
+        self.assertEqual("""<form><p>
+          <input type="radio" name="foo"/>
+        </p></form>""", html.render())
+
+    def test_fill_input_radio_single_value(self):  # checked only when data is '1'
+        html = HTML(u"""<form><p>
+          <input type="radio" name="foo" value="1" />
+        </p></form>""")
+        self.assertEqual("""<form><p>
+          <input type="radio" name="foo" value="1" checked="checked"/>
+        </p></form>""", (html | HTMLFormFiller(data={'foo': '1'})).render())
+        self.assertEqual("""<form><p>
+          <input type="radio" name="foo" value="1"/>
+        </p></form>""", (html | HTMLFormFiller(data={'foo': '2'})).render())
+
+    def test_fill_input_radio_multi_value(self):  # checked only when data is ['1']
+        html = HTML(u"""<form><p>
+          <input type="radio" name="foo" value="1" />
+        </p></form>""")
+        self.assertEqual("""<form><p>
+          <input type="radio" name="foo" value="1" checked="checked"/>
+        </p></form>""", (html | HTMLFormFiller(data={'foo': ['1']})).render())
+        self.assertEqual("""<form><p>
+          <input type="radio" name="foo" value="1"/>
+        </p></form>""", (html | HTMLFormFiller(data={'foo': ['2']})).render())
+
+    def test_fill_input_radio_empty_string(self):  # '' checks the value="" radio
+        html = HTML(u"""<form><p>
+          <input type="radio" name="foo" value="" />
+        </p></form>""")
+        self.assertEqual("""<form><p>
+          <input type="radio" name="foo" value="" checked="checked"/>
+        </p></form>""", (html | HTMLFormFiller(data={'foo': ''})).render())
+
+    def test_fill_input_radio_multi_empty_string(self):  # [''] checks the value="" radio
+        html = HTML(u"""<form><p>
+          <input type="radio" name="foo" value="" />
+        </p></form>""")
+        self.assertEqual("""<form><p>
+          <input type="radio" name="foo" value="" checked="checked"/>
+        </p></form>""", (html | HTMLFormFiller(data={'foo': ['']})).render())
+
+    def test_fill_select_no_value_auto(self):  # no data: no option gets selected
+        html = HTML(u"""<form><p>
+          <select name="foo">
+            <option>1</option>
+            <option>2</option>
+            <option>3</option>
+          </select>
+        </p></form>""") | HTMLFormFiller()
+        self.assertEqual("""<form><p>
+          <select name="foo">
+            <option>1</option>
+            <option>2</option>
+            <option>3</option>
+          </select>
+        </p></form>""", html.render())
+
+    def test_fill_select_no_value_defined(self):
+        html = HTML(u"""<form><p>
+          <select name="foo">
+            <option value="1">1</option>
+            <option value="2">2</option>
+            <option value="3">3</option>
+          </select>
+        </p></form>""") | HTMLFormFiller()
+        self.assertEquals("""<form><p>
+          <select name="foo">
+            <option value="1">1</option>
+            <option value="2">2</option>
+            <option value="3">3</option>
+          </select>
+        </p></form>""", html.render())
+
+    def test_fill_select_single_value_auto(self):
+        html = HTML(u"""<form><p>
+          <select name="foo">
+            <option>1</option>
+            <option>2</option>
+            <option>3</option>
+          </select>
+        </p></form>""") | HTMLFormFiller(data={'foo': '1'})
+        self.assertEquals("""<form><p>
+          <select name="foo">
+            <option selected="selected">1</option>
+            <option>2</option>
+            <option>3</option>
+          </select>
+        </p></form>""", html.render())
+
+    def test_fill_select_single_value_defined(self):
+        html = HTML(u"""<form><p>
+          <select name="foo">
+            <option value="1">1</option>
+            <option value="2">2</option>
+            <option value="3">3</option>
+          </select>
+        </p></form>""") | HTMLFormFiller(data={'foo': '1'})
+        self.assertEquals("""<form><p>
+          <select name="foo">
+            <option value="1" selected="selected">1</option>
+            <option value="2">2</option>
+            <option value="3">3</option>
+          </select>
+        </p></form>""", html.render())
+
+    def test_fill_select_multi_value_auto(self):
+        html = HTML(u"""<form><p>
+          <select name="foo" multiple>
+            <option>1</option>
+            <option>2</option>
+            <option>3</option>
+          </select>
+        </p></form>""") | HTMLFormFiller(data={'foo': ['1', '3']})
+        self.assertEquals("""<form><p>
+          <select name="foo" multiple="multiple">
+            <option selected="selected">1</option>
+            <option>2</option>
+            <option selected="selected">3</option>
+          </select>
+        </p></form>""", html.render())
+
+    def test_fill_select_multi_value_defined(self):
+        html = HTML(u"""<form><p>
+          <select name="foo" multiple>
+            <option value="1">1</option>
+            <option value="2">2</option>
+            <option value="3">3</option>
+          </select>
+        </p></form>""") | HTMLFormFiller(data={'foo': ['1', '3']})
+        self.assertEquals("""<form><p>
+          <select name="foo" multiple="multiple">
+            <option value="1" selected="selected">1</option>
+            <option value="2">2</option>
+            <option value="3" selected="selected">3</option>
+          </select>
+        </p></form>""", html.render())
+
+    def test_fill_option_segmented_text(self):
+        html = MarkupTemplate(u"""<form>
+          <select name="foo">
+            <option value="1">foo $x</option>
+          </select>
+        </form>""").generate(x=1) | HTMLFormFiller(data={'foo': '1'})
+        self.assertEquals(u"""<form>
+          <select name="foo">
+            <option value="1" selected="selected">foo 1</option>
+          </select>
+        </form>""", html.render())
+
+    def test_fill_option_segmented_text_no_value(self):
+        html = MarkupTemplate("""<form>
+          <select name="foo">
+            <option>foo $x bar</option>
+          </select>
+        </form>""").generate(x=1) | HTMLFormFiller(data={'foo': 'foo 1 bar'})
+        self.assertEquals("""<form>
+          <select name="foo">
+            <option selected="selected">foo 1 bar</option>
+          </select>
+        </form>""", html.render())
+
+    def test_fill_option_unicode_value(self):
+        html = HTML(u"""<form>
+          <select name="foo">
+            <option value="&ouml;">foo</option>
+          </select>
+        </form>""") | HTMLFormFiller(data={'foo': u'ö'})
+        self.assertEquals(u"""<form>
+          <select name="foo">
+            <option value="ö" selected="selected">foo</option>
+          </select>
+        </form>""", html.render(encoding=None))
+
+    def test_fill_input_password_disabled(self):
+        html = HTML(u"""<form><p>
+          <input type="password" name="pass" />
+        </p></form>""") | HTMLFormFiller(data={'pass': 'bar'})
+        self.assertEquals("""<form><p>
+          <input type="password" name="pass"/>
+        </p></form>""", html.render())
+
+    def test_fill_input_password_enabled(self):
+        html = HTML(u"""<form><p>
+          <input type="password" name="pass" />
+        </p></form>""") | HTMLFormFiller(data={'pass': '1234'}, passwords=True)
+        self.assertEquals("""<form><p>
+          <input type="password" name="pass" value="1234"/>
+        </p></form>""", html.render())
+
+
+class HTMLSanitizerTestCase(unittest.TestCase):
+
+    def test_sanitize_unchanged(self):
+        html = HTML(u'<a href="#">fo<br />o</a>')
+        self.assertEquals('<a href="#">fo<br/>o</a>',
+                          (html | HTMLSanitizer()).render())
+        html = HTML(u'<a href="#with:colon">foo</a>')
+        self.assertEquals('<a href="#with:colon">foo</a>',
+                          (html | HTMLSanitizer()).render())
+
+    def test_sanitize_escape_text(self):
+        html = HTML(u'<a href="#">fo&amp;</a>')
+        self.assertEquals('<a href="#">fo&amp;</a>',
+                          (html | HTMLSanitizer()).render())
+        html = HTML(u'<a href="#">&lt;foo&gt;</a>')
+        self.assertEquals('<a href="#">&lt;foo&gt;</a>',
+                          (html | HTMLSanitizer()).render())
+
+    def test_sanitize_entityref_text(self):
+        html = HTML(u'<a href="#">fo&ouml;</a>')
+        self.assertEquals(u'<a href="#">foö</a>',
+                          (html | HTMLSanitizer()).render(encoding=None))
+
+    def test_sanitize_escape_attr(self):
+        html = HTML(u'<div title="&lt;foo&gt;"></div>')
+        self.assertEquals('<div title="&lt;foo&gt;"/>',
+                          (html | HTMLSanitizer()).render())
+
+    def test_sanitize_close_empty_tag(self):
+        html = HTML(u'<a href="#">fo<br>o</a>')
+        self.assertEquals('<a href="#">fo<br/>o</a>',
+                          (html | HTMLSanitizer()).render())
+
+    def test_sanitize_invalid_entity(self):
+        html = HTML(u'&junk;')
+        self.assertEquals('&amp;junk;', (html | HTMLSanitizer()).render())
+
+    def test_sanitize_remove_script_elem(self):
+        html = HTML(u'<script>alert("Foo")</script>')
+        self.assertEquals('', (html | HTMLSanitizer()).render())
+        html = HTML(u'<SCRIPT SRC="http://example.com/"></SCRIPT>')
+        self.assertEquals('', (html | HTMLSanitizer()).render())
+        self.assertRaises(ParseError, HTML, u'<SCR\0IPT>alert("foo")</SCR\0IPT>')
+        self.assertRaises(ParseError, HTML,
+                          u'<SCRIPT&XYZ SRC="http://example.com/"></SCRIPT>')
+
+    def test_sanitize_remove_onclick_attr(self):
+        html = HTML(u'<div onclick=\'alert("foo")\' />')
+        self.assertEquals('<div/>', (html | HTMLSanitizer()).render())
+
+    def test_sanitize_remove_input_password(self):
+        html = HTML(u'<form><input type="password" /></form>')
+        self.assertEquals('<form/>', (html | HTMLSanitizer()).render())
+
+    def test_sanitize_remove_comments(self):
+        html = HTML(u'''<div><!-- conditional comment crap --></div>''')
+        self.assertEquals('<div/>', (html | HTMLSanitizer()).render())
+
+    def test_sanitize_remove_style_scripts(self):
+        sanitizer = HTMLSanitizer(safe_attrs=HTMLSanitizer.SAFE_ATTRS | set(['style']))
+        # Inline style with url() using javascript: scheme
+        html = HTML(u'<DIV STYLE=\'background: url(javascript:alert("foo"))\'>')
+        self.assertEquals('<div/>', (html | sanitizer).render())
+        # Inline style with url() using javascript: scheme, using control char
+        html = HTML(u'<DIV STYLE=\'background: url(&#1;javascript:alert("foo"))\'>')
+        self.assertEquals('<div/>', (html | sanitizer).render())
+        # Inline style with url() using javascript: scheme, in quotes
+        html = HTML(u'<DIV STYLE=\'background: url("javascript:alert(foo)")\'>')
+        self.assertEquals('<div/>', (html | sanitizer).render())
+        # IE expressions in CSS not allowed
+        html = HTML(u'<DIV STYLE=\'width: expression(alert("foo"));\'>')
+        self.assertEquals('<div/>', (html | sanitizer).render())
+        html = HTML(u'<DIV STYLE=\'width: e/**/xpression(alert("foo"));\'>')
+        self.assertEquals('<div/>', (html | sanitizer).render())
+        html = HTML(u'<DIV STYLE=\'background: url(javascript:alert("foo"));'
+                                 'color: #fff\'>')
+        self.assertEquals('<div style="color: #fff"/>',
+                          (html | sanitizer).render())
+        # Inline style with url() using javascript: scheme, using unicode
+        # escapes
+        html = HTML(u'<DIV STYLE=\'background: \\75rl(javascript:alert("foo"))\'>')
+        self.assertEquals('<div/>', (html | sanitizer).render())
+        html = HTML(u'<DIV STYLE=\'background: \\000075rl(javascript:alert("foo"))\'>')
+        self.assertEquals('<div/>', (html | sanitizer).render())
+        html = HTML(u'<DIV STYLE=\'background: \\75 rl(javascript:alert("foo"))\'>')
+        self.assertEquals('<div/>', (html | sanitizer).render())
+        html = HTML(u'<DIV STYLE=\'background: \\000075 rl(javascript:alert("foo"))\'>')
+        self.assertEquals('<div/>', (html | sanitizer).render())
+        html = HTML(u'<DIV STYLE=\'background: \\000075\r\nrl(javascript:alert("foo"))\'>')
+        self.assertEquals('<div/>', (html | sanitizer).render())
+
+    def test_sanitize_remove_style_phishing(self):
+        sanitizer = HTMLSanitizer(safe_attrs=HTMLSanitizer.SAFE_ATTRS | set(['style']))
+        # The position property is not allowed
+        html = HTML(u'<div style="position:absolute;top:0"></div>')
+        self.assertEquals('<div style="top:0"/>', (html | sanitizer).render())
+        # Normal margins get passed through
+        html = HTML(u'<div style="margin:10px 20px"></div>')
+        self.assertEquals('<div style="margin:10px 20px"/>',
+                          (html | sanitizer).render())
+        # But not negative margins
+        html = HTML(u'<div style="margin:-1000px 0 0"></div>')
+        self.assertEquals('<div/>', (html | sanitizer).render())
+        html = HTML(u'<div style="margin-left:-2000px 0 0"></div>')
+        self.assertEquals('<div/>', (html | sanitizer).render())
+        html = HTML(u'<div style="margin-left:1em 1em 1em -4000px"></div>')
+        self.assertEquals('<div/>', (html | sanitizer).render())
+
+    def test_sanitize_remove_src_javascript(self):
+        html = HTML(u'<img src=\'javascript:alert("foo")\'>')
+        self.assertEquals('<img/>', (html | HTMLSanitizer()).render())
+        # Case-insensitive protocol matching
+        html = HTML(u'<IMG SRC=\'JaVaScRiPt:alert("foo")\'>')
+        self.assertEquals('<img/>', (html | HTMLSanitizer()).render())
+        # Grave accents (not parsed)
+        self.assertRaises(ParseError, HTML,
+                          u'<IMG SRC=`javascript:alert("RSnake says, \'foo\'")`>')
+        # Protocol encoded using UTF-8 numeric entities
+        html = HTML(u'<IMG SRC=\'&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;'
+                    '&#112;&#116;&#58;alert("foo")\'>')
+        self.assertEquals('<img/>', (html | HTMLSanitizer()).render())
+        # Protocol encoded using UTF-8 numeric entities without a semicolon
+        # (which is allowed because the max number of digits is used)
+        html = HTML(u'<IMG SRC=\'&#0000106&#0000097&#0000118&#0000097'
+                    '&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116'
+                    '&#0000058alert("foo")\'>')
+        self.assertEquals('<img/>', (html | HTMLSanitizer()).render())
+        # Protocol encoded using UTF-8 numeric hex entities without a semicolon
+        # (which is allowed because the max number of digits is used)
+        html = HTML(u'<IMG SRC=\'&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69'
+                    '&#x70&#x74&#x3A;alert("foo")\'>')
+        self.assertEquals('<img/>', (html | HTMLSanitizer()).render())
+        # Embedded tab character in protocol
+        html = HTML(u'<IMG SRC=\'jav\tascript:alert("foo");\'>')
+        self.assertEquals('<img/>', (html | HTMLSanitizer()).render())
+        # Embedded tab character in protocol, but encoded this time
+        html = HTML(u'<IMG SRC=\'jav&#x09;ascript:alert("foo");\'>')
+        self.assertEquals('<img/>', (html | HTMLSanitizer()).render())
+
+
+def suite():
+    suite = unittest.TestSuite()
+    suite.addTest(doctest.DocTestSuite(HTMLFormFiller.__module__))
+    suite.addTest(unittest.makeSuite(HTMLFormFillerTestCase, 'test'))
+    suite.addTest(unittest.makeSuite(HTMLSanitizerTestCase, 'test'))
+    return suite
+
+
+if __name__ == '__main__':
+    unittest.main(defaultTest='suite')
diff -r 4bbd2b021cb5 genshi/filters/tests/transform.py
--- a/genshi/filters/tests/transform.py	Sat Sep 04 09:10:29 2010 +0200
+++ b/genshi/filters/tests/transform.py	Sun Sep 05 14:20:37 2010 +0200
@@ -48,8 +48,10 @@
 
 def _transform(html, transformer, with_attrs=False):
     """Apply transformation returning simplified marked stream."""
-    if isinstance(html, basestring):
-        html = HTML(html)
+    if isinstance(html, basestring) and not isinstance(html, unicode):
+        html = HTML(html, encoding='utf-8')  # byte strings need an explicit encoding
+    elif isinstance(html, unicode):
+        html = HTML(html)  # already text: nothing to decode
     stream = transformer(html, keep_marks=True)
     return _simplify(stream, with_attrs)
 
@@ -57,7 +59,7 @@
 class SelectTest(unittest.TestCase):
     """Test .select()"""
     def _select(self, select):
-        html = HTML(FOOBAR)
+        html = HTML(FOOBAR, encoding='utf-8')
         if isinstance(select, basestring):
             select = [select]
         transformer = Transformer(select[0])
@@ -138,7 +140,7 @@
 
     def test_select_text_context(self):
         self.assertEqual(
-            list(Transformer('.')(HTML('foo'), keep_marks=True)),
+            list(Transformer('.')(HTML(u'foo'), keep_marks=True)),
             [('OUTSIDE', ('TEXT', u'foo', (None, 1, 0)))],
             )
 
@@ -205,7 +207,7 @@
 
     def test_invert_text_context(self):
         self.assertEqual(
-            _simplify(Transformer('.').invert()(HTML('foo'), keep_marks=True)),
+            _simplify(Transformer('.').invert()(HTML(u'foo'), keep_marks=True)),
             [(None, 'TEXT', u'foo')],
             )
 
@@ -271,7 +273,7 @@
 
     def test_empty_text_context(self):
         self.assertEqual(
-            _simplify(Transformer('.')(HTML('foo'), keep_marks=True)),
+            _simplify(Transformer('.')(HTML(u'foo'), keep_marks=True)),
             [(OUTSIDE, TEXT, u'foo')],
             )
 
@@ -656,9 +658,11 @@
 
             def __iter__(self):
                 self.count += 1
-                return iter(HTML('CONTENT %i' % self.count))
+                return iter(HTML(u'CONTENT %i' % self.count))
 
-        if isinstance(html, basestring):
+        if isinstance(html, basestring) and not isinstance(html, unicode):
+            html = HTML(html, encoding='utf-8')
+        elif isinstance(html, unicode):  # non-string input is left as-is, as before this change
             html = HTML(html)
         if content is None:
             content = Injector()
diff -r 4bbd2b021cb5 genshi/filters/transform.py
--- a/genshi/filters/transform.py	Sat Sep 04 09:10:29 2010 +0200
+++ b/genshi/filters/transform.py	Sun Sep 05 14:20:37 2010 +0200
@@ -31,7 +31,8 @@
 ...  <body>
 ...    Some <em>body</em> text.
 ...  </body>
-... </html>''')
+... </html>''',
+... encoding='utf-8')
 >>> print(html | Transformer('body/em').map(unicode.upper, TEXT)
 ...                                    .unwrap().wrap(tag.u))
 <html>
@@ -136,7 +137,8 @@
     mark.
 
     >>> html = HTML('<html><head><title>Some Title</title></head>'
-    ...             '<body>Some <em>body</em> text.</body></html>')
+    ...             '<body>Some <em>body</em> text.</body></html>',
+    ...             encoding='utf-8')
 
     Transformations act on selected stream events matching an XPath expression.
     Here's an example of removing some markup (the title, in this case)
@@ -215,7 +217,8 @@
         ...             yield mark, (kind, data.upper(), pos)
         ...         else:
         ...             yield mark, (kind, data, pos)
-        >>> short_stream = HTML('<body>Some <em>test</em> text</body>')
+        >>> short_stream = HTML('<body>Some <em>test</em> text</body>',
+        ...                      encoding='utf-8')
         >>> print(short_stream | Transformer('.//em/text()').apply(upper))
         <body>Some <em>TEST</em> text</body>
         """
@@ -233,7 +236,7 @@
         """Mark events matching the given XPath expression, within the current
         selection.
 
-        >>> html = HTML('<body>Some <em>test</em> text</body>')
+        >>> html = HTML('<body>Some <em>test</em> text</body>', encoding='utf-8')
         >>> print(html | Transformer().select('.//em').trace())
         (None, ('START', (QName('body'), Attrs()), (None, 1, 0)))
         (None, ('TEXT', u'Some ', (None, 1, 6)))
@@ -257,7 +260,7 @@
         Specificaly, all marks are converted to null marks, and all null marks
         are converted to OUTSIDE marks.
 
-        >>> html = HTML('<body>Some <em>test</em> text</body>')
+        >>> html = HTML('<body>Some <em>test</em> text</body>', encoding='utf-8')
         >>> print(html | Transformer('//em').invert().trace())
         ('OUTSIDE', ('START', (QName('body'), Attrs()), (None, 1, 0)))
         ('OUTSIDE', ('TEXT', u'Some ', (None, 1, 6)))
@@ -277,7 +280,7 @@
 
         Example:
 
-        >>> html = HTML('<body>Some <em>test</em> text</body>')
+        >>> html = HTML('<body>Some <em>test</em> text</body>', encoding='utf-8')
         >>> print(html | Transformer('//em').end().trace())
         ('OUTSIDE', ('START', (QName('body'), Attrs()), (None, 1, 0)))
         ('OUTSIDE', ('TEXT', u'Some ', (None, 1, 6)))
@@ -301,7 +304,8 @@
         Example:
 
         >>> html = HTML('<html><head><title>Some Title</title></head>'
-        ...             '<body>Some <em>body</em> text.</body></html>')
+        ...             '<body>Some <em>body</em> text.</body></html>',
+        ...             encoding='utf-8')
         >>> print(html | Transformer('.//em').empty())
         <html><head><title>Some Title</title></head><body>Some <em/>
         text.</body></html>
@@ -316,7 +320,8 @@
         Example:
 
         >>> html = HTML('<html><head><title>Some Title</title></head>'
-        ...             '<body>Some <em>body</em> text.</body></html>')
+        ...             '<body>Some <em>body</em> text.</body></html>',
+        ...             encoding='utf-8')
         >>> print(html | Transformer('.//em').remove())
         <html><head><title>Some Title</title></head><body>Some
         text.</body></html>
@@ -333,7 +338,8 @@
         Example:
 
         >>> html = HTML('<html><head><title>Some Title</title></head>'
-        ...             '<body>Some <em>body</em> text.</body></html>')
+        ...             '<body>Some <em>body</em> text.</body></html>',
+        ...             encoding='utf-8')
         >>> print(html | Transformer('.//em').unwrap())
         <html><head><title>Some Title</title></head><body>Some body
         text.</body></html>
@@ -346,7 +352,8 @@
         """Wrap selection in an element.
 
         >>> html = HTML('<html><head><title>Some Title</title></head>'
-        ...             '<body>Some <em>body</em> text.</body></html>')
+        ...             '<body>Some <em>body</em> text.</body></html>',
+        ...             encoding='utf-8')
         >>> print(html | Transformer('.//em').wrap('strong'))
         <html><head><title>Some Title</title></head><body>Some
         <strong><em>body</em></strong> text.</body></html>
@@ -362,7 +369,8 @@
         """Replace selection with content.
 
         >>> html = HTML('<html><head><title>Some Title</title></head>'
-        ...             '<body>Some <em>body</em> text.</body></html>')
+        ...             '<body>Some <em>body</em> text.</body></html>',
+        ...             encoding='utf-8')
         >>> print(html | Transformer('.//title/text()').replace('New Title'))
         <html><head><title>New Title</title></head><body>Some <em>body</em>
         text.</body></html>
@@ -380,7 +388,8 @@
         tag:
 
         >>> html = HTML('<html><head><title>Some Title</title></head>'
-        ...             '<body>Some <em>body</em> text.</body></html>')
+        ...             '<body>Some <em>body</em> text.</body></html>',
+        ...             encoding='utf-8')
         >>> print(html | Transformer('.//em').before('emphasised '))
         <html><head><title>Some Title</title></head><body>Some emphasised
         <em>body</em> text.</body></html>
@@ -397,7 +406,8 @@
         Here, we insert some text after the </em> closing tag:
 
         >>> html = HTML('<html><head><title>Some Title</title></head>'
-        ...             '<body>Some <em>body</em> text.</body></html>')
+        ...             '<body>Some <em>body</em> text.</body></html>',
+        ...             encoding='utf-8')
         >>> print(html | Transformer('.//em').after(' rock'))
         <html><head><title>Some Title</title></head><body>Some <em>body</em>
         rock text.</body></html>
@@ -414,7 +424,8 @@
         Inserting some new text at the start of the <body>:
 
         >>> html = HTML('<html><head><title>Some Title</title></head>'
-        ...             '<body>Some <em>body</em> text.</body></html>')
+        ...             '<body>Some <em>body</em> text.</body></html>',
+        ...             encoding='utf-8')
         >>> print(html | Transformer('.//body').prepend('Some new body text. '))
         <html><head><title>Some Title</title></head><body>Some new body text.
         Some <em>body</em> text.</body></html>
@@ -429,7 +440,8 @@
         """Insert content before the END event of the selection.
 
         >>> html = HTML('<html><head><title>Some Title</title></head>'
-        ...             '<body>Some <em>body</em> text.</body></html>')
+        ...             '<body>Some <em>body</em> text.</body></html>',
+        ...             encoding='utf-8')
         >>> print(html | Transformer('.//body').append(' Some new body text.'))
         <html><head><title>Some Title</title></head><body>Some <em>body</em>
         text. Some new body text.</body></html>
@@ -450,7 +462,7 @@
 
         >>> html = HTML('<html><head><title>Some Title</title></head>'
         ...             '<body>Some <em class="before">body</em> <em>text</em>.</body>'
-        ...             '</html>')
+        ...             '</html>', encoding='utf-8')
         >>> print(html | Transformer('body/em').attr('class', None))
         <html><head><title>Some Title</title></head><body>Some <em>body</em>
         <em>text</em>.</body></html>
@@ -493,7 +505,8 @@
         >>> from genshi.builder import tag
         >>> buffer = StreamBuffer()
         >>> html = HTML('<html><head><title>Some Title</title></head>'
-        ...             '<body>Some <em>body</em> text.</body></html>')
+        ...             '<body>Some <em>body</em> text.</body></html>',
+        ...             encoding='utf-8')
         >>> print(html | Transformer('head/title/text()').copy(buffer)
         ...     .end().select('body').prepend(tag.h1(buffer)))
         <html><head><title>Some Title</title></head><body><h1>Some
@@ -514,7 +527,8 @@
 
         >>> html = HTML('<html><head><title>Some Title</title></head>'
         ...             '<body><em>Some</em> <em class="before">body</em>'
-        ...             '<em>text</em>.</body></html>')
+        ...             '<em>text</em>.</body></html>',
+        ...             encoding='utf-8')
         >>> buffer = StreamBuffer()
         >>> def apply_attr(name, entry):
         ...     return list(buffer)[0][1][1].get('class')
@@ -546,7 +560,8 @@
         >>> from genshi.builder import tag
         >>> buffer = StreamBuffer()
         >>> html = HTML('<html><head><title>Some Title</title></head>'
-        ...             '<body>Some <em>body</em> text.</body></html>')
+        ...             '<body>Some <em>body</em> text.</body></html>',
+        ...             encoding='utf-8')
         >>> print(html | Transformer('.//em/text()').cut(buffer)
         ...     .end().select('.//em').after(tag.h1(buffer)))
         <html><head><title>Some Title</title></head><body>Some
@@ -577,7 +592,8 @@
         top of the document:
 
         >>> doc = HTML('<doc><notes></notes><body>Some <note>one</note> '
-        ...            'text <note>two</note>.</body></doc>')
+        ...            'text <note>two</note>.</body></doc>',
+        ...            encoding='utf-8')
         >>> buffer = StreamBuffer()
         >>> print(doc | Transformer('body/note').cut(buffer, accumulate=True)
         ...     .end().buffer().select('notes').prepend(buffer))
@@ -595,7 +611,8 @@
 
         >>> from genshi.filters.html import HTMLSanitizer
         >>> html = HTML('<html><body>Some text<script>alert(document.cookie)'
-        ...             '</script> and some more text</body></html>')
+        ...             '</script> and some more text</body></html>',
+        ...             encoding='utf-8')
         >>> print(html | Transformer('body/*').filter(HTMLSanitizer()))
         <html><body>Some text and some more text</body></html>
 
@@ -609,7 +626,8 @@
         the selection.
 
         >>> html = HTML('<html><head><title>Some Title</title></head>'
-        ...               '<body>Some <em>body</em> text.</body></html>')
+        ...               '<body>Some <em>body</em> text.</body></html>',
+        ...             encoding='utf-8')
         >>> print(html | Transformer('head/title').map(unicode.upper, TEXT))
         <html><head><title>SOME TITLE</title></head><body>Some <em>body</em>
         text.</body></html>
@@ -627,7 +645,8 @@
 
         >>> html = HTML('<html><body>Some text, some more text and '
         ...             '<b>some bold text</b>\\n'
-        ...             '<i>some italicised text</i></body></html>')
+        ...             '<i>some italicised text</i></body></html>',
+        ...             encoding='utf-8')
         >>> print(html | Transformer('body/b').substitute('(?i)some', 'SOME'))
         <html><body>Some text, some more text and <b>SOME bold text</b>
         <i>some italicised text</i></body></html>
@@ -649,7 +668,8 @@
         """Rename matching elements.
 
         >>> html = HTML('<html><body>Some text, some more text and '
-        ...             '<b>some bold text</b></body></html>')
+        ...             '<b>some bold text</b></body></html>',
+        ...             encoding='utf-8')
         >>> print(html | Transformer('body/b').rename('strong'))
         <html><body>Some text, some more text and <strong>some bold text</strong></body></html>
         """
@@ -658,7 +678,7 @@
     def trace(self, prefix='', fileobj=None):
         """Print events as they pass through the transform.
 
-        >>> html = HTML('<body>Some <em>test</em> text</body>')
+        >>> html = HTML('<body>Some <em>test</em> text</body>', encoding='utf-8')
         >>> print(html | Transformer('em').trace())
         (None, ('START', (QName('body'), Attrs()), (None, 1, 0)))
         (None, ('TEXT', u'Some ', (None, 1, 6)))
@@ -1024,7 +1044,7 @@
     ...             yield event
     ...         for event in stream:
     ...             yield event
-    >>> html = HTML('<body>Some <em>test</em> text</body>')
+    >>> html = HTML('<body>Some <em>test</em> text</body>', encoding='utf-8')
     >>> print(html | Transformer('.//em').apply(Top('Prefix ')))
     Prefix <body>Some <em>test</em> text</body>
     """
diff -r 4bbd2b021cb5 genshi/input.py
--- a/genshi/input.py	Sat Sep 04 09:10:29 2010 +0200
+++ b/genshi/input.py	Sun Sep 05 14:20:37 2010 +0200
@@ -18,12 +18,13 @@
 from itertools import chain
 import htmlentitydefs as entities
 import HTMLParser as html
-from StringIO import StringIO
 from xml.parsers import expat
 
 from genshi.core import Attrs, QName, Stream, stripentities
 from genshi.core import START, END, XML_DECL, DOCTYPE, TEXT, START_NS, \
                         END_NS, START_CDATA, END_CDATA, PI, COMMENT
+from genshi.compat import StringIO, BytesIO
+
 
 __all__ = ['ET', 'ParseError', 'XMLParser', 'XML', 'HTMLParser', 'HTML']
 __docformat__ = 'restructuredtext en'
@@ -90,7 +91,7 @@
 
     _entitydefs = ['<!ENTITY %s "&#%d;">' % (name, value) for name, value in
                    entities.name2codepoint.items()]
-    _external_dtd = '\n'.join(_entitydefs)
+    _external_dtd = u'\n'.join(_entitydefs).encode('utf-8')
 
     def __init__(self, source, filename=None, encoding=None):
         """Initialize the parser for the given XML input.
@@ -108,7 +109,9 @@
         # Setup the Expat parser
         parser = expat.ParserCreate(encoding, '}')
         parser.buffer_text = True
-        parser.returns_unicode = True
+        # Python 3 does not have returns_unicode
+        if hasattr(parser, 'returns_unicode'):
+            parser.returns_unicode = True
         parser.ordered_attributes = True
 
         parser.StartElementHandler = self._handle_start
@@ -146,7 +149,7 @@
                 while 1:
                     while not done and len(self._queue) == 0:
                         data = self.source.read(bufsize)
-                        if data == '': # end of data
+                        if not data: # end of data
                             if hasattr(self, 'expat'):
                                 self.expat.Parse('', True)
                                 del self.expat # get rid of circular references
@@ -170,7 +173,7 @@
 
     def _build_foreign(self, context, base, sysid, pubid):
         parser = self.expat.ExternalEntityParserCreate(context)
-        parser.ParseFile(StringIO(self._external_dtd))
+        parser.ParseFile(BytesIO(self._external_dtd))
         return 1
 
     def _enqueue(self, kind, data=None, pos=None):
@@ -279,7 +282,7 @@
     
     The parsing is initiated by iterating over the parser object:
     
-    >>> parser = HTMLParser(StringIO('<UL compact><LI>Foo</UL>'))
+    >>> parser = HTMLParser(BytesIO(u'<UL compact><LI>Foo</UL>'.encode('utf-8')), encoding='utf-8')
     >>> for kind, data, pos in parser:
     ...     print('%s %s' % (kind, data))
     START (QName('ul'), Attrs([(QName('compact'), u'compact')]))
@@ -293,7 +296,7 @@
                               'hr', 'img', 'input', 'isindex', 'link', 'meta',
                               'param'])
 
-    def __init__(self, source, filename=None, encoding='utf-8'):
+    def __init__(self, source, filename=None, encoding=None):
         """Initialize the parser for the given HTML input.
         
         :param source: the HTML text as a file-like object
@@ -320,10 +323,16 @@
                 while 1:
                     while not done and len(self._queue) == 0:
                         data = self.source.read(bufsize)
-                        if data == '': # end of data
+                        if not data: # end of data
                             self.close()
                             done = True
                         else:
+                            if not isinstance(data, unicode):
+                                # bytes
+                                if self.encoding:
+                                    data = data.decode(self.encoding)
+                                else:
+                                    raise UnicodeError("source returned bytes, but no encoding specified")
                             self.feed(data)
                     for kind, data, pos in self._queue:
                         yield kind, data, pos
@@ -403,13 +412,13 @@
         self._enqueue(COMMENT, text)
 
 
-def HTML(text, encoding='utf-8'):
+def HTML(text, encoding=None):
     """Parse the given HTML source and return a markup stream.
     
     Unlike with `HTMLParser`, the returned stream is reusable, meaning it can be
     iterated over multiple times:
     
-    >>> html = HTML('<body><h1>Foo</h1></body>')
+    >>> html = HTML('<body><h1>Foo</h1></body>', encoding='utf-8')
     >>> print(html)
     <body><h1>Foo</h1></body>
     >>> print(html.select('h1'))
@@ -422,7 +431,9 @@
     :raises ParseError: if the HTML text is not well-formed, and error recovery
                         fails
     """
-    return Stream(list(HTMLParser(StringIO(text), encoding=encoding)))
+    if isinstance(text, unicode):
+        return Stream(list(HTMLParser(StringIO(text), encoding=encoding)))
+    return Stream(list(HTMLParser(BytesIO(text), encoding=encoding)))
 
 
 def _coalesce(stream):
diff -r 4bbd2b021cb5 genshi/output.py
--- a/genshi/output.py	Sat Sep 04 09:10:29 2010 +0200
+++ b/genshi/output.py	Sun Sep 05 14:20:37 2010 +0200
@@ -27,7 +27,7 @@
 __docformat__ = 'restructuredtext en'
 
 
-def encode(iterator, method='xml', encoding='utf-8', out=None):
+def encode(iterator, method='xml', encoding=None, out=None):
     """Encode serializer output into a string.
     
     :param iterator: the iterator returned from serializing a stream (basically
diff -r 4bbd2b021cb5 genshi/template/astutil.py
--- a/genshi/template/astutil.py	Sat Sep 04 09:10:29 2010 +0200
+++ b/genshi/template/astutil.py	Sun Sep 05 14:20:37 2010 +0200
@@ -21,6 +21,7 @@
     def parse(source, mode):
         return compile(source, '', mode, _ast.PyCF_ONLY_AST)
 
+from genshi.compat import IS_PYTHON2
 
 __docformat__ = 'restructuredtext en'
 
@@ -129,6 +130,11 @@
                 first = False
             self._write('**' + node.kwarg)
 
+    if not IS_PYTHON2:
+        # In Python 3 arguments get a special node
+        def visit_arg(self, node):
+            self._write(node.arg)
+
     # FunctionDef(identifier name, arguments args,
     #                           stmt* body, expr* decorator_list)
     def visit_FunctionDef(self, node):
@@ -289,22 +295,36 @@
         self._change_indent(-1)
 
 
-    # Raise(expr? type, expr? inst, expr? tback)
-    def visit_Raise(self, node):
-        self._new_line()
-        self._write('raise')
-        if not node.type:
-            return
-        self._write(' ')
-        self.visit(node.type)
-        if not node.inst:
-            return
-        self._write(', ')
-        self.visit(node.inst)
-        if not node.tback:
-            return
-        self._write(', ')
-        self.visit(node.tback)
+    if IS_PYTHON2:
+        # Raise(expr? type, expr? inst, expr? tback)
+        def visit_Raise(self, node):
+            self._new_line()
+            self._write('raise')
+            if not node.type:
+                return
+            self._write(' ')
+            self.visit(node.type)
+            if not node.inst:
+                return
+            self._write(', ')
+            self.visit(node.inst)
+            if not node.tback:
+                return
+            self._write(', ')
+            self.visit(node.tback)
+    else:
+        # Raise(expr? exc, expr? cause)
+        def visit_Raise(self, node):
+            self._new_line()
+            self._write('raise')
+            if not node.exc:
+                return
+            self._write(' ')
+            self.visit(node.exc)
+            if not node.cause:
+                return
+            self._write(' from ')
+            self.visit(node.cause)
 
     # TryExcept(stmt* body, excepthandler* handlers, stmt* orelse)
     def visit_TryExcept(self, node):
@@ -626,6 +646,11 @@
     def visit_Str(self, node):
         self._write(repr(node.s))
 
+    if not IS_PYTHON2:
+        # Bytes(bytes s)
+        def visit_Bytes(self, node):
+            self._write(repr(node.s))
+
     # Attribute(expr value, identifier attr, expr_context ctx)
     def visit_Attribute(self, node):
         self.visit(node.value)
diff -r 4bbd2b021cb5 genshi/template/base.py
--- a/genshi/template/base.py	Sat Sep 04 09:10:29 2010 +0200
+++ b/genshi/template/base.py	Sun Sep 05 14:20:37 2010 +0200
@@ -15,9 +15,9 @@
 
 from collections import deque
 import os
-from StringIO import StringIO
 import sys
 
+from genshi.compat import StringIO, BytesIO
 from genshi.core import Attrs, Stream, StreamEventKind, START, TEXT, _ensure
 from genshi.input import ParseError
 
@@ -398,10 +398,11 @@
         self._init_loader()
         self._prepared = False
 
-        if isinstance(source, basestring):
-            source = StringIO(source)
-        else:
-            source = source
+        if not isinstance(source, Stream) and not hasattr(source, 'read'):
+            if isinstance(source, unicode):
+                source = StringIO(source)
+            else:
+                source = BytesIO(source)
         try:
             self._stream = self._parse(source, encoding)
         except ParseError, e:
diff -r 4bbd2b021cb5 genshi/template/directives.py
--- a/genshi/template/directives.py	Sat Sep 04 09:10:29 2010 +0200
+++ b/genshi/template/directives.py	Sun Sep 05 14:20:37 2010 +0200
@@ -622,13 +622,13 @@
         if not info:
             raise TemplateRuntimeError('"when" directives can only be used '
                                        'inside a "choose" directive',
-                                       self.filename, *stream.next()[2][1:])
+                                       self.filename, *(stream.next())[2][1:])
         if info[0]:
             return []
         if not self.expr and not info[1]:
             raise TemplateRuntimeError('either "choose" or "when" directive '
                                        'must have a test expression',
-                                       self.filename, *stream.next()[2][1:])
+                                       self.filename, *(stream.next())[2][1:])
         if info[1]:
             value = info[2]
             if self.expr:
@@ -661,7 +661,7 @@
         if not info:
             raise TemplateRuntimeError('an "otherwise" directive can only be '
                                        'used inside a "choose" directive',
-                                       self.filename, *stream.next()[2][1:])
+                                       self.filename, *(stream.next())[2][1:])
         if info[0]:
             return []
         info[0] = True
diff -r 4bbd2b021cb5 genshi/template/eval.py
--- a/genshi/template/eval.py	Sat Sep 04 09:10:29 2010 +0200
+++ b/genshi/template/eval.py	Sun Sep 05 14:20:37 2010 +0200
@@ -24,6 +24,8 @@
 from genshi.template.base import TemplateRuntimeError
 from genshi.util import flatten
 
+from genshi.compat import get_code_params, build_code_chunk, IS_PYTHON2
+
 __all__ = ['Code', 'Expression', 'Suite', 'LenientLookup', 'StrictLookup',
            'Undefined', 'UndefinedError']
 __docformat__ = 'restructuredtext en'
@@ -98,10 +100,7 @@
     def __getstate__(self):
         state = {'source': self.source, 'ast': self.ast,
                  'lookup': self._globals.im_self}
-        c = self.code
-        state['code'] = (c.co_nlocals, c.co_stacksize, c.co_flags, c.co_code,
-                         c.co_consts, c.co_names, c.co_varnames, c.co_filename,
-                         c.co_name, c.co_firstlineno, c.co_lnotab, (), ())
+        state['code'] = get_code_params(self.code)
         return state
 
     def __setstate__(self, state):
@@ -236,15 +235,17 @@
     of that variable, will raise an exception that includes the name used to
     reference that undefined variable.
     
-    >>> foo('bar')
-    Traceback (most recent call last):
-        ...
-    UndefinedError: "foo" not defined
+    >>> try:
+    ...     foo('bar')
+    ... except UndefinedError, e:
+    ...     print e.msg
+    "foo" not defined
 
-    >>> foo.bar
-    Traceback (most recent call last):
-        ...
-    UndefinedError: "foo" not defined
+    >>> try:
+    ...     foo.bar
+    ... except UndefinedError, e:
+    ...     print e.msg
+    "foo" not defined
     
     :see: `LenientLookup`
     """
@@ -388,19 +389,21 @@
     raise an ``UndefinedError``:
     
     >>> expr = Expression('nothing', lookup='strict')
-    >>> expr.evaluate({})
-    Traceback (most recent call last):
-        ...
-    UndefinedError: "nothing" not defined
+    >>> try:
+    ...     expr.evaluate({})
+    ... except UndefinedError, e:
+    ...     print e.msg
+    "nothing" not defined
     
     The same happens when a non-existing attribute or item is accessed on an
     existing object:
     
     >>> expr = Expression('something.nil', lookup='strict')
-    >>> expr.evaluate({'something': dict()})
-    Traceback (most recent call last):
-        ...
-    UndefinedError: {} has no member named "nil"
+    >>> try:
+    ...     expr.evaluate({'something': dict()})
+    ... except UndefinedError, e:
+    ...     print e.msg
+    {} has no member named "nil"
     """
 
     @classmethod
@@ -421,17 +424,22 @@
                 rest = '\n'.join(['    %s' % line for line in rest.splitlines()])
             source = '\n'.join([first, rest])
     if isinstance(source, unicode):
-        source = '\xef\xbb\xbf' + source.encode('utf-8')
+        source = (u'\ufeff' + source).encode('utf-8')
     return parse(source, mode)
 
 
 def _compile(node, source=None, mode='eval', filename=None, lineno=-1,
              xform=None):
-    if isinstance(filename, unicode):
-        # unicode file names not allowed for code objects
-        filename = filename.encode('utf-8', 'replace')
-    elif not filename:
+    if not filename:
         filename = '<string>'
+    if IS_PYTHON2:
+        # Python 2 requires non-unicode filenames
+        if isinstance(filename, unicode):
+            filename = filename.encode('utf-8', 'replace')
+    else:
+        # Python 3 requires unicode filenames
+        if not isinstance(filename, unicode):
+            filename = filename.decode('utf-8', 'replace')
     if lineno <= 0:
         lineno = 1
 
@@ -458,10 +466,7 @@
     try:
         # We'd like to just set co_firstlineno, but it's readonly. So we need
         # to clone the code object while adjusting the line number
-        return CodeType(0, code.co_nlocals, code.co_stacksize,
-                        code.co_flags | 0x0040, code.co_code, code.co_consts,
-                        code.co_names, code.co_varnames, filename, name,
-                        lineno, code.co_lnotab, (), ())
+        return build_code_chunk(code, filename, name, lineno)
     except RuntimeError:
         return code
 
@@ -493,6 +498,8 @@
     def _extract_names(self, node):
         names = set()
         def _process(node):
+            if not IS_PYTHON2 and isinstance(node, _ast.arg):
+                names.add(node.arg)
             if isinstance(node, _ast.Name):
                 names.add(node.id)
             elif isinstance(node, _ast.alias):
@@ -513,7 +520,7 @@
         return names
 
     def visit_Str(self, node):
-        if isinstance(node.s, str):
+        if not isinstance(node.s, unicode):
             try: # If the string is ASCII, return a `str` object
                 node.s.decode('ascii')
             except ValueError: # Otherwise return a `unicode` object
diff -r 4bbd2b021cb5 genshi/template/loader.py
--- a/genshi/template/loader.py	Sat Sep 04 09:10:29 2010 +0200
+++ b/genshi/template/loader.py	Sun Sep 05 14:20:37 2010 +0200
@@ -46,7 +46,7 @@
     
     >>> import tempfile
     >>> fd, path = tempfile.mkstemp(suffix='.html', prefix='template')
-    >>> os.write(fd, '<p>$var</p>')
+    >>> os.write(fd, u'<p>$var</p>'.encode('utf-8'))
     11
     >>> os.close(fd)
     
@@ -283,7 +283,7 @@
         """
         def _load_from_directory(filename):
             filepath = os.path.join(path, filename)
-            fileobj = open(filepath, 'U')
+            fileobj = open(filepath, 'rbU')
             mtime = os.path.getmtime(filepath)
             def _uptodate():
                 return mtime == os.path.getmtime(filepath)
diff -r 4bbd2b021cb5 genshi/template/plugin.py
--- a/genshi/template/plugin.py	Sat Sep 04 09:10:29 2010 +0200
+++ b/genshi/template/plugin.py	Sun Sep 05 14:20:37 2010 +0200
@@ -44,7 +44,7 @@
             options = {}
         self.options = options
 
-        self.default_encoding = options.get('genshi.default_encoding', 'utf-8')
+        self.default_encoding = options.get('genshi.default_encoding', None)
         auto_reload = options.get('genshi.auto_reload', '1')
         if isinstance(auto_reload, basestring):
             auto_reload = auto_reload.lower() in ('1', 'on', 'yes', 'true')
diff -r 4bbd2b021cb5 genshi/template/tests/directives.py
--- a/genshi/template/tests/directives.py	Sat Sep 04 09:10:29 2010 +0200
+++ b/genshi/template/tests/directives.py	Sun Sep 05 14:20:37 2010 +0200
@@ -1137,8 +1137,8 @@
           <py:with vars="x = x * 2; y = x / 2;">${x} ${y}</py:with>
         </div>""")
         self.assertEqual("""<div>
-          84 42
-        </div>""", tmpl.generate(x=42).render(encoding=None))
+          84 %s
+        </div>""" % (84 / 2), tmpl.generate(x=42).render(encoding=None))
 
     def test_semicolon_escape(self):
         tmpl = MarkupTemplate("""<div xmlns:py="http://genshi.edgewall.org/">
diff -r 4bbd2b021cb5 genshi/template/tests/eval.py
--- a/genshi/template/tests/eval.py	Sat Sep 04 09:10:29 2010 +0200
+++ b/genshi/template/tests/eval.py	Sun Sep 05 14:20:37 2010 +0200
@@ -14,7 +14,6 @@
 import doctest
 import os
 import pickle
-from StringIO import StringIO
 import sys
 from tempfile import mkstemp
 import unittest
@@ -23,6 +22,7 @@
 from genshi.template.base import Context
 from genshi.template.eval import Expression, Suite, Undefined, UndefinedError, \
                                  UNDEFINED
+from genshi.compat import BytesIO, IS_PYTHON2, wrapped_bytes
 
 
 class ExpressionTestCase(unittest.TestCase):
@@ -39,7 +39,7 @@
 
     def test_pickle(self):
         expr = Expression('1 < 2')
-        buf = StringIO()
+        buf = BytesIO()
         pickle.dump(expr, buf, 2)
         buf.seek(0)
         unpickled = pickle.load(buf)
@@ -58,7 +58,8 @@
     def test_str_literal(self):
         self.assertEqual('foo', Expression('"foo"').evaluate({}))
         self.assertEqual('foo', Expression('"""foo"""').evaluate({}))
-        self.assertEqual('foo', Expression("'foo'").evaluate({}))
+        self.assertEqual(u'foo'.encode('utf-8'),
+                         Expression(wrapped_bytes("b'foo'")).evaluate({}))
         self.assertEqual('foo', Expression("'''foo'''").evaluate({}))
         self.assertEqual('foo', Expression("u'foo'").evaluate({}))
         self.assertEqual('foo', Expression("r'foo'").evaluate({}))
@@ -68,14 +69,23 @@
         self.assertEqual(u'þ', expr.evaluate({}))
         expr = Expression("u'\xfe'")
         self.assertEqual(u'þ', expr.evaluate({}))
-        expr = Expression("'\xc3\xbe'")
-        self.assertEqual(u'þ', expr.evaluate({}))
+        # On Python 2, byte strings are converted to unicode if they contain
+        # non-ASCII characters.
+        # On Python 3 there is no need to do this: non-prefixed strings are
+        # already unicode, and bytes literals are left as bytes.
+        expr = Expression(wrapped_bytes(r"b'\xc3\xbe'"))
+        if IS_PYTHON2:
+            self.assertEqual(u'þ', expr.evaluate({}))
+        else:
+            self.assertEqual(u'þ'.encode('utf-8'), expr.evaluate({}))
 
     def test_num_literal(self):
         self.assertEqual(42, Expression("42").evaluate({}))
-        self.assertEqual(42L, Expression("42L").evaluate({}))
+        if IS_PYTHON2:
+            self.assertEqual(42L, Expression("42L").evaluate({}))
         self.assertEqual(.42, Expression(".42").evaluate({}))
-        self.assertEqual(07, Expression("07").evaluate({}))
+        if IS_PYTHON2:
+            self.assertEqual(07, Expression("07").evaluate({}))
         self.assertEqual(0xF2, Expression("0xF2").evaluate({}))
         self.assertEqual(0XF2, Expression("0XF2").evaluate({}))
 
@@ -246,12 +256,15 @@
     def test_lambda(self):
         data = {'items': range(5)}
         expr = Expression("filter(lambda x: x > 2, items)")
-        self.assertEqual([3, 4], expr.evaluate(data))
+        self.assertEqual([3, 4], list(expr.evaluate(data)))
 
     def test_lambda_tuple_arg(self):
+        # This syntax goes away in Python 3
+        if not IS_PYTHON2:
+            return
         data = {'items': [(1, 2), (2, 1)]}
         expr = Expression("filter(lambda (x, y): x > y, items)")
-        self.assertEqual([(2, 1)], expr.evaluate(data))
+        self.assertEqual([(2, 1)], list(expr.evaluate(data)))
 
     def test_list_comprehension(self):
         expr = Expression("[n for n in numbers if n < 2]")
@@ -470,7 +483,7 @@
 
     def test_pickle(self):
         suite = Suite('foo = 42')
-        buf = StringIO()
+        buf = BytesIO()
         pickle.dump(suite, buf, 2)
         buf.seek(0)
         unpickled = pickle.load(buf)
@@ -645,26 +658,26 @@
         assert 'plain' in data
 
     def test_import(self):
-        suite = Suite("from itertools import ifilter")
+        suite = Suite("from itertools import repeat")
         data = {}
         suite.execute(data)
-        assert 'ifilter' in data
+        assert 'repeat' in data
 
     def test_import_star(self):
         suite = Suite("from itertools import *")
         data = Context()
         suite.execute(data)
-        assert 'ifilter' in data
+        assert 'repeat' in data
 
     def test_import_in_def(self):
         suite = Suite("""def fun():
-    from itertools import ifilter
-    return ifilter(None, range(3))
+    from itertools import repeat
+    return repeat(1, 3)
 """)
         data = Context()
         suite.execute(data)
-        assert 'ifilter' not in data
-        self.assertEqual([1, 2], list(data['fun']()))
+        assert 'repeat' not in data
+        self.assertEqual([1, 1, 1], list(data['fun']()))
 
     def test_for(self):
         suite = Suite("""x = []
@@ -766,7 +779,7 @@
         self.assertEqual("foo", d["k"])
 
     def test_exec(self):
-        suite = Suite("x = 1; exec d['k']; assert x == 42, x")
+        suite = Suite("x = 1; exec(d['k']); assert x == 42, x")
         suite.execute({"d": {"k": "x = 42"}})
 
     def test_return(self):
@@ -828,7 +841,8 @@
 
         def test_yield_expression(self):
             d = {}
-            suite = Suite("""results = []
+            suite = Suite("""from genshi.compat import next
+results = []
 def counter(maximum):
     i = 0
     while i < maximum:
@@ -838,9 +852,9 @@
         else:
             i += 1
 it = counter(5)
-results.append(it.next())
+results.append(next(it))
 results.append(it.send(3))
-results.append(it.next())
+results.append(next(it))
 """)
             suite.execute(d)
             self.assertEqual([0, 3, 4], d['results'])
diff -r 4bbd2b021cb5 genshi/template/tests/loader.py
--- a/genshi/template/tests/loader.py	Sat Sep 04 09:10:29 2010 +0200
+++ b/genshi/template/tests/loader.py	Sun Sep 05 14:20:37 2010 +0200
@@ -347,7 +347,7 @@
         assert 'tmpl2.html' not in loader._cache
 
     def test_load_with_default_encoding(self):
-        f = open(os.path.join(self.dirname, 'tmpl.html'), 'w')
+        f = open(os.path.join(self.dirname, 'tmpl.html'), 'wb')
         try:
             f.write(u'<div>\xf6</div>'.encode('iso-8859-1'))
         finally:
@@ -356,7 +356,7 @@
         loader.load('tmpl.html')
 
     def test_load_with_explicit_encoding(self):
-        f = open(os.path.join(self.dirname, 'tmpl.html'), 'w')
+        f = open(os.path.join(self.dirname, 'tmpl.html'), 'wb')
         try:
             f.write(u'<div>\xf6</div>'.encode('iso-8859-1'))
         finally:
diff -r 4bbd2b021cb5 genshi/template/tests/markup.py
--- a/genshi/template/tests/markup.py	Sat Sep 04 09:10:29 2010 +0200
+++ b/genshi/template/tests/markup.py	Sun Sep 05 14:20:37 2010 +0200
@@ -15,11 +15,11 @@
 import os
 import pickle
 import shutil
-from StringIO import StringIO
 import sys
 import tempfile
 import unittest
 
+from genshi.compat import BytesIO, StringIO
 from genshi.core import Markup
 from genshi.input import XML
 from genshi.template.base import BadDirectiveError, TemplateSyntaxError
@@ -43,7 +43,7 @@
     def test_pickle(self):
         stream = XML('<root>$var</root>')
         tmpl = MarkupTemplate(stream)
-        buf = StringIO()
+        buf = BytesIO()
         pickle.dump(tmpl, buf, 2)
         buf.seek(0)
         unpickled = pickle.load(buf)
diff -r 4bbd2b021cb5 genshi/template/tests/plugin.py
--- a/genshi/template/tests/plugin.py	Sat Sep 04 09:10:29 2010 +0200
+++ b/genshi/template/tests/plugin.py	Sun Sep 05 14:20:37 2010 +0200
@@ -30,7 +30,7 @@
 
     def test_init_no_options(self):
         plugin = MarkupTemplateEnginePlugin()
-        self.assertEqual('utf-8', plugin.default_encoding)
+        self.assertEqual(None, plugin.default_encoding)
         self.assertEqual('html', plugin.default_format)
         self.assertEqual(None, plugin.default_doctype)
 
@@ -165,7 +165,7 @@
     def test_helper_functions(self):
         plugin = MarkupTemplateEnginePlugin()
         tmpl = plugin.load_template(PACKAGE + '.templates.functions')
-        output = plugin.render({'snippet': '<b>Foo</b>'}, template=tmpl)
+        output = plugin.render({'snippet': u'<b>Foo</b>'}, template=tmpl)
         self.assertEqual("""<div>
 False
 bar
@@ -178,7 +178,7 @@
 
     def test_init_no_options(self):
         plugin = TextTemplateEnginePlugin()
-        self.assertEqual('utf-8', plugin.default_encoding)
+        self.assertEqual(None, plugin.default_encoding)
         self.assertEqual('text', plugin.default_format)
 
         self.assertEqual([], plugin.loader.search_path)
diff -r 4bbd2b021cb5 genshi/template/text.py
--- a/genshi/template/text.py	Sat Sep 04 09:10:29 2010 +0200
+++ b/genshi/template/text.py	Sun Sep 05 14:20:37 2010 +0200
@@ -162,7 +162,7 @@
         depth = 0
 
         source = source.read()
-        if isinstance(source, str):
+        if not isinstance(source, unicode):
             source = source.decode(encoding or 'utf-8', 'replace')
         offset = 0
         lineno = 1
@@ -279,7 +279,7 @@
         depth = 0
 
         source = source.read()
-        if isinstance(source, str):
+        if not isinstance(source, unicode):
             source = source.decode(encoding or 'utf-8', 'replace')
         offset = 0
         lineno = 1
diff -r 4bbd2b021cb5 genshi/tests/core.py
--- a/genshi/tests/core.py	Sat Sep 04 09:10:29 2010 +0200
+++ b/genshi/tests/core.py	Sun Sep 05 14:20:37 2010 +0200
@@ -13,37 +13,34 @@
 
 import doctest
 import pickle
-from StringIO import StringIO
-try:
-    from cStringIO import StringIO as cStringIO
-except ImportError:
-    cStringIO = StringIO
 import unittest
 
 from genshi import core
 from genshi.core import Markup, Attrs, Namespace, QName, escape, unescape
 from genshi.input import XML, ParseError
+from genshi.compat import StringIO, BytesIO
 
 
 class StreamTestCase(unittest.TestCase):
 
     def test_render_utf8(self):
         xml = XML('<li>Über uns</li>')
-        self.assertEqual('<li>Über uns</li>', xml.render())
+        self.assertEqual(u'<li>Über uns</li>'.encode('utf-8'), xml.render(encoding='utf-8'))
 
     def test_render_unicode(self):
         xml = XML('<li>Über uns</li>')
+        self.assertEqual(u'<li>Über uns</li>', xml.render())
         self.assertEqual(u'<li>Über uns</li>', xml.render(encoding=None))
 
     def test_render_ascii(self):
         xml = XML('<li>Über uns</li>')
-        self.assertEqual('<li>&#220;ber uns</li>', xml.render(encoding='ascii'))
+        self.assertEqual(u'<li>&#220;ber uns</li>'.encode('ascii'), xml.render(encoding='ascii'))
 
     def test_render_output_stream_utf8(self):
         xml = XML('<li>Über uns</li>')
-        strio = cStringIO()
-        self.assertEqual(None, xml.render(out=strio))
-        self.assertEqual('<li>Über uns</li>', strio.getvalue())
+        strio = BytesIO()
+        self.assertEqual(None, xml.render(encoding='utf-8', out=strio))
+        self.assertEqual(u'<li>Über uns</li>'.encode('utf-8'), strio.getvalue())
 
     def test_render_output_stream_unicode(self):
         xml = XML('<li>Über uns</li>')
@@ -53,7 +50,7 @@
 
     def test_pickle(self):
         xml = XML('<li>Foo</li>')
-        buf = StringIO()
+        buf = BytesIO()
         pickle.dump(xml, buf, 2)
         buf.seek(0)
         xml = pickle.load(buf)
@@ -63,8 +60,9 @@
 class MarkupTestCase(unittest.TestCase):
 
     def test_new_with_encoding(self):
-        markup = Markup('Döner', encoding='utf-8')
-        self.assertEquals("<Markup u'D\\xf6ner'>", repr(markup))
+        markup = Markup(u'Döner'.encode('utf-8'), encoding='utf-8')
+        # mimic Markup.__repr__ when constructing output for Python 2/3 compatibility
+        self.assertEquals("<Markup %r>" % u'D\u00f6ner', repr(markup))
 
     def test_repr(self):
         markup = Markup('foo')
@@ -158,7 +156,7 @@
 
     def test_pickle(self):
         markup = Markup('foo')
-        buf = StringIO()
+        buf = BytesIO()
         pickle.dump(markup, buf, 2)
         buf.seek(0)
         self.assertEquals("<Markup u'foo'>", repr(pickle.load(buf)))
@@ -168,7 +166,7 @@
 
     def test_pickle(self):
         attrs = Attrs([("attr1", "foo"), ("attr2", "bar")])
-        buf = StringIO()
+        buf = BytesIO()
         pickle.dump(attrs, buf, 2)
         buf.seek(0)
         unpickled = pickle.load(buf)
@@ -196,7 +194,7 @@
 
     def test_pickle(self):
         ns = Namespace('http://www.example.org/namespace')
-        buf = StringIO()
+        buf = BytesIO()
         pickle.dump(ns, buf, 2)
         buf.seek(0)
         unpickled = pickle.load(buf)
@@ -209,7 +207,7 @@
 
     def test_pickle(self):
         qname = QName('http://www.example.org/namespace}elem')
-        buf = StringIO()
+        buf = BytesIO()
         pickle.dump(qname, buf, 2)
         buf.seek(0)
         unpickled = pickle.load(buf)
diff -r 4bbd2b021cb5 genshi/tests/input.py
--- a/genshi/tests/input.py	Sat Sep 04 09:10:29 2010 +0200
+++ b/genshi/tests/input.py	Sun Sep 05 14:20:37 2010 +0200
@@ -12,12 +12,12 @@
 # history and logs, available at http://genshi.edgewall.org/log/.
 
 import doctest
-from StringIO import StringIO
 import sys
 import unittest
 
 from genshi.core import Attrs, Stream
 from genshi.input import XMLParser, HTMLParser, ParseError
+from genshi.compat import StringIO, BytesIO
 
 
 class XMLParserTestCase(unittest.TestCase):
@@ -59,7 +59,7 @@
 
     def test_latin1_encoded(self):
         text = u'<div>\xf6</div>'.encode('iso-8859-1')
-        events = list(XMLParser(StringIO(text), encoding='iso-8859-1'))
+        events = list(XMLParser(BytesIO(text), encoding='iso-8859-1'))
         kind, data, pos = events[1]
         self.assertEqual(Stream.TEXT, kind)
         self.assertEqual(u'\xf6', data)
@@ -68,7 +68,7 @@
         text = u"""<?xml version="1.0" encoding="iso-8859-1" ?>
         <div>\xf6</div>
         """.encode('iso-8859-1')
-        events = list(XMLParser(StringIO(text)))
+        events = list(XMLParser(BytesIO(text)))
         kind, data, pos = events[2]
         self.assertEqual(Stream.TEXT, kind)
         self.assertEqual(u'\xf6', data)
@@ -116,7 +116,7 @@
 class HTMLParserTestCase(unittest.TestCase):
 
     def test_text_node_pos_single_line(self):
-        text = '<elem>foo bar</elem>'
+        text = u'<elem>foo bar</elem>'
         events = list(HTMLParser(StringIO(text)))
         kind, data, pos = events[1]
         self.assertEqual(Stream.TEXT, kind)
@@ -124,7 +124,7 @@
         self.assertEqual((None, 1, 6), pos)
 
     def test_text_node_pos_multi_line(self):
-        text = '''<elem>foo
+        text = u'''<elem>foo
 bar</elem>'''
         events = list(HTMLParser(StringIO(text)))
         kind, data, pos = events[1]
@@ -134,14 +134,14 @@
 
     def test_input_encoding_text(self):
         text = u'<div>\xf6</div>'.encode('iso-8859-1')
-        events = list(HTMLParser(StringIO(text), encoding='iso-8859-1'))
+        events = list(HTMLParser(BytesIO(text), encoding='iso-8859-1'))
         kind, data, pos = events[1]
         self.assertEqual(Stream.TEXT, kind)
         self.assertEqual(u'\xf6', data)
 
     def test_input_encoding_attribute(self):
         text = u'<div title="\xf6"></div>'.encode('iso-8859-1')
-        events = list(HTMLParser(StringIO(text), encoding='iso-8859-1'))
+        events = list(HTMLParser(BytesIO(text), encoding='iso-8859-1'))
         kind, (tag, attrib), pos = events[0]
         self.assertEqual(Stream.START, kind)
         self.assertEqual(u'\xf6', attrib.get('title'))
@@ -154,7 +154,7 @@
         self.assertEqual(u'\u2013', data)
 
     def test_html_entity_in_attribute(self):
-        text = '<p title="&nbsp;"></p>'
+        text = u'<p title="&nbsp;"></p>'
         events = list(HTMLParser(StringIO(text)))
         kind, data, pos = events[0]
         self.assertEqual(Stream.START, kind)
@@ -163,14 +163,14 @@
         self.assertEqual(Stream.END, kind)
 
     def test_html_entity_in_text(self):
-        text = '<p>&nbsp;</p>'
+        text = u'<p>&nbsp;</p>'
         events = list(HTMLParser(StringIO(text)))
         kind, data, pos = events[1]
         self.assertEqual(Stream.TEXT, kind)
         self.assertEqual(u'\xa0', data)
 
     def test_processing_instruction(self):
-        text = '<?php echo "Foobar" ?>'
+        text = u'<?php echo "Foobar" ?>'
         events = list(HTMLParser(StringIO(text)))
         kind, (target, data), pos = events[0]
         self.assertEqual(Stream.PI, kind)
@@ -205,7 +205,7 @@
         self.assertEqual(1, standalone)
 
     def test_processing_instruction_trailing_qmark(self):
-        text = '<?php echo "Foobar" ??>'
+        text = u'<?php echo "Foobar" ??>'
         events = list(HTMLParser(StringIO(text)))
         kind, (target, data), pos = events[0]
         self.assertEqual(Stream.PI, kind)
@@ -213,7 +213,7 @@
         self.assertEqual('echo "Foobar" ?', data)
 
     def test_out_of_order_tags1(self):
-        text = '<span><b>Foobar</span></b>'
+        text = u'<span><b>Foobar</span></b>'
         events = list(HTMLParser(StringIO(text)))
         self.assertEqual(5, len(events))
         self.assertEqual((Stream.START, ('span', ())), events[0][:2])
@@ -223,8 +223,8 @@
         self.assertEqual((Stream.END, 'span'), events[4][:2])
 
     def test_out_of_order_tags2(self):
-        text = '<span class="baz"><b><i>Foobar</span></b></i>'
-        events = list(HTMLParser(StringIO(text)))
+        text = u'<span class="baz"><b><i>Foobar</span></b></i>'.encode('utf-8')
+        events = list(HTMLParser(BytesIO(text), encoding='utf-8'))
         self.assertEqual(7, len(events))
         self.assertEqual((Stream.START, ('span', Attrs([('class', 'baz')]))),
                          events[0][:2])
@@ -236,8 +236,8 @@
         self.assertEqual((Stream.END, 'span'), events[6][:2])
 
     def test_out_of_order_tags3(self):
-        text = '<span><b>Foobar</i>'
-        events = list(HTMLParser(StringIO(text)))
+        text = u'<span><b>Foobar</i>'.encode('utf-8')
+        events = list(HTMLParser(BytesIO(text), encoding='utf-8'))
         self.assertEqual(5, len(events))
         self.assertEqual((Stream.START, ('span', ())), events[0][:2])
         self.assertEqual((Stream.START, ('b', ())), events[1][:2])
@@ -246,7 +246,7 @@
         self.assertEqual((Stream.END, 'span'), events[4][:2])
 
     def test_hex_charref(self):
-        text = '<span>&#x27;</span>'
+        text = u'<span>&#x27;</span>'
         events = list(HTMLParser(StringIO(text)))
         self.assertEqual(3, len(events))
         self.assertEqual((Stream.START, ('span', ())), events[0][:2])
diff -r 4bbd2b021cb5 genshi/tests/output.py
--- a/genshi/tests/output.py	Sat Sep 04 09:10:29 2010 +0200
+++ b/genshi/tests/output.py	Sun Sep 05 14:20:37 2010 +0200
@@ -356,7 +356,7 @@
         </div>""", output)
 
     def test_html5_doctype(self):
-        stream = HTML('<html></html>')
+        stream = HTML(u'<html></html>')
         output = stream.render(XHTMLSerializer, doctype=DocType.HTML5,
                                encoding=None)
         self.assertEqual('<!DOCTYPE html>\n<html></html>', output)
@@ -427,7 +427,7 @@
         </style>""", output)
 
     def test_html5_doctype(self):
-        stream = HTML('<html></html>')
+        stream = HTML(u'<html></html>')
         output = stream.render(HTMLSerializer, doctype=DocType.HTML5,
                                encoding=None)
         self.assertEqual('<!DOCTYPE html>\n<html></html>', output)
diff -r 4bbd2b021cb5 genshi/util.py
--- a/genshi/util.py	Sat Sep 04 09:10:29 2010 +0200
+++ b/genshi/util.py	Sun Sep 05 14:20:37 2010 +0200
@@ -15,6 +15,9 @@
 
 import htmlentitydefs as entities
 import re
+import sys
+
+from compat import any, all, stringrepr
 
 __docformat__ = 'restructuredtext en'
 
@@ -246,29 +249,3 @@
     """
     return _STRIPTAGS_RE.sub('', text)
 
-
-def stringrepr(string):
-    ascii = string.encode('ascii', 'backslashreplace')
-    quoted = "'" +  ascii.replace("'", "\\'") + "'"
-    if len(ascii) > len(string):
-        return 'u' + quoted
-    return quoted
-
-
-# Compatibility fallback implementations for older Python versions
-
-try:
-    all = all
-    any = any
-except NameError:
-    def any(S):
-        for x in S:
-            if x:
-               return True
-        return False
-
-    def all(S):
-        for x in S:
-            if not x:
-               return False
-        return True
diff -r 4bbd2b021cb5 setup.py
--- a/setup.py	Sat Sep 04 09:10:29 2010 +0200
+++ b/setup.py	Sun Sep 05 14:20:37 2010 +0200
@@ -41,7 +41,8 @@
     def run(self):
         try:
             build_ext.run(self)
-        except DistutilsPlatformError, e:
+        except DistutilsPlatformError:
+            _etype, e, _tb = sys.exc_info()
             self._unavailable(e)
 
     def build_extension(self, ext):
@@ -49,7 +50,8 @@
             build_ext.build_extension(self, ext)
             global _speedup_available
             _speedup_available = True
-        except CCompilerError, e:
+        except CCompilerError:
+            _etype, e, _tb = sys.exc_info()
             self._unavailable(e)
 
     def _unavailable(self, exc):
@@ -86,6 +88,25 @@
     cmdclass['bdist_egg'] = my_bdist_egg
 
 
+# Use 2to3 if we're running under Python 3 (with Distribute)
+extra = {}
+if sys.version_info >= (3,):
+    extra['use_2to3'] = True
+    extra['convert_2to3_doctests'] = []
+    extra['use_2to3_fixers'] = ['fixes']
+    # Include the test packages so that "python3 setup.py test" can run them
+    packages = [
+        'genshi', 'genshi.filters', 'genshi.template',
+        'genshi.tests', 'genshi.filters.tests',
+        'genshi.template.tests',
+        'genshi.template.tests.templates',
+    ]
+    # Install the data files (templates) needed by the genshi template tests
+    extra['include_package_data'] = True
+else:
+    packages = ['genshi', 'genshi.filters', 'genshi.template']
+
+
 setup(
     name = 'Genshi',
     version = '0.7',
@@ -114,7 +135,7 @@
         'Topic :: Text Processing :: Markup :: XML'
     ],
     keywords = ['python.templating.engines'],
-    packages = ['genshi', 'genshi.filters', 'genshi.template'],
+    packages = packages,
     test_suite = 'genshi.tests.suite',
 
     extras_require = {
@@ -132,5 +153,7 @@
     """,
 
     features = {'speedups': speedups},
-    cmdclass = cmdclass
+    cmdclass = cmdclass,
+
+    **extra
 )

