first commit

2024-02-23 10:30:02 +00:00
commit ddeb07d0ba
12482 changed files with 1857507 additions and 0 deletions
@@ -0,0 +1,37 @@
+# -*- coding: utf-8 -*-
+"""
+    CSS Selectors based on XPath
+    ============================
+
+    This module supports selecting XML/HTML elements based on CSS selectors.
+    See the `CSSSelector` class for details.
+
+
+    :copyright: (c) 2007-2012 Ian Bicking and contributors.
+                See AUTHORS for more details.
+    :license: BSD, see LICENSE for more details.
+
+"""
+
+from cssselect.parser import (
+    parse,
+    Selector,
+    FunctionalPseudoElement,
+    SelectorError,
+    SelectorSyntaxError,
+)
+from cssselect.xpath import GenericTranslator, HTMLTranslator, ExpressionError
+
+# Names re-exported as the package's public API; each one is imported above
+# from ``cssselect.parser`` or ``cssselect.xpath``.
+__all__ = (
+    "ExpressionError",
+    "FunctionalPseudoElement",
+    "GenericTranslator",
+    "HTMLTranslator",
+    "parse",
+    "Selector",
+    "SelectorError",
+    "SelectorSyntaxError",
+)
+
+# Package version string; ``__version__`` is an alias of ``VERSION``.
+VERSION = "1.2.0"
+__version__ = VERSION
@@ -0,0 +1,889 @@
+# -*- coding: utf-8 -*-
+"""
+    cssselect.xpath
+    ===============
+
+    Translation of parsed CSS selectors to XPath expressions.
+
+
+    :copyright: (c) 2007-2012 Ian Bicking and contributors.
+                See AUTHORS for more details.
+    :license: BSD, see LICENSE for more details.
+
+"""
+
+import re
+import typing
+import warnings
+from typing import Optional
+
+from cssselect.parser import (
+    parse,
+    parse_series,
+    PseudoElement,
+    Selector,
+    SelectorError,
+    Tree,
+    Element,
+    Hash,
+    Class,
+    Function,
+    Pseudo,
+    Attrib,
+    Negation,
+    Relation,
+    Matching,
+    SpecificityAdjustment,
+    CombinedSelector,
+)
+
+
+@typing.no_type_check
+def _unicode_safe_getattr(obj, name, default=None):
+    warnings.warn(
+        "_unicode_safe_getattr is deprecated and will be removed in the"
+        " next release, use getattr() instead",
+        DeprecationWarning,
+        stacklevel=2,
+    )
+    return getattr(obj, name, default)
+
+
+class ExpressionError(SelectorError, RuntimeError):
+    """Unknown or unsupported selector (e.g. pseudo-class).
+
+    Raised by the translators in this module when a parsed selector
+    cannot be expressed as XPath.
+    """
+
+
+#### XPath Helpers
+
+
+class XPathExpr:
+    def __init__(
+        self, path: str = "", element: str = "*", condition: str = "", star_prefix: bool = False
+    ) -> None:
+        self.path = path
+        self.element = element
+        self.condition = condition
+
+    def __str__(self) -> str:
+        path = str(self.path) + str(self.element)
+        if self.condition:
+            path += "[%s]" % self.condition
+        return path
+
+    def __repr__(self) -> str:
+        return "%s[%s]" % (self.__class__.__name__, self)
+
+    def add_condition(self, condition: str, conjuction: str = "and") -> "XPathExpr":
+        if self.condition:
+            self.condition = "(%s) %s (%s)" % (self.condition, conjuction, condition)
+        else:
+            self.condition = condition
+        return self
+
+    def add_name_test(self) -> None:
+        if self.element == "*":
+            # We weren't doing a test anyway
+            return
+        self.add_condition("name() = %s" % GenericTranslator.xpath_literal(self.element))
+        self.element = "*"
+
+    def add_star_prefix(self) -> None:
+        """
+        Append '*/' to the path to keep the context constrained
+        to a single parent.
+        """
+        self.path += "*/"
+
+    def join(
+        self,
+        combiner: str,
+        other: "XPathExpr",
+        closing_combiner: Optional[str] = None,
+        has_inner_condition: bool = False,
+    ) -> "XPathExpr":
+        path = str(self) + combiner
+        # Any "star prefix" is redundant when joining.
+        if other.path != "*/":
+            path += other.path
+        self.path = path
+        if not has_inner_condition:
+            self.element = other.element + closing_combiner if closing_combiner else other.element
+            self.condition = other.condition
+        else:
+            self.element = other.element
+            if other.condition:
+                self.element += "[" + other.condition + "]"
+            if closing_combiner:
+                self.element += closing_combiner
+        return self
+
+
+split_at_single_quotes = re.compile("('+)").split
+
+# The spec is actually more permissive than that, but don’t bother.
+# This is just for the fast path.
+# http://www.w3.org/TR/REC-xml/#NT-NameStartChar
+is_safe_name = re.compile("^[a-zA-Z_][a-zA-Z0-9_.-]*$").match
+
+# Test that the string is not empty and does not contain whitespace
+is_non_whitespace = re.compile(r"^[^ \t\r\n\f]+$").match
+
+
+#### Translation
+
+
+class GenericTranslator:
+    """
+    Translator for "generic" XML documents.
+
+    Everything is case-sensitive, no assumption is made on the meaning
+    of element names and attribute names.
+
+    """
+
+    ####
+    ####  HERE BE DRAGONS
+    ####
+    ####  You are welcome to hook into this to change some behavior,
+    ####  but do so at your own risks.
+    ####  Until it has received a lot more work and review,
+    ####  I reserve the right to change this API in backward-incompatible ways
+    ####  with any minor version of cssselect.
+    ####  See https://github.com/scrapy/cssselect/pull/22
+    ####  -- Simon Sapin.
+    ####
+
+    combinator_mapping = {
+        " ": "descendant",
+        ">": "child",
+        "+": "direct_adjacent",
+        "~": "indirect_adjacent",
+    }
+
+    attribute_operator_mapping = {
+        "exists": "exists",
+        "=": "equals",
+        "~=": "includes",
+        "|=": "dashmatch",
+        "^=": "prefixmatch",
+        "$=": "suffixmatch",
+        "*=": "substringmatch",
+        "!=": "different",  # XXX Not in Level 3 but meh
+    }
+
+    #: The attribute used for ID selectors depends on the document language:
+    #: http://www.w3.org/TR/selectors/#id-selectors
+    id_attribute = "id"
+
+    #: The attribute used for ``:lang()`` depends on the document language:
+    #: http://www.w3.org/TR/selectors/#lang-pseudo
+    lang_attribute = "xml:lang"
+
+    #: The case sensitivity of document language element names,
+    #: attribute names, and attribute values in selectors depends
+    #: on the document language.
+    #: http://www.w3.org/TR/selectors/#casesens
+    #:
+    #: When a document language defines one of these as case-insensitive,
+    #: cssselect assumes that the document parser makes the parsed values
+    #: lower-case. Making the selector lower-case too makes the comparison
+    #: case-insensitive.
+    #:
+    #: In HTML, element names and attributes names (but not attribute values)
+    #: are case-insensitive. All of lxml.html, html5lib, BeautifulSoup4
+    #: and HTMLParser make them lower-case in their parse result, so
+    #: the assumption holds.
+    lower_case_element_names = False
+    lower_case_attribute_names = False
+    lower_case_attribute_values = False
+
+    # class used to represent an XPath expression
+    xpathexpr_cls = XPathExpr
+
+    def css_to_xpath(self, css: str, prefix: str = "descendant-or-self::") -> str:
+        """Translate a *group of selectors* to XPath.
+
+        Pseudo-elements are not supported here since XPath only knows
+        about "real" elements.
+
+        :param css:
+            A *group of selectors* as a string.
+        :param prefix:
+            This string is prepended to the XPath expression for each selector.
+            The default makes selectors scoped to the context node’s subtree.
+        :raises:
+            :class:`~cssselect.SelectorSyntaxError` on invalid selectors,
+            :class:`ExpressionError` on unknown/unsupported selectors,
+            including pseudo-elements.
+        :returns:
+            The equivalent XPath 1.0 expression as a string.
+
+        """
+        return " | ".join(
+            self.selector_to_xpath(selector, prefix, translate_pseudo_elements=True)
+            for selector in parse(css)
+        )
+
+    def selector_to_xpath(
+        self,
+        selector: Selector,
+        prefix: str = "descendant-or-self::",
+        translate_pseudo_elements: bool = False,
+    ) -> str:
+        """Translate a parsed selector to XPath.
+
+
+        :param selector:
+            A parsed :class:`Selector` object.
+        :param prefix:
+            This string is prepended to the resulting XPath expression.
+            The default makes selectors scoped to the context node’s subtree.
+        :param translate_pseudo_elements:
+            Unless this is set to ``True`` (as :meth:`css_to_xpath` does),
+            the :attr:`~Selector.pseudo_element` attribute of the selector
+            is ignored.
+            It is the caller's responsibility to reject selectors
+            with pseudo-elements, or to account for them somehow.
+        :raises:
+            :class:`ExpressionError` on unknown/unsupported selectors.
+        :returns:
+            The equivalent XPath 1.0 expression as a string.
+
+        """
+        tree = getattr(selector, "parsed_tree", None)
+        if not tree:
+            raise TypeError("Expected a parsed selector, got %r" % (selector,))
+        xpath = self.xpath(tree)
+        assert isinstance(xpath, self.xpathexpr_cls)  # help debug a missing 'return'
+        if translate_pseudo_elements and selector.pseudo_element:
+            xpath = self.xpath_pseudo_element(xpath, selector.pseudo_element)
+        return (prefix or "") + str(xpath)
+
+    def xpath_pseudo_element(self, xpath: XPathExpr, pseudo_element: PseudoElement) -> XPathExpr:
+        """Translate a pseudo-element.
+
+        Defaults to not supporting pseudo-elements at all,
+        but can be overridden by sub-classes.
+
+        """
+        raise ExpressionError("Pseudo-elements are not supported.")
+
+    @staticmethod
+    def xpath_literal(s: str) -> str:
+        s = str(s)
+        if "'" not in s:
+            s = "'%s'" % s
+        elif '"' not in s:
+            s = '"%s"' % s
+        else:
+            s = "concat(%s)" % ",".join(
+                [
+                    (("'" in part) and '"%s"' or "'%s'") % part
+                    for part in split_at_single_quotes(s)
+                    if part
+                ]
+            )
+        return s
+
+    def xpath(self, parsed_selector: Tree) -> XPathExpr:
+        """Translate any parsed selector object."""
+        type_name = type(parsed_selector).__name__
+        method = getattr(self, "xpath_%s" % type_name.lower(), None)
+        if method is None:
+            raise ExpressionError("%s is not supported." % type_name)
+        return typing.cast(XPathExpr, method(parsed_selector))
+
+    # Dispatched by parsed object type
+
+    def xpath_combinedselector(self, combined: CombinedSelector) -> XPathExpr:
+        """Translate a combined selector."""
+        combinator = self.combinator_mapping[combined.combinator]
+        method = getattr(self, "xpath_%s_combinator" % combinator)
+        return typing.cast(
+            XPathExpr, method(self.xpath(combined.selector), self.xpath(combined.subselector))
+        )
+
+    def xpath_negation(self, negation: Negation) -> XPathExpr:
+        xpath = self.xpath(negation.selector)
+        sub_xpath = self.xpath(negation.subselector)
+        sub_xpath.add_name_test()
+        if sub_xpath.condition:
+            return xpath.add_condition("not(%s)" % sub_xpath.condition)
+        else:
+            return xpath.add_condition("0")
+
+    def xpath_relation(self, relation: Relation) -> XPathExpr:
+        xpath = self.xpath(relation.selector)
+        combinator = relation.combinator
+        subselector = relation.subselector
+        right = self.xpath(subselector.parsed_tree)
+        method = getattr(
+            self,
+            "xpath_relation_%s_combinator"
+            % self.combinator_mapping[typing.cast(str, combinator.value)],
+        )
+        return typing.cast(XPathExpr, method(xpath, right))
+
+    def xpath_matching(self, matching: Matching) -> XPathExpr:
+        xpath = self.xpath(matching.selector)
+        exprs = [self.xpath(selector) for selector in matching.selector_list]
+        for e in exprs:
+            e.add_name_test()
+            if e.condition:
+                xpath.add_condition(e.condition, "or")
+        return xpath
+
+    def xpath_specificityadjustment(self, matching: SpecificityAdjustment) -> XPathExpr:
+        xpath = self.xpath(matching.selector)
+        exprs = [self.xpath(selector) for selector in matching.selector_list]
+        for e in exprs:
+            e.add_name_test()
+            if e.condition:
+                xpath.add_condition(e.condition, "or")
+        return xpath
+
+    def xpath_function(self, function: Function) -> XPathExpr:
+        """Translate a functional pseudo-class."""
+        method_name = "xpath_%s_function" % function.name.replace("-", "_")
+        method = getattr(self, method_name, None)
+        if not method:
+            raise ExpressionError("The pseudo-class :%s() is unknown" % function.name)
+        return typing.cast(XPathExpr, method(self.xpath(function.selector), function))
+
+    def xpath_pseudo(self, pseudo: Pseudo) -> XPathExpr:
+        """Translate a pseudo-class."""
+        method_name = "xpath_%s_pseudo" % pseudo.ident.replace("-", "_")
+        method = getattr(self, method_name, None)
+        if not method:
+            # TODO: better error message for pseudo-elements?
+            raise ExpressionError("The pseudo-class :%s is unknown" % pseudo.ident)
+        return typing.cast(XPathExpr, method(self.xpath(pseudo.selector)))
+
+    def xpath_attrib(self, selector: Attrib) -> XPathExpr:
+        """Translate an attribute selector."""
+        operator = self.attribute_operator_mapping[selector.operator]
+        method = getattr(self, "xpath_attrib_%s" % operator)
+        if self.lower_case_attribute_names:
+            name = selector.attrib.lower()
+        else:
+            name = selector.attrib
+        safe = is_safe_name(name)
+        if selector.namespace:
+            name = "%s:%s" % (selector.namespace, name)
+            safe = safe and is_safe_name(selector.namespace)
+        if safe:
+            attrib = "@" + name
+        else:
+            attrib = "attribute::*[name() = %s]" % self.xpath_literal(name)
+        if selector.value is None:
+            value = None
+        elif self.lower_case_attribute_values:
+            value = typing.cast(str, selector.value.value).lower()
+        else:
+            value = selector.value.value
+        return typing.cast(XPathExpr, method(self.xpath(selector.selector), attrib, value))
+
+    def xpath_class(self, class_selector: Class) -> XPathExpr:
+        """Translate a class selector."""
+        # .foo is defined as [class~=foo] in the spec.
+        xpath = self.xpath(class_selector.selector)
+        return self.xpath_attrib_includes(xpath, "@class", class_selector.class_name)
+
+    def xpath_hash(self, id_selector: Hash) -> XPathExpr:
+        """Translate an ID selector."""
+        xpath = self.xpath(id_selector.selector)
+        return self.xpath_attrib_equals(xpath, "@id", id_selector.id)
+
+    def xpath_element(self, selector: Element) -> XPathExpr:
+        """Translate a type or universal selector."""
+        element = selector.element
+        if not element:
+            element = "*"
+            safe = True
+        else:
+            safe = bool(is_safe_name(element))
+            if self.lower_case_element_names:
+                element = element.lower()
+        if selector.namespace:
+            # Namespace prefixes are case-sensitive.
+            # http://www.w3.org/TR/css3-namespace/#prefixes
+            element = "%s:%s" % (selector.namespace, element)
+            safe = safe and bool(is_safe_name(selector.namespace))
+        xpath = self.xpathexpr_cls(element=element)
+        if not safe:
+            xpath.add_name_test()
+        return xpath
+
+    # CombinedSelector: dispatch by combinator
+
+    def xpath_descendant_combinator(self, left: XPathExpr, right: XPathExpr) -> XPathExpr:
+        """right is a child, grand-child or further descendant of left"""
+        return left.join("/descendant-or-self::*/", right)
+
+    def xpath_child_combinator(self, left: XPathExpr, right: XPathExpr) -> XPathExpr:
+        """right is an immediate child of left"""
+        return left.join("/", right)
+
+    def xpath_direct_adjacent_combinator(self, left: XPathExpr, right: XPathExpr) -> XPathExpr:
+        """right is a sibling immediately after left"""
+        xpath = left.join("/following-sibling::", right)
+        xpath.add_name_test()
+        return xpath.add_condition("position() = 1")
+
+    def xpath_indirect_adjacent_combinator(self, left: XPathExpr, right: XPathExpr) -> XPathExpr:
+        """right is a sibling after left, immediately or not"""
+        return left.join("/following-sibling::", right)
+
+    def xpath_relation_descendant_combinator(self, left: XPathExpr, right: XPathExpr) -> XPathExpr:
+        """right is a child, grand-child or further descendant of left; select left"""
+        return left.join("[descendant::", right, closing_combiner="]", has_inner_condition=True)
+
+    def xpath_relation_child_combinator(self, left: XPathExpr, right: XPathExpr) -> XPathExpr:
+        """right is an immediate child of left; select left"""
+        return left.join("[./", right, closing_combiner="]")
+
+    def xpath_relation_direct_adjacent_combinator(
+        self, left: XPathExpr, right: XPathExpr
+    ) -> XPathExpr:
+        """right is a sibling immediately after left; select left"""
+        xpath = left.add_condition(
+            "following-sibling::*[(name() = '{}') and (position() = 1)]".format(right.element)
+        )
+        return xpath
+
+    def xpath_relation_indirect_adjacent_combinator(
+        self, left: XPathExpr, right: XPathExpr
+    ) -> XPathExpr:
+        """right is a sibling after left, immediately or not; select left"""
+        return left.join("[following-sibling::", right, closing_combiner="]")
+
+    # Function: dispatch by function/pseudo-class name
+
+    def xpath_nth_child_function(
+        self, xpath: XPathExpr, function: Function, last: bool = False, add_name_test: bool = True
+    ) -> XPathExpr:
+        """Translate ``:nth-child(an+b)`` and, through the flags, its variants.
+
+        :param xpath:
+            Expression for the element the pseudo-class applies to.
+        :param function:
+            The functional pseudo-class; its arguments are parsed with
+            ``parse_series`` into the pair ``(a, b)``.
+        :param last:
+            Count following siblings instead of preceding ones.
+        :param add_name_test:
+            When true, siblings are counted with the ``*`` node test;
+            when false, with ``xpath.element``.
+        :raises:
+            :class:`ExpressionError` if ``parse_series`` raises ``ValueError``.
+        :returns:
+            *xpath*, with the sibling-count condition added when one is needed.
+        """
+        try:
+            a, b = parse_series(function.arguments)
+        except ValueError:
+            raise ExpressionError("Invalid series: '%r'" % function.arguments)
+
+        # From https://www.w3.org/TR/css3-selectors/#structural-pseudos:
+        #
+        # :nth-child(an+b)
+        #       an+b-1 siblings before
+        #
+        # :nth-last-child(an+b)
+        #       an+b-1 siblings after
+        #
+        # :nth-of-type(an+b)
+        #       an+b-1 siblings with the same expanded element name before
+        #
+        # :nth-last-of-type(an+b)
+        #       an+b-1 siblings with the same expanded element name after
+        #
+        # So,
+        # for :nth-child and :nth-of-type
+        #
+        #    count(preceding-sibling::<nodetest>) = an+b-1
+        #
+        # for :nth-last-child and :nth-last-of-type
+        #
+        #    count(following-sibling::<nodetest>) = an+b-1
+        #
+        # therefore,
+        #    count(...) - (b-1) ≡ 0 (mod a)
+        #
+        # if a == 0:
+        # ~~~~~~~~~~
+        #    count(...) = b-1
+        #
+        # if a < 0:
+        # ~~~~~~~~~
+        #    count(...) - b +1 <= 0
+        # -> count(...) <= b-1
+        #
+        # if a > 0:
+        # ~~~~~~~~~
+        #    count(...) - b +1 >= 0
+        # -> count(...) >= b-1
+
+        # work with b-1 instead
+        b_min_1 = b - 1
+
+        # early-exit condition 1:
+        # ~~~~~~~~~~~~~~~~~~~~~~~
+        # for a == 1, nth-*(an+b) means n+b-1 siblings before/after,
+        # and since n ∈ {0, 1, 2, ...}, if b-1<=0,
+        # there is always an "n" matching any number of siblings (maybe none)
+        if a == 1 and b_min_1 <= 0:
+            return xpath
+
+        # early-exit condition 2:
+        # ~~~~~~~~~~~~~~~~~~~~~~~
+        # an+b-1 siblings with a<0 and (b-1)<0 is not possible
+        if a < 0 and b_min_1 < 0:
+            return xpath.add_condition("0")
+
+        # `add_name_test` boolean is inverted and somewhat counter-intuitive:
+        #
+        # nth_of_type() calls nth_child(add_name_test=False)
+        if add_name_test:
+            nodetest = "*"
+        else:
+            nodetest = "%s" % xpath.element
+
+        # count siblings before or after the element
+        if not last:
+            siblings_count = "count(preceding-sibling::%s)" % nodetest
+        else:
+            siblings_count = "count(following-sibling::%s)" % nodetest
+
+        # special case of fixed position: nth-*(0n+b)
+        # if a == 0:
+        # ~~~~~~~~~~
+        #    count(***-sibling::***) = b-1
+        if a == 0:
+            return xpath.add_condition("%s = %s" % (siblings_count, b_min_1))
+
+        expressions = []
+
+        if a > 0:
+            # siblings count, an+b-1, is always >= 0,
+            # so if a>0, and (b-1)<=0, an "n" exists to satisfy this,
+            # therefore, the predicate is only interesting if (b-1)>0
+            if b_min_1 > 0:
+                expressions.append("%s >= %s" % (siblings_count, b_min_1))
+        else:
+            # if a<0, and (b-1)<0, no "n" satisfies this,
+            # this is tested above as an early exit condition
+            # otherwise,
+            expressions.append("%s <= %s" % (siblings_count, b_min_1))
+
+        # operations modulo 1 or -1 are simpler, one only needs to verify:
+        #
+        # - either:
+        # count(***-sibling::***) - (b-1) = n = 0, 1, 2, 3, etc.,
+        #   i.e. count(***-sibling::***) >= (b-1)
+        #
+        # - or:
+        # count(***-sibling::***) - (b-1) = -n = 0, -1, -2, -3, etc.,
+        #   i.e. count(***-sibling::***) <= (b-1)
+        # which is what we just did above.
+        #
+        if abs(a) != 1:
+            # count(***-sibling::***) - (b-1) ≡ 0 (mod a)
+            left = siblings_count
+
+            # apply "modulo a" on 2nd term, -(b-1),
+            # to simplify things like "(... +6) % -3",
+            # and also make it positive with |a|
+            b_neg = (-b_min_1) % abs(a)
+
+            if b_neg != 0:
+                b_neg_as_str = "+%s" % b_neg
+                left = "(%s %s)" % (left, b_neg_as_str)
+
+            expressions.append("%s mod %s = 0" % (left, a))
+
+        # Parenthesise each term only when several are joined with "and".
+        if len(expressions) > 1:
+            template = "(%s)"
+        else:
+            template = "%s"
+        xpath.add_condition(" and ".join(template % expression for expression in expressions))
+        return xpath
+
+    def xpath_nth_last_child_function(self, xpath: XPathExpr, function: Function) -> XPathExpr:
+        return self.xpath_nth_child_function(xpath, function, last=True)
+
+    def xpath_nth_of_type_function(self, xpath: XPathExpr, function: Function) -> XPathExpr:
+        if xpath.element == "*":
+            raise ExpressionError("*:nth-of-type() is not implemented")
+        return self.xpath_nth_child_function(xpath, function, add_name_test=False)
+
+    def xpath_nth_last_of_type_function(self, xpath: XPathExpr, function: Function) -> XPathExpr:
+        if xpath.element == "*":
+            raise ExpressionError("*:nth-of-type() is not implemented")
+        return self.xpath_nth_child_function(xpath, function, last=True, add_name_test=False)
+
+    def xpath_contains_function(self, xpath: XPathExpr, function: Function) -> XPathExpr:
+        # Defined there, removed in later drafts:
+        # http://www.w3.org/TR/2001/CR-css3-selectors-20011113/#content-selectors
+        if function.argument_types() not in (["STRING"], ["IDENT"]):
+            raise ExpressionError(
+                "Expected a single string or ident for :contains(), got %r" % function.arguments
+            )
+        value = typing.cast(str, function.arguments[0].value)
+        return xpath.add_condition("contains(., %s)" % self.xpath_literal(value))
+
+    def xpath_lang_function(self, xpath: XPathExpr, function: Function) -> XPathExpr:
+        if function.argument_types() not in (["STRING"], ["IDENT"]):
+            raise ExpressionError(
+                "Expected a single string or ident for :lang(), got %r" % function.arguments
+            )
+        value = typing.cast(str, function.arguments[0].value)
+        return xpath.add_condition("lang(%s)" % (self.xpath_literal(value)))
+
+    # Pseudo: dispatch by pseudo-class name
+
+    def xpath_root_pseudo(self, xpath: XPathExpr) -> XPathExpr:
+        return xpath.add_condition("not(parent::*)")
+
+    # CSS immediate children (CSS ":scope > div" to XPath "child::div" or "./div")
+    # Works only at the start of a selector
+    # Needed to get immediate children of a processed selector in Scrapy
+    # for product in response.css('.product'):
+    #     description = product.css(':scope > div::text').get()
+    def xpath_scope_pseudo(self, xpath: XPathExpr) -> XPathExpr:
+        return xpath.add_condition("1")
+
+    def xpath_first_child_pseudo(self, xpath: XPathExpr) -> XPathExpr:
+        return xpath.add_condition("count(preceding-sibling::*) = 0")
+
+    def xpath_last_child_pseudo(self, xpath: XPathExpr) -> XPathExpr:
+        return xpath.add_condition("count(following-sibling::*) = 0")
+
+    def xpath_first_of_type_pseudo(self, xpath: XPathExpr) -> XPathExpr:
+        if xpath.element == "*":
+            raise ExpressionError("*:first-of-type is not implemented")
+        return xpath.add_condition("count(preceding-sibling::%s) = 0" % xpath.element)
+
+    def xpath_last_of_type_pseudo(self, xpath: XPathExpr) -> XPathExpr:
+        if xpath.element == "*":
+            raise ExpressionError("*:last-of-type is not implemented")
+        return xpath.add_condition("count(following-sibling::%s) = 0" % xpath.element)
+
+    def xpath_only_child_pseudo(self, xpath: XPathExpr) -> XPathExpr:
+        return xpath.add_condition("count(parent::*/child::*) = 1")
+
+    def xpath_only_of_type_pseudo(self, xpath: XPathExpr) -> XPathExpr:
+        if xpath.element == "*":
+            raise ExpressionError("*:only-of-type is not implemented")
+        return xpath.add_condition("count(parent::*/child::%s) = 1" % xpath.element)
+
+    def xpath_empty_pseudo(self, xpath: XPathExpr) -> XPathExpr:
+        return xpath.add_condition("not(*) and not(string-length())")
+
+    def pseudo_never_matches(self, xpath: XPathExpr) -> XPathExpr:
+        """Common implementation for pseudo-classes that never match."""
+        return xpath.add_condition("0")
+
+    xpath_link_pseudo = pseudo_never_matches
+    xpath_visited_pseudo = pseudo_never_matches
+    xpath_hover_pseudo = pseudo_never_matches
+    xpath_active_pseudo = pseudo_never_matches
+    xpath_focus_pseudo = pseudo_never_matches
+    xpath_target_pseudo = pseudo_never_matches
+    xpath_enabled_pseudo = pseudo_never_matches
+    xpath_disabled_pseudo = pseudo_never_matches
+    xpath_checked_pseudo = pseudo_never_matches
+
+    # Attrib: dispatch by attribute operator
+
+    def xpath_attrib_exists(self, xpath: XPathExpr, name: str, value: Optional[str]) -> XPathExpr:
+        assert not value
+        xpath.add_condition(name)
+        return xpath
+
+    def xpath_attrib_equals(self, xpath: XPathExpr, name: str, value: Optional[str]) -> XPathExpr:
+        assert value is not None
+        xpath.add_condition("%s = %s" % (name, self.xpath_literal(value)))
+        return xpath
+
+    def xpath_attrib_different(
+        self, xpath: XPathExpr, name: str, value: Optional[str]
+    ) -> XPathExpr:
+        assert value is not None
+        # FIXME: this seems like a weird hack...
+        if value:
+            xpath.add_condition("not(%s) or %s != %s" % (name, name, self.xpath_literal(value)))
+        else:
+            xpath.add_condition("%s != %s" % (name, self.xpath_literal(value)))
+        return xpath
+
+    def xpath_attrib_includes(
+        self, xpath: XPathExpr, name: str, value: Optional[str]
+    ) -> XPathExpr:
+        if value and is_non_whitespace(value):
+            xpath.add_condition(
+                "%s and contains(concat(' ', normalize-space(%s), ' '), %s)"
+                % (name, name, self.xpath_literal(" " + value + " "))
+            )
+        else:
+            xpath.add_condition("0")
+        return xpath
+
+    def xpath_attrib_dashmatch(
+        self, xpath: XPathExpr, name: str, value: Optional[str]
+    ) -> XPathExpr:
+        assert value is not None
+        # Weird, but true...
+        xpath.add_condition(
+            "%s and (%s = %s or starts-with(%s, %s))"
+            % (name, name, self.xpath_literal(value), name, self.xpath_literal(value + "-"))
+        )
+        return xpath
+
+    def xpath_attrib_prefixmatch(
+        self, xpath: XPathExpr, name: str, value: Optional[str]
+    ) -> XPathExpr:
+        if value:
+            xpath.add_condition(
+                "%s and starts-with(%s, %s)" % (name, name, self.xpath_literal(value))
+            )
+        else:
+            xpath.add_condition("0")
+        return xpath
+
+    def xpath_attrib_suffixmatch(
+        self, xpath: XPathExpr, name: str, value: Optional[str]
+    ) -> XPathExpr:
+        if value:
+            # Oddly there is a starts-with in XPath 1.0, but not ends-with
+            xpath.add_condition(
+                "%s and substring(%s, string-length(%s)-%s) = %s"
+                % (name, name, name, len(value) - 1, self.xpath_literal(value))
+            )
+        else:
+            xpath.add_condition("0")
+        return xpath
+
+    def xpath_attrib_substringmatch(
+        self, xpath: XPathExpr, name: str, value: Optional[str]
+    ) -> XPathExpr:
+        if value:
+            # Attribute selectors are case sensitive
+            xpath.add_condition(
+                "%s and contains(%s, %s)" % (name, name, self.xpath_literal(value))
+            )
+        else:
+            xpath.add_condition("0")
+        return xpath
+
+
+class HTMLTranslator(GenericTranslator):
+    """
+    Translator for (X)HTML documents.
+
+    Has a more useful implementation of some pseudo-classes based on
+    HTML-specific element names and attribute names, as described in
+    the `HTML5 specification`_. It assumes no-quirks mode.
+    The API is the same as :class:`GenericTranslator`.
+
+    .. _HTML5 specification: http://www.w3.org/TR/html5/links.html#selectors
+
+    :param xhtml:
+        If false (the default), element names and attribute names
+        are case-insensitive.
+
+    """
+
+    lang_attribute = "lang"
+
+    def __init__(self, xhtml: bool = False) -> None:
+        self.xhtml = xhtml  # Might be useful for sub-classes?
+        if not xhtml:
+            # See their definition in GenericTranslator.
+            self.lower_case_element_names = True
+            self.lower_case_attribute_names = True
+
+    def xpath_checked_pseudo(self, xpath: XPathExpr) -> XPathExpr:  # type: ignore
+        # FIXME: is this really all the elements?
+        return xpath.add_condition(
+            "(@selected and name(.) = 'option') or "
+            "(@checked "
+            "and (name(.) = 'input' or name(.) = 'command')"
+            "and (@type = 'checkbox' or @type = 'radio'))"
+        )
+
+    def xpath_lang_function(self, xpath: XPathExpr, function: Function) -> XPathExpr:
+        if function.argument_types() not in (["STRING"], ["IDENT"]):
+            raise ExpressionError(
+                "Expected a single string or ident for :lang(), got %r" % function.arguments
+            )
+        value = function.arguments[0].value
+        assert value
+        return xpath.add_condition(
+            "ancestor-or-self::*[@lang][1][starts-with(concat("
+            # XPath 1.0 has no lower-case function...
+            "translate(@%s, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', "
+            "'abcdefghijklmnopqrstuvwxyz'), "
+            "'-'), %s)]" % (self.lang_attribute, self.xpath_literal(value.lower() + "-"))
+        )
+
+    def xpath_link_pseudo(self, xpath: XPathExpr) -> XPathExpr:  # type: ignore
+        return xpath.add_condition(
+            "@href and (name(.) = 'a' or name(.) = 'link' or name(.) = 'area')"
+        )
+
+    # Links are never visited, the implementation for :visited is the same
+    # as in GenericTranslator
+
+    def xpath_disabled_pseudo(self, xpath: XPathExpr) -> XPathExpr:  # type: ignore
+        """Translate ``:disabled``.
+
+        Matches form-related elements that carry ``@disabled`` themselves,
+        plus ``input`` (type other than ``hidden``), ``button``, ``select``
+        and ``textarea`` elements inside a ``fieldset`` with ``@disabled``.
+        """
+        # http://www.w3.org/TR/html5/section-index.html#attributes-1
+        return xpath.add_condition(
+            """
+        (
+            @disabled and
+            (
+                (name(.) = 'input' and @type != 'hidden') or
+                name(.) = 'button' or
+                name(.) = 'select' or
+                name(.) = 'textarea' or
+                name(.) = 'command' or
+                name(.) = 'fieldset' or
+                name(.) = 'optgroup' or
+                name(.) = 'option'
+            )
+        ) or (
+            (
+                (name(.) = 'input' and @type != 'hidden') or
+                name(.) = 'button' or
+                name(.) = 'select' or
+                name(.) = 'textarea'
+            )
+            and ancestor::fieldset[@disabled]
+        )
+        """
+        )
+        # FIXME: in the second half, add "and is not a descendant of that
+        # fieldset element's first legend element child, if any."
+
+    def xpath_enabled_pseudo(self, xpath: XPathExpr) -> XPathExpr:  # type: ignore
+        """Translate ``:enabled``.
+
+        Matches ``a``/``link``/``area`` elements with ``@href``;
+        ``command``, ``fieldset`` and ``optgroup`` without ``@disabled``;
+        form controls that are neither disabled nor inside a disabled
+        ``fieldset``; and ``option`` elements that are neither disabled
+        nor inside a disabled ``optgroup``.
+        """
+        # http://www.w3.org/TR/html5/section-index.html#attributes-1
+        return xpath.add_condition(
+            """
+        (
+            @href and (
+                name(.) = 'a' or
+                name(.) = 'link' or
+                name(.) = 'area'
+            )
+        ) or (
+            (
+                name(.) = 'command' or
+                name(.) = 'fieldset' or
+                name(.) = 'optgroup'
+            )
+            and not(@disabled)
+        ) or (
+            (
+                (name(.) = 'input' and @type != 'hidden') or
+                name(.) = 'button' or
+                name(.) = 'select' or
+                name(.) = 'textarea' or
+                name(.) = 'keygen'
+            )
+            and not (@disabled or ancestor::fieldset[@disabled])
+        ) or (
+            name(.) = 'option' and not(
+                @disabled or ancestor::optgroup[@disabled]
+            )
+        )
+        """
+        )
+        # FIXME: ... or "li elements that are children of menu elements,
+        # and that have a child element that defines a command, if the first
+        # such element's Disabled State facet is false (not disabled)".
+        # FIXME: after ancestor::fieldset[@disabled], add "and is not a
+        # descendant of that fieldset element's first legend element child,
+        # if any."