first commit

2024-02-23 10:30:02 +00:00
commit ddeb07d0ba
12482 changed files with 1857507 additions and 0 deletions
--- a/venv/lib/python3.10/site-packages/pyquery/init.py
+++ b/venv/lib/python3.10/site-packages/pyquery/init.py
@@ -0,0 +1,5 @@
+# Copyright (C) 2008 - Olivier Lauzanne <olauzanne@gmail.com>
+#
+# Distributed under the BSD license, see LICENSE.txt
+
+from .pyquery import PyQuery  # NOQA
--- a/venv/lib/python3.10/site-packages/pyquery/pycache/init.cpython-310.pyc
+++ b/venv/lib/python3.10/site-packages/pyquery/pycache/init.cpython-310.pyc
--- a/venv/lib/python3.10/site-packages/pyquery/pycache/cssselectpatch.cpython-310.pyc
+++ b/venv/lib/python3.10/site-packages/pyquery/pycache/cssselectpatch.cpython-310.pyc
--- a/venv/lib/python3.10/site-packages/pyquery/pycache/openers.cpython-310.pyc
+++ b/venv/lib/python3.10/site-packages/pyquery/pycache/openers.cpython-310.pyc
--- a/venv/lib/python3.10/site-packages/pyquery/pycache/pyquery.cpython-310.pyc
+++ b/venv/lib/python3.10/site-packages/pyquery/pycache/pyquery.cpython-310.pyc
--- a/venv/lib/python3.10/site-packages/pyquery/pycache/text.cpython-310.pyc
+++ b/venv/lib/python3.10/site-packages/pyquery/pycache/text.cpython-310.pyc
--- a/venv/lib/python3.10/site-packages/pyquery/cssselectpatch.py
+++ b/venv/lib/python3.10/site-packages/pyquery/cssselectpatch.py
@@ -0,0 +1,469 @@
+# Copyright (C) 2008 - Olivier Lauzanne <olauzanne@gmail.com>
+#
+# Distributed under the BSD license, see LICENSE.txt
+from __future__ import unicode_literals
+from cssselect import xpath as cssselect_xpath
+from cssselect.xpath import ExpressionError
+
+XPathExprOrig = cssselect_xpath.XPathExpr
+
+
+class XPathExpr(XPathExprOrig):
+    """XPath expression that can carry an extra "post condition".
+
+    The post condition is kept apart from ``condition`` and is rendered by
+    ``__str__`` as its own trailing ``[...]`` predicate, after whatever the
+    base class renders.
+    """
+
+    def __init__(self, path='', element='*', condition='', star_prefix=False):
+        # The base initializer is not called; ``star_prefix`` is accepted
+        # but not stored or used by this class.
+        self.path, self.element, self.condition = path, element, condition
+        self.post_condition = None
+
+    def add_post_condition(self, post_condition):
+        """And ``post_condition`` onto the current post condition, or set
+        it when there is none yet.
+        """
+        if not self.post_condition:
+            self.post_condition = post_condition
+            return
+        self.post_condition = '%s and (%s)' % (self.post_condition,
+                                               post_condition)
+
+    def __str__(self):
+        """Render the base expression, followed by ``[post_condition]``
+        when a post condition is set.
+        """
+        rendered = XPathExprOrig.__str__(self)
+        if not self.post_condition:
+            return rendered
+        return '%s[%s]' % (rendered, self.post_condition)
+
+    def join(self, combiner, other,
+             closing_combiner=None, has_inner_condition=False):
+        """Join with ``other`` through the base class, then take over
+        ``other``'s post condition (replacing this expression's own).
+        """
+        joined = XPathExprOrig.join(
+            self, combiner, other,
+            closing_combiner=closing_combiner,
+            has_inner_condition=has_inner_condition)
+        self.post_condition = other.post_condition
+        return joined
+
+
+# keep cssselect < 0.8 compat for now
+
+
+class JQueryTranslator(cssselect_xpath.HTMLTranslator):
+    """This class is used to implement the css pseudo classes
+    (:first, :last, ...) that are not defined in the css standard,
+    but are defined in the jquery API.
+
+    Positional pseudo classes (:first, :eq(), ...) and :contains()/:has()
+    are added as post conditions; the others are added as regular
+    conditions on the expression.
+    """
+
+    xpathexpr_cls = XPathExpr
+
+    @staticmethod
+    def _add_input_type_condition(xpath, input_type):
+        """Restrict ``xpath`` to ``<input>`` elements whose ``type``
+        attribute equals ``input_type`` and return it.
+        """
+        xpath.add_condition(
+            "@type = '%s' and name(.) = 'input'" % input_type)
+        return xpath
+
+    @staticmethod
+    def _index_argument(function, name):
+        """Return the integer argument of the index function ``:name()``.
+
+        Raises ExpressionError when the function was not called with
+        exactly one NUMBER argument.
+        """
+        if function.argument_types() != ['NUMBER']:
+            raise ExpressionError(
+                "Expected a single integer for :%s(), got %r" % (
+                    name, function.arguments))
+        return int(function.arguments[0].value)
+
+    def xpath_first_pseudo(self, xpath):
+        """Matches the first selected element::
+
+            >>> from pyquery import PyQuery
+            >>> d = PyQuery('<div><p class="first"></p><p></p></div>')
+            >>> d('p:first')
+            [<p.first>]
+
+        ..
+        """
+        xpath.add_post_condition('position() = 1')
+        return xpath
+
+    def xpath_last_pseudo(self, xpath):
+        """Matches the last selected element::
+
+            >>> from pyquery import PyQuery
+            >>> d = PyQuery('<div><p></p><p class="last"></p></div>')
+            >>> d('p:last')
+            [<p.last>]
+
+        ..
+        """
+        xpath.add_post_condition('position() = last()')
+        return xpath
+
+    def xpath_even_pseudo(self, xpath):
+        """Matches even elements, zero-indexed::
+
+            >>> from pyquery import PyQuery
+            >>> d = PyQuery('<div><p></p><p class="last"></p></div>')
+            >>> d('p:even')
+            [<p>]
+
+        ..
+        """
+        # the first element is 1 in xpath and 0 in python and js
+        xpath.add_post_condition('position() mod 2 = 1')
+        return xpath
+
+    def xpath_odd_pseudo(self, xpath):
+        """Matches odd elements, zero-indexed::
+
+            >>> from pyquery import PyQuery
+            >>> d = PyQuery('<div><p></p><p class="last"></p></div>')
+            >>> d('p:odd')
+            [<p.last>]
+
+        ..
+        """
+        # zero-indexed odd elements sit at even 1-based xpath positions
+        xpath.add_post_condition('position() mod 2 = 0')
+        return xpath
+
+    def xpath_checked_pseudo(self, xpath):
+        """Matches all input elements that are checked::
+
+            >>> from pyquery import PyQuery
+            >>> d = PyQuery('<div><input checked="checked"/></div>')
+            >>> d('input:checked')
+            [<input>]
+
+        ..
+        """
+        xpath.add_condition("@checked and name(.) = 'input'")
+        return xpath
+
+    def xpath_selected_pseudo(self, xpath):
+        """Matches all elements that are selected::
+
+            >>> from pyquery import PyQuery
+            >>> d = PyQuery('<select><option selected="selected"/></select>')
+            >>> d('option:selected')
+            [<option>]
+
+        ..
+        """
+        xpath.add_condition("@selected and name(.) = 'option'")
+        return xpath
+
+    def _format_disabled_xpath(self, disabled=True):
+        """Format XPath condition for :disabled or :enabled pseudo-classes
+        according to the WHATWG spec. See: https://html.spec.whatwg.org
+        /multipage/semantics-other.html#concept-element-disabled
+
+        With ``disabled=False`` each "is disabled" test is wrapped in
+        ``not(...)``, which yields the :enabled condition.
+        """
+        bool_op = '' if disabled else 'not'
+        return '''(
+            ((name(.) = 'button' or name(.) = 'input' or name(.) = 'select'
+                    or name(.) = 'textarea' or name(.) = 'fieldset')
+                and %s(@disabled or (ancestor::fieldset[@disabled]
+                    and not(ancestor::legend[not(preceding-sibling::legend)])))
+            )
+            or
+            ((name(.) = 'option'
+                and %s(@disabled or ancestor::optgroup[@disabled]))
+            )
+            or
+            ((name(.) = 'optgroup' and %s(@disabled)))
+            )''' % (bool_op, bool_op, bool_op)
+
+    def xpath_disabled_pseudo(self, xpath):
+        """Matches all elements that are disabled::
+
+            >>> from pyquery import PyQuery
+            >>> d = PyQuery('<div><input disabled="disabled"/></div>')
+            >>> d('input:disabled')
+            [<input>]
+
+        ..
+        """
+        xpath.add_condition(self._format_disabled_xpath())
+        return xpath
+
+    def xpath_enabled_pseudo(self, xpath):
+        """Matches all elements that are enabled::
+
+            >>> from pyquery import PyQuery
+            >>> d = PyQuery('<div><input value="foo" /></div>')
+            >>> d('input:enabled')
+            [<input>]
+
+        ..
+        """
+        xpath.add_condition(self._format_disabled_xpath(disabled=False))
+        return xpath
+
+    def xpath_file_pseudo(self, xpath):
+        """Matches all input elements of type file::
+
+            >>> from pyquery import PyQuery
+            >>> d = PyQuery('<div><input type="file"/></div>')
+            >>> d('input:file')
+            [<input>]
+
+        ..
+        """
+        return self._add_input_type_condition(xpath, 'file')
+
+    def xpath_input_pseudo(self, xpath):
+        """Matches all input elements::
+
+            >>> from pyquery import PyQuery
+            >>> d = PyQuery(('<div><input type="file"/>'
+            ...              '<textarea></textarea></div>'))
+            >>> d(':input')
+            [<input>, <textarea>]
+
+        ..
+        """
+        xpath.add_condition((
+            "(name(.) = 'input' or name(.) = 'select') "
+            "or (name(.) = 'textarea' or name(.) = 'button')"))
+        return xpath
+
+    def xpath_button_pseudo(self, xpath):
+        """Matches all button input elements and the button element::
+
+            >>> from pyquery import PyQuery
+            >>> d = PyQuery(('<div><input type="button"/>'
+            ...              '<button></button></div>'))
+            >>> d(':button')
+            [<input>, <button>]
+
+        ..
+        """
+        xpath.add_condition((
+            "(@type = 'button' and name(.) = 'input') "
+            "or name(.) = 'button'"))
+        return xpath
+
+    def xpath_radio_pseudo(self, xpath):
+        """Matches all radio input elements::
+
+            >>> from pyquery import PyQuery
+            >>> d = PyQuery('<div><input type="radio"/></div>')
+            >>> d('input:radio')
+            [<input>]
+
+        ..
+        """
+        return self._add_input_type_condition(xpath, 'radio')
+
+    def xpath_text_pseudo(self, xpath):
+        """Matches all text input elements::
+
+            >>> from pyquery import PyQuery
+            >>> d = PyQuery('<div><input type="text"/></div>')
+            >>> d('input:text')
+            [<input>]
+
+        ..
+        """
+        return self._add_input_type_condition(xpath, 'text')
+
+    def xpath_checkbox_pseudo(self, xpath):
+        """Matches all checkbox input elements::
+
+            >>> from pyquery import PyQuery
+            >>> d = PyQuery('<div><input type="checkbox"/></div>')
+            >>> d('input:checkbox')
+            [<input>]
+
+        ..
+        """
+        return self._add_input_type_condition(xpath, 'checkbox')
+
+    def xpath_password_pseudo(self, xpath):
+        """Matches all password input elements::
+
+            >>> from pyquery import PyQuery
+            >>> d = PyQuery('<div><input type="password"/></div>')
+            >>> d('input:password')
+            [<input>]
+
+        ..
+        """
+        return self._add_input_type_condition(xpath, 'password')
+
+    def xpath_submit_pseudo(self, xpath):
+        """Matches all submit input elements::
+
+            >>> from pyquery import PyQuery
+            >>> d = PyQuery('<div><input type="submit"/></div>')
+            >>> d('input:submit')
+            [<input>]
+
+        ..
+        """
+        return self._add_input_type_condition(xpath, 'submit')
+
+    def xpath_hidden_pseudo(self, xpath):
+        """Matches all hidden input elements::
+
+            >>> from pyquery import PyQuery
+            >>> d = PyQuery('<div><input type="hidden"/></div>')
+            >>> d('input:hidden')
+            [<input>]
+
+        ..
+        """
+        return self._add_input_type_condition(xpath, 'hidden')
+
+    def xpath_image_pseudo(self, xpath):
+        """Matches all image input elements::
+
+            >>> from pyquery import PyQuery
+            >>> d = PyQuery('<div><input type="image"/></div>')
+            >>> d('input:image')
+            [<input>]
+
+        ..
+        """
+        return self._add_input_type_condition(xpath, 'image')
+
+    def xpath_reset_pseudo(self, xpath):
+        """Matches all reset input elements::
+
+            >>> from pyquery import PyQuery
+            >>> d = PyQuery('<div><input type="reset"/></div>')
+            >>> d('input:reset')
+            [<input>]
+
+        ..
+        """
+        return self._add_input_type_condition(xpath, 'reset')
+
+    def xpath_header_pseudo(self, xpath):
+        """Matches all header elements (h1, ..., h6)::
+
+            >>> from pyquery import PyQuery
+            >>> d = PyQuery('<div><h1>title</h1></div>')
+            >>> d(':header')
+            [<h1>]
+
+        ..
+        """
+        # this seems kind of brute-force, is there a better way?
+        xpath.add_condition((
+            "(name(.) = 'h1' or name(.) = 'h2' or name (.) = 'h3') "
+            "or (name(.) = 'h4' or name (.) = 'h5' or name(.) = 'h6')"))
+        return xpath
+
+    def xpath_parent_pseudo(self, xpath):
+        """Match all elements that contain other elements::
+
+            >>> from pyquery import PyQuery
+            >>> d = PyQuery('<div><h1><span>title</span></h1><h1/></div>')
+            >>> d('h1:parent')
+            [<h1>]
+
+        ..
+        """
+        xpath.add_condition("count(child::*) > 0")
+        return xpath
+
+    def xpath_empty_pseudo(self, xpath):
+        """Match all elements that have no child node at all::
+
+            >>> from pyquery import PyQuery
+            >>> d = PyQuery('<div><h1><span>title</span></h1><h2/></div>')
+            >>> d(':empty')
+            [<h2>]
+
+        ..
+        """
+        xpath.add_condition("not(node())")
+        return xpath
+
+    def xpath_eq_function(self, xpath, function):
+        """Matches a single element by its index::
+
+            >>> from pyquery import PyQuery
+            >>> d = PyQuery('<div><h1 class="first"/><h1 class="last"/></div>')
+            >>> d('h1:eq(0)')
+            [<h1.first>]
+            >>> d('h1:eq(1)')
+            [<h1.last>]
+
+        ..
+        """
+        value = self._index_argument(function, 'eq')
+        # the selector index is zero-based, xpath positions start at 1
+        xpath.add_post_condition('position() = %s' % (value + 1))
+        return xpath
+
+    def xpath_gt_function(self, xpath, function):
+        """Matches all elements with an index over the given one::
+
+            >>> from pyquery import PyQuery
+            >>> d = PyQuery('<div><h1 class="first"/><h1 class="last"/></div>')
+            >>> d('h1:gt(0)')
+            [<h1.last>]
+
+        ..
+        """
+        value = self._index_argument(function, 'gt')
+        # the selector index is zero-based, xpath positions start at 1
+        xpath.add_post_condition('position() > %s' % (value + 1))
+        return xpath
+
+    def xpath_lt_function(self, xpath, function):
+        """Matches all elements with an index below the given one::
+
+            >>> from pyquery import PyQuery
+            >>> d = PyQuery('<div><h1 class="first"/><h1 class="last"/></div>')
+            >>> d('h1:lt(1)')
+            [<h1.first>]
+
+        ..
+        """
+        # the error message now names :lt() (it used to say :gt())
+        value = self._index_argument(function, 'lt')
+        # the selector index is zero-based, xpath positions start at 1
+        xpath.add_post_condition('position() < %s' % (value + 1))
+        return xpath
+
+    def xpath_contains_function(self, xpath, function):
+        """Matches all elements that contain the given text::
+
+            >>> from pyquery import PyQuery
+            >>> d = PyQuery('<div><h1/><h1 class="title">title</h1></div>')
+            >>> d('h1:contains("title")')
+            [<h1.title>]
+
+        ..
+        """
+        if function.argument_types() not in (['STRING'], ['IDENT']):
+            raise ExpressionError(
+                "Expected a single string or ident for :contains(), got %r" % (
+                    function.arguments,))
+
+        value = self.xpath_literal(function.arguments[0].value)
+        xpath.add_post_condition('contains(., %s)' % value)
+        return xpath
+
+    def xpath_has_function(self, xpath, function):
+        """Matches elements which contain at least one element that matches
+        the specified selector (see https://api.jquery.com/has-selector/)::
+
+            >>> from pyquery import PyQuery
+            >>> d = PyQuery('<div class="foo"><div class="bar"></div></div>')
+            >>> d('.foo:has(".baz")')
+            []
+            >>> d('.foo:has(".foo")')
+            []
+            >>> d('.foo:has(".bar")')
+            [<div.foo>]
+            >>> d('.foo:has(div)')
+            [<div.foo>]
+
+        ..
+        """
+        if function.argument_types() not in (['STRING'], ['IDENT']):
+            raise ExpressionError(
+                "Expected a single string or ident for :has(), got %r" % (
+                    function.arguments,))
+        # translate the inner selector relative to the candidate element
+        value = self.css_to_xpath(
+            function.arguments[0].value, prefix='descendant::',
+        )
+        xpath.add_post_condition(value)
+        return xpath
--- a/venv/lib/python3.10/site-packages/pyquery/openers.py
+++ b/venv/lib/python3.10/site-packages/pyquery/openers.py
@@ -0,0 +1,77 @@
+# -*- coding: utf-8 -*-
+from urllib.request import urlopen
+from urllib.parse import urlencode
+from urllib.error import HTTPError
+
+try:
+    import requests
+    HAS_REQUEST = True
+except ImportError:
+    HAS_REQUEST = False
+
+DEFAULT_TIMEOUT = 60
+
+basestring = (str, bytes)
+
+allowed_args = (
+    'auth', 'data', 'headers', 'verify',
+    'cert', 'config', 'hooks', 'proxies', 'cookies'
+)
+
+
+def _query(url, method, kwargs):
+    """Turn the ``data`` entry of ``kwargs`` into a query string or a body.
+
+    ``data`` is removed from ``kwargs`` (the caller's dict is mutated).
+    A dict, list or tuple (or a subclass of those) is url-encoded first.
+
+    * When ``method`` is the string "get" (any case), non-empty data is
+      appended to ``url`` as a query string and no body is returned.
+    * Otherwise the url is left alone and text data is encoded to UTF-8
+      bytes; bytes and any other payload are passed through unchanged.
+
+    Returns a ``(url, data)`` tuple where ``data`` is the request body
+    or ``None``.
+    """
+    data = kwargs.pop('data', None)
+    if isinstance(data, (dict, list, tuple)):
+        data = urlencode(data)
+
+    if isinstance(method, str) and method.lower() == 'get':
+        if data:
+            if isinstance(data, bytes):
+                # a bytes payload cannot be concatenated to a str url
+                data = data.decode('utf-8')
+            if '?' not in url:
+                url += '?'
+            elif not url.endswith(('?', '&')):
+                url += '&'
+            url += data
+        # a GET never carries a body, even when the data was empty
+        return url, None
+
+    if isinstance(data, str):
+        # only text needs encoding; bytes have no ``encode`` method
+        data = data.encode('utf-8')
+    return url, data
+
+
+def _requests(url, kwargs):
+    """Fetch ``url`` with the ``requests`` library and return the body text.
+
+    Options read from ``kwargs``: ``method`` (defaults to "get"; an
+    explicit ``None`` is treated as the default), ``session`` (used in
+    place of the ``requests`` module when given), ``timeout`` (defaults
+    to ``DEFAULT_TIMEOUT``), ``encoding`` (forced on the response when
+    given) and any key listed in ``allowed_args``, which is forwarded to
+    the request call.
+
+    Raises ``HTTPError`` when the response status is not in the 2xx range.
+    """
+    encoding = kwargs.get('encoding')
+    # ``or`` rather than a ``get`` default, so ``method=None`` means GET too
+    method = (kwargs.get('method') or 'get').lower()
+    opener = kwargs.get('session') or requests
+    meth = getattr(opener, str(method))
+    if method == 'get':
+        # moves ``data`` out of kwargs and into the url's query string
+        url, _ = _query(url, method, kwargs)
+    kw = {k: kwargs[k] for k in allowed_args if k in kwargs}
+    resp = meth(url=url, timeout=kwargs.get('timeout', DEFAULT_TIMEOUT), **kw)
+    if not 200 <= resp.status_code < 300:
+        raise HTTPError(resp.url, resp.status_code,
+                        resp.reason, resp.headers, None)
+    if encoding:
+        resp.encoding = encoding
+    return resp.text
+
+
+def _urllib(url, kwargs):
+    """Open ``url`` with ``urlopen`` and return whatever it returns.
+
+    ``method`` and ``data`` from ``kwargs`` are resolved by ``_query``;
+    ``timeout`` falls back to ``DEFAULT_TIMEOUT``.
+    """
+    timeout = kwargs.get('timeout', DEFAULT_TIMEOUT)
+    target, payload = _query(url, kwargs.get('method'), kwargs)
+    return urlopen(target, payload, timeout=timeout)
+
+
+def url_opener(url, kwargs):
+    """Fetch ``url`` through ``_requests`` when the ``requests`` import
+    succeeded (``HAS_REQUEST``), otherwise through ``_urllib``.
+    """
+    opener = _requests if HAS_REQUEST else _urllib
+    return opener(url, kwargs)
--- a/venv/lib/python3.10/site-packages/pyquery/pyquery.py
+++ b/venv/lib/python3.10/site-packages/pyquery/pyquery.py
--- a/venv/lib/python3.10/site-packages/pyquery/text.py
+++ b/venv/lib/python3.10/site-packages/pyquery/text.py
@@ -0,0 +1,111 @@
+import re
+
+
+# https://developer.mozilla.org/en-US/docs/Web/HTML/Inline_elements#Elements
+INLINE_TAGS = {
+    'a', 'abbr', 'acronym', 'b', 'bdo', 'big', 'br', 'button', 'cite',
+    'code', 'dfn', 'em', 'i', 'img', 'input', 'kbd', 'label', 'map',
+    'object', 'q', 'samp', 'script', 'select', 'small', 'span', 'strong',
+    'sub', 'sup', 'textarea', 'time', 'tt', 'var'
+}
+
+SEPARATORS = {'br'}
+
+
+# Definition of whitespace in HTML:
+# https://www.w3.org/TR/html4/struct/text.html#h-9.1
+WHITESPACE_RE = re.compile(u'[\x20\x09\x0C\u200B\x0A\x0D]+')
+
+
+def squash_html_whitespace(text):
+    """Replace every run matched by ``WHITESPACE_RE`` in ``text`` with a
+    single space and return the result.
+
+    Leading and trailing runs are turned into one space, not removed.
+    NOTE(review): the original comment suggests preformatted content
+    (like ``<pre>``) should use the raw extracted text instead of this
+    function - confirm against callers.
+    """
+    collapsed = WHITESPACE_RE.sub(' ', text)
+    return collapsed
+
+
+def _squash_artifical_nl(parts):
+    """Return a copy of ``parts`` where each run of consecutive ``None``
+    markers is reduced to a single ``None``; other items are kept as is.
+    """
+    squashed = []
+    prev_was_marker = False
+    for part in parts:
+        is_marker = part is None
+        # drop a marker only when it directly follows another marker
+        if not (is_marker and prev_was_marker):
+            squashed.append(part)
+        prev_was_marker = is_marker
+    return squashed
+
+
+def _strip_artifical_nl(parts):
+    """Return the slice of ``parts`` running from its first string item
+    to its last string item, dropping the non-string items around it.
+
+    An empty ``parts`` is returned unchanged. When ``parts`` holds no
+    string at all, only its last item is kept.
+    """
+    if not parts:
+        return parts
+    last_index = len(parts) - 1
+    # position of the first string, or the last position when none exists
+    first = next(
+        (i for i, part in enumerate(parts) if isinstance(part, str)),
+        last_index)
+    # items from the end of ``parts`` back down to position ``first``
+    tail = parts[:first - 1 if first > 0 else None:-1]
+    # number of non-string items found after the last string
+    trailing = next(
+        (i for i, part in enumerate(tail) if isinstance(part, str)),
+        len(tail) - 1)
+    return parts[first:-trailing if trailing > 0 else None]
+
+
+def _merge_original_parts(parts):
+    """Merge each run of adjacent strings in ``parts`` into one string.
+
+    A run is joined, passed through ``squash_html_whitespace`` and
+    stripped; runs that end up empty are dropped. Non-string items are
+    kept in place.
+    """
+    merged, pending = [], []
+
+    def emit_pending():
+        # turn the buffered strings into at most one output item
+        if not pending:
+            return
+        text = squash_html_whitespace(''.join(pending)).strip()
+        del pending[:]
+        if text:
+            merged.append(text)
+
+    for part in parts:
+        if isinstance(part, str):
+            pending.append(part)
+            continue
+        emit_pending()
+        merged.append(part)
+    emit_pending()
+    return merged
+
+
+def extract_text_array(dom, squash_artifical_nl=True, strip_artifical_nl=True):
+    """Flatten the text below ``dom`` into a list of parts.
+
+    The list holds, in document order:
+
+    * the text and tail strings found in the tree,
+    * ``True`` for every element whose tag is in ``SEPARATORS``,
+    * ``None`` before and after every element whose tag is neither in
+      ``INLINE_TAGS`` nor in ``SEPARATORS``.
+
+    Children are always collected without squashing or stripping; the
+    two flags are applied to the result of this call only, through
+    ``_squash_artifical_nl`` and ``_strip_artifical_nl``.
+
+    A node with a callable ``tag`` yields an empty list (it used to
+    yield an empty string, which behaved the same when iterated).
+    """
+    if callable(dom.tag):
+        # presumably an lxml comment or processing instruction, whose
+        # ``tag`` is a factory function rather than a name - TODO confirm
+        return []
+    tag = dom.tag
+    is_separator = tag in SEPARATORS
+    is_block = not is_separator and tag not in INLINE_TAGS
+    r = []
+    if is_separator:
+        r.append(True)  # equivalent of '\n' used to designate separators
+    elif is_block:
+        # equivalent of '\n' used to designate artificially inserted newlines
+        r.append(None)
+    if dom.text is not None:
+        r.append(dom.text)
+    # iterate the element directly; ``getchildren()`` is deprecated
+    for child in dom:
+        r.extend(extract_text_array(child, squash_artifical_nl=False,
+                                    strip_artifical_nl=False))
+        if child.tail is not None:
+            r.append(child.tail)
+    if is_block:
+        # equivalent of '\n' used to designate artificially inserted newlines
+        r.append(None)
+    if squash_artifical_nl:
+        r = _squash_artifical_nl(r)
+    if strip_artifical_nl:
+        r = _strip_artifical_nl(r)
+    return r
+
+
+def extract_text(dom, block_symbol='\n', sep_symbol='\n', squash_space=True):
+    """Return the text below ``dom`` as a single string.
+
+    The parts come from ``extract_text_array``. ``None`` parts are
+    rendered as ``block_symbol`` and ``True`` parts as ``sep_symbol``.
+    With ``squash_space`` the parts are also merged, squashed and
+    stripped before joining, and the final string is stripped.
+    """
+    parts = extract_text_array(dom, squash_artifical_nl=squash_space)
+    if squash_space:
+        parts = _merge_original_parts(parts)
+        parts = _squash_artifical_nl(parts)
+        parts = _strip_artifical_nl(parts)
+    pieces = []
+    for part in parts:
+        if part is None:
+            pieces.append(block_symbol)
+        elif part is True:
+            pieces.append(sep_symbol)
+        else:
+            pieces.append(part)
+    text = ''.join(pieces)
+    return text.strip() if squash_space else text