# Source code for wex.extractor

""" An extractor is a callable that returns or yields data. For example:

.. code-block:: python

    def extract(response):
        return "something"

The ``response`` parameter here is an instance of
:class:`wex.response.Response`.

Extractors can be combined in various ways.
"""

from __future__ import absolute_import, unicode_literals, print_function
from .value import yield_values


# Module-level sentinel: a unique object that is identical only to itself.
# Nothing in this part of the file references it -- presumably it marks a
# value that was omitted (as distinct from ``None``); confirm against callers.
OMITTED = object()


class Chained(object):
    """ An extractor that runs several extractors one after another.

    Calling an instance calls every extractor in ``self.extractors``, in
    order, with the same arguments and yields each value that
    ``yield_values`` produces for it.
    """

    # Optional debugging hook.  When set to a callable it is invoked at the
    # start of every call to the chain.
    set_trace = None

    def __init__(self, *extractors):
        """ Store ``extractors`` (in the order given) as a mutable list. """
        self.extractors = list(extractors)

    @property
    def __name__(self):
        # Instances have no function name of their own, so the repr is used.
        return repr(self)

    def __repr__(self):
        return '%s(%r)' % (self.__class__.__name__, self.extractors)

    def chained(self, *args, **kw):
        """ Yield the values from each extractor in sequence.

        All positional and keyword arguments are passed unchanged to every
        extractor in the chain.
        """

        if self.set_trace:
            # Give a hook for debugging
            self.set_trace()

        # Chained extractors are used in wex.entrypoints.
        # We re-seek the response to position 0 for each
        # extractor in the chain for convenience.
        # ``seek`` is falsy when there are no positional arguments or when
        # the first one has no ``seek`` attribute.
        seek = args and getattr(args[0], 'seek', None)

        for extractor in self.extractors:
            if seek:
                seek(0)
            for value in yield_values(extractor, *args, **kw):
                yield value

    __call__ = chained

    def append(self, extractor):
        """ Add ``extractor`` to the end of the chain.

        Returns the extractor, so this method can be used as a decorator.
        """
        self.extractors.append(extractor)
        return extractor

    def insert(self, index, extractor=None):
        """ Insert an extractor into the chain before position ``index``.

        :param int index: The list position to insert before.
        :param callable extractor: The extractor to insert.  If omitted a
            decorator is returned, so both forms work:

        .. code-block:: python

            chain.insert(0, extract)

            @chain.insert(0)
            def extract(response):
                return "something"

        The inserted extractor is returned in both cases.
        """
        def decorator(func):
            # Insert into the underlying list.  Calling ``self.insert`` here
            # would re-enter this method and recurse without end.
            self.extractors.insert(index, func)
            return func

        if extractor is None:
            return decorator
        return decorator(extractor)


def chained(*extractors):
    """ Returns an extractor that chains the output of other extractors.

    The output is the output from each extractor in sequence.

    :param extractors: the extractor callables to chain, passed as
        positional arguments

    For example an extractor function ``extract`` defined as follows:

    .. code-block:: python

        def extract1(response):
            yield "one"

        def extract2(response):
            yield "two"

        extract = chained(extract1, extract2)

    Would produce the following extraction output:

    .. code-block:: shell

        $ wex http://example.net/
        "one"
        "two"

    """
    return Chained(*extractors)
class Named(object):
    """ An extractor that is a collection of named extractors.

    Extractors can be added to the collection on construction
    using keyword arguments for the names or they can be added
    using :meth:`.add`.

    The names are labels in the output produced.  For example, an
    extractor function ``extract`` defined as follows:

    .. code-block:: python

        extract = Named(
            name1 = (lambda response: "one"),
            name2 = (lambda response: "two"),
        )

    Would produce the extraction output something like this:

    .. code-block:: shell

        $ wex http://example.net/
        "name1"    "one"
        "name2"    "two"

    The ordering of sub-extractor output is arbitrary.
    """

    # Optional debugging hook.  When set to a callable it is invoked at the
    # start of every call to the collection.
    set_trace = None

    def __init__(self, **kw):
        # Maps label -> extractor.
        self.extractors = {}
        for k, v in kw.items():
            self.add(v, k)

    @property
    def __name__(self):
        # Instances have no function name of their own, so the repr is used.
        return repr(self)

    def __repr__(self):
        # ``list(...)`` keeps the output a plain list of labels on Python 3,
        # where ``dict.keys()`` would render as ``dict_keys([...])``.
        return '%s(%r)' % (self.__class__.__name__, list(self.extractors))

    def __len__(self):
        return len(self.extractors)

    def named(self, *args, **kwargs):
        """ Yield each sub-extractor's values labelled with its name.

        All arguments are passed unchanged to every sub-extractor.
        """
        if self.set_trace:
            # Give a hook for debugging
            self.set_trace()

        for name, extractor in self.extractors.items():
            for value in yield_values(extractor, *args, **kwargs):
                yield value.label(name)

    __call__ = named

    def add(self, extractor, label=None):
        """ Add an attribute extractor.

        :param callable extractor: The extractor to be added.
        :param str label: The label for the extractor.
                          This may be ``None`` in which case the
                          extractor's ``__name__`` attribute will be used.

        This method returns the extractor added.  This means it can
        also be used as a decorator. For example:

        .. code-block:: python

            attrs = Named()

            @attrs.add
            def attr1(response):
                return "one"
        """
        if label is None:
            label = extractor.__name__
        self.extractors[label] = extractor
        return extractor
def named(**kw):
    """ Returns a :class:`.Named` collection of extractors.

    :param kw: the extractors to add, keyed by the label each one's
        output is given
    """
    return Named(**kw)
class Labelled(object):
    """ An extractor that applies labels to the output of another extractor.

    Each item in ``labels`` is either a literal label or a callable.  A
    callable is called with the same arguments as the extractor and its
    return value is used as the label.
    """

    # Optional debugging hook.  When set to a callable it is invoked at the
    # start of every call.
    set_trace = None

    def __init__(self, labels, extractor):
        self.labels = labels
        self.extractor = extractor

    def get_labels(self, *args, **kw):
        """ Return the list of labels, resolving any callable ones.

        Callable labels are called with ``*args`` and ``**kw``; all other
        labels are used as they are.
        """
        return [
            label(*args, **kw) if callable(label) else label
            for label in self.labels
        ]

    def labelled(self, *args, **kw):
        """ Yield the wrapped extractor's values with the labels applied.

        Nothing is yielded (and the wrapped extractor is not called) if
        any of the resolved labels is false.
        """
        if self.set_trace:
            # Give a hook for debugging
            self.set_trace()

        labels = self.get_labels(*args, **kw)
        if not all(labels):
            # don't yield if any labels are false
            return

        for value in yield_values(self.extractor, *args, **kw):
            yield value.label(*labels)

    __call__ = labelled
def labelled(*args):
    """ Returns an extractor that labels the output of another extractor.

    :param args: Zero or more labels followed by the extractor whose
        output is to be labelled (the extractor is always the last
        argument).

    Each label may be a literal or a callable.  Any callable will be
    called with the same parameters as the extractor and whatever is
    returned will be used as a label.

    For example an extractor function ``extract`` defined as follows:

    .. code-block:: python

        def extract1(response):
            yield "one"

        def label2(response):
            return "label2"

        extract = labelled("label1", label2, extract1)

    Would produce the following extraction output:

    .. code-block:: shell

        $ wex http://example.net/
        "label1"    "label2"    "one"

    Note that if any of the labels are
    `false <https://docs.python.org/2/library/stdtypes.html#truth-value-testing>`_
    then no output will be generated from that extractor.
    """
    return Labelled(args[:-1], args[-1])
def label(*labels):
    """ Returns a decorator that labels the output of an extractor.

    :param labels: The labels to apply; each may be a literal or a
        callable.

    The decorator passes ``labels`` followed by the decorated extractor
    to :func:`labelled` and returns the result.
    """
    def decorator(extractor):
        return labelled(*(labels + (extractor,)))
    return decorator


class If(object):
    """ An extractor that picks one of two extractors using a condition. """

    def __init__(self, cond, if_true, if_false):
        self.cond = cond
        self.if_true = if_true
        self.if_false = if_false

    def if_(self, *args, **kw):
        """ Yield values from the extractor selected by ``self.cond``.

        ``self.cond`` is called with the same arguments as the extractor.
        ``self.if_true`` is used when the result is true, otherwise
        ``self.if_false``.  Nothing is yielded if the selected extractor
        is ``None``.
        """
        if self.cond(*args, **kw):
            extractor = self.if_true
        else:
            extractor = self.if_false

        if extractor is None:
            return

        for value in yield_values(extractor, *args, **kw):
            yield value

    __call__ = if_


def if_(cond, if_true, if_false=None):
    """ Returns an :class:`If` extractor.

    :param callable cond: Called with the extractor's arguments to choose
        the branch.
    :param if_true: The extractor used when ``cond`` returns a true value.
    :param if_false: The extractor used otherwise; with the default of
        ``None`` no output is produced in that case.
    """
    return If(cond, if_true, if_false)