# Source code for iterstring.iterstring

"""See top level package docstring for documentation"""

import re

########################################################################

# helper functions


def numerify(x):
    """Coerce a string into an int or float if possible.

    Strings that are plain integer literals are parsed with ``int``
    directly so that large values keep every digit.  Anything else is
    parsed with ``float`` and collapsed to ``int`` when it has no
    fractional part (``"1.0"`` becomes ``1``, ``"1e3"`` becomes ``1000``).

    Parameters
    ----------
    x : str
        Value to coerce.

    Returns
    -------
    int, float, or the original value
        ``x`` itself is returned unchanged when it cannot be parsed as
        a number.
    """
    if isinstance(x, str):
        try:
            # Going through float first would round integers beyond 2**53.
            return int(x)
        except ValueError:
            pass
    try:
        number = float(x)
    except ValueError:
        return x
    # nan and inf yield nan for "% 1", so they are returned as floats.
    if number % 1 == 0:
        return int(number)
    return number


def listr(x, lstrip=True, rstrip=True, comments=True, coerce=True):
    """Convenience function for Istr(x).to_list()

    Wraps ``x`` in an ``Istr`` and returns the result of its ``to_list``
    method; ``lstrip``, ``rstrip``, ``comments`` and ``coerce`` are
    forwarded to that method unchanged.
    """
    return Istr(x).to_list(
        lstrip=lstrip,
        rstrip=rstrip,
        comments=comments,
        coerce=coerce,
    )


def distr(x, lstrip=True, rstrip=True, comments=True, coerce=True):
    """Convenience function for Istr(x).to_dict()

    Wraps ``x`` in an ``Istr`` and returns the result of its ``to_dict``
    method; ``lstrip``, ``rstrip``, ``comments`` and ``coerce`` are
    forwarded to that method unchanged.
    """
    return Istr(x).to_dict(
        lstrip=lstrip,
        rstrip=rstrip,
        comments=comments,
        coerce=coerce,
    )


def tlist(x, delimiter=r'\s+', comments=True, coerce=True):
    """Convenience function for TokenList(x).to_list()

    Wraps ``x`` in a ``TokenList`` and returns the result of its
    ``to_list`` method; ``delimiter``, ``comments`` and ``coerce`` are
    forwarded to that method unchanged.
    """
    return TokenList(x).to_list(
        delimiter=delimiter,
        comments=comments,
        coerce=coerce,
    )


########################################################################


class Istr(str):
    """
    String (str) subclass that adds to_list and to_dict convenience methods

    - By default, strip whitespace from left and right of each item
    - By default, coerce items to numbers where possible (see coerce)
    - Iterating over the object treats it like a list
    - Indexing the object treats it like a dictionary
    - For dictionaries, When keys clash, the last one wins
    - For dictionaries, a line holding only a key maps it to ''
    - dict(Istr) does NOT work (dict makes assumptions about the iterable)
        - list(Istr) and list comprehensions work fine
    - to_list() and to_dict() reprocess the string every time
        - So listr and distr may be more efficient and predictable

    Examples
    --------

    >>> from iterstring import listr # or distr

    A simple use case:

    >>> some_list = listr('''
    item one # with a comment
      2
    three
    ''')
    >>> some_list
    ['item one', 2, 'three']
    >>> type(some_list)
    <class 'list'>

    Using the class directly:

    >>> from iterstring import Istr
    >>> asdf = Istr('''
    item one # with a comment
      2
    three
    ''')
    >>> asdf.to_list()
    ['item one', 2, 'three']
    >>> type(asdf)
    <class 'iterstring.Istr'>

    >>> [x for x in asdf]
    ['item one', 2, 'three']

    >>> fdsa = Istr('''
    item one # with a comment
      2 some other value
    key3 3.14159
    ''')
    >>> fdsa.to_dict()
    {'item': 'one', 2: 'some other value', 'key3': 3.14159}
    >>> fdsa.to_dict(coerce=False)
    {'item': 'one', '2': 'some other value', 'key3': '3.14159'}

    Methods
    -------
    to_list(lstrip=True, rstrip=True, comments=True, coerce=True)
        Create line-based list representation of string
    to_dict(lstrip=True, rstrip=True, comments=True, coerce=True)
        Create line-based dictionary representation of string
    """

    def to_list(self, lstrip=True, rstrip=True, comments=True, coerce=True):
        """Create a list using each line as an item

        Parameters
        ----------
        lstrip : bool
            Remove leading whitespace from every line.
        rstrip : bool
            Remove trailing whitespace from every line.
        comments : bool
            Remove everything from the first ``#`` to the end of the line.
        coerce : bool
            Pass every item through ``numerify``.

        Returns
        -------
        list
            One item per line; empty and whitespace-only lines are dropped.
        """
        lines = self.split("\n")
        if comments:
            # strip out comments
            lines = [re.sub(r'#.*', '', x) for x in lines]
        if lstrip:
            # remove leading whitespace
            lines = [x.lstrip() for x in lines]
        if rstrip:
            # remove trailing whitespace
            lines = [x.rstrip() for x in lines]
        # remove empty and whitespace-only lines
        lines = [x for x in lines if x.strip()]
        if coerce:
            # convert values to numeric types where that makes sense
            lines = [numerify(x) for x in lines]
        return lines

    def to_dict(self, lstrip=True, rstrip=True, comments=True, coerce=True):
        """Create a dictionary using the first token of each line as key

        The rest of the line, after the first run of whitespace, is the
        value.  A line that holds a single token maps that key to ``''``.
        When keys clash, the last one wins.

        Parameters
        ----------
        lstrip, rstrip, comments : bool
            Forwarded to ``to_list``.
        coerce : bool
            Pass every key and every value through ``numerify``.

        Returns
        -------
        dict
        """
        lines = self.to_list(
            lstrip=lstrip,
            rstrip=rstrip,
            comments=comments,
            coerce=False,  # keys and values are coerced separately below
        )
        kv = {}
        for line in lines:
            # split each item into its first token and the remaining text
            parts = re.split(r'\s+', line, maxsplit=1)
            key = parts[0]
            # a line with a key and nothing else has no second part
            value = parts[1] if len(parts) > 1 else ''
            if coerce:
                # convert keys, values to numeric types where that makes sense
                key, value = numerify(key), numerify(value)
            kv[key] = value
        return kv

    def __iter__(self):
        return iter(self.to_list())

    def __next__(self):
        # Instances are iterable but keep no iteration position; iteration
        # goes through __iter__, which hands out a fresh list iterator.
        raise TypeError(
            "'{}' object is not an iterator; use iter() on it instead".format(
                type(self).__name__
            )
        )

    def __getitem__(self, i):
        return self.to_dict()[i]

    def __len__(self):
        # coercion never adds or removes items, so skip it when counting
        return len(self.to_list(coerce=False))


class TokenList(str):
    """
    String (str) subclass that adds a to_list method returning tokens

    - By default, tokens are separated by runs of whitespace (see delimiter)
    - By default, '#' starts a comment that runs to the end of the line
    - By default, coerce tokens to numbers where possible (see coerce)
    - Iterating over the object yields its tokens
    - len() of the object is the number of tokens
    - to_list() reprocesses the string every time

    Methods
    -------
    to_list(delimiter=r'\\s+', comments=True, coerce=True)
        Create token-based list representation of string
    """

    def to_list(self, delimiter=r'\s+', comments=True, coerce=True):
        """Create a list using each token as an item

        Parameters
        ----------
        delimiter : str
            Regular expression handed to ``re.split`` to separate tokens.
        comments : bool
            Remove everything from the first ``#`` to the end of each line.
        coerce : bool
            Pass every token through ``numerify``.

        Returns
        -------
        list
            The tokens; empty and whitespace-only tokens are dropped.
        """
        lines = self.split("\n")
        if comments:
            # strip out comments
            lines = [re.sub(r'#.*', '', x) for x in lines]
        # lines are joined with a single space before being split
        text = ' '.join(lines)
        tokens = re.split(delimiter, text)
        # drop empty and whitespace-only strings
        tokens = [x for x in tokens if x.strip()]
        if coerce:
            # convert values to numeric types where that makes sense
            tokens = [numerify(x) for x in tokens]
        return tokens

    def __iter__(self):
        return iter(self.to_list())

    def __next__(self):
        # Instances are iterable but keep no iteration position; iteration
        # goes through __iter__, which hands out a fresh list iterator.
        raise TypeError(
            "'{}' object is not an iterator; use iter() on it instead".format(
                type(self).__name__
            )
        )

    def __len__(self):
        # coercion never adds or removes tokens, so skip it when counting
        return len(self.to_list(coerce=False))
# Source code for iterstring.iterstring
#
# iterstring
#
# Navigation
#
# Related Topics