"""See top level package docstring for documentation"""
import re
########################################################################
# helper functions
def numerify(x):
    """Coerce a string into an int or a float if possible.

    Integer-valued input (``"42"``, ``"2.0"``, ``"1e3"``) becomes an ``int``,
    any other numeric input becomes a ``float``, and everything else is
    returned unchanged.

    Parameters
    ----------
    x : str
        Value to convert. Objects that cannot be converted at all
        (e.g. ``None``) are returned as-is instead of raising.

    Returns
    -------
    int, float or the original object
    """
    try:
        number = float(x)
    except (TypeError, ValueError):
        # Not numeric (or not convertible at all): hand the input back.
        return x
    # inf % 1 and nan % 1 are both nan, so non-finite values stay floats.
    if number % 1 != 0:
        return number
    try:
        # Parse integer literals directly: going through float would
        # silently lose precision for magnitudes above 2**53.
        return int(x)
    except (TypeError, ValueError):
        # Integer-valued but not an integer literal, e.g. "1e3" or "2.0".
        return int(number)
def listr(x, lstrip=True, rstrip=True, comments=True, coerce=True):
    """Build a list from the lines of ``x``.

    Shorthand for ``Istr(x).to_list(...)``; every keyword argument is
    forwarded unchanged.
    """
    options = {
        'lstrip': lstrip,
        'rstrip': rstrip,
        'comments': comments,
        'coerce': coerce,
    }
    return Istr(x).to_list(**options)
def distr(x, lstrip=True, rstrip=True, comments=True, coerce=True):
    """Build a dictionary from the lines of ``x``.

    Shorthand for ``Istr(x).to_dict(...)``; every keyword argument is
    forwarded unchanged.
    """
    options = {
        'lstrip': lstrip,
        'rstrip': rstrip,
        'comments': comments,
        'coerce': coerce,
    }
    return Istr(x).to_dict(**options)
def tlist(x, delimiter=r'\s+', comments=True, coerce=True):
    """Build a flat list of tokens from ``x``.

    Shorthand for ``TokenList(x).to_list(...)``; every keyword argument is
    forwarded unchanged.
    """
    options = {
        'delimiter': delimiter,
        'comments': comments,
        'coerce': coerce,
    }
    return TokenList(x).to_list(**options)
########################################################################
class Istr(str):
    """
    String (str) subclass that adds to_list and to_dict convenience methods

    - By default, strip whitespace from left and right of each item
    - By default, coerce items to numbers where possible (see coerce)
    - Iterating over the object treats it like a list
    - Indexing the object treats it like a dictionary
    - len() of the object is the number of list items, not of characters
    - For dictionaries, when keys clash, the last one wins
    - For dictionaries, a line holding only a key maps to the empty string
    - dict(Istr) does NOT work (dict makes assumptions about the iterable)
    - list(Istr) and list comprehensions work fine
    - to_list() and to_dict() reprocess the string every time
    - So listr and distr may be more efficient and predictable

    Examples
    --------
    >>> from iterstring import listr # or distr

    A simple use case:

    >>> some_list = listr('''
    ... item one # with a comment
    ... 2
    ... three
    ... ''')
    >>> some_list
    ['item one', 2, 'three']
    >>> type(some_list)
    <class 'list'>

    Using the class directly:

    >>> from iterstring import Istr
    >>> asdf = Istr('''
    ... item one # with a comment
    ... 2
    ... three
    ... ''')
    >>> asdf.to_list()
    ['item one', 2, 'three']
    >>> type(asdf)
    <class 'iterstring.Istr'>
    >>> [x for x in asdf]
    ['item one', 2, 'three']
    >>> fdsa = Istr('''
    ... item one # with a comment
    ... 2 some other value
    ... key3 3.14159
    ... ''')
    >>> fdsa.to_dict()
    {'item': 'one', 2: 'some other value', 'key3': 3.14159}
    >>> fdsa.to_dict(coerce=False)
    {'item': 'one', '2': 'some other value', 'key3': '3.14159'}

    Methods
    -------
    to_list(lstrip=True, rstrip=True, comments=True, coerce=True)
        Create line-based list representation of string
    to_dict(lstrip=True, rstrip=True, comments=True, coerce=True)
        Create line-based dictionary representation of string
    """

    def to_list(self, lstrip=True, rstrip=True, comments=True, coerce=True):
        """Create a list using each line as an item.

        Parameters
        ----------
        lstrip : bool
            Remove leading whitespace from every line.
        rstrip : bool
            Remove trailing whitespace from every line.
        comments : bool
            Remove everything from the first ``#`` to the end of the line.
        coerce : bool
            Pass every item through ``numerify``.

        Returns
        -------
        list
            One item per non-blank line, in order of appearance.
        """
        lines = self.split("\n")
        if comments:
            # strip out comments
            lines = [re.sub(r'#.*', '', line) for line in lines]
        if lstrip:
            # remove leading whitespace
            lines = [line.lstrip() for line in lines]
        if rstrip:
            # remove trailing whitespace
            lines = [line.rstrip() for line in lines]
        # remove empty and whitespace-only lines
        lines = [line for line in lines if line.strip()]
        if coerce:
            # convert values to numeric types where that makes sense
            lines = [numerify(line) for line in lines]
        return lines

    def to_dict(self, lstrip=True, rstrip=True, comments=True, coerce=True):
        """Create a dictionary using the first token of each line as key.

        The remainder of the line (after the first run of whitespace) is the
        value; a line consisting of a key only gets ``''`` as its value.
        When keys clash, the last line wins.

        Parameters
        ----------
        lstrip, rstrip, comments : bool
            Forwarded to ``to_list``.
        coerce : bool
            Pass every key and every value through ``numerify``.

        Returns
        -------
        dict
        """
        # Coercion must wait until after the key/value split, so the lines
        # are always fetched as plain strings here.
        lines = self.to_list(
            lstrip=lstrip,
            rstrip=rstrip,
            comments=comments,
            coerce=False,
        )
        kv = {}
        for line in lines:
            # split into first token and the remaining text
            parts = re.split(r'\s+', line, maxsplit=1)
            key = parts[0]
            # key-only lines have no second part
            value = parts[1] if len(parts) > 1 else ''
            if coerce:
                # convert keys, values to numeric types where that makes sense
                key, value = numerify(key), numerify(value)
            kv[key] = value
        return kv

    def __iter__(self):
        """Iterate over the items of ``to_list()``."""
        return iter(self.to_list())

    def __next__(self):
        """Return the first item of ``to_list()``.

        The object keeps no iteration state, so every call yields the first
        item again (``StopIteration`` when there are no items). Use
        ``iter()`` or a ``for`` loop to walk through all items.
        """
        return next(iter(self.to_list()))

    def __getitem__(self, i):
        """Look up ``i`` as a key of ``to_dict()``."""
        return self.to_dict()[i]

    def __len__(self):
        """Return the number of items in ``to_list()``."""
        return len(self.to_list())
class TokenList(str):
    """
    String (str) subclass that adds a token-based to_list convenience method

    - By default, tokens are separated by any run of whitespace
    - By default, comments (``#`` to end of line) are removed
    - By default, coerce tokens to numbers where possible (see coerce)
    - Lines are joined with a single space before splitting, so with a
      custom delimiter a line break does not separate tokens by itself
    - Iterating over the object iterates over the tokens
    - len() of the object is the number of tokens, not of characters
    - to_list() reprocesses the string every time

    Methods
    -------
    to_list(delimiter=r'\\s+', comments=True, coerce=True)
        Create token-based list representation of string
    """

    def to_list(self, delimiter=r'\s+', comments=True, coerce=True):
        """Create a flat list of the tokens in the string.

        Parameters
        ----------
        delimiter : str
            Regular expression that separates tokens.
        comments : bool
            Remove everything from the first ``#`` to the end of each line.
        coerce : bool
            Pass every token through ``numerify``.

        Returns
        -------
        list
            The non-blank tokens, in order of appearance.
        """
        lines = self.split("\n")
        if comments:
            # strip out comments
            lines = [re.sub(r'#.*', '', line) for line in lines]
        text = ' '.join(lines)
        # Drop empty and whitespace-only tokens. re.split also yields None
        # for capture groups in the delimiter that did not take part in a
        # match; those are dropped as well.
        tokens = [
            token
            for token in re.split(delimiter, text)
            if token is not None and token.strip()
        ]
        if coerce:
            # convert values to numeric types where that makes sense
            tokens = [numerify(token) for token in tokens]
        return tokens

    def __iter__(self):
        """Iterate over the tokens of ``to_list()``."""
        return iter(self.to_list())

    def __next__(self):
        """Return the first token of ``to_list()``.

        The object keeps no iteration state, so every call yields the first
        token again (``StopIteration`` when there are no tokens). Use
        ``iter()`` or a ``for`` loop to walk through all tokens.
        """
        return next(iter(self.to_list()))

    def __len__(self):
        """Return the number of tokens in ``to_list()``."""
        return len(self.to_list())