#!/usr/bin/env python
##
##  sexpr.py - by Yusuke Shinyama
##
##  * public domain *
##
##  from http://www.unixuser.org/~euske/python/index.html:
##  The following files are in public domain except where otherwise noted.
##  THESE FILES COME WITH ABSOLUTELY NO WARRANTY.

from abstfilter import AbstractFeeder, AbstractFilter, AbstractConsumer


##  SExprReader
##
class SExprReader(AbstractFilter):
    """Incremental S-expression reader.

    Characters are pushed in with feed().  Every completed top-level
    object (a bare symbol string, or a nested list of symbol strings)
    is passed on through self.feed_next().

    Usage:

      reader = SExprReader(consumer)
      reader.feed("(this is (sexpr))")
      reader.close()
    """

    COMMENT_BEGIN = ";"
    COMMENT_END = "\n"
    SEPARATOR = " \t\n"
    PAREN_BEGIN = "("
    PAREN_END = ")"
    QUOTE = '"'
    ESCAPE = "\\"

    def __init__(self, next_filter,
                 comment_begin=COMMENT_BEGIN,
                 comment_end=COMMENT_END,
                 separator=SEPARATOR,
                 paren_begin=PAREN_BEGIN,
                 paren_end=PAREN_END,
                 quote=QUOTE,
                 escape=ESCAPE):
        """Create a reader that forwards parsed objects to next_filter.

        Each delimiter argument is a string that is used as a *set* of
        characters: a character belongs to a category when it occurs
        anywhere in the corresponding string.
        """
        AbstractFilter.__init__(self, next_filter)
        self.comment_begin = comment_begin
        self.comment_end = comment_end
        self.separator = separator
        self.paren_begin = paren_begin
        self.paren_end = paren_end
        self.quote = quote
        self.escape = escape
        # Every character that is not simply appended to the current symbol.
        self.special = (comment_begin + separator + paren_begin +
                        paren_end + quote + escape)
        self.reset()
        return

    # SExprReader ignores any error and
    # tries to continue as long as possible.
    # If you want to throw an exception instead,
    # override these methods (see StrictSExprReader).

    def illegal_close_paren(self, i):
        """Called when a closing parenthesis has no matching opener.

        i is the index of the offending character within the chunk
        currently being processed by feed().
        """
        # Single-argument print(...) behaves identically on Python 2 and 3.
        print("Ignore a close parenthesis: %d" % i)
        return

    def premature_eof(self, i, x):
        """Called when the input ends while lists are still open.

        i is the number of parentheses left unclosed and x is the
        outermost partially constructed list.
        """
        print("Premature end of file: %d parens left, partial=%s" % (i, x))
        return

    def reset(self):
        """Reset all internal parser state and return self."""
        self.incomment = False  # True while within a comment.
        self.inquote = False    # True while within a quote.
        self.inescape = False   # True right after an escape character.
        self.sym = ''           # partially constructed symbol.
        # NOTICE: None != nil (an empty list)
        self.build = None       # partially constructed list.
        self.build_stack = []   # chain of enclosing partial lists.
        return self

    def _flush_symbol(self):
        """Emit the pending symbol, if any, and clear it.

        The symbol is appended to the list under construction, or sent
        to feed_next() when no list is open.  Empty symbols are never
        emitted.
        """
        if not self.sym:
            return
        if self.build is None:
            self.feed_next(self.sym)
        else:
            self.build.append(self.sym)
        self.sym = ''
        return

    def feed(self, tokens):
        """Analyze a chunk of input, one character at a time.

        State is kept between calls, so the input may be split across
        several feed() calls at arbitrary positions.  Returns self.

        Note: because empty symbols are never emitted, an empty quoted
        string ("") does not produce a symbol.
        """
        # The order of the tests below is significant: comment state wins
        # over everything, then escapes, then quoting.
        for (i, c) in enumerate(tokens):
            if self.incomment:
                # Within a comment - skip until a comment terminator.
                self.incomment = (c not in self.comment_end)
            elif self.inescape or (c not in self.special):
                # Ordinary (or escaped) character: extend the symbol.
                self.sym += c
                self.inescape = False
            elif c in self.escape:
                # The next character will be taken literally.
                self.inescape = True
            elif self.inquote and (c not in self.quote):
                # Special characters lose their meaning inside quotes.
                self.sym += c
            else:
                # Special character (blank, parenthesis, quote or comment):
                # whatever symbol was being built ends here.
                self._flush_symbol()
                if c in self.comment_begin:
                    self.incomment = True
                elif c in self.quote:
                    self.inquote = not self.inquote
                elif c in self.paren_begin:
                    # Beginning a new list; remember the enclosing one
                    # (None at the top level).
                    self.build_stack.append(self.build)
                    empty = []
                    if self.build is not None:
                        # Nest the new list at the end of the current one.
                        self.build.append(empty)
                    self.build = empty
                elif c in self.paren_end:
                    # Terminating the current list.
                    if self.build is None:
                        # There is no open list to close.
                        self.illegal_close_paren(i)
                    else:
                        if len(self.build_stack) == 1:
                            # Closing a top-level list: hand it over.
                            self.feed_next(self.build)
                        self.build = self.build_stack.pop()
        return self

    def terminate(self):
        """Signal the end of input and flush pending state.

        If lists are still open, premature_eof() is called with the
        number of unclosed parentheses and the outermost partial list.
        Otherwise a pending top-level symbol is sent to feed_next().
        Returns self.
        """
        # A pending symbol goes into the open list (error case) or
        # downstream (normal case).
        self._flush_symbol()
        if self.build is not None:
            # Error - a working list should not exist at this point.
            # Capture the depth BEFORE clearing the stack so that the
            # reported number of unclosed parentheses is accurate.
            depth = len(self.build_stack)
            if depth == 1:
                partial = self.build
            else:
                # build_stack[0] is the top-level None; [1] is the
                # outermost list.
                partial = self.build_stack[1]
            self.build = None
            self.build_stack = []
            self.premature_eof(depth, partial)
        return self

    def close(self):
        """Close the filter chain, then flush this reader's state."""
        # NOTE(review): terminate() may still call feed_next() after
        # AbstractFilter.close() has run - confirm this order is intended.
        AbstractFilter.close(self)
        self.terminate()
        return


##  StrictSExprReader
##
class SExprIllegalClosingParenError(ValueError):
    """Raised by StrictSExprReader on an unmatched closing parenthesis."""
    pass


class SExprPrematureEOFError(ValueError):
    """Raised by StrictSExprReader when input ends inside an open list."""
    pass


class StrictSExprReader(SExprReader):
    """SExprReader that raises on ill-structured input instead of printing."""

    def illegal_close_paren(self, i):
        raise SExprIllegalClosingParenError(i)

    def premature_eof(self, i, x):
        raise SExprPrematureEOFError(i, x)


##  str2sexpr
##
class _SExprStrConverter(AbstractConsumer):
    """Consumer that collects every parsed object into a shared list."""

    # Shared, class-level result list; rebound by str2sexpr() and
    # str2sexpr_strict() before each parse.
    results = []

    def feed(self, s):
        _SExprStrConverter.results.append(s)
        return


_str_converter = SExprReader(_SExprStrConverter())
_str_converter_strict = StrictSExprReader(_SExprStrConverter())


def str2sexpr(s):
    """Parse a string and return the list of its top-level objects.

    Structural errors are reported on stdout and otherwise ignored.
    """
    _SExprStrConverter.results = []
    _str_converter.reset().feed(s).terminate()
    return _SExprStrConverter.results


def str2sexpr_strict(s):
    """Parse a string and return the list of its top-level objects.

    Raises SExprIllegalClosingParenError or SExprPrematureEOFError on
    ill-structured input.
    """
    _SExprStrConverter.results = []
    _str_converter_strict.reset().feed(s).terminate()
    return _SExprStrConverter.results


##  sexpr2str
##
def sexpr2str(e):
    """Convert a sexpr into a Lisp-like representation.

    Lists are rendered recursively as "(a b ...)"; anything that is not
    a list is returned unchanged.
    """
    if not isinstance(e, list):
        return e
    return "(" + " ".join(map(sexpr2str, e)) + ")"


# test stuff
def test():
    """Run a few self-checks; the last call exercises premature EOF."""
    assert str2sexpr("(this ;comment\n is (a test (sentences) (des()) (yo)))") == \
        [["this", "is", ["a", "test", ["sentences"], ["des", []], ["yo"]]]]
    assert str2sexpr('''(paren\\(\\)theses_in\\#symbol "space in \nsymbol" this\\ way\\ also. "escape is \\"better than\\" quote")''') == \
        [['paren()theses_in#symbol', 'space in \nsymbol', 'this way also.', 'escape is "better than" quote']]
    str2sexpr("(this (is (a (parial (sentence")
    return


# main
if __name__ == "__main__":
    test()