1
2
3 import Martel
4 from Martel.Generate import *
5 from Martel.Generate import _generate
6
7 from Martel import convert_re, Parser
8 import re
9
10 from xml.sax import handler
11
12
14 class _Test(handler.ContentHandler, handler.ErrorHandler):
15 def __init__(self):
16 handler.ContentHandler.__init__(self)
17 self.good_parse = 0
18 def startDocument(self):
19 self.good_parse = 1
20 def fatalError(self, exc):
21 if isinstance(exc, Parser.ParserPositionException):
22
23 self.good_parse = 0
24
25 def error(self, exc):
26
27 raise exc
28
def test():
    r"""Check parsers built by ``Martel.Re`` against known good and bad words.

    Each entry of ``patterns`` is ``(regex, good_words, bad_words)``.  For
    every entry a parser is created with ``Martel.Re(regex).make_parser()``
    and fed each word; a shared ``_Test`` handler records whether the parse
    succeeded.  Every word is also cross-checked with the standard ``re``
    module, using both the original pattern (skipped when it contains
    ``\R``) and the pattern text produced by ``str(tree)``.

    Raises:
        AssertionError: if a good word is rejected or not consumed in full,
            or if a bad word is accepted in full.
    """
    cb = _Test()

    patterns = (
        ("a", ("a",), ("A", "", "Z")),
        ("[a-z]", ("a", "b", "q"), ("A", "-")),
        ("[^abc]", ("A", "d", "f"), ("a", "b", "c", "ab", "")),
        ("a+", ("a", "aaa"), ("A", "baa")),
        ("a*", ("", "a", "aaa"), ()),
        ("\\]", ("]",), ("a",)),
        ("a*$", ("a", "aaa"), ("A", "baa", "aaaaab")),
        ("(ab|ac)", ("ab", "ac"), ("aa", "A", "a", "cb")),
        ("(ab|ac)*$", ("", "ab", "ac", "abacabac", "ababab"),
         ("aa", "A", "a", "cb", "ababababaca")),
        ("ab{3}$", ("abbb",), ("abb", "bbb", "abbbb")),
        ("ab{3,}$", ("abbb", "abbbb", "abbbbbbbbb"), ("abb", "bbb", "abbbc")),
        ("ab{3,}", ("abbb", "abbbb", "abbbbbbbbb"), ("abb", "bbb")),
        ("abc$|abcd|bc|d", ("abc", "abcd", "bc", "d"),
         ("xabc", "ab", "a", "", "abce")),
        ("^a.*", ("a", "aa"), ("b", "ba", "c", "")),
        ("^[^b]+", ("a", "aa", "c"), ("b", "ba", "")),
        ("a(?!b).b?", ("aa", "ac", "aab"), ("a", "ab", "abc")),
        ("a(?=[bc])..", ("abx", "acx", "aba"), ("ac", "ab", "adb")),

        # Optional items and bounded repeats.
        ("ab?[bc]?", ("a", "ab", "abb", "ac"), ("", "cab", "x")),
        ("ab{2,4}c?", ("abb", "abbb", "abbbb", "abbbbc"),
         ("ab", "abc", "xabbb")),
        ("ab{2,4}$", ("abb", "abbb", "abbbb"),
         ("ab", "abc", "xabbb", "abbbbc", "abbbbbb")),
        ("ab{2,4}cd?", ("abbc", "abbbc", "abbbbc", "abbbbcd"),
         ("abc", "abbbbbc", "abcbbc")),
        ("ab?c", ("ac", "abc"), ("abb", "abbc", "abbbc")),

        # Patterns using \R; the good words show it accepting "\n", "\r"
        # and "\r\n".
        (r"\R", ("\n", "\r", "\r\n"), (" ", "\r\r", "\n\n", "\r\n ")),
        (r"a\Rb\R", ("a\nb\n", "a\rb\r", "a\r\nb\r\n", "a\rb\r\n"),
         ("ab", "a", "a\n\nb\n", "a\nb", "a\r\nb")),
        (r"ID [^\R]+\R", ("ID A123\n", "ID A123\r", "ID A123\r\n"),
         ("ID A123\n\n", "ID A123\r\r", "ID A123", "ID \n")),

        # Named groups with back-references.
        (r"(?P<name>A+)B(?P=name)A", ("ABAA", "AABAAA", "AAABAAAA"),
         ("ABA", "AB", "ABAAA", "AABA", "AABAA", "AABAAAA")),

        (r"(?P<name>A*)B(?P=name)A", ("BA", "ABAA", "AABAAA", "AAABAAAA"),
         ("BAA", "ABA", "AB", "ABAAA", "AABA", "AABAA", "AABAAAA")),
    )

    for re_pat, good_list, bad_list in patterns:
        tree = Martel.Re(re_pat)
        exp = tree.make_parser()
        exp.setContentHandler(cb)
        exp.setErrorHandler(cb)

        # Patterns containing \R are not handed to the stdlib ``re`` as-is
        # (presumably \R is a Martel-only escape -- confirm against Martel).
        # The substring test replaces ``string.find``, which needs the
        # ``string`` module and no longer exists in Python 3.
        if r"\R" not in re_pat:
            pat = re.compile(re_pat)
        else:
            pat = None
        # The pattern text Martel produces for the tree is always compiled
        # and checked with ``re``.
        pat2 = re.compile(str(tree))

        for word in good_list:
            exp.parseString(word)

            if pat is not None:
                m = pat.match(word)
                assert m, "Re problem recognizing " + repr(word)
                assert m.end() == len(word), "Did not parse all of %s: %d" % \
                       (repr(word), m.end())

            m = pat2.match(word)
            assert m, "created Re problem recognizing " + repr(word)
            assert m.end() == len(word), "Did not parse all of created %s: %d"\
                   % (repr(word), m.end())

            assert cb.good_parse, "Problem not recognizing %s with %s" % \
                   (repr(word), repr(re_pat))

        for word in bad_list:
            exp.parseString(word)

            # A bad word may match a prefix, but must never match in full.
            if pat is not None:
                m = pat.match(word)
                assert not m or m.end() != len(word), \
                       "Re should not recognize " + repr(word)

            m = pat2.match(word)
            assert not m or m.end() != len(word), \
                   "created Re should not recognize " + repr(word)

            assert not cb.good_parse, \
                   "Should not recognize %s\ntagtable is %s" % \
                   (repr(word), repr(exp.tagtable))


if __name__ == "__main__":
    test()
120