1
2
3
4
5
6
7
8
9 from itertools import islice
10 import re
11
12 import nltk_lite.tag as tag
13 from nltk_lite.corpora import brown
14
15
16
# Label for the save/load routines that follow. The statement opening the
# enclosing scope (presumably a class header) is not present in this listing.
# NOTE(review): `_classname` is not referenced in the visible code; confirm
# where it is consumed.
18 _classname = "DefaultTagger"
19
# Body of the save routine that follows `_classname = "DefaultTagger"`. Its
# `def` line is absent from this listing; `self` and `filename` are taken to
# be its parameters, as the docstring describes.
# Writes self._tag as the entire content of `filename`.
21 """
22 Marshals (saves to a plain text file) the tagger model.
23
24 @param filename: Name of the file to which save the model (will
25 be overwritten if it already exists).
26 @type filename: C{string}
27 """
# file() is the Python 2 built-in; mode "w" truncates an existing file.
28 handler = file(filename, "w")
# Assumes self._tag is a string; write() raises TypeError otherwise.
# NOTE(review): if write() raises, close() below is never reached and the
# handle is left open; a try/finally would guarantee the close.
29 handler.write(self._tag)
30 handler.close()
31
# Body of the load routine that follows `_classname = "DefaultTagger"`. Its
# `def` line is absent from this listing; `self` and `filename` are taken to
# be its parameters, as the docstring describes.
# Replaces self._tag with the complete text of `filename`.
33 """
34 Unmarshals (loads from a plain text file) the tagger model. For
35 safety, this operation is intended to be performed only on
36 newly created taggers (i.e., without any previous model).
37
38 @param filename: Name of the file from which the model will
39 be read.
40 @type filename: C{string}
41 """
42 handler = file(filename, "r")
# read() returns the file verbatim, so a trailing newline (if the file has
# one) becomes part of self._tag.
# NOTE(review): if read() raises, close() below is skipped; a try/finally
# would guarantee the close.
43 self._tag = handler.read()
44 handler.close()
45
# Label for the save/load routines that follow. The statement opening the
# enclosing scope (presumably a class header) is not present in this listing.
# NOTE(review): `_classname` is not referenced in the visible code; confirm
# where it is consumed.
47 _classname = "UnigramTagger"
48
# Body of the save routine that follows `_classname = "UnigramTagger"`. Its
# `def` line is absent from this listing; `self` and `filename` are taken to
# be its parameters, as the docstring describes.
# Writes every (text, tag) entry of self._model as one "text:tag" line.
50 """
51 Marshals (saves to a plain text file) the tagger model.
52
53 @param filename: Name of the file to which save the model (will
54 be overwritten if it already exists).
55 @type filename: C{string}
56 """
# file() is the Python 2 built-in; mode "w" truncates an existing file.
57 handler = file(filename, "w")
58
# iteritems() is the Python 2 dict iterator, so self._model must provide it.
# The loop variable `tag` hides the imported `tag` module for the rest of
# this routine.
59 for text, tag in self._model.iteritems():
# "%s" stringifies both parts; no escaping is applied.
# NOTE(review): a newline inside text or tag, or a ':' inside the tag, yields
# a record that cannot be told apart from other records when read back.
60 handler.write("%s:%s\n" % (text, tag))
61
# NOTE(review): not reached if a write above raises; a try/finally would
# guarantee the close.
62 handler.close()
63
# Body of the load routine that follows `_classname = "UnigramTagger"`. Its
# `def` line is absent from this listing; `self` and `filename` are taken to
# be its parameters, as the docstring describes.
# Parses each line of `filename` as "text:tag" and stores
# self._model[text] = tag. The model is not cleared first, so entries already
# present under other keys are kept.
65 """
66 Unmarshals (loads from a plain text file) the tagger model. For
67 safety, this operation is intended to be performed only on
68 newly created taggers (i.e., without any previous model).
69
70 @param filename: Name of the file from which the model will
71 be read.
72 @type filename: C{string}
73 """
74 handler = file(filename, "r")
75
# Greedy first group, lazy second group: a line is split at the last ':' that
# still has at least one character after it. The text may therefore contain
# ':', but a tag containing ':' is split in the wrong place. '$' matches
# before the trailing newline, so the newline is not part of the tag.
# re.UNICODE has no effect here because the pattern uses no \w, \d, \s or \b.
76 pattern = re.compile(r'^(.+):(.+?)$', re.UNICODE)
# readlines() loads the whole file into memory before parsing starts.
77 for line in handler.readlines():
# NOTE(review): re.match returns None for a line that does not fit the
# pattern (for example an empty line); the next statement then raises
# AttributeError.
78 m = re.match(pattern, line)
# Both values are strings as read from the file; the local `tag` hides the
# imported `tag` module inside this routine.
79 text, tag = m.groups()
80 self._model[text] = tag
81
# NOTE(review): skipped when parsing raises; a try/finally would guarantee
# the close.
82 handler.close()
83
# Label for the save/load routines that follow. The statement opening the
# enclosing scope (presumably a class header) is not present in this listing.
# NOTE(review): `_classname` is not referenced in the visible code; confirm
# where it is consumed.
85 _classname = "AffixTagger"
86
# Body of the save routine that follows `_classname = "AffixTagger"`. Its
# `def` line is absent from this listing; `self` and `filename` are taken to
# be its parameters, as the docstring describes.
# Writes two header lines ("length N", "minlength N") followed by one
# "text:tag" line per entry of self._model.
88 """
89 Marshals (saves to a plain text file) the tagger model.
90
91 @param filename: Name of the file to which save the model (will
92 be overwritten if it already exists).
93 @type filename: C{string}
94 """
# file() is the Python 2 built-in; mode "w" truncates an existing file.
95 handler = file(filename, "w")
96
# "%i" requires numeric values; self._length and self._minlength are written
# as integers.
97 handler.write("length %i\n" % self._length)
98 handler.write("minlength %i\n" % self._minlength)
99
# iteritems() is the Python 2 dict iterator. The loop variable `tag` hides
# the imported `tag` module for the rest of this routine.
100 for text, tag in self._model.iteritems():
# NOTE(review): no escaping is applied; a newline inside text or tag, or a
# ':' inside the tag, yields a record that is ambiguous when read back.
101 handler.write("%s:%s\n" % (text, tag))
102
# NOTE(review): not reached if a write above raises; a try/finally would
# guarantee the close.
103 handler.close()
104
# Body of the load routine that follows `_classname = "AffixTagger"`. Its
# `def` line is absent from this listing; `self` and `filename` are taken to
# be its parameters, as the docstring describes.
# Reads self._length and self._minlength from the first two lines of
# `filename`, then stores every remaining "text:tag" line in self._model.
106 """
107 Unmarshals (loads from a plain text file) the tagger model. For
108 safety, this operation is intended to be performed only on
109 newly created taggers (i.e., without any previous model).
110
111 @param filename: Name of the file from which the model will
112 be read.
113 @type filename: C{string}
114 """
115 handler = file(filename, "r")
116
# The whole file is held in memory; the header is addressed by position.
117 lines = handler.readlines()
118
# split(...)[1] takes the text after the keyword; int() tolerates the
# trailing newline.
# NOTE(review): a file with fewer than two lines, or a header line without
# the expected keyword, raises IndexError here; a non-numeric value raises
# ValueError.
119 self._length = int(lines[0].split("length ")[1])
120 self._minlength = int(lines[1].split("minlength ")[1])
121
# Greedy first group, lazy second group: a line is split at the last ':' that
# still has at least one character after it, so a tag containing ':' is split
# in the wrong place. '$' matches before the trailing newline.
122 pattern = re.compile(r'^(.+):(.+?)$', re.UNICODE)
123 for line in lines[2:]:
# NOTE(review): re.match returns None for a non-matching line (for example an
# empty one); the next statement then raises AttributeError.
124 m = re.match(pattern, line)
# Both values are strings; the local `tag` hides the imported `tag` module
# inside this routine.
125 text, tag = m.groups()
126 self._model[text] = tag
127
# NOTE(review): the handle stays open during parsing although all data is
# already in `lines`, and this close is skipped if anything above raises.
128 handler.close()
129
# Label for the save/load routines that follow. The statement opening the
# enclosing scope (presumably a class header) is not present in this listing.
# NOTE(review): `_classname` is not referenced in the visible code; confirm
# where it is consumed.
131 _classname = "NgramTagger"
132
# Body of the save routine that follows `_classname = "NgramTagger"`. Its
# `def` line is absent from this listing; `self` and `filename` are taken to
# be its parameters, as the docstring describes.
# Writes a header line "n N" followed by one "[c1:c2:...]:text:tag" line per
# entry of self._model.
134 """
135 Marshals (saves to a plain text file) the tagger model.
136
137 @param filename: Name of the file to which save the model (will
138 be overwritten if it already exists).
139 @type filename: C{string}
140 """
# file() is the Python 2 built-in; mode "w" truncates an existing file.
141 handler = file(filename, "w")
142
143 handler.write("n %i\n" % self._n)
144
# Iterating the model yields its keys; each key is indexed as
# (context, text) and the mapped value is the tag.
145 for entry in self._model:
# The local `tag` hides the imported `tag` module inside this routine.
146 context, text, tag = entry[0], entry[1], self._model[entry]
147
148 try:
# ":".join(context) raises TypeError when the context holds a non-string
# item (presumably None placeholders; confirm against the code that builds
# the model).
149 entry_str = "[%s]:%s:%s\n" % (":".join(context), text, tag)
150 handler.write(entry_str)
151 except TypeError:
152
# NOTE(review): such entries are dropped without any report, so the saved
# file can hold fewer entries than self._model. A TypeError raised by write()
# itself would be swallowed here as well.
153 pass
154
# NOTE(review): not reached if an exception other than TypeError is raised
# above; a try/finally would guarantee the close.
155 handler.close()
156
# Body of the load routine that follows `_classname = "NgramTagger"`. Its
# `def` line is absent from this listing; `self` and `filename` are taken to
# be its parameters, as the docstring describes.
# Reads self._n from the first line of `filename`, then turns every remaining
# "[c1:c2:...]:text:tag" line into self._model[(context_tuple, text)] = tag.
158 """
159 Unmarshals (loads from a plain text file) the tagger model. For
160 safety, this operation is intended to be performed only on
161 newly created taggers (i.e., without any previous model).
162
163 @param filename: Name of the file from which the model will
164 be read.
165 @type filename: C{string}
166 """
167 handler = file(filename, "r")
168
# The whole file is held in memory; the header is addressed by position.
169 lines = handler.readlines()
170
# int() tolerates the trailing newline.
# NOTE(review): an empty file, or a first line without "n ", raises
# IndexError here.
171 self._n = int(lines[0].split("n ")[1])
172
173
# Record pattern: bracketed context, then text, then tag. The groups for
# context and text are greedy, so text and tag are separated at the last ':'
# that still has a character after it.
174 pattern = re.compile(r'^\[(.+)\]:(.+):(.+?)$', re.UNICODE)
175
176
177
178
179
180
181
# Context pattern: one "(.+?)" plus (n - 2) repetitions of ":(.+?)", i.e.
# n - 1 capture groups, one per context item.
# NOTE(review): for n < 2 the repetition count is <= 0, which yields an empty
# string, so the pattern still expects exactly one item.
# NOTE(review): context items that themselves contain ':' cannot be recovered
# unambiguously from this format.
182 context_pattern_str = r'^(.+?)%s$' % ( r':(.+?)' * (self._n-2) )
183 context_pattern = re.compile(context_pattern_str, re.UNICODE)
184
185 for line in lines[1:]:
# NOTE(review): re.match returns None for a non-matching line (for example an
# empty one); the next statement then raises AttributeError.
186 m = re.match(pattern, line)
# All three values are strings; the local `tag` hides the imported `tag`
# module inside this routine.
187 context, text, tag = m.groups()
188
# NOTE(review): c_m is None when the number of context items does not fit n,
# which makes c_m.groups() raise AttributeError.
189 c_m = re.match(context_pattern, context)
# groups() returns a tuple of strings, so the key is (tuple_of_str, text).
190 key = (c_m.groups(), text)
191 self._model[key] = tag
192
# NOTE(review): the handle stays open during parsing although all data is
# already in `lines`, and this close is skipped if anything above raises.
193 handler.close()
194
207