Package nltk_lite :: Package contrib :: Module hole
[hide private]
[frames] | no frames]

Source Code for Module nltk_lite.contrib.hole

  1  # Contributed by Peter Wang 
  2   
  3  from nltk_lite import tokenize 
  4  from nltk_lite.parse.featurechart import * 
  5  from nltk_lite.parse import GrammarFile, bracket_parse, tree 
  6  from nltk_lite.draw.tree import draw_trees 
  7   
  8  """ 
  9  An implementation of the Hole Semantics model, following Blackburn and Bos, 
 10  Representation and Inference for Natural Language (CSLI, 2005). 
 11   
 12  The semantic representations are built by the grammar hole.cfg. 
 13  This module contains driver code to read in sentences and parse them 
 14  according to a hole semantics grammar. 
 15   
 16  After parsing, the semantic representation is in the form of an underspecified 
 17  representation that is not easy to read.  We use a "plugging" algorithm to 
 18  convert that representation into first-order logic formulas.  These can be 
 19  displayed textually or graphically. 
 20  """ 
 21   
 22  # Note that in this code there may be multiple types of trees being referred to: 
 23  # 
 24  # 1. parse trees 
 25  # 2. the underspecified representation 
 26  # 3. first-order logic formula trees 
 27  # 4. the search space when plugging (search tree) 
 28  # 
 29   
30 -class HoleSemantics:
31 """ 32 This class holds the broken-down components of a hole semantics, i.e. it 33 extracts the holes, labels, logic formula fragments and constraints out of 34 a big conjunction of such as produced by the hole semantics grammar. It 35 then provides some operations on the semantics dealing with holes, labels 36 and finding legal ways to plug holes with labels. 37 """
38 - def __init__(self, usr):
39 """ 40 Constructor. `usr' is a tree of nodes that can take the forms: 41 (and t t'), (hole v), (label v), (: v phi), (leq v v) 42 where t, t' are subtrees, v is a variable, and phi is a formula fragment. 43 """ 44 self.holes = set() # set of variables which were asserted hole(x) 45 self.labels = set() # set of variables which were asserted label(x) 46 self.fragments = {} # mapping of label -> formula fragment 47 self.constraints = set() # set of Constraints 48 self._break_down(usr) 49 self.top_most_labels = self._find_top_most_labels() 50 self.top_hole = self._find_top_hole()
51
52 - def is_label(self, x):
53 """Return true if x is a label in this semantic representation.""" 54 return x in self.labels
55
56 - def is_hole(self, x):
57 """Return true if x is a hole in this semantic representation.""" 58 return x in self.holes
59
60 - def is_node(self, x):
61 """ 62 Return true if x is a node (label or hole) in this semantic 63 representation. 64 """ 65 return self.is_label(x) or self.is_hole(x)
66
67 - def _break_down(self, usr):
68 """ 69 Extract holes, labels, formula fragments and constraints from the hole 70 semantics underspecified representation (USR). 71 """ 72 assert isinstance(usr, Tree) 73 74 # (and X Y) 75 if usr.node == 'and': 76 self._break_down(usr[0]) 77 self._break_down(usr[1]) 78 79 # (hole H) -- H is a hole 80 elif usr.node == 'hole': 81 hole = usr[0] 82 self.holes.add(hole) 83 assert not self.is_label(hole) 84 85 # (label L) -- L is a label 86 elif usr.node == 'label': 87 label = usr[0] 88 self.labels.add(label) 89 assert not self.is_hole(label) 90 91 # (: L F) -- a formula fragment F with label L 92 elif usr.node == ':': 93 label = usr[0] 94 phi = usr[1] 95 assert not self.fragments.has_key(label) 96 self.fragments[label] = phi 97 98 # (leq L N) -- a constraint between the label L and node N 99 elif usr.node == 'leq': 100 lhs = usr[0] 101 rhs = usr[1] 102 self.constraints.add(Constraint(lhs, rhs)) 103 104 else: 105 raise ValueError(usr.node)
106
107 - def _find_top_most_labels(self):
108 """ 109 Return the set of labels which are not referenced directly as part of 110 another formula fragment. These will be the top-most labels for the 111 subtree that they are part of. 112 """ 113 top_most_labels = self.labels.copy() 114 for f in self.fragments.itervalues(): 115 for arg in f: 116 if self.is_label(arg): 117 top_most_labels.discard(arg) 118 return top_most_labels
119
120 - def _find_top_hole(self):
121 """ 122 Return the hole that will be the top of the formula tree. 123 """ 124 top_hole = self.holes.copy() 125 for f in self.fragments.itervalues(): 126 for arg in f: 127 if self.is_hole(arg): 128 top_hole.discard(arg) 129 assert len(top_hole) == 1 # it must be unique 130 return top_hole.pop()
131
132 - def pluggings(self):
133 """ 134 Calculate and return all the legal pluggings (mappings of labels to 135 holes) of this semantics given the constraints. 136 """ 137 record = [] 138 self._plug_nodes([(self.top_hole, [])], self.top_most_labels, {}, 139 record) 140 return record
141
142 - def _plug_nodes(self, queue, potential_labels, plug_acc, record):
143 """ 144 Plug the nodes in `queue' with the labels in `potential_labels'. 145 146 Each element of `queue' is a tuple of the node to plug and the list of 147 ancestor holes from the root of the graph to that node. 148 149 `potential_labels' is a set of the labels which are still available for 150 plugging. 151 152 `plug_acc' is the incomplete mapping of holes to labels made on the 153 current branch of the search tree so far. 154 155 `record' is a list of all the complete pluggings that we have found in 156 total so far. It is the only parameter that is destructively updated. 157 """ 158 assert queue != [] 159 (node, ancestors) = queue[0] 160 if self.is_hole(node): 161 # The node is a hole, try to plug it. 162 self._plug_hole(node, ancestors, queue[1:], potential_labels, 163 plug_acc, record) 164 else: 165 assert self.is_label(node) 166 # The node is a label. Replace it in the queue by the holes and 167 # labels in the formula fragment named by that label. 168 phi = self.fragments[node] 169 head = [(a, ancestors) for a in phi if self.is_node(a)] 170 self._plug_nodes(head + queue[1:], potential_labels, 171 plug_acc, record)
172
173 - def _plug_hole(self, hole, ancestors0, queue, potential_labels0, 174 plug_acc0, record):
175 """ 176 Try all possible ways of plugging a single hole. 177 See _plug_nodes for the meanings of the parameters. 178 """ 179 # Add the current hole we're trying to plug into the list of ancestors. 180 assert hole not in ancestors0 181 ancestors = [hole] + ancestors0 182 183 # Try each potential label in this hole in turn. 184 for l in potential_labels0: 185 # Is the label valid in this hole? 186 if self._violates_constraints(l, ancestors): 187 continue 188 189 plug_acc = plug_acc0.copy() 190 plug_acc[hole] = l 191 potential_labels = potential_labels0.copy() 192 potential_labels.remove(l) 193 194 if len(potential_labels) == 0: 195 # No more potential labels. That must mean all the holes have 196 # been filled so we have found a legal plugging so remember it. 197 # 198 # Note that the queue might not be empty because there might 199 # be labels on there that point to formula fragments with 200 # no holes in them. _sanity_check_plugging will make sure 201 # all holes are filled. 202 self._sanity_check_plugging(plug_acc, self.top_hole, []) 203 record.append(plug_acc) 204 else: 205 # Recursively try to fill in the rest of the holes in the 206 # queue. The label we just plugged into the hole could have 207 # holes of its own so at the end of the queue. Putting it on 208 # the end of the queue gives us a breadth-first search, so that 209 # all the holes at level i of the formula tree are filled 210 # before filling level i+1. 211 # A depth-first search would work as well since the trees must 212 # be finite but the bookkeeping would be harder. 213 self._plug_nodes(queue + [(l, ancestors)], potential_labels, 214 plug_acc, record)
215
216 - def _violates_constraints(self, label, ancestors):
217 """ 218 Return True if the `label' cannot be placed underneath the holes given 219 by the set `ancestors' because it would violate the constraints imposed 220 on it. 221 """ 222 for c in self.constraints: 223 if c.lhs == label: 224 if c.rhs not in ancestors: 225 return True 226 return False
227
228 - def _sanity_check_plugging(self, plugging, node, ancestors):
229 """ 230 Make sure that a given plugging is legal. We recursively go through 231 each node and make sure that no constraints are violated. 232 We also check that all holes have been filled. 233 """ 234 if self.is_hole(node): 235 ancestors = [node] + ancestors 236 label = plugging[node] 237 else: 238 label = node 239 assert self.is_label(label) 240 for c in self.constraints: 241 if c.lhs == label: 242 assert c.rhs in ancestors 243 phi = self.fragments[label] 244 for arg in phi: 245 if self.is_node(arg): 246 self._sanity_check_plugging(plugging, arg, [label] + ancestors)
247
248 - def formula_tree(self, plugging):
249 """ 250 Return the first-order logic formula tree for this underspecified 251 representation using the plugging given. 252 """ 253 return self._formula_tree(plugging, self.top_hole)
254
255 - def _formula_tree(self, plugging, node):
256 if node in plugging: 257 return self._formula_tree(plugging, plugging[node]) 258 elif self.fragments.has_key(node): 259 frag = self.fragments[node] 260 children = [self._formula_tree(plugging, arg) for arg in frag] 261 return FOLTree(frag.node, children) 262 else: 263 return node
264 265
266 -class Constraint:
267 """ 268 This class represents a constraint of the form (L =< N), 269 where L is a label and N is a node (a label or a hole). 270 """
271 - def __init__(self, lhs, rhs):
272 self.lhs = lhs 273 self.rhs = rhs
274 - def __eq__(self, other):
275 if self.__class__ == other.__class__: 276 return self.lhs == other.lhs and self.rhs == other.rhs 277 else: 278 return False
279 - def __ne__(self, other):
280 return not (self == other)
281 - def __hash__(self):
282 return hash(repr(self))
283 - def __repr__(self):
284 return '(%s =< %s)' % (self.lhs, self.rhs)
285 286
287 -class FOLTree(Tree):
288 """ 289 A Tree for first-order logic formulas that prints differently. Nodes with 290 operator names are printed in infix. Nodes which have unrecognised names 291 are assumed to be predicates. 292 """
293 - def __str__(self):
294 if self.node == 'ALL': 295 var = self[0] 296 st = self[1] 297 return '(ALL %s %s)' % (var, st) 298 elif self.node == 'SOME': 299 var = self[0] 300 st = self[1] 301 return '(SOME %s %s)' % (var, st) 302 elif self.node == 'AND': 303 return '(%s /\ %s)' % (self[0], self[1]) 304 elif self.node == 'IMP': 305 return '(%s -> %s)' % (self[0], self[1]) 306 # add more operators here 307 else: 308 # otherwise treat it as a predicate with arguments 309 args = ', '.join([str(arg) for arg in self]) 310 return '%s(%s)' % (self.node, args)
311 312
313 -def main():
314 import sys 315 from optparse import OptionParser, OptionGroup 316 usage = """%%prog [options] [grammar_file]""" % globals() 317 318 opts = OptionParser(usage=usage) 319 opts.add_option("-c", "--components", 320 action="store_true", dest="show_components", default=0, 321 help="show hole semantics components") 322 opts.add_option("-r", "--raw", 323 action="store_true", dest="show_raw", default=0, 324 help="show the raw hole semantics expression") 325 opts.add_option("-d", "--drawtrees", 326 action="store_true", dest="draw_trees", default=0, 327 help="show formula trees in a GUI window") 328 opts.add_option("-v", "--verbose", 329 action="count", dest="verbosity", default=0, 330 help="show more information during parse") 331 332 (options, args) = opts.parse_args() 333 334 if len(args) > 0: 335 filename = args[0] 336 else: 337 filename = 'hole.cfg' 338 339 print 'Reading grammar file', filename 340 grammar = GrammarFile.read_file(filename) 341 parser = grammar.earley_parser(trace=options.verbosity) 342 343 # Prompt the user for a sentence. 344 print 'Sentence: ', 345 line = sys.stdin.readline()[:-1] 346 347 # Parse the sentence. 348 tokens = list(tokenize.whitespace(line)) 349 trees = parser.get_parse_list(tokens) 350 print 'Got %d different parses' % len(trees) 351 352 for tree in trees: 353 # Get the semantic feature from the top of the parse tree. 354 sem = tree[0].node['sem'].simplify() 355 356 # Skolemise away all quantifiers. All variables become unique. 357 sem = sem.skolemise() 358 359 # Reparse the semantic representation from its bracketed string format. 360 # I find this uniform structure easier to handle. It also makes the 361 # code mostly independent of the lambda calculus classes. 362 usr = bracket_parse(str(sem)) 363 364 # Break the hole semantics representation down into its components 365 # i.e. holes, labels, formula fragments and constraints. 366 hole_sem = HoleSemantics(usr) 367 368 # Maybe print the raw semantic representation. 369 if options.show_raw: 370 print 371 print 'Raw expression' 372 print usr 373 374 # Maybe show the details of the semantic representation. 375 if options.show_components: 376 print 377 print 'Holes: ', hole_sem.holes 378 print 'Labels: ', hole_sem.labels 379 print 'Constraints: ', hole_sem.constraints 380 print 'Top hole: ', hole_sem.top_hole 381 print 'Top labels: ', hole_sem.top_most_labels 382 print 'Fragments:' 383 for (l,f) in hole_sem.fragments.items(): 384 print '\t%s: %s' % (l, f) 385 386 # Find all the possible ways to plug the formulas together. 387 pluggings = hole_sem.pluggings() 388 389 # Build FOL formula trees using the pluggings. 390 trees = map(hole_sem.formula_tree, pluggings) 391 392 # Print out the formulas in a textual format. 393 n = 1 394 for tree in trees: 395 print 396 print '%d. %s' % (n, tree) 397 n += 1 398 399 # Maybe draw the formulas as trees. 400 if options.draw_trees: 401 draw_trees(*trees) 402 403 print 404 print 'Done.'
405 406 if __name__ == '__main__': 407 main() 408