1
2
3
4
5
6
7
8
9 """
10 Corpus reader for the Information Extraction and Entity Recognition Corpus.
11
12 NIST 1999 Information Extraction: Entity Recognition Evaluation
13 http://www.itl.nist.gov/iad/894.01/tests/ie-er/er_99/er_99.htm
14
15 This corpus contains the NEWSWIRE development test data for the
16 NIST 1999 IE-ER Evaluation. The files were taken from the
17 subdirectory: /ie_er_99/english/devtest/newswire/*.ref.nwt
18 and filenames were shortened.
19
20 The corpus contains the following files: APW_19980314, APW_19980424,
21 APW_19980429, NYT_19980315, NYT_19980403, and NYT_19980407.
22 """
23
24 from nltk_lite.corpora import get_basedir, extract
25 from nltk_lite import chunk
26 import os
27
# Identifiers of the six documents in the corpus: the three APW files
# first, then the three NYT files, each group in date order.
items = [
    'APW_19980314',
    'APW_19980424',
    'APW_19980429',
    'NYT_19980315',
    'NYT_19980403',
    'NYT_19980407',
]
30
# Human-readable title for each corpus document, keyed by its identifier.
# NOTE(review): "APW" may actually abbreviate "Associated Press Worldstream"
# rather than "Weekly" -- confirm against the corpus documentation before
# changing these display strings.
item_name = {
    'APW_19980314':
        'Associated Press Weekly, 14 March 1998',
    'APW_19980424':
        'Associated Press Weekly, 24 April 1998',
    'APW_19980429':
        'Associated Press Weekly, 29 April 1998',
    'NYT_19980315':
        'New York Times, 15 March 1998',
    'NYT_19980403':
        'New York Times, 3 April 1998',
    'NYT_19980407':
        'New York Times, 7 April 1998',
}
39
49
53
61
# Script entry point: run the module's demo() only when this file is
# executed directly, not when it is imported.
if __name__ == "__main__":
    demo()
64