1
2
3
4
5
6
7
8
9
“S” “NP”
“VP”
[“NN”, “This”] [“COP”, “is”] [“DT”, “a”] [“NN”, “book”]
10
# Hand-built parse tree for the sentence "this is a book".
# Phrases are labeled with a tag string and carry a list of branches;
# words are leaves whose label is a [tag, text] pair.
t = tree("S", [
    tree("NP", [tree(["NN", "this"])]),
    tree("VP", [
        tree(["COP", "is"]),
        tree("NP", [
            tree(["DT", "a"]),
            tree(["NN", "book"]),
        ]),
    ]),
])
11
def phrase(tag, branches):
    """Build a phrase: a tree labeled `tag` whose children are `branches`."""
    node = tree(tag, branches)
    return node
def word(tag, text):
    """Build a word: a tree whose label is the pair [tag, text].

    No branches are passed to `tree`, and the label bundles the
    part-of-speech tag with the word's text.
    """
    return tree([tag, text])
def text(word):
    """Return the text of a word, i.e. the second element of its label."""
    # A word's label is a [tag, text] pair (see `word`), so index 1 is the text.
    return label(word)[1]
def tag(t):
    """Return the tag of a phrase or word.

    A word is a leaf whose label is a [tag, text] pair, so its tag is the
    first element of the label. A phrase's label is the tag itself.
    """
    if is_leaf(t):
        return label(t)[0]
    return label(t)
12
13
suppes.parsed
(ROOT (S (NP (NN this)) (VP (COP is) (NP (DT a) (NN book))) (. .)))
(ROOT (S (NP (PRP I))
(VP (AUX ‘ve)
(ADVP (RB never))
(VP (VBN seen) (NP (DT such) (DT a) (JJ cute) (NN kangaroo))))
(. .)))
tree()
14
open('/some/file.txt').read()
open('/some/file.txt').readlines()
readlines()
open('suppes.parsed').readlines()
15
str.strip()
' hello '.strip()
str.split(sep=None)
sep
'hi there '.split()
str.replace(a, b)
ab
'2+2'.replace('+', ' + ')
16
str.strip()
' hello '.strip()  # 'hello'
str.split(sep=None)
sep
'hi there '.split()
str.replace(a, b)
ab
'2+2'.replace('+', ' + ')
16
str.strip()
' hello '.strip()  # 'hello'
str.split(sep=None)
sep
'hi there '.split()  # ['hi', 'there']
str.replace(a, b)
ab
'2+2'.replace('+', ' + ')
16
str.strip()
' hello '.strip()  # 'hello'
str.split(sep=None)
sep
'hi there '.split()  # ['hi', 'there']
str.replace(a, b)
ab
'2+2'.replace('+', ' + ')  # '2 + 2'
16
['(ROOT (S (NP (NN this)) (VP (COP is) (NP (DT a) (NN book))) (. ?)))\n', ...]
[['(', 'ROOT', '(', 'S', '(', 'NP', '(', 'NN', 'this', ')', ')',
  '(', 'VP', '(', 'COP', 'is', ')', '(', 'NP', '(', 'DT', 'a', ')',
  '(', 'NN', 'book', ')', ')', ')', '(', '.', '?', ')', ')', ')'],
 ...]
read_sentences
# Read the raw lines of the parsed corpus. The context manager closes the
# file handle as soon as the lines have been read.
with open('suppes.parsed') as corpus:
    lines = corpus.readlines()
# read_sentences is defined elsewhere; presumably it turns each raw line
# into a list of tokens -- confirm against its definition.
tokens = read_sentences(lines)
17
\n
[…, ‘(‘, ‘NP’, ‘(‘, ‘DT’, ‘a’, ‘)’, ‘(‘, ‘JJ’, ‘big’, ‘)’, ‘(‘, ‘NN #i
def read_parse_tree(tokens, i):
    # Outline of the intended steps only; no implementation is shown here.
    # Read the tag, which is tokens[i], then advance i.
    # While the current item is a '(',
    #     call read_parse_tree to construct a branch.
    # Once the current item is a ')',
    #     return a phrase from the tag and branches.
    # Base case: there is no '(' or ')'
    #     because there is just text after the tag.
read_parse_tree
tree = read_parse_tree(tokens[0], 1)
18
‘
19
20
20
def index_trees(trees):
    """Return a dictionary from tags to lists of trees.

    For each tree in `trees`, every (tag, node) pair produced by `nodes`
    is recorded, so each tag maps to the list of nodes carrying that tag,
    in the order they were encountered.
    """
    index = {}
    for t in trees:
        # Named node_tag (not tag) so the module-level `tag` function is not
        # shadowed inside this function.
        for node_tag, node in nodes(t):
            index.setdefault(node_tag, []).append(node)
    return index
# Convert each item returned by all_sentences() into a parse tree, then
# build the tag -> nodes index over all of those trees.
trees = [tokens_to_parse_tree(s) for s in all_sentences()]
tree_index = index_trees(trees)
21
def gen_tree(t, tree_index, flip):
    """Return a version of t in which branches are randomly replaced.

    t          -- a phrase or word tree.
    tree_index -- dictionary from tags to lists of trees (see index_trees).
    flip       -- zero-argument callable; a true result means "replace
                  this branch".

    Leaves are returned unchanged. For a phrase, each branch is optionally
    swapped for a random tree with the same tag from tree_index, and the
    (possibly swapped) branch is then processed recursively.
    Raises KeyError if a branch chosen for replacement has a tag that is
    not in tree_index.
    """
    if is_leaf(t):
        return t
    new_branches = []
    for b in branches(t):
        if flip():
            # Replace with a tree of the same tag so the result stays
            # structurally consistent with the original.
            b = random.choice(tree_index[tag(b)])
        new_branches.append(gen_tree(b, tree_index, flip))
    return phrase(tag(t), new_branches)
22
“”
23
24