# Written by Eric Martin for COMP9021
”’
Given 2 words, creates an object which can provide the Levenshtein
distance between both words, the alignments of minimal cost between both
words, and the display of those alignments. The costs for insertion,
deletion and substitution are set by default to 1, 1 and 2,
respectively, but can be changed.
”’
class Levenshtein_distance:
”’
Given two words word_1 and word_2, builds a table of distances and
backtraces for the initial segments of word_1 and word_2, from which
the Levenshtein distance between both words can be computed, as well
as the possible alignments of minimal distance between both words.
”’
def __init__(self, word_1, word_2, insertion_cost=1, deletion_cost=1,
substitution_cost=2
):
self.word_1 = word_1
self.word_2 = word_2
self.insertion_cost = insertion_cost
self.deletion_cost = deletion_cost
self.substitution_cost = substitution_cost
self._table = self._get_distances_and_backtraces_table()
self._backtraces = [[self._table[i][j][1]
for j in range(len(self.word_2) + 1)
] for i in range(len(self.word_1) + 1)
]
self.aligned_pairs = self.get_aligned_pairs()
def _get_distances_and_backtraces_table(self):
N_1 = len(self.word_1) + 1
N_2 = len(self.word_2) + 1
# Think of table as a sequence of columns, read from left to
# right, each column being read from bottom to top, with word_1
# and word_2 positioned as follows.
#
# 2
# _
# d
# r
# o
# w
# .
# . w o r d _ 1
#
# Each position in the sequence of columns will record the
# minimal cost of converting the corresponding initial segment
# of word_1 with the corresponding initial segment of word_2,
# and also the last move — horizontal, vertical, diagonal —
# from a neighbouring position — left, below, or left and
# below, respectively — that can yield this minimal cost.
# ‘/’ corresponds to a substitution.
# ‘-‘ corresponds to a deletion.
# ‘|’ corresponds to an insertion.
table = [[(0, []) for _ in range(N_2)] for _ in range(N_1)]
# Bottom row: cost of deleting more and more letters from
# word_1.
for i in range(1, N_1):
table[i][0] = i, [‘-‘]
# Leftmost column: cost of insertion more and more letters from
# word_2.
for j in range(1, N_2):
table[0][j] = j, [‘|’]
d = {}
# Processing all other rows from bottom to top, and each row
# from left to right, determine the cost of each possible
# operation:
# – deleting current letter (of index i-1) of word_1;
# – inserting current letter (of index j-1) of word_2;
# – matching or substituting currents letters of word_1 and
# word_2.
for i in range(1, N_1):
for j in range(1, N_2):
d[‘-‘] = table[i – 1][j][0] + self.deletion_cost
d[‘|’] = table[i][j – 1][0] + self.insertion_cost
d[‘/’] = table[i – 1][j – 1][0]\
if self.word_1[i – 1] == self.word_2[j – 1]\
else table[i – 1][j – 1][0] + self.substitution_cost
minimal_cost = min(d.values())
table[i][j] = minimal_cost,\
[x for x in d if d[x] == minimal_cost]
return table
# We start at the top right corner of _backtraces, where we find the
# last directions taken to get there at minimal cost, and move
# backwards all the way to the bottom left corner, eventually
# generating all paths from the bottom left corner to the top right
# corner that yield that minimal cost.
def _compute_alignments(self, i, j):
if i == j == 0:
yield ”, ”
if ‘/’ in self._backtraces[i][j]:
for pair in self._compute_alignments(i – 1, j – 1):
yield pair[0] + self.word_1[i – 1],\
pair[1] + self.word_2[j – 1]
if ‘-‘ in self._backtraces[i][j]:
for pair in self._compute_alignments(i – 1, j):
yield pair[0] + self.word_1[i – 1], pair[1] + ‘_’
if ‘|’ in self._backtraces[i][j]:
for pair in self._compute_alignments(i, j – 1):
yield pair[0] + ‘_’, pair[1] + self.word_2[j – 1]
def distance(self):
”’
Returns the Levenshtein distance equal to the minimum number
of deletions, insertions and substitutions needed to transform
the first word into the second word, with deletions and
insertions incurring a cost of 1, and substitutions incurring a
cost of 2.
>>> Levenshtein_distance(”, ”).distance()
0
>>> Levenshtein_distance(‘ABCDE’, ”).distance()
5
>>> Levenshtein_distance(”, ‘ABCDE’).distance()
5
>>> Levenshtein_distance(‘A’, ‘A’).distance()
0
>>> Levenshtein_distance(‘A’, ‘B’).distance()
2
>>> Levenshtein_distance(‘AA’, ‘A’).distance()
1
>>> Levenshtein_distance(‘PARIS’, ‘LONDON’).distance()
11
>>> Levenshtein_distance(‘PAPER’, ‘POPE’).distance()
3
>>> Levenshtein_distance(‘DEPART’, ‘LEOPARD’).distance()
5
”’
return self._table[len(self.word_1)][len(self.word_2)][0]
def get_aligned_pairs(self):
”’
Returns the list of all possible ways to transform the first
word into the second word and minimising the Levenshtein
distance.
>>> Levenshtein_distance(”, ”).get_aligned_pairs()
[(”, ”)]
>>> Levenshtein_distance(‘ABCDE’, ”).get_aligned_pairs()
[(‘ABCDE’, ‘_____’)]
>>> Levenshtein_distance(”, ‘ABCDE’).get_aligned_pairs()
[(‘_____’, ‘ABCDE’)]
>>> Levenshtein_distance(‘A’, ‘A’).get_aligned_pairs()
[(‘A’, ‘A’)]
>>> Levenshtein_distance(‘A’, ‘B’).get_aligned_pairs()
[(‘A’, ‘B’), (‘_A’, ‘B_’), (‘A_’, ‘_B’)]
>>> Levenshtein_distance(‘AA’, ‘A’).get_aligned_pairs()
[(‘AA’, ‘_A’), (‘AA’, ‘A_’)]
>>> Levenshtein_distance(‘PAPER’, ‘POPE’).get_aligned_pairs()
[(‘PAPER’, ‘POPE_’), (‘P_APER’, ‘PO_PE_’), (‘PA_PER’, ‘P_OPE_’)]
>>> len(Levenshtein_distance(‘PARIS’,\
‘LONDON’\
).get_aligned_pairs())
3653
”’
return list(self._compute_alignments(len(self.word_1),
len(self.word_2)
)
)
def display_all_aligned_pairs(self):
”’
Displays all possible ways to transform the first word
into the second word and minimising the Levenshtein distance.
>>> Levenshtein_distance(‘DEPART’,\
‘LEOPARD’\
).display_all_aligned_pairs()
DE_PART
LEOPARD
_DE_PART
L_EOPARD
D_E_PART
_LEOPARD
DE_PAR_T
LEOPARD_
_DE_PAR_T
L_EOPARD_
D_E_PAR_T
_LEOPARD_
DE_PART_
LEOPAR_D
_DE_PART_
L_EOPAR_D
D_E_PART_
_LEOPAR_D
”’
print(‘\n\n’.join(‘\n’.join((pair[0], pair[1]))
for pair in self.aligned_pairs
)
)
if __name__ == ‘__main__’:
import doctest
doctest.testmod()
# There is no gap between 2 and 2,
# between -3 and -4, between -4 and -3,
# between 6 and 7, between 7 and 6.
#
# There is an INCREASING gap of [2] between 1 and 3.
# There is a DECREASING gap of [2] between 3 and 1.
#
# There is an INCREASING gap of [3, 4] between 2 and 5.
# There is a DECREASING gap of [4, 3] between 5 and 2.
#
# There is an INCREASING gap of [-1, 0, 1, 2] between -2 and 3.
# There is a DECREASING gap of [2, 1, 0, -1] between 3 and -2.
#
# The list “gaps” to compute is a list of 2 lists:
# – the list of all INCREASING gaps between 2 successive members of L,
# with no duplicate, sorted in INCREASING gap length
# and for a given gap length, sorted in INCREASING first gap member;
# – the list of all DECREASING gaps between 2 successive members of L,
# with no duplicate, sorted in DECREASING gap length
# and for a given gap length, sorted in DECREASING first gap member.
#
# You can assume that L is a list of integers.
def f(L):
”’
>>> f([])
The increasing and decreasing gaps in [] are: [[], []]
>>> f([2])
The increasing and decreasing gaps in [2] are: [[], []]
>>> f([1, 2])
The increasing and decreasing gaps in [1, 2] are: [[], []]
>>> f([1, 3, 4, 7])
The increasing and decreasing gaps in [1, 3, 4, 7] are: [[[2], [5, 6]], []]
>>> f([7, 4, 3, 1])
The increasing and decreasing gaps in [7, 4, 3, 1] are: [[], [[6, 5], [2]]]
>>> f([1, 3, 1, 5, 1, 3, -1, 1])
The increasing and decreasing gaps in [1, 3, 1, 5, 1, 3, -1, 1] are: \
[[[0], [2], [2, 3, 4]], [[4, 3, 2], [2, 1, 0], [2]]]
>>> f([1, -1, 3, 1, 5, 1, 3, 1])
The increasing and decreasing gaps in [1, -1, 3, 1, 5, 1, 3, 1] are: \
[[[2], [0, 1, 2], [2, 3, 4]], [[4, 3, 2], [2], [0]]]
>>> f([2, 0, 0, 3, 7, 2, 2, 2, -2, 4])
The increasing and decreasing gaps in [2, 0, 0, 3, 7, 2, 2, 2, -2, 4] are: \
[[[1, 2], [4, 5, 6], [-1, 0, 1, 2, 3]], [[6, 5, 4, 3], [1, 0, -1], [1]]]
”’
gaps = []
# INSERT YOUR CODE HERE
print(‘The increasing and decreasing gaps in’, L, ‘are:’, gaps)
if __name__ == ‘__main__’:
import doctest
doctest.testmod()
from collections import Counter
# Given a nonnegative integer n, define 3 integers n*, n- and n+ as follows.
#
# Let n* be:
# – 0 if n contains no occurrence of an odd digit;
# – otherwise, the number consisting of ALL ODD DIGITS in n,
# in INCREASING ORDER.
# For instance, if n is 29350459566388 then n* is 3355599.
#
# Let n- be n* with 1 replaced by 0
# 3 replaced by 2
# …
# 7 replaced by 6
# 9 replaced by 8
# UNLESS n* starts with 1, in which case n- is 0.
# For instance, if n* is 3355599 then n- is 2244488.
#
# Let n+ be n* with 1 replaced by 2
# 3 replaced by 4
# …
# 7 replaced by 8
# 9 replaced by 0
# UNLESS n* starts with 9, in which case n+ is 0.
# For instance, if n* is 3355599 then n+ is 4466600.
#
# Returns the triple (n*, n-, n+) as defined above.
# You can assume that n is a nonnegative integer.
def f(n):
”’
>>> f(2004280)
(0, 0, 0)
>>> f(1)
(1, 0, 2)
>>> f(9)
(9, 8, 0)
>>> f(5)
(5, 4, 6)
>>> f(23211)
(113, 0, 224)
>>> f(49909929)
(99999, 88888, 0)
>>> f(45445030303070033)
(33333557, 22222446, 44444668)
>>> f(889287767862576235673458)
(33555777779, 22444666668, 44666888880)
”’
return tuple()
# REPLACE THE RETURN STATEMENT ABOVE WITH YOUR CODE
if __name__ == ‘__main__’:
import doctest
doctest.testmod()
from math import log, ceil
# Prints out all lines of the form k = b^p such that:
# – m <= k <= n
# - p >= 2
# from smallest k to largest k and for a given k,
# from smallest b to largest b.
#
# No indication on the range of values to be tested,
# just do your best…
#
# You can assume that m and n are positive integers.
def f(m, n):
”’
>>> f(10000, 1)
>>> f(17, 21)
>>> f(17, 210)
25 = 5^2
27 = 3^3
32 = 2^5
36 = 6^2
49 = 7^2
64 = 2^6
64 = 4^3
64 = 8^2
81 = 3^4
81 = 9^2
100 = 10^2
121 = 11^2
125 = 5^3
128 = 2^7
144 = 12^2
169 = 13^2
196 = 14^2
>>> f(110500, 117700)
110592 = 48^3
110889 = 333^2
111556 = 334^2
112225 = 335^2
112896 = 336^2
113569 = 337^2
114244 = 338^2
114921 = 339^2
115600 = 340^2
116281 = 341^2
116964 = 342^2
117649 = 7^6
117649 = 49^3
117649 = 343^2
>>> f(34359738368, 34359848368)
34359738368 = 2^35
34359738368 = 32^7
34359738368 = 128^5
34359812496 = 185364^2
34359822251 = 3251^3
”’
pass
# REPLACE PASS ABOVE WITH YOUR CODE
if __name__ == ‘__main__’:
import doctest
doctest.testmod()
from math import hypot
# The distance between 2 adjacent points in the grid,
# either horizontally or vertically, is considered to be 1.
# x coordinates are read horizontally, starting from 1,
# increasing from left to right.
# y coordinates are read vertically, starting from 1,
# increasing from top to bottom.
#
# The first point of a solution is to the left, or above, the second point
# of the solution.
#
# Solutions are printed from smallest to largest x coordinates,
# and for a given x coordinate, from smallest to largest y coordinates.
def f(grid):
”’
>>> f([[0, 0, 0],\
[0, 0, 0]])
The maximum distance between 2 points in the grid is 0
That distance is between the following pairs of points:
>>> f([[0, 0, 1],\
[0, 0, 0]])
The maximum distance between 2 points in the grid is 0
That distance is between the following pairs of points:
>>> f([[0, 0, 1],\
[0, 0, 1]])
The maximum distance between 2 points in the grid is 1.0
That distance is between the following pairs of points:
(3, 1) — (3, 2)
>>> f([[1, 0, 1],\
[1, 0, 1]])
The maximum distance between 2 points in the grid is 2.23606797749979
That distance is between the following pairs of points:
(1, 1) — (3, 2)
(3, 1) — (1, 2)
>>> f([[0, 0, 0, 0],\
[0, 1, 0, 0],\
[0, 0, 1, 0],\
[0, 0, 0, 0],\
[1, 1, 0, 0],\
[1, 1, 0, 0]])
The maximum distance between 2 points in the grid is 4.123105625617661
That distance is between the following pairs of points:
(2, 2) — (1, 6)
”’
max_distance = 0
# INSERT YOUR CODE HERE
print(‘The maximum distance between 2 points in the grid is’,
max_distance
)
print(“That distance is between the following pairs of points:”)
print()
# REPLACE THE PRINT() STATEMENT ABOVE WITH YOUR CODE
if __name__ == ‘__main__’:
import doctest
doctest.testmod()
from collections import defaultdict
from re import findall
from levenshtein_distance import *
# Words in the text are defined as being ALL LOWERCASE,
# NOT preceded and NOT followed by a letter (be it lowercase or uppercase),
# and OF LENGTH STRICTLY GREATER THAN 3.
#
# A word in the text is not in the dictionary if its all uppercase version
# is not in the dictionary.
#
# Words in the text that are not in the dictionary AND THAT OCCUR
# AT LEAST TWICE IN THE TEXT are output, followed by the line number(s)
# where they occur in the text (line numbers are output only once in the
# unlikely case such words would appear more than once on the same line),
# two successive line numbers being separated by a comma and a space.
# Lines in the text are numbered starting from 1.
# You might find the enumerate() function useful.
#
# You will score marks if you do not also output associated suggestion(s),
# namely, the (lowercase version) of the word(s) in the dictionary whose
# Levenshtein distance to the unknown word is minimal.
# For the program to be efficient enough, it is necessary to slightly edit
# (in a straightforward way) levenshtein_distance.py.
# You should then get the output within 30 seconds.
#
# Both dictionary.txt and the file whose name is provided as argument to
# the function are stored in the working directory.
#
# Will be tested on other text files, of a similar size as the sample file.
def f(filename):
”’
>>> f(‘atale_poe.txt’)
minarets: 193, 206
Did you mean minaret?
oriels: 194, 206
Did you mean oils or ores or orgies or tories?
vermicular: 368, 375
Did you mean vehicular?
”’
pass
# REPLACE PASS ABOVE WITH YOUR CODE
if __name__ == ‘__main__’:
import doctest
doctest.testmod()
# Returns the list of all lists of the form [L[i_0], …, L[i_k]] such that:
# – i_0 < ... < i_k
# - L[i_0] < ... < L[i_k]
# - there is NO list of the form [L[j_0], ..., L[j_k']] such that:
# * j_0 < ... < j_k'
# * L[j_0] < ... < L[j_k']
# * {i_0, ..., i_k} is a strict subset of {j_0, ..., j_k'}.
#
# The solutions are output in lexicographic order of the associated tuples
# (i_0, ..., i_k).
#
# Will be tested on inputs that, for some of them, are too large for a brute
# force approach to be efficient enough. Think recursively.
#
# You can assume that L is a list of DISTINCT integers.
def f(L):
'''
>>> f([3, 2, 1])
[[3], [2], [1]]
>>> f([2, 1, 3, 4])
[[2, 3, 4], [1, 3, 4]]
>>> f([4, 7, 6, 1, 3, 5, 8, 2])
[[4, 7, 8], [4, 6, 8], [4, 5, 8], [1, 3, 5, 8], [1, 2]]
>>> f([3, 4, 6, 10, 2, 7, 1, 5, 8, 9])
[[3, 4, 6, 10], [3, 4, 6, 7, 8, 9], [3, 4, 5, 8, 9], [2, 7, 8, 9], \
[2, 5, 8, 9], [1, 5, 8, 9]]
”’
solutions = []
# INSERT YOUR CODE HERE
return solutions
# POSSIBLY DEFINE ANOTHER FUNCTION
if __name__ == ‘__main__’:
import doctest
doctest.testmod()