Train BPE on a toy text example
bpe algorithm: https://web.stanford.edu/~jurafsky/slp3/2.pdf (2.4.3)
In [3]:
import re, collections
text = “The aims for this subject is for students to develop an understanding of the main algorithms used in natural language processing, for use in a diverse range of applications including text classification, machine translation, and question answering. Topics to be covered include part-of-speech tagging, n-gram language modelling, syntactic parsing and deep learning. The programming language used is Python, see for more information on its use in the workshops, assignments and installation at home.”
# text = ‘low ‘*5 +’lower ‘*2+’newest ‘*6 +’widest ‘*3
#what is this function
def get_vocab(text):
vocab = collections.defaultdict(int)
for word in text.strip().split():
#note: we use the special token (instead of underscore in the lecture) to denote the end of a word
vocab[‘ ‘.join(list(word)) + ‘ ‘] += 1
return vocab
def get_stats(vocab):
pairs = collections.defaultdict(int)
for word, freq in vocab.items():
symbols = word.split()
for i in range(len(symbols)-1):
pairs[symbols[i],symbols[i+1]] += freq
return pairs
def merge_vocab(pair, v_in):
v_out = {}
bigram = re.escape(‘ ‘.join(pair))
# matches unmerged bigrams
p = re.compile(r'(?, {‘T h e ‘: 2, ‘a i m s ‘: 1, ‘f o r ‘: 4, ‘t h i s ‘: 1, ‘s u b j e c t ‘: 1, ‘i s ‘: 2, ‘s t u d e n t s ‘: 1, ‘t o ‘: 2, ‘d e v e l o p ‘: 1, ‘a n ‘: 1, ‘u n d e r s t a n d i n g ‘: 1, ‘o f ‘: 2, ‘t h e ‘: 2, ‘m a i n ‘: 1, ‘a l g o r i t h m s ‘: 1, ‘u s e d ‘: 2, ‘i n ‘: 3, ‘n a t u r a l ‘: 1, ‘l a n g u a g e ‘: 3, ‘p r o c e s s i n g , ‘: 1, ‘u s e ‘: 2, ‘a ‘: 1, ‘d i v e r s e ‘: 1, ‘r a n g e ‘: 1, ‘a p p l i c a t i o n s ‘: 1, ‘i n c l u d i n g ‘: 1, ‘t e x t ‘: 1, ‘c l a s s i f i c a t i o n , ‘: 1, ‘m a c h i n e ‘: 1, ‘t r a n s l a t i o n , ‘: 1, ‘a n d ‘: 3, ‘q u e s t i o n ‘: 1, ‘a n s w e r i n g . ‘: 1, ‘T o p i c s ‘: 1, ‘b e ‘: 1, ‘c o v e r e d ‘: 1, ‘i n c l u d e ‘: 1, ‘p a r t – o f – s p e e c h ‘: 1, ‘t a g g i n g , ‘: 1, ‘n – g r a m ‘: 1, ‘m o d e l l i n g , ‘: 1, ‘s y n t a c t i c ‘: 1, ‘p a r s i n g ‘: 1, ‘d e e p ‘: 1, ‘l e a r n i n g . ‘: 1, ‘p r o g r a m m i n g ‘: 1, ‘P y t h o n , ‘: 1, ‘s e e ‘: 1, ‘m o r e ‘: 1, ‘i n f o r m a t i o n ‘: 1, ‘o n ‘: 1, ‘i t s ‘: 1, ‘w o r k s h o p s , ‘: 1, ‘a s s i g n m e n t s ‘: 1, ‘i n s t a l l a t i o n ‘: 1, ‘a t ‘: 1, ‘h o m e . ‘: 1})
==========
Tokens Before BPE
Tokens: defaultdict(, {‘T’: 3, ‘h’: 11, ‘e’: 39, ‘‘: 73, ‘a’: 38, ‘i’: 37, ‘m’: 12, ‘s’: 34, ‘f’: 9, ‘o’: 29, ‘r’: 22, ‘t’: 29, ‘u’: 14, ‘b’: 2, ‘j’: 1, ‘c’: 13, ‘d’: 15, ‘n’: 45, ‘v’: 3, ‘l’: 16, ‘p’: 11, ‘g’: 22, ‘,’: 7, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 3, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1})
Number of tokens: 31
==========
Iter: 0
Best pair: (‘i’, ‘n’)
Tokens: defaultdict(, {‘T’: 3, ‘h’: 11, ‘e’: 39, ‘‘: 73, ‘a’: 38, ‘i’: 19, ‘m’: 12, ‘s’: 34, ‘f’: 9, ‘o’: 29, ‘r’: 22, ‘t’: 29, ‘u’: 14, ‘b’: 2, ‘j’: 1, ‘c’: 13, ‘d’: 15, ‘n’: 27, ‘v’: 3, ‘l’: 16, ‘p’: 11, ‘g’: 22, ‘,’: 7, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 3, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 18})
Number of tokens: 32
==========
Iter: 1
Best pair: (‘e’, ‘‘)
Tokens: defaultdict(, {‘T’: 3, ‘h’: 11, ‘e’: 23, ‘‘: 57, ‘a’: 38, ‘i’: 19, ‘m’: 12, ‘s’: 34, ‘f’: 9, ‘o’: 29, ‘r’: 22, ‘t’: 29, ‘u’: 14, ‘b’: 2, ‘j’: 1, ‘c’: 13, ‘d’: 15, ‘n’: 27, ‘v’: 3, ‘l’: 16, ‘p’: 11, ‘g’: 22, ‘,’: 7, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 3, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 18, ‘e‘: 16})
Number of tokens: 33
==========
Iter: 2
Best pair: (‘a’, ‘n’)
Tokens: defaultdict(, {‘T’: 3, ‘h’: 11, ‘e’: 23, ‘‘: 57, ‘a’: 27, ‘i’: 19, ‘m’: 12, ‘s’: 34, ‘f’: 9, ‘o’: 29, ‘r’: 22, ‘t’: 29, ‘u’: 14, ‘b’: 2, ‘j’: 1, ‘c’: 13, ‘d’: 15, ‘n’: 16, ‘v’: 3, ‘l’: 16, ‘p’: 11, ‘g’: 22, ‘,’: 7, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 3, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 18, ‘e‘: 16, ‘an’: 11})
Number of tokens: 34
==========
Iter: 3
Best pair: (‘s’, ‘‘)
Tokens: defaultdict(, {‘T’: 3, ‘h’: 11, ‘e’: 23, ‘‘: 47, ‘a’: 27, ‘i’: 19, ‘m’: 12, ‘s’: 24, ‘f’: 9, ‘o’: 29, ‘r’: 22, ‘t’: 29, ‘u’: 14, ‘b’: 2, ‘j’: 1, ‘c’: 13, ‘d’: 15, ‘n’: 16, ‘v’: 3, ‘l’: 16, ‘p’: 11, ‘g’: 22, ‘,’: 7, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 3, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 18, ‘e‘: 16, ‘an’: 11, ‘s‘: 10})
Number of tokens: 35
==========
Iter: 4
Best pair: (‘in’, ‘g’)
Tokens: defaultdict(, {‘T’: 3, ‘h’: 11, ‘e’: 23, ‘‘: 47, ‘a’: 27, ‘i’: 19, ‘m’: 12, ‘s’: 24, ‘f’: 9, ‘o’: 29, ‘r’: 22, ‘t’: 29, ‘u’: 14, ‘b’: 2, ‘j’: 1, ‘c’: 13, ‘d’: 15, ‘n’: 16, ‘v’: 3, ‘l’: 16, ‘p’: 11, ‘g’: 13, ‘,’: 7, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 3, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 9, ‘e‘: 16, ‘an’: 11, ‘s‘: 10, ‘ing’: 9})
Number of tokens: 36
==========
Iter: 5
Best pair: (‘o’, ‘r’)
Tokens: defaultdict(, {‘T’: 3, ‘h’: 11, ‘e’: 23, ‘‘: 47, ‘a’: 27, ‘i’: 19, ‘m’: 12, ‘s’: 24, ‘f’: 9, ‘o’: 21, ‘r’: 14, ‘t’: 29, ‘u’: 14, ‘b’: 2, ‘j’: 1, ‘c’: 13, ‘d’: 15, ‘n’: 16, ‘v’: 3, ‘l’: 16, ‘p’: 11, ‘g’: 13, ‘,’: 7, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 3, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 9, ‘e‘: 16, ‘an’: 11, ‘s‘: 10, ‘ing’: 9, ‘or’: 8})
Number of tokens: 37
==========
Iter: 6
Best pair: (‘o’, ‘n’)
Tokens: defaultdict(, {‘T’: 3, ‘h’: 11, ‘e’: 23, ‘‘: 47, ‘a’: 27, ‘i’: 19, ‘m’: 12, ‘s’: 24, ‘f’: 9, ‘o’: 13, ‘r’: 14, ‘t’: 29, ‘u’: 14, ‘b’: 2, ‘j’: 1, ‘c’: 13, ‘d’: 15, ‘n’: 8, ‘v’: 3, ‘l’: 16, ‘p’: 11, ‘g’: 13, ‘,’: 7, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 3, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 9, ‘e‘: 16, ‘an’: 11, ‘s‘: 10, ‘ing’: 9, ‘or’: 8, ‘on’: 8})
Number of tokens: 38
==========
Iter: 7
Best pair: (‘a’, ‘t’)
Tokens: defaultdict(, {‘T’: 3, ‘h’: 11, ‘e’: 23, ‘‘: 47, ‘a’: 20, ‘i’: 19, ‘m’: 12, ‘s’: 24, ‘f’: 9, ‘o’: 13, ‘r’: 14, ‘t’: 22, ‘u’: 14, ‘b’: 2, ‘j’: 1, ‘c’: 13, ‘d’: 15, ‘n’: 8, ‘v’: 3, ‘l’: 16, ‘p’: 11, ‘g’: 13, ‘,’: 7, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 3, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 9, ‘e‘: 16, ‘an’: 11, ‘s‘: 10, ‘ing’: 9, ‘or’: 8, ‘on’: 8, ‘at’: 7})
Number of tokens: 39
==========
Iter: 8
Best pair: (‘,’, ‘‘)
Tokens: defaultdict(, {‘T’: 3, ‘h’: 11, ‘e’: 23, ‘‘: 40, ‘a’: 20, ‘i’: 19, ‘m’: 12, ‘s’: 24, ‘f’: 9, ‘o’: 13, ‘r’: 14, ‘t’: 22, ‘u’: 14, ‘b’: 2, ‘j’: 1, ‘c’: 13, ‘d’: 15, ‘n’: 8, ‘v’: 3, ‘l’: 16, ‘p’: 11, ‘g’: 13, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 3, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 9, ‘e‘: 16, ‘an’: 11, ‘s‘: 10, ‘ing’: 9, ‘or’: 8, ‘on’: 8, ‘at’: 7, ‘,‘: 7})
Number of tokens: 40
==========
Iter: 9
Best pair: (‘d’, ‘‘)
Tokens: defaultdict(, {‘T’: 3, ‘h’: 11, ‘e’: 23, ‘‘: 34, ‘a’: 20, ‘i’: 19, ‘m’: 12, ‘s’: 24, ‘f’: 9, ‘o’: 13, ‘r’: 14, ‘t’: 22, ‘u’: 14, ‘b’: 2, ‘j’: 1, ‘c’: 13, ‘d’: 9, ‘n’: 8, ‘v’: 3, ‘l’: 16, ‘p’: 11, ‘g’: 13, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 3, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 9, ‘e‘: 16, ‘an’: 11, ‘s‘: 10, ‘ing’: 9, ‘or’: 8, ‘on’: 8, ‘at’: 7, ‘,‘: 7, ‘d‘: 6})
Number of tokens: 41
==========
Iter: 10
Best pair: (‘i’, ‘on’)
Tokens: defaultdict(, {‘T’: 3, ‘h’: 11, ‘e’: 23, ‘‘: 34, ‘a’: 20, ‘i’: 13, ‘m’: 12, ‘s’: 24, ‘f’: 9, ‘o’: 13, ‘r’: 14, ‘t’: 22, ‘u’: 14, ‘b’: 2, ‘j’: 1, ‘c’: 13, ‘d’: 9, ‘n’: 8, ‘v’: 3, ‘l’: 16, ‘p’: 11, ‘g’: 13, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 3, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 9, ‘e‘: 16, ‘an’: 11, ‘s‘: 10, ‘ing’: 9, ‘or’: 8, ‘on’: 2, ‘at’: 7, ‘,‘: 7, ‘d‘: 6, ‘ion’: 6})
Number of tokens: 42
==========
Iter: 11
Best pair: (‘f’, ‘or’)
Tokens: defaultdict(, {‘T’: 3, ‘h’: 11, ‘e’: 23, ‘‘: 34, ‘a’: 20, ‘i’: 13, ‘m’: 12, ‘s’: 24, ‘f’: 4, ‘o’: 13, ‘r’: 14, ‘t’: 22, ‘u’: 14, ‘b’: 2, ‘j’: 1, ‘c’: 13, ‘d’: 9, ‘n’: 8, ‘v’: 3, ‘l’: 16, ‘p’: 11, ‘g’: 13, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 3, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 9, ‘e‘: 16, ‘an’: 11, ‘s‘: 10, ‘ing’: 9, ‘or’: 3, ‘on’: 2, ‘at’: 7, ‘,‘: 7, ‘d‘: 6, ‘ion’: 6, ‘for’: 5})
Number of tokens: 43
==========
Iter: 12
Best pair: (‘t’, ‘h’)
Tokens: defaultdict(, {‘T’: 3, ‘h’: 6, ‘e’: 23, ‘‘: 34, ‘a’: 20, ‘i’: 13, ‘m’: 12, ‘s’: 24, ‘f’: 4, ‘o’: 13, ‘r’: 14, ‘t’: 17, ‘u’: 14, ‘b’: 2, ‘j’: 1, ‘c’: 13, ‘d’: 9, ‘n’: 8, ‘v’: 3, ‘l’: 16, ‘p’: 11, ‘g’: 13, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 3, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 9, ‘e‘: 16, ‘an’: 11, ‘s‘: 10, ‘ing’: 9, ‘or’: 3, ‘on’: 2, ‘at’: 7, ‘,‘: 7, ‘d‘: 6, ‘ion’: 6, ‘for’: 5, ‘th’: 5})
Number of tokens: 44
==========
Iter: 13
Best pair: (‘d’, ‘e’)
Tokens: defaultdict(, {‘T’: 3, ‘h’: 6, ‘e’: 18, ‘‘: 34, ‘a’: 20, ‘i’: 13, ‘m’: 12, ‘s’: 24, ‘f’: 4, ‘o’: 13, ‘r’: 14, ‘t’: 17, ‘u’: 14, ‘b’: 2, ‘j’: 1, ‘c’: 13, ‘d’: 4, ‘n’: 8, ‘v’: 3, ‘l’: 16, ‘p’: 11, ‘g’: 13, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 3, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 9, ‘e‘: 16, ‘an’: 11, ‘s‘: 10, ‘ing’: 9, ‘or’: 3, ‘on’: 2, ‘at’: 7, ‘,‘: 7, ‘d‘: 6, ‘ion’: 6, ‘for’: 5, ‘th’: 5, ‘de’: 5})
Number of tokens: 45
==========
Iter: 14
Best pair: (‘at’, ‘ion’)
Tokens: defaultdict(, {‘T’: 3, ‘h’: 6, ‘e’: 18, ‘‘: 34, ‘a’: 20, ‘i’: 13, ‘m’: 12, ‘s’: 24, ‘f’: 4, ‘o’: 13, ‘r’: 14, ‘t’: 17, ‘u’: 14, ‘b’: 2, ‘j’: 1, ‘c’: 13, ‘d’: 4, ‘n’: 8, ‘v’: 3, ‘l’: 16, ‘p’: 11, ‘g’: 13, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 3, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 9, ‘e‘: 16, ‘an’: 11, ‘s‘: 10, ‘ing’: 9, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 7, ‘d‘: 6, ‘ion’: 1, ‘for’: 5, ‘th’: 5, ‘de’: 5, ‘ation’: 5})
Number of tokens: 46
==========
Iter: 15
Best pair: (‘for’, ‘‘)
Tokens: defaultdict(, {‘T’: 3, ‘h’: 6, ‘e’: 18, ‘‘: 30, ‘a’: 20, ‘i’: 13, ‘m’: 12, ‘s’: 24, ‘f’: 4, ‘o’: 13, ‘r’: 14, ‘t’: 17, ‘u’: 14, ‘b’: 2, ‘j’: 1, ‘c’: 13, ‘d’: 4, ‘n’: 8, ‘v’: 3, ‘l’: 16, ‘p’: 11, ‘g’: 13, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 3, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 9, ‘e‘: 16, ‘an’: 11, ‘s‘: 10, ‘ing’: 9, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 7, ‘d‘: 6, ‘ion’: 1, ‘for’: 1, ‘th’: 5, ‘de’: 5, ‘ation’: 5, ‘for‘: 4})
Number of tokens: 47
==========
Iter: 16
Best pair: (‘s’, ‘t’)
Tokens: defaultdict(, {‘T’: 3, ‘h’: 6, ‘e’: 18, ‘‘: 30, ‘a’: 20, ‘i’: 13, ‘m’: 12, ‘s’: 20, ‘f’: 4, ‘o’: 13, ‘r’: 14, ‘t’: 13, ‘u’: 14, ‘b’: 2, ‘j’: 1, ‘c’: 13, ‘d’: 4, ‘n’: 8, ‘v’: 3, ‘l’: 16, ‘p’: 11, ‘g’: 13, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 3, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 9, ‘e‘: 16, ‘an’: 11, ‘s‘: 10, ‘ing’: 9, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 7, ‘d‘: 6, ‘ion’: 1, ‘for’: 1, ‘th’: 5, ‘de’: 5, ‘ation’: 5, ‘for‘: 4, ‘st’: 4})
Number of tokens: 48
==========
Iter: 17
Best pair: (‘ing’, ‘‘)
Tokens: defaultdict(, {‘T’: 3, ‘h’: 6, ‘e’: 18, ‘‘: 26, ‘a’: 20, ‘i’: 13, ‘m’: 12, ‘s’: 20, ‘f’: 4, ‘o’: 13, ‘r’: 14, ‘t’: 13, ‘u’: 14, ‘b’: 2, ‘j’: 1, ‘c’: 13, ‘d’: 4, ‘n’: 8, ‘v’: 3, ‘l’: 16, ‘p’: 11, ‘g’: 13, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 3, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 9, ‘e‘: 16, ‘an’: 11, ‘s‘: 10, ‘ing’: 5, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 7, ‘d‘: 6, ‘ion’: 1, ‘for’: 1, ‘th’: 5, ‘de’: 5, ‘ation’: 5, ‘for‘: 4, ‘st’: 4, ‘ing‘: 4})
Number of tokens: 49
==========
Iter: 18
Best pair: (‘in’, ‘‘)
Tokens: defaultdict(, {‘T’: 3, ‘h’: 6, ‘e’: 18, ‘‘: 22, ‘a’: 20, ‘i’: 13, ‘m’: 12, ‘s’: 20, ‘f’: 4, ‘o’: 13, ‘r’: 14, ‘t’: 13, ‘u’: 14, ‘b’: 2, ‘j’: 1, ‘c’: 13, ‘d’: 4, ‘n’: 8, ‘v’: 3, ‘l’: 16, ‘p’: 11, ‘g’: 13, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 3, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 5, ‘e‘: 16, ‘an’: 11, ‘s‘: 10, ‘ing’: 5, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 7, ‘d‘: 6, ‘ion’: 1, ‘for’: 1, ‘th’: 5, ‘de’: 5, ‘ation’: 5, ‘for‘: 4, ‘st’: 4, ‘ing‘: 4, ‘in‘: 4})
Number of tokens: 50
==========
Iter: 19
Best pair: (‘u’, ‘s’)
Tokens: defaultdict(, {‘T’: 3, ‘h’: 6, ‘e’: 18, ‘‘: 22, ‘a’: 20, ‘i’: 13, ‘m’: 12, ‘s’: 16, ‘f’: 4, ‘o’: 13, ‘r’: 14, ‘t’: 13, ‘u’: 10, ‘b’: 2, ‘j’: 1, ‘c’: 13, ‘d’: 4, ‘n’: 8, ‘v’: 3, ‘l’: 16, ‘p’: 11, ‘g’: 13, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 3, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 5, ‘e‘: 16, ‘an’: 11, ‘s‘: 10, ‘ing’: 5, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 7, ‘d‘: 6, ‘ion’: 1, ‘for’: 1, ‘th’: 5, ‘de’: 5, ‘ation’: 5, ‘for‘: 4, ‘st’: 4, ‘ing‘: 4, ‘in‘: 4, ‘us’: 4})
Number of tokens: 51
==========
Iter: 20
Best pair: (‘an’, ‘g’)
Tokens: defaultdict(, {‘T’: 3, ‘h’: 6, ‘e’: 18, ‘‘: 22, ‘a’: 20, ‘i’: 13, ‘m’: 12, ‘s’: 16, ‘f’: 4, ‘o’: 13, ‘r’: 14, ‘t’: 13, ‘u’: 10, ‘b’: 2, ‘j’: 1, ‘c’: 13, ‘d’: 4, ‘n’: 8, ‘v’: 3, ‘l’: 16, ‘p’: 11, ‘g’: 9, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 3, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 5, ‘e‘: 16, ‘an’: 7, ‘s‘: 10, ‘ing’: 5, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 7, ‘d‘: 6, ‘ion’: 1, ‘for’: 1, ‘th’: 5, ‘de’: 5, ‘ation’: 5, ‘for‘: 4, ‘st’: 4, ‘ing‘: 4, ‘in‘: 4, ‘us’: 4, ‘ang’: 4})
Number of tokens: 52
==========
Iter: 21
Best pair: (‘a’, ‘g’)
Tokens: defaultdict(, {‘T’: 3, ‘h’: 6, ‘e’: 18, ‘‘: 22, ‘a’: 16, ‘i’: 13, ‘m’: 12, ‘s’: 16, ‘f’: 4, ‘o’: 13, ‘r’: 14, ‘t’: 13, ‘u’: 10, ‘b’: 2, ‘j’: 1, ‘c’: 13, ‘d’: 4, ‘n’: 8, ‘v’: 3, ‘l’: 16, ‘p’: 11, ‘g’: 5, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 3, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 5, ‘e‘: 16, ‘an’: 7, ‘s‘: 10, ‘ing’: 5, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 7, ‘d‘: 6, ‘ion’: 1, ‘for’: 1, ‘th’: 5, ‘de’: 5, ‘ation’: 5, ‘for‘: 4, ‘st’: 4, ‘ing‘: 4, ‘in‘: 4, ‘us’: 4, ‘ang’: 4, ‘ag’: 4})
Number of tokens: 53
==========
Iter: 22
Best pair: (‘i’, ‘c’)
Tokens: defaultdict(, {‘T’: 3, ‘h’: 6, ‘e’: 18, ‘‘: 22, ‘a’: 16, ‘i’: 9, ‘m’: 12, ‘s’: 16, ‘f’: 4, ‘o’: 13, ‘r’: 14, ‘t’: 13, ‘u’: 10, ‘b’: 2, ‘j’: 1, ‘c’: 9, ‘d’: 4, ‘n’: 8, ‘v’: 3, ‘l’: 16, ‘p’: 11, ‘g’: 5, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 3, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 5, ‘e‘: 16, ‘an’: 7, ‘s‘: 10, ‘ing’: 5, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 7, ‘d‘: 6, ‘ion’: 1, ‘for’: 1, ‘th’: 5, ‘de’: 5, ‘ation’: 5, ‘for‘: 4, ‘st’: 4, ‘ing‘: 4, ‘in‘: 4, ‘us’: 4, ‘ang’: 4, ‘ag’: 4, ‘ic’: 4})
Number of tokens: 54
==========
Iter: 23
Best pair: (‘i’, ‘s‘)
Tokens: defaultdict(, {‘T’: 3, ‘h’: 6, ‘e’: 18, ‘‘: 22, ‘a’: 16, ‘i’: 6, ‘m’: 12, ‘s’: 16, ‘f’: 4, ‘o’: 13, ‘r’: 14, ‘t’: 13, ‘u’: 10, ‘b’: 2, ‘j’: 1, ‘c’: 9, ‘d’: 4, ‘n’: 8, ‘v’: 3, ‘l’: 16, ‘p’: 11, ‘g’: 5, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 3, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 5, ‘e‘: 16, ‘an’: 7, ‘s‘: 7, ‘ing’: 5, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 7, ‘d‘: 6, ‘ion’: 1, ‘for’: 1, ‘th’: 5, ‘de’: 5, ‘ation’: 5, ‘for‘: 4, ‘st’: 4, ‘ing‘: 4, ‘in‘: 4, ‘us’: 4, ‘ang’: 4, ‘ag’: 4, ‘ic’: 4, ‘is‘: 3})
Number of tokens: 55
==========
Iter: 24
Best pair: (‘n’, ‘t’)
Tokens: defaultdict(, {‘T’: 3, ‘h’: 6, ‘e’: 18, ‘‘: 22, ‘a’: 16, ‘i’: 6, ‘m’: 12, ‘s’: 16, ‘f’: 4, ‘o’: 13, ‘r’: 14, ‘t’: 10, ‘u’: 10, ‘b’: 2, ‘j’: 1, ‘c’: 9, ‘d’: 4, ‘n’: 5, ‘v’: 3, ‘l’: 16, ‘p’: 11, ‘g’: 5, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 3, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 5, ‘e‘: 16, ‘an’: 7, ‘s‘: 7, ‘ing’: 5, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 7, ‘d‘: 6, ‘ion’: 1, ‘for’: 1, ‘th’: 5, ‘de’: 5, ‘ation’: 5, ‘for‘: 4, ‘st’: 4, ‘ing‘: 4, ‘in‘: 4, ‘us’: 4, ‘ang’: 4, ‘ag’: 4, ‘ic’: 4, ‘is‘: 3, ‘nt’: 3})
Number of tokens: 56
==========
Iter: 25
Best pair: (‘v’, ‘e’)
Tokens: defaultdict(, {‘T’: 3, ‘h’: 6, ‘e’: 15, ‘‘: 22, ‘a’: 16, ‘i’: 6, ‘m’: 12, ‘s’: 16, ‘f’: 4, ‘o’: 13, ‘r’: 14, ‘t’: 10, ‘u’: 10, ‘b’: 2, ‘j’: 1, ‘c’: 9, ‘d’: 4, ‘n’: 5, ‘v’: 0, ‘l’: 16, ‘p’: 11, ‘g’: 5, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 3, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 5, ‘e‘: 16, ‘an’: 7, ‘s‘: 7, ‘ing’: 5, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 7, ‘d‘: 6, ‘ion’: 1, ‘for’: 1, ‘th’: 5, ‘de’: 5, ‘ation’: 5, ‘for‘: 4, ‘st’: 4, ‘ing‘: 4, ‘in‘: 4, ‘us’: 4, ‘ang’: 4, ‘ag’: 4, ‘ic’: 4, ‘is‘: 3, ‘nt’: 3, ‘ve’: 3})
Number of tokens: 57
==========
Iter: 26
Best pair: (‘o’, ‘p’)
Tokens: defaultdict(, {‘T’: 3, ‘h’: 6, ‘e’: 15, ‘‘: 22, ‘a’: 16, ‘i’: 6, ‘m’: 12, ‘s’: 16, ‘f’: 4, ‘o’: 10, ‘r’: 14, ‘t’: 10, ‘u’: 10, ‘b’: 2, ‘j’: 1, ‘c’: 9, ‘d’: 4, ‘n’: 5, ‘v’: 0, ‘l’: 16, ‘p’: 8, ‘g’: 5, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 3, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 5, ‘e‘: 16, ‘an’: 7, ‘s‘: 7, ‘ing’: 5, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 7, ‘d‘: 6, ‘ion’: 1, ‘for’: 1, ‘th’: 5, ‘de’: 5, ‘ation’: 5, ‘for‘: 4, ‘st’: 4, ‘ing‘: 4, ‘in‘: 4, ‘us’: 4, ‘ang’: 4, ‘ag’: 4, ‘ic’: 4, ‘is‘: 3, ‘nt’: 3, ‘ve’: 3, ‘op’: 3})
Number of tokens: 58
==========
Iter: 27
Best pair: (‘o’, ‘f’)
Tokens: defaultdict(, {‘T’: 3, ‘h’: 6, ‘e’: 15, ‘‘: 22, ‘a’: 16, ‘i’: 6, ‘m’: 12, ‘s’: 16, ‘f’: 1, ‘o’: 7, ‘r’: 14, ‘t’: 10, ‘u’: 10, ‘b’: 2, ‘j’: 1, ‘c’: 9, ‘d’: 4, ‘n’: 5, ‘v’: 0, ‘l’: 16, ‘p’: 8, ‘g’: 5, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 3, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 5, ‘e‘: 16, ‘an’: 7, ‘s‘: 7, ‘ing’: 5, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 7, ‘d‘: 6, ‘ion’: 1, ‘for’: 1, ‘th’: 5, ‘de’: 5, ‘ation’: 5, ‘for‘: 4, ‘st’: 4, ‘ing‘: 4, ‘in‘: 4, ‘us’: 4, ‘ang’: 4, ‘ag’: 4, ‘ic’: 4, ‘is‘: 3, ‘nt’: 3, ‘ve’: 3, ‘op’: 3, ‘of’: 3})
Number of tokens: 59
==========
Iter: 28
Best pair: (‘a’, ‘l’)
Tokens: defaultdict(, {‘T’: 3, ‘h’: 6, ‘e’: 15, ‘‘: 22, ‘a’: 13, ‘i’: 6, ‘m’: 12, ‘s’: 16, ‘f’: 1, ‘o’: 7, ‘r’: 14, ‘t’: 10, ‘u’: 10, ‘b’: 2, ‘j’: 1, ‘c’: 9, ‘d’: 4, ‘n’: 5, ‘v’: 0, ‘l’: 13, ‘p’: 8, ‘g’: 5, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 3, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 5, ‘e‘: 16, ‘an’: 7, ‘s‘: 7, ‘ing’: 5, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 7, ‘d‘: 6, ‘ion’: 1, ‘for’: 1, ‘th’: 5, ‘de’: 5, ‘ation’: 5, ‘for‘: 4, ‘st’: 4, ‘ing‘: 4, ‘in‘: 4, ‘us’: 4, ‘ang’: 4, ‘ag’: 4, ‘ic’: 4, ‘is‘: 3, ‘nt’: 3, ‘ve’: 3, ‘op’: 3, ‘of’: 3, ‘al’: 3})
Number of tokens: 60
==========
Iter: 29
Best pair: (‘e’, ‘d‘)
Tokens: defaultdict(, {‘T’: 3, ‘h’: 6, ‘e’: 12, ‘‘: 22, ‘a’: 13, ‘i’: 6, ‘m’: 12, ‘s’: 16, ‘f’: 1, ‘o’: 7, ‘r’: 14, ‘t’: 10, ‘u’: 10, ‘b’: 2, ‘j’: 1, ‘c’: 9, ‘d’: 4, ‘n’: 5, ‘v’: 0, ‘l’: 13, ‘p’: 8, ‘g’: 5, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 3, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 5, ‘e‘: 16, ‘an’: 7, ‘s‘: 7, ‘ing’: 5, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 7, ‘d‘: 3, ‘ion’: 1, ‘for’: 1, ‘th’: 5, ‘de’: 5, ‘ation’: 5, ‘for‘: 4, ‘st’: 4, ‘ing‘: 4, ‘in‘: 4, ‘us’: 4, ‘ang’: 4, ‘ag’: 4, ‘ic’: 4, ‘is‘: 3, ‘nt’: 3, ‘ve’: 3, ‘op’: 3, ‘of’: 3, ‘al’: 3, ‘ed‘: 3})
Number of tokens: 61
==========
Iter: 30
Best pair: (‘l’, ‘ang’)
Tokens: defaultdict(, {‘T’: 3, ‘h’: 6, ‘e’: 12, ‘‘: 22, ‘a’: 13, ‘i’: 6, ‘m’: 12, ‘s’: 16, ‘f’: 1, ‘o’: 7, ‘r’: 14, ‘t’: 10, ‘u’: 10, ‘b’: 2, ‘j’: 1, ‘c’: 9, ‘d’: 4, ‘n’: 5, ‘v’: 0, ‘l’: 10, ‘p’: 8, ‘g’: 5, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 3, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 5, ‘e‘: 16, ‘an’: 7, ‘s‘: 7, ‘ing’: 5, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 7, ‘d‘: 3, ‘ion’: 1, ‘for’: 1, ‘th’: 5, ‘de’: 5, ‘ation’: 5, ‘for‘: 4, ‘st’: 4, ‘ing‘: 4, ‘in‘: 4, ‘us’: 4, ‘ang’: 1, ‘ag’: 4, ‘ic’: 4, ‘is‘: 3, ‘nt’: 3, ‘ve’: 3, ‘op’: 3, ‘of’: 3, ‘al’: 3, ‘ed‘: 3, ‘lang’: 3})
Number of tokens: 62
==========
Iter: 31
Best pair: (‘lang’, ‘u’)
Tokens: defaultdict(, {‘T’: 3, ‘h’: 6, ‘e’: 12, ‘‘: 22, ‘a’: 13, ‘i’: 6, ‘m’: 12, ‘s’: 16, ‘f’: 1, ‘o’: 7, ‘r’: 14, ‘t’: 10, ‘u’: 7, ‘b’: 2, ‘j’: 1, ‘c’: 9, ‘d’: 4, ‘n’: 5, ‘v’: 0, ‘l’: 10, ‘p’: 8, ‘g’: 5, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 3, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 5, ‘e‘: 16, ‘an’: 7, ‘s‘: 7, ‘ing’: 5, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 7, ‘d‘: 3, ‘ion’: 1, ‘for’: 1, ‘th’: 5, ‘de’: 5, ‘ation’: 5, ‘for‘: 4, ‘st’: 4, ‘ing‘: 4, ‘in‘: 4, ‘us’: 4, ‘ang’: 1, ‘ag’: 4, ‘ic’: 4, ‘is‘: 3, ‘nt’: 3, ‘ve’: 3, ‘op’: 3, ‘of’: 3, ‘al’: 3, ‘ed‘: 3, ‘lang’: 0, ‘langu’: 3})
Number of tokens: 63
==========
Iter: 32
Best pair: (‘langu’, ‘ag’)
Tokens: defaultdict(, {‘T’: 3, ‘h’: 6, ‘e’: 12, ‘‘: 22, ‘a’: 13, ‘i’: 6, ‘m’: 12, ‘s’: 16, ‘f’: 1, ‘o’: 7, ‘r’: 14, ‘t’: 10, ‘u’: 7, ‘b’: 2, ‘j’: 1, ‘c’: 9, ‘d’: 4, ‘n’: 5, ‘v’: 0, ‘l’: 10, ‘p’: 8, ‘g’: 5, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 3, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 5, ‘e‘: 16, ‘an’: 7, ‘s‘: 7, ‘ing’: 5, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 7, ‘d‘: 3, ‘ion’: 1, ‘for’: 1, ‘th’: 5, ‘de’: 5, ‘ation’: 5, ‘for‘: 4, ‘st’: 4, ‘ing‘: 4, ‘in‘: 4, ‘us’: 4, ‘ang’: 1, ‘ag’: 1, ‘ic’: 4, ‘is‘: 3, ‘nt’: 3, ‘ve’: 3, ‘op’: 3, ‘of’: 3, ‘al’: 3, ‘ed‘: 3, ‘lang’: 0, ‘langu’: 0, ‘languag’: 3})
Number of tokens: 64
==========
Iter: 33
Best pair: (‘languag’, ‘e‘)
Tokens: defaultdict(, {‘T’: 3, ‘h’: 6, ‘e’: 12, ‘‘: 22, ‘a’: 13, ‘i’: 6, ‘m’: 12, ‘s’: 16, ‘f’: 1, ‘o’: 7, ‘r’: 14, ‘t’: 10, ‘u’: 7, ‘b’: 2, ‘j’: 1, ‘c’: 9, ‘d’: 4, ‘n’: 5, ‘v’: 0, ‘l’: 10, ‘p’: 8, ‘g’: 5, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 3, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 5, ‘e‘: 13, ‘an’: 7, ‘s‘: 7, ‘ing’: 5, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 7, ‘d‘: 3, ‘ion’: 1, ‘for’: 1, ‘th’: 5, ‘de’: 5, ‘ation’: 5, ‘for‘: 4, ‘st’: 4, ‘ing‘: 4, ‘in‘: 4, ‘us’: 4, ‘ang’: 1, ‘ag’: 1, ‘ic’: 4, ‘is‘: 3, ‘nt’: 3, ‘ve’: 3, ‘op’: 3, ‘of’: 3, ‘al’: 3, ‘ed‘: 3, ‘lang’: 0, ‘langu’: 0, ‘languag’: 0, ‘language‘: 3})
Number of tokens: 65
==========
Iter: 34
Best pair: (‘s’, ‘s’)
Tokens: defaultdict(, {‘T’: 3, ‘h’: 6, ‘e’: 12, ‘‘: 22, ‘a’: 13, ‘i’: 6, ‘m’: 12, ‘s’: 10, ‘f’: 1, ‘o’: 7, ‘r’: 14, ‘t’: 10, ‘u’: 7, ‘b’: 2, ‘j’: 1, ‘c’: 9, ‘d’: 4, ‘n’: 5, ‘v’: 0, ‘l’: 10, ‘p’: 8, ‘g’: 5, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 3, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 5, ‘e‘: 13, ‘an’: 7, ‘s‘: 7, ‘ing’: 5, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 7, ‘d‘: 3, ‘ion’: 1, ‘for’: 1, ‘th’: 5, ‘de’: 5, ‘ation’: 5, ‘for‘: 4, ‘st’: 4, ‘ing‘: 4, ‘in‘: 4, ‘us’: 4, ‘ang’: 1, ‘ag’: 1, ‘ic’: 4, ‘is‘: 3, ‘nt’: 3, ‘ve’: 3, ‘op’: 3, ‘of’: 3, ‘al’: 3, ‘ed‘: 3, ‘lang’: 0, ‘langu’: 0, ‘languag’: 0, ‘language‘: 3, ‘ss’: 3})
Number of tokens: 66
==========
Iter: 35
Best pair: (‘ing’, ‘,‘)
Tokens: defaultdict(, {‘T’: 3, ‘h’: 6, ‘e’: 12, ‘‘: 22, ‘a’: 13, ‘i’: 6, ‘m’: 12, ‘s’: 10, ‘f’: 1, ‘o’: 7, ‘r’: 14, ‘t’: 10, ‘u’: 7, ‘b’: 2, ‘j’: 1, ‘c’: 9, ‘d’: 4, ‘n’: 5, ‘v’: 0, ‘l’: 10, ‘p’: 8, ‘g’: 5, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 3, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 5, ‘e‘: 13, ‘an’: 7, ‘s‘: 7, ‘ing’: 2, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 4, ‘d‘: 3, ‘ion’: 1, ‘for’: 1, ‘th’: 5, ‘de’: 5, ‘ation’: 5, ‘for‘: 4, ‘st’: 4, ‘ing‘: 4, ‘in‘: 4, ‘us’: 4, ‘ang’: 1, ‘ag’: 1, ‘ic’: 4, ‘is‘: 3, ‘nt’: 3, ‘ve’: 3, ‘op’: 3, ‘of’: 3, ‘al’: 3, ‘ed‘: 3, ‘lang’: 0, ‘langu’: 0, ‘languag’: 0, ‘language‘: 3, ‘ss’: 3, ‘ing,‘: 3})
Number of tokens: 67
==========
Iter: 36
Best pair: (‘c’, ‘l’)
Tokens: defaultdict(, {‘T’: 3, ‘h’: 6, ‘e’: 12, ‘‘: 22, ‘a’: 13, ‘i’: 6, ‘m’: 12, ‘s’: 10, ‘f’: 1, ‘o’: 7, ‘r’: 14, ‘t’: 10, ‘u’: 7, ‘b’: 2, ‘j’: 1, ‘c’: 6, ‘d’: 4, ‘n’: 5, ‘v’: 0, ‘l’: 7, ‘p’: 8, ‘g’: 5, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 3, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 5, ‘e‘: 13, ‘an’: 7, ‘s‘: 7, ‘ing’: 2, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 4, ‘d‘: 3, ‘ion’: 1, ‘for’: 1, ‘th’: 5, ‘de’: 5, ‘ation’: 5, ‘for‘: 4, ‘st’: 4, ‘ing‘: 4, ‘in‘: 4, ‘us’: 4, ‘ang’: 1, ‘ag’: 1, ‘ic’: 4, ‘is‘: 3, ‘nt’: 3, ‘ve’: 3, ‘op’: 3, ‘of’: 3, ‘al’: 3, ‘ed‘: 3, ‘lang’: 0, ‘langu’: 0, ‘languag’: 0, ‘language‘: 3, ‘ss’: 3, ‘ing,‘: 3, ‘cl’: 3})
Number of tokens: 68
==========
Iter: 37
Best pair: (‘an’, ‘d‘)
Tokens: defaultdict(, {‘T’: 3, ‘h’: 6, ‘e’: 12, ‘‘: 22, ‘a’: 13, ‘i’: 6, ‘m’: 12, ‘s’: 10, ‘f’: 1, ‘o’: 7, ‘r’: 14, ‘t’: 10, ‘u’: 7, ‘b’: 2, ‘j’: 1, ‘c’: 6, ‘d’: 4, ‘n’: 5, ‘v’: 0, ‘l’: 7, ‘p’: 8, ‘g’: 5, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 3, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 5, ‘e‘: 13, ‘an’: 4, ‘s‘: 7, ‘ing’: 2, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 4, ‘d‘: 0, ‘ion’: 1, ‘for’: 1, ‘th’: 5, ‘de’: 5, ‘ation’: 5, ‘for‘: 4, ‘st’: 4, ‘ing‘: 4, ‘in‘: 4, ‘us’: 4, ‘ang’: 1, ‘ag’: 1, ‘ic’: 4, ‘is‘: 3, ‘nt’: 3, ‘ve’: 3, ‘op’: 3, ‘of’: 3, ‘al’: 3, ‘ed‘: 3, ‘lang’: 0, ‘langu’: 0, ‘languag’: 0, ‘language‘: 3, ‘ss’: 3, ‘ing,‘: 3, ‘cl’: 3, ‘and‘: 3})
Number of tokens: 69
==========
Iter: 38
Best pair: (‘.’, ‘‘)
Tokens: defaultdict(, {‘T’: 3, ‘h’: 6, ‘e’: 12, ‘‘: 19, ‘a’: 13, ‘i’: 6, ‘m’: 12, ‘s’: 10, ‘f’: 1, ‘o’: 7, ‘r’: 14, ‘t’: 10, ‘u’: 7, ‘b’: 2, ‘j’: 1, ‘c’: 6, ‘d’: 4, ‘n’: 5, ‘v’: 0, ‘l’: 7, ‘p’: 8, ‘g’: 5, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 0, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 5, ‘e‘: 13, ‘an’: 4, ‘s‘: 7, ‘ing’: 2, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 4, ‘d‘: 0, ‘ion’: 1, ‘for’: 1, ‘th’: 5, ‘de’: 5, ‘ation’: 5, ‘for‘: 4, ‘st’: 4, ‘ing‘: 4, ‘in‘: 4, ‘us’: 4, ‘ang’: 1, ‘ag’: 1, ‘ic’: 4, ‘is‘: 3, ‘nt’: 3, ‘ve’: 3, ‘op’: 3, ‘of’: 3, ‘al’: 3, ‘ed‘: 3, ‘lang’: 0, ‘langu’: 0, ‘languag’: 0, ‘language‘: 3, ‘ss’: 3, ‘ing,‘: 3, ‘cl’: 3, ‘and‘: 3, ‘.‘: 3})
Number of tokens: 70
==========
Iter: 39
Best pair: (‘a’, ‘r’)
Tokens: defaultdict(, {‘T’: 3, ‘h’: 6, ‘e’: 12, ‘‘: 19, ‘a’: 10, ‘i’: 6, ‘m’: 12, ‘s’: 10, ‘f’: 1, ‘o’: 7, ‘r’: 11, ‘t’: 10, ‘u’: 7, ‘b’: 2, ‘j’: 1, ‘c’: 6, ‘d’: 4, ‘n’: 5, ‘v’: 0, ‘l’: 7, ‘p’: 8, ‘g’: 5, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 0, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 5, ‘e‘: 13, ‘an’: 4, ‘s‘: 7, ‘ing’: 2, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 4, ‘d‘: 0, ‘ion’: 1, ‘for’: 1, ‘th’: 5, ‘de’: 5, ‘ation’: 5, ‘for‘: 4, ‘st’: 4, ‘ing‘: 4, ‘in‘: 4, ‘us’: 4, ‘ang’: 1, ‘ag’: 1, ‘ic’: 4, ‘is‘: 3, ‘nt’: 3, ‘ve’: 3, ‘op’: 3, ‘of’: 3, ‘al’: 3, ‘ed‘: 3, ‘lang’: 0, ‘langu’: 0, ‘languag’: 0, ‘language‘: 3, ‘ss’: 3, ‘ing,‘: 3, ‘cl’: 3, ‘and‘: 3, ‘.‘: 3, ‘ar’: 3})
Number of tokens: 71
==========
Iter: 40
Best pair: (‘T’, ‘h’)
Tokens: defaultdict(, {‘T’: 1, ‘h’: 4, ‘e’: 12, ‘‘: 19, ‘a’: 10, ‘i’: 6, ‘m’: 12, ‘s’: 10, ‘f’: 1, ‘o’: 7, ‘r’: 11, ‘t’: 10, ‘u’: 7, ‘b’: 2, ‘j’: 1, ‘c’: 6, ‘d’: 4, ‘n’: 5, ‘v’: 0, ‘l’: 7, ‘p’: 8, ‘g’: 5, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 0, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 5, ‘e‘: 13, ‘an’: 4, ‘s‘: 7, ‘ing’: 2, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 4, ‘d‘: 0, ‘ion’: 1, ‘for’: 1, ‘th’: 5, ‘de’: 5, ‘ation’: 5, ‘for‘: 4, ‘st’: 4, ‘ing‘: 4, ‘in‘: 4, ‘us’: 4, ‘ang’: 1, ‘ag’: 1, ‘ic’: 4, ‘is‘: 3, ‘nt’: 3, ‘ve’: 3, ‘op’: 3, ‘of’: 3, ‘al’: 3, ‘ed‘: 3, ‘lang’: 0, ‘langu’: 0, ‘languag’: 0, ‘language‘: 3, ‘ss’: 3, ‘ing,‘: 3, ‘cl’: 3, ‘and‘: 3, ‘.‘: 3, ‘ar’: 3, ‘Th’: 2})
Number of tokens: 72
==========
Iter: 41
Best pair: (‘Th’, ‘e‘)
Tokens: defaultdict(, {‘T’: 1, ‘h’: 4, ‘e’: 12, ‘‘: 19, ‘a’: 10, ‘i’: 6, ‘m’: 12, ‘s’: 10, ‘f’: 1, ‘o’: 7, ‘r’: 11, ‘t’: 10, ‘u’: 7, ‘b’: 2, ‘j’: 1, ‘c’: 6, ‘d’: 4, ‘n’: 5, ‘v’: 0, ‘l’: 7, ‘p’: 8, ‘g’: 5, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 0, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 5, ‘e‘: 11, ‘an’: 4, ‘s‘: 7, ‘ing’: 2, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 4, ‘d‘: 0, ‘ion’: 1, ‘for’: 1, ‘th’: 5, ‘de’: 5, ‘ation’: 5, ‘for‘: 4, ‘st’: 4, ‘ing‘: 4, ‘in‘: 4, ‘us’: 4, ‘ang’: 1, ‘ag’: 1, ‘ic’: 4, ‘is‘: 3, ‘nt’: 3, ‘ve’: 3, ‘op’: 3, ‘of’: 3, ‘al’: 3, ‘ed‘: 3, ‘lang’: 0, ‘langu’: 0, ‘languag’: 0, ‘language‘: 3, ‘ss’: 3, ‘ing,‘: 3, ‘cl’: 3, ‘and‘: 3, ‘.‘: 3, ‘ar’: 3, ‘Th’: 0, ‘The‘: 2})
Number of tokens: 73
==========
Iter: 42
Best pair: (‘m’, ‘s‘)
Tokens: defaultdict(, {‘T’: 1, ‘h’: 4, ‘e’: 12, ‘‘: 19, ‘a’: 10, ‘i’: 6, ‘m’: 10, ‘s’: 10, ‘f’: 1, ‘o’: 7, ‘r’: 11, ‘t’: 10, ‘u’: 7, ‘b’: 2, ‘j’: 1, ‘c’: 6, ‘d’: 4, ‘n’: 5, ‘v’: 0, ‘l’: 7, ‘p’: 8, ‘g’: 5, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 0, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 5, ‘e‘: 11, ‘an’: 4, ‘s‘: 5, ‘ing’: 2, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 4, ‘d‘: 0, ‘ion’: 1, ‘for’: 1, ‘th’: 5, ‘de’: 5, ‘ation’: 5, ‘for‘: 4, ‘st’: 4, ‘ing‘: 4, ‘in‘: 4, ‘us’: 4, ‘ang’: 1, ‘ag’: 1, ‘ic’: 4, ‘is‘: 3, ‘nt’: 3, ‘ve’: 3, ‘op’: 3, ‘of’: 3, ‘al’: 3, ‘ed‘: 3, ‘lang’: 0, ‘langu’: 0, ‘languag’: 0, ‘language‘: 3, ‘ss’: 3, ‘ing,‘: 3, ‘cl’: 3, ‘and‘: 3, ‘.‘: 3, ‘ar’: 3, ‘Th’: 0, ‘The‘: 2, ‘ms‘: 2})
Number of tokens: 74
==========
Iter: 43
Best pair: (‘e’, ‘c’)
Tokens: defaultdict(, {‘T’: 1, ‘h’: 4, ‘e’: 10, ‘‘: 19, ‘a’: 10, ‘i’: 6, ‘m’: 10, ‘s’: 10, ‘f’: 1, ‘o’: 7, ‘r’: 11, ‘t’: 10, ‘u’: 7, ‘b’: 2, ‘j’: 1, ‘c’: 4, ‘d’: 4, ‘n’: 5, ‘v’: 0, ‘l’: 7, ‘p’: 8, ‘g’: 5, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 0, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 5, ‘e‘: 11, ‘an’: 4, ‘s‘: 5, ‘ing’: 2, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 4, ‘d‘: 0, ‘ion’: 1, ‘for’: 1, ‘th’: 5, ‘de’: 5, ‘ation’: 5, ‘for‘: 4, ‘st’: 4, ‘ing‘: 4, ‘in‘: 4, ‘us’: 4, ‘ang’: 1, ‘ag’: 1, ‘ic’: 4, ‘is‘: 3, ‘nt’: 3, ‘ve’: 3, ‘op’: 3, ‘of’: 3, ‘al’: 3, ‘ed‘: 3, ‘lang’: 0, ‘langu’: 0, ‘languag’: 0, ‘language‘: 3, ‘ss’: 3, ‘ing,‘: 3, ‘cl’: 3, ‘and‘: 3, ‘.‘: 3, ‘ar’: 3, ‘Th’: 0, ‘The‘: 2, ‘ms‘: 2, ‘ec’: 2})
Number of tokens: 75
==========
Iter: 44
Best pair: (‘t’, ‘‘)
Tokens: defaultdict(, {‘T’: 1, ‘h’: 4, ‘e’: 10, ‘‘: 17, ‘a’: 10, ‘i’: 6, ‘m’: 10, ‘s’: 10, ‘f’: 1, ‘o’: 7, ‘r’: 11, ‘t’: 8, ‘u’: 7, ‘b’: 2, ‘j’: 1, ‘c’: 4, ‘d’: 4, ‘n’: 5, ‘v’: 0, ‘l’: 7, ‘p’: 8, ‘g’: 5, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 0, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 5, ‘e‘: 11, ‘an’: 4, ‘s‘: 5, ‘ing’: 2, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 4, ‘d‘: 0, ‘ion’: 1, ‘for’: 1, ‘th’: 5, ‘de’: 5, ‘ation’: 5, ‘for‘: 4, ‘st’: 4, ‘ing‘: 4, ‘in‘: 4, ‘us’: 4, ‘ang’: 1, ‘ag’: 1, ‘ic’: 4, ‘is‘: 3, ‘nt’: 3, ‘ve’: 3, ‘op’: 3, ‘of’: 3, ‘al’: 3, ‘ed‘: 3, ‘lang’: 0, ‘langu’: 0, ‘languag’: 0, ‘language‘: 3, ‘ss’: 3, ‘ing,‘: 3, ‘cl’: 3, ‘and‘: 3, ‘.‘: 3, ‘ar’: 3, ‘Th’: 0, ‘The‘: 2, ‘ms‘: 2, ‘ec’: 2, ‘t‘: 2})
Number of tokens: 76
==========
Iter: 45
Best pair: (‘nt’, ‘s‘)
Tokens: defaultdict(, {‘T’: 1, ‘h’: 4, ‘e’: 10, ‘‘: 17, ‘a’: 10, ‘i’: 6, ‘m’: 10, ‘s’: 10, ‘f’: 1, ‘o’: 7, ‘r’: 11, ‘t’: 8, ‘u’: 7, ‘b’: 2, ‘j’: 1, ‘c’: 4, ‘d’: 4, ‘n’: 5, ‘v’: 0, ‘l’: 7, ‘p’: 8, ‘g’: 5, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 0, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 5, ‘e‘: 11, ‘an’: 4, ‘s‘: 3, ‘ing’: 2, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 4, ‘d‘: 0, ‘ion’: 1, ‘for’: 1, ‘th’: 5, ‘de’: 5, ‘ation’: 5, ‘for‘: 4, ‘st’: 4, ‘ing‘: 4, ‘in‘: 4, ‘us’: 4, ‘ang’: 1, ‘ag’: 1, ‘ic’: 4, ‘is‘: 3, ‘nt’: 1, ‘ve’: 3, ‘op’: 3, ‘of’: 3, ‘al’: 3, ‘ed‘: 3, ‘lang’: 0, ‘langu’: 0, ‘languag’: 0, ‘language‘: 3, ‘ss’: 3, ‘ing,‘: 3, ‘cl’: 3, ‘and‘: 3, ‘.‘: 3, ‘ar’: 3, ‘Th’: 0, ‘The‘: 2, ‘ms‘: 2, ‘ec’: 2, ‘t‘: 2, ‘nts‘: 2})
Number of tokens: 77
==========
Iter: 46
Best pair: (‘t’, ‘o’)
Tokens: defaultdict(, {‘T’: 1, ‘h’: 4, ‘e’: 10, ‘‘: 17, ‘a’: 10, ‘i’: 6, ‘m’: 10, ‘s’: 10, ‘f’: 1, ‘o’: 5, ‘r’: 11, ‘t’: 6, ‘u’: 7, ‘b’: 2, ‘j’: 1, ‘c’: 4, ‘d’: 4, ‘n’: 5, ‘v’: 0, ‘l’: 7, ‘p’: 8, ‘g’: 5, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 0, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 5, ‘e‘: 11, ‘an’: 4, ‘s‘: 3, ‘ing’: 2, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 4, ‘d‘: 0, ‘ion’: 1, ‘for’: 1, ‘th’: 5, ‘de’: 5, ‘ation’: 5, ‘for‘: 4, ‘st’: 4, ‘ing‘: 4, ‘in‘: 4, ‘us’: 4, ‘ang’: 1, ‘ag’: 1, ‘ic’: 4, ‘is‘: 3, ‘nt’: 1, ‘ve’: 3, ‘op’: 3, ‘of’: 3, ‘al’: 3, ‘ed‘: 3, ‘lang’: 0, ‘langu’: 0, ‘languag’: 0, ‘language‘: 3, ‘ss’: 3, ‘ing,‘: 3, ‘cl’: 3, ‘and‘: 3, ‘.‘: 3, ‘ar’: 3, ‘Th’: 0, ‘The‘: 2, ‘ms‘: 2, ‘ec’: 2, ‘t‘: 2, ‘nts‘: 2, ‘to’: 2})
Number of tokens: 78
==========
Iter: 47
Best pair: (‘to’, ‘‘)
Tokens: defaultdict(, {‘T’: 1, ‘h’: 4, ‘e’: 10, ‘‘: 15, ‘a’: 10, ‘i’: 6, ‘m’: 10, ‘s’: 10, ‘f’: 1, ‘o’: 5, ‘r’: 11, ‘t’: 6, ‘u’: 7, ‘b’: 2, ‘j’: 1, ‘c’: 4, ‘d’: 4, ‘n’: 5, ‘v’: 0, ‘l’: 7, ‘p’: 8, ‘g’: 5, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 0, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 5, ‘e‘: 11, ‘an’: 4, ‘s‘: 3, ‘ing’: 2, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 4, ‘d‘: 0, ‘ion’: 1, ‘for’: 1, ‘th’: 5, ‘de’: 5, ‘ation’: 5, ‘for‘: 4, ‘st’: 4, ‘ing‘: 4, ‘in‘: 4, ‘us’: 4, ‘ang’: 1, ‘ag’: 1, ‘ic’: 4, ‘is‘: 3, ‘nt’: 1, ‘ve’: 3, ‘op’: 3, ‘of’: 3, ‘al’: 3, ‘ed‘: 3, ‘lang’: 0, ‘langu’: 0, ‘languag’: 0, ‘language‘: 3, ‘ss’: 3, ‘ing,‘: 3, ‘cl’: 3, ‘and‘: 3, ‘.‘: 3, ‘ar’: 3, ‘Th’: 0, ‘The‘: 2, ‘ms‘: 2, ‘ec’: 2, ‘t‘: 2, ‘nts‘: 2, ‘to’: 0, ‘to‘: 2})
Number of tokens: 79
==========
Iter: 48
Best pair: (‘d’, ‘ing‘)
Tokens: defaultdict(, {‘T’: 1, ‘h’: 4, ‘e’: 10, ‘‘: 15, ‘a’: 10, ‘i’: 6, ‘m’: 10, ‘s’: 10, ‘f’: 1, ‘o’: 5, ‘r’: 11, ‘t’: 6, ‘u’: 7, ‘b’: 2, ‘j’: 1, ‘c’: 4, ‘d’: 2, ‘n’: 5, ‘v’: 0, ‘l’: 7, ‘p’: 8, ‘g’: 5, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 0, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 5, ‘e‘: 11, ‘an’: 4, ‘s‘: 3, ‘ing’: 2, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 4, ‘d‘: 0, ‘ion’: 1, ‘for’: 1, ‘th’: 5, ‘de’: 5, ‘ation’: 5, ‘for‘: 4, ‘st’: 4, ‘ing‘: 2, ‘in‘: 4, ‘us’: 4, ‘ang’: 1, ‘ag’: 1, ‘ic’: 4, ‘is‘: 3, ‘nt’: 1, ‘ve’: 3, ‘op’: 3, ‘of’: 3, ‘al’: 3, ‘ed‘: 3, ‘lang’: 0, ‘langu’: 0, ‘languag’: 0, ‘language‘: 3, ‘ss’: 3, ‘ing,‘: 3, ‘cl’: 3, ‘and‘: 3, ‘.‘: 3, ‘ar’: 3, ‘Th’: 0, ‘The‘: 2, ‘ms‘: 2, ‘ec’: 2, ‘t‘: 2, ‘nts‘: 2, ‘to’: 0, ‘to‘: 2, ‘ding‘: 2})
Number of tokens: 80
==========
Iter: 49
Best pair: (‘of’, ‘‘)
Tokens: defaultdict(, {‘T’: 1, ‘h’: 4, ‘e’: 10, ‘‘: 13, ‘a’: 10, ‘i’: 6, ‘m’: 10, ‘s’: 10, ‘f’: 1, ‘o’: 5, ‘r’: 11, ‘t’: 6, ‘u’: 7, ‘b’: 2, ‘j’: 1, ‘c’: 4, ‘d’: 2, ‘n’: 5, ‘v’: 0, ‘l’: 7, ‘p’: 8, ‘g’: 5, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 0, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 5, ‘e‘: 11, ‘an’: 4, ‘s‘: 3, ‘ing’: 2, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 4, ‘d‘: 0, ‘ion’: 1, ‘for’: 1, ‘th’: 5, ‘de’: 5, ‘ation’: 5, ‘for‘: 4, ‘st’: 4, ‘ing‘: 2, ‘in‘: 4, ‘us’: 4, ‘ang’: 1, ‘ag’: 1, ‘ic’: 4, ‘is‘: 3, ‘nt’: 1, ‘ve’: 3, ‘op’: 3, ‘of’: 1, ‘al’: 3, ‘ed‘: 3, ‘lang’: 0, ‘langu’: 0, ‘languag’: 0, ‘language‘: 3, ‘ss’: 3, ‘ing,‘: 3, ‘cl’: 3, ‘and‘: 3, ‘.‘: 3, ‘ar’: 3, ‘Th’: 0, ‘The‘: 2, ‘ms‘: 2, ‘ec’: 2, ‘t‘: 2, ‘nts‘: 2, ‘to’: 0, ‘to‘: 2, ‘ding‘: 2, ‘of‘: 2})
Number of tokens: 81
==========
Iter: 50
Best pair: (‘th’, ‘e‘)
Tokens: defaultdict(, {‘T’: 1, ‘h’: 4, ‘e’: 10, ‘‘: 13, ‘a’: 10, ‘i’: 6, ‘m’: 10, ‘s’: 10, ‘f’: 1, ‘o’: 5, ‘r’: 11, ‘t’: 6, ‘u’: 7, ‘b’: 2, ‘j’: 1, ‘c’: 4, ‘d’: 2, ‘n’: 5, ‘v’: 0, ‘l’: 7, ‘p’: 8, ‘g’: 5, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 0, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 5, ‘e‘: 9, ‘an’: 4, ‘s‘: 3, ‘ing’: 2, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 4, ‘d‘: 0, ‘ion’: 1, ‘for’: 1, ‘th’: 3, ‘de’: 5, ‘ation’: 5, ‘for‘: 4, ‘st’: 4, ‘ing‘: 2, ‘in‘: 4, ‘us’: 4, ‘ang’: 1, ‘ag’: 1, ‘ic’: 4, ‘is‘: 3, ‘nt’: 1, ‘ve’: 3, ‘op’: 3, ‘of’: 1, ‘al’: 3, ‘ed‘: 3, ‘lang’: 0, ‘langu’: 0, ‘languag’: 0, ‘language‘: 3, ‘ss’: 3, ‘ing,‘: 3, ‘cl’: 3, ‘and‘: 3, ‘.‘: 3, ‘ar’: 3, ‘Th’: 0, ‘The‘: 2, ‘ms‘: 2, ‘ec’: 2, ‘t‘: 2, ‘nts‘: 2, ‘to’: 0, ‘to‘: 2, ‘ding‘: 2, ‘of‘: 2, ‘the‘: 2})
Number of tokens: 82
==========
Iter: 51
Best pair: (‘m’, ‘a’)
Tokens: defaultdict(, {‘T’: 1, ‘h’: 4, ‘e’: 10, ‘‘: 13, ‘a’: 8, ‘i’: 6, ‘m’: 8, ‘s’: 10, ‘f’: 1, ‘o’: 5, ‘r’: 11, ‘t’: 6, ‘u’: 7, ‘b’: 2, ‘j’: 1, ‘c’: 4, ‘d’: 2, ‘n’: 5, ‘v’: 0, ‘l’: 7, ‘p’: 8, ‘g’: 5, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 0, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 5, ‘e‘: 9, ‘an’: 4, ‘s‘: 3, ‘ing’: 2, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 4, ‘d‘: 0, ‘ion’: 1, ‘for’: 1, ‘th’: 3, ‘de’: 5, ‘ation’: 5, ‘for‘: 4, ‘st’: 4, ‘ing‘: 2, ‘in‘: 4, ‘us’: 4, ‘ang’: 1, ‘ag’: 1, ‘ic’: 4, ‘is‘: 3, ‘nt’: 1, ‘ve’: 3, ‘op’: 3, ‘of’: 1, ‘al’: 3, ‘ed‘: 3, ‘lang’: 0, ‘langu’: 0, ‘languag’: 0, ‘language‘: 3, ‘ss’: 3, ‘ing,‘: 3, ‘cl’: 3, ‘and‘: 3, ‘.‘: 3, ‘ar’: 3, ‘Th’: 0, ‘The‘: 2, ‘ms‘: 2, ‘ec’: 2, ‘t‘: 2, ‘nts‘: 2, ‘to’: 0, ‘to‘: 2, ‘ding‘: 2, ‘of‘: 2, ‘the‘: 2, ‘ma’: 2})
Number of tokens: 83
==========
Iter: 52
Best pair: (‘us’, ‘ed‘)
Tokens: defaultdict(, {‘T’: 1, ‘h’: 4, ‘e’: 10, ‘‘: 13, ‘a’: 8, ‘i’: 6, ‘m’: 8, ‘s’: 10, ‘f’: 1, ‘o’: 5, ‘r’: 11, ‘t’: 6, ‘u’: 7, ‘b’: 2, ‘j’: 1, ‘c’: 4, ‘d’: 2, ‘n’: 5, ‘v’: 0, ‘l’: 7, ‘p’: 8, ‘g’: 5, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 0, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 5, ‘e‘: 9, ‘an’: 4, ‘s‘: 3, ‘ing’: 2, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 4, ‘d‘: 0, ‘ion’: 1, ‘for’: 1, ‘th’: 3, ‘de’: 5, ‘ation’: 5, ‘for‘: 4, ‘st’: 4, ‘ing‘: 2, ‘in‘: 4, ‘us’: 2, ‘ang’: 1, ‘ag’: 1, ‘ic’: 4, ‘is‘: 3, ‘nt’: 1, ‘ve’: 3, ‘op’: 3, ‘of’: 1, ‘al’: 3, ‘ed‘: 1, ‘lang’: 0, ‘langu’: 0, ‘languag’: 0, ‘language‘: 3, ‘ss’: 3, ‘ing,‘: 3, ‘cl’: 3, ‘and‘: 3, ‘.‘: 3, ‘ar’: 3, ‘Th’: 0, ‘The‘: 2, ‘ms‘: 2, ‘ec’: 2, ‘t‘: 2, ‘nts‘: 2, ‘to’: 0, ‘to‘: 2, ‘ding‘: 2, ‘of‘: 2, ‘the‘: 2, ‘ma’: 2, ‘used‘: 2})
Number of tokens: 84
==========
Iter: 53
Best pair: (‘p’, ‘r’)
Tokens: defaultdict(, {‘T’: 1, ‘h’: 4, ‘e’: 10, ‘‘: 13, ‘a’: 8, ‘i’: 6, ‘m’: 8, ‘s’: 10, ‘f’: 1, ‘o’: 5, ‘r’: 9, ‘t’: 6, ‘u’: 7, ‘b’: 2, ‘j’: 1, ‘c’: 4, ‘d’: 2, ‘n’: 5, ‘v’: 0, ‘l’: 7, ‘p’: 6, ‘g’: 5, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 0, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 5, ‘e‘: 9, ‘an’: 4, ‘s‘: 3, ‘ing’: 2, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 4, ‘d‘: 0, ‘ion’: 1, ‘for’: 1, ‘th’: 3, ‘de’: 5, ‘ation’: 5, ‘for‘: 4, ‘st’: 4, ‘ing‘: 2, ‘in‘: 4, ‘us’: 2, ‘ang’: 1, ‘ag’: 1, ‘ic’: 4, ‘is‘: 3, ‘nt’: 1, ‘ve’: 3, ‘op’: 3, ‘of’: 1, ‘al’: 3, ‘ed‘: 1, ‘lang’: 0, ‘langu’: 0, ‘languag’: 0, ‘language‘: 3, ‘ss’: 3, ‘ing,‘: 3, ‘cl’: 3, ‘and‘: 3, ‘.‘: 3, ‘ar’: 3, ‘Th’: 0, ‘The‘: 2, ‘ms‘: 2, ‘ec’: 2, ‘t‘: 2, ‘nts‘: 2, ‘to’: 0, ‘to‘: 2, ‘ding‘: 2, ‘of‘: 2, ‘the‘: 2, ‘ma’: 2, ‘used‘: 2, ‘pr’: 2})
Number of tokens: 85
==========
Iter: 54
Best pair: (‘pr’, ‘o’)
Tokens: defaultdict(, {‘T’: 1, ‘h’: 4, ‘e’: 10, ‘‘: 13, ‘a’: 8, ‘i’: 6, ‘m’: 8, ‘s’: 10, ‘f’: 1, ‘o’: 3, ‘r’: 9, ‘t’: 6, ‘u’: 7, ‘b’: 2, ‘j’: 1, ‘c’: 4, ‘d’: 2, ‘n’: 5, ‘v’: 0, ‘l’: 7, ‘p’: 6, ‘g’: 5, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 0, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 5, ‘e‘: 9, ‘an’: 4, ‘s‘: 3, ‘ing’: 2, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 4, ‘d‘: 0, ‘ion’: 1, ‘for’: 1, ‘th’: 3, ‘de’: 5, ‘ation’: 5, ‘for‘: 4, ‘st’: 4, ‘ing‘: 2, ‘in‘: 4, ‘us’: 2, ‘ang’: 1, ‘ag’: 1, ‘ic’: 4, ‘is‘: 3, ‘nt’: 1, ‘ve’: 3, ‘op’: 3, ‘of’: 1, ‘al’: 3, ‘ed‘: 1, ‘lang’: 0, ‘langu’: 0, ‘languag’: 0, ‘language‘: 3, ‘ss’: 3, ‘ing,‘: 3, ‘cl’: 3, ‘and‘: 3, ‘.‘: 3, ‘ar’: 3, ‘Th’: 0, ‘The‘: 2, ‘ms‘: 2, ‘ec’: 2, ‘t‘: 2, ‘nts‘: 2, ‘to’: 0, ‘to‘: 2, ‘ding‘: 2, ‘of‘: 2, ‘the‘: 2, ‘ma’: 2, ‘used‘: 2, ‘pr’: 0, ‘pro’: 2})
Number of tokens: 86
==========
Iter: 55
Best pair: (‘us’, ‘e‘)
Tokens: defaultdict(, {‘T’: 1, ‘h’: 4, ‘e’: 10, ‘‘: 13, ‘a’: 8, ‘i’: 6, ‘m’: 8, ‘s’: 10, ‘f’: 1, ‘o’: 3, ‘r’: 9, ‘t’: 6, ‘u’: 7, ‘b’: 2, ‘j’: 1, ‘c’: 4, ‘d’: 2, ‘n’: 5, ‘v’: 0, ‘l’: 7, ‘p’: 6, ‘g’: 5, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 0, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 5, ‘e‘: 7, ‘an’: 4, ‘s‘: 3, ‘ing’: 2, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 4, ‘d‘: 0, ‘ion’: 1, ‘for’: 1, ‘th’: 3, ‘de’: 5, ‘ation’: 5, ‘for‘: 4, ‘st’: 4, ‘ing‘: 2, ‘in‘: 4, ‘us’: 0, ‘ang’: 1, ‘ag’: 1, ‘ic’: 4, ‘is‘: 3, ‘nt’: 1, ‘ve’: 3, ‘op’: 3, ‘of’: 1, ‘al’: 3, ‘ed‘: 1, ‘lang’: 0, ‘langu’: 0, ‘languag’: 0, ‘language‘: 3, ‘ss’: 3, ‘ing,‘: 3, ‘cl’: 3, ‘and‘: 3, ‘.‘: 3, ‘ar’: 3, ‘Th’: 0, ‘The‘: 2, ‘ms‘: 2, ‘ec’: 2, ‘t‘: 2, ‘nts‘: 2, ‘to’: 0, ‘to‘: 2, ‘ding‘: 2, ‘of‘: 2, ‘the‘: 2, ‘ma’: 2, ‘used‘: 2, ‘pr’: 0, ‘pro’: 2, ‘use‘: 2})
Number of tokens: 87
==========
Iter: 56
Best pair: (‘ve’, ‘r’)
Tokens: defaultdict(, {‘T’: 1, ‘h’: 4, ‘e’: 10, ‘‘: 13, ‘a’: 8, ‘i’: 6, ‘m’: 8, ‘s’: 10, ‘f’: 1, ‘o’: 3, ‘r’: 7, ‘t’: 6, ‘u’: 7, ‘b’: 2, ‘j’: 1, ‘c’: 4, ‘d’: 2, ‘n’: 5, ‘v’: 0, ‘l’: 7, ‘p’: 6, ‘g’: 5, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 0, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 5, ‘e‘: 7, ‘an’: 4, ‘s‘: 3, ‘ing’: 2, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 4, ‘d‘: 0, ‘ion’: 1, ‘for’: 1, ‘th’: 3, ‘de’: 5, ‘ation’: 5, ‘for‘: 4, ‘st’: 4, ‘ing‘: 2, ‘in‘: 4, ‘us’: 0, ‘ang’: 1, ‘ag’: 1, ‘ic’: 4, ‘is‘: 3, ‘nt’: 1, ‘ve’: 1, ‘op’: 3, ‘of’: 1, ‘al’: 3, ‘ed‘: 1, ‘lang’: 0, ‘langu’: 0, ‘languag’: 0, ‘language‘: 3, ‘ss’: 3, ‘ing,‘: 3, ‘cl’: 3, ‘and‘: 3, ‘.‘: 3, ‘ar’: 3, ‘Th’: 0, ‘The‘: 2, ‘ms‘: 2, ‘ec’: 2, ‘t‘: 2, ‘nts‘: 2, ‘to’: 0, ‘to‘: 2, ‘ding‘: 2, ‘of‘: 2, ‘the‘: 2, ‘ma’: 2, ‘used‘: 2, ‘pr’: 0, ‘pro’: 2, ‘use‘: 2, ‘ver’: 2})
Number of tokens: 88
==========
Iter: 57
Best pair: (‘ic’, ‘ation’)
Tokens: defaultdict(, {‘T’: 1, ‘h’: 4, ‘e’: 10, ‘‘: 13, ‘a’: 8, ‘i’: 6, ‘m’: 8, ‘s’: 10, ‘f’: 1, ‘o’: 3, ‘r’: 7, ‘t’: 6, ‘u’: 7, ‘b’: 2, ‘j’: 1, ‘c’: 4, ‘d’: 2, ‘n’: 5, ‘v’: 0, ‘l’: 7, ‘p’: 6, ‘g’: 5, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 0, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 5, ‘e‘: 7, ‘an’: 4, ‘s‘: 3, ‘ing’: 2, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 4, ‘d‘: 0, ‘ion’: 1, ‘for’: 1, ‘th’: 3, ‘de’: 5, ‘ation’: 3, ‘for‘: 4, ‘st’: 4, ‘ing‘: 2, ‘in‘: 4, ‘us’: 0, ‘ang’: 1, ‘ag’: 1, ‘ic’: 2, ‘is‘: 3, ‘nt’: 1, ‘ve’: 1, ‘op’: 3, ‘of’: 1, ‘al’: 3, ‘ed‘: 1, ‘lang’: 0, ‘langu’: 0, ‘languag’: 0, ‘language‘: 3, ‘ss’: 3, ‘ing,‘: 3, ‘cl’: 3, ‘and‘: 3, ‘.‘: 3, ‘ar’: 3, ‘Th’: 0, ‘The‘: 2, ‘ms‘: 2, ‘ec’: 2, ‘t‘: 2, ‘nts‘: 2, ‘to’: 0, ‘to‘: 2, ‘ding‘: 2, ‘of‘: 2, ‘the‘: 2, ‘ma’: 2, ‘used‘: 2, ‘pr’: 0, ‘pro’: 2, ‘use‘: 2, ‘ver’: 2, ‘ication’: 2})
Number of tokens: 89
==========
Iter: 58
Best pair: (‘in’, ‘cl’)
Tokens: defaultdict(, {‘T’: 1, ‘h’: 4, ‘e’: 10, ‘‘: 13, ‘a’: 8, ‘i’: 6, ‘m’: 8, ‘s’: 10, ‘f’: 1, ‘o’: 3, ‘r’: 7, ‘t’: 6, ‘u’: 7, ‘b’: 2, ‘j’: 1, ‘c’: 4, ‘d’: 2, ‘n’: 5, ‘v’: 0, ‘l’: 7, ‘p’: 6, ‘g’: 5, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 0, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 3, ‘e‘: 7, ‘an’: 4, ‘s‘: 3, ‘ing’: 2, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 4, ‘d‘: 0, ‘ion’: 1, ‘for’: 1, ‘th’: 3, ‘de’: 5, ‘ation’: 3, ‘for‘: 4, ‘st’: 4, ‘ing‘: 2, ‘in‘: 4, ‘us’: 0, ‘ang’: 1, ‘ag’: 1, ‘ic’: 2, ‘is‘: 3, ‘nt’: 1, ‘ve’: 1, ‘op’: 3, ‘of’: 1, ‘al’: 3, ‘ed‘: 1, ‘lang’: 0, ‘langu’: 0, ‘languag’: 0, ‘language‘: 3, ‘ss’: 3, ‘ing,‘: 3, ‘cl’: 1, ‘and‘: 3, ‘.‘: 3, ‘ar’: 3, ‘Th’: 0, ‘The‘: 2, ‘ms‘: 2, ‘ec’: 2, ‘t‘: 2, ‘nts‘: 2, ‘to’: 0, ‘to‘: 2, ‘ding‘: 2, ‘of‘: 2, ‘the‘: 2, ‘ma’: 2, ‘used‘: 2, ‘pr’: 0, ‘pro’: 2, ‘use‘: 2, ‘ver’: 2, ‘ication’: 2, ‘incl’: 2})
Number of tokens: 90
==========
Iter: 59
Best pair: (‘incl’, ‘u’)
Tokens: defaultdict(, {‘T’: 1, ‘h’: 4, ‘e’: 10, ‘‘: 13, ‘a’: 8, ‘i’: 6, ‘m’: 8, ‘s’: 10, ‘f’: 1, ‘o’: 3, ‘r’: 7, ‘t’: 6, ‘u’: 5, ‘b’: 2, ‘j’: 1, ‘c’: 4, ‘d’: 2, ‘n’: 5, ‘v’: 0, ‘l’: 7, ‘p’: 6, ‘g’: 5, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 0, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 3, ‘e‘: 7, ‘an’: 4, ‘s‘: 3, ‘ing’: 2, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 4, ‘d‘: 0, ‘ion’: 1, ‘for’: 1, ‘th’: 3, ‘de’: 5, ‘ation’: 3, ‘for‘: 4, ‘st’: 4, ‘ing‘: 2, ‘in‘: 4, ‘us’: 0, ‘ang’: 1, ‘ag’: 1, ‘ic’: 2, ‘is‘: 3, ‘nt’: 1, ‘ve’: 1, ‘op’: 3, ‘of’: 1, ‘al’: 3, ‘ed‘: 1, ‘lang’: 0, ‘langu’: 0, ‘languag’: 0, ‘language‘: 3, ‘ss’: 3, ‘ing,‘: 3, ‘cl’: 1, ‘and‘: 3, ‘.‘: 3, ‘ar’: 3, ‘Th’: 0, ‘The‘: 2, ‘ms‘: 2, ‘ec’: 2, ‘t‘: 2, ‘nts‘: 2, ‘to’: 0, ‘to‘: 2, ‘ding‘: 2, ‘of‘: 2, ‘the‘: 2, ‘ma’: 2, ‘used‘: 2, ‘pr’: 0, ‘pro’: 2, ‘use‘: 2, ‘ver’: 2, ‘ication’: 2, ‘incl’: 0, ‘inclu’: 2})
Number of tokens: 91
==========
Iter: 60
Best pair: (‘a’, ‘ss’)
Tokens: defaultdict(, {‘T’: 1, ‘h’: 4, ‘e’: 10, ‘‘: 13, ‘a’: 6, ‘i’: 6, ‘m’: 8, ‘s’: 10, ‘f’: 1, ‘o’: 3, ‘r’: 7, ‘t’: 6, ‘u’: 5, ‘b’: 2, ‘j’: 1, ‘c’: 4, ‘d’: 2, ‘n’: 5, ‘v’: 0, ‘l’: 7, ‘p’: 6, ‘g’: 5, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 0, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 3, ‘e‘: 7, ‘an’: 4, ‘s‘: 3, ‘ing’: 2, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 4, ‘d‘: 0, ‘ion’: 1, ‘for’: 1, ‘th’: 3, ‘de’: 5, ‘ation’: 3, ‘for‘: 4, ‘st’: 4, ‘ing‘: 2, ‘in‘: 4, ‘us’: 0, ‘ang’: 1, ‘ag’: 1, ‘ic’: 2, ‘is‘: 3, ‘nt’: 1, ‘ve’: 1, ‘op’: 3, ‘of’: 1, ‘al’: 3, ‘ed‘: 1, ‘lang’: 0, ‘langu’: 0, ‘languag’: 0, ‘language‘: 3, ‘ss’: 1, ‘ing,‘: 3, ‘cl’: 1, ‘and‘: 3, ‘.‘: 3, ‘ar’: 3, ‘Th’: 0, ‘The‘: 2, ‘ms‘: 2, ‘ec’: 2, ‘t‘: 2, ‘nts‘: 2, ‘to’: 0, ‘to‘: 2, ‘ding‘: 2, ‘of‘: 2, ‘the‘: 2, ‘ma’: 2, ‘used‘: 2, ‘pr’: 0, ‘pro’: 2, ‘use‘: 2, ‘ver’: 2, ‘ication’: 2, ‘incl’: 0, ‘inclu’: 2, ‘ass’: 2})
Number of tokens: 92
==========
Iter: 61
Best pair: (‘ass’, ‘i’)
Tokens: defaultdict(, {‘T’: 1, ‘h’: 4, ‘e’: 10, ‘‘: 13, ‘a’: 6, ‘i’: 4, ‘m’: 8, ‘s’: 10, ‘f’: 1, ‘o’: 3, ‘r’: 7, ‘t’: 6, ‘u’: 5, ‘b’: 2, ‘j’: 1, ‘c’: 4, ‘d’: 2, ‘n’: 5, ‘v’: 0, ‘l’: 7, ‘p’: 6, ‘g’: 5, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 0, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 3, ‘e‘: 7, ‘an’: 4, ‘s‘: 3, ‘ing’: 2, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 4, ‘d‘: 0, ‘ion’: 1, ‘for’: 1, ‘th’: 3, ‘de’: 5, ‘ation’: 3, ‘for‘: 4, ‘st’: 4, ‘ing‘: 2, ‘in‘: 4, ‘us’: 0, ‘ang’: 1, ‘ag’: 1, ‘ic’: 2, ‘is‘: 3, ‘nt’: 1, ‘ve’: 1, ‘op’: 3, ‘of’: 1, ‘al’: 3, ‘ed‘: 1, ‘lang’: 0, ‘langu’: 0, ‘languag’: 0, ‘language‘: 3, ‘ss’: 1, ‘ing,‘: 3, ‘cl’: 1, ‘and‘: 3, ‘.‘: 3, ‘ar’: 3, ‘Th’: 0, ‘The‘: 2, ‘ms‘: 2, ‘ec’: 2, ‘t‘: 2, ‘nts‘: 2, ‘to’: 0, ‘to‘: 2, ‘ding‘: 2, ‘of‘: 2, ‘the‘: 2, ‘ma’: 2, ‘used‘: 2, ‘pr’: 0, ‘pro’: 2, ‘use‘: 2, ‘ver’: 2, ‘ication’: 2, ‘incl’: 0, ‘inclu’: 2, ‘ass’: 0, ‘assi’: 2})
Number of tokens: 93
==========
Iter: 62
Best pair: (‘an’, ‘s’)
Tokens: defaultdict(, {‘T’: 1, ‘h’: 4, ‘e’: 10, ‘‘: 13, ‘a’: 6, ‘i’: 4, ‘m’: 8, ‘s’: 8, ‘f’: 1, ‘o’: 3, ‘r’: 7, ‘t’: 6, ‘u’: 5, ‘b’: 2, ‘j’: 1, ‘c’: 4, ‘d’: 2, ‘n’: 5, ‘v’: 0, ‘l’: 7, ‘p’: 6, ‘g’: 5, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 0, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 3, ‘e‘: 7, ‘an’: 2, ‘s‘: 3, ‘ing’: 2, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 4, ‘d‘: 0, ‘ion’: 1, ‘for’: 1, ‘th’: 3, ‘de’: 5, ‘ation’: 3, ‘for‘: 4, ‘st’: 4, ‘ing‘: 2, ‘in‘: 4, ‘us’: 0, ‘ang’: 1, ‘ag’: 1, ‘ic’: 2, ‘is‘: 3, ‘nt’: 1, ‘ve’: 1, ‘op’: 3, ‘of’: 1, ‘al’: 3, ‘ed‘: 1, ‘lang’: 0, ‘langu’: 0, ‘languag’: 0, ‘language‘: 3, ‘ss’: 1, ‘ing,‘: 3, ‘cl’: 1, ‘and‘: 3, ‘.‘: 3, ‘ar’: 3, ‘Th’: 0, ‘The‘: 2, ‘ms‘: 2, ‘ec’: 2, ‘t‘: 2, ‘nts‘: 2, ‘to’: 0, ‘to‘: 2, ‘ding‘: 2, ‘of‘: 2, ‘the‘: 2, ‘ma’: 2, ‘used‘: 2, ‘pr’: 0, ‘pro’: 2, ‘use‘: 2, ‘ver’: 2, ‘ication’: 2, ‘incl’: 0, ‘inclu’: 2, ‘ass’: 0, ‘assi’: 2, ‘ans’: 2})
Number of tokens: 94
==========
Iter: 63
Best pair: (‘l’, ‘ation’)
Tokens: defaultdict(, {‘T’: 1, ‘h’: 4, ‘e’: 10, ‘‘: 13, ‘a’: 6, ‘i’: 4, ‘m’: 8, ‘s’: 8, ‘f’: 1, ‘o’: 3, ‘r’: 7, ‘t’: 6, ‘u’: 5, ‘b’: 2, ‘j’: 1, ‘c’: 4, ‘d’: 2, ‘n’: 5, ‘v’: 0, ‘l’: 5, ‘p’: 6, ‘g’: 5, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 0, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 3, ‘e‘: 7, ‘an’: 2, ‘s‘: 3, ‘ing’: 2, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 4, ‘d‘: 0, ‘ion’: 1, ‘for’: 1, ‘th’: 3, ‘de’: 5, ‘ation’: 1, ‘for‘: 4, ‘st’: 4, ‘ing‘: 2, ‘in‘: 4, ‘us’: 0, ‘ang’: 1, ‘ag’: 1, ‘ic’: 2, ‘is‘: 3, ‘nt’: 1, ‘ve’: 1, ‘op’: 3, ‘of’: 1, ‘al’: 3, ‘ed‘: 1, ‘lang’: 0, ‘langu’: 0, ‘languag’: 0, ‘language‘: 3, ‘ss’: 1, ‘ing,‘: 3, ‘cl’: 1, ‘and‘: 3, ‘.‘: 3, ‘ar’: 3, ‘Th’: 0, ‘The‘: 2, ‘ms‘: 2, ‘ec’: 2, ‘t‘: 2, ‘nts‘: 2, ‘to’: 0, ‘to‘: 2, ‘ding‘: 2, ‘of‘: 2, ‘the‘: 2, ‘ma’: 2, ‘used‘: 2, ‘pr’: 0, ‘pro’: 2, ‘use‘: 2, ‘ver’: 2, ‘ication’: 2, ‘incl’: 0, ‘inclu’: 2, ‘ass’: 0, ‘assi’: 2, ‘ans’: 2, ‘lation’: 2})
Number of tokens: 95
==========
Iter: 64
Best pair: (‘ing’, ‘.‘)
Tokens: defaultdict(, {‘T’: 1, ‘h’: 4, ‘e’: 10, ‘‘: 13, ‘a’: 6, ‘i’: 4, ‘m’: 8, ‘s’: 8, ‘f’: 1, ‘o’: 3, ‘r’: 7, ‘t’: 6, ‘u’: 5, ‘b’: 2, ‘j’: 1, ‘c’: 4, ‘d’: 2, ‘n’: 5, ‘v’: 0, ‘l’: 5, ‘p’: 6, ‘g’: 5, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 0, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 3, ‘e‘: 7, ‘an’: 2, ‘s‘: 3, ‘ing’: 0, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 4, ‘d‘: 0, ‘ion’: 1, ‘for’: 1, ‘th’: 3, ‘de’: 5, ‘ation’: 1, ‘for‘: 4, ‘st’: 4, ‘ing‘: 2, ‘in‘: 4, ‘us’: 0, ‘ang’: 1, ‘ag’: 1, ‘ic’: 2, ‘is‘: 3, ‘nt’: 1, ‘ve’: 1, ‘op’: 3, ‘of’: 1, ‘al’: 3, ‘ed‘: 1, ‘lang’: 0, ‘langu’: 0, ‘languag’: 0, ‘language‘: 3, ‘ss’: 1, ‘ing,‘: 3, ‘cl’: 1, ‘and‘: 3, ‘.‘: 1, ‘ar’: 3, ‘Th’: 0, ‘The‘: 2, ‘ms‘: 2, ‘ec’: 2, ‘t‘: 2, ‘nts‘: 2, ‘to’: 0, ‘to‘: 2, ‘ding‘: 2, ‘of‘: 2, ‘the‘: 2, ‘ma’: 2, ‘used‘: 2, ‘pr’: 0, ‘pro’: 2, ‘use‘: 2, ‘ver’: 2, ‘ication’: 2, ‘incl’: 0, ‘inclu’: 2, ‘ass’: 0, ‘assi’: 2, ‘ans’: 2, ‘lation’: 2, ‘ing.‘: 2})
Number of tokens: 96
==========
Iter: 65
Best pair: (‘p’, ‘ar’)
Tokens: defaultdict(, {‘T’: 1, ‘h’: 4, ‘e’: 10, ‘‘: 13, ‘a’: 6, ‘i’: 4, ‘m’: 8, ‘s’: 8, ‘f’: 1, ‘o’: 3, ‘r’: 7, ‘t’: 6, ‘u’: 5, ‘b’: 2, ‘j’: 1, ‘c’: 4, ‘d’: 2, ‘n’: 5, ‘v’: 0, ‘l’: 5, ‘p’: 4, ‘g’: 5, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 0, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 3, ‘e‘: 7, ‘an’: 2, ‘s‘: 3, ‘ing’: 0, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 4, ‘d‘: 0, ‘ion’: 1, ‘for’: 1, ‘th’: 3, ‘de’: 5, ‘ation’: 1, ‘for‘: 4, ‘st’: 4, ‘ing‘: 2, ‘in‘: 4, ‘us’: 0, ‘ang’: 1, ‘ag’: 1, ‘ic’: 2, ‘is‘: 3, ‘nt’: 1, ‘ve’: 1, ‘op’: 3, ‘of’: 1, ‘al’: 3, ‘ed‘: 1, ‘lang’: 0, ‘langu’: 0, ‘languag’: 0, ‘language‘: 3, ‘ss’: 1, ‘ing,‘: 3, ‘cl’: 1, ‘and‘: 3, ‘.‘: 1, ‘ar’: 1, ‘Th’: 0, ‘The‘: 2, ‘ms‘: 2, ‘ec’: 2, ‘t‘: 2, ‘nts‘: 2, ‘to’: 0, ‘to‘: 2, ‘ding‘: 2, ‘of‘: 2, ‘the‘: 2, ‘ma’: 2, ‘used‘: 2, ‘pr’: 0, ‘pro’: 2, ‘use‘: 2, ‘ver’: 2, ‘ication’: 2, ‘incl’: 0, ‘inclu’: 2, ‘ass’: 0, ‘assi’: 2, ‘ans’: 2, ‘lation’: 2, ‘ing.‘: 2, ‘par’: 2})
Number of tokens: 97
==========
Iter: 66
Best pair: (‘g’, ‘r’)
Tokens: defaultdict(, {‘T’: 1, ‘h’: 4, ‘e’: 10, ‘‘: 13, ‘a’: 6, ‘i’: 4, ‘m’: 8, ‘s’: 8, ‘f’: 1, ‘o’: 3, ‘r’: 5, ‘t’: 6, ‘u’: 5, ‘b’: 2, ‘j’: 1, ‘c’: 4, ‘d’: 2, ‘n’: 5, ‘v’: 0, ‘l’: 5, ‘p’: 4, ‘g’: 3, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 0, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 3, ‘e‘: 7, ‘an’: 2, ‘s‘: 3, ‘ing’: 0, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 4, ‘d‘: 0, ‘ion’: 1, ‘for’: 1, ‘th’: 3, ‘de’: 5, ‘ation’: 1, ‘for‘: 4, ‘st’: 4, ‘ing‘: 2, ‘in‘: 4, ‘us’: 0, ‘ang’: 1, ‘ag’: 1, ‘ic’: 2, ‘is‘: 3, ‘nt’: 1, ‘ve’: 1, ‘op’: 3, ‘of’: 1, ‘al’: 3, ‘ed‘: 1, ‘lang’: 0, ‘langu’: 0, ‘languag’: 0, ‘language‘: 3, ‘ss’: 1, ‘ing,‘: 3, ‘cl’: 1, ‘and‘: 3, ‘.‘: 1, ‘ar’: 1, ‘Th’: 0, ‘The‘: 2, ‘ms‘: 2, ‘ec’: 2, ‘t‘: 2, ‘nts‘: 2, ‘to’: 0, ‘to‘: 2, ‘ding‘: 2, ‘of‘: 2, ‘the‘: 2, ‘ma’: 2, ‘used‘: 2, ‘pr’: 0, ‘pro’: 2, ‘use‘: 2, ‘ver’: 2, ‘ication’: 2, ‘incl’: 0, ‘inclu’: 2, ‘ass’: 0, ‘assi’: 2, ‘ans’: 2, ‘lation’: 2, ‘ing.‘: 2, ‘par’: 2, ‘gr’: 2})
Number of tokens: 98
==========
Iter: 67
Best pair: (‘gr’, ‘a’)
Tokens: defaultdict(, {‘T’: 1, ‘h’: 4, ‘e’: 10, ‘‘: 13, ‘a’: 4, ‘i’: 4, ‘m’: 8, ‘s’: 8, ‘f’: 1, ‘o’: 3, ‘r’: 5, ‘t’: 6, ‘u’: 5, ‘b’: 2, ‘j’: 1, ‘c’: 4, ‘d’: 2, ‘n’: 5, ‘v’: 0, ‘l’: 5, ‘p’: 4, ‘g’: 3, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 0, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 3, ‘e‘: 7, ‘an’: 2, ‘s‘: 3, ‘ing’: 0, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 4, ‘d‘: 0, ‘ion’: 1, ‘for’: 1, ‘th’: 3, ‘de’: 5, ‘ation’: 1, ‘for‘: 4, ‘st’: 4, ‘ing‘: 2, ‘in‘: 4, ‘us’: 0, ‘ang’: 1, ‘ag’: 1, ‘ic’: 2, ‘is‘: 3, ‘nt’: 1, ‘ve’: 1, ‘op’: 3, ‘of’: 1, ‘al’: 3, ‘ed‘: 1, ‘lang’: 0, ‘langu’: 0, ‘languag’: 0, ‘language‘: 3, ‘ss’: 1, ‘ing,‘: 3, ‘cl’: 1, ‘and‘: 3, ‘.‘: 1, ‘ar’: 1, ‘Th’: 0, ‘The‘: 2, ‘ms‘: 2, ‘ec’: 2, ‘t‘: 2, ‘nts‘: 2, ‘to’: 0, ‘to‘: 2, ‘ding‘: 2, ‘of‘: 2, ‘the‘: 2, ‘ma’: 2, ‘used‘: 2, ‘pr’: 0, ‘pro’: 2, ‘use‘: 2, ‘ver’: 2, ‘ication’: 2, ‘incl’: 0, ‘inclu’: 2, ‘ass’: 0, ‘assi’: 2, ‘ans’: 2, ‘lation’: 2, ‘ing.‘: 2, ‘par’: 2, ‘gr’: 0, ‘gra’: 2})
Number of tokens: 99
==========
Iter: 68
Best pair: (‘gra’, ‘m’)
Tokens: defaultdict(, {‘T’: 1, ‘h’: 4, ‘e’: 10, ‘‘: 13, ‘a’: 4, ‘i’: 4, ‘m’: 6, ‘s’: 8, ‘f’: 1, ‘o’: 3, ‘r’: 5, ‘t’: 6, ‘u’: 5, ‘b’: 2, ‘j’: 1, ‘c’: 4, ‘d’: 2, ‘n’: 5, ‘v’: 0, ‘l’: 5, ‘p’: 4, ‘g’: 3, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 0, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 3, ‘e‘: 7, ‘an’: 2, ‘s‘: 3, ‘ing’: 0, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 4, ‘d‘: 0, ‘ion’: 1, ‘for’: 1, ‘th’: 3, ‘de’: 5, ‘ation’: 1, ‘for‘: 4, ‘st’: 4, ‘ing‘: 2, ‘in‘: 4, ‘us’: 0, ‘ang’: 1, ‘ag’: 1, ‘ic’: 2, ‘is‘: 3, ‘nt’: 1, ‘ve’: 1, ‘op’: 3, ‘of’: 1, ‘al’: 3, ‘ed‘: 1, ‘lang’: 0, ‘langu’: 0, ‘languag’: 0, ‘language‘: 3, ‘ss’: 1, ‘ing,‘: 3, ‘cl’: 1, ‘and‘: 3, ‘.‘: 1, ‘ar’: 1, ‘Th’: 0, ‘The‘: 2, ‘ms‘: 2, ‘ec’: 2, ‘t‘: 2, ‘nts‘: 2, ‘to’: 0, ‘to‘: 2, ‘ding‘: 2, ‘of‘: 2, ‘the‘: 2, ‘ma’: 2, ‘used‘: 2, ‘pr’: 0, ‘pro’: 2, ‘use‘: 2, ‘ver’: 2, ‘ication’: 2, ‘incl’: 0, ‘inclu’: 2, ‘ass’: 0, ‘assi’: 2, ‘ans’: 2, ‘lation’: 2, ‘ing.‘: 2, ‘par’: 2, ‘gr’: 0, ‘gra’: 0, ‘gram’: 2})
Number of tokens: 100
==========
Iter: 69
Best pair: (‘m’, ‘e’)
Tokens: defaultdict(, {‘T’: 1, ‘h’: 4, ‘e’: 8, ‘‘: 13, ‘a’: 4, ‘i’: 4, ‘m’: 4, ‘s’: 8, ‘f’: 1, ‘o’: 3, ‘r’: 5, ‘t’: 6, ‘u’: 5, ‘b’: 2, ‘j’: 1, ‘c’: 4, ‘d’: 2, ‘n’: 5, ‘v’: 0, ‘l’: 5, ‘p’: 4, ‘g’: 3, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 0, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 3, ‘e‘: 7, ‘an’: 2, ‘s‘: 3, ‘ing’: 0, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 4, ‘d‘: 0, ‘ion’: 1, ‘for’: 1, ‘th’: 3, ‘de’: 5, ‘ation’: 1, ‘for‘: 4, ‘st’: 4, ‘ing‘: 2, ‘in‘: 4, ‘us’: 0, ‘ang’: 1, ‘ag’: 1, ‘ic’: 2, ‘is‘: 3, ‘nt’: 1, ‘ve’: 1, ‘op’: 3, ‘of’: 1, ‘al’: 3, ‘ed‘: 1, ‘lang’: 0, ‘langu’: 0, ‘languag’: 0, ‘language‘: 3, ‘ss’: 1, ‘ing,‘: 3, ‘cl’: 1, ‘and‘: 3, ‘.‘: 1, ‘ar’: 1, ‘Th’: 0, ‘The‘: 2, ‘ms‘: 2, ‘ec’: 2, ‘t‘: 2, ‘nts‘: 2, ‘to’: 0, ‘to‘: 2, ‘ding‘: 2, ‘of‘: 2, ‘the‘: 2, ‘ma’: 2, ‘used‘: 2, ‘pr’: 0, ‘pro’: 2, ‘use‘: 2, ‘ver’: 2, ‘ication’: 2, ‘incl’: 0, ‘inclu’: 2, ‘ass’: 0, ‘assi’: 2, ‘ans’: 2, ‘lation’: 2, ‘ing.‘: 2, ‘par’: 2, ‘gr’: 0, ‘gra’: 0, ‘gram’: 2, ‘me’: 2})
Number of tokens: 101
==========
Iter: 70
Best pair: (‘a’, ‘i’)
Tokens: defaultdict(, {‘T’: 1, ‘h’: 4, ‘e’: 8, ‘‘: 13, ‘a’: 3, ‘i’: 3, ‘m’: 4, ‘s’: 8, ‘f’: 1, ‘o’: 3, ‘r’: 5, ‘t’: 6, ‘u’: 5, ‘b’: 2, ‘j’: 1, ‘c’: 4, ‘d’: 2, ‘n’: 5, ‘v’: 0, ‘l’: 5, ‘p’: 4, ‘g’: 3, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 0, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 3, ‘e‘: 7, ‘an’: 2, ‘s‘: 3, ‘ing’: 0, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 4, ‘d‘: 0, ‘ion’: 1, ‘for’: 1, ‘th’: 3, ‘de’: 5, ‘ation’: 1, ‘for‘: 4, ‘st’: 4, ‘ing‘: 2, ‘in‘: 4, ‘us’: 0, ‘ang’: 1, ‘ag’: 1, ‘ic’: 2, ‘is‘: 3, ‘nt’: 1, ‘ve’: 1, ‘op’: 3, ‘of’: 1, ‘al’: 3, ‘ed‘: 1, ‘lang’: 0, ‘langu’: 0, ‘languag’: 0, ‘language‘: 3, ‘ss’: 1, ‘ing,‘: 3, ‘cl’: 1, ‘and‘: 3, ‘.‘: 1, ‘ar’: 1, ‘Th’: 0, ‘The‘: 2, ‘ms‘: 2, ‘ec’: 2, ‘t‘: 2, ‘nts‘: 2, ‘to’: 0, ‘to‘: 2, ‘ding‘: 2, ‘of‘: 2, ‘the‘: 2, ‘ma’: 2, ‘used‘: 2, ‘pr’: 0, ‘pro’: 2, ‘use‘: 2, ‘ver’: 2, ‘ication’: 2, ‘incl’: 0, ‘inclu’: 2, ‘ass’: 0, ‘assi’: 2, ‘ans’: 2, ‘lation’: 2, ‘ing.‘: 2, ‘par’: 2, ‘gr’: 0, ‘gra’: 0, ‘gram’: 2, ‘me’: 2, ‘ai’: 1})
Number of tokens: 102
==========
Iter: 71
Best pair: (‘ai’, ‘ms‘)
Tokens: defaultdict(, {‘T’: 1, ‘h’: 4, ‘e’: 8, ‘‘: 13, ‘a’: 3, ‘i’: 3, ‘m’: 4, ‘s’: 8, ‘f’: 1, ‘o’: 3, ‘r’: 5, ‘t’: 6, ‘u’: 5, ‘b’: 2, ‘j’: 1, ‘c’: 4, ‘d’: 2, ‘n’: 5, ‘v’: 0, ‘l’: 5, ‘p’: 4, ‘g’: 3, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 0, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 3, ‘e‘: 7, ‘an’: 2, ‘s‘: 3, ‘ing’: 0, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 4, ‘d‘: 0, ‘ion’: 1, ‘for’: 1, ‘th’: 3, ‘de’: 5, ‘ation’: 1, ‘for‘: 4, ‘st’: 4, ‘ing‘: 2, ‘in‘: 4, ‘us’: 0, ‘ang’: 1, ‘ag’: 1, ‘ic’: 2, ‘is‘: 3, ‘nt’: 1, ‘ve’: 1, ‘op’: 3, ‘of’: 1, ‘al’: 3, ‘ed‘: 1, ‘lang’: 0, ‘langu’: 0, ‘languag’: 0, ‘language‘: 3, ‘ss’: 1, ‘ing,‘: 3, ‘cl’: 1, ‘and‘: 3, ‘.‘: 1, ‘ar’: 1, ‘Th’: 0, ‘The‘: 2, ‘ms‘: 1, ‘ec’: 2, ‘t‘: 2, ‘nts‘: 2, ‘to’: 0, ‘to‘: 2, ‘ding‘: 2, ‘of‘: 2, ‘the‘: 2, ‘ma’: 2, ‘used‘: 2, ‘pr’: 0, ‘pro’: 2, ‘use‘: 2, ‘ver’: 2, ‘ication’: 2, ‘incl’: 0, ‘inclu’: 2, ‘ass’: 0, ‘assi’: 2, ‘ans’: 2, ‘lation’: 2, ‘ing.‘: 2, ‘par’: 2, ‘gr’: 0, ‘gra’: 0, ‘gram’: 2, ‘me’: 2, ‘ai’: 0, ‘aims‘: 1})
Number of tokens: 103
==========
Iter: 72
Best pair: (‘th’, ‘is‘)
Tokens: defaultdict(, {‘T’: 1, ‘h’: 4, ‘e’: 8, ‘‘: 13, ‘a’: 3, ‘i’: 3, ‘m’: 4, ‘s’: 8, ‘f’: 1, ‘o’: 3, ‘r’: 5, ‘t’: 6, ‘u’: 5, ‘b’: 2, ‘j’: 1, ‘c’: 4, ‘d’: 2, ‘n’: 5, ‘v’: 0, ‘l’: 5, ‘p’: 4, ‘g’: 3, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 0, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 3, ‘e‘: 7, ‘an’: 2, ‘s‘: 3, ‘ing’: 0, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 4, ‘d‘: 0, ‘ion’: 1, ‘for’: 1, ‘th’: 2, ‘de’: 5, ‘ation’: 1, ‘for‘: 4, ‘st’: 4, ‘ing‘: 2, ‘in‘: 4, ‘us’: 0, ‘ang’: 1, ‘ag’: 1, ‘ic’: 2, ‘is‘: 2, ‘nt’: 1, ‘ve’: 1, ‘op’: 3, ‘of’: 1, ‘al’: 3, ‘ed‘: 1, ‘lang’: 0, ‘langu’: 0, ‘languag’: 0, ‘language‘: 3, ‘ss’: 1, ‘ing,‘: 3, ‘cl’: 1, ‘and‘: 3, ‘.‘: 1, ‘ar’: 1, ‘Th’: 0, ‘The‘: 2, ‘ms‘: 1, ‘ec’: 2, ‘t‘: 2, ‘nts‘: 2, ‘to’: 0, ‘to‘: 2, ‘ding‘: 2, ‘of‘: 2, ‘the‘: 2, ‘ma’: 2, ‘used‘: 2, ‘pr’: 0, ‘pro’: 2, ‘use‘: 2, ‘ver’: 2, ‘ication’: 2, ‘incl’: 0, ‘inclu’: 2, ‘ass’: 0, ‘assi’: 2, ‘ans’: 2, ‘lation’: 2, ‘ing.‘: 2, ‘par’: 2, ‘gr’: 0, ‘gra’: 0, ‘gram’: 2, ‘me’: 2, ‘ai’: 0, ‘aims‘: 1, ‘this‘: 1})
Number of tokens: 104
==========
Iter: 73
Best pair: (‘s’, ‘u’)
Tokens: defaultdict(, {‘T’: 1, ‘h’: 4, ‘e’: 8, ‘‘: 13, ‘a’: 3, ‘i’: 3, ‘m’: 4, ‘s’: 7, ‘f’: 1, ‘o’: 3, ‘r’: 5, ‘t’: 6, ‘u’: 4, ‘b’: 2, ‘j’: 1, ‘c’: 4, ‘d’: 2, ‘n’: 5, ‘v’: 0, ‘l’: 5, ‘p’: 4, ‘g’: 3, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 0, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 3, ‘e‘: 7, ‘an’: 2, ‘s‘: 3, ‘ing’: 0, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 4, ‘d‘: 0, ‘ion’: 1, ‘for’: 1, ‘th’: 2, ‘de’: 5, ‘ation’: 1, ‘for‘: 4, ‘st’: 4, ‘ing‘: 2, ‘in‘: 4, ‘us’: 0, ‘ang’: 1, ‘ag’: 1, ‘ic’: 2, ‘is‘: 2, ‘nt’: 1, ‘ve’: 1, ‘op’: 3, ‘of’: 1, ‘al’: 3, ‘ed‘: 1, ‘lang’: 0, ‘langu’: 0, ‘languag’: 0, ‘language‘: 3, ‘ss’: 1, ‘ing,‘: 3, ‘cl’: 1, ‘and‘: 3, ‘.‘: 1, ‘ar’: 1, ‘Th’: 0, ‘The‘: 2, ‘ms‘: 1, ‘ec’: 2, ‘t‘: 2, ‘nts‘: 2, ‘to’: 0, ‘to‘: 2, ‘ding‘: 2, ‘of‘: 2, ‘the‘: 2, ‘ma’: 2, ‘used‘: 2, ‘pr’: 0, ‘pro’: 2, ‘use‘: 2, ‘ver’: 2, ‘ication’: 2, ‘incl’: 0, ‘inclu’: 2, ‘ass’: 0, ‘assi’: 2, ‘ans’: 2, ‘lation’: 2, ‘ing.‘: 2, ‘par’: 2, ‘gr’: 0, ‘gra’: 0, ‘gram’: 2, ‘me’: 2, ‘ai’: 0, ‘aims‘: 1, ‘this‘: 1, ‘su’: 1})
Number of tokens: 105
==========
Iter: 74
Best pair: (‘su’, ‘b’)
Tokens: defaultdict(, {‘T’: 1, ‘h’: 4, ‘e’: 8, ‘‘: 13, ‘a’: 3, ‘i’: 3, ‘m’: 4, ‘s’: 7, ‘f’: 1, ‘o’: 3, ‘r’: 5, ‘t’: 6, ‘u’: 4, ‘b’: 1, ‘j’: 1, ‘c’: 4, ‘d’: 2, ‘n’: 5, ‘v’: 0, ‘l’: 5, ‘p’: 4, ‘g’: 3, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 0, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 3, ‘e‘: 7, ‘an’: 2, ‘s‘: 3, ‘ing’: 0, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 4, ‘d‘: 0, ‘ion’: 1, ‘for’: 1, ‘th’: 2, ‘de’: 5, ‘ation’: 1, ‘for‘: 4, ‘st’: 4, ‘ing‘: 2, ‘in‘: 4, ‘us’: 0, ‘ang’: 1, ‘ag’: 1, ‘ic’: 2, ‘is‘: 2, ‘nt’: 1, ‘ve’: 1, ‘op’: 3, ‘of’: 1, ‘al’: 3, ‘ed‘: 1, ‘lang’: 0, ‘langu’: 0, ‘languag’: 0, ‘language‘: 3, ‘ss’: 1, ‘ing,‘: 3, ‘cl’: 1, ‘and‘: 3, ‘.‘: 1, ‘ar’: 1, ‘Th’: 0, ‘The‘: 2, ‘ms‘: 1, ‘ec’: 2, ‘t‘: 2, ‘nts‘: 2, ‘to’: 0, ‘to‘: 2, ‘ding‘: 2, ‘of‘: 2, ‘the‘: 2, ‘ma’: 2, ‘used‘: 2, ‘pr’: 0, ‘pro’: 2, ‘use‘: 2, ‘ver’: 2, ‘ication’: 2, ‘incl’: 0, ‘inclu’: 2, ‘ass’: 0, ‘assi’: 2, ‘ans’: 2, ‘lation’: 2, ‘ing.‘: 2, ‘par’: 2, ‘gr’: 0, ‘gra’: 0, ‘gram’: 2, ‘me’: 2, ‘ai’: 0, ‘aims‘: 1, ‘this‘: 1, ‘su’: 0, ‘sub’: 1})
Number of tokens: 106
==========
Iter: 75
Best pair: (‘sub’, ‘j’)
Tokens: defaultdict(, {‘T’: 1, ‘h’: 4, ‘e’: 8, ‘‘: 13, ‘a’: 3, ‘i’: 3, ‘m’: 4, ‘s’: 7, ‘f’: 1, ‘o’: 3, ‘r’: 5, ‘t’: 6, ‘u’: 4, ‘b’: 1, ‘j’: 0, ‘c’: 4, ‘d’: 2, ‘n’: 5, ‘v’: 0, ‘l’: 5, ‘p’: 4, ‘g’: 3, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 0, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 3, ‘e‘: 7, ‘an’: 2, ‘s‘: 3, ‘ing’: 0, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 4, ‘d‘: 0, ‘ion’: 1, ‘for’: 1, ‘th’: 2, ‘de’: 5, ‘ation’: 1, ‘for‘: 4, ‘st’: 4, ‘ing‘: 2, ‘in‘: 4, ‘us’: 0, ‘ang’: 1, ‘ag’: 1, ‘ic’: 2, ‘is‘: 2, ‘nt’: 1, ‘ve’: 1, ‘op’: 3, ‘of’: 1, ‘al’: 3, ‘ed‘: 1, ‘lang’: 0, ‘langu’: 0, ‘languag’: 0, ‘language‘: 3, ‘ss’: 1, ‘ing,‘: 3, ‘cl’: 1, ‘and‘: 3, ‘.‘: 1, ‘ar’: 1, ‘Th’: 0, ‘The‘: 2, ‘ms‘: 1, ‘ec’: 2, ‘t‘: 2, ‘nts‘: 2, ‘to’: 0, ‘to‘: 2, ‘ding‘: 2, ‘of‘: 2, ‘the‘: 2, ‘ma’: 2, ‘used‘: 2, ‘pr’: 0, ‘pro’: 2, ‘use‘: 2, ‘ver’: 2, ‘ication’: 2, ‘incl’: 0, ‘inclu’: 2, ‘ass’: 0, ‘assi’: 2, ‘ans’: 2, ‘lation’: 2, ‘ing.‘: 2, ‘par’: 2, ‘gr’: 0, ‘gra’: 0, ‘gram’: 2, ‘me’: 2, ‘ai’: 0, ‘aims‘: 1, ‘this‘: 1, ‘su’: 0, ‘sub’: 0, ‘subj’: 1})
Number of tokens: 107
==========
Iter: 76
Best pair: (‘subj’, ‘ec’)
Tokens: defaultdict(, {‘T’: 1, ‘h’: 4, ‘e’: 8, ‘‘: 13, ‘a’: 3, ‘i’: 3, ‘m’: 4, ‘s’: 7, ‘f’: 1, ‘o’: 3, ‘r’: 5, ‘t’: 6, ‘u’: 4, ‘b’: 1, ‘j’: 0, ‘c’: 4, ‘d’: 2, ‘n’: 5, ‘v’: 0, ‘l’: 5, ‘p’: 4, ‘g’: 3, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 0, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 3, ‘e‘: 7, ‘an’: 2, ‘s‘: 3, ‘ing’: 0, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 4, ‘d‘: 0, ‘ion’: 1, ‘for’: 1, ‘th’: 2, ‘de’: 5, ‘ation’: 1, ‘for‘: 4, ‘st’: 4, ‘ing‘: 2, ‘in‘: 4, ‘us’: 0, ‘ang’: 1, ‘ag’: 1, ‘ic’: 2, ‘is‘: 2, ‘nt’: 1, ‘ve’: 1, ‘op’: 3, ‘of’: 1, ‘al’: 3, ‘ed‘: 1, ‘lang’: 0, ‘langu’: 0, ‘languag’: 0, ‘language‘: 3, ‘ss’: 1, ‘ing,‘: 3, ‘cl’: 1, ‘and‘: 3, ‘.‘: 1, ‘ar’: 1, ‘Th’: 0, ‘The‘: 2, ‘ms‘: 1, ‘ec’: 1, ‘t‘: 2, ‘nts‘: 2, ‘to’: 0, ‘to‘: 2, ‘ding‘: 2, ‘of‘: 2, ‘the‘: 2, ‘ma’: 2, ‘used‘: 2, ‘pr’: 0, ‘pro’: 2, ‘use‘: 2, ‘ver’: 2, ‘ication’: 2, ‘incl’: 0, ‘inclu’: 2, ‘ass’: 0, ‘assi’: 2, ‘ans’: 2, ‘lation’: 2, ‘ing.‘: 2, ‘par’: 2, ‘gr’: 0, ‘gra’: 0, ‘gram’: 2, ‘me’: 2, ‘ai’: 0, ‘aims‘: 1, ‘this‘: 1, ‘su’: 0, ‘sub’: 0, ‘subj’: 0, ‘subjec’: 1})
Number of tokens: 108
==========
Iter: 77
Best pair: (‘subjec’, ‘t‘)
Tokens: defaultdict(, {‘T’: 1, ‘h’: 4, ‘e’: 8, ‘‘: 13, ‘a’: 3, ‘i’: 3, ‘m’: 4, ‘s’: 7, ‘f’: 1, ‘o’: 3, ‘r’: 5, ‘t’: 6, ‘u’: 4, ‘b’: 1, ‘j’: 0, ‘c’: 4, ‘d’: 2, ‘n’: 5, ‘v’: 0, ‘l’: 5, ‘p’: 4, ‘g’: 3, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 0, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 3, ‘e‘: 7, ‘an’: 2, ‘s‘: 3, ‘ing’: 0, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 4, ‘d‘: 0, ‘ion’: 1, ‘for’: 1, ‘th’: 2, ‘de’: 5, ‘ation’: 1, ‘for‘: 4, ‘st’: 4, ‘ing‘: 2, ‘in‘: 4, ‘us’: 0, ‘ang’: 1, ‘ag’: 1, ‘ic’: 2, ‘is‘: 2, ‘nt’: 1, ‘ve’: 1, ‘op’: 3, ‘of’: 1, ‘al’: 3, ‘ed‘: 1, ‘lang’: 0, ‘langu’: 0, ‘languag’: 0, ‘language‘: 3, ‘ss’: 1, ‘ing,‘: 3, ‘cl’: 1, ‘and‘: 3, ‘.‘: 1, ‘ar’: 1, ‘Th’: 0, ‘The‘: 2, ‘ms‘: 1, ‘ec’: 1, ‘t‘: 1, ‘nts‘: 2, ‘to’: 0, ‘to‘: 2, ‘ding‘: 2, ‘of‘: 2, ‘the‘: 2, ‘ma’: 2, ‘used‘: 2, ‘pr’: 0, ‘pro’: 2, ‘use‘: 2, ‘ver’: 2, ‘ication’: 2, ‘incl’: 0, ‘inclu’: 2, ‘ass’: 0, ‘assi’: 2, ‘ans’: 2, ‘lation’: 2, ‘ing.‘: 2, ‘par’: 2, ‘gr’: 0, ‘gra’: 0, ‘gram’: 2, ‘me’: 2, ‘ai’: 0, ‘aims‘: 1, ‘this‘: 1, ‘su’: 0, ‘sub’: 0, ‘subj’: 0, ‘subjec’: 0, ‘subject‘: 1})
Number of tokens: 109
==========
Iter: 78
Best pair: (‘st’, ‘u’)
Tokens: defaultdict(, {‘T’: 1, ‘h’: 4, ‘e’: 8, ‘‘: 13, ‘a’: 3, ‘i’: 3, ‘m’: 4, ‘s’: 7, ‘f’: 1, ‘o’: 3, ‘r’: 5, ‘t’: 6, ‘u’: 3, ‘b’: 1, ‘j’: 0, ‘c’: 4, ‘d’: 2, ‘n’: 5, ‘v’: 0, ‘l’: 5, ‘p’: 4, ‘g’: 3, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 0, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 3, ‘e‘: 7, ‘an’: 2, ‘s‘: 3, ‘ing’: 0, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 4, ‘d‘: 0, ‘ion’: 1, ‘for’: 1, ‘th’: 2, ‘de’: 5, ‘ation’: 1, ‘for‘: 4, ‘st’: 3, ‘ing‘: 2, ‘in‘: 4, ‘us’: 0, ‘ang’: 1, ‘ag’: 1, ‘ic’: 2, ‘is‘: 2, ‘nt’: 1, ‘ve’: 1, ‘op’: 3, ‘of’: 1, ‘al’: 3, ‘ed‘: 1, ‘lang’: 0, ‘langu’: 0, ‘languag’: 0, ‘language‘: 3, ‘ss’: 1, ‘ing,‘: 3, ‘cl’: 1, ‘and‘: 3, ‘.‘: 1, ‘ar’: 1, ‘Th’: 0, ‘The‘: 2, ‘ms‘: 1, ‘ec’: 1, ‘t‘: 1, ‘nts‘: 2, ‘to’: 0, ‘to‘: 2, ‘ding‘: 2, ‘of‘: 2, ‘the‘: 2, ‘ma’: 2, ‘used‘: 2, ‘pr’: 0, ‘pro’: 2, ‘use‘: 2, ‘ver’: 2, ‘ication’: 2, ‘incl’: 0, ‘inclu’: 2, ‘ass’: 0, ‘assi’: 2, ‘ans’: 2, ‘lation’: 2, ‘ing.‘: 2, ‘par’: 2, ‘gr’: 0, ‘gra’: 0, ‘gram’: 2, ‘me’: 2, ‘ai’: 0, ‘aims‘: 1, ‘this‘: 1, ‘su’: 0, ‘sub’: 0, ‘subj’: 0, ‘subjec’: 0, ‘subject‘: 1, ‘stu’: 1})
Number of tokens: 110
==========
Iter: 79
Best pair: (‘stu’, ‘de’)
Tokens: defaultdict(, {‘T’: 1, ‘h’: 4, ‘e’: 8, ‘‘: 13, ‘a’: 3, ‘i’: 3, ‘m’: 4, ‘s’: 7, ‘f’: 1, ‘o’: 3, ‘r’: 5, ‘t’: 6, ‘u’: 3, ‘b’: 1, ‘j’: 0, ‘c’: 4, ‘d’: 2, ‘n’: 5, ‘v’: 0, ‘l’: 5, ‘p’: 4, ‘g’: 3, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 0, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 3, ‘e‘: 7, ‘an’: 2, ‘s‘: 3, ‘ing’: 0, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 4, ‘d‘: 0, ‘ion’: 1, ‘for’: 1, ‘th’: 2, ‘de’: 4, ‘ation’: 1, ‘for‘: 4, ‘st’: 3, ‘ing‘: 2, ‘in‘: 4, ‘us’: 0, ‘ang’: 1, ‘ag’: 1, ‘ic’: 2, ‘is‘: 2, ‘nt’: 1, ‘ve’: 1, ‘op’: 3, ‘of’: 1, ‘al’: 3, ‘ed‘: 1, ‘lang’: 0, ‘langu’: 0, ‘languag’: 0, ‘language‘: 3, ‘ss’: 1, ‘ing,‘: 3, ‘cl’: 1, ‘and‘: 3, ‘.‘: 1, ‘ar’: 1, ‘Th’: 0, ‘The‘: 2, ‘ms‘: 1, ‘ec’: 1, ‘t‘: 1, ‘nts‘: 2, ‘to’: 0, ‘to‘: 2, ‘ding‘: 2, ‘of‘: 2, ‘the‘: 2, ‘ma’: 2, ‘used‘: 2, ‘pr’: 0, ‘pro’: 2, ‘use‘: 2, ‘ver’: 2, ‘ication’: 2, ‘incl’: 0, ‘inclu’: 2, ‘ass’: 0, ‘assi’: 2, ‘ans’: 2, ‘lation’: 2, ‘ing.‘: 2, ‘par’: 2, ‘gr’: 0, ‘gra’: 0, ‘gram’: 2, ‘me’: 2, ‘ai’: 0, ‘aims‘: 1, ‘this‘: 1, ‘su’: 0, ‘sub’: 0, ‘subj’: 0, ‘subjec’: 0, ‘subject‘: 1, ‘stu’: 0, ‘stude’: 1})
Number of tokens: 111
==========
Iter: 80
Best pair: (‘stude’, ‘nts‘)
Tokens: defaultdict(, {‘T’: 1, ‘h’: 4, ‘e’: 8, ‘‘: 13, ‘a’: 3, ‘i’: 3, ‘m’: 4, ‘s’: 7, ‘f’: 1, ‘o’: 3, ‘r’: 5, ‘t’: 6, ‘u’: 3, ‘b’: 1, ‘j’: 0, ‘c’: 4, ‘d’: 2, ‘n’: 5, ‘v’: 0, ‘l’: 5, ‘p’: 4, ‘g’: 3, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 0, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 3, ‘e‘: 7, ‘an’: 2, ‘s‘: 3, ‘ing’: 0, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 4, ‘d‘: 0, ‘ion’: 1, ‘for’: 1, ‘th’: 2, ‘de’: 4, ‘ation’: 1, ‘for‘: 4, ‘st’: 3, ‘ing‘: 2, ‘in‘: 4, ‘us’: 0, ‘ang’: 1, ‘ag’: 1, ‘ic’: 2, ‘is‘: 2, ‘nt’: 1, ‘ve’: 1, ‘op’: 3, ‘of’: 1, ‘al’: 3, ‘ed‘: 1, ‘lang’: 0, ‘langu’: 0, ‘languag’: 0, ‘language‘: 3, ‘ss’: 1, ‘ing,‘: 3, ‘cl’: 1, ‘and‘: 3, ‘.‘: 1, ‘ar’: 1, ‘Th’: 0, ‘The‘: 2, ‘ms‘: 1, ‘ec’: 1, ‘t‘: 1, ‘nts‘: 1, ‘to’: 0, ‘to‘: 2, ‘ding‘: 2, ‘of‘: 2, ‘the‘: 2, ‘ma’: 2, ‘used‘: 2, ‘pr’: 0, ‘pro’: 2, ‘use‘: 2, ‘ver’: 2, ‘ication’: 2, ‘incl’: 0, ‘inclu’: 2, ‘ass’: 0, ‘assi’: 2, ‘ans’: 2, ‘lation’: 2, ‘ing.‘: 2, ‘par’: 2, ‘gr’: 0, ‘gra’: 0, ‘gram’: 2, ‘me’: 2, ‘ai’: 0, ‘aims‘: 1, ‘this‘: 1, ‘su’: 0, ‘sub’: 0, ‘subj’: 0, ‘subjec’: 0, ‘subject‘: 1, ‘stu’: 0, ‘stude’: 0, ‘students‘: 1})
Number of tokens: 112
==========
Iter: 81
Best pair: (‘de’, ‘ve’)
Tokens: defaultdict(, {‘T’: 1, ‘h’: 4, ‘e’: 8, ‘‘: 13, ‘a’: 3, ‘i’: 3, ‘m’: 4, ‘s’: 7, ‘f’: 1, ‘o’: 3, ‘r’: 5, ‘t’: 6, ‘u’: 3, ‘b’: 1, ‘j’: 0, ‘c’: 4, ‘d’: 2, ‘n’: 5, ‘v’: 0, ‘l’: 5, ‘p’: 4, ‘g’: 3, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 0, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 3, ‘e‘: 7, ‘an’: 2, ‘s‘: 3, ‘ing’: 0, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 4, ‘d‘: 0, ‘ion’: 1, ‘for’: 1, ‘th’: 2, ‘de’: 3, ‘ation’: 1, ‘for‘: 4, ‘st’: 3, ‘ing‘: 2, ‘in‘: 4, ‘us’: 0, ‘ang’: 1, ‘ag’: 1, ‘ic’: 2, ‘is‘: 2, ‘nt’: 1, ‘ve’: 0, ‘op’: 3, ‘of’: 1, ‘al’: 3, ‘ed‘: 1, ‘lang’: 0, ‘langu’: 0, ‘languag’: 0, ‘language‘: 3, ‘ss’: 1, ‘ing,‘: 3, ‘cl’: 1, ‘and‘: 3, ‘.‘: 1, ‘ar’: 1, ‘Th’: 0, ‘The‘: 2, ‘ms‘: 1, ‘ec’: 1, ‘t‘: 1, ‘nts‘: 1, ‘to’: 0, ‘to‘: 2, ‘ding‘: 2, ‘of‘: 2, ‘the‘: 2, ‘ma’: 2, ‘used‘: 2, ‘pr’: 0, ‘pro’: 2, ‘use‘: 2, ‘ver’: 2, ‘ication’: 2, ‘incl’: 0, ‘inclu’: 2, ‘ass’: 0, ‘assi’: 2, ‘ans’: 2, ‘lation’: 2, ‘ing.‘: 2, ‘par’: 2, ‘gr’: 0, ‘gra’: 0, ‘gram’: 2, ‘me’: 2, ‘ai’: 0, ‘aims‘: 1, ‘this‘: 1, ‘su’: 0, ‘sub’: 0, ‘subj’: 0, ‘subjec’: 0, ‘subject‘: 1, ‘stu’: 0, ‘stude’: 0, ‘students‘: 1, ‘deve’: 1})
Number of tokens: 113
==========
Iter: 82
Best pair: (‘deve’, ‘l’)
Tokens: defaultdict(, {‘T’: 1, ‘h’: 4, ‘e’: 8, ‘‘: 13, ‘a’: 3, ‘i’: 3, ‘m’: 4, ‘s’: 7, ‘f’: 1, ‘o’: 3, ‘r’: 5, ‘t’: 6, ‘u’: 3, ‘b’: 1, ‘j’: 0, ‘c’: 4, ‘d’: 2, ‘n’: 5, ‘v’: 0, ‘l’: 4, ‘p’: 4, ‘g’: 3, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 0, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 3, ‘e‘: 7, ‘an’: 2, ‘s‘: 3, ‘ing’: 0, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 4, ‘d‘: 0, ‘ion’: 1, ‘for’: 1, ‘th’: 2, ‘de’: 3, ‘ation’: 1, ‘for‘: 4, ‘st’: 3, ‘ing‘: 2, ‘in‘: 4, ‘us’: 0, ‘ang’: 1, ‘ag’: 1, ‘ic’: 2, ‘is‘: 2, ‘nt’: 1, ‘ve’: 0, ‘op’: 3, ‘of’: 1, ‘al’: 3, ‘ed‘: 1, ‘lang’: 0, ‘langu’: 0, ‘languag’: 0, ‘language‘: 3, ‘ss’: 1, ‘ing,‘: 3, ‘cl’: 1, ‘and‘: 3, ‘.‘: 1, ‘ar’: 1, ‘Th’: 0, ‘The‘: 2, ‘ms‘: 1, ‘ec’: 1, ‘t‘: 1, ‘nts‘: 1, ‘to’: 0, ‘to‘: 2, ‘ding‘: 2, ‘of‘: 2, ‘the‘: 2, ‘ma’: 2, ‘used‘: 2, ‘pr’: 0, ‘pro’: 2, ‘use‘: 2, ‘ver’: 2, ‘ication’: 2, ‘incl’: 0, ‘inclu’: 2, ‘ass’: 0, ‘assi’: 2, ‘ans’: 2, ‘lation’: 2, ‘ing.‘: 2, ‘par’: 2, ‘gr’: 0, ‘gra’: 0, ‘gram’: 2, ‘me’: 2, ‘ai’: 0, ‘aims‘: 1, ‘this‘: 1, ‘su’: 0, ‘sub’: 0, ‘subj’: 0, ‘subjec’: 0, ‘subject‘: 1, ‘stu’: 0, ‘stude’: 0, ‘students‘: 1, ‘deve’: 0, ‘devel’: 1})
Number of tokens: 114
==========
Iter: 83
Best pair: (‘devel’, ‘op’)
Tokens: defaultdict(, {‘T’: 1, ‘h’: 4, ‘e’: 8, ‘‘: 13, ‘a’: 3, ‘i’: 3, ‘m’: 4, ‘s’: 7, ‘f’: 1, ‘o’: 3, ‘r’: 5, ‘t’: 6, ‘u’: 3, ‘b’: 1, ‘j’: 0, ‘c’: 4, ‘d’: 2, ‘n’: 5, ‘v’: 0, ‘l’: 4, ‘p’: 4, ‘g’: 3, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 0, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 3, ‘e‘: 7, ‘an’: 2, ‘s‘: 3, ‘ing’: 0, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 4, ‘d‘: 0, ‘ion’: 1, ‘for’: 1, ‘th’: 2, ‘de’: 3, ‘ation’: 1, ‘for‘: 4, ‘st’: 3, ‘ing‘: 2, ‘in‘: 4, ‘us’: 0, ‘ang’: 1, ‘ag’: 1, ‘ic’: 2, ‘is‘: 2, ‘nt’: 1, ‘ve’: 0, ‘op’: 2, ‘of’: 1, ‘al’: 3, ‘ed‘: 1, ‘lang’: 0, ‘langu’: 0, ‘languag’: 0, ‘language‘: 3, ‘ss’: 1, ‘ing,‘: 3, ‘cl’: 1, ‘and‘: 3, ‘.‘: 1, ‘ar’: 1, ‘Th’: 0, ‘The‘: 2, ‘ms‘: 1, ‘ec’: 1, ‘t‘: 1, ‘nts‘: 1, ‘to’: 0, ‘to‘: 2, ‘ding‘: 2, ‘of‘: 2, ‘the‘: 2, ‘ma’: 2, ‘used‘: 2, ‘pr’: 0, ‘pro’: 2, ‘use‘: 2, ‘ver’: 2, ‘ication’: 2, ‘incl’: 0, ‘inclu’: 2, ‘ass’: 0, ‘assi’: 2, ‘ans’: 2, ‘lation’: 2, ‘ing.‘: 2, ‘par’: 2, ‘gr’: 0, ‘gra’: 0, ‘gram’: 2, ‘me’: 2, ‘ai’: 0, ‘aims‘: 1, ‘this‘: 1, ‘su’: 0, ‘sub’: 0, ‘subj’: 0, ‘subjec’: 0, ‘subject‘: 1, ‘stu’: 0, ‘stude’: 0, ‘students‘: 1, ‘deve’: 0, ‘devel’: 0, ‘develop’: 1})
Number of tokens: 115
==========
Iter: 84
Best pair: (‘develop’, ‘‘)
Tokens: defaultdict(, {‘T’: 1, ‘h’: 4, ‘e’: 8, ‘‘: 12, ‘a’: 3, ‘i’: 3, ‘m’: 4, ‘s’: 7, ‘f’: 1, ‘o’: 3, ‘r’: 5, ‘t’: 6, ‘u’: 3, ‘b’: 1, ‘j’: 0, ‘c’: 4, ‘d’: 2, ‘n’: 5, ‘v’: 0, ‘l’: 4, ‘p’: 4, ‘g’: 3, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 0, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 3, ‘e‘: 7, ‘an’: 2, ‘s‘: 3, ‘ing’: 0, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 4, ‘d‘: 0, ‘ion’: 1, ‘for’: 1, ‘th’: 2, ‘de’: 3, ‘ation’: 1, ‘for‘: 4, ‘st’: 3, ‘ing‘: 2, ‘in‘: 4, ‘us’: 0, ‘ang’: 1, ‘ag’: 1, ‘ic’: 2, ‘is‘: 2, ‘nt’: 1, ‘ve’: 0, ‘op’: 2, ‘of’: 1, ‘al’: 3, ‘ed‘: 1, ‘lang’: 0, ‘langu’: 0, ‘languag’: 0, ‘language‘: 3, ‘ss’: 1, ‘ing,‘: 3, ‘cl’: 1, ‘and‘: 3, ‘.‘: 1, ‘ar’: 1, ‘Th’: 0, ‘The‘: 2, ‘ms‘: 1, ‘ec’: 1, ‘t‘: 1, ‘nts‘: 1, ‘to’: 0, ‘to‘: 2, ‘ding‘: 2, ‘of‘: 2, ‘the‘: 2, ‘ma’: 2, ‘used‘: 2, ‘pr’: 0, ‘pro’: 2, ‘use‘: 2, ‘ver’: 2, ‘ication’: 2, ‘incl’: 0, ‘inclu’: 2, ‘ass’: 0, ‘assi’: 2, ‘ans’: 2, ‘lation’: 2, ‘ing.‘: 2, ‘par’: 2, ‘gr’: 0, ‘gra’: 0, ‘gram’: 2, ‘me’: 2, ‘ai’: 0, ‘aims‘: 1, ‘this‘: 1, ‘su’: 0, ‘sub’: 0, ‘subj’: 0, ‘subjec’: 0, ‘subject‘: 1, ‘stu’: 0, ‘stude’: 0, ‘students‘: 1, ‘deve’: 0, ‘devel’: 0, ‘develop’: 0, ‘develop‘: 1})
Number of tokens: 116
==========
Iter: 85
Best pair: (‘an’, ‘‘)
Tokens: defaultdict(, {‘T’: 1, ‘h’: 4, ‘e’: 8, ‘‘: 11, ‘a’: 3, ‘i’: 3, ‘m’: 4, ‘s’: 7, ‘f’: 1, ‘o’: 3, ‘r’: 5, ‘t’: 6, ‘u’: 3, ‘b’: 1, ‘j’: 0, ‘c’: 4, ‘d’: 2, ‘n’: 5, ‘v’: 0, ‘l’: 4, ‘p’: 4, ‘g’: 3, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 0, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 3, ‘e‘: 7, ‘an’: 1, ‘s‘: 3, ‘ing’: 0, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 4, ‘d‘: 0, ‘ion’: 1, ‘for’: 1, ‘th’: 2, ‘de’: 3, ‘ation’: 1, ‘for‘: 4, ‘st’: 3, ‘ing‘: 2, ‘in‘: 4, ‘us’: 0, ‘ang’: 1, ‘ag’: 1, ‘ic’: 2, ‘is‘: 2, ‘nt’: 1, ‘ve’: 0, ‘op’: 2, ‘of’: 1, ‘al’: 3, ‘ed‘: 1, ‘lang’: 0, ‘langu’: 0, ‘languag’: 0, ‘language‘: 3, ‘ss’: 1, ‘ing,‘: 3, ‘cl’: 1, ‘and‘: 3, ‘.‘: 1, ‘ar’: 1, ‘Th’: 0, ‘The‘: 2, ‘ms‘: 1, ‘ec’: 1, ‘t‘: 1, ‘nts‘: 1, ‘to’: 0, ‘to‘: 2, ‘ding‘: 2, ‘of‘: 2, ‘the‘: 2, ‘ma’: 2, ‘used‘: 2, ‘pr’: 0, ‘pro’: 2, ‘use‘: 2, ‘ver’: 2, ‘ication’: 2, ‘incl’: 0, ‘inclu’: 2, ‘ass’: 0, ‘assi’: 2, ‘ans’: 2, ‘lation’: 2, ‘ing.‘: 2, ‘par’: 2, ‘gr’: 0, ‘gra’: 0, ‘gram’: 2, ‘me’: 2, ‘ai’: 0, ‘aims‘: 1, ‘this‘: 1, ‘su’: 0, ‘sub’: 0, ‘subj’: 0, ‘subjec’: 0, ‘subject‘: 1, ‘stu’: 0, ‘stude’: 0, ‘students‘: 1, ‘deve’: 0, ‘devel’: 0, ‘develop’: 0, ‘develop‘: 1, ‘an‘: 1})
Number of tokens: 117
==========
Iter: 86
Best pair: (‘u’, ‘n’)
Tokens: defaultdict(, {‘T’: 1, ‘h’: 4, ‘e’: 8, ‘‘: 11, ‘a’: 3, ‘i’: 3, ‘m’: 4, ‘s’: 7, ‘f’: 1, ‘o’: 3, ‘r’: 5, ‘t’: 6, ‘u’: 2, ‘b’: 1, ‘j’: 0, ‘c’: 4, ‘d’: 2, ‘n’: 4, ‘v’: 0, ‘l’: 4, ‘p’: 4, ‘g’: 3, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 0, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 3, ‘e‘: 7, ‘an’: 1, ‘s‘: 3, ‘ing’: 0, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 4, ‘d‘: 0, ‘ion’: 1, ‘for’: 1, ‘th’: 2, ‘de’: 3, ‘ation’: 1, ‘for‘: 4, ‘st’: 3, ‘ing‘: 2, ‘in‘: 4, ‘us’: 0, ‘ang’: 1, ‘ag’: 1, ‘ic’: 2, ‘is‘: 2, ‘nt’: 1, ‘ve’: 0, ‘op’: 2, ‘of’: 1, ‘al’: 3, ‘ed‘: 1, ‘lang’: 0, ‘langu’: 0, ‘languag’: 0, ‘language‘: 3, ‘ss’: 1, ‘ing,‘: 3, ‘cl’: 1, ‘and‘: 3, ‘.‘: 1, ‘ar’: 1, ‘Th’: 0, ‘The‘: 2, ‘ms‘: 1, ‘ec’: 1, ‘t‘: 1, ‘nts‘: 1, ‘to’: 0, ‘to‘: 2, ‘ding‘: 2, ‘of‘: 2, ‘the‘: 2, ‘ma’: 2, ‘used‘: 2, ‘pr’: 0, ‘pro’: 2, ‘use‘: 2, ‘ver’: 2, ‘ication’: 2, ‘incl’: 0, ‘inclu’: 2, ‘ass’: 0, ‘assi’: 2, ‘ans’: 2, ‘lation’: 2, ‘ing.‘: 2, ‘par’: 2, ‘gr’: 0, ‘gra’: 0, ‘gram’: 2, ‘me’: 2, ‘ai’: 0, ‘aims‘: 1, ‘this‘: 1, ‘su’: 0, ‘sub’: 0, ‘subj’: 0, ‘subjec’: 0, ‘subject‘: 1, ‘stu’: 0, ‘stude’: 0, ‘students‘: 1, ‘deve’: 0, ‘devel’: 0, ‘develop’: 0, ‘develop‘: 1, ‘an‘: 1, ‘un’: 1})
Number of tokens: 118
==========
Iter: 87
Best pair: (‘un’, ‘de’)
Tokens: defaultdict(, {‘T’: 1, ‘h’: 4, ‘e’: 8, ‘‘: 11, ‘a’: 3, ‘i’: 3, ‘m’: 4, ‘s’: 7, ‘f’: 1, ‘o’: 3, ‘r’: 5, ‘t’: 6, ‘u’: 2, ‘b’: 1, ‘j’: 0, ‘c’: 4, ‘d’: 2, ‘n’: 4, ‘v’: 0, ‘l’: 4, ‘p’: 4, ‘g’: 3, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 0, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 3, ‘e‘: 7, ‘an’: 1, ‘s‘: 3, ‘ing’: 0, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 4, ‘d‘: 0, ‘ion’: 1, ‘for’: 1, ‘th’: 2, ‘de’: 2, ‘ation’: 1, ‘for‘: 4, ‘st’: 3, ‘ing‘: 2, ‘in‘: 4, ‘us’: 0, ‘ang’: 1, ‘ag’: 1, ‘ic’: 2, ‘is‘: 2, ‘nt’: 1, ‘ve’: 0, ‘op’: 2, ‘of’: 1, ‘al’: 3, ‘ed‘: 1, ‘lang’: 0, ‘langu’: 0, ‘languag’: 0, ‘language‘: 3, ‘ss’: 1, ‘ing,‘: 3, ‘cl’: 1, ‘and‘: 3, ‘.‘: 1, ‘ar’: 1, ‘Th’: 0, ‘The‘: 2, ‘ms‘: 1, ‘ec’: 1, ‘t‘: 1, ‘nts‘: 1, ‘to’: 0, ‘to‘: 2, ‘ding‘: 2, ‘of‘: 2, ‘the‘: 2, ‘ma’: 2, ‘used‘: 2, ‘pr’: 0, ‘pro’: 2, ‘use‘: 2, ‘ver’: 2, ‘ication’: 2, ‘incl’: 0, ‘inclu’: 2, ‘ass’: 0, ‘assi’: 2, ‘ans’: 2, ‘lation’: 2, ‘ing.‘: 2, ‘par’: 2, ‘gr’: 0, ‘gra’: 0, ‘gram’: 2, ‘me’: 2, ‘ai’: 0, ‘aims‘: 1, ‘this‘: 1, ‘su’: 0, ‘sub’: 0, ‘subj’: 0, ‘subjec’: 0, ‘subject‘: 1, ‘stu’: 0, ‘stude’: 0, ‘students‘: 1, ‘deve’: 0, ‘devel’: 0, ‘develop’: 0, ‘develop‘: 1, ‘an‘: 1, ‘un’: 0, ‘unde’: 1})
Number of tokens: 119
==========
Iter: 88
Best pair: (‘unde’, ‘r’)
Tokens: defaultdict(, {‘T’: 1, ‘h’: 4, ‘e’: 8, ‘‘: 11, ‘a’: 3, ‘i’: 3, ‘m’: 4, ‘s’: 7, ‘f’: 1, ‘o’: 3, ‘r’: 4, ‘t’: 6, ‘u’: 2, ‘b’: 1, ‘j’: 0, ‘c’: 4, ‘d’: 2, ‘n’: 4, ‘v’: 0, ‘l’: 4, ‘p’: 4, ‘g’: 3, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 0, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 3, ‘e‘: 7, ‘an’: 1, ‘s‘: 3, ‘ing’: 0, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 4, ‘d‘: 0, ‘ion’: 1, ‘for’: 1, ‘th’: 2, ‘de’: 2, ‘ation’: 1, ‘for‘: 4, ‘st’: 3, ‘ing‘: 2, ‘in‘: 4, ‘us’: 0, ‘ang’: 1, ‘ag’: 1, ‘ic’: 2, ‘is‘: 2, ‘nt’: 1, ‘ve’: 0, ‘op’: 2, ‘of’: 1, ‘al’: 3, ‘ed‘: 1, ‘lang’: 0, ‘langu’: 0, ‘languag’: 0, ‘language‘: 3, ‘ss’: 1, ‘ing,‘: 3, ‘cl’: 1, ‘and‘: 3, ‘.‘: 1, ‘ar’: 1, ‘Th’: 0, ‘The‘: 2, ‘ms‘: 1, ‘ec’: 1, ‘t‘: 1, ‘nts‘: 1, ‘to’: 0, ‘to‘: 2, ‘ding‘: 2, ‘of‘: 2, ‘the‘: 2, ‘ma’: 2, ‘used‘: 2, ‘pr’: 0, ‘pro’: 2, ‘use‘: 2, ‘ver’: 2, ‘ication’: 2, ‘incl’: 0, ‘inclu’: 2, ‘ass’: 0, ‘assi’: 2, ‘ans’: 2, ‘lation’: 2, ‘ing.‘: 2, ‘par’: 2, ‘gr’: 0, ‘gra’: 0, ‘gram’: 2, ‘me’: 2, ‘ai’: 0, ‘aims‘: 1, ‘this‘: 1, ‘su’: 0, ‘sub’: 0, ‘subj’: 0, ‘subjec’: 0, ‘subject‘: 1, ‘stu’: 0, ‘stude’: 0, ‘students‘: 1, ‘deve’: 0, ‘devel’: 0, ‘develop’: 0, ‘develop‘: 1, ‘an‘: 1, ‘un’: 0, ‘unde’: 0, ‘under’: 1})
Number of tokens: 120
==========
Iter: 89
Best pair: (‘under’, ‘st’)
Tokens: defaultdict(, {‘T’: 1, ‘h’: 4, ‘e’: 8, ‘‘: 11, ‘a’: 3, ‘i’: 3, ‘m’: 4, ‘s’: 7, ‘f’: 1, ‘o’: 3, ‘r’: 4, ‘t’: 6, ‘u’: 2, ‘b’: 1, ‘j’: 0, ‘c’: 4, ‘d’: 2, ‘n’: 4, ‘v’: 0, ‘l’: 4, ‘p’: 4, ‘g’: 3, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 0, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 3, ‘e‘: 7, ‘an’: 1, ‘s‘: 3, ‘ing’: 0, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 4, ‘d‘: 0, ‘ion’: 1, ‘for’: 1, ‘th’: 2, ‘de’: 2, ‘ation’: 1, ‘for‘: 4, ‘st’: 2, ‘ing‘: 2, ‘in‘: 4, ‘us’: 0, ‘ang’: 1, ‘ag’: 1, ‘ic’: 2, ‘is‘: 2, ‘nt’: 1, ‘ve’: 0, ‘op’: 2, ‘of’: 1, ‘al’: 3, ‘ed‘: 1, ‘lang’: 0, ‘langu’: 0, ‘languag’: 0, ‘language‘: 3, ‘ss’: 1, ‘ing,‘: 3, ‘cl’: 1, ‘and‘: 3, ‘.‘: 1, ‘ar’: 1, ‘Th’: 0, ‘The‘: 2, ‘ms‘: 1, ‘ec’: 1, ‘t‘: 1, ‘nts‘: 1, ‘to’: 0, ‘to‘: 2, ‘ding‘: 2, ‘of‘: 2, ‘the‘: 2, ‘ma’: 2, ‘used‘: 2, ‘pr’: 0, ‘pro’: 2, ‘use‘: 2, ‘ver’: 2, ‘ication’: 2, ‘incl’: 0, ‘inclu’: 2, ‘ass’: 0, ‘assi’: 2, ‘ans’: 2, ‘lation’: 2, ‘ing.‘: 2, ‘par’: 2, ‘gr’: 0, ‘gra’: 0, ‘gram’: 2, ‘me’: 2, ‘ai’: 0, ‘aims‘: 1, ‘this‘: 1, ‘su’: 0, ‘sub’: 0, ‘subj’: 0, ‘subjec’: 0, ‘subject‘: 1, ‘stu’: 0, ‘stude’: 0, ‘students‘: 1, ‘deve’: 0, ‘devel’: 0, ‘develop’: 0, ‘develop‘: 1, ‘an‘: 1, ‘un’: 0, ‘unde’: 0, ‘under’: 0, ‘underst’: 1})
Number of tokens: 121
==========
Iter: 90
Best pair: (‘underst’, ‘an’)
Tokens: defaultdict(, {‘T’: 1, ‘h’: 4, ‘e’: 8, ‘‘: 11, ‘a’: 3, ‘i’: 3, ‘m’: 4, ‘s’: 7, ‘f’: 1, ‘o’: 3, ‘r’: 4, ‘t’: 6, ‘u’: 2, ‘b’: 1, ‘j’: 0, ‘c’: 4, ‘d’: 2, ‘n’: 4, ‘v’: 0, ‘l’: 4, ‘p’: 4, ‘g’: 3, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 0, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 3, ‘e‘: 7, ‘an’: 0, ‘s‘: 3, ‘ing’: 0, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 4, ‘d‘: 0, ‘ion’: 1, ‘for’: 1, ‘th’: 2, ‘de’: 2, ‘ation’: 1, ‘for‘: 4, ‘st’: 2, ‘ing‘: 2, ‘in‘: 4, ‘us’: 0, ‘ang’: 1, ‘ag’: 1, ‘ic’: 2, ‘is‘: 2, ‘nt’: 1, ‘ve’: 0, ‘op’: 2, ‘of’: 1, ‘al’: 3, ‘ed‘: 1, ‘lang’: 0, ‘langu’: 0, ‘languag’: 0, ‘language‘: 3, ‘ss’: 1, ‘ing,‘: 3, ‘cl’: 1, ‘and‘: 3, ‘.‘: 1, ‘ar’: 1, ‘Th’: 0, ‘The‘: 2, ‘ms‘: 1, ‘ec’: 1, ‘t‘: 1, ‘nts‘: 1, ‘to’: 0, ‘to‘: 2, ‘ding‘: 2, ‘of‘: 2, ‘the‘: 2, ‘ma’: 2, ‘used‘: 2, ‘pr’: 0, ‘pro’: 2, ‘use‘: 2, ‘ver’: 2, ‘ication’: 2, ‘incl’: 0, ‘inclu’: 2, ‘ass’: 0, ‘assi’: 2, ‘ans’: 2, ‘lation’: 2, ‘ing.‘: 2, ‘par’: 2, ‘gr’: 0, ‘gra’: 0, ‘gram’: 2, ‘me’: 2, ‘ai’: 0, ‘aims‘: 1, ‘this‘: 1, ‘su’: 0, ‘sub’: 0, ‘subj’: 0, ‘subjec’: 0, ‘subject‘: 1, ‘stu’: 0, ‘stude’: 0, ‘students‘: 1, ‘deve’: 0, ‘devel’: 0, ‘develop’: 0, ‘develop‘: 1, ‘an‘: 1, ‘un’: 0, ‘unde’: 0, ‘under’: 0, ‘underst’: 0, ‘understan’: 1})
Number of tokens: 122
==========
Iter: 91
Best pair: (‘understan’, ‘ding‘)
Tokens: defaultdict(, {‘T’: 1, ‘h’: 4, ‘e’: 8, ‘‘: 11, ‘a’: 3, ‘i’: 3, ‘m’: 4, ‘s’: 7, ‘f’: 1, ‘o’: 3, ‘r’: 4, ‘t’: 6, ‘u’: 2, ‘b’: 1, ‘j’: 0, ‘c’: 4, ‘d’: 2, ‘n’: 4, ‘v’: 0, ‘l’: 4, ‘p’: 4, ‘g’: 3, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 0, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 3, ‘e‘: 7, ‘an’: 0, ‘s‘: 3, ‘ing’: 0, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 4, ‘d‘: 0, ‘ion’: 1, ‘for’: 1, ‘th’: 2, ‘de’: 2, ‘ation’: 1, ‘for‘: 4, ‘st’: 2, ‘ing‘: 2, ‘in‘: 4, ‘us’: 0, ‘ang’: 1, ‘ag’: 1, ‘ic’: 2, ‘is‘: 2, ‘nt’: 1, ‘ve’: 0, ‘op’: 2, ‘of’: 1, ‘al’: 3, ‘ed‘: 1, ‘lang’: 0, ‘langu’: 0, ‘languag’: 0, ‘language‘: 3, ‘ss’: 1, ‘ing,‘: 3, ‘cl’: 1, ‘and‘: 3, ‘.‘: 1, ‘ar’: 1, ‘Th’: 0, ‘The‘: 2, ‘ms‘: 1, ‘ec’: 1, ‘t‘: 1, ‘nts‘: 1, ‘to’: 0, ‘to‘: 2, ‘ding‘: 1, ‘of‘: 2, ‘the‘: 2, ‘ma’: 2, ‘used‘: 2, ‘pr’: 0, ‘pro’: 2, ‘use‘: 2, ‘ver’: 2, ‘ication’: 2, ‘incl’: 0, ‘inclu’: 2, ‘ass’: 0, ‘assi’: 2, ‘ans’: 2, ‘lation’: 2, ‘ing.‘: 2, ‘par’: 2, ‘gr’: 0, ‘gra’: 0, ‘gram’: 2, ‘me’: 2, ‘ai’: 0, ‘aims‘: 1, ‘this‘: 1, ‘su’: 0, ‘sub’: 0, ‘subj’: 0, ‘subjec’: 0, ‘subject‘: 1, ‘stu’: 0, ‘stude’: 0, ‘students‘: 1, ‘deve’: 0, ‘devel’: 0, ‘develop’: 0, ‘develop‘: 1, ‘an‘: 1, ‘un’: 0, ‘unde’: 0, ‘under’: 0, ‘underst’: 0, ‘understan’: 0, ‘understanding‘: 1})
Number of tokens: 123
==========
Iter: 92
Best pair: (‘ma’, ‘in‘)
Tokens: defaultdict(, {‘T’: 1, ‘h’: 4, ‘e’: 8, ‘‘: 11, ‘a’: 3, ‘i’: 3, ‘m’: 4, ‘s’: 7, ‘f’: 1, ‘o’: 3, ‘r’: 4, ‘t’: 6, ‘u’: 2, ‘b’: 1, ‘j’: 0, ‘c’: 4, ‘d’: 2, ‘n’: 4, ‘v’: 0, ‘l’: 4, ‘p’: 4, ‘g’: 3, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 0, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 3, ‘e‘: 7, ‘an’: 0, ‘s‘: 3, ‘ing’: 0, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 4, ‘d‘: 0, ‘ion’: 1, ‘for’: 1, ‘th’: 2, ‘de’: 2, ‘ation’: 1, ‘for‘: 4, ‘st’: 2, ‘ing‘: 2, ‘in‘: 3, ‘us’: 0, ‘ang’: 1, ‘ag’: 1, ‘ic’: 2, ‘is‘: 2, ‘nt’: 1, ‘ve’: 0, ‘op’: 2, ‘of’: 1, ‘al’: 3, ‘ed‘: 1, ‘lang’: 0, ‘langu’: 0, ‘languag’: 0, ‘language‘: 3, ‘ss’: 1, ‘ing,‘: 3, ‘cl’: 1, ‘and‘: 3, ‘.‘: 1, ‘ar’: 1, ‘Th’: 0, ‘The‘: 2, ‘ms‘: 1, ‘ec’: 1, ‘t‘: 1, ‘nts‘: 1, ‘to’: 0, ‘to‘: 2, ‘ding‘: 1, ‘of‘: 2, ‘the‘: 2, ‘ma’: 1, ‘used‘: 2, ‘pr’: 0, ‘pro’: 2, ‘use‘: 2, ‘ver’: 2, ‘ication’: 2, ‘incl’: 0, ‘inclu’: 2, ‘ass’: 0, ‘assi’: 2, ‘ans’: 2, ‘lation’: 2, ‘ing.‘: 2, ‘par’: 2, ‘gr’: 0, ‘gra’: 0, ‘gram’: 2, ‘me’: 2, ‘ai’: 0, ‘aims‘: 1, ‘this‘: 1, ‘su’: 0, ‘sub’: 0, ‘subj’: 0, ‘subjec’: 0, ‘subject‘: 1, ‘stu’: 0, ‘stude’: 0, ‘students‘: 1, ‘deve’: 0, ‘devel’: 0, ‘develop’: 0, ‘develop‘: 1, ‘an‘: 1, ‘un’: 0, ‘unde’: 0, ‘under’: 0, ‘underst’: 0, ‘understan’: 0, ‘understanding‘: 1, ‘main‘: 1})
Number of tokens: 124
==========
Iter: 93
Best pair: (‘al’, ‘g’)
Tokens: defaultdict(, {‘T’: 1, ‘h’: 4, ‘e’: 8, ‘‘: 11, ‘a’: 3, ‘i’: 3, ‘m’: 4, ‘s’: 7, ‘f’: 1, ‘o’: 3, ‘r’: 4, ‘t’: 6, ‘u’: 2, ‘b’: 1, ‘j’: 0, ‘c’: 4, ‘d’: 2, ‘n’: 4, ‘v’: 0, ‘l’: 4, ‘p’: 4, ‘g’: 2, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 0, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 3, ‘e‘: 7, ‘an’: 0, ‘s‘: 3, ‘ing’: 0, ‘or’: 3, ‘on’: 2, ‘at’: 2, ‘,‘: 4, ‘d‘: 0, ‘ion’: 1, ‘for’: 1, ‘th’: 2, ‘de’: 2, ‘ation’: 1, ‘for‘: 4, ‘st’: 2, ‘ing‘: 2, ‘in‘: 3, ‘us’: 0, ‘ang’: 1, ‘ag’: 1, ‘ic’: 2, ‘is‘: 2, ‘nt’: 1, ‘ve’: 0, ‘op’: 2, ‘of’: 1, ‘al’: 2, ‘ed‘: 1, ‘lang’: 0, ‘langu’: 0, ‘languag’: 0, ‘language‘: 3, ‘ss’: 1, ‘ing,‘: 3, ‘cl’: 1, ‘and‘: 3, ‘.‘: 1, ‘ar’: 1, ‘Th’: 0, ‘The‘: 2, ‘ms‘: 1, ‘ec’: 1, ‘t‘: 1, ‘nts‘: 1, ‘to’: 0, ‘to‘: 2, ‘ding‘: 1, ‘of‘: 2, ‘the‘: 2, ‘ma’: 1, ‘used‘: 2, ‘pr’: 0, ‘pro’: 2, ‘use‘: 2, ‘ver’: 2, ‘ication’: 2, ‘incl’: 0, ‘inclu’: 2, ‘ass’: 0, ‘assi’: 2, ‘ans’: 2, ‘lation’: 2, ‘ing.‘: 2, ‘par’: 2, ‘gr’: 0, ‘gra’: 0, ‘gram’: 2, ‘me’: 2, ‘ai’: 0, ‘aims‘: 1, ‘this‘: 1, ‘su’: 0, ‘sub’: 0, ‘subj’: 0, ‘subjec’: 0, ‘subject‘: 1, ‘stu’: 0, ‘stude’: 0, ‘students‘: 1, ‘deve’: 0, ‘devel’: 0, ‘develop’: 0, ‘develop‘: 1, ‘an‘: 1, ‘un’: 0, ‘unde’: 0, ‘under’: 0, ‘underst’: 0, ‘understan’: 0, ‘understanding‘: 1, ‘main‘: 1, ‘alg’: 1})
Number of tokens: 125
==========
Iter: 94
Best pair: (‘alg’, ‘or’)
Tokens: defaultdict(, {‘T’: 1, ‘h’: 4, ‘e’: 8, ‘‘: 11, ‘a’: 3, ‘i’: 3, ‘m’: 4, ‘s’: 7, ‘f’: 1, ‘o’: 3, ‘r’: 4, ‘t’: 6, ‘u’: 2, ‘b’: 1, ‘j’: 0, ‘c’: 4, ‘d’: 2, ‘n’: 4, ‘v’: 0, ‘l’: 4, ‘p’: 4, ‘g’: 2, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 0, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 3, ‘e‘: 7, ‘an’: 0, ‘s‘: 3, ‘ing’: 0, ‘or’: 2, ‘on’: 2, ‘at’: 2, ‘,‘: 4, ‘d‘: 0, ‘ion’: 1, ‘for’: 1, ‘th’: 2, ‘de’: 2, ‘ation’: 1, ‘for‘: 4, ‘st’: 2, ‘ing‘: 2, ‘in‘: 3, ‘us’: 0, ‘ang’: 1, ‘ag’: 1, ‘ic’: 2, ‘is‘: 2, ‘nt’: 1, ‘ve’: 0, ‘op’: 2, ‘of’: 1, ‘al’: 2, ‘ed‘: 1, ‘lang’: 0, ‘langu’: 0, ‘languag’: 0, ‘language‘: 3, ‘ss’: 1, ‘ing,‘: 3, ‘cl’: 1, ‘and‘: 3, ‘.‘: 1, ‘ar’: 1, ‘Th’: 0, ‘The‘: 2, ‘ms‘: 1, ‘ec’: 1, ‘t‘: 1, ‘nts‘: 1, ‘to’: 0, ‘to‘: 2, ‘ding‘: 1, ‘of‘: 2, ‘the‘: 2, ‘ma’: 1, ‘used‘: 2, ‘pr’: 0, ‘pro’: 2, ‘use‘: 2, ‘ver’: 2, ‘ication’: 2, ‘incl’: 0, ‘inclu’: 2, ‘ass’: 0, ‘assi’: 2, ‘ans’: 2, ‘lation’: 2, ‘ing.‘: 2, ‘par’: 2, ‘gr’: 0, ‘gra’: 0, ‘gram’: 2, ‘me’: 2, ‘ai’: 0, ‘aims‘: 1, ‘this‘: 1, ‘su’: 0, ‘sub’: 0, ‘subj’: 0, ‘subjec’: 0, ‘subject‘: 1, ‘stu’: 0, ‘stude’: 0, ‘students‘: 1, ‘deve’: 0, ‘devel’: 0, ‘develop’: 0, ‘develop‘: 1, ‘an‘: 1, ‘un’: 0, ‘unde’: 0, ‘under’: 0, ‘underst’: 0, ‘understan’: 0, ‘understanding‘: 1, ‘main‘: 1, ‘alg’: 0, ‘algor’: 1})
Number of tokens: 126
==========
Iter: 95
Best pair: (‘algor’, ‘i’)
Tokens: defaultdict(, {‘T’: 1, ‘h’: 4, ‘e’: 8, ‘‘: 11, ‘a’: 3, ‘i’: 2, ‘m’: 4, ‘s’: 7, ‘f’: 1, ‘o’: 3, ‘r’: 4, ‘t’: 6, ‘u’: 2, ‘b’: 1, ‘j’: 0, ‘c’: 4, ‘d’: 2, ‘n’: 4, ‘v’: 0, ‘l’: 4, ‘p’: 4, ‘g’: 2, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 0, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 3, ‘e‘: 7, ‘an’: 0, ‘s‘: 3, ‘ing’: 0, ‘or’: 2, ‘on’: 2, ‘at’: 2, ‘,‘: 4, ‘d‘: 0, ‘ion’: 1, ‘for’: 1, ‘th’: 2, ‘de’: 2, ‘ation’: 1, ‘for‘: 4, ‘st’: 2, ‘ing‘: 2, ‘in‘: 3, ‘us’: 0, ‘ang’: 1, ‘ag’: 1, ‘ic’: 2, ‘is‘: 2, ‘nt’: 1, ‘ve’: 0, ‘op’: 2, ‘of’: 1, ‘al’: 2, ‘ed‘: 1, ‘lang’: 0, ‘langu’: 0, ‘languag’: 0, ‘language‘: 3, ‘ss’: 1, ‘ing,‘: 3, ‘cl’: 1, ‘and‘: 3, ‘.‘: 1, ‘ar’: 1, ‘Th’: 0, ‘The‘: 2, ‘ms‘: 1, ‘ec’: 1, ‘t‘: 1, ‘nts‘: 1, ‘to’: 0, ‘to‘: 2, ‘ding‘: 1, ‘of‘: 2, ‘the‘: 2, ‘ma’: 1, ‘used‘: 2, ‘pr’: 0, ‘pro’: 2, ‘use‘: 2, ‘ver’: 2, ‘ication’: 2, ‘incl’: 0, ‘inclu’: 2, ‘ass’: 0, ‘assi’: 2, ‘ans’: 2, ‘lation’: 2, ‘ing.‘: 2, ‘par’: 2, ‘gr’: 0, ‘gra’: 0, ‘gram’: 2, ‘me’: 2, ‘ai’: 0, ‘aims‘: 1, ‘this‘: 1, ‘su’: 0, ‘sub’: 0, ‘subj’: 0, ‘subjec’: 0, ‘subject‘: 1, ‘stu’: 0, ‘stude’: 0, ‘students‘: 1, ‘deve’: 0, ‘devel’: 0, ‘develop’: 0, ‘develop‘: 1, ‘an‘: 1, ‘un’: 0, ‘unde’: 0, ‘under’: 0, ‘underst’: 0, ‘understan’: 0, ‘understanding‘: 1, ‘main‘: 1, ‘alg’: 0, ‘algor’: 0, ‘algori’: 1})
Number of tokens: 127
==========
Iter: 96
Best pair: (‘algori’, ‘th’)
Tokens: defaultdict(, {‘T’: 1, ‘h’: 4, ‘e’: 8, ‘‘: 11, ‘a’: 3, ‘i’: 2, ‘m’: 4, ‘s’: 7, ‘f’: 1, ‘o’: 3, ‘r’: 4, ‘t’: 6, ‘u’: 2, ‘b’: 1, ‘j’: 0, ‘c’: 4, ‘d’: 2, ‘n’: 4, ‘v’: 0, ‘l’: 4, ‘p’: 4, ‘g’: 2, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 0, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 3, ‘e‘: 7, ‘an’: 0, ‘s‘: 3, ‘ing’: 0, ‘or’: 2, ‘on’: 2, ‘at’: 2, ‘,‘: 4, ‘d‘: 0, ‘ion’: 1, ‘for’: 1, ‘th’: 1, ‘de’: 2, ‘ation’: 1, ‘for‘: 4, ‘st’: 2, ‘ing‘: 2, ‘in‘: 3, ‘us’: 0, ‘ang’: 1, ‘ag’: 1, ‘ic’: 2, ‘is‘: 2, ‘nt’: 1, ‘ve’: 0, ‘op’: 2, ‘of’: 1, ‘al’: 2, ‘ed‘: 1, ‘lang’: 0, ‘langu’: 0, ‘languag’: 0, ‘language‘: 3, ‘ss’: 1, ‘ing,‘: 3, ‘cl’: 1, ‘and‘: 3, ‘.‘: 1, ‘ar’: 1, ‘Th’: 0, ‘The‘: 2, ‘ms‘: 1, ‘ec’: 1, ‘t‘: 1, ‘nts‘: 1, ‘to’: 0, ‘to‘: 2, ‘ding‘: 1, ‘of‘: 2, ‘the‘: 2, ‘ma’: 1, ‘used‘: 2, ‘pr’: 0, ‘pro’: 2, ‘use‘: 2, ‘ver’: 2, ‘ication’: 2, ‘incl’: 0, ‘inclu’: 2, ‘ass’: 0, ‘assi’: 2, ‘ans’: 2, ‘lation’: 2, ‘ing.‘: 2, ‘par’: 2, ‘gr’: 0, ‘gra’: 0, ‘gram’: 2, ‘me’: 2, ‘ai’: 0, ‘aims‘: 1, ‘this‘: 1, ‘su’: 0, ‘sub’: 0, ‘subj’: 0, ‘subjec’: 0, ‘subject‘: 1, ‘stu’: 0, ‘stude’: 0, ‘students‘: 1, ‘deve’: 0, ‘devel’: 0, ‘develop’: 0, ‘develop‘: 1, ‘an‘: 1, ‘un’: 0, ‘unde’: 0, ‘under’: 0, ‘underst’: 0, ‘understan’: 0, ‘understanding‘: 1, ‘main‘: 1, ‘alg’: 0, ‘algor’: 0, ‘algori’: 0, ‘algorith’: 1})
Number of tokens: 128
==========
Iter: 97
Best pair: (‘algorith’, ‘ms‘)
Tokens: defaultdict(, {‘T’: 1, ‘h’: 4, ‘e’: 8, ‘‘: 11, ‘a’: 3, ‘i’: 2, ‘m’: 4, ‘s’: 7, ‘f’: 1, ‘o’: 3, ‘r’: 4, ‘t’: 6, ‘u’: 2, ‘b’: 1, ‘j’: 0, ‘c’: 4, ‘d’: 2, ‘n’: 4, ‘v’: 0, ‘l’: 4, ‘p’: 4, ‘g’: 2, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 0, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 3, ‘e‘: 7, ‘an’: 0, ‘s‘: 3, ‘ing’: 0, ‘or’: 2, ‘on’: 2, ‘at’: 2, ‘,‘: 4, ‘d‘: 0, ‘ion’: 1, ‘for’: 1, ‘th’: 1, ‘de’: 2, ‘ation’: 1, ‘for‘: 4, ‘st’: 2, ‘ing‘: 2, ‘in‘: 3, ‘us’: 0, ‘ang’: 1, ‘ag’: 1, ‘ic’: 2, ‘is‘: 2, ‘nt’: 1, ‘ve’: 0, ‘op’: 2, ‘of’: 1, ‘al’: 2, ‘ed‘: 1, ‘lang’: 0, ‘langu’: 0, ‘languag’: 0, ‘language‘: 3, ‘ss’: 1, ‘ing,‘: 3, ‘cl’: 1, ‘and‘: 3, ‘.‘: 1, ‘ar’: 1, ‘Th’: 0, ‘The‘: 2, ‘ms‘: 0, ‘ec’: 1, ‘t‘: 1, ‘nts‘: 1, ‘to’: 0, ‘to‘: 2, ‘ding‘: 1, ‘of‘: 2, ‘the‘: 2, ‘ma’: 1, ‘used‘: 2, ‘pr’: 0, ‘pro’: 2, ‘use‘: 2, ‘ver’: 2, ‘ication’: 2, ‘incl’: 0, ‘inclu’: 2, ‘ass’: 0, ‘assi’: 2, ‘ans’: 2, ‘lation’: 2, ‘ing.‘: 2, ‘par’: 2, ‘gr’: 0, ‘gra’: 0, ‘gram’: 2, ‘me’: 2, ‘ai’: 0, ‘aims‘: 1, ‘this‘: 1, ‘su’: 0, ‘sub’: 0, ‘subj’: 0, ‘subjec’: 0, ‘subject‘: 1, ‘stu’: 0, ‘stude’: 0, ‘students‘: 1, ‘deve’: 0, ‘devel’: 0, ‘develop’: 0, ‘develop‘: 1, ‘an‘: 1, ‘un’: 0, ‘unde’: 0, ‘under’: 0, ‘underst’: 0, ‘understan’: 0, ‘understanding‘: 1, ‘main‘: 1, ‘alg’: 0, ‘algor’: 0, ‘algori’: 0, ‘algorith’: 0, ‘algorithms‘: 1})
Number of tokens: 129
==========
Iter: 98
Best pair: (‘n’, ‘at’)
Tokens: defaultdict(, {‘T’: 1, ‘h’: 4, ‘e’: 8, ‘‘: 11, ‘a’: 3, ‘i’: 2, ‘m’: 4, ‘s’: 7, ‘f’: 1, ‘o’: 3, ‘r’: 4, ‘t’: 6, ‘u’: 2, ‘b’: 1, ‘j’: 0, ‘c’: 4, ‘d’: 2, ‘n’: 3, ‘v’: 0, ‘l’: 4, ‘p’: 4, ‘g’: 2, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 0, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 3, ‘e‘: 7, ‘an’: 0, ‘s‘: 3, ‘ing’: 0, ‘or’: 2, ‘on’: 2, ‘at’: 1, ‘,‘: 4, ‘d‘: 0, ‘ion’: 1, ‘for’: 1, ‘th’: 1, ‘de’: 2, ‘ation’: 1, ‘for‘: 4, ‘st’: 2, ‘ing‘: 2, ‘in‘: 3, ‘us’: 0, ‘ang’: 1, ‘ag’: 1, ‘ic’: 2, ‘is‘: 2, ‘nt’: 1, ‘ve’: 0, ‘op’: 2, ‘of’: 1, ‘al’: 2, ‘ed‘: 1, ‘lang’: 0, ‘langu’: 0, ‘languag’: 0, ‘language‘: 3, ‘ss’: 1, ‘ing,‘: 3, ‘cl’: 1, ‘and‘: 3, ‘.‘: 1, ‘ar’: 1, ‘Th’: 0, ‘The‘: 2, ‘ms‘: 0, ‘ec’: 1, ‘t‘: 1, ‘nts‘: 1, ‘to’: 0, ‘to‘: 2, ‘ding‘: 1, ‘of‘: 2, ‘the‘: 2, ‘ma’: 1, ‘used‘: 2, ‘pr’: 0, ‘pro’: 2, ‘use‘: 2, ‘ver’: 2, ‘ication’: 2, ‘incl’: 0, ‘inclu’: 2, ‘ass’: 0, ‘assi’: 2, ‘ans’: 2, ‘lation’: 2, ‘ing.‘: 2, ‘par’: 2, ‘gr’: 0, ‘gra’: 0, ‘gram’: 2, ‘me’: 2, ‘ai’: 0, ‘aims‘: 1, ‘this‘: 1, ‘su’: 0, ‘sub’: 0, ‘subj’: 0, ‘subjec’: 0, ‘subject‘: 1, ‘stu’: 0, ‘stude’: 0, ‘students‘: 1, ‘deve’: 0, ‘devel’: 0, ‘develop’: 0, ‘develop‘: 1, ‘an‘: 1, ‘un’: 0, ‘unde’: 0, ‘under’: 0, ‘underst’: 0, ‘understan’: 0, ‘understanding‘: 1, ‘main‘: 1, ‘alg’: 0, ‘algor’: 0, ‘algori’: 0, ‘algorith’: 0, ‘algorithms‘: 1, ‘nat’: 1})
Number of tokens: 130
==========
Iter: 99
Best pair: (‘nat’, ‘u’)
Tokens: defaultdict(, {‘T’: 1, ‘h’: 4, ‘e’: 8, ‘‘: 11, ‘a’: 3, ‘i’: 2, ‘m’: 4, ‘s’: 7, ‘f’: 1, ‘o’: 3, ‘r’: 4, ‘t’: 6, ‘u’: 1, ‘b’: 1, ‘j’: 0, ‘c’: 4, ‘d’: 2, ‘n’: 3, ‘v’: 0, ‘l’: 4, ‘p’: 4, ‘g’: 2, ‘,’: 0, ‘x’: 1, ‘q’: 1, ‘w’: 2, ‘.’: 0, ‘-‘: 3, ‘y’: 2, ‘P’: 1, ‘k’: 1, ‘in’: 3, ‘e‘: 7, ‘an’: 0, ‘s‘: 3, ‘ing’: 0, ‘or’: 2, ‘on’: 2, ‘at’: 1, ‘,‘: 4, ‘d‘: 0, ‘ion’: 1, ‘for’: 1, ‘th’: 1, ‘de’: 2, ‘ation’: 1, ‘for‘: 4, ‘st’: 2, ‘ing‘: 2, ‘in‘: 3, ‘us’: 0, ‘ang’: 1, ‘ag’: 1, ‘ic’: 2, ‘is‘: 2, ‘nt’: 1, ‘ve’: 0, ‘op’: 2, ‘of’: 1, ‘al’: 2, ‘ed‘: 1, ‘lang’: 0, ‘langu’: 0, ‘languag’: 0, ‘language‘: 3, ‘ss’: 1, ‘ing,‘: 3, ‘cl’: 1, ‘and‘: 3, ‘.‘: 1, ‘ar’: 1, ‘Th’: 0, ‘The‘: 2, ‘ms‘: 0, ‘ec’: 1, ‘t‘: 1, ‘nts‘: 1, ‘to’: 0, ‘to‘: 2, ‘ding‘: 1, ‘of‘: 2, ‘the‘: 2, ‘ma’: 1, ‘used‘: 2, ‘pr’: 0, ‘pro’: 2, ‘use‘: 2, ‘ver’: 2, ‘ication’: 2, ‘incl’: 0, ‘inclu’: 2, ‘ass’: 0, ‘assi’: 2, ‘ans’: 2, ‘lation’: 2, ‘ing.‘: 2, ‘par’: 2, ‘gr’: 0, ‘gra’: 0, ‘gram’: 2, ‘me’: 2, ‘ai’: 0, ‘aims‘: 1, ‘this‘: 1, ‘su’: 0, ‘sub’: 0, ‘subj’: 0, ‘subjec’: 0, ‘subject‘: 1, ‘stu’: 0, ‘stude’: 0, ‘students‘: 1, ‘deve’: 0, ‘devel’: 0, ‘develop’: 0, ‘develop‘: 1, ‘an‘: 1, ‘un’: 0, ‘unde’: 0, ‘under’: 0, ‘underst’: 0, ‘understan’: 0, ‘understanding‘: 1, ‘main‘: 1, ‘alg’: 0, ‘algor’: 0, ‘algori’: 0, ‘algorith’: 0, ‘algorithms‘: 1, ‘nat’: 0, ‘natu’: 1})
Number of tokens: 131
==========
After training, used the BPE dictionaries to tokenise sentences
In [4]:
def get_tokens_from_vocab(vocab):
tokens_frequencies = collections.defaultdict(int)
vocab_tokenization = {}
for word, freq in vocab.items():
word_tokens = word.split()
for token in word_tokens:
tokens_frequencies[token] += freq
vocab_tokenization[”.join(word_tokens)] = word_tokens
return tokens_frequencies, vocab_tokenization
def measure_token_length(token):
if token[-4:] == ‘‘:
return len(token[:-4]) + 1
else:
return len(token)
def tokenize_word(string, sorted_tokens, unknown_token=’‘):
if string == ”:
return []
if sorted_tokens == []:
return [unknown_token] * len(string)
string_tokens = []
for i in range(len(sorted_tokens)):
token = sorted_tokens[i]
token_reg = re.escape(token.replace(‘.’, ‘[.]’))
matched_positions = [(m.start(0), m.end(0)) for m in re.finditer(token_reg, string)]
# if no match found in the string, go to next token
if len(matched_positions) == 0:
continue
# collect end position of each sub-word in the string
substring_end_positions = [matched_position[0] for matched_position in matched_positions]
substring_start_position = 0
for substring_end_position in substring_end_positions:
# slice for sub-word
substring = string[substring_start_position:substring_end_position]
# tokenize this sub-word with tokens remaining
string_tokens += tokenize_word(string=substring, sorted_tokens=sorted_tokens[i+1:], unknown_token=unknown_token)
string_tokens += [token]
substring_start_position = substring_end_position + len(token)
# tokenize the remaining string
remaining_substring = string[substring_start_position:]
string_tokens += tokenize_word(string=remaining_substring, sorted_tokens=sorted_tokens[i+1:], unknown_token=unknown_token)
break
else:
# return list of unknown token if no match is found for the string
string_tokens = [unknown_token] * len(string)
return string_tokens
def sort_tokens(tokens_frequencies):
sorted_tokens_tuple = sorted(tokens_frequencies.items(), key=lambda item: (measure_token_length(item[0]), item[1]), reverse=True)
sorted_tokens = [token for (token, freq) in sorted_tokens_tuple]
return sorted_tokens
#display the vocab
tokens_frequencies, vocab_tokenization = get_tokens_from_vocab(vocab)
#sort tokens by length and frequency
sorted_tokens = sort_tokens(tokens)
print(“Tokens =”, sorted_tokens, “\n”)
sentence_1 = ‘I like natural language processing!’
sentence_2 = ‘I like natural languaaage processing!’
sentence_list = [sentence_1, sentence_2]
for sentence in sentence_list:
print(‘==========’)
print(“Sentence =”, sentence)
for word in sentence.split():
word = word + “”
print(‘Tokenizing word: {}…’.format(word))
if word in vocab_tokenization:
print(vocab_tokenization[word])
else:
print(tokenize_word(string=word, sorted_tokens=sorted_tokens, unknown_token=’‘))
Tokens = [‘understanding‘, ‘algorithms‘, ‘language‘, ‘students‘, ‘understan’, ‘subject‘, ‘develop‘, ‘algorith’, ‘ication’, ‘languag’, ‘develop’, ‘underst’, ‘lation’, ‘subjec’, ‘algori’, ‘ing,‘, ‘used‘, ‘inclu’, ‘ing.‘, ‘ation’, ‘ding‘, ‘aims‘, ‘this‘, ‘main‘, ‘langu’, ‘stude’, ‘devel’, ‘under’, ‘algor’, ‘for‘, ‘and‘, ‘ing‘, ‘The‘, ‘the‘, ‘use‘, ‘assi’, ‘gram’, ‘nts‘, ‘natu’, ‘lang’, ‘incl’, ‘subj’, ‘deve’, ‘unde’, ‘in‘, ‘is‘, ‘to‘, ‘of‘, ‘pro’, ‘ver’, ‘ans’, ‘par’, ‘ion’, ‘for’, ‘ang’, ‘ed‘, ‘an‘, ‘ing’, ‘ms‘, ‘ass’, ‘gra’, ‘sub’, ‘stu’, ‘alg’, ‘nat’, ‘e‘, ‘,‘, ‘in’, ‘s‘, ‘or’, ‘on’, ‘de’, ‘st’, ‘ic’, ‘op’, ‘al’, ‘me’, ‘at’, ‘th’, ‘ag’, ‘nt’, ‘of’, ‘ss’, ‘cl’, ‘.‘, ‘ar’, ‘ec’, ‘t‘, ‘ma’, ‘an’, ‘d‘, ‘us’, ‘ve’, ‘Th’, ‘to’, ‘pr’, ‘gr’, ‘ai’, ‘su’, ‘un’, ‘‘, ‘e’, ‘s’, ‘t’, ‘h’, ‘m’, ‘r’, ‘c’, ‘l’, ‘p’, ‘a’, ‘o’, ‘n’, ‘-‘, ‘i’, ‘d’, ‘g’, ‘w’, ‘y’, ‘T’, ‘f’, ‘u’, ‘b’, ‘x’, ‘q’, ‘P’, ‘k’, ‘j’, ‘v’, ‘,’, ‘.’]
==========
Sentence = I like natural language processing!
Tokenizing word: I…
[‘‘, ‘‘]
Tokenizing word: like…
[‘l’, ‘i’, ‘k’, ‘e‘]
Tokenizing word: natural…
[‘natu’, ‘r’, ‘al’, ‘‘]
Tokenizing word: language…
[‘language‘]
Tokenizing word: processing!…
[‘pro’, ‘c’, ‘e’, ‘ss’, ‘ing’, ‘‘, ‘‘]
==========
Sentence = I like natural languaaage processing!
Tokenizing word: I…
[‘‘, ‘‘]
Tokenizing word: like…
[‘l’, ‘i’, ‘k’, ‘e‘]
Tokenizing word: natural…
[‘natu’, ‘r’, ‘al’, ‘‘]
Tokenizing word: languaaage…
[‘langu’, ‘a’, ‘a’, ‘ag’, ‘e‘]
Tokenizing word: processing!…
[‘pro’, ‘c’, ‘e’, ‘ss’, ‘ing’, ‘‘, ‘‘]
In [ ]: