# -*- coding: UTF-8 -*-
"""
Universität Tübingen – Seminar für Sprachwissenschaft
Lecture 'Programming and Data Analysis', winter term 2019-2020
© Johannes Dellert, Gerhard Jäger

Assignment 07: Analyzing the Spanish Copulas
Tests
"""

import pickle
import unittest

from ex_07 import *

file_name = "spanish_tagged_spacy.txt"

class TestTask1(unittest.TestCase):

    def setUp(self):
        # load tagged sentences from file
        self.sentences = load_sentences(file_name)
        # store loaded sentences in pickle file to make testing later tasks faster
        with open("sentences.pkl", "wb") as sents_pkl:
            pickle.dump(self.sentences, sents_pkl)

    # Test 1: successful processing
    def test_load_sentences1(self):
        self.assertIsNotNone(self.sentences)

    # Test 2: result is a list of correct size
    def test_load_sentences2(self):
        self.assertIsInstance(self.sentences, list)
        self.assertEqual(len(self.sentences), 71201)

    # Test 3: each sentence is a list of tuples of length 2
    def test_load_sentences3(self):
        self.assertIsInstance(self.sentences[0], list)
        self.assertIsInstance(self.sentences[0][0], tuple)
        self.assertEqual(len(self.sentences[0][0]), 2)

    # Test 4: sentences have the correct number of tokens
    def test_load_sentences4(self):
        self.assertEqual(len(self.sentences[0]), 6)
        self.assertEqual(len(self.sentences[1]), 9)

    # Test 5: first elements of tuples contain the words
    def test_load_sentences5(self):
        self.assertEqual(self.sentences[0][0][0], "No")
        self.assertEqual(self.sentences[0][1][0], ",")
        self.assertEqual(self.sentences[2][0][0], "¿")
        self.assertEqual(self.sentences[2][1][0], "Qué")

    # Test 6: second elements of tuples contain the tags
    def test_load_sentences6(self):
        self.assertEqual(self.sentences[0][0][1], "ADV")
        self.assertEqual(self.sentences[0][1][1], "PUNCT")
        self.assertEqual(self.sentences[2][0][1], "PUNCT")
        self.assertEqual(self.sentences[2][1][1], "PRON")
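
# The tests above pin down the shape load_sentences has to return: a list of
# 71201 sentences, each a list of (word, tag) tuples.  The sketch below is a
# minimal illustration of one way to build that structure, assuming a
# hypothetical one-token-per-line "word<TAB>tag" layout with blank lines
# between sentences; the actual format of spanish_tagged_spacy.txt and the
# implementation in ex_07 may differ.  It is not used by the tests.
def _load_sentences_sketch(path):
    sentences, current = [], []
    with open(path, encoding="utf-8") as infile:
        for line in infile:
            line = line.rstrip("\n")
            if not line:                      # blank line closes the current sentence
                if current:
                    sentences.append(current)
                    current = []
            else:
                word, tag = line.split("\t")  # assumed "word<TAB>tag" columns
                current.append((word, tag))
    if current:                               # in case the file lacks a trailing blank line
        sentences.append(current)
    return sentences
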

class TestTask2(unittest.TestCase):

    # Test 7: no change for forms ending in -o
    def test_lemmatize1(self):
        self.assertEqual(lemmatize("pequeño"), "pequeño")

    # Test 8: normalize adjectives in -a to -o
    def test_lemmatize2(self):
        self.assertEqual(lemmatize("pequeña"), "pequeño")

    # Test 9: normalize adjectives in -os to -o
    def test_lemmatize3(self):
        self.assertEqual(lemmatize("pequeños"), "pequeño")

    # Test 10: normalize adjectives in -as to -o
    def test_lemmatize4(self):
        self.assertEqual(lemmatize("pequeñas"), "pequeño")

    # Test 11: normalize adjectives in -ces to -z
    def test_lemmatize5(self):
        self.assertEqual(lemmatize("capaces"), "capaz")

    # Test 12: normalize adjectives in -és
    def test_lemmatize6(self):
        self.assertEqual(lemmatize("ingleses"), "inglés")
        self.assertEqual(lemmatize("inglesa"), "inglés")
        self.assertEqual(lemmatize("inglesas"), "inglés")

    # Test 13: normalize to -nte/-ble/-bre/-nse
    def test_lemmatize7(self):
        self.assertEqual(lemmatize("interesantes"), "interesante")
        self.assertEqual(lemmatize("agradables"), "agradable")
        self.assertEqual(lemmatize("libres"), "libre")
        self.assertEqual(lemmatize("canadienses"), "canadiense")

    # Test 14: normalize to -l/-r/-n
    def test_lemmatize8(self):
        self.assertEqual(lemmatize("iguales"), "igual")
        self.assertEqual(lemmatize("lanares"), "lanar")

    # Test 15: normalize other adjectives in -es to -e
    def test_lemmatize9(self):
        self.assertEqual(lemmatize("tristes"), "triste")
        self.assertEqual(lemmatize("grandes"), "grande")

    # Test 16: no change for other forms
    def test_lemmatize10(self):
        self.assertEqual(lemmatize("capaz"), "capaz")
        self.assertEqual(lemmatize("inglés"), "inglés")
        self.assertEqual(lemmatize("igual"), "igual")
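
# A minimal rule-based sketch of the normalization behaviour that Tests 7-16
# describe, with suffix rules read off directly from the expected outputs
# (order matters: e.g. -eses must be handled before the generic -es rule).
# This only illustrates one rule set that satisfies the tests; the actual
# lemmatize in ex_07 may be organized differently.  It is not used by the tests.
def _lemmatize_sketch(form):
    if form.endswith("ces"):                             # capaces -> capaz
        return form[:-3] + "z"
    if form.endswith(("eses", "esas")):                  # ingleses, inglesas -> inglés
        return form[:-4] + "és"
    if form.endswith("esa"):                             # inglesa -> inglés
        return form[:-3] + "és"
    if form.endswith(("ntes", "bles", "bres", "nses")):  # interesantes -> interesante
        return form[:-1]
    if form.endswith(("les", "res", "nes")):             # iguales -> igual, lanares -> lanar
        return form[:-2]
    if form.endswith("es"):                              # tristes -> triste, grandes -> grande
        return form[:-1]
    if form.endswith(("as", "os")):                      # pequeñas, pequeños -> pequeño
        return form[:-2] + "o"
    if form.endswith("a"):                               # pequeña -> pequeño
        return form[:-1] + "o"
    return form                                          # pequeño, capaz, inglés, igual
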

class TestTask3(unittest.TestCase):

    def setUp(self):
        # load sentences generated in Task 1
        with open("sentences.pkl", "rb") as sents_pkl:
            self.sentences = pickle.load(sents_pkl)

        # generate frequency dictionaries
        self.freq_ser, self.freq_estar = count_occurrences(self.sentences)
        # store generated frequency dictionaries in pickle file to make testing later tasks faster
        with open("freqs.pkl", "wb") as freqs_pkl:
            pickle.dump((self.freq_ser, self.freq_estar), freqs_pkl)

    # Test 17: successful processing
    def test_count_occurrences1(self):
        self.assertIsNotNone(self.freq_ser)
        self.assertIsNotNone(self.freq_estar)

    # Test 18: correct size of dicts
    def test_count_occurrences2(self):
        self.assertEqual(len(self.freq_ser), 2421)
        self.assertEqual(len(self.freq_estar), 2421)

    # Test 19: keys are strings
    def test_count_occurrences3(self):
        self.assertIsInstance(next(iter(self.freq_ser.keys())), str)
        self.assertIsInstance(next(iter(self.freq_estar.keys())), str)

    # Test 20: values are integers
    def test_count_occurrences4(self):
        self.assertIsInstance(next(iter(self.freq_ser.values())), int)
        self.assertIsInstance(next(iter(self.freq_estar.values())), int)

    # Test 21: correct counts in freq_ser
    def test_count_occurrences5(self):
        self.assertEqual(self.freq_ser["diferente"], 94)
        self.assertEqual(self.freq_ser["amarillo"], 34)

    # Test 22: correct counts in freq_estar
    def test_count_occurrences6(self):
        self.assertEqual(self.freq_estar["acostado"], 10)
        self.assertEqual(self.freq_estar["parado"], 65)

    # Test 23: first keys in alphabetical order are identical and correct
    def test_count_occurrences7(self):
        self.assertEqual(sorted(self.freq_ser.keys())[0:5], sorted(self.freq_estar.keys())[0:5])
        sorted_list = sorted(self.freq_ser.keys())
        self.assertEqual(sorted_list[2], "abandonado")
        self.assertEqual(sorted_list[3], "abarrotado")
        self.assertEqual(sorted_list[4], "abducido")

    # Test 24: zero counts in the respective other frequency dictionary
    def test_count_occurrences8(self):
        self.assertEqual(self.freq_ser["fundido"], 0)
        self.assertEqual(self.freq_ser["adornado"], 0)
        self.assertEqual(self.freq_estar["inaccesible"], 0)
        self.assertEqual(self.freq_estar["inglés"], 0)
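
# Tests 17-24 imply that count_occurrences returns two frequency dictionaries
# over one shared set of adjective lemmas: how often each adjective co-occurs
# with a form of "ser" and with a form of "estar".  The sketch below shows the
# general counting pattern under two assumptions that are NOT taken from the
# assignment: the hand-picked lists of copula forms and the heuristic that the
# adjective (tag "ADJ") immediately follows the copula.  It is not used by the
# tests, which run the real count_occurrences from ex_07.
_SER_FORMS = {"ser", "es", "son", "era", "eran", "fue", "fueron", "sea", "sean"}
_ESTAR_FORMS = {"estar", "está", "están", "estaba", "estaban", "estuvo", "estuvieron", "esté"}

def _count_occurrences_sketch(sentences):
    freq_ser, freq_estar = {}, {}
    for sentence in sentences:
        for (word, _), (next_word, next_tag) in zip(sentence, sentence[1:]):
            if next_tag != "ADJ":
                continue
            lemma = lemmatize(next_word.lower())          # Task 2 lemmatizer from ex_07
            if word.lower() in _SER_FORMS:
                freq_ser[lemma] = freq_ser.get(lemma, 0) + 1
                freq_estar.setdefault(lemma, 0)           # keep both key sets identical
            elif word.lower() in _ESTAR_FORMS:
                freq_estar[lemma] = freq_estar.get(lemma, 0) + 1
                freq_ser.setdefault(lemma, 0)
    return freq_ser, freq_estar
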

class TestTask4(unittest.TestCase):

    def setUp(self):
        # load sentences generated in Task 1
        with open("sentences.pkl", "rb") as sents_pkl:
            self.sentences = pickle.load(sents_pkl)
        # load frequency dictionaries generated in Task 3
        with open("freqs.pkl", "rb") as freqs_pkl:
            self.freq_ser, self.freq_estar = pickle.load(freqs_pkl)

        # generate occurrence sets
        self.ser, self.estar, self.both = get_occurrence_sets(self.freq_ser, self.freq_estar)
        # store generated occurrence sets in pickle file to make testing later tasks faster
        with open("sets.pkl", "wb") as sets_pkl:
            pickle.dump((self.ser, self.estar, self.both), sets_pkl)

    # Test 25: successful processing
    def test_get_occurrence_sets1(self):
        self.assertIsNotNone(self.ser)
        self.assertIsNotNone(self.estar)
        self.assertIsNotNone(self.both)

    # Test 26: all three results are sets
    def test_get_occurrence_sets2(self):
        self.assertIsInstance(self.ser, set)
        self.assertIsInstance(self.estar, set)
        self.assertIsInstance(self.both, set)

    # Test 27: no overlap between the three sets
    def test_get_occurrence_sets3(self):
        self.assertEqual(len(self.ser & self.estar), 0)
        self.assertEqual(len(self.ser & self.both), 0)
        self.assertEqual(len(self.both & self.estar), 0)

    # Test 28: clear cases in self.ser
    def test_get_occurrence_sets4(self):
        self.assertIn("pobre", self.ser)
        self.assertIn("lindo", self.ser)

    # Test 29: clear cases in self.estar
    def test_get_occurrence_sets5(self):
        self.assertIn("ocupado", self.estar)
        self.assertIn("vivo", self.estar)

    # Test 30: clear cases in self.both
    def test_get_occurrence_sets6(self):
        self.assertIn("bueno", self.both)
        self.assertIn("bajo", self.both)

    # Test 31: correct sizes of sets
    def test_get_occurrence_sets7(self):
        self.assertEqual(len(self.ser), 152)
        self.assertEqual(len(self.estar), 90)
        self.assertEqual(len(self.both), 87)

    # Test 32: boundary cases (e.g. = 10 and < 10 overall, 1 to 9)
    def test_get_occurrence_sets8(self):
        self.assertIn("típico", self.ser)
        self.assertIn("feo", self.both)
        self.assertIn("abierto", self.both)

class TestTask5(unittest.TestCase):

    def setUp(self):
        with open("bibliography.tex") as bib_file:
            self.contents = bib_file.read()

    # Test 33: authors are extracted correctly
    def test_authors(self):
        self.assertEqual(search_bibliography("authors", self.contents),
                         ['Barwise, J.', 'Chomsky, N. \\& Lightfoot, D.', 'Chomsky, N.',
                          'Montague, R.', 'Montague, R.',
                          'OGrady, W., Aronoff, M. \\& Dobrovolsky, M.'])

    # Test 34: years are extracted correctly
    def test_years(self):
        self.assertEqual(search_bibliography("years", self.contents),
                         ['1977', '2002', '2014', '1970', '1973', '1989'])

    # Test 35: book pages are extracted correctly
    def test_pages(self):
        self.assertEqual(search_bibliography("pages", self.contents),
                         ['5--46', '373--398', '221--224'])

    # Test 36: collection titles are extracted correctly
    def test_booktitles(self):
        self.assertEqual(search_bibliography("colltitles", self.contents),
                         ['Studies in Logic and the Foundations of Mathematics',
                          'Approaches to natural language'])

    # Test 37: authors are swapped correctly
    def test_transform_names(self):
        self.assertIn("N. Chomsky \\& D. Lightfoot (2002)",
                      search_bibliography("transform_names", self.contents))


if __name__ == '__main__':
    unittest.main()
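

# --- Illustrative sketches (defined below the test runner guard, never used) ---
import re

# Tests 25-32 suggest that get_occurrence_sets partitions the adjectives into
# three disjoint sets: only with "ser", only with "estar", and with both.  The
# boundary-case comment in Test 32 hints at a total-frequency cutoff of 10;
# that cutoff and the simple zero/non-zero classification below are
# assumptions, not the specification implemented in ex_07.
def _get_occurrence_sets_sketch(freq_ser, freq_estar, min_total=10):
    ser_only, estar_only, both = set(), set(), set()
    for adj in freq_ser:
        total = freq_ser[adj] + freq_estar[adj]
        if total < min_total:
            continue                      # too rare to classify reliably
        if freq_estar[adj] == 0:
            ser_only.add(adj)             # attested with "ser" only
        elif freq_ser[adj] == 0:
            estar_only.add(adj)           # attested with "estar" only
        else:
            both.add(adj)                 # attested with both copulas
    return ser_only, estar_only, both


# Tests 33-37 treat search_bibliography as a regex-based extractor over the
# contents of bibliography.tex.  Since the exact layout of that file is not
# shown here, this snippet only illustrates the general idea with two
# deliberately format-agnostic patterns (years in parentheses, LaTeX-style
# page ranges); the real field queries ("authors", "colltitles",
# "transform_names") need patterns tailored to the actual entries.
def _search_bibliography_sketch(query, contents):
    if query == "years":
        return re.findall(r"\((\d{4})\)", contents)   # e.g. "(2002)" -> "2002"
    if query == "pages":
        return re.findall(r"\d+--\d+", contents)      # e.g. "5--46"
    raise NotImplementedError("only a sketch; see search_bibliography in ex_07")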