# -*- coding: utf-8 -*-

import sys
import unittest
from os.path import abspath, dirname

# Put the parent of this file's directory at the front of sys.path before the
# pyglossary imports below run (presumably so they resolve against this
# source checkout rather than an installed copy -- confirm).
rootDir = dirname(dirname(abspath(__file__)))
sys.path.insert(0, rootDir)


from pyglossary.glossary_v2 import Glossary
from pyglossary.plugins.ebook_kobo import (
	Writer,
)


class GetPrefixTest(unittest.TestCase):
	"""Check ``Writer.get_prefix`` of the Kobo writer against known pairs.

	Every test method is a list of ``self.case(word, expected_prefix)``
	calls; the sources of the expected values are noted in the comments.
	"""

	def case(self, word: str, prefix: str) -> None:
		"""Assert that ``Writer.get_prefix(word)`` returns ``prefix``.

		A fresh ``Glossary`` and ``Writer`` are built for every call.
		The comparison runs inside ``subTest`` so that a mismatch is reported
		together with the offending word, and the remaining cases of the
		calling test method still run instead of being skipped after the
		first failure.
		"""
		glos = Glossary()
		w = Writer(glos)
		with self.subTest(word=word):
			self.assertEqual(
				w.get_prefix(word),
				prefix,
				# keeps the input visible on runners without sub-test support
				msg=f"get_prefix({word!r})",
			)

	def test_examples(self) -> None:
		# examples from https://pgaskin.net/dictutil/dicthtml/prefixes.html
		self.case("test", "te")
		self.case("a", "aa")
		self.case("Èe", "èe")
		self.case("multiple words", "mu")
		self.case("àççèñts", "àç")
		self.case("à", "àa")
		self.case("ç", "ça")
		self.case("", "11")
		self.case(" ", "11")
		self.case(" x", "xa")
		self.case(" 123", "11")
		self.case("x 23", "xa")
		self.case("д ", "д")
		self.case("дaд", "дa")
		self.case("未未", "未未")
		self.case("未", "未a")
		self.case("  未", "11")
		self.case(" 未", "未a")

	# the rest of the test cases are from
	# https://github.com/pgaskin/dictutil/blob/master/kobodict/util_test.go

	def test_dicthtml_en(self) -> None:
		self.case("a-", "11")
		self.case("-an", "11")
		self.case("GB", "gb")

	def test_dicthtml_fr(self) -> None:
		self.case("ébahir", "éb")
		self.case("a1", "11")
		self.case("ô", "ôa")
		self.case("kébab", "ké")
		self.case("aérer", "aé")
		self.case("living-room", "li")

	# dicthtml-ja
	# Note, Kanji not currently implemented, so not testing (note, the logic
	# is in a separate function, anyways).
	# self.case("あ", "あ")
	# self.case("アークとう", "アー")

	def test_dictword_spaces(self) -> None:
		# generated by dictword-test: spaces
		self.case("  ", "11")
		self.case("   ", "11")
		self.case("\t\t", "11")
		self.case("\t\f\t", "11")
		self.case("x ", "xa")
		self.case(" xx", "xa")

		# generated by dictword-test: spaces where trim/prefix order matters
		self.case("  x", "11")
		self.case("  xy", "11")
		self.case("  xyz", "11")
		self.case("x z", "xa")

	def test_dictword_cyrillic(self) -> None:
		# generated by dictword-test: cyrillic
		self.case(" д", "д")
		self.case(" дд", "д")
		self.case("д", "д")
		self.case("aд", "aд")
		self.case("дa", "дa")
		self.case("aдa", "aд")

	def test_dictword_uppercase_accented(self) -> None:
		# generated by dictword-test: uppercase accented letters
		self.case("Ȅe", "ȅe")
		self.case("eȄ", "eȅ")
		self.case("Ȅ", "ȅa")
		self.case("Ȅ!", "11")

	def test_dictword_cjk(self) -> None:
		# generated by dictword-test: cjk
		self.case("x未", "x未")
		self.case("未x", "未x")
		self.case("xy未", "xy")
		self.case("还没", "还没")

	def test_dictword_misc(self) -> None:
		# generated by dictword-test: misc
		self.case("!", "11")
		self.case("!!", "11")
		self.case("!!!", "11")
		self.case("x!", "11")
		self.case("x!!", "11")
		self.case("xx!", "xx")
		self.case("xxx!", "xx")
		self.case("  !", "11")
		self.case(" !!", "11")
		self.case(" !!!", "11")
		self.case(" !", "11")
		self.case("  !!", "11")
		self.case("   !!!", "11")
		self.case(" x!", "xa")
		self.case(" x!!", "xa")
		self.case(" xx!", "xa")
		self.case(" xxx!", "xa")

	def test_synthetic(self) -> None:
		# words containing a NUL character, inside and at the start
		self.case("x\x00y", "xa")
		self.case("\x00xy", "11")


# Run the tests in this module when the file is executed as a script.
if __name__ == "__main__":
	unittest.main()
