Mass content adding/software/Flexion generator/cases-serbian.py
Appearance
This work is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or any later version. This work is distributed in the hope that it will be useful, but without any warranty; without even the implied warranty of merchantability or fitness for a particular purpose. See version 2 and version 3 of the GNU General Public License for more details.
|
For the list of recognized authors of this page look at the page Mass content adding/software/Flexion generator/cases-serbian.py/authors.
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
Copyright (C) 2006 Milos Rancic, Milos Radonjic
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
Not included:
- 2nd declension, neuter (what is called 2nd declension here is the 1st declension neuter)
- 4th declension, feminine
- alternation l => o
- for sure something more...
Classes:
- Expression:
    getSyllNum() : Returns the number of syllables
    output() : Returns a string that contains information about the inflected expression and can be displayed
- N(oun)
    __init__() : N(expression[, wordNumber[, wordType[, gender[, alive[, tantum[, syllabsNumber[, a0[, foreign]]]]]]]])
        available values: gender: m(asculine), f(eminine), n(euter); alive, a0, foreign: boolean;
        tantum: p(luralia tantum), s(ingularia tantum), else: regular
    get() : get(<case + s/p>); cases: nom, gen, dat, acc, ins, loc, voc; e.g.: word.get('noms')
- J (hybrid adjective)
- A(djective)
- V(erb)
- P(ronoun)
- O (number)
- D (adverb)
- R (preposition)
- C(onjunction)
- T (Particle)
- U(zvik)
"""
import sys
class GenderError(Exception):
    """ Signals that no grammatical gender could be worked out for a word """
    pass
class Expression:
    """
    Base class of every word type.

    Stores the expression together with its lexical flags and, unless the
    caller supplies one, an estimated syllable count.
    """

    # Letters that always count as one syllable.
    _VOWELS = frozenset(u"аАеЕиИоОуУ")
    # Consonants; an "р" next to these (and not next to a vowel) is syllabic.
    _CONSONANTS = frozenset(
        u"лЛљЉмМнНњЊјЈвВбБпПфФдДтТзЗсСцЦжЖшШџЏчЧђЂћЋхХгГкК"
    )
    _SYLLABIC_R = frozenset(u"рР")

    # (label, case prefix) pairs in the order output() prints them.
    _CASE_LABELS = (
        (u"nom (ko/sta)", 'nom'),
        (u"gen (od koga/od cega)", 'gen'),
        (u"dat (kome/cemu)", 'dat'),
        (u"acc (koga/sta)", 'acc'),
        (u"ins (s kim/cim)", 'ins'),
        (u"loc (o kome/o cemu)", 'loc'),
        (u"voc (hej, oj)", 'voc'),
    )

    def __init__(self, expression, wordNumber, syllabsNumber, a0, foreign):
        """
        expression    : the word itself (unicode string)
        wordNumber    : stored unchanged
        syllabsNumber : syllable count; pass '' to have it estimated by getSyllNum()
        a0            : stored unchanged (read by N.testAlternation())
        foreign       : stored unchanged (read by N when choosing endings)
        """
        self.wordNumber = wordNumber
        self.a0 = a0
        self.foreign = foreign
        self.expression = expression
        if syllabsNumber == '':
            self.syllabsNumber = self.getSyllNum()
        else:
            self.syllabsNumber = syllabsNumber

    def output(self):
        """ Returns a string that contains information about the inflected expression and can be displayed """
        # Only nouns (N and its subclasses) carry a 'case' dictionary.
        if not isinstance(self, N):
            return u'%s does not have any inflected forms' % self.expression
        parts = []
        for title, number in ((u"Singular", 's'), (u"Plural", 'p')):
            parts.append(u"*** %s ***\r\n" % title)
            for label, prefix in self._CASE_LABELS:
                parts.append(u"%s: %s\r\n" % (label, self.case[prefix + number]))
            parts.append(u"\r\n")  # blank line after each paradigm
        parts.append(u"******************************************************\r\n\r\n")
        return u"".join(parts)

    def getSyllNum(self):
        """
        Counts the syllables of the expression, stores the result in
        self.syllabsNumber and returns it.

        Every vowel counts as one syllable. An "р" counts only when all of
        its existing neighbours are consonants; a word consisting of a single
        "р" has no neighbours and is counted as zero syllables.
        This is just an approximation which works with about 95% of cases.
        """
        word = self.expression
        last = len(word) - 1
        count = 0
        for idx, char in enumerate(word):
            if char in self._VOWELS:
                count += 1
            elif char in self._SYLLABIC_R:
                has_neighbour = idx > 0 or idx < last
                # A missing neighbour (word edge) does not block syllabicity.
                before_ok = idx == 0 or word[idx - 1] in self._CONSONANTS
                after_ok = idx == last or word[idx + 1] in self._CONSONANTS
                if has_neighbour and before_ok and after_ok:
                    count += 1
        self.syllabsNumber = count
        return count
class N(Expression):
    """
    Noun

    The constructor stores the lexical attributes, guesses the gender when
    none is given and then calls inflect(), which fills the 'case' dictionary
    with the keys <case><s|p> (noms, gens, ..., vocp).
    """

    _SINGULAR_KEYS = ('noms', 'gens', 'dats', 'accs', 'inss', 'locs', 'vocs')
    _PLURAL_KEYS = ('nomp', 'genp', 'datp', 'accp', 'insp', 'locp', 'vocp')

    # Feminine nouns in -а (and pluralia tantum in -е).
    _ENDINGS_FEM_A = {
        'sNoms': u"а", 'sGens': u"е", 'sDats': u"и", 'sAccs': u"у",
        'sInss': u"ом", 'sLocs': u"и", 'sVocs': u"о",
        'sNomp': u"е", 'sGenp': u"а", 'sDatp': u"ама", 'sAccp': u"е",
        'sInsp': u"ама", 'sLocp': u"ама", 'sVocp': u"е",
    }
    # Neuter nouns in -о/-е (and pluralia tantum in -а).
    _ENDINGS_NEUTER = {
        'sNoms': u"о", 'sGens': u"а", 'sDats': u"у", 'sAccs': u"о",
        'sInss': u"ом", 'sLocs': u"у", 'sVocs': u"о",
        'sNomp': u"а", 'sGenp': u"а", 'sDatp': u"има", 'sAccp': u"а",
        'sInsp': u"има", 'sLocp': u"има", 'sVocp': u"а",
    }
    # 1st (masculine) declension: any other ending, not explicitly feminine.
    _ENDINGS_MASC = {
        'sNoms': u"", 'sGens': u"а", 'sDats': u"у", 'sAccs': u"",
        'sInss': u"ом", 'sLocs': u"у", 'sVocs': u"е",
        'sNomp': u"и", 'sGenp': u"а", 'sDatp': u"има", 'sAccp': u"е",
        'sInsp': u"има", 'sLocp': u"има", 'sVocp': u"и",
    }
    # Explicitly feminine foreign words: indeclinable.
    _ENDINGS_FEM_FOREIGN = {
        'sNoms': u"", 'sGens': u"", 'sDats': u"", 'sAccs': u"",
        'sInss': u"", 'sLocs': u"", 'sVocs': u"",
        'sNomp': u"", 'sGenp': u"", 'sDatp': u"", 'sAccp': u"",
        'sInsp': u"", 'sLocp': u"", 'sVocp': u"",
    }
    # Explicitly feminine, consonant-final: 4th declension.
    _ENDINGS_FEM_4TH = {
        'sNoms': u"", 'sGens': u"и", 'sDats': u"и", 'sAccs': u"",
        'sInss': u"ју",  # TODO: u"и"
        'sLocs': u"и", 'sVocs': u"и",
        'sNomp': u"и", 'sGenp': u"и", 'sDatp': u"има", 'sAccp': u"и",
        'sInsp': u"има", 'sLocp': u"има", 'sVocp': u"и",
    }

    # Final consonant -> (vocative singular, plural) replacement for
    # masculine nouns with more than one syllable.
    _MASC_PALATALIZATION = {
        u"к": (u"ч", u"ц"),
        u"ц": (u"ч", u"ц"),
        u"г": (u"ж", u"з"),
        u"з": (u"ж", u"з"),
        u"х": (u"ш", u"с"),
    }
    # Final consonant -> dative/locative singular replacement for feminine nouns.
    _FEM_SIBILARIZATION = {u"к": u"ц", u"г": u"з", u"х": u"с"}

    def __init__(self, expression, wordNumber=1, wordType=u'n', gender='', alive=False, tantum=u'reg', syllabsNumber='', a0=False, foreign=False):
        """
        gender : 'm', 'f' or 'n'; '' or None lets determineGender() guess it
        alive  : True for animate nouns (masculine accusative = genitive)
        tantum : 'p' (pluralia tantum), 's' (singularia tantum), else regular
        The remaining arguments are passed on to Expression.__init__().
        """
        Expression.__init__(self, expression, wordNumber, syllabsNumber, a0, foreign)
        self.wordType = wordType
        # alive/tantum must exist before determineGender() runs: it reads self.tantum.
        self.alive = alive
        self.tantum = tantum
        self.gender = gender
        if gender in ('', None):
            self.determineGender()
        self.infix_1 = u''  # sing./plur.
        self.infix_2 = u''  # sing.
        self.infix_3 = u''  # plur.
        self.root = {}  # dictionary for the roots for every case
        self.ending = {}  # dictionary for the endings for every case
        self.case = {}  # case dictionary
        self.inflect()  # method fills the case dictionary

    def get(self, case):
        """ Returns the inflected form for the given case key, e.g. 'noms' or 'datp' """
        return self.case[case]

    def determineGender(self):
        """
        This method tries to determine the gender of an expression from its
        ending. Does not work every time.

        Raises GenderError if no gender is set and the expression is empty.
        """
        if self.gender in (None, ''):
            if not self.expression:
                raise GenderError(u'The gender for this word could not be determined!')
            last = self.expression[-1]
            if last in (u"о", u"е") or (last == u"а" and self.tantum == 'p'):
                self.gender = 'n'  # neuter
            elif last == u"а":
                self.gender = 'f'  # feminine
            else:
                self.gender = 'm'  # masculine
        if self.expression.endswith(u"ост"):  # 4th declension nouns
            self.gender = 'f'  # feminine

    def testInfixes(self):
        """ This method detects needed infixes """
        last = self.expression[-1]
        if last == u"и":
            # if pluralia tantum, treat the word as regular for now
            if self.tantum == 'p':
                self.root['0'] = self.expression[:-1]
            else:
                self.infix_1 = u'ј'  # infix_1
        if self.expression.endswith(u"ин"):
            self.root['0'] = self.expression[:-2]
            self.infix_2 = u"ин"  # infix_2
        if self.gender == 'm' and self.syllabsNumber == 1:
            # monosyllabic masculine nouns get a long plural: -ев- after a palatal, else -ов-
            if last in (u"љ", u"њ", u"ј", u"ж", u"ш", u"џ", u"ч", u"ђ", u"ћ"):
                self.infix_3 = u"ев"  # infix_3
            else:
                self.infix_3 = u"ов"

    def testSufixes(self):
        """ This method detects needed suffixes (adjusts self.ending in place) """
        last = self.expression[-1]
        if last == u"е":
            if self.foreign:
                # NOTE(review): inflect() calls this after palatalize() has
                # already copied root['0'] into the per-case roots, so this
                # assignment does not change the generated forms.
                self.root['0'] = self.expression
            else:
                self.ending['sNoms'] = u"е"
                self.ending['sAccs'] = self.ending['sNoms']
                self.ending['sInss'] = u"ем"
                self.ending['sVocs'] = u"е"
        elif last == u"ј":
            self.ending['sInss'] = u"ем"
        if last in (u"о", u"е") and self.gender == 'm':
            # male gender -o/-e
            self.ending['sNomp'] = u"и"
            self.ending['sAccp'] = u"е"
            self.ending['sVocp'] = self.ending['sNomp']
        # if the thing is alive and has masculine gender,
        # accusative singular is equal to genitive singular
        if self.gender == 'm' and self.alive:
            self.ending['sAccs'] = self.ending['sGens']
        elif last == u"т" and self.gender == "f":  # ju > u
            self.ending['sInss'] = u"у"

    def defineRegularRoots(self):
        """ This method sets all roots equal root['0'] """
        for key in self._SINGULAR_KEYS + self._PLURAL_KEYS:
            self.root[key] = self.root['0']

    def palatalize(self):
        """ This method fills the root dictionary and palatalizes some characters """
        self.defineRegularRoots()  # fills the root dictionary
        base = self.root['0']
        stem, last = base[:-1], base[-1]
        if last in self._MASC_PALATALIZATION and self.gender == 'm' and self.syllabsNumber > 1:
            vocative, plural = self._MASC_PALATALIZATION[last]
            self.root['vocs'] = stem + vocative
            self.root['nomp'] = stem + plural
            for key in ('datp', 'insp', 'locp', 'vocp'):
                self.root[key] = self.root['nomp']
        elif last in self._FEM_SIBILARIZATION and self.wordType == 'n' and self.gender == 'f':
            self.root['dats'] = stem + self._FEM_SIBILARIZATION[last]
            self.root['locs'] = self.root['dats']
        elif last == u"т" and self.gender == 'f':  # t > ć
            self.root['inss'] = stem + u"ћ"

    def testAlternation(self):
        """
        Method for fixing some mistakes if a0 == True: for a root ending in
        "ар" the "а" is dropped in every form except the nominative singular
        and the genitive plural.
        """
        if self.a0 and self.root['0'][-2:] == u"ар":
            reduced = self.root['0'][:-2] + self.root['0'][-1]
            for key in ('gens', 'dats', 'accs', 'inss', 'locs', 'vocs',
                        'nomp', 'datp', 'accp', 'insp', 'locp', 'vocp'):
                self.root[key] = reduced

    def inflect(self):
        """
        This method determines inflected forms and saves them in the dictionary 'case'.
        It is automatically called by the constructor.
        """
        ### First step: determining the endings for every case
        last = self.expression[-1]
        plural_only = self.tantum == 'p'
        if (last == u"а" or (last == u"е" and plural_only)) and self.gender != 'n':
            self.root['0'] = self.expression[:-1]
            self.ending.update(self._ENDINGS_FEM_A)
        elif (last in (u"о", u"е") or (last == u"а" and plural_only)) and self.gender != 'f':
            self.root['0'] = self.expression[:-1]
            self.ending.update(self._ENDINGS_NEUTER)
        # if noun ends with anything other than 'e', 'o', 'a'
        # and NOT explicitly feminine => 1st (masculine) declension
        elif self.gender != 'f':
            self.root['0'] = self.expression
            self.ending.update(self._ENDINGS_MASC)
        # if anything other AND explicitly feminine AND foreign word => 0 feminine declension
        elif self.foreign:
            self.root['0'] = self.expression
            self.ending.update(self._ENDINGS_FEM_FOREIGN)
        # there are some exceptions: 4th declension feminine; not covered yet
        # (TODO: old dual numbers)
        # exception is, also, noun "doba", neuter
        # if anything other AND explicitly feminine => 4th declension
        # not solved "mati"
        else:
            self.root['0'] = self.expression
            self.ending.update(self._ENDINGS_FEM_4TH)

        ### Second step: detecting mistakes, infixes, etc.
        self.testInfixes()  # tests if the word needs infixes
        self.palatalize()  # sets roots for every case
        self.testAlternation()  # tests if the word alternates a > 0
        self.testSufixes()  # tests some suffix attributes of the word

        ### Third step: joining all parts together. Pattern: CASE = ROOT + INFIX(es) + ENDING
        if self.tantum != 'p':
            for key in self._SINGULAR_KEYS:
                form = self.root[key]
                if key not in ('noms', 'vocs'):  # nominative/vocative never take infix_1
                    form += self.infix_1
                self.case[key] = form + self.infix_2 + self.ending['s' + key.capitalize()]
        else:
            for key in self._SINGULAR_KEYS:
                self.case[key] = "n/a"
        if self.tantum != 's':
            for key in ('nomp', 'genp', 'datp', 'accp'):
                self.case[key] = (self.root[key] + self.infix_1 + self.infix_3
                                  + self.ending['s' + key.capitalize()])
            self.case['insp'] = self.case['datp']
            self.case['locp'] = self.case['datp']
            self.case['vocp'] = self.case['nomp']
        else:
            # singularia tantum: no plural forms at all
            for key in self._PLURAL_KEYS:
                self.case[key] = "n/a"
class J(N):
    """
    Adjective which became a noun (hybrid declension)

    Derives from N only: N already derives from Expression, and listing
    Expression in front of N gives no consistent method resolution order
    for new-style (Python 3) classes.
    """

    # Endings per gender. Instrumental/locative plural repeat the dative
    # plural and the vocative plural repeats the nominative plural, so they
    # are not listed here.
    _HYBRID_ENDINGS = {
        'f': {
            'sNoms': u'а', 'sGens': u'е', 'sDats': u'ој', 'sAccs': u'у',
            'sInss': u'ом', 'sLocs': u'ој', 'sVocs': u'о',
            'sNomp': u'е', 'sGenp': u'их', 'sDatp': u'им(а)', 'sAccp': u'е',
        },
        'm': {
            'sNoms': u'и', 'sGens': u'ог(а)', 'sDats': u'ом(е)',
            'sAccs': u'ог',  # alive == True
            'sInss': u'им', 'sLocs': u'ом(е)', 'sVocs': u'и',
            'sNomp': u'и', 'sGenp': u'их', 'sDatp': u'им(а)', 'sAccp': u'е',
        },
        'n': {
            'sNoms': u'о', 'sGens': u'ог(а)', 'sDats': u'ом(е)', 'sAccs': u'о',
            'sInss': u'им', 'sLocs': u'ом(е)', 'sVocs': u'о',
            'sNomp': u'а', 'sGenp': u'их', 'sDatp': u'им(а)', 'sAccp': u'а',
        },
    }

    def __init__(self, expression, wordNumber=1, wordType=u'j', gender='', alive=True, tantum=u'reg', syllabsNumber=1, a0=False, foreign=False):
        """ Same arguments as N; the defaults differ (wordType 'j', alive, one syllable) """
        N.__init__(self, expression, wordNumber, wordType, gender, alive, tantum, syllabsNumber, a0, foreign)

    def inflect(self):  # overriding N.inflect()
        """
        Fills the 'case' dictionary with adjective-style forms: root (the
        expression without its last character) + gender-specific ending.

        Raises GenderError if the gender is not 'm', 'f' or 'n'.
        """
        try:
            endings = self._HYBRID_ENDINGS[self.gender]
        except KeyError:
            raise GenderError(u"Hybrid adjectives need the gender 'm', 'f' or 'n'!")
        self.root['0'] = self.expression[:-1]
        self.ending.update(endings)
        self.testSufixes()
        # NOTE(review): N.palatalize() also rewrites the instrumental root of
        # any feminine root ending in "т"; confirm that is wanted for hybrids.
        self.palatalize()
        for key in ('noms', 'gens', 'dats', 'accs', 'inss', 'locs', 'vocs',
                    'nomp', 'genp', 'datp', 'accp'):
            self.case[key] = self.root[key] + self.ending['s' + key.capitalize()]
        self.case['insp'] = self.case['datp']
        self.case['locp'] = self.case['datp']
        self.case['vocp'] = self.case['nomp']

    def testSufixes(self):  # overriding N.testSufixes()
        """ Inanimate masculine: accusative singular takes the nominative ending """
        if self.gender == 'm' and not self.alive:
            self.ending['sAccs'] = u'и'

    def determineGender(self):  # overriding N.determineGender()
        """
        Derives the gender from the final vowel (а: f, о: n, и: m), stores
        and returns it. Raises GenderError for any other ending.
        """
        if self.expression.endswith(u'а'):
            self.gender = 'f'
        elif self.expression.endswith(u'о'):
            self.gender = 'n'
        elif self.expression.endswith(u'и'):
            self.gender = 'm'
        else:
            raise GenderError(u'This word does not seem to be a hybride adjective!')
        return self.gender
class A(Expression):
    """
    Adjective

    Only stores its attributes; no inflection is implemented here.
    """
    def __init__(self, gender, expression=u'', wordNumber=1, syllabsNumber='', a0=False, foreign=False):
        """
        gender stays the first positional argument. The remaining arguments
        are forwarded to Expression.__init__(); they used to be referenced
        without being defined, which made every construction fail.
        """
        Expression.__init__(self, expression, wordNumber, syllabsNumber, a0, foreign)
        self.gender = gender
class P(Expression):
    """ Pronoun: only stores its attributes through Expression """
    def __init__(self, expression, wordNumber=1, syllabsNumber='', a0=False, foreign=False):
        Expression.__init__(
            self,
            expression=expression,
            wordNumber=wordNumber,
            syllabsNumber=syllabsNumber,
            a0=a0,
            foreign=foreign,
        )
class O(Expression):
    """ Number: only stores its attributes through Expression """
    def __init__(self, expression, wordNumber=1, syllabsNumber='', a0=False, foreign=False):
        Expression.__init__(
            self,
            expression=expression,
            wordNumber=wordNumber,
            syllabsNumber=syllabsNumber,
            a0=a0,
            foreign=foreign,
        )
class V(Expression):
    """ Verb: only stores its attributes through Expression """
    def __init__(self, expression, wordNumber=1, syllabsNumber='', a0=False, foreign=False):
        Expression.__init__(
            self,
            expression=expression,
            wordNumber=wordNumber,
            syllabsNumber=syllabsNumber,
            a0=a0,
            foreign=foreign,
        )
class D(Expression):
    """ Adverb: only stores its attributes through Expression """
    def __init__(self, expression, wordNumber=1, syllabsNumber='', a0=False, foreign=False):
        Expression.__init__(
            self,
            expression=expression,
            wordNumber=wordNumber,
            syllabsNumber=syllabsNumber,
            a0=a0,
            foreign=foreign,
        )
class R(Expression):
    """ Preposition: only stores its attributes through Expression """
    def __init__(self, expression, wordNumber=1, syllabsNumber='', a0=False, foreign=False):
        Expression.__init__(
            self,
            expression=expression,
            wordNumber=wordNumber,
            syllabsNumber=syllabsNumber,
            a0=a0,
            foreign=foreign,
        )
class C(Expression):
    """ Conjunction: only stores its attributes through Expression """
    def __init__(self, expression, wordNumber=1, syllabsNumber='', a0=False, foreign=False):
        Expression.__init__(
            self,
            expression=expression,
            wordNumber=wordNumber,
            syllabsNumber=syllabsNumber,
            a0=a0,
            foreign=foreign,
        )
class T(Expression):
    """ Particle: only stores its attributes through Expression """
    def __init__(self, expression, wordNumber=1, syllabsNumber='', a0=False, foreign=False):
        Expression.__init__(
            self,
            expression=expression,
            wordNumber=wordNumber,
            syllabsNumber=syllabsNumber,
            a0=a0,
            foreign=foreign,
        )
class U(Expression):
    """ Interjection ('uzvik'): only stores its attributes through Expression """
    def __init__(self, expression, wordNumber=1, syllabsNumber='', a0=False, foreign=False):
        Expression.__init__(
            self,
            expression=expression,
            wordNumber=wordNumber,
            syllabsNumber=syllabsNumber,
            a0=a0,
            foreign=foreign,
        )