#############################################################
#                                                           #
#                        arabic.ccg                         #
#                                                           #
#############################################################

# Author: Ben Wing <ben@666.com>
# Date: April 2006

# This is a grammar for a fragment of Arabic.  It's particularly
# useful for demonstrating the extended use of macros to handle
# complicated morphological inflections.

# See the `tiny' grammar (tiny.ccg) for more info about the format
# of this file.

feature {
  CASE<2>: nom, acc, gen;
  NUM<2>: sg, du, pl;
  GEND<2>: m, f;
  # `indef' and `def' are both subtypes of `non-cons'.
  STATE<2>: cons, non-cons {indef, def};
  ANIM<2>: hum, nonhum;
  PERS<2>: 1st, 2nd, 3rd;
  RESUMPTIVE<2>: nonres, res;
  SEM-NUM<X:NUM>: sg-X, du-X, pl-X;
  SEM-PERS<X:PERS>: 1st-X, 2nd-X, 3rd-X;

  TENSE<E>: past, pres;
  MOOD<E>: indic, subj, juss;

  # Here's a more complicated hierarchy, from the original tiny grammar.
  ontology: sem-obj {
    phys-obj {
      animate-being {
        person
      },
      thing
    },
    situation {
      change {
        action
      },
      state
    }
  };
}

rule {
  no typeraise;
  typeraise +: n => s;
  typeraise - $: n => s;
  typeraise - $: pp => s;
  typeraise - $: pp/n => s;
  # Turns a category that still needs a nominative noun into one that
  # does not (presumably to allow the subject to be omitted).
  typechange: s$1 | n[nom] => s$1 ;
  # A construct-state noun becomes a noun that takes a following genitive
  # noun; the result has the same definiteness as that genitive noun.
  typechange: n<~2>[cons] => n<2>[3rd,def] /* n[gen,def] ;
  typechange: n<~2>[cons] => n<2>[3rd,indef] /* n[gen,indef] ;
}

##########################################################################
#                          Morphological entries                         #
#                               (morph.xml)                              #
##########################################################################

word wa:Conj;    # "and"
word anna:Comp;  # "that", introducing sentential complements
word inna:Comp;  # same, but only after the verb qaal "say"
word maa:InterrogPro(thing): 3rd;   # "what"
word man:InterrogPro(person): 3rd;  # "who"
word li:Prep;    # "to"
word fii:Prep;   # "in"

# This word means "this".
word haadhaa {
  *: sg, m;
  haadhihi: sg, f;
  ha_ulaahi: pl;
  # Bizarrely, this word declines for case only in the dual.
  haadhaani: du, m, nom;
  haadhayni: du, m, acc;
  haadhayni: du, m, gen;
  haataani: du, f, nom;
  haatayni: du, f, acc;
  haatayni: du, f, gen;
}

# This word means "that".
word dhaalik {
  *: sg, m;
  tilka: sg, f;
  ulaa_ika: pl;
  # Bizarrely, this word declines for case only in the dual.
  dhaanika: du, m, nom;
  dhaynika: du, m, acc;
  dhaynika: du, m, gen;
  taanika: du, f, nom;
  taynika: du, f, acc;
  taynika: du, f, gen;
}

# This is the relative pronoun.
word al-ladhii {
  *: sg, m;
  al-latii: sg, f;
  al-ladhiina: pl, m;
  al-laati: pl, f;
  # Bizarrely, this word declines for case only in the dual.
  al-ladhaani: du, m, nom;
  al-ladhayni: du, m, acc;
  al-ladhayni: du, m, gen;
  al-lataani: du, f, nom;
  al-latayni: du, f, acc;
  al-latayni: du, f, gen;
}

# Independent pronouns.  Each form carries both the morphological
# person/number features and their semantic (-X) counterparts.
word pro:Pro {
  ana: 1st, 1st-X, sg, sg-X;
  anta: 2nd, 2nd-X, sg, sg-X, m;
  anti: 2nd, 2nd-X, sg, sg-X, f;
  huwa: 3rd, 3rd-X, sg, sg-X, m;
  hiya: 3rd, 3rd-X, sg, sg-X, f;
  naHnu: 1st, 1st-X, pl, pl-X;
  antum: 2nd, 2nd-X, pl, pl-X, m;
  antunna: 2nd, 2nd-X, pl, pl-X, f;
  hum: 3rd, 3rd-X, pl, pl-X, m;
  hunna: 3rd, 3rd-X, pl, pl-X, f;
}

# Pronominal clitics.  No family is named here; the PossClitic and
# ObjClitic families list the clitics that belong to them as members.
word ii:: 1st, 1st-X, sg, sg-X;
word nii:: 1st, 1st-X, sg, sg-X;
word ka:: 2nd, 2nd-X, sg, sg-X, m;
word ki:: 2nd, 2nd-X, sg, sg-X, f;
word hu:: 3rd, 3rd-X, sg, sg-X, m;
word haa:: 3rd, 3rd-X, sg, sg-X, f;
word naa:: 1st, 1st-X, pl, pl-X;
word kum:: 2nd, 2nd-X, pl, pl-X, m;
word kunna:: 2nd, 2nd-X, pl, pl-X, f;
word hum:: 3rd, 3rd-X, pl, pl-X, m;
word hunna:: 3rd, 3rd-X, pl, pl-X, f;

#############################################
#                   Nouns                   #
#############################################

# This shows how a reasonably complicated morphology can be accommodated.
# It is certainly possible that some of this may (and probably should)
# be offloaded into a separate morphology-processing engine.  However,
# even in that case there is often a good deal more to the lexicon.

# We show a couple examples of complete paradigms, in order to make it
# easier to understand what's going on below.

# Here is a typical noun (kitaab "book") with a broken plural (kutub "books").
# For nouns with broken plurals, the plural is typically declined like
# the singular.  Note that Arabic nouns are declined for three numbers
# (singular, dual, plural), three cases (nominative, accusative, genitive),
# and three states (indefinite, definite, construct).  (The construct state
# is used for nouns that are modified by other nouns -- e.g. "book" in
# "the book of Mary".)

# Form       Nominative      Accusative      Genitive
# ---------------------------------------------------------------
# sg.indef   kitaabun        kitaaban        kitaabin
# sg.def     al-kitaabu      al-kitaaba      al-kitaabi
# sg.cons    kitaabu         kitaaba         kitaabi
#
# du.indef   kitaabaani      kitaabayni      kitaabayni
# du.def     al-kitaabaani   al-kitaabayni   al-kitaabayni
# du.cons    kitaabaa        kitaabay        kitaabay
#
# pl.indef   kutubun         kutuban         kutubin
# pl.def     al-kutubu       al-kutuba       al-kutubi
# pl.cons    kutubu          kutuba          kutubi

# Here is a typical noun (mudarris "teacher") with a different kind of
# plural, a so-called "strong masculine plural", which has its own declension.

# Form       Nominative        Accusative        Genitive
# ---------------------------------------------------------------
# sg.indef   mudarrisun        mudarrisan        mudarrisin
# sg.def     al-mudarrisu      al-mudarrisa      al-mudarrisi
# sg.cons    mudarrisu         mudarrisa         mudarrisi
#
# du.indef   mudarrisaani      mudarrisayni      mudarrisayni
# du.def     al-mudarrisaani   al-mudarrisayni   al-mudarrisayni
# du.cons    mudarrisaa        mudarrisay        mudarrisay
#
# pl.indef   mudarrisuuna      mudarrisiina      mudarrisiina
# pl.def     al-mudarrisuuna   al-mudarrisiina   al-mudarrisiina
# pl.cons    mudarrisuu        mudarrisii        mudarrisii

# Here, we make heavy use of macros.

# A macro definition says: Every time an expression of the form
# macro-name(...) occurs (e.g. three-form-decl(...)), replace it with the
# text that comes after.  The parameters will be substituted into the text.
# The braces that denote the macro's text do *NOT* form part of the text
# that is substituted.
# Note that macro substitutions are processed recursively: If the text
# of a macro substitution contains calls to other macros, they will also
# be processed.  This makes "inheritance" very easy to implement.
# This macro is used for a particular paradigm corresponding to a
# particular number of a word.
#
# It emits nine forms (three cases x three states).  Each form is a stem
# concatenated with an ending; the indefinite, definite and construct
# states may each use a different stem.  Definite forms are additionally
# passed through add-al().  MORPH-NUM, SEM-NUM and GEND are copied onto
# every form as features.
def three-different-form-decl(indef-form, def-form, cons-form, indef-nom, indef-acc, indef-gen, def-nom, def-acc, def-gen, cons-nom, cons-acc, cons-gen, morph-num, sem-num, gend) {
  indef-form.indef-nom: morph-num, sem-num, gend, nom, indef;
  indef-form.indef-acc: morph-num, sem-num, gend, acc, indef;
  indef-form.indef-gen: morph-num, sem-num, gend, gen, indef;
  add-al(def-form.def-nom): morph-num, sem-num, gend, nom, def;
  add-al(def-form.def-acc): morph-num, sem-num, gend, acc, def;
  add-al(def-form.def-gen): morph-num, sem-num, gend, gen, def;
  cons-form.cons-nom: morph-num, sem-num, gend, nom, cons;
  cons-form.cons-acc: morph-num, sem-num, gend, acc, cons;
  cons-form.cons-gen: morph-num, sem-num, gend, gen, cons;
}

# It's questionable whether we should do this.  This assimilates al-
# to a following coronal consonant, e.g. ar-rajul, as-sigaara,
# ath-thalj, an-nuur, aDH-DHuhr, etc.
# The form is first prefixed with "al-"; the regexp then replaces the "l"
# with a copy of the following consonant (or digraph such as th, dh, sh, DH).
def add-al(form) regsub('^al-([std]h|DH|[tdszrnTDSZL])', 'a\1-\1', al-.form)

# Same as three-different-form-decl(), but with a single stem shared by
# all three states.
def three-form-decl(form, indef-nom, indef-acc, indef-gen, def-nom, def-acc, def-gen, cons-nom, cons-acc, cons-gen, morph-num, sem-num, gend) {
  three-different-form-decl(form, form, form,
                            indef-nom, indef-acc, indef-gen,
                            def-nom, def-acc, def-gen,
                            cons-nom, cons-acc, cons-gen,
                            morph-num, sem-num, gend)
}

# Using the above macro, we create two more macros to handle two common
# paradigm types: Accusative and genitive are the same, and the
# definite is either the same as the indefinite (two-form-decl-1) or
# the same as the construct (two-form-decl-2).
# Indefinite and definite share the NON-CONS-* endings; the construct
# state has its own CONS-* endings.  The oblique ending serves for both
# accusative and genitive.
def two-form-decl-1(form, non-cons-nom, non-cons-obl, cons-nom, cons-obl, morph-num, sem-num, gend) {
  three-form-decl(form,
                  non-cons-nom, non-cons-obl, non-cons-obl,
                  non-cons-nom, non-cons-obl, non-cons-obl,
                  cons-nom, cons-obl, cons-obl,
                  morph-num, sem-num, gend)
}

# The indefinite has its own INDEF-* endings; definite and construct share
# the NON-INDEF-* endings.  The oblique ending serves for both accusative
# and genitive.
def two-form-decl-2(form, indef-nom, indef-obl, non-indef-nom, non-indef-obl, morph-num, sem-num, gend) {
  three-form-decl(form,
                  indef-nom, indef-obl, indef-obl,
                  non-indef-nom, non-indef-obl, non-indef-obl,
                  non-indef-nom, non-indef-obl, non-indef-obl,
                  morph-num, sem-num, gend)
}

# In turn we create macros for particular paradigms: strong masculine ("uun"),
# strong feminine ("aat"), dual, and basic triptote (the paradigm for
# "kitaab" above and, in general, most singulars).

# Note that an alternative to using braces is to put the macro text on
# the same line as the `def' part of the macro (backslashes can be used
# to join multiple lines together).

def uun-plural(form) two-form-decl-1(form, uuna, iina, uu, ii, pl, pl-X, m)

def aat-plural(form) \
  two-form-decl-2(form, aatun, aatin, aatu, aati, pl, pl-X, f)

def dual(form, gend) \
  two-form-decl-1(form, aani, ayni, aa, ay, du, du-X, gend)

def triptote(form, morph-num, sem-num, gend) \
  three-form-decl(form, un, an, in, u, a, i, u, a, i, morph-num, sem-num, gend)

# Here we define macros for full paradigms for words.  Note how semicolons
# are not used, because they are supplied by the macro text itself.
# (Consult the text for three-different-form-decl() above, and remember
# that the braces denoting the macro text are not actually part of the
# text.  This means that if you really want braces as the outermost thing
# in some macro text, you'll need to supply two levels of braces.)
# Masculine noun of ontology type `thing' with a broken plural.
# The plural stem is declined with morphological features sg/f but
# semantic number pl-X (presumably so that a non-human plural agrees
# like a feminine singular -- confirm against the agreement features
# used by the verb families).
def thing(sing, plur) {
  word sing:N(thing) {
    triptote(sing, sg, sg-X, m)
    dual(sing, m)
    triptote(plur, sg, pl-X, f)
  }
}

# Feminine counterpart of thing().  The singular and dual are built on
# the stem with "t" appended (sing.t); the plural is treated as in thing().
def fem-thing(sing, plur) {
  word sing:N(thing) {
    triptote(sing.t, sg, sg-X, f)
    dual(sing.t, f)
    triptote(plur, sg, pl-X, f)
  }
}

# Noun of ontology type `person' with a broken plural.  Unlike thing(),
# the plural is morphologically plural and keeps the gender GEND.
def person(sing, plur, gend) {
  word sing:N(person) {
    triptote(sing, sg, sg-X, gend)
    dual(sing, gend)
    triptote(plur, pl, pl-X, gend)
  }
}

def male(sing, plur) person(sing, plur, m)

def female(sing, plur) person(sing, plur, f)

# Masculine person noun whose plural is the strong ("uun") plural built
# on the singular stem, so no separate plural stem is needed.
def strong-male(sing) {
  word sing:N(person) {
    triptote(sing, sg, sg-X, m)
    dual(sing, m)
    uun-plural(sing)
  }
}

# Here we define the actual words.  Note how short these definitions are,
# specifying only what's unpredictable.

thing(kitaab, kutub)
thing(waqt, _awqaat)
thing(Harf, Huruuf)
thing(dars, duruus)
fem-thing(sigaara, sagaayir)
fem-thing(madiina, mudun)
male(rajul, rijaal)
male(walad, _awlaad)
male(Taalib, Tullaab)
female(bint, banaat)
strong-male(mudarris)

# "woman": irregular.  The definite forms use a different stem (mar_at)
# from the indefinite and construct forms (imra_at), and the plural
# (nisaa_) is unrelated to the singular stem.
word imra_a:N(person) {
  three-different-form-decl(imra_at, mar_at, imra_at,
                            un, an, in, u, a, i, u, a, i,
                            sg, sg-X, f)
  three-different-form-decl(imra_at, mar_at, imra_at,
                            aani, ayni, ayni, aani, ayni, ayni, aa, ay, ay,
                            du, du-X, f)
  triptote(nisaa_, pl, pl-X, f)
}

# Masculine person nouns whose singular construct state takes the long
# endings uu/aa/ii instead of u/a/i (e.g. _axuu).
def extended_construct_word(stem, plur) {
  word stem:N(person) {
    three-form-decl(stem, un, an, in, u, a, i, uu, aa, ii, sg, sg-X, m)
    dual(stem, m)
    triptote(plur, pl, pl-X, m)
  }
}

extended_construct_word(_ax, _ixwaan)   # "brother"
extended_construct_word(_ab, _aabaa_)   # "father"

# Typical paradigms (present tense of verbs with a weak third radical;
# the three columns are indicative, subjunctive, jussive):

# 1sg    'aktaa      'aktaa     'akta      |
# 2sg.m  taktaa      taktaa     takta      |
# 2sg.f  taktayna    taktay     taktay     |
# 3sg.m  yaktaa      yaktaa     yakta      |
# 3sg.f  taktaa      taktaa     takta      |
# 2du    taktayaani  taktayaa   taktayaa   |
# 3du.m  yaktayaani  yaktayaa   yaktayaa   |
# 3du.f  taktayaani  taktayaa   taktayaa   |
# 1pl    naktaa      naktaa     nakta      |
# 2pl.m  taktawna    taktaw     taktaw     |
# 2pl.f  taktayna    taktayna   taktayna   |
# 3pl.m  yaktawna    yaktaw     yaktaw     |
# 3pl.f  yaktayna    yaktayna   yaktayna   |
#
# 1sg    'aktuu      'aktuwa    'aktu      | 'aktii      'aktiya    'akti
# 2sg.m  taktuu
#                    taktuwa    taktu      | taktii      taktiya    takti
# 2sg.f  taktiina    taktii     taktii     | taktiina    taktii     taktii
# 3sg.m  yaktuu      yaktuwa    yaktu      | yaktii      yaktiya    yakti
# 3sg.f  taktuu      taktuwa    taktu      | taktii      taktiya    takti
# 2du    taktuwaani  taktuwaa   taktuwaa   | taktiyaani  taktiyaa   taktiyaa
# 3du.m  yaktuwaani  yaktuwaa   yaktuwaa   | yaktiyaani  yaktiyaa   yaktiyaa
# 3du.f  taktuwaani  taktuwaa   taktuwaa   | taktiyaani  taktiyaa   taktiyaa
# 1pl    naktuu      naktuwa    naktu      | naktii      naktiya    nakti
# 2pl.m  taktuuna    taktuu     taktuu     | taktuuna    taktuu     taktuu
# 2pl.f  taktuuna    taktuuna   taktuuna   | taktiina    taktiina   taktiina
# 3pl.m  yaktuuna    yaktuu     yaktuu     | yaktuuna    yaktuu     yaktuu
# 3pl.f  yaktuuna    yaktuuna   yaktuuna   | yaktiina    yaktiina   yaktiina

# Past tense built from two stems: FORMV is used before endings that
# begin with a vowel, FORMC before endings that begin with a consonant.
def two-form-past(formv, formc) {
  formc.tu: past, 1st, sg;
  formc.ta: past, 2nd, m, sg;
  formc.ti: past, 2nd, f, sg;
  formv.a: past, 3rd, m, sg;
  formv.at: past, 3rd, f, sg;
  formc.tumaa: past, 2nd, du;
  formv.aa: past, 3rd, m, du;
  formv.ataa: past, 3rd, f, du;
  formc.naa: past, 1st, pl;
  formc.tum: past, 2nd, m, pl;
  formc.tunna: past, 2nd, f, pl;
  formv.uu: past, 3rd, m, pl;
  formc.na: past, 3rd, f, pl;
}

# Past tense of 3rd-weak verbs whose stem is extended with -ay- before
# consonant-initial endings.
def 3rd-weak-past-ay(form) {
  form.ay.tu: past, 1st, sg;
  form.ay.ta: past, 2nd, m, sg;
  form.ay.ti: past, 2nd, f, sg;
  form.aa: past, 3rd, m, sg;
  form.at: past, 3rd, f, sg;
  form.ay.tumaa: past, 2nd, du;
  form.ay.aa: past, 3rd, m, du;
  form.ataa: past, 3rd, f, du;
  form.ay.naa: past, 1st, pl;
  form.ay.tum: past, 2nd, m, pl;
  form.ay.tunna: past, 2nd, f, pl;
  form.aw: past, 3rd, m, pl;
  form.ay.na: past, 3rd, f, pl;
}

# Past tense of 3rd-weak verbs whose stem is extended with -aw- before
# consonant-initial endings.
def 3rd-weak-past-aw(form) {
  form.aw.tu: past, 1st, sg;
  form.aw.ta: past, 2nd, m, sg;
  form.aw.ti: past, 2nd, f, sg;
  form.aa: past, 3rd, m, sg;
  form.at: past, 3rd, f, sg;
  form.aw.tumaa: past, 2nd, du;
  form.aw.aa: past, 3rd, m, du;
  form.ataa: past, 3rd, f, du;
  form.aw.naa: past, 1st, pl;
  form.aw.tum: past, 2nd, m, pl;
  form.aw.tunna: past, 2nd, f, pl;
  form.aw: past, 3rd, m, pl;
  form.aw.na: past, 3rd, f, pl;
}

# Past tense of 3rd-weak verbs whose stem is extended with -ii- before
# consonant-initial endings.
def 3rd-weak-past-ii(form) {
  form.ii.tu: past, 1st, sg;
  form.ii.ta: past, 2nd, m, sg;
  form.ii.ti: past, 2nd, f, sg;
  form.iya: past, 3rd, m, sg;
  form.iyat: past, 3rd, f, sg;
  form.ii.tumaa: past, 2nd, du;
  form.iy.aa: past, 3rd, m, du;
  form.iy.ataa: past, 3rd, f, du;
  form.ii.naa: past, 1st, pl;
  form.ii.tum: past, 2nd, m, pl;
  form.ii.tunna: past, 2nd, f, pl;
  form.uu: past, 3rd, m, pl;
  form.ii.na: past, 3rd, f, pl;
}

# A strong verb uses the same stem before vowel and consonant endings.
def strong-past(form) two-form-past(form, form)

# In general, almost all Arabic present-tense verbs of a particular mood
# can be defined using five forms.  Verbs with a hamza in the first radical
# have a problem in the first-singular; ideally this should be handled
# automatically using a regexp or something of that sort, but we don't have
# such support currently, so we use an optional param.
# NOTE(review): the last sentence looks outdated -- gen-pres() below takes
# no optional parameter and handles this case with regsub().

def gen-pres(mood, fsing, fsing-fem, fdual, fplur-masc, fplur-fem) {
  # This shows how you can use regular expressions if need be.
  # regsub(regex, repl, string) is a special built-in that does regular-
  # expression substitution on STRING, replacing all occurrences of
  # REGEX with REPL.  Regular-expression syntax is as in Python.
  # In this case, Arabic verbs have a phonetic rule that eliminates
  # two glottal stops occurring near each other at the beginning of a
  # word.  For example, _a_kulu -> _aakulu, and _u_kalu -> _uukalu.
  # (That is, the vowel is lengthened.)
  # _ . regsub(foo, bar, fsing): pres, mood, 1st, sg;
  _ . regsub('^([aiu])_', '\1\1', fsing): pres, mood, 1st, sg;
  # _.fsing: pres, mood, 1st, sg;
  t.fsing: pres, mood, 2nd, m, sg;
  t.fsing-fem: pres, mood, 2nd, f, sg;
  y.fsing: pres, mood, 3rd, m, sg;
  t.fsing: pres, mood, 3rd, f, sg;
  t.fdual: pres, mood, 2nd, du;
  y.fdual: pres, mood, 3rd, m, du;
  t.fdual: pres, mood, 3rd, f, du;
  n.fsing: pres, mood, 1st, pl;
  t.fplur-masc: pres, mood, 2nd, m, pl;
  t.fplur-fem: pres, mood, 2nd, f, pl;
  y.fplur-masc: pres, mood, 3rd, m, pl;
  y.fplur-fem: pres, mood, 3rd, f, pl;
}

# The "two-form" present uses normal (non-3rd-weak) endings but may
# have two forms of the root, one for vocalic endings (almost all of them)
# and one for consonant endings (only the feminine plural).  This
# encompasses 2nd-weak verbs and doubled verbs, and (trivially) strong verbs.

def two-form-pres-indic(formv, formc) {
  gen-pres(indic, formv.u, formv.iina, formv.aani, formv.uuna, formc.na)
}

def two-form-pres-subj(formv, formc) {
  gen-pres(subj, formv.a, formv.ii, formv.aa, formv.uu, formc.na)
}

# The jussive is different because the base form (fsing) has no ending.
# This means that it may assume the consonant form instead of the vowel
# form, or may have a number of variants (in particular, for doubled
# verbs).  So the base form needs to be given explicitly.
def two-form-pres-juss(base, formv, formc) {
  gen-pres(juss, base, formv.ii, formv.aa, formv.uu, formc.na)
}

def strong-pres(form) {
  two-form-pres-indic(form, form)
  two-form-pres-subj(form, form)
  two-form-pres-juss(form, form, form)
}

# The jussive base form is the consonant-ending stem (formc).
def 2nd-weak-pres(formv, formc) {
  two-form-pres-indic(formv, formc)
  two-form-pres-subj(formv, formc)
  two-form-pres-juss(formc, formv, formc)
}

# Three jussive variants are generated, with base forms formc, formv.a
# and formv.i respectively.
def doubled-pres(formv, formc) {
  two-form-pres-indic(formv, formc)
  two-form-pres-subj(formv, formc)
  two-form-pres-juss(formc, formv, formc)
  two-form-pres-juss(formv.a, formv, formc)
  two-form-pres-juss(formv.i, formv, formc)
}

# Verbs whose third radical is a /w/ or a /y/ have all manner of exceptional
# forms; easiest just to list them.
# In general, there are three types,
# depending on whether the base singular forms end in -aa, -ii, or -uu.
# Each macro below calls gen-pres() once per mood (indic, subj, juss).

def 3rd-weak-pres-aa(form) {
  gen-pres(indic, form.aa, form.ayna, form.ayaani, form.awna, form.ayna)
  gen-pres(subj, form.aa, form.ay, form.ayaa, form.aw, form.ayna)
  # Note the shortened vowel here.
  gen-pres(juss, form.a, form.ay, form.ayaa, form.aw, form.ayna)
}

def 3rd-weak-pres-ii(form) {
  gen-pres(indic, form.ii, form.iina, form.iyaani, form.uuna, form.iina)
  gen-pres(subj, form.iya, form.ii, form.iyaa, form.uu, form.iina)
  # Note the shortened vowel here.
  gen-pres(juss, form.i, form.ii, form.iyaa, form.uu, form.iina)
}

def 3rd-weak-pres-uu(form) {
  gen-pres(indic, form.uu, form.iina, form.uwaani, form.uuna, form.uuna)
  gen-pres(subj, form.uwa, form.ii, form.uwaa, form.uu, form.uuna)
  # Note the shortened vowel here.
  gen-pres(juss, form.u, form.ii, form.uwaa, form.uu, form.uuna)
}

# PASTV/PASTC are the past stems used before vowel/consonant endings;
# PRESV/PRESC likewise for the present.  PROPS is the text placed after
# the colon in the word declaration (family name(s) and pred).
def 2nd-weak-verb(pastv, props, pastc, presv, presc) {
  word pastv: props {
    two-form-past(pastv, pastc)
    2nd-weak-pres(presv, presc)
  }
}

# Note the way that macro calls can be constructed as well.  Here, the
# value of PAST_TYPE is the suffix at the end of the macro name.
# (PRES_TYPE selects the present-tense macro in the same way.)
def 3rd-weak-verb(past_stem, props, past_type, pres_stem, pres_type) {
  word past_stem . past_type: props {
    3rd-weak-past- . past_type(past_stem)
    3rd-weak-pres- . pres_type(pres_stem)
  }
}

def strong-verb(past, props, pres) {
  word past: props {
    strong-past(past)
    strong-pres(pres)
  }
}

2nd-weak-verb(kaan, TransV(pred=be), kun, akuun, akun)
2nd-weak-verb(naam, IntransV(pred=sleep), nim, anaam, anam)
2nd-weak-verb(qaal, SayV(pred=say), qul, aquul, aqul)
strong-verb(katab, TransV(pred=write), aktub)
strong-verb(dhahab, IntransV(pred=go), adhhab)
# Note that the following verb, which begins with a glottal stop,
# will have a modification made to it in the first-person singular present.
# (See above.)
strong-verb(_akal, IntransV TransV (pred=eat), a_kul)
3rd-weak-verb(ra_, TransV(pred=see), ay, ar, aa)
3rd-weak-verb(_a9T, DitransV(pred=give), ay, u9T, ii)
3rd-weak-verb(laq, TransV(pred=find), ii, alq, aa)
strong-verb(9araf, ThinkV(pred=know), a9rif) # see also 9alam
strong-verb(tafakkar, ThinkV(pred=think), atafakkar)
strong-verb(ta9allam, ThinkV(pred=learn), ata9allam)

# Ordinary nouns: third person, non-resumptive.
family N {
  entry: n<2>[X, 3rd, nonres]: X:sem-obj(*);
}

# Interrogative pronouns.  The three entries take a clause missing,
# respectively, a resumptive noun, a nominative non-resumptive noun, and
# an accusative non-resumptive noun.
family InterrogPro(Pro) {
  entry: s/*(s/n<2>[res]);
  entry: s/*(s|n<2>[nonres,nom]);
  entry: s/*(s/n<2>[nonres,acc]);
  member: maa, man;
}

# Independent pronouns: nominative, definite, non-resumptive.
family Pro {
  entry: n<2>[X, nom, def, nonres]: X:sem-obj(*);
  member: pro;
}

# Relative pronoun.  The three entries mirror those of InterrogPro, but
# the result modifies a preceding noun instead of forming a sentence.
family Rel {
  entry: (n<~2>[CASE,nonres]\n<~2>[nonres])/*(s/n<2>[res]);
  entry: (n<~2>[CASE,nonres]\n<~2>[nonres])/*(s|n<2>[nonres,nom]);
  entry: (n<~2>[CASE,nonres]\n<~2>[nonres])/*(s/n<2>[nonres,acc]);
  member: al-ladhii;
}

# Coordination of nouns (the result is plural) and of sentences.
family AndConj(Conj) {
  entry: n[pl, CASE, STATE] \* n[CASE, STATE] /* n[CASE, STATE];
  entry: s$1 \* s$1 /* s$1;
  member: wa;
}

# Demonstratives; the result is definite.
family Det(indexRel=det) {
  entry: n<2>[X, def, nonres] /^ n<2>[X]: X:sem-obj(<det>*);
  member: haadhaa, dhaalik;
}

# good luck on this one! construct even more complicated ones!
#
#ar-rajulu al-ladhii kataba al-kutuba ra_aa wa _a9Taa li binti hu al-kilaaba al-latii akalat sagaayira mudarrisii al-waladi
#"the man that wrote the books saw and gave to his daughter the dogs that ate the cigarettes of the boy's teachers."
# Works, correctly:
#ar-rajulu al-ladhii kataba al-kutuba ra_aa wa _a9Taa li binti hu as-sagaayira al-latii qultu inna al-waladu tafakkara anna al-mar_atu _a9Tat haa li ha_ulaahi al-mudarrisiina
#Bad
#ar-rajula al-ladhii katabat al-kutuba ra_aa wa _a9Taa li binti hu as-sagaayira al-latii qultu inna al-waladu tafakkara anna al-mar_atu _a9Tat haa li ha_ulaahi al-mudarrisiina
#"the man that wrote the books saw and gave to his daughter the cigarettes that I said that the boy thought that the woman gave them to those teachers"

# Fails, correctly:
#ar-rajulu al-ladhii kataba al-kutuba ra_aa wa _a9Taa li binti hu as-sagaayira al-latii qultu inna al-waladu tafakkara anna al-mar_atu _a9Tat hu li ha_ulaahi al-mudarrisiina

# Works:
# ar-rajulu al-ladhii al-waladu _a9Taa as-sagaayira li binti hu dhahaba
# ar-rajulu _a9Taa li binti hu as-sagaayira
# ar-rajulu al-ladhii waladu hu _a9Taa as-sagaayira li al-binti dhahaba

# Won't work:
# ar-rajulu al-ladhii al-waladu _a9Taa li binti hu as-sagaayira dhahaba
# ar-rajulu al-ladhii waladu hu _a9Taa li al-binti as-sagaayira dhahaba

# Possessive clitics attach to a preceding construct-state noun and yield
# a definite noun.  The second entry additionally leaves the result
# looking for a resumptive noun.
family PossClitic(Cli, indexRel=poss) {
  entry: n<~1>[X, def, nonres] \* n<1>[X, cons]: X:sem-obj(<poss>*);
  entry: (n<~1>[X, def, nonres] / n<2>[res]) \* n<1>[X, cons]: X:sem-obj(<poss>*);
  member: ii, ka, ki, hu, haa, naa, kum, kunna, hum, hunna;
}

# Object clitics attach to a preceding verb that still needs an object
# and a nominative subject, and fill the object slot.
# NOTE(review): indexRel=poss looks copied from PossClitic; the entries
# here carry no semantics -- confirm whether it is intended.
family ObjClitic(Cli, indexRel=poss) {
  entry: (s$1 | n<3>[nonres]) \ (s$1 / n<2>[nonres] | n<3>[nom,nonres]);
  entry: (s$1 / n<~2>[res] | n<3>[nonres]) \ (s$1 / n<2>[nonres] | n<3>[nom,nonres]);
  member: nii, ka, ki, hu, haa, naa, kum, kunna, hum, hunna;
}

family Adj(indexRel=adj) {
  entry: n<2>[X, NUM, GEND, CASE, STATE] \ n<2>[X]: X:sem-obj(<adj>*);
}

# Prepositions take a genitive, non-resumptive noun.
family Prep-Nom(Prep, indexRel="*NoSem*") {
  # The pp<~3> notation generates an 'inheritsFrom' tag rather than
  # an 'id' tag for the feature structure.
  entry: pp<~3>[lex=*] /< n<3>[gen,nonres];
  member: li, fii;
}

family Comp(indexRel="*NoSem*") {
  entry: sbar<~1>[lex=*] / s<1>;
  member: anna, inna;
}

# All verb families below take a nominative, non-construct,
# non-resumptive subject.

family IntransV(V) {
  entry: s[E] | n<2>[X,NUM,PERS,GEND,non-cons,nonres,nom]:
         E:action(* ^ <Actor>X:animate-being);
}

# Verbs whose sentential complement is introduced by `inna'.
family SayV(V) {
  entry: s[E] / sbar[Z, lex=inna] | n<2>[X,NUM,PERS,GEND,non-cons,nonres,nom]:
         E:action(* ^ <Actor>X:animate-being ^ <Situation>Z);
}

# Verbs whose sentential complement is introduced by `anna'.
family ThinkV(V) {
  entry: s[E] / sbar[Z, lex=anna] | n<2>[X,NUM,PERS,GEND,non-cons,nonres,nom]:
         E:action(* ^ <Actor>X:animate-being ^ <Situation>Z);
}

family TransV(V) {
  entry: s[E] / n[Y,acc,nonres] | n<2>[X,NUM,PERS,GEND,non-cons,nonres,nom]:
         E:action(* ^ <Actor>X:animate-being ^ <Patient>Y:sem-obj);
}

family DitransV(V) {
  # The first slash (on the pp) is marked with a mode allowing backward xcomp.
  entry: s[E] /< pp[Z,lex=li] / n[Y,acc,nonres] | n<2>[X,NUM,PERS,GEND,non-cons,nonres,nom]:
         E:action(* ^ <Actor>X:animate-being ^ <Patient>Y:sem-obj ^ <Recipient>Z:animate-being) ;
}

# Each testbed item is a sentence followed by the expected number of parses.
testbed {
  # different states of subject
  rajulun dhahaba: 1;
  ar-rajulu dhahaba: 1;
  rajulu dhahaba: 0;

  # subject-verb agreement
  rajulun dhahabuu: 0;
  ar-rajulu dhahabuu: 0;
  rajulu dhahabuu: 0;

  # gender agreement
  ar-rajulu dhahaba: 1;
  ar-rajulu dhahabat: 0;
  al-bintu dhahaba: 0;
  al-bintu dhahabat: 1;

  # possession
  rajulun hu dhahaba: 0;
  ar-rajulu hu dhahaba: 0;
  rajulu hu dhahaba: 1;

  # subject case
  ar-rajula dhahaba: 0;
  ar-rajuli dhahaba: 0;

  # case in construct phrase
  _axuu ar-rajuli dhahaba: 1;
  _axuu ar-rajula dhahaba: 0;
  _axuu ar-rajulu dhahaba: 0;

  # construct state in construct phrase
  _axun ar-rajuli dhahaba: 0;
  al-_axu ar-rajuli dhahaba: 0;

  # object case
  ar-rajulu ra_aa al-kitaaba: 1;
  ar-rajulu ra_aa al-kitaabi: 0;
  ar-rajulu ra_aa al-kitaabu: 0;

  # preposition case
  ar-rajulu _a9Taa al-kitaaba li al-waladi: 1;
  ar-rajulu _a9Taa al-kitaaba li al-waladu: 0;
  ar-rajulu _a9Taa al-kitaaba li al-walada: 0;

  # subcategorization
  ar-rajulu ra_aa al-kitaaba li al-waladi: 0;

  # backward xcomp
  ar-rajulu _a9Taa li al-waladi al-kitaaba: 1;
  _a9Taa ar-rajulu li al-waladi al-kitaaba: 1;

  # object clitics
  ana ra_aytu hu: 1;
  ra_aytu hu ana: 1;
  ra_aytu ana hu: 0;
  hu ra_aytu ana: 0;
  hu ana ra_aytu: 0;
  huwa ra_aa nii: 1;
  huwa ra_aa ii: 0;
  huwa ra_aa ana: 0;
  ar-rajulu _a9Taa haa li al-waladi: 1;
  _a9Taa haa ar-rajulu li al-waladi: 1;

  # relative clauses
  # "I gave it to the man that the girl saw him"
  _a9Taytu haa li ar-rajuli al-ladhii al-bintu ra_at hu: 3;
  # "I gave it to the man that the girl saw her"
  _a9Taytu haa li ar-rajuli al-ladhii al-bintu ra_at haa: 0;
  # "I gave it to the man that the girl saw"
  _a9Taytu haa li ar-rajuli al-ladhii al-bintu ra_at: 0;
  # "I gave it to the man that the girl saw the boy"
  _a9Taytu haa li ar-rajuli al-ladhii al-bintu ra_at al-walada: 0;
}