#############################################################
#                                                           #
#                         arabic.ccg                        #
#                                                           #
#############################################################

# Author: Ben Wing <ben@666.com>
# Date: April 2006

# This is a grammar for a fragment of Arabic.  It's particularly
# useful for demonstrating the extended use of macros to handle
# complicated morphological inflections.

# See the `tiny' grammar (tiny.ccg) for more info about the format
# of this file.

# Feature declarations.  Each line gives a feature name, what it attaches
# to (in angle brackets) and its possible values; braces nest sub-values
# under a parent value (e.g. indef and def are both kinds of non-cons).
# See tiny.ccg for the exact meaning of the angle-bracket notation.
feature {
  # Syntactic features, all declared on feature-structure id 2.
  CASE<2>: nom, acc, gen;
  NUM<2>: sg, du, pl;
  GEND<2>: m, f;
  STATE<2>: cons, non-cons {indef, def};
  ANIM<2>: hum, nonhum;
  PERS<2>: 1st, 2nd, 3rd;
  RESUMPTIVE<2>: nonres, res;
  # Semantic counterparts of NUM and PERS, declared on the nominal X.
  SEM-NUM<X:NUM>: sg-X, du-X, pl-X;
  SEM-PERS<X:PERS>: 1st-X, 2nd-X, 3rd-X;
  # Verbal features, declared on the event variable E.
  TENSE<E>: past, pres;
  MOOD<E>: indic, subj, juss;

  # Here's a more complicated hierarchy, from the original tiny grammar.
  # It supplies the semantic classes used below (thing, person,
  # animate-being, action, ...).
  ontology: sem-obj {
              phys-obj {
                animate-being {
                  person
                },
                thing
              },
              situation {
                change {
                  action
                },
                state
              }
            };
}

# Combinatory-rule configuration: the default type-raising rules are
# switched off and replaced with the specific ones listed, followed by
# three unary type-changing rules.
rule {
  no typeraise;
  typeraise +: n => s;
  typeraise - $: n => s;
  typeraise - $: pp => s;
  typeraise - $: pp/n => s;
  # Discharge a nominative argument of a sentence without consuming a word
  # (presumably to allow the subject to be omitted -- confirm intent).
  typechange: s$1 | n[nom] => s$1 ;
  # Construct state: a construct-state noun becomes a 3rd-person noun that
  # takes a genitive noun to its right and gets that noun's definiteness
  # (one rule for def, one for indef).
  typechange: n<~2>[cons] => n<2>[3rd,def] /* n[gen,def] ;
  typechange: n<~2>[cons] => n<2>[3rd,indef] /* n[gen,indef] ;
}

##########################################################################
#                           Morphological entries                        #
#                                 (morph.xml)                            #
##########################################################################

word wa:Conj; # "and"
word anna:Comp; # "that", introducing sentential complements
word inna:Comp; # same, but only after the verb qaal "say"

word maa:InterrogPro(thing): 3rd; # "what"
word man:InterrogPro(person): 3rd; # "who"

word li:Prep; # "to", "for"
word fii:Prep; # "in"

# This word means "this".  It is listed as a member of the Det family.
word haadhaa {
  *: sg, m;
  haadhihi: sg, f;
  # The plural carries no gender feature, so one form serves both genders.
  ha_ulaahi: pl;

  # Bizarrely, this word declines for case only in the dual.
  # Accusative and genitive share a form, so it is listed once per case.
  haadhaani: du, m, nom;
  haadhayni: du, m, acc;
  haadhayni: du, m, gen;

  haataani: du, f, nom;
  haatayni: du, f, acc;
  haatayni: du, f, gen;
}

# This word means "that".  It is listed as a member of the Det family.
word dhaalik {
  *: sg, m;
  tilka: sg, f;
  # The plural carries no gender feature, so one form serves both genders.
  ulaa_ika: pl;

  # Bizarrely, this word declines for case only in the dual.
  # Accusative and genitive share a form, so it is listed once per case.
  dhaanika: du, m, nom;
  dhaynika: du, m, acc;
  dhaynika: du, m, gen;

  taanika: du, f, nom;
  taynika: du, f, acc;
  taynika: du, f, gen;
}

# This is the relative pronoun (the sole member of the Rel family).
word al-ladhii {
  *: sg, m;
  al-latii: sg, f;
  # Unlike the demonstratives, the plural distinguishes gender.
  al-ladhiina: pl, m;
  al-laati: pl, f;

  # Bizarrely, this word declines for case only in the dual.
  # Accusative and genitive share a form, so it is listed once per case.
  al-ladhaani: du, m, nom;
  al-ladhayni: du, m, acc;
  al-ladhayni: du, m, gen;

  al-lataani: du, f, nom;
  al-latayni: du, f, acc;
  al-latayni: du, f, gen;
}

# Independent (free-standing) personal pronouns.  Each form carries both
# the syntactic person/number features and their semantic counterparts
# (the -X values).  First-person forms carry no gender feature, and no
# dual forms are listed.
word pro:Pro {
  ana: 1st, 1st-X, sg, sg-X;
  anta: 2nd, 2nd-X, sg, sg-X, m;
  anti: 2nd, 2nd-X, sg, sg-X, f;
  huwa: 3rd, 3rd-X, sg, sg-X, m;
  hiya: 3rd, 3rd-X, sg, sg-X, f;
  naHnu: 1st, 1st-X, pl, pl-X;
  # The 2nd masc. pl. pronoun ends in -m (cf. the clitic kum and the
  # past-tense ending -tum).
  antum: 2nd, 2nd-X, pl, pl-X, m;
  antunna: 2nd, 2nd-X, pl, pl-X, f;
  hum: 3rd, 3rd-X, pl, pl-X, m;
  hunna: 3rd, 3rd-X, pl, pl-X, f;
}

# Clitic pronouns.  These are declared without a family here; they are
# assigned to the PossClitic and ObjClitic families through those
# families' member lists.  ii appears only in PossClitic and nii only in
# ObjClitic; all the others appear in both.
word ii:: 1st, 1st-X, sg, sg-X;
word nii:: 1st, 1st-X, sg, sg-X;
word ka:: 2nd, 2nd-X, sg, sg-X, m;
word ki:: 2nd, 2nd-X, sg, sg-X, f;
word hu:: 3rd, 3rd-X, sg, sg-X, m;
word haa:: 3rd, 3rd-X, sg, sg-X, f;
word naa:: 1st, 1st-X, pl, pl-X;
word kum:: 2nd, 2nd-X, pl, pl-X, m;
word kunna:: 2nd, 2nd-X, pl, pl-X, f;
# hum and hunna have the same spelling as the 3rd-plural forms of `pro'.
word hum:: 3rd, 3rd-X, pl, pl-X, m;
word hunna:: 3rd, 3rd-X, pl, pl-X, f;

#############################################
#                    Nouns                  #
#############################################

# This shows how a reasonably complicated morphology can be accommodated.
# It is certainly possible that some of this may (and probably should)
# be offloaded into a separate morphology-processing engine.  However,
# even in that case there is often a good deal more to the lexicon.

# We show a couple examples of complete paradigms, in order to make it
# easier to understand what's going on below.

# Here is a typical noun (kitaab "book") with a broken plural (kutub "books").
# For nouns with broken plurals, the plural is typically declined like
# the singular.  Note that Arabic nouns are declined for three numbers
# (singular, dual, plural), three cases (nominative, accusative, genitive),
# and three states (indefinite, definite, construct). (The construct state
# is used for nouns that are modified by other nouns -- e.g. "book" in
# "the book of Mary".)

# Form		Nominative	Accusative	Genitive
# ---------------------------------------------------------------
# sg.indef	kitaabun	kitaaban	kitaabin
# sg.def	al-kitaabu	al-kitaaba	al-kitaabi
# sg.cons	kitaabu		kitaaba		kitaabi
# 
# du.indef	kitaabaani	kitaabayni	kitaabayni
# du.def	al-kitaabaani	al-kitaabayni	al-kitaabayni
# du.cons	kitaabaa	kitaabay	kitaabay
# 
# pl.indef	kutubun		kutuban		kutubin
# pl.def	al-kutubu	al-kutuba	al-kutubi
# pl.cons	kutubu		kutuba		kutubi

# Here is a typical noun (mudarris "teacher") with a different kind of
# plural, a so-called "strong masculine plural", which has its own declension.

# Form		Nominative	Accusative	Genitive
# ---------------------------------------------------------------
# sg.indef	mudarrisun	mudarrisan	mudarrisin
# sg.def	al-mudarrisu	al-mudarrisa	al-mudarrisi
# sg.cons	mudarrisu	mudarrisa	mudarrisi
#
# du.indef	mudarrisaani	mudarrisayni	mudarrisayni
# du.def	al-mudarrisaani	al-mudarrisayni	al-mudarrisayni
# du.cons	mudarrisaa	mudarrisay	mudarrisay
#
# pl.indef	mudarrisuuna	mudarrisiina	mudarrisiina
# pl.def	al-mudarrisuuna	al-mudarrisiina	al-mudarrisiina
# pl.cons	mudarrisuu	mudarrisii	mudarrisii

# Here, we make heavy use of macros.

# This macro says: Every time an expression of the form
# three-different-form-decl(...) occurs, replace it with the text that
# comes after.
# The parameters will be substituted into the text.  The braces that
# denote the macro's text do *NOT* form part of the text that is substituted.
# Note that macro substitutions are processed recursively: If the text
# of a macro substitution contains calls to other macros, they will also
# be processed.  This makes "inheritance" very easy to implement.

# This macro is used for a particular paradigm corresponding to a
# particular number of a word.

# Emit the nine entries (3 cases x 3 states) for one number of a noun whose
# indefinite, definite and construct states may each use a different stem.
#   indef-form, def-form, cons-form -- stem used for each state
#   indef-*, def-*, cons-*          -- case endings appended to that stem
#   morph-num, sem-num, gend        -- features copied onto every entry
# Definite forms are additionally passed through add-al(), which prefixes
# the article.
def three-different-form-decl(indef-form, def-form, cons-form,
			      indef-nom, indef-acc, indef-gen,
 			      def-nom, def-acc, def-gen,
	  	              cons-nom, cons-acc, cons-gen,
	 	              morph-num, sem-num, gend) {
  indef-form.indef-nom: morph-num, sem-num, gend, nom, indef;
  indef-form.indef-acc: morph-num, sem-num, gend, acc, indef;
  indef-form.indef-gen: morph-num, sem-num, gend, gen, indef;

  add-al(def-form.def-nom): morph-num, sem-num, gend, nom, def;
  add-al(def-form.def-acc): morph-num, sem-num, gend, acc, def;
  add-al(def-form.def-gen): morph-num, sem-num, gend, gen, def;

  cons-form.cons-nom: morph-num, sem-num, gend, nom, cons;
  cons-form.cons-acc: morph-num, sem-num, gend, acc, cons;
  cons-form.cons-gen: morph-num, sem-num, gend, gen, cons;
}

# Prefix the definite article al- to FORM.
# It's questionable whether we should do this, but the article is also
# assimilated to a following coronal consonant, e.g. ar-rajul, as-sigaara,
# ath-thalj, an-nuur, aDH-DHuhr, etc.  The regex captures the consonant
# (th/sh/dh, DH, or one of the single letters in the class) right after
# "al-" and the replacement puts a copy of it in place of the "l".
# Arguments to regsub here are (regex, replacement, string).
def add-al(form) regsub('^al-([std]h|DH|[tdszrnTDSZL])', 'a\1-\1', al-.form)

# Same as three-different-form-decl(), for the common case where all three
# states are built on one stem: FORM is passed as the indefinite, definite
# and construct stem alike, and every other argument is forwarded as is.
def three-form-decl(form, indef-nom, indef-acc, indef-gen,
		    def-nom, def-acc, def-gen,
		    cons-nom, cons-acc, cons-gen,
		    morph-num, sem-num, gend) {
  three-different-form-decl(form, form, form,
			    indef-nom, indef-acc, indef-gen,
 			    def-nom, def-acc, def-gen,
	  	            cons-nom, cons-acc, cons-gen,
	 	            morph-num, sem-num, gend)
}

# Using the above macro, we create two more macros to handle two common
# paradigm types: Accusative and genitive are the same, and the
# definite is either the same as the indefinite (two-form-decl-1) or
# the same as the construct (two-form-decl-2).

# Paradigm with two case endings per state (nominative vs. oblique, the
# oblique serving as both accusative and genitive) in which the indefinite
# and definite share their endings (non-cons-*) and only the construct
# state has its own (cons-*).
def two-form-decl-1(form, non-cons-nom, non-cons-obl,
		    cons-nom, cons-obl, morph-num, sem-num, gend) {
  three-form-decl(form, non-cons-nom, non-cons-obl, non-cons-obl,
		  non-cons-nom, non-cons-obl, non-cons-obl,
		  cons-nom, cons-obl, cons-obl,
		  morph-num, sem-num, gend)
}

# Paradigm with two case endings per state (nominative vs. oblique, the
# oblique serving as both accusative and genitive) in which the definite
# and construct share their endings (non-indef-*) and only the indefinite
# has its own (indef-*).
def two-form-decl-2(form, indef-nom, indef-obl,
		    non-indef-nom, non-indef-obl, morph-num, sem-num, gend) {
  three-form-decl(form, indef-nom, indef-obl, indef-obl,
		  non-indef-nom, non-indef-obl, non-indef-obl,
		  non-indef-nom, non-indef-obl, non-indef-obl,
		  morph-num, sem-num, gend)
}

# In turn we create macros for particular paradigms: strong masculine ("uun"),
# strong feminine ("aat"), dual, and basic triptote (the paradigm for
# "kitaab" above and, in general, most singulars).

# Note that an alternative to using braces is to put the macro text on
# the same line as the `def' part of the macro (backslashes can be used
# to join multiple lines together).

# Strong masculine plural: -uuna/-iina, construct -uu/-ii; always pl, m.
def uun-plural(form) two-form-decl-1(form, uuna, iina, uu, ii, pl, pl-X, m)
# Strong feminine plural: indefinite -aatun/-aatin, otherwise -aatu/-aati;
# always pl, f.
def aat-plural(form) \
  two-form-decl-2(form, aatun, aatin, aatu, aati, pl, pl-X, f)
# Dual: -aani/-ayni, construct -aa/-ay; gender supplied by the caller.
def dual(form, gend) \
  two-form-decl-1(form, aani, ayni, aa, ay, du, du-X, gend)
# Basic triptote: indefinite -un/-an/-in, definite and construct -u/-a/-i;
# number and gender features supplied by the caller.
def triptote(form, morph-num, sem-num, gend) \
  three-form-decl(form, un, an, in, u, a, i, u, a, i, morph-num, sem-num, gend)

# Here we define macros for full paradigms for words.  Note how semicolons
# are not used, because they are supplied by the macro text itself.
# (Consult the text for three-form-decl() above, and remember that the
# braces denoting the macro text are not actually part of the text.  This
# means that if you really want braces as the outermost thing in some
# macro text, you'll need to supply two levels of braces.)

# Masculine noun of class `thing' with a broken plural.
#   sing -- singular stem, also used as the word's name and dual stem
#   plur -- broken-plural stem
# The plural is given morphological features sg, f while keeping semantic
# number pl-X, so the features it exposes for agreement are those of a
# feminine singular.
def thing(sing, plur) {
  word sing:N(thing) {
    triptote(sing, sg, sg-X, m)
    dual(sing, m)
    triptote(plur, sg, pl-X, f)
  }
}

# Feminine noun of class `thing' with a broken plural.
#   sing -- singular stem without its final t (e.g. sigaara); the t is
#           appended before the case endings and the dual endings
#   plur -- broken-plural stem
# As in thing(), the plural is morphologically sg, f but semantically pl-X.
def fem-thing(sing, plur) {
  word sing:N(thing) {
    triptote(sing.t, sg, sg-X, f)
    dual(sing.t, f)
    triptote(plur, sg, pl-X, f)
  }
}

# Noun of class `person' with a broken plural.
#   sing -- singular stem, also used as the word's name and dual stem
#   plur -- broken-plural stem
#   gend -- gender feature (m or f) applied to all three numbers
# Unlike thing(), the plural here is morphologically pl and keeps GEND.
def person(sing, plur, gend) {
  word sing:N(person) {
    triptote(sing, sg, sg-X, gend)
    dual(sing, gend)
    triptote(plur, pl, pl-X, gend)
  }
}

# Shorthands for person() with the gender fixed to masculine.
def male(sing, plur) {
  person(sing, plur, m)
}

# Shorthands for person() with the gender fixed to feminine.
def female(sing, plur) {
  person(sing, plur, f)
}

# Masculine noun of class `person' whose plural is the strong masculine
# (-uuna) plural formed on the same stem, so only one stem is needed.
def strong-male(sing) {
  word sing:N(person) {
    triptote(sing, sg, sg-X, m)
    dual(sing, m)
    uun-plural(sing)
  }
}

# Here we define the actual words.  Note how short these definitions are,
# specifying only what's unpredictable.

thing(kitaab, kutub)
# kalb "dog"; its plural kilaab is used in the example sentences further
# down in this file.
thing(kalb, kilaab)
thing(Harf, Huruuf)
thing(dars, duruus)
thing(waqt, _awqaat)
fem-thing(sigaara, sagaayir)
fem-thing(madiina, mudun)
male(rajul, rijaal)
male(walad, _awlaad)
male(Taalib, Tullaab)
# The plural of bint "girl, daughter" is banaat.
female(bint, banaat)
strong-male(mudarris)

# "woman": irregular enough to be written out by hand.  The definite uses
# a different stem (mar_at, giving al-mar_at...) from the indefinite and
# construct (imra_at) in both singular and dual, and the plural (nisaa_)
# is built on an unrelated stem.
word imra_a:N(person) {
  three-different-form-decl(imra_at, mar_at, imra_at,
			    un, an, in, u, a, i, u, a, i,
                            sg, sg-X, f)
  three-different-form-decl(imra_at, mar_at, imra_at,
			    aani, ayni, ayni, aani, ayni, ayni, aa, ay, ay,
                            du, du-X, f)
  triptote(nisaa_, pl, pl-X, f)
}

# Masculine `person' noun whose singular construct state takes long-vowel
# case endings (-uu/-aa/-ii) instead of the short -u/-a/-i used in the
# definite.  The dual is regular and the plural is a triptote on PLUR.
def extended_construct_word(stem, plur) {
  word stem:N(person) {
    three-form-decl(stem, un, an, in, u, a, i, uu, aa, ii, sg, sg-X, m)
    dual(stem, m)
    triptote(plur, pl, pl-X, m)
  }
}

# _ax "brother", plural _ixwaan.
extended_construct_word(_ax, _ixwaan)
# _ab "father", plural _aabaa_ (_abnaa_ would be the plural of "son").
extended_construct_word(_ab, _aabaa_)

# Typical paradigms:

# 1sg    'aktaa      'aktaa    'akta    |
# 2sg.m  taktaa      taktaa    takta    |
# 2sg.f  taktayna    taktay    taktay   |
# 3sg.m  yaktaa      yaktaa    yakta    |
# 3sg.f  taktaa      taktaa    takta    |
# 2du    taktayaani  taktayaa  taktayaa |
# 3du.m  yaktayaani  yaktayaa  yaktayaa |
# 3du.f  taktayaani  taktayaa  taktayaa |
# 1pl    naktaa      naktaa    nakta    |
# 2pl.m  taktawna    taktaw    taktaw   |
# 2pl.f  taktayna    taktayna  taktayna |
# 3pl.m  yaktawna    yaktaw    yaktaw   |
# 3pl.f  yaktayna    yaktayna  yaktayna |
# 
# 1sg    'aktuu      'aktuwa   'aktu    | 'aktii      'aktiya   'akti    
# 2sg.m  taktuu      taktuwa   taktu    | taktii      taktiya   takti    
# 2sg.f  taktiina    taktii    taktii   | taktiina    taktii    taktii   
# 3sg.m  yaktuu      yaktuwa   yaktu    | yaktii      yaktiya   yakti    
# 3sg.f  taktuu      taktuwa   taktu    | taktii      taktiya   takti    
# 2du    taktuwaani  taktuwaa  taktuwaa | taktiyaani  taktiyaa  taktiyaa 
# 3du.m  yaktuwaani  yaktuwaa  yaktuwaa | yaktiyaani  yaktiyaa  yaktiyaa 
# 3du.f  taktuwaani  taktuwaa  taktuwaa | taktiyaani  taktiyaa  taktiyaa 
# 1pl    naktuu      naktuwa   naktu    | naktii      naktiya   nakti    
# 2pl.m  taktuuna    taktuu    taktuu   | taktuuna    taktuu    taktuu   
# 2pl.f  taktuuna    taktuuna  taktuuna | taktiina    taktiina  taktiina 
# 3pl.m  yaktuuna    yaktuu    yaktuu   | yaktuuna    yaktuu    yaktuu   
# 3pl.f  yaktuuna    yaktuuna  yaktuuna | yaktiina    yaktiina  yaktiina 


# Past-tense paradigm for verbs with up to two stem variants.
#   formv -- stem used before vowel-initial endings (-a, -at, -aa, -ataa,
#            -uu)
#   formc -- stem used before consonant-initial endings (-tu, -ta, -ti,
#            -tumaa, -naa, -tum, -tunna, -na)
# First-person entries carry no gender, and the 2nd-person dual is not
# marked for gender either.
def two-form-past(formv, formc) {
  formc.tu: past, 1st, sg;
  formc.ta: past, 2nd, m, sg;
  formc.ti: past, 2nd, f, sg;
  formv.a: past, 3rd, m, sg;
  formv.at: past, 3rd, f, sg;

  formc.tumaa: past, 2nd, du;
  formv.aa: past, 3rd, m, du;
  formv.ataa: past, 3rd, f, du;

  formc.naa: past, 1st, pl;
  formc.tum: past, 2nd, m, pl;
  formc.tunna: past, 2nd, f, pl;
  formv.uu: past, 3rd, m, pl;
  formc.na: past, 3rd, f, pl;
}

# Past tense of a third-weak verb whose stem shows -ay- before most
# endings (e.g. ra_ + ay + tu).  The 3rd-person singular forms, the 3rd
# feminine dual and the 3rd masculine plural drop the -ay- and attach
# -aa, -at, -ataa and -aw directly to FORM.
def 3rd-weak-past-ay(form) {
  form.ay.tu: past, 1st, sg;
  form.ay.ta: past, 2nd, m, sg;
  form.ay.ti: past, 2nd, f, sg;
  form.aa: past, 3rd, m, sg;
  form.at: past, 3rd, f, sg;

  form.ay.tumaa: past, 2nd, du;
  form.ay.aa: past, 3rd, m, du;
  form.ataa: past, 3rd, f, du;

  form.ay.naa: past, 1st, pl;
  form.ay.tum: past, 2nd, m, pl;
  form.ay.tunna: past, 2nd, f, pl;
  form.aw: past, 3rd, m, pl;
  form.ay.na: past, 3rd, f, pl;
}

# Past tense of a third-weak verb whose stem shows -aw- before most
# endings.  Parallel to 3rd-weak-past-ay: the 3rd-person singular forms,
# the 3rd feminine dual and the 3rd masculine plural attach -aa, -at,
# -ataa and -aw directly to FORM.
def 3rd-weak-past-aw(form) {
  form.aw.tu: past, 1st, sg;
  form.aw.ta: past, 2nd, m, sg;
  form.aw.ti: past, 2nd, f, sg;
  form.aa: past, 3rd, m, sg;
  form.at: past, 3rd, f, sg;

  form.aw.tumaa: past, 2nd, du;
  form.aw.aa: past, 3rd, m, du;
  form.ataa: past, 3rd, f, du;

  form.aw.naa: past, 1st, pl;
  form.aw.tum: past, 2nd, m, pl;
  form.aw.tunna: past, 2nd, f, pl;
  form.aw: past, 3rd, m, pl;
  form.aw.na: past, 3rd, f, pl;
}

# Past tense of a third-weak verb whose stem shows -ii- before
# consonant-initial endings and -iy- before vowel-initial ones
# (e.g. laq + ii + tu, laq + iya).  The 3rd masculine plural replaces the
# weak vowel altogether with -uu.
def 3rd-weak-past-ii(form) {
  form.ii.tu: past, 1st, sg;
  form.ii.ta: past, 2nd, m, sg;
  form.ii.ti: past, 2nd, f, sg;
  form.iya: past, 3rd, m, sg;
  form.iyat: past, 3rd, f, sg;

  form.ii.tumaa: past, 2nd, du;
  form.iy.aa: past, 3rd, m, du;
  form.iy.ataa: past, 3rd, f, du;

  form.ii.naa: past, 1st, pl;
  form.ii.tum: past, 2nd, m, pl;
  form.ii.tunna: past, 2nd, f, pl;
  form.uu: past, 3rd, m, pl;
  form.ii.na: past, 3rd, f, pl;
}

# Past tense of a strong verb: the two-form paradigm with the same stem
# used before both vowel-initial and consonant-initial endings.
def strong-past(form) {
  two-form-past(form, form)
}

# In general, almost all Arabic present-tense verbs of a particular mood
# can be defined using five forms.  Verbs with a hamza in the first radical
# have a problem in the first-singular (two glottal stops would end up
# close together); this is handled automatically in gen-pres() below by a
# regsub() call on the first-singular form.

# Emit the thirteen present-tense entries for one mood.  Each form passed
# in already includes its ending; this macro only prefixes the person
# marker (_, t, y or n) and attaches the features.
#   mood       -- mood feature placed on every entry
#   fsing      -- used for 1sg, 2sg.m, 3sg.m, 3sg.f and 1pl
#   fsing-fem  -- used for 2sg.f
#   fdual      -- used for all dual entries
#   fplur-masc -- used for 2pl.m and 3pl.m
#   fplur-fem  -- used for 2pl.f and 3pl.f
def gen-pres(mood, fsing, fsing-fem, fdual, fplur-masc, fplur-fem) {
  # This shows how you can use regular expressions if need be.
  # regsub(regex, repl, string) is a special built-in that does regular-
  # expression substitution on STRING, replacing all occurrences of
  # REGEX with REPL. Regular-expression syntax is as in Python.
  # In this case, Arabic verbs have a phonetic rule that eliminates
  # two glottal stops occurring near each other at the beginning of a
  # word.  For example, _a_kulu -> _aakulu, and _u_kalu -> _uukalu.
  # (That is, the vowel is lengthened.)
  # General shape of the call:
  # _ . regsub(foo, bar, fsing): pres, mood, 1st, sg;
  _ . regsub('^([aiu])_', '\1\1', fsing): pres, mood, 1st, sg;
  # Without the glottal-stop rule this entry would simply be:
  # _.fsing: pres, mood, 1st, sg;
  t.fsing: pres, mood, 2nd, m, sg;
  t.fsing-fem: pres, mood, 2nd, f, sg;
  y.fsing: pres, mood, 3rd, m, sg;
  t.fsing: pres, mood, 3rd, f, sg;

  t.fdual: pres, mood, 2nd, du;
  y.fdual: pres, mood, 3rd, m, du;
  t.fdual: pres, mood, 3rd, f, du;

  n.fsing: pres, mood, 1st, pl;
  t.fplur-masc: pres, mood, 2nd, m, pl;
  t.fplur-fem: pres, mood, 2nd, f, pl;
  y.fplur-masc: pres, mood, 3rd, m, pl;
  y.fplur-fem: pres, mood, 3rd, f, pl;
}

# The "two-form" present uses normal (non-3rd-weak) endings but may
# have two forms of the root, one for vocalic endings (almost all of them)
# and one for consonant endings (only the feminine plural).  This
# encompasses 2nd-weak verbs and doubled verbs, and (trivially) strong verbs.

# Indicative: endings -u, -iina, -aani, -uuna on FORMV; the feminine
# plural is FORMC + na.
def two-form-pres-indic(formv, formc) {
  gen-pres(indic, formv.u, formv.iina, formv.aani, formv.uuna, formc.na)
}

# Subjunctive: endings -a, -ii, -aa, -uu on FORMV; the feminine plural is
# FORMC + na, the same as in the indicative.
def two-form-pres-subj(formv, formc) {
  gen-pres(subj, formv.a, formv.ii, formv.aa, formv.uu, formc.na)
}

# The jussive is different because the base form (fsing) has no ending.
# This means that it may assume the consonant form instead of the vowel
# form, or may have a number of variants (in particular, for doubled
# verbs).  So the base form needs to be given explicitly.

# Jussive: BASE is used unchanged as the endingless singular form; all
# other forms are built exactly as in the subjunctive.
def two-form-pres-juss(base, formv, formc) {
  gen-pres(juss, base, formv.ii, formv.aa, formv.uu, formc.na)
}

# All three present-tense moods of a strong verb: one stem serves as the
# vowel form, the consonant form and the jussive base.
def strong-pres(form) {
  two-form-pres-indic(form, form)
  two-form-pres-subj(form, form)
  two-form-pres-juss(form, form, form)
}

# All three present-tense moods of a 2nd-weak verb.  The jussive base is
# the consonant form FORMC (e.g. akun rather than akuun).
def 2nd-weak-pres(formv, formc) {
  two-form-pres-indic(formv, formc)
  two-form-pres-subj(formv, formc)
  two-form-pres-juss(formc, formv, formc)
}

# All three present-tense moods of a doubled verb.  The jussive is emitted
# three times, once for each variant of the base form: FORMC, FORMV + a
# and FORMV + i.  The non-base jussive entries are therefore generated
# three times over.
def doubled-pres(formv, formc) {
  two-form-pres-indic(formv, formc)
  two-form-pres-subj(formv, formc)
  two-form-pres-juss(formc, formv, formc)
  two-form-pres-juss(formv.a, formv, formc)
  two-form-pres-juss(formv.i, formv, formc)
}

# Verbs whose third radical is a /w/ or a /y/ have all manner of exceptional
# forms; easiest just to list them.  In general, there are three types,
# depending on whether the base singular forms end in -aa, -ii, or -uu.

# Present tense (all three moods) of a third-weak verb whose base singular
# ends in -aa.  Indicative and subjunctive share the singular form; the
# jussive differs from the subjunctive only in that form.
def 3rd-weak-pres-aa(form) {
  gen-pres(indic, form.aa, form.ayna, form.ayaani, form.awna, form.ayna)
  gen-pres(subj, form.aa, form.ay, form.ayaa, form.aw, form.ayna)
  # Note the shortened vowel here.
  gen-pres(juss, form.a, form.ay, form.ayaa, form.aw, form.ayna)
}

# Present tense (all three moods) of a third-weak verb whose base singular
# ends in -ii.  The subjunctive singular is -iya; the jussive differs from
# the subjunctive only in its singular form.
def 3rd-weak-pres-ii(form) {
  gen-pres(indic, form.ii, form.iina, form.iyaani, form.uuna, form.iina)
  gen-pres(subj, form.iya, form.ii, form.iyaa, form.uu, form.iina)
  # Note the shortened vowel here.
  gen-pres(juss, form.i, form.ii, form.iyaa, form.uu, form.iina)
}

# Present tense (all three moods) of a third-weak verb whose base singular
# ends in -uu.  The subjunctive singular is -uwa; the jussive differs from
# the subjunctive only in its singular form.  In the indicative the
# masculine and feminine plural forms are identical (-uuna).
def 3rd-weak-pres-uu(form) {
  gen-pres(indic, form.uu, form.iina, form.uwaani, form.uuna, form.uuna)
  gen-pres(subj, form.uwa, form.ii, form.uwaa, form.uu, form.uuna)
  # Note the shortened vowel here.
  gen-pres(juss, form.u, form.ii, form.uwaa, form.uu, form.uuna)
}

# Define a complete 2nd-weak verb.
#   pastv -- past stem before vowel endings; also the name of the word
#   props -- family (and any attributes) placed after the colon in `word'
#   pastc -- past stem before consonant endings
#   presv -- present stem before vowel endings
#   presc -- present stem before consonant endings (also the jussive base)
def 2nd-weak-verb(pastv, props, pastc, presv, presc) {
  word pastv: props {
    two-form-past(pastv, pastc)
    2nd-weak-pres(presv, presc)
  }
}

# Note the way that macro calls can be constructed as well.  Here, the
# value of PAST_TYPE is the suffix at the end of the macro name.

# Define a complete third-weak verb.
#   past_stem -- past stem without the weak ending
#   props     -- family (and any attributes) placed after the colon
#   past_type -- ay, aw or ii; selects the 3rd-weak-past-* macro and is
#                also appended to PAST_STEM to form the name of the word
#   pres_stem -- present stem without the weak ending
#   pres_type -- aa, ii or uu; selects the 3rd-weak-pres-* macro
def 3rd-weak-verb(past_stem, props, past_type, pres_stem, pres_type) {
  word past_stem . past_type: props {
    3rd-weak-past- . past_type(past_stem)
    3rd-weak-pres- . pres_type(pres_stem)
  }
}

# Define a complete strong verb.
#   past  -- past stem; also the name of the word
#   props -- family (and any attributes) placed after the colon
#   pres  -- present stem, without the person prefix
def strong-verb(past, props, pres) {
  word past: props {
    strong-past(past)
    strong-pres(pres)
  }
}

# The verbs themselves.  The second argument names the family (and so the
# subcategorization frame) together with the semantic predicate.
2nd-weak-verb(kaan, TransV(pred=be), kun, akuun, akun)
2nd-weak-verb(naam, IntransV(pred=sleep), nim, anaam, anam)
2nd-weak-verb(qaal, SayV(pred=say), qul, aquul, aqul)
strong-verb(katab, TransV(pred=write), aktub)
strong-verb(dhahab, IntransV(pred=go), adhhab)
# Note that the following verb, which begins with a glottal stop,
# will have a modification made to it in the first-person singular present.
# (See above.)  It also belongs to two families at once.
strong-verb(_akal, IntransV TransV (pred=eat), a_kul)
# The resulting word names are ra_ay, _a9Tay and laqii (stem + past type).
3rd-weak-verb(ra_, TransV(pred=see), ay, ar, aa)
3rd-weak-verb(_a9T, DitransV(pred=give), ay, u9T, ii)
3rd-weak-verb(laq, TransV(pred=find), ii, alq, aa)
strong-verb(9araf, ThinkV(pred=know), a9rif) # see also 9alam
strong-verb(tafakkar, ThinkV(pred=think), atafakkar)
strong-verb(ta9allam, ThinkV(pred=learn), ata9allam)

# Ordinary nouns: a third-person, non-resumptive n with index X.  The
# case, number, gender and state features come from the word entries.
family N {
  entry: n<2>[X, 3rd, nonres]: X:sem-obj(*);
}

# Interrogative pronouns.  Each entry takes a sentence that is missing one
# noun and yields a sentence; the three entries differ in what is missing:
# a resumptive noun, a non-resumptive nominative (sought with `|'), or a
# non-resumptive accusative.
family InterrogPro(Pro) {
  entry: s/*(s/n<2>[res]);
  entry: s/*(s|n<2>[nonres,nom]);
  entry: s/*(s/n<2>[nonres,acc]);
  member: maa, man;
}

# Independent personal pronouns: always nominative, definite and
# non-resumptive.
family Pro {
  entry: n<2>[X, nom, def, nonres]: X:sem-obj(*);
  member: pro;
}

# Relative pronoun: combines with a sentence missing one noun and then
# modifies a preceding non-resumptive noun.  The three entries mirror
# those of InterrogPro (resumptive, nominative, or accusative gap).
family Rel {
  entry: (n<~2>[CASE,nonres]\n<~2>[nonres])/*(s/n<2>[res]);
  entry: (n<~2>[CASE,nonres]\n<~2>[nonres])/*(s|n<2>[nonres,nom]);
  entry: (n<~2>[CASE,nonres]\n<~2>[nonres])/*(s/n<2>[nonres,acc]);
  member: al-ladhii;
}

# Coordination.  The first entry joins two nouns that agree in CASE and
# STATE and yields a plural noun; the second joins two sentential
# categories with the same remaining arguments ($1).
family AndConj(Conj) {
  entry: n[pl, CASE, STATE] \* n[CASE, STATE] /* n[CASE, STATE];
  entry: s$1 \* s$1 /* s$1;
  member: wa;
}

# Demonstratives: take a noun to the right and yield a definite,
# non-resumptive noun with the same index, adding a <det> relation.
family Det(indexRel=det) {
  entry: n<2>[X, def, nonres] /^ n<2>[X]: X:sem-obj(<det>*);
  member: haadhaa, dhaalik;
}

# good luck on this one! construct even more complicated ones!  
#
#ar-rajulu al-ladhii kataba al-kutuba ra_aa wa _a9Taa li binti hu al-kilaaba al-latii akalat sagaayira mudarrisii al-waladi

#"the man that wrote the books saw and gave to his daughter the dogs that ate the cigarettes of the boy's teachers."

# Works, correctly:

#ar-rajulu al-ladhii kataba al-kutuba ra_aa wa _a9Taa li binti hu as-sagaayira al-latii qultu inna al-waladu tafakkara anna al-mar_atu _a9Tat haa li ha_ulaahi al-mudarrisiina

#Bad

#ar-rajula al-ladhii katabat al-kutuba ra_aa wa _a9Taa li binti hu as-sagaayira al-latii qultu inna al-waladu tafakkara anna al-mar_atu _a9Tat haa li ha_ulaahi al-mudarrisiina

#"the man that wrote the books saw and gave to his daughter the cigarettes that I said that the boy thought that the woman gave them to those teachers"

# Fails, correctly:

#ar-rajulu al-ladhii kataba al-kutuba ra_aa wa _a9Taa li binti hu as-sagaayira al-latii qultu inna al-waladu tafakkara anna al-mar_atu _a9Tat hu li ha_ulaahi al-mudarrisiina

# Works:

# ar-rajulu al-ladhii al-waladu _a9Taa as-sagaayira li binti hu dhahaba
# ar-rajulu _a9Taa li binti hu as-sagaayira
# ar-rajulu al-ladhii waladu hu _a9Taa as-sagaayira li al-binti dhahaba

# Won't work:

# ar-rajulu al-ladhii al-waladu _a9Taa li binti hu as-sagaayira dhahaba
# ar-rajulu al-ladhii waladu hu _a9Taa li al-binti as-sagaayira dhahaba



# Possessive clitics: attach to a preceding construct-state noun and yield
# a definite noun, adding a <poss> relation.  The second entry yields a
# noun that is additionally looking for a resumptive noun to its right.
family PossClitic(Cli, indexRel=poss) {
  entry: n<~1>[X, def, nonres] \* n<1>[X, cons]: X:sem-obj(<poss>*);
  entry: (n<~1>[X, def, nonres] / n<2>[res]) \* n<1>[X, cons]: X:sem-obj(<poss>*);
  member: ii, ka, ki, hu, haa, naa, kum, kunna, hum, hunna;
}

# Object clitics: attach to a preceding verb that still needs an object
# and a nominative subject, and fill the object slot.  The second entry
# leaves the result looking for a resumptive noun to its right.
# NOTE(review): indexRel=poss is the same value as in PossClitic; confirm
# whether that is intended for object clitics.
family ObjClitic(Cli, indexRel=poss) {
  entry: (s$1 | n<3>[nonres]) \ (s$1 / n<2>[nonres] | n<3>[nom,nonres]);
  entry: (s$1 / n<~2>[res] | n<3>[nonres]) \ (s$1 / n<2>[nonres] | n<3>[nom,nonres]);
  member: nii, ka, ki, hu, haa, naa, kum, kunna, hum, hunna;
}

# Adjectives: follow the noun they modify, share its feature structure and
# add an <adj> relation.  No members are listed in this family and no
# adjective words are defined in the part of the file shown here.
family Adj(indexRel=adj) {
  entry: n<2>[X, NUM, GEND, CASE, STATE] \ n<2>[X]:
         X:sem-obj(<adj>*);
}

# Prepositions: take a genitive, non-resumptive noun to the right and
# yield a pp whose lex feature records which preposition it is (verbs
# select on this, e.g. pp[lex=li]).  No semantics of its own.
family Prep-Nom(Prep, indexRel="*NoSem*") {
  # The pp<~3> notation generates an 'inheritsFrom' tag rather than
  # an 'id' tag for the feature structure.
  entry: pp<~3>[lex=*] /< n<3>[gen,nonres];
  member: li, fii;
}

# Complementizers: take a sentence and yield an sbar whose lex feature
# records which complementizer it is (SayV selects lex=inna, ThinkV
# selects lex=anna).  No semantics of its own.
family Comp(indexRel="*NoSem*") {
  entry: sbar<~1>[lex=*] / s<1>;
  member: anna, inna;
}

# Intransitive verbs.  The subject is sought with `|', must be nominative,
# non-construct and non-resumptive, and shares the verb's NUM/PERS/GEND
# features (which is what enforces agreement).
family IntransV(V) {
  entry: s[E] | n<2>[X,NUM,PERS,GEND,non-cons,nonres,nom]:
	E:action(* ^ <Actor>X:animate-being);
}

# Verbs of saying: like IntransV plus a sentential complement to the
# right that must be introduced by inna.
family SayV(V) {
  entry: s[E] / sbar[Z, lex=inna] | n<2>[X,NUM,PERS,GEND,non-cons,nonres,nom]:
        E:action(* ^ <Actor>X:animate-being ^ <Situation>Z);
}

# Verbs of thinking/knowing: like SayV, but the sentential complement must
# be introduced by anna.
family ThinkV(V) {
  entry: s[E] / sbar[Z, lex=anna] | n<2>[X,NUM,PERS,GEND,non-cons,nonres,nom]:
        E:action(* ^ <Actor>X:animate-being ^ <Situation>Z);
}

# Transitive verbs: like IntransV plus an accusative, non-resumptive
# object to the right, which becomes the <Patient>.
family TransV(V) {
  entry: s[E] / n[Y,acc,nonres] | n<2>[X,NUM,PERS,GEND,non-cons,nonres,nom]:
        E:action(* ^ <Actor>X:animate-being ^ <Patient>Y:sem-obj);
}

# Ditransitive verbs: an accusative object (<Patient>) plus a pp headed by
# li (<Recipient>), in addition to the agreeing nominative subject.
family DitransV(V) {
  # The first slash (on the pp) is marked with a mode allowing backward xcomp.
  entry: s[E] /< pp[Z,lex=li] / n[Y,acc,nonres] | n<2>[X,NUM,PERS,GEND,non-cons,nonres,nom]:
        E:action(* ^ <Actor>X:animate-being ^ <Patient>Y:sem-obj ^
	         <Recipient>Z:animate-being) ;
}

# Regression sentences.  Each item is a sentence followed by the number of
# parses it is expected to receive; 0 marks a sentence that must be
# rejected.
testbed {
  # different states of subject
  rajulun dhahaba: 1;
  ar-rajulu dhahaba: 1;
  rajulu dhahaba: 0;

  # subject-verb agreement
  rajulun dhahabuu: 0;
  ar-rajulu dhahabuu: 0;
  rajulu dhahabuu: 0;

  # gender agreement
  ar-rajulu dhahaba: 1;
  ar-rajulu dhahabat: 0;
  al-bintu dhahaba: 0;
  al-bintu dhahabat: 1;

  # possession
  rajulun hu dhahaba: 0;
  ar-rajulu hu dhahaba: 0;
  rajulu hu dhahaba: 1;

  # subject case
  ar-rajula dhahaba: 0;
  ar-rajuli dhahaba: 0;

  # case in construct phrase
  _axuu ar-rajuli dhahaba: 1;
  _axuu ar-rajula dhahaba: 0;
  _axuu ar-rajulu dhahaba: 0;

  # construct state in construct phrase
  _axun ar-rajuli dhahaba: 0;
  al-_axu ar-rajuli dhahaba: 0;

  # object case
  ar-rajulu ra_aa al-kitaaba: 1;
  ar-rajulu ra_aa al-kitaabi: 0;
  ar-rajulu ra_aa al-kitaabu: 0;

  # preposition case
  ar-rajulu _a9Taa al-kitaaba li al-waladi: 1;
  ar-rajulu _a9Taa al-kitaaba li al-waladu: 0;
  ar-rajulu _a9Taa al-kitaaba li al-walada: 0;

  # subcategorization
  ar-rajulu ra_aa al-kitaaba li al-waladi: 0;

  # backward xcomp
  ar-rajulu _a9Taa li al-waladi al-kitaaba: 1;
  _a9Taa ar-rajulu li al-waladi al-kitaaba: 1;

  # object clitics
  ana ra_aytu hu: 1;
  ra_aytu hu ana: 1;
  ra_aytu ana hu: 0;
  hu ra_aytu ana: 0;
  hu ana ra_aytu: 0;

  huwa ra_aa nii: 1;
  huwa ra_aa ii: 0;
  huwa ra_aa ana: 0;

  ar-rajulu _a9Taa haa li al-waladi: 1;
  _a9Taa haa ar-rajulu li al-waladi: 1;

  # relative clauses
  # "I gave it to the man that the girl saw him"
  # NOTE(review): this is the only item expecting more than one parse;
  # confirm that 3 is the intended count.
  _a9Taytu haa li ar-rajuli al-ladhii al-bintu ra_at hu: 3;
  # "I gave it to the man that the girl saw her"
  _a9Taytu haa li ar-rajuli al-ladhii al-bintu ra_at haa: 0;
  # "I gave it to the man that the girl saw"
  _a9Taytu haa li ar-rajuli al-ladhii al-bintu ra_at: 0;
  # "I gave it to the man that the girl saw the boy"
  _a9Taytu haa li ar-rajuli al-ladhii al-bintu ra_at al-walada: 0;
}