Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
#############################################################
# #
# arabic.ccg #
# #
#############################################################
# Author: Ben Wing <ben@666.com>
# Date: April 2006
# This is a grammar for a fragment of Arabic. It's particularly
# useful for demonstrating the extended use of macros to handle
# complicated morphological inflections.
# See the `tiny' grammar (tiny.ccg) for more info about the format
# of this file.
# Feature declarations: agreement features used on categories, semantic
# features attached to nominals, and the semantic type ontology.
# (See tiny.ccg for the meaning of the <...> annotations.)
feature {
  # Agreement features.
  CASE<2>: nom, acc, gen;
  NUM<2>: sg, du, pl;
  GEND<2>: m, f;
  # `non-cons' is a supertype with two subtypes, `indef' and `def'.
  STATE<2>: cons, non-cons {indef, def};
  ANIM<2>: hum, nonhum;
  PERS<2>: 1st, 2nd, 3rd;
  # Used by the relative-clause and clitic families to tell resumptive
  # noun phrases (res) from ordinary ones (nonres).
  RESUMPTIVE<2>: nonres, res;

  # Semantic counterparts of NUM and PERS.  Word entries list them next
  # to the morphological values (e.g. `pl, pl-X').
  SEM-NUM<X:NUM>: sg-X, du-X, pl-X;
  SEM-PERS<X:PERS>: 1st-X, 2nd-X, 3rd-X;

  # Verbal features.
  TENSE<E>: past, pres;
  MOOD<E>: indic, subj, juss;

  # Here's a more complicated hierarchy, from the original tiny grammar.
  ontology: sem-obj {
    phys-obj {
      animate-being {
        person
      },
      thing
    },
    situation {
      change {
        action
      },
      state
    }
  };
}
# Combinatory rule configuration: which type-raising and unary
# type-changing rules are available to the parser.
rule {
  # Drop the default type-raising rules, then list the ones wanted here.
  no typeraise;
  typeraise +: n => s;
  typeraise - $: n => s;
  typeraise - $: pp => s;
  typeraise - $: pp/n => s;

  # Lets a clause appear without its nominative argument.
  # NOTE(review): presumably this models subject pro-drop -- confirm.
  typechange: s$1 | n[nom] => s$1 ;

  # Construct state: a construct-state noun becomes a 3rd-person noun
  # looking for a following genitive noun, and takes over that noun's
  # definiteness (def or indef).
  typechange: n<~2>[cons] => n<2>[3rd,def] /* n[gen,def] ;
  typechange: n<~2>[cons] => n<2>[3rd,indef] /* n[gen,indef] ;
}
##########################################################################
# Morphological entries #
# (morph.xml) #
##########################################################################
word wa:Conj; # "and"
word anna:Comp; # "that", introducing sentential complements
word inna:Comp; # same, but only after the verb qaal "say"
word maa:InterrogPro(thing): 3rd; # "what"
word man:InterrogPro(person): 3rd; # "who"
word li:Prep; # "to", "for" (marks the recipient of ditransitive verbs)
word fii:Prep; # "in"
# This word means "this".
word haadhaa {
  # Singular and plural forms carry no case marking.  `*' stands for the
  # stem itself (haadhaa).
  *: sg, m;
  haadhihi: sg, f;
  ha_ulaahi: pl;

  # Bizarrely, this word declines for case only in the dual.
  # Accusative and genitive share one form.
  haadhaani: du, m, nom;
  haadhayni: du, m, acc;
  haadhayni: du, m, gen;
  haataani: du, f, nom;
  haatayni: du, f, acc;
  haatayni: du, f, gen;
}
# This word means "that".
word dhaalik {
  # Singular and plural forms carry no case marking.  `*' stands for the
  # stem itself (dhaalik).
  *: sg, m;
  tilka: sg, f;
  ulaa_ika: pl;

  # Bizarrely, this word declines for case only in the dual.
  # Accusative and genitive share one form.
  dhaanika: du, m, nom;
  dhaynika: du, m, acc;
  dhaynika: du, m, gen;
  taanika: du, f, nom;
  taynika: du, f, acc;
  taynika: du, f, gen;
}
# This is the relative pronoun.
word al-ladhii {
  # Singular and plural forms agree in number and gender only.  `*' stands
  # for the stem itself (al-ladhii).
  *: sg, m;
  al-latii: sg, f;
  al-ladhiina: pl, m;
  al-laati: pl, f;

  # Bizarrely, this word declines for case only in the dual.
  # Accusative and genitive share one form.
  al-ladhaani: du, m, nom;
  al-ladhayni: du, m, acc;
  al-ladhayni: du, m, gen;
  al-lataani: du, f, nom;
  al-latayni: du, f, acc;
  al-latayni: du, f, gen;
}
# Independent (free-standing) personal pronouns.  Each form lists its
# morphological person/number together with the matching semantic
# values (the -X features), plus gender where the form marks it.
# No dual forms are listed.
word pro:Pro {
  # Singular.
  ana: 1st, 1st-X, sg, sg-X;
  anta: 2nd, 2nd-X, sg, sg-X, m;
  anti: 2nd, 2nd-X, sg, sg-X, f;
  huwa: 3rd, 3rd-X, sg, sg-X, m;
  hiya: 3rd, 3rd-X, sg, sg-X, f;

  # Plural.
  naHnu: 1st, 1st-X, pl, pl-X;
  # Fixed: this form was misspelled `antun'; the 2nd masc. pl. pronoun
  # ends in -m (compare the clitic `kum' and the verb ending `-tum').
  antum: 2nd, 2nd-X, pl, pl-X, m;
  antunna: 2nd, 2nd-X, pl, pl-X, f;
  hum: 3rd, 3rd-X, pl, pl-X, m;
  hunna: 3rd, 3rd-X, pl, pl-X, f;
}
# Clitic pronouns.  These are members of the PossClitic and/or ObjClitic
# families defined later in the file: `ii' is listed only as a possessive
# clitic, `nii' only as an object clitic, and the rest in both.

# Singular.
word ii:: 1st, 1st-X, sg, sg-X;
word nii:: 1st, 1st-X, sg, sg-X;
word ka:: 2nd, 2nd-X, sg, sg-X, m;
word ki:: 2nd, 2nd-X, sg, sg-X, f;
word hu:: 3rd, 3rd-X, sg, sg-X, m;
word haa:: 3rd, 3rd-X, sg, sg-X, f;

# Plural.
word naa:: 1st, 1st-X, pl, pl-X;
word kum:: 2nd, 2nd-X, pl, pl-X, m;
word kunna:: 2nd, 2nd-X, pl, pl-X, f;
word hum:: 3rd, 3rd-X, pl, pl-X, m;
word hunna:: 3rd, 3rd-X, pl, pl-X, f;
#############################################
# Nouns #
#############################################
# This shows how a reasonably complicated morphology can be accommodated.
# It is certainly possible that some of this may (and probably should)
# be offloaded into a separate morphology-processing engine. However,
# even in that case there is often a good deal more to the lexicon.
# We show a couple examples of complete paradigms, in order to make it
# easier to understand what's going on below.
# Here is a typical noun (kitaab "book") with a broken plural (kutub "books").
# For nouns with broken plurals, the plural is typically declined like
# the singular. Note that Arabic nouns are declined for three numbers
# (singular, dual, plural), three cases (nominative, accusative, genitive),
# and three states (indefinite, definite, construct). (The construct state
# is used for nouns that are modified by other nouns -- e.g. "book" in
# "the book of Mary".)
# Form Nominative Accusative Genitive
# ---------------------------------------------------------------
# sg.indef kitaabun kitaaban kitaabin
# sg.def al-kitaabu al-kitaaba al-kitaabi
# sg.cons kitaabu kitaaba kitaabi
#
# du.indef kitaabaani kitaabayni kitaabayni
# du.def al-kitaabaani al-kitaabayni al-kitaabayni
# du.cons kitaabaa kitaabay kitaabay
#
# pl.indef kutubun kutuban kutubin
# pl.def al-kutubu al-kutuba al-kutubi
# pl.cons kutubu kutuba kutubi
# Here is a typical noun (mudarris "teacher") with a different kind of
# plural, a so-called "strong masculine plural", which has its own declension.
# Form Nominative Accusative Genitive
# ---------------------------------------------------------------
# sg.indef mudarrisun mudarrisan mudarrisin
# sg.def al-mudarrisu al-mudarrisa al-mudarrisi
# sg.cons mudarrisu mudarrisa mudarrisi
#
# du.indef mudarrisaani mudarrisayni mudarrisayni
# du.def al-mudarrisaani al-mudarrisayni al-mudarrisayni
# du.cons mudarrisaa mudarrisay mudarrisay
#
# pl.indef mudarrisuuna mudarrisiina mudarrisiina
# pl.def al-mudarrisuuna al-mudarrisiina al-mudarrisiina
# pl.cons mudarrisuu mudarrisii mudarrisii
# Here, we make heavy use of macros.
# This macro says: Every time an expression of the form
# three-form-decl(...) occurs, replace it with the text that comes after.
# The parameters will be substituted into the text. The braces that
# denote the macro's text do *NOT* form part of the text that is substituted.
# Note that macro substitutions are processed recursively: If the text
# of a macro substitution contains calls to other macros, they will also
# be processed. This makes "inheritance" very easy to implement.
# This macro is used for a particular paradigm corresponding to a
# particular number of a word.
# Most general noun-paradigm macro: emits the nine state-by-case forms
# for one number of one noun.
#   indef-form, def-form, cons-form  stems used in the indefinite,
#                                    definite and construct states
#   indef-*, def-*, cons-*           case endings (nom, acc, gen) for each
#                                    state, appended to the matching stem
#   morph-num, sem-num, gend         agreement features put on every form
# Definite forms are passed through add-al(), which prefixes the article.
def three-different-form-decl(indef-form, def-form, cons-form,
                              indef-nom, indef-acc, indef-gen,
                              def-nom, def-acc, def-gen,
                              cons-nom, cons-acc, cons-gen,
                              morph-num, sem-num, gend) {
  # Indefinite state.
  indef-form.indef-nom: morph-num, sem-num, gend, nom, indef;
  indef-form.indef-acc: morph-num, sem-num, gend, acc, indef;
  indef-form.indef-gen: morph-num, sem-num, gend, gen, indef;

  # Definite state (with the article).
  add-al(def-form.def-nom): morph-num, sem-num, gend, nom, def;
  add-al(def-form.def-acc): morph-num, sem-num, gend, acc, def;
  add-al(def-form.def-gen): morph-num, sem-num, gend, gen, def;

  # Construct state.
  cons-form.cons-nom: morph-num, sem-num, gend, nom, cons;
  cons-form.cons-acc: morph-num, sem-num, gend, acc, cons;
  cons-form.cons-gen: morph-num, sem-num, gend, gen, cons;
}
# It's questionable whether we should do this. This assimilates al-
# to a following coronal consonant, e.g. ar-rajul, as-sigaara,
# ath-thalj, an-nuur, aDH-DHuhr, etc.
# add-al(form): prefix the definite article `al-' to FORM, then rewrite a
# leading `al-C' as `aC-C' when C is one of the listed consonants
# (digraphs such as th/sh/dh and DH are tried before single letters).
# regsub() is called here as regsub(regex, replacement, string).
# NOTE(review): the character class lists an upper-case `L' but no
# lower-case `l'; this looks unintended, though `al-l...' would come out
# unchanged either way -- confirm.
def add-al(form) regsub('^al-([std]h|DH|[tdszrnTDSZL])', 'a\1-\1', al-.form)
# Like three-different-form-decl(), but for the common case where one
# stem (FORM) is shared by the indefinite, definite and construct states.
# All other parameters are passed through unchanged.
def three-form-decl(form,
                    indef-nom, indef-acc, indef-gen,
                    def-nom, def-acc, def-gen,
                    cons-nom, cons-acc, cons-gen,
                    morph-num, sem-num, gend) {
  three-different-form-decl(form, form, form,
                            indef-nom, indef-acc, indef-gen,
                            def-nom, def-acc, def-gen,
                            cons-nom, cons-acc, cons-gen,
                            morph-num, sem-num, gend)
}
# Using the above macro, we create two more macros to handle two common
# paradigm types: Accusative and genitive are the same, and the
# definite is either the same as the indefinite (two-form-decl-1) or
# the same as the construct (two-form-decl-2).
# Paradigm with two case forms (nominative vs. oblique) in which the
# indefinite and definite states share their endings and only the
# construct state differs.
#   non-cons-nom, non-cons-obl  endings for the indefinite and definite
#   cons-nom, cons-obl          endings for the construct state
# The oblique ending is used for both accusative and genitive.
def two-form-decl-1(form, non-cons-nom, non-cons-obl,
                    cons-nom, cons-obl, morph-num, sem-num, gend) {
  three-form-decl(form,
                  non-cons-nom, non-cons-obl, non-cons-obl,
                  non-cons-nom, non-cons-obl, non-cons-obl,
                  cons-nom, cons-obl, cons-obl,
                  morph-num, sem-num, gend)
}
# Paradigm with two case forms (nominative vs. oblique) in which the
# definite and construct states share their endings and only the
# indefinite state differs.
#   indef-nom, indef-obl          endings for the indefinite state
#   non-indef-nom, non-indef-obl  endings for the definite and construct
# The oblique ending is used for both accusative and genitive.
def two-form-decl-2(form, indef-nom, indef-obl,
                    non-indef-nom, non-indef-obl, morph-num, sem-num, gend) {
  three-form-decl(form,
                  indef-nom, indef-obl, indef-obl,
                  non-indef-nom, non-indef-obl, non-indef-obl,
                  non-indef-nom, non-indef-obl, non-indef-obl,
                  morph-num, sem-num, gend)
}
# In turn we create macros for particular paradigms: strong masculine ("uun"),
# strong feminine ("aat"), dual, and basic triptote (the paradigm for
# "kitaab" above and, in general, most singulars).
# Note that an alternative to using braces is to put the macro text on
# the same line as the `def' part of the macro (backslashes can be used
# to join multiple lines together).
# uun-plural(form): masculine plural in -uuna (nom) / -iina (acc, gen);
# the construct state drops the final -na, giving -uu / -ii.
def uun-plural(form) two-form-decl-1(form, uuna, iina, uu, ii, pl, pl-X, m)
# aat-plural(form): feminine plural in -aat; indefinite -aatun / -aatin,
# definite and construct -aatu / -aati.
def aat-plural(form) \
two-form-decl-2(form, aatun, aatin, aatu, aati, pl, pl-X, f)
# dual(form, gend): dual in -aani (nom) / -ayni (acc, gen); the construct
# state drops the final -ni, giving -aa / -ay.
def dual(form, gend) \
two-form-decl-1(form, aani, ayni, aa, ay, du, du-X, gend)
# triptote(form, morph-num, sem-num, gend): three distinct case endings;
# indefinite -un / -an / -in, definite and construct -u / -a / -i.
# The caller chooses the number and gender features.
def triptote(form, morph-num, sem-num, gend) \
three-form-decl(form, un, an, in, u, a, i, u, a, i, morph-num, sem-num, gend)
# Here we define macros for full paradigms for words. Note how semicolons
# are not used, because they are supplied by the macro text itself.
# (Consult the text for three-form-decl() above, and remember that the
# braces denoting the macro text are not actually part of the text. This
# means that if you really want braces as the outermost thing in some
# macro text, you'll need to supply two levels of braces.)
# thing(sing, plur): a masculine noun of semantic class `thing' with
# singular stem SING and (broken) plural stem PLUR.
def thing(sing, plur) {
  word sing:N(thing) {
    triptote(sing, sg, sg-X, m)
    dual(sing, m)
    # The plural is semantically plural (pl-X) but is given feminine
    # singular agreement features.
    # NOTE(review): presumably because non-human plurals take feminine
    # singular agreement in Arabic -- confirm.
    triptote(plur, sg, pl-X, f)
  }
}
# fem-thing(sing, plur): a feminine noun of semantic class `thing'.
# SING is the citation form; a `t' is appended to it before the case
# endings are added in the singular and dual.  PLUR is the plural stem.
def fem-thing(sing, plur) {
  word sing:N(thing) {
    triptote(sing.t, sg, sg-X, f)
    dual(sing.t, f)
    # The plural is semantically plural (pl-X) but is given feminine
    # singular agreement features.
    # NOTE(review): presumably because non-human plurals take feminine
    # singular agreement in Arabic -- confirm.
    triptote(plur, sg, pl-X, f)
  }
}
# person(sing, plur, gend): a noun of semantic class `person' with
# singular stem SING, plural stem PLUR and gender GEND.  Unlike thing(),
# the plural keeps true plural agreement (pl, pl-X) and the noun's own
# gender.
def person(sing, plur, gend) {
  word sing:N(person) {
    triptote(sing, sg, sg-X, gend)
    dual(sing, gend)
    triptote(plur, pl, pl-X, gend)
  }
}
# male(sing, plur) / female(sing, plur): shorthands for person() with the
# gender fixed to m or f respectively.
def male(sing, plur) person(sing, plur, m)
def female(sing, plur) person(sing, plur, f)
# strong-male(sing): a masculine `person' noun whose plural is built on
# the singular stem with uun-plural() instead of a separate plural stem.
def strong-male(sing) {
  word sing:N(person) {
    triptote(sing, sg, sg-X, m)
    dual(sing, m)
    uun-plural(sing)
  }
}
# Here we define the actual words. Note how short these definitions are,
# specifying only what's unpredictable.
# Noun lexicon.  Each call gives the singular stem and, where needed, the
# (broken) plural stem.
# Changes from the previous version:
#   - the duplicated entry thing(waqt, _awqaat) is now listed only once;
#   - the plural of bint is `banaat' (it was mistyped as `_abnaat').
thing(kitaab, kutub) # "book"
thing(waqt, _awqaat) # "time"
thing(Harf, Huruuf) # "letter"
thing(dars, duruus) # "lesson"
fem-thing(sigaara, sagaayir) # "cigarette"
fem-thing(madiina, mudun) # "city"
male(rajul, rijaal) # "man"
male(walad, _awlaad) # "boy"
male(Taalib, Tullaab) # "student"
female(bint, banaat) # "girl", "daughter"
strong-male(mudarris) # "teacher"
# "woman": an irregular noun written out by hand.  The indefinite and
# construct states use the stem imra_at, the definite state uses mar_at
# (giving al-mar_at-), and the plural uses the unrelated stem nisaa_.
word imra_a:N(person) {
  # Singular: triptote endings on the state-dependent stems.
  three-different-form-decl(imra_at, mar_at, imra_at,
                            un, an, in, u, a, i, u, a, i,
                            sg, sg-X, f)
  # Dual: dual endings on the same state-dependent stems.
  three-different-form-decl(imra_at, mar_at, imra_at,
                            aani, ayni, ayni, aani, ayni, ayni, aa, ay, ay,
                            du, du-X, f)
  # Plural.
  triptote(nisaa_, pl, pl-X, f)
}
# extended_construct_word(stem, plur): a masculine `person' noun whose
# singular is a regular triptote in the indefinite and definite states
# but takes long vowels (-uu / -aa / -ii) in the construct state.
# The dual is regular and PLUR is a triptote plural stem.
def extended_construct_word(stem, plur) {
  word stem:N(person) {
    three-form-decl(stem, un, an, in, u, a, i, uu, aa, ii, sg, sg-X, m)
    dual(stem, m)
    triptote(plur, pl, pl-X, m)
  }
}
extended_construct_word(_ax, _ixwaan) # "brother"
# Fixed: the plural of _ab "father" is _aabaa_ "fathers"; the previous
# value, _abnaa_, is the plural of a different noun ("sons").
extended_construct_word(_ab, _aabaa_) # "father"
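# Typical present-tense paradigms for verbs with a weak third radical.
# The three columns of each paradigm are indicative, subjunctive and
# jussive. The first table shows the -aa type; the second table shows
# the -uu type (left of the bar) and the -ii type (right of the bar).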
# 1sg 'aktaa 'aktaa 'akta |
# 2sg.m taktaa taktaa takta |
# 2sg.f taktayna taktay taktay |
# 3sg.m yaktaa yaktaa yakta |
# 3sg.f taktaa taktaa takta |
# 2du taktayaani taktayaa taktayaa |
# 3du.m yaktayaani yaktayaa yaktayaa |
# 3du.f taktayaani taktayaa taktayaa |
# 1pl naktaa naktaa nakta |
# 2pl.m taktawna taktaw taktaw |
# 2pl.f taktayna taktayna taktayna |
# 3pl.m yaktawna yaktaw yaktaw |
# 3pl.f yaktayna yaktayna yaktayna |
#
# 1sg 'aktuu 'aktuwa 'aktu | 'aktii 'aktiya 'akti
# 2sg.m taktuu taktuwa taktu | taktii taktiya takti
# 2sg.f taktiina taktii taktii | taktiina taktii taktii
# 3sg.m yaktuu yaktuwa yaktu | yaktii yaktiya yakti
# 3sg.f taktuu taktuwa taktu | taktii taktiya takti
# 2du taktuwaani taktuwaa taktuwaa | taktiyaani taktiyaa taktiyaa
# 3du.m yaktuwaani yaktuwaa yaktuwaa | yaktiyaani yaktiyaa yaktiyaa
# 3du.f taktuwaani taktuwaa taktuwaa | taktiyaani taktiyaa taktiyaa
# 1pl naktuu naktuwa naktu | naktii naktiya nakti
# 2pl.m taktuuna taktuu taktuu | taktuuna taktuu taktuu
# 2pl.f taktuuna taktuuna taktuuna | taktiina taktiina taktiina
# 3pl.m yaktuuna yaktuu yaktuu | yaktuuna yaktuu yaktuu
# 3pl.f yaktuuna yaktuuna yaktuuna | yaktiina yaktiina yaktiina
# Past-tense paradigm for verbs that may use two stems:
#   formv  stem used before endings that begin with a vowel
#   formc  stem used before endings that begin with a consonant
# No separate 1st-person dual is listed, and the 2nd-person dual is
# unmarked for gender.
def two-form-past(formv, formc) {
  # Singular.
  formc.tu: past, 1st, sg;
  formc.ta: past, 2nd, m, sg;
  formc.ti: past, 2nd, f, sg;
  formv.a: past, 3rd, m, sg;
  formv.at: past, 3rd, f, sg;

  # Dual.
  formc.tumaa: past, 2nd, du;
  formv.aa: past, 3rd, m, du;
  formv.ataa: past, 3rd, f, du;

  # Plural.
  formc.naa: past, 1st, pl;
  formc.tum: past, 2nd, m, pl;
  formc.tunna: past, 2nd, f, pl;
  formv.uu: past, 3rd, m, pl;
  formc.na: past, 3rd, f, pl;
}
# Past-tense paradigm for third-weak verbs whose stem takes -ay- before
# consonant-initial endings (e.g. ra_ -> ra_aytu, ra_aa, ra_at).
def 3rd-weak-past-ay(form) {
  # Singular.
  form.ay.tu: past, 1st, sg;
  form.ay.ta: past, 2nd, m, sg;
  form.ay.ti: past, 2nd, f, sg;
  form.aa: past, 3rd, m, sg;
  form.at: past, 3rd, f, sg;

  # Dual.
  form.ay.tumaa: past, 2nd, du;
  form.ay.aa: past, 3rd, m, du;
  form.ataa: past, 3rd, f, du;

  # Plural.
  form.ay.naa: past, 1st, pl;
  form.ay.tum: past, 2nd, m, pl;
  form.ay.tunna: past, 2nd, f, pl;
  form.aw: past, 3rd, m, pl;
  form.ay.na: past, 3rd, f, pl;
}
# Past-tense paradigm for third-weak verbs whose stem takes -aw- before
# consonant-initial endings.  The 3rd-person forms other than the
# masculine dual and feminine plural match those of 3rd-weak-past-ay.
def 3rd-weak-past-aw(form) {
  # Singular.
  form.aw.tu: past, 1st, sg;
  form.aw.ta: past, 2nd, m, sg;
  form.aw.ti: past, 2nd, f, sg;
  form.aa: past, 3rd, m, sg;
  form.at: past, 3rd, f, sg;

  # Dual.
  form.aw.tumaa: past, 2nd, du;
  form.aw.aa: past, 3rd, m, du;
  form.ataa: past, 3rd, f, du;

  # Plural.
  form.aw.naa: past, 1st, pl;
  form.aw.tum: past, 2nd, m, pl;
  form.aw.tunna: past, 2nd, f, pl;
  form.aw: past, 3rd, m, pl;
  form.aw.na: past, 3rd, f, pl;
}
# Past-tense paradigm for third-weak verbs whose stem takes -ii- before
# consonant-initial endings and -iy- before vowel-initial ones
# (e.g. laq -> laqiitu, laqiya).  The 3rd masc. plural is form + uu.
def 3rd-weak-past-ii(form) {
  # Singular.
  form.ii.tu: past, 1st, sg;
  form.ii.ta: past, 2nd, m, sg;
  form.ii.ti: past, 2nd, f, sg;
  form.iya: past, 3rd, m, sg;
  form.iyat: past, 3rd, f, sg;

  # Dual.
  form.ii.tumaa: past, 2nd, du;
  form.iy.aa: past, 3rd, m, du;
  form.iy.ataa: past, 3rd, f, du;

  # Plural.
  form.ii.naa: past, 1st, pl;
  form.ii.tum: past, 2nd, m, pl;
  form.ii.tunna: past, 2nd, f, pl;
  form.uu: past, 3rd, m, pl;
  form.ii.na: past, 3rd, f, pl;
}
# strong-past(form): past tense of a strong verb, i.e. two-form-past()
# with the same stem before vowel- and consonant-initial endings.
def strong-past(form) two-form-past(form, form)
# In general, almost all Arabic present-tense verbs of a particular mood
# can be defined using five forms. Verbs with a hamza (glottal stop) as
# the first radical need an adjustment in the first-person singular;
# gen-pres below handles this automatically with a regsub() call on the
# first-person singular form.
def gen-pres(mood, fsing, fsing-fem, fdual, fplur-masc, fplur-fem) {
# Generic present-tense paradigm for one MOOD.  Every form is a person
# prefix (_, t, y or n) followed by one of the five supplied forms:
#   fsing       used for 1sg, 2sg.m, 3sg.m, 3sg.f and 1pl
#   fsing-fem   used for 2sg.f
#   fdual       used for all dual forms
#   fplur-masc  used for 2pl.m and 3pl.m
#   fplur-fem   used for 2pl.f and 3pl.f
#
# This shows how you can use regular expressions if need be.
# regsub(regex, repl, string) is a special built-in that does regular-
# expression substitution on STRING, replacing all occurrences of
# REGEX with REPL. Regular-expression syntax is as in Python.
# In this case, Arabic verbs have a phonetic rule that eliminates
# two glottal stops occurring near each other at the beginning of a
# word. For example, _a_kulu -> _aakulu, and _u_kalu -> _uukalu.
# (That is, the vowel is lengthened.)
# The next line and the `_.fsing' line further down are commented-out
# alternatives to the active 1st-singular entry.
# _ . regsub(foo, bar, fsing): pres, mood, 1st, sg;
_ . regsub('^([aiu])_', '\1\1', fsing): pres, mood, 1st, sg;
# _.fsing: pres, mood, 1st, sg;
t.fsing: pres, mood, 2nd, m, sg;
t.fsing-fem: pres, mood, 2nd, f, sg;
y.fsing: pres, mood, 3rd, m, sg;
t.fsing: pres, mood, 3rd, f, sg;
t.fdual: pres, mood, 2nd, du;
y.fdual: pres, mood, 3rd, m, du;
t.fdual: pres, mood, 3rd, f, du;
n.fsing: pres, mood, 1st, pl;
t.fplur-masc: pres, mood, 2nd, m, pl;
t.fplur-fem: pres, mood, 2nd, f, pl;
y.fplur-masc: pres, mood, 3rd, m, pl;
y.fplur-fem: pres, mood, 3rd, f, pl;
}
# The "two-form" present uses normal (non-3rd-weak) endings but may
# have two forms of the root: one form for vocalic endings (almost all
# of them) and one for consonant endings (only the feminine plural). This
# encompasses 2nd-weak verbs and doubled verbs, and (trivially) strong verbs.
# Indicative: endings -u, -iina, -aani, -uuna on the vowel stem FORMV and
# -na on the consonant stem FORMC.
def two-form-pres-indic(formv, formc) {
  gen-pres(indic, formv.u, formv.iina, formv.aani, formv.uuna, formc.na)
}

# Subjunctive: endings -a, -ii, -aa, -uu on FORMV and -na on FORMC.
def two-form-pres-subj(formv, formc) {
  gen-pres(subj, formv.a, formv.ii, formv.aa, formv.uu, formc.na)
}

# The jussive is different because the base form (fsing) has no ending.
# This means that it may assume the consonant form instead of the vowel
# form, or may have a number of variants (in particular, for doubled
# verbs). So the base form needs to be given explicitly as BASE; the
# remaining forms are built as in the subjunctive.
def two-form-pres-juss(base, formv, formc) {
  gen-pres(juss, base, formv.ii, formv.aa, formv.uu, formc.na)
}
# Full present tense (all three moods) of a strong verb: one stem is used
# everywhere, including as the bare jussive base.
def strong-pres(form) {
  two-form-pres-indic(form, form)
  two-form-pres-subj(form, form)
  two-form-pres-juss(form, form, form)
}

# Full present tense of a 2nd-weak verb: FORMV before vowel endings,
# FORMC before consonant endings.  The endingless jussive base is FORMC.
def 2nd-weak-pres(formv, formc) {
  two-form-pres-indic(formv, formc)
  two-form-pres-subj(formv, formc)
  two-form-pres-juss(formc, formv, formc)
}

# Full present tense of a doubled verb.  Three jussive paradigms are
# generated, one for each variant of the base form: FORMC, FORMV + a and
# FORMV + i.
def doubled-pres(formv, formc) {
  two-form-pres-indic(formv, formc)
  two-form-pres-subj(formv, formc)
  two-form-pres-juss(formc, formv, formc)
  two-form-pres-juss(formv.a, formv, formc)
  two-form-pres-juss(formv.i, formv, formc)
}
# Verbs whose third radical is a /w/ or a /y/ have all manner of exceptional
# forms; easiest just to list them. In general, there are three types,
# depending on whether the base singular forms end in -aa, -ii, or -uu.
# Third-weak present, -aa type.  Each gen-pres() call lists the five forms
# in the order singular, feminine singular, dual, masculine plural,
# feminine plural.
def 3rd-weak-pres-aa(form) {
  gen-pres(indic, form.aa, form.ayna, form.ayaani, form.awna, form.ayna)
  gen-pres(subj, form.aa, form.ay, form.ayaa, form.aw, form.ayna)
  # Note the shortened vowel here.
  gen-pres(juss, form.a, form.ay, form.ayaa, form.aw, form.ayna)
}

# Third-weak present, -ii type.
def 3rd-weak-pres-ii(form) {
  gen-pres(indic, form.ii, form.iina, form.iyaani, form.uuna, form.iina)
  gen-pres(subj, form.iya, form.ii, form.iyaa, form.uu, form.iina)
  # Note the shortened vowel here.
  gen-pres(juss, form.i, form.ii, form.iyaa, form.uu, form.iina)
}

# Third-weak present, -uu type.
def 3rd-weak-pres-uu(form) {
  gen-pres(indic, form.uu, form.iina, form.uwaani, form.uuna, form.uuna)
  gen-pres(subj, form.uwa, form.ii, form.uwaa, form.uu, form.uuna)
  # Note the shortened vowel here.
  gen-pres(juss, form.u, form.ii, form.uwaa, form.uu, form.uuna)
}
# 2nd-weak-verb: a complete verb entry for a verb with a weak second
# radical.
#   pastv, pastc  past stems before vowel / consonant endings
#   props         family (and any attributes) for the word
#   presv, presc  present stems before vowel / consonant endings
# The word is named after PASTV.
def 2nd-weak-verb(pastv, props, pastc, presv, presc) {
  word pastv: props {
    two-form-past(pastv, pastc)
    2nd-weak-pres(presv, presc)
  }
}

# Note the way that macro calls can be constructed as well. Here, the
# value of PAST_TYPE is the suffix at the end of the macro name, so e.g.
# past_type `ay' selects 3rd-weak-past-ay and pres_type `aa' selects
# 3rd-weak-pres-aa.  The word is named PAST_STEM followed by PAST_TYPE.
def 3rd-weak-verb(past_stem, props, past_type, pres_stem, pres_type) {
  word past_stem . past_type: props {
    3rd-weak-past- . past_type(past_stem)
    3rd-weak-pres- . pres_type(pres_stem)
  }
}

# strong-verb: a complete verb entry for a strong verb, with one past
# stem (also the word's name) and one present stem.
def strong-verb(past, props, pres) {
  word past: props {
    strong-past(past)
    strong-pres(pres)
  }
}
# Verb lexicon.
# 2nd-weak verbs: past vowel stem, family, past consonant stem, present
# vowel stem, present consonant stem.
2nd-weak-verb(kaan, TransV(pred=be), kun, akuun, akun)
2nd-weak-verb(naam, IntransV(pred=sleep), nim, anaam, anam)
2nd-weak-verb(qaal, SayV(pred=say), qul, aquul, aqul)
# Strong verbs: past stem, family, present stem.
strong-verb(katab, TransV(pred=write), aktub)
strong-verb(dhahab, IntransV(pred=go), adhhab)
# Note that the following verb, which begins with a glottal stop,
# will have a modification made to it in the first-person singular present.
# (See above.)
# It is also listed under two families, IntransV and TransV.
strong-verb(_akal, IntransV TransV (pred=eat), a_kul)
# 3rd-weak verbs: past stem, family, past type, present stem, present type.
3rd-weak-verb(ra_, TransV(pred=see), ay, ar, aa)
3rd-weak-verb(_a9T, DitransV(pred=give), ay, u9T, ii)
3rd-weak-verb(laq, TransV(pred=find), ii, alq, aa)
# Verbs taking a sentential complement introduced by `anna'.
strong-verb(9araf, ThinkV(pred=know), a9rif) # see also 9alam
strong-verb(tafakkar, ThinkV(pred=think), atafakkar)
strong-verb(ta9allam, ThinkV(pred=learn), ata9allam)
# Common nouns: a 3rd-person, non-resumptive n whose semantics is the
# word's own predicate.
family N {
  entry: n<2>[X, 3rd, nonres]: X:sem-obj(*);
}

# Interrogative pronouns.  Each entry takes a clause that is missing a
# noun phrase and yields a complete clause; the missing noun phrase may
# be a resumptive one, a nominative one (slash of either direction), or
# an accusative one.
family InterrogPro(Pro) {
  entry: s/*(s/n<2>[res]);
  entry: s/*(s|n<2>[nonres,nom]);
  entry: s/*(s/n<2>[nonres,acc]);
  member: maa, man;
}

# Independent personal pronouns: nominative, definite, non-resumptive.
family Pro {
  entry: n<2>[X, nom, def, nonres]: X:sem-obj(*);
  member: pro;
}
# Relative pronoun: follows a non-resumptive noun and takes, to its
# right, a clause that is missing a noun phrase.  As with InterrogPro,
# the missing noun phrase may be resumptive, nominative or accusative.
family Rel {
  entry: (n<~2>[CASE,nonres]\n<~2>[nonres])/*(s/n<2>[res]);
  entry: (n<~2>[CASE,nonres]\n<~2>[nonres])/*(s|n<2>[nonres,nom]);
  entry: (n<~2>[CASE,nonres]\n<~2>[nonres])/*(s/n<2>[nonres,acc]);
  member: al-ladhii;
}

# Coordination.  Conjoined nouns must match in case and state and yield
# a plural noun; the second entry conjoins any two like s-rooted
# categories.
family AndConj(Conj) {
  entry: n[pl, CASE, STATE] \* n[CASE, STATE] /* n[CASE, STATE];
  entry: s$1 \* s$1 /* s$1;
  member: wa;
}

# Demonstratives used as determiners: take a noun to the right and yield
# a definite noun, adding a <det> relation to its semantics.
family Det(indexRel=det) {
  entry: n<2>[X, def, nonres] /^ n<2>[X]: X:sem-obj(<det>*);
  member: haadhaa, dhaalik;
}
# good luck on this one! construct even more complicated ones!
#
#ar-rajulu al-ladhii kataba al-kutuba ra_aa wa _a9Taa li binti hu al-kilaaba al-latii akalat sagaayira mudarrisii al-waladi
#"the man that wrote the books saw and gave to his daughter the dogs that ate the cigarettes of the boy's teachers."
# Works, correctly:
#ar-rajulu al-ladhii kataba al-kutuba ra_aa wa _a9Taa li binti hu as-sagaayira al-latii qultu inna al-waladu tafakkara anna al-mar_atu _a9Tat haa li ha_ulaahi al-mudarrisiina
#Bad
#ar-rajula al-ladhii katabat al-kutuba ra_aa wa _a9Taa li binti hu as-sagaayira al-latii qultu inna al-waladu tafakkara anna al-mar_atu _a9Tat haa li ha_ulaahi al-mudarrisiina
#"the man that wrote the books saw and gave to his daughter the cigarettes that I said that the boy thought that the woman gave them to those teachers"
# Fails, correctly:
#ar-rajulu al-ladhii kataba al-kutuba ra_aa wa _a9Taa li binti hu as-sagaayira al-latii qultu inna al-waladu tafakkara anna al-mar_atu _a9Tat hu li ha_ulaahi al-mudarrisiina
# Works:
# ar-rajulu al-ladhii al-waladu _a9Taa as-sagaayira li binti hu dhahaba
# ar-rajulu _a9Taa li binti hu as-sagaayira
# ar-rajulu al-ladhii waladu hu _a9Taa as-sagaayira li al-binti dhahaba
# Won't work:
# ar-rajulu al-ladhii al-waladu _a9Taa li binti hu as-sagaayira dhahaba
# ar-rajulu al-ladhii waladu hu _a9Taa li al-binti as-sagaayira dhahaba
# Possessive clitics: attach to the left-adjacent construct-state noun
# and yield a definite noun with a <poss> relation.  The second entry
# additionally leaves the result looking for a resumptive noun phrase.
family PossClitic(Cli, indexRel=poss) {
  entry: n<~1>[X, def, nonres] \* n<1>[X, cons]: X:sem-obj(<poss>*);
  entry: (n<~1>[X, def, nonres] / n<2>[res]) \* n<1>[X, cons]: X:sem-obj(<poss>*);
  member: ii, ka, ki, hu, haa, naa, kum, kunna, hum, hunna;
}

# Object clitics: attach to a preceding verb that still wants an object
# to its right and fill that object slot.  The second entry additionally
# leaves the result looking for a resumptive noun phrase.
# NOTE(review): indexRel=poss is the same value as in PossClitic and
# these entries carry no semantics -- confirm it is intended here.
family ObjClitic(Cli, indexRel=poss) {
  entry: (s$1 | n<3>[nonres]) \ (s$1 / n<2>[nonres] | n<3>[nom,nonres]);
  entry: (s$1 / n<~2>[res] | n<3>[nonres]) \ (s$1 / n<2>[nonres] | n<3>[nom,nonres]);
  member: nii, ka, ki, hu, haa, naa, kum, kunna, hum, hunna;
}
# Adjectives: follow the noun they modify and add an <adj> relation to
# its semantics.  No members are listed in this part of the file.
family Adj(indexRel=adj) {
  entry: n<2>[X, NUM, GEND, CASE, STATE] \ n<2>[X]:
         X:sem-obj(<adj>*);
}

# Prepositions: take a genitive, non-resumptive noun and yield a pp that
# records which preposition was used in its lex feature.
family Prep-Nom(Prep, indexRel="*NoSem*") {
  # The pp<~3> notation generates an 'inheritsFrom' tag rather than
  # an 'id' tag for the feature structure.
  entry: pp<~3>[lex=*] /< n<3>[gen,nonres];
  member: li, fii;
}

# Complementizers: take a clause and yield an sbar that records which
# complementizer was used in its lex feature.
family Comp(indexRel="*NoSem*") {
  entry: sbar<~1>[lex=*] / s<1>;
  member: anna, inna;
}
# Verb families.  In every entry the subject is a nominative,
# non-construct, non-resumptive noun that may appear on either side of
# the verb (`|' slash) and supplies the <Actor>.

# Intransitive verbs.
family IntransV(V) {
  entry: s[E] | n<2>[X,NUM,PERS,GEND,non-cons,nonres,nom]:
         E:action(* ^ <Actor>X:animate-being);
}

# Verbs taking a sentential complement introduced by `inna'.
family SayV(V) {
  entry: s[E] / sbar[Z, lex=inna] | n<2>[X,NUM,PERS,GEND,non-cons,nonres,nom]:
         E:action(* ^ <Actor>X:animate-being ^ <Situation>Z);
}

# Verbs taking a sentential complement introduced by `anna'.
family ThinkV(V) {
  entry: s[E] / sbar[Z, lex=anna] | n<2>[X,NUM,PERS,GEND,non-cons,nonres,nom]:
         E:action(* ^ <Actor>X:animate-being ^ <Situation>Z);
}

# Transitive verbs: an accusative object to the right supplies <Patient>.
family TransV(V) {
  entry: s[E] / n[Y,acc,nonres] | n<2>[X,NUM,PERS,GEND,non-cons,nonres,nom]:
         E:action(* ^ <Actor>X:animate-being ^ <Patient>Y:sem-obj);
}

# Ditransitive verbs: an accusative object (<Patient>) and a pp headed
# by `li' (<Recipient>).
family DitransV(V) {
  # The first slash (on the pp) is marked with a mode allowing backward xcomp.
  entry: s[E] /< pp[Z,lex=li] / n[Y,acc,nonres] | n<2>[X,NUM,PERS,GEND,non-cons,nonres,nom]:
         E:action(* ^ <Actor>X:animate-being ^ <Patient>Y:sem-obj ^
                  <Recipient>Z:animate-being) ;
}
# Regression sentences.  The number after each sentence is the expected
# number of parses; 0 means the sentence should be rejected.
testbed {
  # different states of subject
  rajulun dhahaba: 1;
  ar-rajulu dhahaba: 1;
  rajulu dhahaba: 0;

  # subject-verb agreement
  rajulun dhahabuu: 0;
  ar-rajulu dhahabuu: 0;
  rajulu dhahabuu: 0;

  # gender agreement
  ar-rajulu dhahaba: 1;
  ar-rajulu dhahabat: 0;
  al-bintu dhahaba: 0;
  al-bintu dhahabat: 1;

  # possession
  rajulun hu dhahaba: 0;
  ar-rajulu hu dhahaba: 0;
  rajulu hu dhahaba: 1;

  # subject case
  ar-rajula dhahaba: 0;
  ar-rajuli dhahaba: 0;

  # case in construct phrase
  _axuu ar-rajuli dhahaba: 1;
  _axuu ar-rajula dhahaba: 0;
  _axuu ar-rajulu dhahaba: 0;

  # construct state in construct phrase
  _axun ar-rajuli dhahaba: 0;
  al-_axu ar-rajuli dhahaba: 0;

  # object case
  ar-rajulu ra_aa al-kitaaba: 1;
  ar-rajulu ra_aa al-kitaabi: 0;
  ar-rajulu ra_aa al-kitaabu: 0;

  # preposition case
  ar-rajulu _a9Taa al-kitaaba li al-waladi: 1;
  ar-rajulu _a9Taa al-kitaaba li al-waladu: 0;
  ar-rajulu _a9Taa al-kitaaba li al-walada: 0;

  # subcategorization
  ar-rajulu ra_aa al-kitaaba li al-waladi: 0;

  # backward xcomp
  ar-rajulu _a9Taa li al-waladi al-kitaaba: 1;
  _a9Taa ar-rajulu li al-waladi al-kitaaba: 1;

  # object clitics
  ana ra_aytu hu: 1;
  ra_aytu hu ana: 1;
  ra_aytu ana hu: 0;
  hu ra_aytu ana: 0;
  hu ana ra_aytu: 0;
  huwa ra_aa nii: 1;
  huwa ra_aa ii: 0;
  huwa ra_aa ana: 0;
  ar-rajulu _a9Taa haa li al-waladi: 1;
  _a9Taa haa ar-rajulu li al-waladi: 1;

  # relative clauses
  # "I gave it to the man that the girl saw him"
  _a9Taytu haa li ar-rajuli al-ladhii al-bintu ra_at hu: 3;
  # "I gave it to the man that the girl saw her"
  _a9Taytu haa li ar-rajuli al-ladhii al-bintu ra_at haa: 0;
  # "I gave it to the man that the girl saw"
  _a9Taytu haa li ar-rajuli al-ladhii al-bintu ra_at: 0;
  # "I gave it to the man that the girl saw the boy"
  _a9Taytu haa li ar-rajuli al-ladhii al-bintu ra_at al-walada: 0;
}