Skip to content
Snippets Groups Projects
Unverified Commit e2be8df7 authored by Sebastian Höffner's avatar Sebastian Höffner
Browse files

Adding a first simple grammar to parse OpenCCG's output into digestible JSON objects.

parent cf7a5d39
No related branches found
No related tags found
No related merge requests found
......@@ -23,7 +23,7 @@ RUN curl -o openccg-0.9.5.tgz https://datapacket.dl.sourceforge.net/project/open
# Server software: python 3, nginx, uwsgi
&& apt-get update \
&& apt-get install -y python3 python3-pip nginx \
&& pip3 install flask uwsgi \
&& pip3 install flask uwsgi tatsu \
# Configure nginx
&& ln -s /etc/nginx/sites-available/occg /etc/nginx/sites-enabled/occg \
&& rm /etc/nginx/sites-enabled/default
......
......@@ -24,10 +24,20 @@ def is_non_gui_agent(ua_string):
def create_response(sentence):
"""Prepares a response.
Parses the sentence using wccg, then enriches the result with
some meta data.
Args:
sentence: The sentence to parse.
Returns:
The response as a dictionary.
"""
content = wccg.parse(sentence)
response = {
'version': '1.0.0',
'version': '1.1.0',
'application': 'web-openccg',
'uuid': str(uuid.uuid4())
}
......
from tatsu import parse
from tatsu.util import asjson
# TatSu grammar for OpenCCG output strings; ccg_to_json passes it to
# tatsu.parse together with the string to parse.
# Notation used in the rules below (TatSu syntax):
#   $          end of input
#   ~          cut: commit to the current alternative, no backtracking past it
#   @:expr     the value of expr becomes the value of the whole rule
#   name:expr  the value of expr is stored under `name` in the rule's result
#   /.../      regular expression token
# NOTE(review): in `semspec` the `expression` alternative looks unreachable,
# because `conjunction` already accepts a lone expression - confirm.
GRAMMAR = r"""
start
= semspec $
;
semspec
= nominal
| term
| conjunction
| expression
;
conjunction
= @:expression '^' ~ @:conjunction
| @:expression
;
expression
= role_expression
| nominal_expression
| variable_expression
| atom_expression
;
role_expression
= role:role
;
variable_expression
= variable:variable
;
nominal_expression
= nominal:nominal
;
atom_expression
= atom:atom
;
term
= '(' ~ @:conjunction ')'
;
nominal
= '@' ~ nominal:variable roles:term
;
role
= '<' type:atom '>' target:variable
| '<' type:atom '>' target:term
| '<' type:atom '>' target:atom
;
variable
= /[a-z]\d+(:[a-zA-Z\-]+)*/
;
atom
= /[a-zA-Z\-\.]+/
;
"""
def ccg_to_json(to_parse):
    """Converts an OpenCCG string into a more easily digestible JSON format.

    Args:
        to_parse: The OpenCCG string.
    Returns:
        The result of parsing the string with GRAMMAR, converted by asjson.
    """
    syntax_tree = parse(GRAMMAR, to_parse)
    return asjson(syntax_tree)
......@@ -3,6 +3,8 @@ import re
import string
import subprocess
from ccgparser import ccg_to_json
def parse(sentence):
"""Parses a sentence using OpenCCG's command line tool wccg.
......@@ -27,7 +29,11 @@ def parse(sentence):
universal_newlines=True)
sentence = re.sub(f'[{re.escape(string.punctuation)}]', '', sentence).lower()
response = wccg_proc.communicate(input=sentence)[0]
return _as_dict(response or f'"{sentence}": Unable to parse. wccg returned an empty response.')
wccg_response = _as_dict(response or f'"{sentence}": Unable to parse. wccg returned an empty response.')
wccg_response = jsonify_parses(wccg_response)
return wccg_response
def ensure_unique_key(new_key, dictionary):
......@@ -94,3 +100,20 @@ def _as_dict(response):
return dict(sentence=sentence[1:-1],
parses=parses,
http_status=200)
def jsonify_parses(wccg_response):
    """Adds JSON representations of the OpenCCG parses to a response.

    Args:
        wccg_response: The dictionary as returned by _as_dict.
    Returns:
        A shallow copy of the dictionary, additionally containing the key
        'json_parses'. It maps every key of 'parses' to the result of
        ccg_to_json for the corresponding parse, and is empty if the
        response has no parses.
    """
    # A missing or None 'parses' entry is treated as "no parses" instead of
    # failing on .items().
    parses = wccg_response.get('parses') or {}

    # Work on a copy so the caller's dictionary is left untouched.
    enriched = wccg_response.copy()
    enriched['json_parses'] = {
        key: ccg_to_json(ccg_string) for key, ccg_string in parses.items()
    }
    return enriched
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment