Skip to content
Snippets Groups Projects
Unverified Commit 2ab38f9f authored by Sebastian Höffner's avatar Sebastian Höffner
Browse files

Initial commit.

parents
No related branches found
No related tags found
No related merge requests found
# Created by https://www.gitignore.io/api/python
### Python ###
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
### Python Patch ###
.venv/
### Python.VirtualEnv Stack ###
# Virtualenv
# http://iamzed.com/2009/05/07/a-primer-on-virtualenv/
[Bb]in
[Ii]nclude
[Ll]ib
[Ll]ib64
[Ll]ocal
[Ss]cripts
pyvenv.cfg
pip-selfcheck.json
# End of https://www.gitignore.io/api/python
# Homebrew Bundle entry: installs the Docker cask (macOS), which is needed
# to build and run the container described by this repository.
cask 'docker'
# Pin a Debian-based OpenJDK tag instead of the moving `latest`: the build
# below depends on apt-get, and an unpinned base makes builds irreproducible.
FROM openjdk:11-jdk-bullseye

LABEL maintainer="Sebastian Höffner <shoeffner@tzi.de>"
LABEL description="A small webapp to parse sentences using the DiaSpace grammar (University of Bremen) with OpenCCG."
LABEL version="1.0"

# nginx listens on port 80 inside the container (documentation only).
EXPOSE 80

# OpenCCG is unpacked to /openccg; its launcher scripts live in bin/ and its
# native libraries in lib/.
ENV OPENCCG_HOME=/openccg
ENV PATH="$OPENCCG_HOME/bin:$PATH"
# LD_LIBRARY_PATH is not set by the base image; appending the (empty) old
# value would leave a trailing ':' and thereby put the current working
# directory on the library search path.
ENV LD_LIBRARY_PATH="$OPENCCG_HOME/lib"

# Server software: python 3, nginx, uwsgi (plus curl/unzip for the downloads).
# Recommended packages are installed on purpose: pip builds uwsgi from source
# and needs the compiler toolchain and Python headers that python3-pip
# recommends.
RUN apt-get update \
    && apt-get install -y \
        curl \
        nginx \
        python3 \
        python3-pip \
        unzip \
    && rm -rf /var/lib/apt/lists/* \
    && pip3 install --no-cache-dir flask uwsgi

# Download and extract OpenCCG (the archive is expected to unpack to /openccg).
# -f makes curl fail on HTTP errors instead of saving an error page,
# -L follows mirror redirects.
RUN curl -fsSL -o /tmp/openccg-0.9.5.tgz https://datapacket.dl.sourceforge.net/project/openccg/openccg/openccg%20v0.9.5%20-%20deplen%2C%20kenlm%2C%20disjunctivizer/openccg-0.9.5.tgz \
    && tar -xzf /tmp/openccg-0.9.5.tgz -C / \
    && rm /tmp/openccg-0.9.5.tgz \
    # Download and extract grammar
    && curl -fsSL -o /tmp/english.zip http://www.diaspace.uni-bremen.de/twiki/pub/DiaSpace/ReSources/english.zip \
    && unzip -d /english /tmp/english.zip \
    && rm /tmp/english.zip

# Configure nginx. Copied after the expensive layers so that editing
# nginx.conf does not invalidate the package and download caches.
COPY nginx.conf /etc/nginx/sites-available/occg
RUN ln -s /etc/nginx/sites-available/occg /etc/nginx/sites-enabled/occg \
    && rm /etc/nginx/sites-enabled/default

# Run Flask app behind nginx
WORKDIR /app
# nginx is started as a background service; `exec` then replaces the shell
# with uwsgi so that uwsgi receives the stop signal from `docker stop`.
# uwsgi drops privileges to www-data itself via --uid.
CMD ["/bin/sh", "-c", "service nginx start && exec uwsgi --socket /tmp/ccgapp.sock --uid www-data --manage-script-name --module ccgapp --callable app"]
Copyright 2018 Sebastian Höffner
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# Licences
Many parts of this project are taken from other projects and deserve attribution.
## Software
### OpenCCG -- automatic download
[OpenCCG](http://openccg.sourceforge.net/) is used inside the docker container.
It is developed by Michael White and other contributors.
OpenCCG is distributed under the [LGPL v2.1+](https://www.gnu.org/licenses/lgpl-2.1.html).
## Data
### GUM/OpenCCG grammar -- automatic download
Downloaded inside the docker container.
This grammar is from the DiaSpace project and can be found [here](https://www.sfbtr8.spatial-cognition.de/en/project/interaction/i5-diaspace/resources/index.html).
In most files, Robert Ross is credited alongside other contributors.
The grammar is distributed inside `english.zip` under the [LGPL v2.1+](https://www.gnu.org/licenses/lgpl-2.1.html).
### GUM 3, GUM 3 Space Ontologies
The GUM 3 and GUM 3 spatial extension ontologies are released by the University of Bremen.
They can be found under [https://www.ontospace.uni-bremen.de/ontology/stable/GUM-3.owl](https://www.ontospace.uni-bremen.de/ontology/stable/GUM-3.owl) and [https://www.ontospace.uni-bremen.de/ontology/stable/GUM-3-space.owl](https://www.ontospace.uni-bremen.de/ontology/stable/GUM-3-space.owl).
The ontologies (`GUM-3.owl`/`GUM-3-space.owl`) are distributed under the [CC BY-NC-ND 3.0](http://creativecommons.org/licenses/by-nc-nd/3.0/).
# Web OpenCCG
This repository builds a small nginx-webserver and python wrapper around OpenCCG using the GUM-space ontology, ready to run inside a docker container.
After an initial `docker-compose up`, the service can be queried using a simple POST request, e.g. using curl:
$ curl --data "The yellow robot under the table." localhost:8080
{"sentence": "the yellow robot under the table", "number": 1, "parses": {"np": "(@w2:slm-Robot(slm-Robot ^ <det>the ^ <ident>specific ^ <quant>singular) ^ @x2:gs-SpatialLocating( <gs-locatum>w2:slm-Robot ^ <gs-placement>(x1:gs-GeneralizedLocation ^ <gs-hasSpatialModality>(w3:gs-UnderProjectionExternal ^ slm-Under) ^ <gs-relatum>(w5:slm-Table ^ slm-Table ^ <det>the ^ <ident>specific ^ <quant>singular))) ^ @x3:gum-ColorPropertyAscription( <concrete>true ^ <domain>w2:slm-Robot ^ <range>(w1:slm-Yellow ^ yellow ^ <concrete>true)))"}}
Or, as an example, using Python [requests](http://docs.python-requests.org/en/master/):
```python
import requests
print(requests.post('http://localhost:8080', data={'sentence': 'The yellow robot under the table.'}).json())
```
Note that this is not production ready, as it is really slow and not optimized:
Instead of keeping one (or multiple) instances of OpenCCG running to query them faster, each request spawns an individual OpenCCG instance.
## Usage
### Response format
The response is a JSON object and contains four fields:
- `sentence`: The cleaned input sentence (all lowercase, punctuation removed, ...).
- `number`: The number of possible parses as determined from OpenCCG.
- `parses`: A dictionary of parse-identifiers (e.g. "np") to actual parses as OpenCCG outputs them.
- `http_status`: The HTTP status from the request.
Thus, an example response for the sentence "The yellow robot under the table." is:
```json
{"sentence": "the yellow robot under the table",
 "number": 1,
 "parses": {"np": "(@w2:slm-Robot(slm-Robot ^ <det>the ^ <ident>specific ^ <quant>singular) ^ @x2:gs-SpatialLocating( <gs-locatum>w2:slm-Robot ^ <gs-placement>(x1:gs-GeneralizedLocation ^ <gs-hasSpatialModality>(w3:gs-UnderProjectionExternal ^ slm-Under) ^ <gs-relatum>(w5:slm-Table ^ slm-Table ^ <det>the ^ <ident>specific ^ <quant>singular))) ^ @x3:gum-ColorPropertyAscription( <concrete>true ^ <domain>w2:slm-Robot ^ <range>(w1:slm-Yellow ^ yellow ^ <concrete>true)))"},
 "http_status": 200}
```
### Querying OpenCCG
The [OpenCCG](http://openccg.sourceforge.net/) service allows parsing sentences using the English [CCG grammar](https://www.sfbtr8.spatial-cognition.de/en/project/interaction/i5-diaspace/resources/index.html) based on the [Generalized Upper Model](https://www.sfbtr8.spatial-cognition.de/en/project/interaction/i1-ontospace/research/gum-20-30/index.html) by the SFB/TR 8 Spatial Cognition.
It is wrapped into a small web app which can either be queried using a post request (e.g. using curl or wget), or used with a crude GUI:
$ curl --data "The yellow robot under the table." localhost:8080
{"sentence": "the yellow robot under the table", "number": 1, "parses": {"np": "(@w2:slm-Robot(slm-Robot ^ <det>the ^ <ident>specific ^ <quant>singular) ^ @x2:gs-SpatialLocating( <gs-locatum>w2:slm-Robot ^ <gs-placement>(x1:gs-GeneralizedLocation ^ <gs-hasSpatialModality>(w3:gs-UnderProjectionExternal ^ slm-Under) ^ <gs-relatum>(w5:slm-Table ^ slm-Table ^ <det>the ^ <ident>specific ^ <quant>singular))) ^ @x3:gum-ColorPropertyAscription( <concrete>true ^ <domain>w2:slm-Robot ^ <range>(w1:slm-Yellow ^ yellow ^ <concrete>true)))"}}
When using the GUI (open your browser at [http://localhost:8080](http://localhost:8080)), the response is more human readable:
"the yellow robot under the table": 1 parse found.
Parse: np :
(@w2:slm-Robot(slm-Robot ^
<det>the ^
<ident>specific ^
<quant>singular) ^ @x2:gs-SpatialLocating(
<gs-locatum>w2:slm-Robot ^
<gs-placement>(x1:gs-GeneralizedLocation ^
<gs-hasSpatialModality>(w3:gs-UnderProjectionExternal ^ slm-Under) ^
<gs-relatum>(w5:slm-Table ^ slm-Table ^
<det>the ^
<ident>specific ^
<quant>singular))) ^ @x3:gum-ColorPropertyAscription(
<concrete>true ^
<domain>w2:slm-Robot ^
<range>(w1:slm-Yellow ^ yellow ^
<concrete>true)))
### Changing the port
Many webservices use port 8080 as a default port.
To change the port of this software, edit the port line in the docker-compose file: replace the left-hand side of `"8080:80"` with your port, but keep the 80 intact. For example, to set up the service on port 9043, change it to `"9043:80"`.
import json
from flask import Flask, render_template, request, redirect
import wccg
# Flask application object; the route handlers below register on it and the
# __main__ guard at the bottom runs it with Flask's built-in server.
app = Flask(__name__)
def is_non_gui_agent(ua_string):
    """Returns True for a few specific matches for User-Agent strings.

    This is a very simple heuristic to distinguish between browser agents
    and command line or programmatical agents, and the list might grow.

    Args:
        ua_string: The value of the User-Agent header. May be None or empty
            when the client did not send the header.

    Returns:
        True if the User-Agent string contains either of a set of possible
        user agents, for example curl/, wget/ or python-requests/.
        False otherwise, including for a missing or empty User-Agent.
    """
    # A missing header arrives as None; calling .lower() on it would raise
    # AttributeError. Treat it like an empty string, which matches nothing.
    if not ua_string:
        return False

    known_agents = ('python-requests/', 'curl/', 'wget/')
    lowered = ua_string.lower()
    return any(agent in lowered for agent in known_agents)
@app.route('/gui', methods=['GET', 'POST'])
def gui():
    """Presents a simple input form to a browser user.

    If the request carries a ``sentence`` form field, the sentence is parsed
    and the result is shown below the form as indented JSON. Without that
    field (e.g. when the page is first opened), only the empty form is shown.

    Returns:
        The rendered ``form.html`` template.
    """
    sentence = request.form.get('sentence')

    # Only parse when the form was actually submitted. Parsing a missing
    # sentence yields an error dictionary, which would make the page show
    # "No sentence provided." before the user has typed anything.
    response = None
    if sentence is not None:
        response = json.dumps(wccg.parse(sentence), indent=4)

    return render_template('form.html', sentence=sentence, response=response)
@app.route('/', methods=['GET', 'POST'])
def index():
    """This method handles / requests.

    If the request is from a GUI client (that is, is_non_gui_agent is False),
    a redirect to /gui is returned.

    Else, if the requests is a GET request or contains no POST data,
    an error (501 or 400, respectively) is returned.

    Otherwise, the sentence's parse is returned. If wccg was unable to parse
    the sentence, its answer is returned alongside a 422 status code.

    Returns:
        Either a redirect response, or a tuple of a JSON string and the
        HTTP status code to send with it.
    """
    # Default to an empty string so a request without a User-Agent header
    # is not passed on as None.
    ua = request.headers.get('User-Agent', '')

    if not is_non_gui_agent(ua):
        # 307 keeps the request method and body, so a POSTed form survives
        # the redirect.
        return redirect('/gui', code=307)

    if request.method == 'GET':
        return json.dumps(dict(error="Use POST.", http_status=501)), 501

    # keys() may return an iterator, a view or a set depending on the
    # Werkzeug version; iter() makes next() work for all of them, and the
    # default covers requests without any data.
    key = next(iter(request.values.keys()), None)

    # Get sentence from form field or use the first key.
    # The first key could be send e.g. by
    #     curl --data "This is the sentence." 127.0.0.1:5000
    sentence = request.form.get(key) or key

    response = wccg.parse(sentence)
    return json.dumps(response), response['http_status']
if __name__ == '__main__':
    import os

    # Development entry point using Flask's built-in server.
    # The server listens on all interfaces, so the interactive debugger
    # (which allows executing arbitrary code) must not be enabled
    # unconditionally. Set FLASK_DEBUG=1 to opt in during development.
    debug = os.environ.get('FLASK_DEBUG', '').lower() in ('1', 'true', 'yes', 'on')
    app.run(debug=debug, host='0.0.0.0')
<!DOCTYPE html>
{# Input form for the browser GUI. Template variables:
   sentence - the previously submitted sentence (may be undefined/None),
   response - the text to show below the form (omitted when empty). #}
<html lang="en">
<head>
  <meta charset="utf-8">
  <title>OpenCCG Webinterface</title>
</head>
<body>
  <h1>OpenCCG Webinterface</h1>
  <form action="/" method="POST">
    <label for="sentence">Sentence</label>
    <input type="text" id="sentence" name="sentence" value="{{ sentence or '' }}" style="width: 300px" />
    <input type="submit" value="Go!" />
  </form>
  {% if response %}
  <div>
    <pre>{{ response }}</pre>
  </div>
  {% endif %}
</body>
</html>
import re
import string
import subprocess
def parse(sentence):
    """Parses a sentence using OpenCCG's command line tool wccg.

    Before the sentence is parsed, all (English) punctuation is removed and
    it is converted to lowercase, as the grammar and OpenCCG work best this way.

    Args:
        sentence: The sentence to parse.

    Returns:
        A dictionary containing either the parsed version or an error message.
        The dictionary contains the sentence, an appropriate http_status
        and either an error or the parses. If no sentence is given, or
        nothing but whitespace remains after removing punctuation, an error
        with http_status 400 is returned and wccg is not started.
    """
    if not sentence:
        return dict(error='No sentence provided.', http_status=400)

    # Clean the input before spawning anything, so that input consisting
    # only of punctuation/whitespace does not start a wccg process.
    sentence = re.sub(f'[{re.escape(string.punctuation)}]', '', sentence).lower()
    if not sentence.strip():
        return dict(error='No sentence provided.', http_status=400)

    # wccg reads the sentence from stdin and prints its result to stdout;
    # stderr is discarded.
    completed = subprocess.run(['wccg', '-showsem', '-showall', '/english'],
                               input=sentence,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.DEVNULL,
                               universal_newlines=True)
    response = completed.stdout

    return _as_dict(response or f'"{sentence}": Unable to parse. wccg returned an empty response.')
def _as_dict(response):
"""Converts the response to JSON so it's easier to parse for other
programs."""
lines = response.splitlines()
if ': Unable to' in lines[0]:
sentence, error = response.split(':', 1)
error = ' '.join(l.strip() for l in error.splitlines()).strip()
return dict(sentence=sentence[1:-1], error=error, http_status=422)
sentence, num_parses = lines[0].split(':')
num_parses = int(num_parses.split()[0])
parses = {}
key = None
for line in lines[2:]:
if line.startswith('Parse'):
key = line.split(':')[1].strip()
parses[key] = []
elif line:
parses[key].append(line.strip())
for key, parse in parses.items():
parses[key] = ' '.join(parse)
return dict(sentence=sentence[1:-1],
number=num_parses,
parses=parses,
http_status=200)
version: "3.2"
services:
  # Single service, built from the Dockerfile in this directory.
  openccg:
    build: .
    ports:
      # host:container -- change only the left side to use another host port.
      - "8080:80"
    volumes:
      # Bind-mount the local ./app directory to /app in the container.
      - type: bind
        source: ./app
        target: /app
server {
    # Listen explicitly on port 80 instead of relying on nginx's default,
    # which depends on the privileges nginx is started with.
    listen 80;

    # Hand every request to the uwsgi application server through its
    # unix socket.
    location / {
        include uwsgi_params;
        uwsgi_pass unix:/tmp/ccgapp.sock;
    }
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment