From 5f6796fbcd7277e60ed1053aec1c0881305cc2e1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sebastian=20H=C3=B6ffner?= <info@sebastian-hoeffner.de>
Date: Thu, 4 Oct 2018 15:35:38 +0200
Subject: [PATCH] Creating unique keys for identical identifiers.

For example, the sentence 'take the cup off the table and bring it to
the shelf' has six different parses, four labeled with 'smain', and two
labeled with 'smain/.r'. Previously, since the labels were used for the
dictionary keys, only two parses were returned (and the others
overwritten). Now, the parses are labeled as smain, smain/0, smain/.r,
smain/.r/0, etc. to provide unique keys.
---
 app/wccg.py | 42 +++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 41 insertions(+), 1 deletion(-)

diff --git a/app/wccg.py b/app/wccg.py
index 0e95f88..1ac228d 100644
--- a/app/wccg.py
+++ b/app/wccg.py
@@ -1,3 +1,4 @@
+import itertools
 import re
 import string
 import subprocess
@@ -28,6 +29,45 @@ def parse(sentence):
     response = wccg_proc.communicate(input=sentence)[0]
     return _as_dict(response or f'"{sentence}": Unable to parse. wccg returned an empty response.')
 
+
+def ensure_unique_key(new_key, dictionary):
+    """If new_key is already in dictionary, a number is appended to new_key.
+
+    If new_key is 'main' and 'main' is already in dictionary, the following keys
+    are tried until one is not part of dictionary:
+
+        main/0
+        main/1
+        main/2
+        ...
+        main/10
+        main/11
+        ...
+
+
+    Args:
+        new_key: The key to be added or modified.
+        dictionary: The dictionary to check new_key against.
+    Returns:
+        new_key if the key is unique, otherwise an alternative version.
+
+    >>> keys = ['a', 'a/0', 'a/1', 'a/2', 'a/3']
+    >>> ensure_unique_key('a', keys)
+    'a/4'
+    >>> ensure_unique_key('b', keys)
+    'b'
+    >>> ensure_unique_key('a/1', keys)
+    'a/1/0'
+    """
+    if new_key not in dictionary:
+        return new_key
+
+    for i in itertools.count():
+        key = f'{new_key}/{i}'
+        if key not in dictionary:
+            return key
+
+
 def _as_dict(response):
     """Converts the response to JSON so it's easier to parse for other programs."""
 
@@ -43,7 +83,7 @@ def _as_dict(response):
     key = None
     for line in lines[2:]:
         if line.startswith('Parse'):
-            key = line.split(':')[1].strip()
+            key = ensure_unique_key(line.split(':')[1].strip(), parses.keys())
             parses[key] = []
         elif line:
             parses[key].append(line.strip())
-- 
GitLab