From 5f6796fbcd7277e60ed1053aec1c0881305cc2e1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sebastian=20H=C3=B6ffner?= <info@sebastian-hoeffner.de>
Date: Thu, 4 Oct 2018 15:35:38 +0200
Subject: [PATCH] Creating unique keys for identical identifiers.

For example, the sentence 'take the cup off the table and bring it to
the shelf' has six different parses, four labeled with 'smain', and two
labeled with 'smain/.r'. Previously, since the labels were used for the
dictionary keys, only two parses were returned (and the others
overwritten).
Now, the parses are labeled as smain, smain/0, smain/.r, smain/.r/0,
etc. to provide unique keys.
---
 app/wccg.py | 42 +++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 41 insertions(+), 1 deletion(-)

diff --git a/app/wccg.py b/app/wccg.py
index 0e95f88..1ac228d 100644
--- a/app/wccg.py
+++ b/app/wccg.py
@@ -1,3 +1,4 @@
+import itertools
 import re
 import string
 import subprocess
@@ -28,6 +29,45 @@ def parse(sentence):
     response = wccg_proc.communicate(input=sentence)[0]
     return _as_dict(response or f'"{sentence}": Unable to parse. wccg returned an empty response.')
 
+
+def ensure_unique_key(new_key, dictionary):
+    """Return new_key, or a numbered variant of it if it is already taken.
+
+    If new_key is 'main' and 'main' is already in dictionary, the following keys
+    are tried in order until one is not part of dictionary:
+
+        main/0
+        main/1
+        main/2
+        ...
+        main/10
+        main/11
+        ...
+
+
+    Args:
+        new_key: The key to be added or modified.
+        dictionary: The dictionary (or any container supporting ``in``) to check.
+    Returns:
+        new_key if it is unused, otherwise new_key with '/<number>' appended.
+
+    >>> keys = ['a', 'a/0', 'a/1', 'a/2', 'a/3']
+    >>> ensure_unique_key('a', keys)
+    'a/4'
+    >>> ensure_unique_key('b', keys)
+    'b'
+    >>> ensure_unique_key('a/1', keys)
+    'a/1/0'
+    """
+    if new_key not in dictionary:
+        return new_key
+
+    for i in itertools.count():  # unbounded 0, 1, 2, ...; ends at first free key
+        key = f'{new_key}/{i}'
+        if key not in dictionary:
+            return key
+
+
 def _as_dict(response):
     """Converts the response to JSON so it's easier to parse for other
     programs."""
@@ -43,7 +83,7 @@ def _as_dict(response):
     key = None
     for line in lines[2:]:
         if line.startswith('Parse'):
-            key = line.split(':')[1].strip()
+            key = ensure_unique_key(line.split(':')[1].strip(), parses.keys())
             parses[key] = []
         elif line:
             parses[key].append(line.strip())
-- 
GitLab