# Added to string_utils.py by commit 1d53df7c ("add function for converting
# alignment to sexp", Guillaume Horel, 2013-08-17).
def alignment_to_sexp(alignment, sexp, l2):
    """Print ``sexp`` line by line, substituting the text of "word" lines.

    Lines that do not contain the substring ``"word"`` are printed as-is.
    For each line that does, the next entry of ``alignment`` is consumed:

    * if the entry is ``-1``, processing stops immediately and neither that
      line nor any later line is printed;
    * otherwise the entry is iterated as indices into ``l2``; the selected
      items are joined with single spaces and replace the word text that
      follows the four numeric fields on the line.  A line that does not
      match the expected layout is printed unchanged.

    Args:
        alignment: iterable yielding, per "word" line, either ``-1`` or an
            iterable of integer indices into ``l2``.
        sexp: iterable of text lines.
        l2: sequence of replacement strings, indexed by the alignment
            entries.

    Returns:
        None.  The result is written to standard output.

    Raises:
        StopIteration: if ``sexp`` has more "word" lines than ``alignment``
            has entries.
        IndexError: if an alignment entry refers past the end of ``l2``.
    """
    import re  # local import so the function does not rely on file-level state

    # Four space-separated integers, one whitespace character, a space, the
    # word to replace, then the run of closing parentheses ending the line.
    # NOTE(review): the pattern requires two whitespace characters between
    # the last number and the word, as the original did -- confirm this
    # matches the real input layout.
    word_line = re.compile(
        r"(?P<coords>\d+ \d+ \d+ \d+\s) \w+(?P<closing>\)+$)")

    indices = iter(alignment)
    for line in sexp:
        if "word" in line:
            index = next(indices)
            if index == -1:
                # Sentinel entry: stop emitting output altogether.
                break
            replacement = " ".join(l2[i] for i in index)
            # A function replacement inserts the text literally, so
            # backslashes in the words are not read as group references.
            # The result must be re-bound: str is immutable and the original
            # discarded it, leaving every line unchanged.
            line = word_line.sub(
                lambda m: m.group("coords") + replacement + m.group("closing"),
                line)
            # NOTE(review): the original also called
            # line.encode('string-escape') and discarded the result, so the
            # output was never escaped.  That behaviour is kept here; confirm
            # whether escaping is actually wanted before adding it.
        print(line)