# Author: Arne Neumann <arne-neumann@web.de>
"""
The ``util`` module contains a number of 'bread and butter' functions that are
needed to run pypolibox, but are not particularly interesting (e.g. format
converters, existence checks etc.).
There shouldn't be any code in this module that require loading other
modules from pypolibox!
"""
import os
import re
import cPickle as pickle
import yaml
from nltk.featstruct import Feature
[docs]def ensure_utf8(string_or_int):
"""
ensures that a string does not use unicode but UTF8.
converts integer input to a string.
"""
if isinstance(string_or_int, int):
string = str(string_or_int)
elif isinstance(string_or_int, unicode):
string = string_or_int.encode("UTF8")
elif isinstance(string_or_int, (str, Feature)):
string = string_or_int
else:
print "string_or_int: ", string_or_int
raise Exception("can't process input of type {0}".format(type(string_or_int)))
return string
[docs]def ensure_unicode(string_or_int):
"""
ensures that a string does use unicode instead of UTF8.
converts integer input to a unicode string.
"""
if isinstance(string_or_int, int):
string_or_int = str(string_or_int)
if isinstance(string_or_int, unicode):
string = string_or_int
else:
#print "string_or_int: ", string_or_int, " with type: ", type(string_or_int)
string = string_or_int.decode("UTF8")
return string
[docs]def flatten(nested_list):
"""flattens a list, where each list element is itself a list
:param nested_list: the nested list
:type nested_list: list
:return: flattened list
"""
flattened_list = []
for element in nested_list:
flattened_list.extend(element)
return flattened_list
[docs]def sql_array_to_set(sql_array):
""" converts SQL string "arrays" into a set of strings
our book database uses '[' and ']' to handle attributes w/ more than one
value: e.g. authors = '[Noam Chomsky][Alan Touring]'
this function turns those multi-value strings into a set with separate
values
:type sql_array: ``str``
:param sql_array: a string from the database that represents one or
more items delimited by '[' and ']', e.g. "[Noam Chomsky]" or "[Noam
Chomsky][Alan Touring]"
:rtype: ``set`` of ``str``
:return: a set of strings, where each string represents one item from
the database, e.g. ["Noam Chomsky", "Alan Touring"]
"""
item = re.compile("\[(.*?)\]")
items = item.findall(sql_array)
item_set = set()
for i in items:
item_set.add(i)
return item_set
[docs]def sql_array_to_list(sql_array):
""" converts SQL string "arrays" into a list of strings
Our book database uses '[' and ']' to handle attributes w/ more than one
value: e.g. authors = '[Noam Chomsky][Alan Touring]'. This function
turns those multi-value strings into a set with separate values.
:type sql_array: ``str``
:param sql_array: a string from the database that represents one or
more items delimited by '[' and ']', e.g. "[Noam Chomsky]" or "[Noam
Chomsky][Alan Touring]"
:rtype: ``list`` of ``str``
:return: a list of strings, where each string represents one item from
the database, e.g. ["Noam Chomsky", "Alan Touring"]
"""
item = re.compile("\[(.*?)\]")
return item.findall(sql_array)
[docs]def msgs_instance_to_list_of_msgs(messages_instance):
"""converts a ``Messages`` instance into a list of ``Message`` instances"""
return [message for message in messages_instance.messages.values()]
[docs]def freeze_all_messages(message_list):
"""
makes all messages (``FeatDict``s) immutable, which is necessary for turning
them into sets
"""
for message in message_list:
message.freeze()
return message_list
[docs]def write_to_file(str_or_obj, file_path):
"""
takes a string and writes it to a file or takes any other object, pickles
it and writes it to a file
"""
f_obj = open(file_path, "w")
if type(str_or_obj) is str:
f_obj.write(str_or_obj)
else:
pickle.dump(str_or_obj, f_obj)
f_obj.close()
[docs]def exists(thing, namespace):
"""checks if a variable/object/instance exists in the given namespace
:type thing: ``str``
:type namespace: ``dict``
:rtype: ``bool``
"""
if namespace.has_key(thing):
return True
else:
return False