Source code for pypolibox.messages

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Author: Arne Neumann <arne-neumann@web.de>

"""
The ``messages`` module contains the ``Message`` class and related classes.

``Message``s contain propositions about books. The text planner applies
``Rule``s to these ``Message``s to form ``ConstituentSet``s. ``Rule``s will
also be applied to ``ConstituentSet``s, ultimately forming one ``TextPlan``
that contains all the information to be realized.
"""

import nltk
from nltk import FeatDict, Feature


[docs]class Message(nltk.featstruct.FeatDict): """ A ``Message`` combines and stores knowledge about an object (here: books) in a logical structure. Messages are constructed during content selection (taking the user's requirements, querying a database and processing its results), which precedes text planning. Each ``Message`` has a ``msgType`` which describes the kind of information it includes. For example, the msgType 'id' specifies information that is needed to distinguish a book from other books:: [ *msgType* = 'id' ] [ authors = frozenset(['Roland Hausser']) ] [ codeexamples = 0 ] [ language = 'German' ] [ pages = 572 ] [ proglang = frozenset([]) ] [ target = 0 ] [ title = 'Grundlagen der Computerlinguistik' ] [ year = 2000 ] """ def __init__(self, msgType = None): """ ``msgType`` is only specified for the ``nltk.featstruct.FeatDict`` if it is specified by the user. """ if msgType: self[nltk.featstruct.Feature('msgType')] = msgType
[docs]class Messages: """ represents all ``Message`` instances generated from ``Propositions`` about a ``Book``. """ def __init__ (self, propositions): """reads propositions and calls message generation functions :type propositions: ``Propositions`` :param propositions: a ``Propositions`` class instance """ self.book_score = propositions.book_score self.propositions = propositions.propositions self.messages = {} # does not generate a message if there are no propositions about # its content (e.g. about 'extra') for proposition_type in self.propositions.iterkeys(): if self.propositions[proposition_type]: self.messages[proposition_type] = \ self.generate_message(proposition_type)
[docs] def generate_message(self, proposition_type): """ generates a ``Message`` from a 'simple' ``Proposition``. Simple propositions are those kinds of propostions that only give information about one item (i.e. describe one book) but don't compare two items (e.g. book A is 12 years older than book B). """ message = Message(msgType = proposition_type) proposition_dict = self.propositions[proposition_type] simple_propositions = set(('id', 'lastbook_match', 'usermodel_match', 'usermodel_nomatch')) #simple_propositions can be turned into messages without #further 'calculations' #keywords, authors and proglangs are stored as sets, but we need #frozensets (hashable) when creating rules and checking for duplicate #messages if proposition_type in simple_propositions: for attrib in proposition_dict.iterkeys(): value, rating = proposition_dict[attrib] if type(value) == set: value = frozenset(value) message.update({attrib: (value, rating)}) if proposition_type is 'extra': message = self.generate_extra_message(proposition_dict) if proposition_type is 'lastbook_nomatch': message = self.generate_lastbook_nomatch_message(proposition_dict) if message[Feature("msgType")] is not 'id': message = self.add_identification_to_message(message) return message
[docs] def generate_extra_message(self, proposition_dict): """ generates a ``Message`` from an 'extra' ``Proposition``. Extra propositions only exist if a book is remarkably new / old or very short / long. """ msg = Message(msgType='extra') for attrib in proposition_dict.iterkeys(): if attrib == 'year': description, rating = proposition_dict['year'] recency = FeatDict({'description': description, 'rating': rating}) msg.update({'recency': recency}) else: value, rating = proposition_dict[attrib] if type(value) == set: value = frozenset(value) msg.update({attrib: (value, rating)}) return msg
[docs] def generate_lastbook_nomatch_message(self, proposition_dict): """ generates a ``Message`` from a 'lastbook_nomatch' ``Proposition``. A lastbook_nomatch propositions states which differences exist between two books. """ msg = Message(msgType='lastbook_nomatch') for attrib in proposition_dict.iterkeys(): if attrib == 'longer': pages, rating = proposition_dict['longer'] magnitude = FeatDict({'number': pages, 'unit': 'pages'}) length = FeatDict({'type': 'RelativeVariation', 'direction': '+', 'magnitude': magnitude, 'rating': rating}) msg.update({'length': length}) elif attrib == 'shorter': pages, rating = proposition_dict['shorter'] magnitude = FeatDict({'number': pages, 'unit': 'pages'}) length = FeatDict({'type': 'RelativeVariation', 'direction': '-', 'magnitude': magnitude, 'rating': rating}) msg.update({'length': length}) elif attrib == 'newer': years, rating = proposition_dict['newer'] magnitude = FeatDict({'number': years, 'unit': 'years'}) recency = FeatDict({'type': 'RelativeVariation', 'direction': '+', 'magnitude': magnitude, 'rating': rating}) msg.update({'recency': recency}) elif attrib == 'older': years, rating = proposition_dict['older'] magnitude = FeatDict({'number': years, 'unit': 'years'}) recency = FeatDict({'type': 'RelativeVariation', 'direction': '-', 'magnitude': magnitude, 'rating': rating}) msg.update({'recency': recency}) else: value, rating = proposition_dict[attrib] if type(value) == set: value = frozenset(value) msg.update({attrib: (value, rating)}) return msg
[docs] def add_identification_to_message(self, message): """ Adds special 'reference_title' and 'reference_authors' attributes to messages other than the ``id_message``. In contrast to the ``id_message``, other messages will not be used to produce sentences that contain their content (i.e. no statement of the 'author X wrote book Y in 1979' generated from an 'extra_message' or a 'lastbook_nomatch' message). Nevertheless, they will need to make reference to the title and the authors of the book (e.g. 'Y is a rather short book'). As an example, look at this 'usermodel_match' message:: [ *msgType* = 'usermodel_match' ] [ *reference_authors* = frozenset(['Ulrich Schmitz']) ] [ *reference_title* = 'Computerlinguistik. Eine Einf├╝hrung' ] [ language = 'German' ] [ proglang = frozenset(['Lisp']) ] The message contains two bits of information (the language and programming language used), which both have regular strings as keys. The 'referential' keys on the other hand are ``nltk.Feature`` instances and not strings. This distinction should be regarded as a syntactic trick used to emphasize a semantic differce (READ: if you have a better solution, please change it). """ for attrib in ('title', 'authors'): value, rating = self.propositions['id'][attrib] if type(value) == set: value = (frozenset(value), rating) else: value = (value, rating) reference = Feature("reference_"+attrib) message.update({reference: value}) return message
def __str__(self): ret_str = "" ret_str += "book score: {0}\n\n".format(self.book_score) for message in self.messages.iterkeys(): if self.messages[message]: ret_str += "{0}\n\n".format(self.messages[message]) return ret_str
[docs]class AllMessages: """ represents all Messages generated from AllPropositions about all Books() that were returned by a query """ def __init__ (self, allpropositions): """ :type allpropositions: ``AllPropositions`` :param allpropositions: a ``AllPropositions`` class instance containing a list of ``Propositions`` instances This will genenerate a ``Messages`` instance (containing all ``Message``s about a book) for each ``Propositions`` instance. It also adds a 'lastbook_title' and 'lastbook_author' to ``Message``s that compare the current and the preceding book """ propositions_list = allpropositions.books self.books = [] lastbook_id_messages = ['lastbook_match', 'lastbook_nomatch'] for index, book in enumerate(propositions_list): if index == 0: self.books.append(Messages(book)) else: lastbook = propositions_list[index-1] for message_type in lastbook_id_messages: book.propositions[message_type]['lastbook_title'] = \ lastbook.propositions['id']['title'] book.propositions[message_type]['lastbook_authors'] = \ lastbook.propositions['id']['authors'] self.books.append(Messages(book)) def __str__(self): ret_str = "" for index, book in enumerate(self.books): ret_str += "book #{0} is described with these messages:\n".format(index) + \ "==========================================\n\n{0}".format(book) return ret_str