#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Author: Arne Neumann <arne-neumann@web.de>
"""
The ``messages`` module contains the ``Message`` class and related classes.
``Message``s contain propositions about books. The text planner applies
``Rule``s to these ``Message``s to form ``ConstituentSet``s. ``Rule``s will
also be applied to ``ConstituentSet``s, ultimately forming one ``TextPlan``
that contains all the information to be realized.
"""
import nltk
from nltk import FeatDict, Feature
[docs]class Message(nltk.featstruct.FeatDict):
"""
A ``Message`` combines and stores knowledge about an object (here: books)
in a logical structure. Messages are constructed
during content selection (taking the user's requirements, querying a
database and processing its results), which precedes text planning.
Each ``Message`` has a ``msgType`` which describes the kind of information
it includes. For example, the msgType 'id' specifies information that is
needed to distinguish a book from other books::
[ *msgType* = 'id' ]
[ authors = frozenset(['Roland Hausser']) ]
[ codeexamples = 0 ]
[ language = 'German' ]
[ pages = 572 ]
[ proglang = frozenset([]) ]
[ target = 0 ]
[ title = 'Grundlagen der Computerlinguistik' ]
[ year = 2000 ]
"""
def __init__(self, msgType = None):
"""
``msgType`` is only specified for the ``nltk.featstruct.FeatDict`` if it
is specified by the user.
"""
if msgType:
self[nltk.featstruct.Feature('msgType')] = msgType
[docs]class Messages:
"""
represents all ``Message`` instances generated from ``Propositions`` about a
``Book``.
"""
def __init__ (self, propositions):
"""reads propositions and calls message generation functions
:type propositions: ``Propositions``
:param propositions: a ``Propositions`` class instance
"""
self.book_score = propositions.book_score
self.propositions = propositions.propositions
self.messages = {}
# does not generate a message if there are no propositions about
# its content (e.g. about 'extra')
for proposition_type in self.propositions.iterkeys():
if self.propositions[proposition_type]:
self.messages[proposition_type] = \
self.generate_message(proposition_type)
[docs] def generate_message(self, proposition_type):
"""
generates a ``Message`` from a 'simple' ``Proposition``. Simple
propositions are those kinds of propostions that only give information
about one item (i.e. describe one book) but don't compare two items
(e.g. book A is 12 years older than book B).
"""
message = Message(msgType = proposition_type)
proposition_dict = self.propositions[proposition_type]
simple_propositions = set(('id', 'lastbook_match', 'usermodel_match',
'usermodel_nomatch'))
#simple_propositions can be turned into messages without
#further 'calculations'
#keywords, authors and proglangs are stored as sets, but we need
#frozensets (hashable) when creating rules and checking for duplicate
#messages
if proposition_type in simple_propositions:
for attrib in proposition_dict.iterkeys():
value, rating = proposition_dict[attrib]
if type(value) == set:
value = frozenset(value)
message.update({attrib: (value, rating)})
if proposition_type is 'extra':
message = self.generate_extra_message(proposition_dict)
if proposition_type is 'lastbook_nomatch':
message = self.generate_lastbook_nomatch_message(proposition_dict)
if message[Feature("msgType")] is not 'id':
message = self.add_identification_to_message(message)
return message
[docs] def generate_lastbook_nomatch_message(self, proposition_dict):
"""
generates a ``Message`` from a 'lastbook_nomatch' ``Proposition``. A
lastbook_nomatch propositions states which differences exist between
two books.
"""
msg = Message(msgType='lastbook_nomatch')
for attrib in proposition_dict.iterkeys():
if attrib == 'longer':
pages, rating = proposition_dict['longer']
magnitude = FeatDict({'number': pages, 'unit': 'pages'})
length = FeatDict({'type': 'RelativeVariation',
'direction': '+', 'magnitude': magnitude,
'rating': rating})
msg.update({'length': length})
elif attrib == 'shorter':
pages, rating = proposition_dict['shorter']
magnitude = FeatDict({'number': pages, 'unit': 'pages'})
length = FeatDict({'type': 'RelativeVariation',
'direction': '-', 'magnitude': magnitude,
'rating': rating})
msg.update({'length': length})
elif attrib == 'newer':
years, rating = proposition_dict['newer']
magnitude = FeatDict({'number': years, 'unit': 'years'})
recency = FeatDict({'type': 'RelativeVariation',
'direction': '+', 'magnitude': magnitude,
'rating': rating})
msg.update({'recency': recency})
elif attrib == 'older':
years, rating = proposition_dict['older']
magnitude = FeatDict({'number': years, 'unit': 'years'})
recency = FeatDict({'type': 'RelativeVariation',
'direction': '-', 'magnitude': magnitude,
'rating': rating})
msg.update({'recency': recency})
else:
value, rating = proposition_dict[attrib]
if type(value) == set:
value = frozenset(value)
msg.update({attrib: (value, rating)})
return msg
[docs] def add_identification_to_message(self, message):
"""
Adds special 'reference_title' and 'reference_authors' attributes to
messages other than the ``id_message``.
In contrast to the ``id_message``, other messages will not be used to
produce sentences that contain their content (i.e. no statement of the
'author X wrote book Y in 1979' generated from an 'extra_message' or a
'lastbook_nomatch' message). Nevertheless, they will need to make
reference to the title and the authors of the book (e.g. 'Y is a
rather short book'). As an example, look at this 'usermodel_match'
message::
[ *msgType* = 'usermodel_match' ]
[ *reference_authors* = frozenset(['Ulrich Schmitz']) ]
[ *reference_title* = 'Computerlinguistik. Eine Einführung' ]
[ language = 'German' ]
[ proglang = frozenset(['Lisp']) ]
The message contains two bits of information (the language and
programming language used), which both have regular strings as keys.
The 'referential' keys on the other hand are ``nltk.Feature``
instances and not strings. This distinction should be regarded as
a syntactic trick used to emphasize a semantic differce (READ: if you
have a better solution, please change it).
"""
for attrib in ('title', 'authors'):
value, rating = self.propositions['id'][attrib]
if type(value) == set:
value = (frozenset(value), rating)
else:
value = (value, rating)
reference = Feature("reference_"+attrib)
message.update({reference: value})
return message
def __str__(self):
ret_str = ""
ret_str += "book score: {0}\n\n".format(self.book_score)
for message in self.messages.iterkeys():
if self.messages[message]:
ret_str += "{0}\n\n".format(self.messages[message])
return ret_str
[docs]class AllMessages:
"""
represents all Messages generated from AllPropositions about all Books()
that were returned by a query
"""
def __init__ (self, allpropositions):
"""
:type allpropositions: ``AllPropositions``
:param allpropositions: a ``AllPropositions`` class instance containing
a list of ``Propositions`` instances
This will genenerate a ``Messages`` instance (containing all ``Message``s
about a book) for each ``Propositions`` instance. It also adds a
'lastbook_title' and 'lastbook_author' to ``Message``s that compare the
current and the preceding book
"""
propositions_list = allpropositions.books
self.books = []
lastbook_id_messages = ['lastbook_match', 'lastbook_nomatch']
for index, book in enumerate(propositions_list):
if index == 0:
self.books.append(Messages(book))
else:
lastbook = propositions_list[index-1]
for message_type in lastbook_id_messages:
book.propositions[message_type]['lastbook_title'] = \
lastbook.propositions['id']['title']
book.propositions[message_type]['lastbook_authors'] = \
lastbook.propositions['id']['authors']
self.books.append(Messages(book))
def __str__(self):
ret_str = ""
for index, book in enumerate(self.books):
ret_str += "book #{0} is described with these messages:\n".format(index) + \
"==========================================\n\n{0}".format(book)
return ret_str