Source code for pypolibox.pypolibox

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Author: Arne Neumann <arne-neumann@web.de>

"""
The pypolibox module is the 'main' module of the pypolibox package. It's the
module you'd usually call from the command line or load into your Python
interpreter. It just imports all the important modules and runs some demo
code in case it is run from the command line without any arguments.
"""

import sys
from nltk.featstruct import Feature

from database import Query, Results, Book, Books
from facts import Facts, AllFacts
from propositions import Propositions, AllPropositions
from textplan import (TextPlan, TextPlans, generate_textplan,
                      linearize_textplan, textplans2xml)
from hlds import etreeprint
from messages import Message, Messages, AllMessages
from rules import ConstituentSet, Rule, Rules


[docs]def test():
    """test and realize all text plans for all test queries"""
    import cPickle
    atp = cPickle.load(open("data/alltextplans.pickle", "r"))
    for textplans in atp:
        for textplan in textplans.document_plans:
            check_and_realize_textplan(textplan)


[docs]def generate_textplans(query):
    """generates all text plans for a database query"""
    books = Books(Results(query))
    return TextPlans(AllMessages(AllPropositions(AllFacts(books))))


[docs]def initialize_openccg(lang='de'):
    """
    starts OpenCCG's tccg realizer as a server in the background (ca. 20s).
    """
    from realization import OpenCCG
    return OpenCCG(lang=lang)


[docs]def check_and_realize_textplan(openccg, textplan, lexicalize_message_block, phrase2sentence):
    """
    realizes a text plan and warns about message blocks that cannot be
    realized due to current restrictions in the OpenCC grammar.

    Parameters
    ----------
    openccg : OpenCCG
        a running OpenCCG instance
    textplan : TextPlan
        text plan to be realized
    """
    msg_blocks = linearize_textplan(textplan)
    for msg_block in msg_blocks:
        try:
            lexicalized_msg_block = lexicalize_message_block(msg_block)
            print "The {0} message block can be realized " \
                  "as follows:\n".format(msg_block[Feature("msgType")])
            for lexicalized_phrase in lexicalized_msg_block:
                lexicalized_sentence = phrase2sentence(lexicalized_phrase)
                for realized_sent in openccg.realize(lexicalized_sentence):
                    print realized_sent

        except NotImplementedError, err:
            print err
            print "The message block contains these messages:\n", msg_block, \
                  "\n\n**********\n\n"
        print

[docs]def main():
    """
    This is the pypolibox commandline interface. It allows you to query
    the database and generate book recommendatins, which will either be
    handed to OpenCCG for generating sentences or printed to stdout in
    an XML format representing the text plans.
    """
    query = Query(sys.argv[1:])

    output_format = query.query_args.output_format
    valid_output_formats = ['openccg', 'hlds', 'textplan-xml', 'textplan-featstruct']
    if output_format not in valid_output_formats:
        sys.stderr.write("Output format must be one of: {}\n".format(valid_output_formats))
        sys.exit(1)

    try:
        lexicalize_messageblocks = \
            __import__("lexicalize_messageblocks_%s" % query.query_args.output_language, globals(), locals(), [], -1)
    except ImportError:
        raise

    try:
        lexicalization = \
            __import__("lexicalization_%s" % query.query_args.output_language, globals(), locals(), [], -1)
    except ImportError:
        raise

    lexicalize_message_block = lexicalize_messageblocks.lexicalize_message_block
    phrase2sentence = lexicalization.phrase2sentence


    textplans = generate_textplans(query)

    if output_format == 'openccg':
        openccg = initialize_openccg(lang=query.query_args.output_language)
        print "{} text plans will be generated.".format(len(textplans.document_plans))
        for i, textplan in enumerate(textplans.document_plans):
            print "Generating text plan #%i:\n" % i
            check_and_realize_textplan(openccg, textplan, lexicalize_message_block, phrase2sentence)
    elif output_format == 'hlds':
        from copy import deepcopy
        from hlds import (Diamond, Sentence, diamond2sentence,
            add_nom_prefixes, create_hlds_file)

        for i, textplan in enumerate(textplans.document_plans):
            print "Text plan #%i:\n" % i

            # TODO: refactor to avoid code duplication w/
            # check_and_realize_textplan()
            msg_blocks = linearize_textplan(textplan)
            for msg_block in msg_blocks:
                try:
                    lexicalized_msg_block = lexicalize_message_block(msg_block)
                    print "The {0} message block can be realized " \
                          "as follows:\n".format(msg_block[Feature("msgType")])
                    for lexicalized_phrase in lexicalized_msg_block:
                        lexicalized_sentence = phrase2sentence(lexicalized_phrase)

                        # TODO: refactor to avoid duplication w/ OpenCCG.realize
                        temp_sentence = deepcopy(lexicalized_sentence)

                        if isinstance(lexicalized_sentence, Diamond):
                            temp_sentence = diamond2sentence(temp_sentence)

                        add_nom_prefixes(temp_sentence)
                        print create_hlds_file(temp_sentence,
                            mode="realize", output="xml")

                except NotImplementedError, err:
                    print err
                    print "The message block contains these messages:\n", msg_block, \
                          "\n\n**********\n\n"

    elif output_format == 'textplan-featstruct':
        for i, textplan in enumerate(textplans.document_plans):
            print "Text plan #%i:\n" % i
            print textplan, "\n\n"

    else: # output_format == 'textplan-xml'
        etreeprint(textplans2xml(textplans))


if __name__ == "__main__":
    main()