#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Author: Arne Neumann <arne-neumann@web.de>
"""
The ``facts`` module takes the information stored in ``Book`` instances and
converts it into attribute value matrices (``Facts``). Furthermore, the
module compares each book with its predecessor (e.g. book A is newer than book
B and has code examples, while B is shorter and targets beginners ...). The
insights gathered from these comparisons are also stored in ``Facts``
instances.
"""
import datetime
class AllFacts():
    """
    Simply speaking, an ``AllFacts`` instance contains all facts about all
    books that were returned by a database query. More formally, it contains
    a ``Facts`` instance for each ``Book`` in a ``Books`` instance.

    In a ``Books`` instance, all books returned by a database query are
    sorted by the number of query parameters they match ('user model match')
    in descending order. This means that ``AllFacts`` will contain facts
    about the best-matching book, followed by facts about the second-best
    matching book (including a comparison to the best matching one),
    followed by facts about the third-best matching book (including a
    comparison to the second one) etc.
    """

    def __init__(self, b):
        """
        Generate a ``Facts`` instance for each ``Book`` in a ``Books``
        instance and collect them, in order, in ``self.books``.

        Parameters
        ----------
        b : Books
            a ``Books`` instance, which contains all ``Book`` instances
            that were constructed from the database query results. Its
            ``query_args``, ``books`` and ``scores`` attributes are read;
            ``scores`` is indexed in parallel with ``books``.
        """
        # original query args, kept for generating query_facts
        self.query_args = b.query_args
        self.books = []
        self.book_scores = b.scores

        for index, book in enumerate(b.books):
            book_score = self.book_scores[index]
            if index == 0:
                # the first book has no predecessor to be compared with
                book_facts = Facts(book, book_score, index)
            else:
                # every other book is compared with the one ranked just
                # before it
                preceding_book = b.books[index - 1]
                book_facts = Facts(book, book_score, index, preceding_book)
            self.books.append(book_facts)

    def __str__(self):
        """
        Return a printable listing of the facts for each book, numbered by
        position (starting at 0).
        """
        sections = []
        for index, book in enumerate(self.books):
            sections.append("facts about book #{0}:\n".format(index))
            sections.append("--------------------\n")
            sections.append("{0}\n\n".format(book))
        # join once instead of growing a string inside the loop
        return "".join(sections)
class Facts():
    """
    A ``Facts`` instance represents facts about a single book, but also
    contains a comparison of that particular book with its predecessor.
    """

    def __init__(self, book, book_score, index=0, preceding_book=False):
        """
        Uses the facts/metadata retrieved from the sqlite3 database, and
        generates facts in form of an attribute value matrix. The facts are
        grouped logically. A ``Facts`` instance basically consists of a
        dictionary (stored in ``self.facts``) containing these four keys::

            (1) 'id_facts'
            (2) 'extra_facts'
            (3) 'query_facts'
            (4) 'lastbook_facts'

        The first book's ``Facts`` instance will not contain
        'lastbook_facts', as there is no previous book in the list that it
        could be compared to.

        :param book: a ``Book`` instance
        :type book: ``Book``
        :param book_score: the score of the book that was calculated in
            :class:`Books.get_book_ranks()`
        :type book_score: ``float``
        :param index: the index of the book in the ``Books`` list of books
        :type index: ``int``
        :param preceding_book: the ``Book`` that precedes this one in the
            list; both books will then be compared. ``False`` (the default)
            or ``None`` means that there is no preceding book.
        :type preceding_book: ``Book`` or ``False``
        """
        facts = {}
        facts["id_facts"] = self.generate_id_facts(index, book)
        # NOTE(review): ``generate_extra_facts`` is not defined in the
        # visible part of this class -- confirm that it is provided
        # elsewhere, otherwise this call raises ``AttributeError``.
        facts["extra_facts"] = self.generate_extra_facts(index, book)
        facts["query_facts"] = self.generate_query_facts(index, book,
                                                         book_score)
        # Identity checks instead of ``== False``: a ``Book`` is never
        # mistaken for the sentinel, whatever its ``__eq__`` does.
        if preceding_book is not False and preceding_book is not None:
            # additional facts, comparing the current with the preceding
            # book
            facts["lastbook_facts"] = self.generate_lastbook_facts(
                index, book, preceding_book)
        self.facts = facts

    def generate_id_facts(self, index, book):
        """
        generates a dictionary of id facts about the current book which will
        be stored in ``self.facts["id_facts"]``. In contrast to other facts,
        ``id_facts`` are those kind of facts that can be directly retrieved
        from the database (i.e. there is no comparison between books or
        reasoning involved). The id_facts dictionary contains the following
        keys::

            id_facts keys       database book table columns
            'authors'
            'codeexamples'      'examples'
            'exercises'
            'keywords'
            'language'          'lang'
            'pages'
            'proglang'          'plang'
            'target'
            'title'
            'year'

        The key names should be self-explanatory. In those cases where they
        do not exactly match their counterparts in the database, the
        corresponding database table column name is given in the table
        above.

        :param index: the index of the book in the ``Books`` list of books
            (not used by this method)
        :type index: ``int``
        :param book: a ``Book`` instance
        :type book: ``Book``
        :return: a dictionary with the keys described above
        :rtype: ``dict``
        """
        attributes = ('authors', 'codeexamples', 'exercises', 'keywords',
                      'language', 'pages', 'proglang', 'target', 'title',
                      'year')
        id_facts = {}
        for attribute in attributes:
            # copy each book attribute of the same name into the dictionary
            id_facts[attribute] = getattr(book, attribute)
        return id_facts

    def generate_query_facts(self, index, book, book_score):
        """
        generates facts that describe if a book matches (parts of) the query
        (a.k.a. the user model). A typical query_facts dictionary will look
        like this::

            query_facts:
                usermodel_nomatch: {'codeexamples': 0}
                usermodel_match: {'exercises': 1, 'keywords':
                    set(['semantics', 'parsing']), 'language':
                    'German'}
                book_score: 0.8

        The book described in this example matches 80 % of the user
        requirements (it contains exercises and deals with semantics and
        parsing and is written in German) but does not contain code examples
        (as was asked for by the user).

        Query attributes with an empty/false value are treated as "not
        asked for" and are skipped. For single-valued attributes the value
        stored is always the *book's* value; for set-valued attributes
        (``keywords``, ``proglang``) the requested values are split into
        those the book has and those it lacks.

        :param index: the index of the book in the ``Books`` list of books
            (not used by this method)
        :type index: ``int``
        :param book: a ``Book`` instance; its ``query_args`` attribute holds
            the query it is compared against
        :type book: ``Book``
        :param book_score: the score of the book that was calculated in
            :class:`Books.get_book_ranks()`
        :type book_score: ``float``
        :return: a dictionary that contains three keys, the ``book_score``,
            the ``usermodel_match`` as well as the ``usermodel_nomatch``.
            'usermodel_match' contains all the features that were requested
            by the user and are present in the book. 'usermodel_nomatch'
            contains all features that were requested but are missing from
            the book.
        :rtype: ``dict``
        """
        matches = {}
        mismatches = {}
        query_facts = {}
        query_facts["book_score"] = book_score
        query_facts["usermodel_match"] = matches
        query_facts["usermodel_nomatch"] = mismatches

        query_args = book.query_args
        simple_attributes = ('codeexamples', 'exercises', 'language',
                             'pagerange', 'target')
        # complex attributes may contain more than 1 value
        complex_attributes = ('keywords', 'proglang')

        for attribute in simple_attributes:
            requested = getattr(query_args, attribute)
            if not requested:
                continue  # the query has no value for this attribute
            actual = getattr(book, attribute)
            if requested == actual:
                matches[attribute] = actual
            else:
                mismatches[attribute] = actual

        for attribute in complex_attributes:
            requested_values = getattr(query_args, attribute)
            if not requested_values:
                continue  # the query has no value for this attribute
            book_values = getattr(book, attribute)
            matching_values = set()
            nonmatching_values = set()
            for value in requested_values:
                if value in book_values:
                    matching_values.add(value)
                else:
                    nonmatching_values.add(value)
            # only non-empty results are recorded
            if matching_values:
                matches[attribute] = matching_values
            if nonmatching_values:
                mismatches[attribute] = nonmatching_values
        return query_facts

    def generate_lastbook_facts(self, index, book, preceding_book):
        """
        generates facts that compare the current book with the preceding
        one. A typical example of a lastbook_facts dictionary would look
        like this::

            lastbook_facts:
                lastbook_nomatch:
                    {'language': 'German',
                     'keywords_preceding_book_only':
                        set(['pragmatics', 'chart parsing']),
                     'keywords_current_book_only':
                        set([' ', 'grammar', 'language hierarchy',
                             'corpora', 'syntax', 'morphology',
                             'left associative grammar']),
                     'codeexamples': 0,
                     'proglang': set(['Lisp']),
                     'newer': 11,
                     'keywords':
                        set([' ', 'grammar', 'language hierarchy',
                             'corpora', 'syntax',
                             'left associative grammar', 'morphology',
                             'chart parsing', 'pragmatics']),
                     'proglang_preceding_book_only':
                        set(['Lisp'])}
                lastbook_match:
                    {'exercises': 1, 'keywords': set(['semantics',
                     'parsing']), 'target': 0, 'pagerange': 1}

        This method will calculate if the book is newer/older/shorter/longer
        than its predecessor (if so, it will store the difference as a
        number). For keys that have sets as their values (``keywords`` and
        ``proglang``), the resulting dictionary will list which values
        differed and which were only present in either the preceding or the
        current book.

        :param index: the index of the book in the ``Books`` list of books
            (not used by this method)
        :type index: ``int``
        :param book: a ``Book`` instance
        :type book: ``Book``
        :param preceding_book: the ``Book`` instance that precedes ``book``
            and that it is compared with
        :type preceding_book: ``Book``
        :return: a dictionary with two keys: ``lastbook_match`` and
            ``lastbook_nomatch``, which in turn are dictionaries themselves
            and contain facts that are shared between the two books
            (lastbook_match) or that differ between the two
            (lastbook_nomatch).
        :rtype: ``dict``
        """
        matches = {}
        mismatches = {}
        lastbook_facts = {}
        lastbook_facts['lastbook_match'] = matches
        lastbook_facts['lastbook_nomatch'] = mismatches

        simple_comparisons = ('codeexamples', 'exercises', 'language',
                              'target')
        set_comparisons = ('keywords', 'proglang')

        for attribute in simple_comparisons:
            current_value = getattr(book, attribute)
            # in both cases the value of the *current* book is recorded
            if current_value == getattr(preceding_book, attribute):
                matches[attribute] = current_value
            else:
                mismatches[attribute] = current_value

        for attribute in set_comparisons:
            current_values = getattr(book, attribute)
            preceding_values = getattr(preceding_book, attribute)
            if not current_values and not preceding_values:
                continue  # both are empty: nothing to compare

            shared_values = current_values.intersection(preceding_values)
            if shared_values:
                matches[attribute] = shared_values

            # Stored unconditionally, i.e. also as an empty set when both
            # books have exactly the same values.
            mismatches[attribute] = \
                current_values.symmetric_difference(preceding_values)

            current_only_values = current_values.difference(preceding_values)
            if current_only_values:
                mismatches[attribute + '_current_book_only'] = \
                    current_only_values

            preceding_only_values = \
                preceding_values.difference(current_values)
            if preceding_only_values:
                mismatches[attribute + '_preceding_book_only'] = \
                    preceding_only_values

        if book.year == preceding_book.year:
            matches["year"] = book.year
        elif book.year > preceding_book.year:
            mismatches["newer"] = book.year - preceding_book.year
        else:
            mismatches["older"] = preceding_book.year - book.year

        # Equality is decided on ``pagerange``; the size of the difference
        # is taken from the actual page counts.
        if book.pagerange == preceding_book.pagerange:
            matches["pagerange"] = book.pagerange
        elif book.pages > preceding_book.pages:
            mismatches["longer"] = book.pages - preceding_book.pages
        else:  # current book is shorter (or has the same number of pages)
            mismatches["shorter"] = preceding_book.pages - book.pages
        return lastbook_facts

    def __str__(self):
        """
        Return a printable version of the ``Facts`` instance that omits
        empty values.
        """
        # Lists, dicts and sets can be empty. A plain truth test
        # (``if val:``) is not used here, since it would not only exclude
        # empty lists/dicts/sets but also ``0``.
        signifiers_of_emptiness = [[], {}, set()]
        parts = []
        # ``items()`` instead of ``iteritems()``: works on Python 2 and 3
        for key, value in self.facts.items():
            if value in signifiers_of_emptiness:
                continue
            parts.append("\n{0}:\n".format(key))
            for attribute, val in value.items():
                if val not in signifiers_of_emptiness:
                    parts.append("\t{0}: {1}\n".format(attribute, val))
        return "".join(parts)