LunaGen/LunaGen/src/lunagen_backup.py

#!/usr/bin/python3
# encoding: utf-8
'''
lunagen -- "Lunatics!" Project Release Site Generator

LunaGen generates a small static HTML website based on YAML
data documents containing the key release information. This
generates an index.html file containing the episode lists
for all series, individual release pages for each episode,
and additional pages for character list and story background.

This is meant to be a fan-focused site.

It also contains affiliate link, sponsor, advertising, and
other fundraising elements.

@author:     Terry Hancock

@copyright:  2019 Anansi Spaceworks.

@license:    GNU General Public License, version 2.0 or later. (Python code)
             Creative Commons Attribution-ShareAlike, version 3.0 or later. (Website Templates).

@contact:    digitante@gmail.com
'''

import sys
import os
#from shutil import copytree, rmtree
from distutils.dir_util import remove_tree, copy_tree
from optparse import OptionParser
import random
from collections import OrderedDict

import yaml
import jinja2

import lunagen

# NOTE(review): this call runs every time the module is imported, not only
# when it is executed as a script. `demo` is not defined in this file;
# presumably it is provided by the `lunagen` module imported above --
# confirm that this call is still intended here.
lunagen.demo()

class Config(object):
    """
    Installation configuration variables.

    DEBUG, TESTRUN and PROFILE are read by the ``if __name__ == "__main__":``
    block at the bottom of this file; THEMES is read by
    ``LunaGen._load_theme`` and ``__updated__`` by ``main``.

    TODO: Probably change this to load from a YAML file in the future.
    """
    __all__ = []
    # NOTE(review): stored as a float rather than a string; main() builds its
    # version banner from a separate hard-coded "v0.1".
    __version__ = 0.1
    __date__ = '2019-09-19'
    # Shown as the build date in main()'s version string.
    __updated__ = '2019-09-19'

    # When true, "-v" is appended to sys.argv if the file is run as a script.
    DEBUG = True
    # When true, doctest.testmod() is run before main().
    TESTRUN = False
    # When true, main() is run under cProfile and the script exits afterwards.
    PROFILE = False
    # Joined with the site's 'theme' name to locate the theme directory.
    # This is a relative path, so it is resolved against the current working
    # directory of the process.
    THEMES = '../themes'

class LunaGen(jinja2.Environment):
    """
    Generator for a LunaGen site, based on contents of srcdir:

       <srcdir>/data - YAML files (handwritten content)
       <srcdir>/templates - Jinja2 site templates
       <srcdir>/skeleton  - unchanging parts of the site (copied)

    The new site is created in <tgtdir>.
    """
    def __init__(self, srcdir, tgtdir=None, verbosity=0):
        """
        Resolve the project directories, load every YAML data set and
        then initialize the underlying Jinja2 environment.

        srcdir    - project directory holding 'data', 'templates' and 'skeleton'.
        tgtdir    - output directory; any false value selects '<srcdir>/site'.
        verbosity - any true value turns on progress messages.
        """
        self.srcdir = os.path.abspath(srcdir)
        self.tgtdir = (os.path.abspath(tgtdir) if tgtdir
                       else os.path.join(self.srcdir, 'site'))
        self.datadir, self.templates, self.skeleton = (
            os.path.join(self.srcdir, subdir)
            for subdir in ('data', 'templates', 'skeleton'))

        self.verbose = verbosity

        if self.verbose:
            report = (
                ("Source directory: %s", self.srcdir),
                ("Target directory: %s", self.tgtdir),
                ("YAML content should be in: %s", self.datadir),
                ("Jinja2 templates should be in: %s", self.templates),
                ("Skeleton website should be in: %s", self.skeleton),
            )
            for message, location in report:
                print(message % location)

        #TODO: Could make sure these directories exist

        # Load up the data from YAML files. The order matters: the site data
        # must exist before the theme (which reads the theme name from it)
        # and before the loaders that merge their content into it.
        loaders = (
            self._load_sitedata,
            self._load_theme,
            self._load_affiliates,
            self._load_softwarelist,
            self._load_products,
            self._load_serieslist,
        )
        for load in loaders:
            load()

        # Template lookup order: data-specific templates first, then the
        # theme's templates, then the project's own templates.
        search_path = (
            os.path.join(self.datadir, 'templates'),
            self.theme['path'],
            self.templates,
        )
        template_loaders = [jinja2.FileSystemLoader(folder) for folder in search_path]
        super().__init__(
            loader=jinja2.ChoiceLoader(template_loaders),
            autoescape=jinja2.select_autoescape(['html','xml']))

    @staticmethod
    def _paginate(seq, pagesize):
        """
        Given a sequence of objects, break it into a book of
        pages, each containing no more than pagesize objects:

        >>> test = [1,'2','three', 4, 5, 'six', 'seven', 8, 9, True, False, None, 0]
        >>> LunaGen._paginate(test, 4)
        [[1, '2', 'three', 4], [5, 'six', 'seven', 8], [9, True, False, None], [0]]
        >>>
        """
        book = []
        page = []
        for i,ob in enumerate(seq):
            if i%pagesize==0:
                if i>0: book.append(page)
                page = []
            page.append(ob)
        if len(page)>0:
            book.append(page)
        return book

    @staticmethod
    def _paginate_sponsors(series, episode):
        """
        Regroup sponsors into pages which:
         - Contain only one kind of sponsor
         - Contain no more than the 'page' limit number of sponsors per page
         - Are tagged with the sponsortype so we can find the right tempate for them:

        >>> series = {'sponsortypes':
        ...  {'A':{'page':1,'limit':1},
        ...   'B':{'page':3, 'limit':10},
        ...   'C':{'page':4, 'limit':20}}}
        ...
        >>> episode = {'sponsors':
        ...  {'A':list(range(2)),
        ...   'B':list(range(7)),
        ...   'C':list(range(22))}}
        ...
        >>> LunaGen._paginate_sponsors(series, episode)
        [('A', [0]), ('B', [0, 1, 2]), ('B', [3, 4, 5]), ('B', [6]), ('C', [0, 1, 2, 3]), ('C', [4, 5, 6, 7]), ('C', [8, 9, 10, 11]), ('C', [12, 13, 14, 15]), ('C', [16, 17, 18, 19])]
        >>>
        """
        paged_sponsors = []
        for spkey, sponsortype in series['sponsortypes'].items():
            if spkey not in episode['sponsors']:
                episode['sponsors'][spkey] = []
            #if 'excludes' in sponsortype:
            #    for excluded in sponsortype['excludes']:
            #        if excluded in episode['sponsors'] and episode['sponsors'][excluded]:
            #            print("WARNING: excluded sponsortype %s will be ignored, because of existing %s." %
            #                    (excluded, spkey))
            if 'page'in sponsortype:
                paged = LunaGen._paginate(
                    episode['sponsors'][spkey][:sponsortype['limit']],
                        sponsortype['page'])
                tags = [spkey] * len(paged)
                paged_sponsors.extend(zip(tags, paged))
        return paged_sponsors

    @staticmethod
    def _fix_series(series):
        """
        Modify series data to correct certain datatypes that are not
        natively supported by YAML (like OrderedDict):
        >>> series = {'credits':{
        ...         'a':{'labels':'ordered'},
        ...         'b':{'labels':[['A','-A-'],['B','-B-']]}}}
        ...
        >>> LunaGen._fix_series(series)
        >>> series['credits']['b']['labels']
        OrderedDict([('A', '-A-'), ('B', '-B-')])
        >>>
        """
        for key, credit in series['credits'].items():
            if type(credit['labels']) != type(''):
                credit['labels']=OrderedDict(credit['labels'])

    def _collect_stylesheets(self, *extras):
        """
        Build a new list of stylesheet names without duplicates.

        Names are taken, in this order, from the theme, from the site data
        and from the 'stylesheets' entry of each item in extras (items
        without such an entry contribute nothing). Only the first occurrence
        of a name is kept, so the original ordering is preserved.
        """
        candidates = list(self.theme['stylesheets'])
        candidates += self.sitedata['stylesheets']
        for extra in extras:
            if 'stylesheets' in extra:
                candidates += extra['stylesheets']

        unique = []
        for sheet in candidates:
            if sheet not in unique:
                unique.append(sheet)
        return unique

    def _load_sitedata(self):
        """
        Load the global site data from <datadir>/site.yaml into self.sitedata.

        The file is read as UTF-8 and parsed with yaml.safe_load, so only
        plain YAML types are accepted. An empty file yields an empty dict,
        and a 'stylesheets' list is always present afterwards because the
        other loaders read it.

        Raises FileNotFoundError if site.yaml does not exist.
        """
        if self.verbose: print("Loading global site data.")
        site_path = os.path.join(self.datadir, 'site.yaml')
        with open(site_path, 'rt', encoding='utf-8') as sitedatafile:
            # yaml.load() without a Loader is unsafe and is rejected outright
            # by PyYAML 6 and later.
            self.sitedata = yaml.safe_load(sitedatafile) or {}
        self.sitedata.setdefault('stylesheets', [])

    def _load_theme(self):
        """
        Load the theme named by self.sitedata['theme'] into self.theme.

        The theme directory is Config.THEMES joined with the theme name. Its
        'theme.yaml' is read as UTF-8 with yaml.safe_load and merged over the
        defaults (an empty 'stylesheets' list). An empty theme.yaml simply
        leaves the defaults in place. self.theme['path'] records the theme
        directory.

        Raises FileNotFoundError if the theme directory or theme.yaml is missing.
        """
        if self.verbose: print("Loading theme data.")
        self.theme = {'stylesheets': []}  # Default values
        themedir = os.path.join(Config.THEMES, self.sitedata['theme'])
        if not os.path.exists(themedir):
            raise FileNotFoundError("Theme directory %s not found!" % themedir)
        theme_yaml = os.path.join(themedir, 'theme.yaml')
        with open(theme_yaml, 'rt', encoding='utf-8') as themedatafile:
            # yaml.load() without a Loader is unsafe and rejected by PyYAML >= 6.
            self.theme.update(yaml.safe_load(themedatafile) or {})
        self.theme['path'] = themedir

    def _load_affiliates(self):
        """
        Merge <datadir>/affiliates.yaml into self.sitedata.

        Every top-level key of the file is copied into the site data, with
        two exceptions:
         - 'stylesheets' is merged with the theme and site stylesheets
           instead of replacing them;
         - 'affiliates' is replaced by a random sample of the listed
           affiliates, of size 'affiliates_at_once' (capped at the number
           of affiliates available).

        If the file does not exist, a message is printed and
        self.sitedata['affiliates'] is set to an empty list.
        """
        if self.verbose: print("Loading affiliates data.")
        try:
            with open(os.path.join(self.datadir, 'affiliates.yaml'), encoding='utf-8') as aff_file:
                # yaml.load() without a Loader is unsafe and rejected by PyYAML >= 6.
                affiliates = yaml.safe_load(aff_file)
        except FileNotFoundError:
            print("No affiliates.yaml file, so affiliates list is empty.")
            self.sitedata['affiliates'] = []
            return

        # update() would let the file's own 'stylesheets' overwrite the
        # site's list, so put the site's list back before merging the two.
        site_stylesheets = self.sitedata['stylesheets']
        self.sitedata.update(affiliates)
        self.sitedata['stylesheets'] = site_stylesheets
        self.sitedata['stylesheets'] = self._collect_stylesheets(affiliates)

        pool = affiliates['affiliates']
        shown = min(int(affiliates['affiliates_at_once']), len(pool))
        self.sitedata['affiliates'] = random.sample(pool, shown)


    def _load_softwarelist(self):
        """
        Merge <datadir>/software.yaml into self.sitedata.

        Every top-level key of the file is copied into the site data, except
        that 'stylesheets' is merged with the theme and site stylesheets
        instead of replacing them.

        If the file does not exist, a message is printed and
        self.sitedata['softwarelist'] is set to an empty list.
        """
        if self.verbose: print("Loading software data.")
        try:
            with open(os.path.join(self.datadir, 'software.yaml'), encoding='utf-8') as sw_file:
                # yaml.load() without a Loader is unsafe and rejected by PyYAML >= 6.
                softwarelist = yaml.safe_load(sw_file)
        except FileNotFoundError:
            print("No software.yaml file, so software list is empty.")
            self.sitedata['softwarelist'] = []
            return

        # update() would let the file's own 'stylesheets' overwrite the
        # site's list, so put the site's list back before merging the two.
        site_stylesheets = self.sitedata['stylesheets']
        self.sitedata.update(softwarelist)
        self.sitedata['stylesheets'] = site_stylesheets
        self.sitedata['stylesheets'] = self._collect_stylesheets(softwarelist)

    def _load_products(self):
        """
        Merge <datadir>/products.yaml into self.sitedata.

        Every top-level key of the file is copied into the site data, except
        that 'stylesheets' is merged with the theme and site stylesheets
        instead of replacing them.

        If the file does not exist, a message is printed and
        self.sitedata['products'] is set to an empty list.
        """
        if self.verbose: print("Loading store products data.")
        try:
            with open(os.path.join(self.datadir, 'products.yaml'), encoding='utf-8') as prod_file:
                # yaml.load() without a Loader is unsafe and rejected by PyYAML >= 6.
                products = yaml.safe_load(prod_file)
        except FileNotFoundError:
            print("No products.yaml file, so products list is empty.")
            self.sitedata['products'] = []
            return

        # update() would let the file's own 'stylesheets' overwrite the
        # site's list, so put the site's list back before merging the two.
        site_stylesheets = self.sitedata['stylesheets']
        self.sitedata.update(products)
        self.sitedata['stylesheets'] = site_stylesheets
        self.sitedata['stylesheets'] = self._collect_stylesheets(products)

    def _load_serieslist(self):
        """
        Load the series list and every episode of every series.

        <datadir>/episodes/series.yaml supplies the 'serieslist'. For each
        series, all '*.yaml' files in <datadir>/episodes/<series directory>
        are loaded as episodes, sorted by their 'episode' number and stored
        under series['episodes'].

        The result is stored both in self.serieslist and in
        self.sitedata['serieslist'], so both are always defined afterwards.

        Missing input is reported with a printed message and never raised:
         - no series.yaml: the series list is empty;
         - no directory for a series: that series gets no episodes.
        """
        if self.verbose: print("Loading series data")
        episodes_root = os.path.join(self.datadir, 'episodes')
        try:
            with open(os.path.join(episodes_root, 'series.yaml'), 'rt', encoding='utf-8') as seriesfile:
                # yaml.load() without a Loader is unsafe and rejected by PyYAML >= 6.
                loaded = yaml.safe_load(seriesfile) or {}
        except FileNotFoundError:
            print("No series.yaml file, so no series loaded.")
            self.serieslist = []
            self.sitedata['serieslist'] = self.serieslist
            return

        self.serieslist = loaded.get('serieslist') or []
        for series in self.serieslist:
            self._fix_series(series)
            seriesdir = os.path.join(episodes_root, series['directory'])
            try:
                # Sorted so the load order (and any fallback ordering below)
                # does not depend on the filesystem.
                filenames = sorted(os.listdir(seriesdir))
            except FileNotFoundError:
                print("Series directory %s not found, so no episodes loaded for it." % seriesdir)
                filenames = []

            episodes = []
            for episode_filename in filenames:
                if not episode_filename.endswith('.yaml'):
                    continue
                if self.verbose: print("Loading episode from %s" % episode_filename)
                with open(os.path.join(seriesdir, episode_filename), 'rt', encoding='utf-8') as episode_file:
                    episodes.append(yaml.safe_load(episode_file))

            # Sort by episode number specified in the files:
            try:
                episodes.sort(key=lambda a: int(a['episode']))
            except KeyError:
                print("Some episode YAML files may not have an 'episode' number entry?")
            series['episodes'] = episodes

        self.sitedata['serieslist'] = self.serieslist


    def _copy_skeleton(self):
        """
        Rebuild the target directory from scratch.

        An existing target directory is removed first. The theme's 'base'
        directory is then copied into the target, followed by the skeleton
        site, which is copied into the same place.
        """
        destination = self.tgtdir
        if os.path.exists(destination):
            remove_tree(destination, verbose=self.verbose)

        if self.verbose:
            print("Copying the theme base.")
        theme_base = os.path.join(self.theme['path'], 'base')
        copy_tree(theme_base, destination, verbose=self.verbose)

        if self.verbose:
            print("Copying the skeleton site.")
        copy_tree(self.skeleton, destination, verbose=self.verbose)


    def _gen_simple_page(self, pagename, stylesheets=()):
        """
        Generates a simple page with 1:1:1 Jinja2+YAML+CSS.

        The YAML and CSS pages are optional. If no file exists
        for them, they will be ignored and the page generated
        with only the template and global data and/or style.

        pagename    - base name shared by '<pagename>.j2', the optional
                      '<datadir>/<pagename>.yaml' and the optional
                      '<tgtdir>/<pagename>.css'.
        stylesheets - extra stylesheet names to include for this page.

        The result is written as UTF-8 to '<tgtdir>/<pagename>.html'.
        The skeleton must already have been copied, because the CSS file
        is looked up in the target directory.
        """
        if self.verbose: print("Creating '%s' page" % pagename)
        jinja2_name = pagename + '.j2'
        yaml_path = os.path.join(self.datadir, pagename+'.yaml')
        css_path = os.path.join(self.tgtdir, pagename+'.css')

        data = {}   # Defaults can be set here
        data.update(self.sitedata)  # Global data
        if os.path.exists(yaml_path):
            with open(yaml_path, 'rt', encoding='utf-8') as yaml_file:
                # An empty YAML file parses to None; treat it as "no page data".
                data.update(yaml.safe_load(yaml_file) or {})   # Page data
        data['stylesheets'] = self._collect_stylesheets(
            data, {'stylesheets': stylesheets})
        # Add CSS if not already present:
        if os.path.exists(css_path) and pagename not in data['stylesheets']:
            data['stylesheets'].append(pagename)

        if self.verbose: print("Generating '%s.html' from template." % pagename)
        html = self.get_template(jinja2_name).render(data)
        # Explicit encoding: the platform default may be unable to encode the page.
        with open(os.path.join(self.tgtdir, pagename+'.html'), 'wt', encoding='utf-8') as page:
            page.write(html)

    def _gen_index(self):
        """
        Generate an index page, if the target doesn't already have one.

        Nothing is written when '<tgtdir>/index.html' already exists (for
        example because the skeleton supplied one, or another page was
        rendered under that name).

        The page is rendered from 'index.j2' with the site data. When the
        site data sets a true 'episode_as_index', the series list, the
        banner iterator and the episode-list stylesheets are added as well.
        The output is written as UTF-8.
        """
        index_path = os.path.join(self.tgtdir, 'index.html')
        if os.path.exists(index_path):
            if self.verbose: print("Found 'index.html', so not generated.")
            return

        if self.verbose: print("Generating the Index page.")
        data = {}
        data.update(self.sitedata)
        data['next'] = next   # Adds iterator capability

        extras = []
        if self.sitedata.get('episode_as_index'):
            data['serieslist'] = getattr(self, 'serieslist', [])
            data['banners'] = iter(['affiliates_banner.j2',
                                    'store_banner.j2',
                                    'sponsoropps_banner.j2'])
            extras.append(self.sitedata.get('episode_list_page') or {})
        # One collection pass, so theme and site stylesheets are not repeated.
        data['stylesheets'] = self._collect_stylesheets(*extras)

        html = self.get_template('index.j2').render(data)
        with open(index_path, 'wt', encoding='utf-8') as page:
            page.write(html)

    def _gen_episode_list_page(self):
        """
        Generate a page linking to all of the individual episodes,
        grouped into "series" (or "seasons").

        The page is rendered from 'episode_list.j2' and written as UTF-8 to
        the file named by the 'render_as' entry of the site data's
        'episode_list_page' section, or to 'index.html' when no such entry
        (or no such section) exists.
        """
        if self.verbose: print("Generating the Index (Episode List) page.")
        page_config = self.sitedata.get('episode_list_page') or {}
        render_as = page_config.get('render_as', 'index.html')

        data = {}
        data.update(self.sitedata)
        data['serieslist'] = self.serieslist
        data['banners'] = iter(['affiliates_banner.j2', 'store_banner.j2', 'sponsoropps_banner.j2'])
        data['stylesheets'] = self._collect_stylesheets(page_config)
        data['next'] = next   # Adds iterator capability

        html = self.get_template('episode_list.j2').render(data)
        # Explicit encoding: the platform default may be unable to encode the page.
        with open(os.path.join(self.tgtdir, render_as), 'wt', encoding='utf-8') as page:
            page.write(html)

    def _gen_episode_pages(self):
        """
        Generate a page for each episode in each series.

        Each page is rendered from 'episode_page.j2' and written as UTF-8 to
        '<tgtdir>/<series directory>/<series>E<NN>.html', where <series> is
        the episode's 'series' entry and NN its 'episode' number padded to
        two digits. The series directory is created when needed.

        Stylesheets come from the theme, the site, the site data's
        'episode_pages' section (optional) and the episode itself.

        Side effect: every episode dict gains a 'paged_sponsors' iterator
        over the result of _paginate_sponsors.
        """
        if self.verbose: print("Generating episode pages...")

        page_config = self.sitedata.get('episode_pages') or {}

        for series in self.serieslist:
            series_dir = os.path.join(self.tgtdir, series['directory'])
            for episode in series['episodes']:
                paged_sponsors = self._paginate_sponsors(series, episode)
                episode['paged_sponsors'] = iter(paged_sponsors)

                data = {}
                data.update(self.sitedata)
                data['series'] = series
                data['episode'] = episode
                data['stylesheets'] = self._collect_stylesheets(page_config, episode)
                data['next'] = next   # Adds iterator capability
                data['banners'] = ['affiliates_banner.j2']
                html = self.get_template('episode_page.j2').render(data)

                filename = episode['series'] + 'E' + ('%2.2d' % int(episode['episode'])) + '.html'
                os.makedirs(series_dir, exist_ok=True)
                # Explicit encoding: the platform default may be unable to encode the page.
                with open(os.path.join(series_dir, filename), 'wt', encoding='utf-8') as page:
                    page.write(html)

    def gensite(self):
        """
        Generate the site, using the data we've accumulated.

        Steps, in order:
         1. rebuild the target directory from the theme base and skeleton;
         2. render every page listed under 'simple_pages' in the site data
            (none if the entry is absent);
         3. render the episode list and the episode pages, if any series
            were loaded;
         4. render the index page, unless an 'index.html' already exists.
        """
        self._copy_skeleton()
        for page in self.sitedata.get('simple_pages') or []:
            self._gen_simple_page(page)

        # The loaded series live in self.serieslist. A false 'serieslist'
        # entry in the site data still switches the episode pages off, as
        # it did before.
        series_loaded = getattr(self, 'serieslist', None)
        if series_loaded and self.sitedata.get('serieslist', True):
            self._gen_episode_list_page()
            self._gen_episode_pages()
        else:
            print("Not generating series & episode pages: serieslist empty.")
        self._gen_index()


def main(argv=None):
    '''
    Command line entry point: parse the options and generate the site.

    argv - list of command line arguments without the program name;
           defaults to sys.argv[1:].

    Options:
      -i/--in   source directory (default ".")
      -o/--out  target directory (default "./site")
      -v        increase verbosity; may be repeated (default 0)

    Returns 0 after the site has been generated.
    '''
    program_version = "v0.1"
    program_build_date = "%s" % Config.__updated__

    program_version_string = '%%prog %s (%s)' % (program_version, program_build_date)
    program_longdesc = '''\
LunaGen is a static HTML website generator designed for releasing
a series of episodes (technically: a series of series of episodes).

Data is authored using the YAML structured data language, which allows
for episode metadata and descriptions to be written in a human-friendly
format, which is then formatted into HTML using Jinja2 templates.

Once generated, the site is static and can simply be uploaded to a
standard web server with minimal or no configuration (like a static web host).

It was originally created to generate the release pages for
Anansi Spaceworks' "Lunatics!" series.

For details, please see the 'examples' and 'doc' directories.
    '''
    program_license = "Copyright 2019 Terry Hancock (Anansi Spaceworks)                                            \
                Licensed under the GNU General Public License, version 2.0\n"

    if argv is None:
        argv = sys.argv[1:]

    # NOTE: the catch-all error handler for this function is currently
    # commented out (see below the function), so errors surface as tracebacks.

    # setup option parser
    parser = OptionParser(version=program_version_string, epilog=program_longdesc, description=program_license)
    parser.add_option("-i", "--in", dest="src", help="set input path [default: %default]", metavar="FILE")
    parser.add_option("-o", "--out", dest="tgt", help="set output path [default: %default]", metavar="FILE")
    parser.add_option("-v", "--verbose", dest="verbose", action="count", help="set verbosity level [default: %default]")

    # set defaults
    # verbose needs a numeric default: without one it is None when -v is
    # not given, and the comparison below raises TypeError.
    parser.set_defaults(tgt="./site", src=".", verbose=0)

    # process options
    (opts, args) = parser.parse_args(argv)

    if opts.verbose > 0:
        print("verbosity level = %d" % opts.verbose)
    if opts.src:
        print("src = %s" % opts.src)
    if opts.tgt:
        print("tgt = %s" % opts.tgt)

    # Named 'site' so the module-level 'lunagen' import is not shadowed.
    site = LunaGen(opts.src, opts.tgt, opts.verbose)
    site.gensite()
    return 0

#    except Exception as e:
#        indent = len(program_name) * " "
#        sys.stderr.write(program_name + ": " + repr(e) + "\n")
#        sys.stderr.write(indent + "  for help use --help")
#        return 2


# Script entry point. The Config switches select optional behavior:
#   DEBUG   - force verbose output by appending "-v" to the arguments
#   TESTRUN - run the module's doctests before generating the site
#   PROFILE - run main() under cProfile, write a report, and exit
if __name__ == "__main__":
    if Config.DEBUG:
        #sys.argv.append("-h")
        sys.argv.append("-v")
    if Config.TESTRUN:
        import doctest
        doctest.testmod()
    if Config.PROFILE:
        import cProfile
        import pstats
        profile_filename = 'lunagen_profile.txt'
        cProfile.run('main()', profile_filename)
        # pstats prints text, so the report file must be opened in text
        # mode; 'with' also closes it if reporting fails.
        with open("profile_stats.txt", "w") as statsfile:
            p = pstats.Stats(profile_filename, stream=statsfile)
            stats = p.strip_dirs().sort_stats('cumulative')
            stats.print_stats()
        sys.exit(0)
    sys.exit(main())