# file_context.py """ Contextual metadata acquired from the file system, file name, directory structure, and sidecar data files. Data is acquired from file and directory names and also from yaml files in the tree. The yaml files are loaded in increasing priority from metadata.yaml, abx.yaml, .yaml. They are also loaded from the top of the tree to the bottom, with the most local Values overriding the top-level ones. @author: Terry Hancock @copyright: 2019 Anansi Spaceworks. @license: GNU General Public License, version 2.0 or later. (Python code) Creative Commons Attribution-ShareAlike, version 3.0 or later. (Website Templates). @contact: digitante@gmail.com """ import os, re, copy, string, collections import yaml DEFAULT_YAML = {} with open(os.path.join(os.path.dirname(__file__), 'abx.yaml')) as def_yaml_file: DEFAULT_YAML.update(yaml.safe_load(def_yaml_file)) TESTPATH = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'testdata', 'myproject', 'Episodes', 'A.001-Pilot', 'Seq', 'LP-LastPoint', 'A.001-LP-1-BeginningOfEnd-anim.txt')) from . import accumulate from .accumulate import RecursiveDict from .enum import Enum from .ranks import RankNotFound from .parsers import NameParsers log_level = Enum('DEBUG', 'INFO', 'WARNING', 'ERROR') from .name_schema import FieldSchema from .name_context import NameContext class FileContext(NameContext): """ Collected information about a file's storage location on disk. Collects name and path information from a filepath, used to identify the file's role in a project. In order to do this correctly, the FileContext object needs a schema defined for the project, which explains how to read and parse project file names, to determine what unit, name, or role they might have in the project. For this, you will need to have a .yaml file which defines the 'project_schema' (a list of dictionaries used to initialize a list of FieldSchema objects). Examples of .yaml are provided in the 'myproject.yaml' file in the test data in the source distribution of ABX, and you can also see a "live" example in the "Lunatics!" project. Subclass from NameContext, so please read more information there. Attributes: root (filepath): The root directory of the project as an absolute operating system filepath. This should be used for finding the root where it is currently, not stored for permanent use, as it will be wrong if the project is relocated. render_root (filepath): The root directory for rendering. We often have this symlinked to a large drive to avoid congestion. Usually just /Renders. filetype (str): Filetype code or extension for this file. Usually identifies what sort of file it is and may imply how it is used in some cases. role (str): Explicit definition of file's role in the project, according to roles specified in .yaml. For a default, see 'abx.yaml' in the ABX source code. Derived from the file name. title (str): Title derived from the filename. The relationship between this and the NameContext title is unclear at present -- probably we should be setting the NameContext.title property from here (?) comment (str): Comment field from the filename. This is a free field generally occurring after the role, using a special delimiter and meant to be readable by humans. It may indicate an informal backup or saved version of the file outside of the VCS, as opposed to a main, VCS-tracked copy. Or it may indicate some variant version of the file. name_contexts (list[NameContext]): A list of NameContext objects contained in this file, typically one-per-scene in a Blender file. filepath (str): O/S and location dependent absolute path to the file. filename (str): Unaltered filename from disk. file_exists (bool): Does the file exist on disk (yet)? This may be false if the filename has been determined inside the application, but the file has not been written to disk yet. folder_exists (bool): Does the containing folder exist (yet)? folders (list(str)): List of folder names from the project root to the current file, forming a relative path from the root to this file. omit_ranks (dict[str:int]): How many ranks are omitted from the beginning of filename fields? (Implementation). provided_data (RecursiveDict): The pile of data from project YAML files. This is a special dictionary object that does "deep updates" in which sub-dictionaries and sub-lists are updated recursively rather than simply being replaced at the top level. This allows the provided_data to accumulate information as it looks up the project tree to the project root. It is not recommended to directly access this data. (Implementation) abx_fields (RecursiveDict): A pile of 'abx.yaml' file with directives affecting how ABX should behave with this file. This can be used to set custom behavior in different project units. For example, we use it to define different render profiles for different project units. notes (list(str)): A primitive logging facility. This stores warning and information messages about the discovery process to aid the production designer in setting up the project correctly. NOTE that the clear method does not clear the notes! There is a separate clear_notes() method. parsers (list): A list of registered parser implementations for analyzing file names. FileContext tries them all, and picks the parser which reports the best score -- that is, parser score themselves on how likely their parse is to be correct. So if a parser hits a problem, it demerits its score, allowing another parser to take over. Currently there are only three parsers provided: a custom one, originally written to be specific to "Lunatics!" episodes ('abx_episode', now obsolete?), a parser using the project_schema system ('abx_schema', now the preferred choice), and a "dumb" parser design to fallback on if no schema is provided, which reads only the filetype and possible role, title, and comment fields, guessing from common usage with no explicit schema ('abx_fallback'). This implementation could probably benefit from some more application of computer science and artificial intelligence, but I've settled on a "good enough" solution and the assumption that production designers would probably rather just learn how to use the YAML schemas correctly, than to try to second-guess a sloppy AI system. As of v0.2.6, FileContext does NOT support getting any information directly from the operating system path for the file (i.e. by reading directory names), although this would seem to be a good idea. Therefore, project units have to be specified by additional unit-level YAML documents (these can be quite small), explicitly setting the unit-level information for directories above the current object, and by inference from the project schema and the filename (which on "Lunatics!" conveys all the necessary information for shot files, but perhaps not for library asset files). """ # IMMUTABLE DEFAULTS: filepath = None root = None folders = () #ranks = () project_units = () filename = None fields = None #subunits = () code = '_' # defaults = { # 'filetype': None, 'role': None, 'hierarchy': None, 'project': None, # 'series': None, 'episode': None, 'seq': None, 'block': None, # 'camera': None, 'shot': None, 'title': None } def __init__(self, path=None): """ Collect path context information from a given filepath. (Searches the filesystem for context information). """ NameContext.__init__(self, None, {}) self.clear() self.clear_notes() if path: self.update(path) def clear(self): """ Clear the contents of the FileContext object. Nearly the same as reinitializing, but the notes attribute is left alone, to preserve the log history. """ NameContext.clear(self) # Identity self.root = os.path.abspath(os.environ['HOME']) self.render_root = os.path.join(self.root, 'Renders') self.filetype = '' self.role = '' self.title = '' self.comment = '' # Containers #self.notes = [] self.name_contexts = {} # Status / Settings self.filepath = None self.filename = None self.file_exists = False self.folder_exists = False self.omit_ranks = { 'edit': 0, 'render': 0, 'filename': 0, 'scene': 0} # Defaults self.provided_data = RecursiveDict(DEFAULT_YAML) self.abx_fields = DEFAULT_YAML['abx'] def clear_notes(self): """ Clear the log history in the notes attribute. """ # We use this for logging, so it doesn't get cleared by the # normal clear process. self.notes = [] def update(self, path): """ Update the FileContext based on a new file path. """ # Basic File Path Info self.filepath = os.path.abspath(path) self.filename = os.path.basename(path) # Does the file path exist? if os.path.exists(path): self.file_exists = True self.folder_exists = True else: self.file_exists = False if os.path.exists(os.path.dirname(path)): self.folder_exists = True else: self.folder_exists = False # - Should we create it? / Are we creating it? # We should add a default YAML file in the ABX software to guarantee # necessary fields are in place, and to document the configuration for # project developers. # Data from YAML Files #self._collect_yaml_data() self.provided_data = RecursiveDict(DEFAULT_YAML) kitcat_root, kitcat_data, abx_data = accumulate.get_project_data(self.filepath) self.root = kitcat_root self.provided_data.update(kitcat_data) path = os.path.abspath(os.path.normpath(self.filepath)) root = os.path.abspath(self.root) self.folders = [os.path.basename(self.root)] self.folders.extend(os.path.normpath(os.path.relpath(path, root)).split(os.sep)[:-1]) self.abx_fields = abx_data # Did we find the YAML data for the project? # Did we find the project root? # TODO: Bug? # Note that 'project_schema' might not be correct if overrides are given. # As things are, I think it will simply append the overrides, and this # may lead to odd results. We'd need to actively compress the list by # overwriting according to rank # try: self._load_schemas(self.provided_data['project_schema']) self.namepath_segment = [d['code'] for d in self.provided_data['project_unit']] self.code = self.namepath[-1] except: print("Can't find Name Path. Missing .yaml file?") pass # print("\n(899) filename = ", self.filename) # if 'project_schema' in self.provided_data: # print("(899) project_schema: ", self.provided_data['project_schema']) # else: # print("(899) project schema NOT DEFINED") # # print("(904) self.namepath_segment = ", self.namepath_segment) # Was there a "project_schema" section? # - if not, do we fall back to a default? # Was there a "project_unit" section? # - if not, can we construct what we need from project_root & folders? # Is there a definitions section? # Do we provide defaults? try: self.render_root = os.path.join(self.root, self.provided_data['definitions']['render_root']) except KeyError: self.render_root = os.path.join(self.root, 'Renders') self.omit_ranks = {} try: for key, val in self.provided_data['definitions']['omit_ranks'].items(): self.omit_ranks[key] = int(val) except KeyError: self.omit_ranks.update({ 'edit': 0, 'render': 1, 'filename': 1, 'scene': 3}) # Data from Parsing the File Name if ( 'parser' in self.provided_data['definitions'] and self.provided_data['definitions']['parser'] in NameParsers): # If project has defined what parser it wants (and it is registered), # then restrict to that parser: parser_selection = [self.provided_data['definitions']['parser']] else: parser_selection = NameParsers.keys() if 'parser_options' in self.provided_data['definitions']: parser_options = self.provided_data['definitions']['parser_options'] else: parser_options = {} # TESTING: # Previous code locked-in the schema parser, so I'm starting with that: parser_selection = ['abx_schema'] self.parsers = [NameParsers[p]( schemas = self.schemas, definitions = self.provided_data['definitions'], **parser_options) for p in parser_selection] parser_chosen, parser_score = self._parse_filename() self.log(log_level.INFO, "Parsed with %s, score: %d" % (parser_chosen, parser_score)) # TODO: # We don't currently consider the information from the folder names, # though we could get some additional information this way def __repr__(self): s = '{0}(data='.format(self.__class__.__name__) #s = s + super().__repr__() s = s + str(self.code) + '(' + str(self.rank) + ')' s = s + ')' return s def log(self, level, msg): """ Log a message to the notes attribute. This is a simple facility for tracking issues with the production source tree layout, schemas, and file contexts. """ if type(level) is str: level = log_level.index(level) self.notes.append((level, msg)) def get_log_text(self, level=log_level.INFO): """ Returns the notes attribute as a block of text. """ level = log_level.number(level) return '\n'.join([ ': '.join((log_level.name(note[0]), note[1])) for note in self.notes if log_level.number(note[0]) >= level]) def _parse_filename(self): """ Try available fuzzy data parsers on the filename, and pick the one that returns the best score. """ fields = {} best_score = 0.0 best_parser_name = None for parser in self.parsers: score, fielddata = parser(self.filename, self.namepath) if score > best_score: fields = fielddata best_parser_name = parser.name best_score = score self.fields.update(fields) self._pull_up_last_rank_fields() return best_parser_name, best_score def _pull_up_last_rank_fields(self): if ( 'rank' in self.fields and self.fields['rank'] in self.fields and isinstance(self.fields[self.fields['rank']], collections.Mapping) ): for key, val in self.fields[self.fields['rank']].items(): self.fields[key] = val # def _collect_yaml_data(self): @property def filetype(self): """ Filetype suffix for the file (usually identifies format). """ if 'filetype' in self.fields: return self.fields['filetype'] else: return '' @filetype.setter def filetype(self, filetype): self.fields['filetype'] = filetype @property def role(self): """ Role field from the filename, or guessed from filetype. """ if 'role' in self.fields: return self.fields['role'] else: return '' @role.setter def role(self, role): self.fields['role'] = role @property def title(self): """ Title field parsed from the file name. """ if 'title' in self.fields: return self.fields['title'] else: return '' @title.setter def title(self, title): self.fields['title'] = title @property def comment(self): """ Comment field parsed from the filename. Meant to be a human-readable extension to the filename, often used to represent an informal version, date, or variation on the file. """ if 'comment' in self.fields: return self.fields['comment'] else: return '' @comment.setter def comment(self, comment): self.fields['comment'] = comment @classmethod def deref_implications(cls, values, matchfields): """ NOT USED: Interpret information from reading folder names. """ subvalues = {} for key in values: # TODO: is it safe to use type tests here instead of duck tests? if type(values[key])==int and values[key] < len(matchfields): subvalues[key]=matchfields[values[key]] elif type(values[key]==dict): subvalues[key]=cls.deref_implications(values[key], matchfields) elif type(values[key]==list): vals = [] for val in values[key]: vals.append(cls.deref_implications(val, matchfields)) return subvalues def get_path_implications(self, path): """ NOT USED: Extract information from folder names. """ data = {} prefix = r'(?:.*/)?' suffix = r'(?:/.*)?' for implication in self.schema['path_implications']: matched = re.compile(prefix + implication['match'] + suffix).match(path) if matched and matched.groups: data.update(self.deref_implications(implication['values'], matched.groups())) return data def new_name_context(self, rank=None, **kwargs): """ Get a NameContext object representing a portion of this file. In Blender, generally in a 1:1 relationship with locally-defined scenes. """ fields = {} fields.update(self.fields) namepath_segment = [] ranks = [s.rank for s in self.schemas] i_rank = len(self.namepath) if i_rank == 0: old_rank = None else: old_rank = ranks[i_rank -1] # The new rank will be the highest rank mentioned, or the # explicitly requested rank or # one rank past the namepath # new_rank = self.schemas[i_rank].rank for schema in self.schemas[i_rank:]: if schema.rank in kwargs: fields[schema.rank] = {'code':kwargs[schema.rank]} new_rank = schema.rank namepath_segment.append(kwargs[schema.rank]) elif rank is not None: namepath_segment.append(schema.default) if ranks.index(schema.rank) <= ranks.index(rank): new_rank = schema.rank if old_rank: delta_rank = ranks.index(new_rank) - ranks.index(old_rank) else: # I think in this case, it's as if the old_rank number is -1? delta_rank = ranks.index(new_rank) + 1 # Truncate to the new rank: namepath_segment = namepath_segment[:delta_rank] fields['rank'] = new_rank fields['code'] = namepath_segment[-1] name_context = NameContext(self, fields, namepath_segment=namepath_segment) self.name_contexts[str(id(name_context))] = name_context return name_context