Initial commit

2022-06-28 22:20:27 -05:00 · 2022-06-28 22:20:27 -05:00 · 55f5050340
commit 55f5050340
2 changed files with 254 additions and 0 deletions
--- a/README.md
+++ b/README.md
@ -0,0 +1,54 @@
 # lsseq
 A simple filter utility for viewing long listings of text that have
 either repeating lines or sequential lines. Usage is similar to "uniq",
 except that in addition to condensing duplicates, it condenses runs
 of incrementing or decrementing numbered lines.
 In order to be regarded as a run, lines must contain a single block of
 decimal numerals (with or without zero-padding), which increases or decreases
 by exactly one on each line. It does not matter if there are additional number
 blocks that don't change, and the changing number can appear anywhere in the line.
 Although there is nothing about the program that requires it to be used
 on file listings, it was created to quickly assess PNG or EXR streams
 generated by Blender rendering processes.
 Currently has no switches or controls, and very few features.
 I also created it as a learning project for **Eclipse/PyDev** and integration
 with **GitHub**, so it is primarily a learning exercise, although it is kind
 of handy.
 Usage example:
 	$ ls my_pngstream*.png | lsseq
 If the listing looks like this:
 	my_pngstream-f00001.png
 	my_pngstream-f00002.png
 	my_pngstream-f00003.png
 	my_pngstream-f00004.png
 	my_pngstream-f00005.png
 	my_pngstream-f00010.png
 	my_pngstream-f00012.png
 The output will look like this:
 	my_pngstream-f00001.png
 	    ... (5) +
 	my_pngstream-f00005.png
 	my_pngstream-f00010.png
 	my_pngstream-f00012.png
 For very long runs, this will be MUCH more compact and readable.
 # TODO
 Would like to add option parsing and some options for ignoring parts of
 the input lines. This would be handy for use with "ls -l" for example,
 to ignore the changing file size and date numbers, which would currently
 foul the run-detection.
--- a/lsseq.py
+++ b/lsseq.py
@ -0,0 +1,200 @@
 #!/usr/bin/env python 
 # lsseq
 """
 Filter utility to condense long lists including identical or sequential lines
 of text (such as the names of files in a PNG stream).
 """
 import sys, re
 def main(inp=sys.stdin):
     """
     Condense runs of duplicated or sequentially numbered lines.

     `inp` is any iterable of strings: a list, or an open text stream such
     as stdin. A trailing line terminator is stripped from each item, so
     stream input and plain lists give the same result. Blank lines are
     treated as ordinary lines (they never end the input early).

     Each run is reported as its first line followed by:
       * '   x N'        for N identical lines,
       * the second line for a 2-line incrementing/decrementing run,
       * '   ... (N) +' or '   ... (N) -' and the last line for a longer
         incrementing or decrementing run of N lines.

     Returns the condensed listing as a single newline-joined string
     ('' for empty input).

     >>> sample = [
     ...     'S1E01-PC-1-Cam1-f00001.png',
     ...     'S1E01-PC-1-Cam1-f00002.png',
     ...     'S1E01-PC-1-Cam1-f00103.png',
     ...     'S1E01-PC-1-Cam1-f00103.png',
     ...     'S1E01-PC-1-Cam1-f00103.png',
     ...     'S1E01-PC-1-Cam1-f00104.png',
     ...     'S1E01-PC-1-Cam1-f00105.png',
     ...     'S1E01-PC-1-Cam1-f00106.png',
     ...     'S1E01-PC-1-Cam1-f00107.png',
     ...     'S1E01-PC-1-Cam1-f00108.png',
     ...     'S1E01-PC-1-Cam1-f02000.png',
     ...     'S1E01-PC-1-Cam1-f02300.png',
     ...     'S1E01-PC-1-Cam1-f02400.png',
     ...     'S1E01-PC-1-Cam1-f02399.png',
     ...     'S1E01-PC-1-Cam1-f02398.png',
     ...     'S1E01-PC-1-Cam1-f02397.png',
     ...     'S1E01-PC-1-Cam1-f02396.png',
     ...     'S1E01-PC-1-Cam1-f03555.png',
     ...     'S1E01-PC-1-Cam2-f03333.png',
     ...     'S1E01-PC-1-Cam2-f03440.png',
     ...     'S1E01-PC-1-Cam2-f03441.png',
     ... ]
     >>> print(main(sample))
     S1E01-PC-1-Cam1-f00001.png
     S1E01-PC-1-Cam1-f00002.png
     S1E01-PC-1-Cam1-f00103.png
        x 3
     S1E01-PC-1-Cam1-f00104.png
        ... (5) +
     S1E01-PC-1-Cam1-f00108.png
     S1E01-PC-1-Cam1-f02000.png
     S1E01-PC-1-Cam1-f02300.png
     S1E01-PC-1-Cam1-f02400.png
        ... (5) -
     S1E01-PC-1-Cam1-f02396.png
     S1E01-PC-1-Cam1-f03555.png
     S1E01-PC-1-Cam2-f03333.png
     S1E01-PC-1-Cam2-f03440.png
     S1E01-PC-1-Cam2-f03441.png
     """
     listing = []
     base = prev = None  # first and most recent line of the run in progress
     kind = None         # '==', '+1' or '-1' once the run has a second line
     steps = 0           # lines in the run after `base`
     started = False     # explicit flag: a line may legitimately be ''
     # A plain for-loop works on Python 2 and 3 alike and ends cleanly on
     # empty input.
     for raw in inp:
         line = raw.rstrip('\r\n')
         if not started:
             base = prev = line
             started = True
             continue
         if not line or not prev:
             # A blank line has no fields to compare; it can only repeat
             # another blank line.
             comparison = '==' if line == prev else '!='
         else:
             comparison = compare_lines(prev, line)
         if comparison != '!=' and kind in (None, comparison):
             # Same kind of step as the rest of the run: extend it.
             kind = comparison
             steps += 1
         else:
             # Unrelated line, or a different kind of step: close the
             # current run and start a new one at this line.
             _emit_run(listing, base, prev, kind, steps)
             base, kind, steps = line, None, 0
         prev = line
     if started:
         _emit_run(listing, base, prev, kind, steps)
     return '\n'.join(listing)
 def _emit_run(listing, base, last, kind, steps):
     """Append the report for one finished run (base .. last) to `listing`."""
     listing.append(base)
     if kind == '==':
         # Report # of duplicates
         listing.append('   x {0:d}'.format(steps + 1))
     elif steps == 1:
         # If a run is only 2 lines, then just list them both
         listing.append(last)
     elif steps > 1:
         # Show the length and direction of the run, then its last line
         sign = '+' if kind == '+1' else '-'
         listing.append('   ... ({0:d}) {1}'.format(steps + 1, sign))
         listing.append(last)
 # Anchored single-field patterns. breakdown() no longer needs them, but they
 # are kept because they are module-level names other code may import.
 numfield = re.compile(r'(^\d+)')
 othfield = re.compile(r'(^\D+)')
 # A maximal digit run (group 1) or a maximal non-digit run (group 2);
 # scanning with it splits a whole string into fields in a single pass.
 _anyfield = re.compile(r'(\d+)|(\D+)')
 def breakdown(s):
     """
     Break a string down into numerical fields delimited by non-numerical fields.

     Returns a list of (tag, text) tuples in order of appearance, where tag
     is '0' for a run of digits and 'a' for a run of anything else. The
     texts concatenate back to the original string. An empty (or None)
     input gives an empty list:

     >>> breakdown('df39-330-spam-51.bar')
     [('a', 'df'), ('0', '39'), ('a', '-'), ('0', '330'), ('a', '-spam-'), ('0', '51'), ('a', '.bar')]
     >>> breakdown('')
     []
     """
     if not s:
         return []
     fields = []
     for m in _anyfield.finditer(s):
         digits, other = m.groups()
         if digits is not None:
             fields.append(('0', digits))
         else:
             fields.append(('a', other))
     return fields
 def compare_lines(l1, l2):
     """
     Compare two lines, returning '!=', '==', '+1', '-1', indicating
     different, same, incremented by one, or decremented by one.

     '+1' / '-1' is returned only when both lines break down into the same
     number of fields and exactly one field differs, that field being a
     number that went up or down by one. Any other difference is '!=':

     >>> compare_lines('my_line2439_3349.png', 'my_line2439_3350.png')
     '+1'
     >>> compare_lines('my_line2439_3349.png', 'my_line2439_3348.png')
     '-1'
     >>> compare_lines('my_line2439_3349.png', 'my_line2439_8304.png')
     '!='
     >>> compare_lines('my_line2439_3349.png', 'spam_8304.png')
     '!='
     >>> compare_lines('my_line2439_3349.png', 'my_line2439_3349.png')
     '=='
     >>> compare_lines('a1.png', 'b2.png')
     '!='
     >>> compare_lines('frame', 'frame1')
     '!='
     """
     if l1 == l2:
         return '=='
     fields1 = breakdown(l1)
     fields2 = breakdown(l2)
     if len(fields1) != len(fields2):
         # zip() would silently drop the extra fields and could report a
         # line as equal to one that merely starts the same way.
         return '!='
     # Classify only the fields that differ. Building a list (rather than
     # re-reading a zip object) keeps this correct on Python 3, where zip()
     # can be consumed only once.
     changed = [compare_fields(f1, f2)
                for (f1, f2) in zip(fields1, fields2) if f1 != f2]
     # Check for ONE field incrementing or decrementing by 1, with every
     # other field unchanged:
     if len(changed) == 1 and changed[0] in ('+1', '-1'):
         return changed[0]
     # Otherwise, it doesn't match at all
     return '!='
 def compare_fields(f1, f2):
     """
     Classify how field f2 relates to field f1.

     Fields are (tag, text) tuples. Returns '==' when they are identical,
     '+1' or '-1' when both are numeric ('0'-tagged) fields whose integer
     values differ by exactly one, and '!=' in every other case:

     >>> compare_fields(('0','00345'), ('0','00346'))
     '+1'
     """
     if f1 == f2:
         return '=='
     if f1[0] != '0' or f2[0] != '0':
         # Only two numeric fields can be one step apart.
         return '!='
     step = int(f2[1]) - int(f1[1])
     if step == 1:
         return '+1'
     if step == -1:
         return '-1'
     return '!='
 if __name__ == '__main__':
     # Run as a filter: condense whatever arrives on stdin and print it.
     condensed = main(sys.stdin)
     print(condensed)