commit 55f505034054ace99f74c81a3a4b00f00a71345d Author: Terry Hancock Date: Tue Jun 28 22:20:27 2022 -0500 Initial commit diff --git a/README.md b/README.md new file mode 100644 index 0000000..f4d317c --- /dev/null +++ b/README.md @@ -0,0 +1,54 @@ +# lsseq + +A simple filter utility for viewing long listings of text that have +either repeating lines or sequential lines. Usage is similar to "uniq", +except that in addition to condensing duplicates, it condenses runs +of incrementing or decrementing numbered lines. + +In order to be regarded as a run, lines must contain a single block of +decimal numerals (with or without zero-padding), which increase or decrease by one +step on each line. It does not matter if there are additional number blocks +that don't change, and the changing number can appear anywhere in the line. + +Although there is nothing about the program that requires it to be used +on file listings, it was created to quickly assess PNG or EXR streams +generated by Blender rendering processes. + +Currently has no switches or controls, and very few features. + +I also created it as a learning project for **Eclipse/PyDev** and integration +with **GitHub**, so this is more of a learning project, although it is kind +of handy. + +Usage example: + + $ ls my_pngstream*.png | lsseq + +If the listing looks like this: + + my_pngstream-f00001.png + my_pngstream-f00002.png + my_pngstream-f00003.png + my_pngstream-f00004.png + my_pngstream-f00005.png + my_pngstream-f00010.png + my_pngstream-f00012.png + +The output will look like this: + + my_pngstream-f00001.png + ... (5) + + my_pngstream-f00005.png + + my_pngstream-f00010.png + + my_pngstream-f00012.png + +For very long runs, this will be MUCH more compact and readable. + +# TODO + +Would like to add option parsing and some options for ignoring parts of +the input lines. This would be handy for use with "ls -l" for example, +to ignore the changing filesize and date numbers, which would currently +foul the run-detection. 
#!/usr/bin/env python
# lsseq
"""
Filter utility to condense long lists including identical or sequential lines
of text (such as the names of files in a PNG stream).
"""
import re
import sys


# Output templates for incrementing / decrementing runs, keyed by the step
# code that compare_lines() returns for that kind of run.
_RANGE_FORMATS = {
    '+1': ' ... ({0:d}) +',
    '-1': ' ... ({0:d}) -',
}


def _format_run(base, last, kind, length):
    """
    Return the list of output lines describing one finished run.

    base   -- first line of the run
    last   -- final line of the run (same as base for a single line)
    kind   -- '==', '+1', '-1', or None for a line that stands alone
    length -- number of input lines in the run

    Every run is introduced by an empty string, which becomes the blank
    separator line once the listing is joined.
    """
    out = ['', base]
    if kind == '==':
        # Duplicates: show the line once with a repeat count.
        out.append(' x {0:d}'.format(length))
    elif kind in _RANGE_FORMATS:
        # A two-line "run" is simply listed; longer runs get a range marker
        # between the first and last lines.
        if length > 2:
            out.append(_RANGE_FORMATS[kind].format(length))
        out.append(last)
    return out


def main(inp=sys.stdin):
    """
    Given a list of strings or lines from a stream, return a reduced
    listing (as one string) showing runs of duplicated lines or lines in
    an incrementing or decrementing sequence (like a PNG-stream).

    Trailing line terminators are stripped from each input line, and empty
    lines are ignored, so a list of names and an open text stream give the
    same result.  Empty input returns an empty string.

    A run has a single kind: a duplicate line arriving in the middle of an
    incrementing run ends that run and starts a new one.

    >>> names = ['shot-f001.png', 'shot-f002.png', 'shot-f003.png',
    ...          'shot-f010.png']
    >>> main(names).splitlines()
    ['', 'shot-f001.png', ' ... (3) +', 'shot-f003.png', '', 'shot-f010.png']
    >>> main([])
    ''
    """
    listing = []
    base = None     # first line of the run being accumulated
    prev = None     # most recent line seen
    kind = None     # '==', '+1' or '-1' once the run has a second line
    length = 1      # number of lines in the current run

    for raw in inp:
        # Streams yield lines with their terminator attached; drop it so it
        # is neither compared nor echoed into the joined output.
        line = raw.rstrip('\r\n')
        if not line:
            continue

        if prev is None:
            # first line
            base = prev = line
            continue

        step = compare_lines(prev, line)
        if step != '!=' and kind in (None, step):
            # Line continues (or starts) a run of a single kind.
            kind = step
            length += 1
        else:
            # Run is broken: report it and start over from this line.
            listing.extend(_format_run(base, prev, kind, length))
            base = line
            kind = None
            length = 1
        prev = line

    if prev is not None:
        # Report whatever run was still open at end of input.
        listing.extend(_format_run(base, prev, kind, length))
    return '\n'.join(listing)


numfield = re.compile(r'(^\d+)')
othfield = re.compile(r'(^\D+)')
# Matches one maximal run of digits or one maximal run of non-digits; every
# character belongs to exactly one such run, so findall() tiles the string.
_anyfield = re.compile(r'\d+|\D+')


def breakdown(s):
    """
    Break a string down into numerical fields delimited by non-numerical fields:

    Each field is a (tag, text) tuple, where tag is '0' for a block of
    digits and 'a' for anything else.  An empty string gives an empty list.

    >>> breakdown('df39-330-spam-51.bar')
    [('a', 'df'), ('0', '39'), ('a', '-'), ('0', '330'), ('a', '-spam-'), ('0', '51'), ('a', '.bar')]
    >>> breakdown('')
    []
    """
    return [('0' if numfield.match(token) else 'a', token)
            for token in _anyfield.findall(s)]


def compare_lines(l1, l2):
    """
    Compare two lines, returning '!=', '==', '+1', '-1', indicating
    different, same, incremented by one, or decremented by one.

    A line counts as incremented/decremented only when exactly one numeric
    field changes by one and every other field is identical.

    >>> compare_lines('my_line2439_3349.png', 'my_line2439_3350.png')
    '+1'
    >>> compare_lines('my_line2439_3349.png', 'my_line2439_3348.png')
    '-1'
    >>> compare_lines('my_line2439_3349.png', 'my_line2439_8304.png')
    '!='
    >>> compare_lines('my_line2439_3349.png', 'spam_8304.png')
    '!='
    >>> compare_lines('my_line2439_3349.png', 'my_line2439_3349.png')
    '=='
    >>> compare_lines('cam1_f0005.png', 'cam2_f0900.png')
    '!='
    >>> compare_lines('abc1', 'abc1def')
    '!='
    """
    fields1 = breakdown(l1)
    fields2 = breakdown(l2)
    if fields1 == fields2:
        return '=='

    # Lines with a different number of fields cannot be one step apart;
    # checking this first keeps zip() from silently ignoring the extras.
    if len(fields1) != len(fields2):
        return '!='

    # Collect only the fields that changed.
    changes = [result
               for result in (compare_fields(f1, f2)
                              for (f1, f2) in zip(fields1, fields2))
               if result != '==']

    # Check for ONE field incrementing or decrementing by 1, with all the
    # other fields unchanged:
    if len(changes) == 1 and changes[0] in ('+1', '-1'):
        return changes[0]

    # Otherwise, it doesn't match at all
    return '!='


def compare_fields(f1, f2):
    """
    Compare fields for equality ('=='), incremented by 1 ('+1'), decremented by 1 ('-1'),
    or other inequality ('!=').

    Fields are (tag, text) tuples as produced by breakdown(); only two
    numeric fields (tag '0') can be a step apart.

    >>> compare_fields(('0','00345'), ('0','00346'))
    '+1'
    >>> compare_fields(('0','10'), ('0','9'))
    '-1'
    >>> compare_fields(('a','x'), ('a','y'))
    '!='
    """
    if f1 == f2:
        return '=='
    if f1[0] == '0' and f2[0] == '0':
        # Compare by value so zero-padding does not matter.
        delta = int(f2[1]) - int(f1[1])
        if delta == -1:
            return '-1'
        if delta == 1:
            return '+1'
    return '!='


if __name__ == '__main__':
    print(main(sys.stdin))