Initial commit

This commit is contained in:
Terry Hancock 2022-06-28 22:20:27 -05:00
commit 55f5050340
2 changed files with 254 additions and 0 deletions

54
README.md Normal file
View File

@ -0,0 +1,54 @@
# lsseq
A simple filter utility for viewing long listings of text that have
either repeating lines or sequential lines. Usage is similar to "uniq",
except that in addition to condensing duplicates, it condenses runs
of incrementing or decrementing numbered lines.
In order to be regarded as a run, lines must contain a single block of
decimal numerals (with or without zero-padding), which increases or decreases
by one on each line. It does not matter if there are additional number blocks
that don't change, and the changing number can appear anywhere in the line.
Although there is nothing about the program that requires it to be used
on file listings, it was created to quickly assess PNG or EXR streams
generated by Blender rendering processes.
Currently has no switches or controls, and very few features.
I also created it as a learning project for **Eclipse/PyDev** and integration
with **GitHub**, so it is primarily a learning exercise, although it is kind
of handy.
Usage example:
$ ls my_pngstream*.png | lsseq
If the listing looks like this:
my_pngstream-f00001.png
my_pngstream-f00002.png
my_pngstream-f00003.png
my_pngstream-f00004.png
my_pngstream-f00005.png
my_pngstream-f00010.png
my_pngstream-f00012.png
The output will look like this:
my_pngstream-f00001.png
... (5) +
my_pngstream-f00005.png
my_pngstream-f00010.png
my_pngstream-f00012.png
For very long runs, this will be MUCH more compact and readable.
# TODO
Would like to add option parsing and some options for ignoring parts of
the input lines. This would be handy for use with "ls -l" for example,
to ignore the changing file size and date numbers, which would currently
foul the run-detection.

200
lsseq.py Executable file
View File

@ -0,0 +1,200 @@
#!/usr/bin/env python
# lsseq
"""
Filter utility to condense long lists including identical or sequential lines
of text (such as the names of files in a PNG stream).
"""
import sys, re
def main(inp=sys.stdin):
    """
    Condense an iterable of lines into a summary of duplicate and sequential runs.

    Consecutive lines are compared with compare_lines().  Lines are collected
    into groups, and each group is reported as:

      * the first line of the group, followed by
      * ' x N' if the line was repeated N times in a row, or
      * the second line, if the group is a two-line increment/decrement, or
      * ' ... (N) +' (or ' ... (N) -') and the last line, for a run of N
        lines whose number goes up (or down) by one on each line.

    Args:
        inp: Any iterable of strings (a list, an open file, ...).  Defaults
            to sys.stdin.  Lines are used exactly as given; they are not
            stripped.  An empty-string item is treated as end of input, so
            the trailing '' produced by splitting newline-terminated text
            is harmless.

    Returns:
        A single string: the report entries joined with newline characters.
        Every group is preceded by an empty entry, so groups are separated
        by blank lines (and the result starts with one).  Empty input gives
        an empty string.

    >>> frames = ['shot-f00001.png', 'shot-f00002.png',
    ...           'shot-f00103.png', 'shot-f00103.png', 'shot-f00103.png',
    ...           'shot-f00104.png', 'shot-f00105.png', 'shot-f00106.png',
    ...           'shot-f02000.png',
    ...           'shot-f02400.png', 'shot-f02399.png', 'shot-f02398.png']
    >>> print(main(frames))
    <BLANKLINE>
    shot-f00001.png
    shot-f00002.png
    <BLANKLINE>
    shot-f00103.png
     x 3
    <BLANKLINE>
    shot-f00104.png
     ... (3) +
    shot-f00106.png
    <BLANKLINE>
    shot-f02000.png
    <BLANKLINE>
    shot-f02400.png
     ... (3) -
    shot-f02398.png
    >>>
    """
    listing = []
    prev = None   # line consumed on the previous iteration
    base = None   # first line of the group currently being accumulated
    dup_run = 0   # number of '==' steps seen in the current group
    inc_run = 0   # number of '+1' steps seen in the current group
    dec_run = 0   # number of '-1' steps seen in the current group

    inp_iter = iter(inp)
    while True:
        # The builtin next() with a default works on both Python 2.6+ and
        # Python 3; None marks exhaustion so the final group still gets
        # flushed by one extra pass through the loop body.
        line = next(inp_iter, None)

        if prev is None:
            if line is None:
                # Empty input: nothing to report (and nothing to wait for).
                break
            # First line: it starts the first group.
            prev = base = line
            continue

        if line:
            comparison = compare_lines(prev, line)
        else:
            # End of input never continues a run.
            comparison = '!='

        # A run may only be extended by the same kind of step that started
        # it; any other step closes the group.
        if comparison == '==' and not inc_run and not dec_run:
            dup_run += 1
        elif comparison == '+1' and not dup_run and not dec_run:
            inc_run += 1
        elif comparison == '-1' and not dup_run and not inc_run:
            dec_run += 1
        else:
            # Close the current group and report it.
            listing.append('')
            listing.append(base)
            if dup_run:
                # Report the number of duplicates
                listing.append(' x {0:d}'.format(dup_run + 1))
            elif inc_run == 1 or dec_run == 1:
                # A run of only 2 lines is simply listed in full
                listing.append(prev)
            elif inc_run > 1:
                # Show range of increment run
                listing.append(' ... ({0:d}) +'.format(inc_run + 1))
                listing.append(prev)
            elif dec_run > 1:
                # Show range of decrement run
                listing.append(' ... ({0:d}) -'.format(dec_run + 1))
                listing.append(prev)
            # Otherwise the group was a single unmatched line: base alone.

            # The current line starts the next group.
            dup_run = inc_run = dec_run = 0
            base = line

        if not line:
            # Final iteration (input exhausted or empty-string item).
            break
        prev = line

    return '\n'.join(listing)
# Anchored patterns: each matches one leading run of digits / non-digits.
numfield = re.compile(r'(^\d+)')
othfield = re.compile(r'(^\D+)')


def breakdown(s):
    """
    Split a string into alternating runs of digits and non-digits.

    Each run is returned as a (kind, text) tuple, where kind is '0' for a
    run of digits and 'a' for anything else.  Concatenating the text parts
    reproduces the input.  An empty (or otherwise falsy) input yields an
    empty list.

    >>> breakdown('df39-330-spam-51.bar')
    [('a', 'df'), ('0', '39'), ('a', '-'), ('0', '330'), ('a', '-spam-'), ('0', '51'), ('a', '.bar')]
    >>>
    """
    pieces = []
    remainder = s
    while remainder:
        kind = '0'
        found = numfield.match(remainder)
        if found is None:
            # Not digits at the front, so it must be a non-digit run.
            kind = 'a'
            found = othfield.match(remainder)
        token = found.group(1)
        pieces.append((kind, token))
        # Drop the run just consumed and carry on with the rest.
        remainder = remainder[len(token):]
    return pieces
def compare_lines(l1, l2):
    """
    Compare two lines, returning '!=', '==', '+1', '-1', indicating
    different, same, incremented by one, or decremented by one.

    Both lines are split with breakdown() and compared field by field with
    compare_fields().  A step ('+1' or '-1') is reported only when the two
    lines have the same number of fields, exactly one field differs, and
    that field is numeric and changes by exactly one.  Any other difference
    gives '!='.

    >>> compare_lines('my_line2439_3349.png', 'my_line2439_3350.png')
    '+1'
    >>> compare_lines('my_line2439_3349.png', 'my_line2439_3348.png')
    '-1'
    >>> compare_lines('my_line2439_3349.png', 'my_line2439_8304.png')
    '!='
    >>> compare_lines('my_line2439_3349.png', 'spam_8304.png')
    '!='
    >>> compare_lines('my_line2439_3349.png', 'my_line2439_3349.png')
    '=='
    >>> compare_lines('my_line1', 'my_line1.png')
    '!='
    >>> compare_lines('cam1_f0005.png', 'cam2_f0090.png')
    '!='
    >>>
    """
    fields1 = breakdown(l1)
    fields2 = breakdown(l2)

    if fields1 == fields2:
        return '=='

    # zip() stops at the shorter input, so lines with a different number of
    # fields must be rejected up front or a common prefix would look equal.
    if len(fields1) != len(fields2):
        return '!='

    # Classify only the fields that actually changed.  Building a list
    # (rather than reusing one zip object) matters on Python 3, where zip()
    # is a one-shot iterator.
    changes = [compare_fields(f1, f2)
               for (f1, f2) in zip(fields1, fields2)
               if f1 != f2]

    # Exactly ONE field may change, and it must step by one.
    if len(changes) == 1 and changes[0] in ('+1', '-1'):
        return changes[0]

    # Otherwise, it doesn't match at all
    return '!='
def compare_fields(f1, f2):
    """
    Classify how field f2 differs from field f1.

    Each field is a (kind, text) pair in which kind '0' marks a run of
    digits.  Returns:

      '=='  the two fields are identical,
      '+1'  both are numeric and f2 is exactly one more than f1,
      '-1'  both are numeric and f2 is exactly one less than f1,
      '!='  anything else (including equal numbers with different padding).

    >>> compare_fields(('0','00345'), ('0','00346'))
    '+1'
    >>>
    """
    if f1 == f2:
        return '=='

    both_numeric = f1[0] == '0' and f2[0] == '0'
    if not both_numeric:
        return '!='

    # Compare by value so that zero-padding does not matter.
    step = int(f2[1]) - int(f1[1])
    if step == 1:
        return '+1'
    if step == -1:
        return '-1'
    return '!='
if __name__ == '__main__':
    # Used as a filter: condense standard input and print the report.
    condensed = main(sys.stdin)
    print(condensed)