diff --git a/README.md b/README.md index 2fe2854..9d303d1 100644 --- a/README.md +++ b/README.md @@ -30,6 +30,7 @@ Features: * Store md5 hash of files * Ability to update the catalog * Tag your different storages with additional information + * print results and export catalog to CSV @@ -74,6 +75,7 @@ See the [examples](#examples) for an overview of the available features. * [Catalog graph](#catalog-graph) * [Edit storage](#edit-storage) * [Update catalog](#update-catalog) + * [CSV format](#csv-format) * [Examples](#examples) * [Contribution](#contribution) @@ -206,6 +208,21 @@ Updates are based on the access time of each of the files and on the hash checksum if present (catalog was indexed with `-c --hash` and `update` is called with the switch `-c --hash`). +## CSV format + +Results can be printed to CSV using `--format=csv`. +Fields are separated by a comma (`,`) and are quoted with double quotes (`"`). + +Each line format is `name,type,path,size,indexed_at,maccess,md5`. + +* **name**: the entry name +* **type**: the entry type (file, directory) +* **path**: the entry path +* **size**: the entry size +* **indexed_at**: when this entry was indexed +* **maccess**: the entry modification date/time +* **md5**: the entry checksum (if any) + # Examples ## Simple example diff --git a/catcli/catcli.py b/catcli/catcli.py index 431c195..ac39a23 100755 --- a/catcli/catcli.py +++ b/catcli/catcli.py @@ -36,15 +36,16 @@ USAGE = """ {0} Usage: - {1} ls [--catalog=] [-aBCrVS] [] + {1} ls [--catalog=] [--format=] [-aBCrVS] [] + {1} find [--catalog=] [--format=] [-aBCbdVP] [--path=] + {1} tree [--catalog=] [--format=] [-aBCVSH] [] {1} index [--catalog=] [--meta=...] [-aBCcfnV] {1} update [--catalog=] [-aBCcfnV] [--lpath=] - {1} find [--catalog=] [-aBCbdVP] [--path=] {1} rm [--catalog=] [-BCfV] - {1} tree [--catalog=] [-aBCVS] [] {1} rename [--catalog=] [-BCfV] {1} edit [--catalog=] [-BCfV] {1} graph [--catalog=] [-BCV] [] + {1} print_supported_formats {1} help {1} --help {1} --version @@ -57,8 +58,10 @@ Options: -b --script Output script to manage found file(s) [default: False]. -C --no-color Do not output colors [default: False]. -c --hash Calculate md5 hash [default: False]. - -d --directory Only directory (default: False). + -d --directory Only directory [default: False]. + -F --format= Print format, see command \"print_supported_formats\" [default: native]. -f --force Do not ask when updating the catalog [default: False]. + -H --header Print header on CSV format [default: False]. -l --lpath= Path where changes are logged [default: ] -n --no-subsize Do not store size of directories [default: False]. -P --parent Ignore stored relpath [default: True]. @@ -68,7 +71,7 @@ Options: -V --verbose Be verbose [default: False]. -v --version Show version. -h --help Show this screen. -""".format(BANNER, NAME, CATALOGPATH) +""".format(BANNER, NAME, CATALOGPATH) # nopep8 def cmd_index(args, noder, catalog, top): @@ -143,7 +146,9 @@ def cmd_ls(args, noder, top): path += SEPARATOR if not path.endswith(WILD): path += WILD - found = noder.walk(top, path, rec=args['--recursive']) + found = noder.walk(top, path, + rec=args['--recursive'], + fmt=args['--format']) if not found: Logger.err('\"{}\": nothing found'.format(args[''])) return found @@ -165,18 +170,25 @@ def cmd_find(args, noder, top): fromtree = args['--parent'] directory = args['--directory'] startpath = args['--path'] + fmt = args['--format'] return noder.find_name(top, args[''], script=args['--script'], startpath=startpath, directory=directory, - parentfromtree=fromtree) + parentfromtree=fromtree, fmt=fmt) def cmd_tree(args, noder, top): path = args[''] + fmt = args['--format'] + hdr = args['--header'] + + # find node to start with node = top if path: node = noder.get_node(top, path) + if node: - noder.print_tree(node) + # print the tree + noder.print_tree(node, fmt=fmt, header=hdr) def cmd_graph(args, noder, top): @@ -220,8 +232,8 @@ def cmd_edit(args, noder, catalog, top): def banner(): - Logger.out(BANNER) - Logger.out("") + Logger.out_err(BANNER) + Logger.out_err("") def main(): @@ -231,6 +243,18 @@ def main(): print(USAGE) return True + if args['print_supported_formats']: + print('"native": native format') + print('"csv" : CSV format') + print(' {}'.format(Noder.CSV_HEADER)) + return True + + # check format + fmt = args['--format'] + if fmt != 'native' and fmt != 'csv': + Logger.err('bad format: {}'.format(fmt)) + return False + if args['--verbose']: print(args) diff --git a/catcli/logger.py b/catcli/logger.py index 8500a97..9d7ab92 100644 --- a/catcli/logger.py +++ b/catcli/logger.py @@ -39,6 +39,9 @@ class Logger: Logger.BOLD = '' Logger.UND = '' + def fix_badchars(line): + return line.encode('utf-8', 'ignore').decode('utf-8') + ###################################################################### # node specific output ###################################################################### @@ -48,13 +51,15 @@ class Logger: if attr: end = ' {}({}){}'.format(Logger.GRAY, attr, Logger.RESET) s = '{}{}{}{}:'.format(pre, Logger.UND, Logger.STORAGE, Logger.RESET) - s += ' {}{}{}{}\n'.format(Logger.PURPLE, name, Logger.RESET, end) + s += ' {}{}{}{}\n'.format(Logger.PURPLE, + Logger.fix_badchars(name), + Logger.RESET, end) s += ' {}{}{}'.format(Logger.GRAY, args, Logger.RESET) sys.stdout.write('{}\n'.format(s)) def file(pre, name, attr): '''print a file node''' - s = '{}{}'.format(pre, name) + s = '{}{}'.format(pre, Logger.fix_badchars(name)) s += ' {}[{}]{}'.format(Logger.GRAY, attr, Logger.RESET) sys.stdout.write('{}\n'.format(s)) @@ -67,12 +72,14 @@ class Logger: end.append(' '.join(['{}:{}'.format(x, y) for x, y in attr])) if end: end = ' [{}]'.format(', '.join(end)) - s = '{}{}{}{}'.format(pre, Logger.BLUE, name, Logger.RESET) + s = '{}{}{}{}'.format(pre, Logger.BLUE, + Logger.fix_badchars(name), Logger.RESET) s += '{}{}{}'.format(Logger.GRAY, end, Logger.RESET) sys.stdout.write('{}\n'.format(s)) def arc(pre, name, archive): - s = '{}{}{}{}'.format(pre, Logger.YELLOW, name, Logger.RESET) + s = '{}{}{}{}'.format(pre, Logger.YELLOW, + Logger.fix_badchars(name), Logger.RESET) s += ' {}[{}:{}]{}'.format(Logger.GRAY, Logger.ARCHIVE, archive, Logger.RESET) sys.stdout.write('{}\n'.format(s)) @@ -82,32 +89,44 @@ class Logger: ###################################################################### def out(string): '''to stdout no color''' + string = Logger.fix_badchars(string) sys.stdout.write('{}\n'.format(string)) + def out_err(string): + '''to stderr no color''' + string = Logger.fix_badchars(string) + sys.stderr.write('{}\n'.format(string)) + def debug(string): '''to stderr no color''' + string = Logger.fix_badchars(string) sys.stderr.write('[DBG] {}\n'.format(string)) def info(string): '''to stdout in color''' + string = Logger.fix_badchars(string) s = '{}{}{}'.format(Logger.MAGENTA, string, Logger.RESET) sys.stdout.write('{}\n'.format(s)) def err(string): '''to stderr in RED''' + string = Logger.fix_badchars(string) s = '{}{}{}'.format(Logger.RED, string, Logger.RESET) sys.stderr.write('{}\n'.format(s)) def progr(string): '''print progress''' + string = Logger.fix_badchars(string) sys.stderr.write('{}\r'.format(string)) sys.stderr.flush() def bold(string): '''make it bold''' + string = Logger.fix_badchars(string) return '{}{}{}'.format(Logger.BOLD, string, Logger.RESET) def flog(path, string, append=True): + string = Logger.fix_badchars(string) mode = 'w' if append: mode = 'a' diff --git a/catcli/noder.py b/catcli/noder.py index 8b793b4..0997b49 100644 --- a/catcli/noder.py +++ b/catcli/noder.py @@ -35,6 +35,7 @@ class Noder: TYPE_ARC = 'arc' TYPE_STORAGE = 'storage' TYPE_META = 'meta' + CSV_HEADER = 'name,type,path,size,indexed_at,maccess,md5' def __init__(self, debug=False, sortsize=False, arc=False): ''' @@ -280,6 +281,52 @@ class Noder: ############################################################### # printing ############################################################### + def _node_to_csv(self, node, sep=','): + ''' + print a node to csv + @node: the node to consider + ''' + if not node: + return '' + if node.type == self.TYPE_TOP: + return '' + + out = [] + if node.type == self.TYPE_STORAGE: + # handle storage + out.append(node.name) + out.append(node.type) + out.append('') # full path + # size + sz = self._rec_size(node, store=False) + out.append(utils.human(sz)) + out.append(utils.epoch_to_str(node.ts)) + out.append('') # maccess + out.append('') # md5 + else: + out.append(node.name) + out.append(node.type) + + # node full path + parents = self._get_parents(node) + storage = self._get_storage(node) + fullpath = os.path.join(storage.name, parents) + out.append(fullpath) + + out.append(utils.human(node.size)) + out.append(utils.epoch_to_str(storage.ts)) + out.append(utils.epoch_to_str(node.maccess)) + + # md5 if any + if node.md5: + out.append(node.md5) + else: + out.append('') + + line = sep.join(['"' + o + '"' for o in out]) + if len(line) > 0: + Logger.out(line) + def _print_node(self, node, pre='', withpath=False, withdepth=False, withstorage=False, recalcparent=False): @@ -340,28 +387,28 @@ class Noder: nbchildren = len(node.children) freepercent = '{:.1f}%'.format( node.free * 100 / node.total - ).ljust(6) + ) # get the date dt = '' if self._has_attr(node, 'ts'): - dt = 'date: ' - dt += '{}'.format(utils.epoch_to_str(node.ts)).ljust(11) + dt = 'date:' + dt += '{}'.format(utils.epoch_to_str(node.ts)) ds = '' # the children size sz = self._rec_size(node, store=False) sz = utils.human(sz) - ds = 'totsize:' + '{}'.format(sz).ljust(7) + ds = 'totsize:' + '{}'.format(sz) # format the output name = '{}'.format(node.name) args = [ - 'nbfiles:' + '{}'.format(nbchildren).ljust(6), + 'nbfiles:' + '{}'.format(nbchildren), ds, 'free:{}'.format(freepercent), - 'du:' + '{}/{}'.format(hf, ht).ljust(14), + 'du:' + '{}/{}'.format(hf, ht), dt] Logger.storage(pre, - name.ljust(20), - '{}'.format(','.join(args)), + name, + '{}'.format(' | '.join(args)), node.attr) elif node.type == self.TYPE_ARC: # archive node @@ -370,11 +417,29 @@ class Noder: else: Logger.err('bad node encountered: {}'.format(node)) - def print_tree(self, node, style=anytree.ContRoundStyle()): - '''print the tree similar to unix tool "tree"''' + def print_tree(self, node, style=anytree.ContRoundStyle(), + fmt='native', header=False): + ''' + print the tree similar to unix tool "tree" + @node: start node + @style: when fmt=native, defines the tree style + @fmt: output format + @header: when fmt=csv, print the header + ''' + if fmt == 'native': + rend = anytree.RenderTree(node, childiter=self._sort_tree) + for pre, fill, node in rend: + self._print_node(node, pre=pre, withdepth=True) + elif fmt == 'csv': + self._to_csv(node, with_header=header) + + def _to_csv(self, node, with_header=False): + '''print the tree to csv''' rend = anytree.RenderTree(node, childiter=self._sort_tree) - for pre, fill, node in rend: - self._print_node(node, pre=pre, withdepth=True) + if with_header: + Logger.out(self.CSV_HEADER) + for _, _, node in rend: + self._node_to_csv(node) def to_dot(self, node, path='tree.dot'): '''export to dot for graphing''' @@ -387,8 +452,16 @@ class Noder: ############################################################### def find_name(self, root, key, script=False, directory=False, - startpath=None, parentfromtree=False): - '''find files based on their names''' + startpath=None, parentfromtree=False, + fmt='native'): + ''' + find files based on their names + @script: output script + @directory: only search for directories + @startpath: node to start with + @parentfromtree: get path from parent instead of stored relpath + @fmt: output format + ''' self._debug('searching for \"{}\"'.format(key)) start = root if startpath: @@ -403,16 +476,26 @@ class Noder: if directory and f.type != self.TYPE_DIR: # ignore non directory continue - self._print_node(f, withpath=True, withdepth=True, - withstorage=True, recalcparent=parentfromtree) + + # print the node + if fmt == 'native': + self._print_node(f, withpath=True, + withdepth=True, + withstorage=True, + recalcparent=parentfromtree) + elif fmt == 'csv': + self._node_to_csv(f) + if parentfromtree: paths.append(self._get_parents(f)) else: paths.append(f.relpath) + if script: tmp = ['${source}/' + x for x in paths] cmd = 'op=file; source=/media/mnt; $op {}'.format(' '.join(tmp)) Logger.info(cmd) + return found def _find_name(self, node): @@ -424,23 +507,47 @@ class Noder: ############################################################### # climbing ############################################################### - def walk(self, root, path, rec=False): - '''walk the tree for ls based on names''' + def walk(self, root, path, rec=False, fmt='native'): + ''' + walk the tree for ls based on names + @root: start node + @rec: recursive walk + @fmt: output format + ''' self._debug('walking path: \"{}\"'.format(path)) + r = anytree.resolver.Resolver('name') found = [] try: found = r.glob(root, path) if len(found) < 1: + # nothing found return [] + if rec: - self.print_tree(found[0].parent) + # print the entire tree + self.print_tree(found[0].parent, fmt=fmt) return found + + # sort found nodes found = sorted(found, key=self._sort, reverse=self.sortsize) - self._print_node(found[0].parent, - withpath=False, withdepth=True) + + # print the parent + if fmt == 'native': + self._print_node(found[0].parent, + withpath=False, withdepth=True) + elif fmt == 'csv': + self._node_to_csv(found[0].parent) + + # print all found nodes for f in found: - self._print_node(f, withpath=False, pre='- ', withdepth=True) + if fmt == 'native': + self._print_node(f, withpath=False, + pre='- ', + withdepth=True) + elif fmt == 'csv': + self._node_to_csv(f) + except anytree.resolver.ChildResolverError: pass return found @@ -499,6 +606,8 @@ class Noder: def _get_storage(self, node): '''recursively traverse up to find storage''' + if node.type == self.TYPE_STORAGE: + return node return node.ancestors[1] def _has_attr(self, node, attr): diff --git a/tests-requirements.txt b/tests-requirements.txt index 2795de2..82bfba8 100644 --- a/tests-requirements.txt +++ b/tests-requirements.txt @@ -1,5 +1,6 @@ pycodestyle; python_version >= '3.0' pyflakes; python_version >= '3.0' +#nose-py3; python_version >= '3.0' nose; python_version >= '3.0' coverage; python_version >= '3.0' coveralls; python_version >= '3.0' diff --git a/tests.sh b/tests.sh index 178ba7e..4513102 100755 --- a/tests.sh +++ b/tests.sh @@ -13,8 +13,10 @@ pycodestyle tests/ pyflakes catcli/ pyflakes tests/ -PYTHONPATH=catcli python3 -m nose -s --with-coverage --cover-package=catcli -#PYTHONPATH=catcli python3 -m nose -s +nosebin="nosetests" + +PYTHONPATH=catcli ${nosebin} -s --with-coverage --cover-package=catcli +#PYTHONPATH=catcli ${nosebin} -s for t in ${cur}/tests-ng/*; do echo "running test \"`basename ${t}`\"" diff --git a/tests/test_find.py b/tests/test_find.py index cb5a584..5daff0a 100644 --- a/tests/test_find.py +++ b/tests/test_find.py @@ -24,7 +24,8 @@ class TestFind(unittest.TestCase): # create fake args args = {'': '7544G', '--script': True, '--verbose': True, '--parent': False, - '--directory': False, '--path': None} + '--directory': False, '--path': None, + '--format': 'native'} # try to find something found = cmd_find(args, noder, top) diff --git a/tests/test_ls.py b/tests/test_ls.py index 7b2061a..5446531 100644 --- a/tests/test_ls.py +++ b/tests/test_ls.py @@ -25,7 +25,8 @@ class TestWalking(unittest.TestCase): # create fake args args = {'': '', '--recursive': False, - '--verbose': True} + '--verbose': True, + '--format': 'native'} # list root args[''] = '' diff --git a/tests/test_rm.py b/tests/test_rm.py index 81f96ef..356bc62 100644 --- a/tests/test_rm.py +++ b/tests/test_rm.py @@ -25,7 +25,8 @@ class TestRm(unittest.TestCase): # create fake args dict args = {'': '', '--recursive': False, - '--verbose': True} + '--verbose': True, + '--format': 'native'} # list files and make sure there are children args[''] = '' diff --git a/tests/test_tree.py b/tests/test_tree.py index 66bb618..46cfe4f 100644 --- a/tests/test_tree.py +++ b/tests/test_tree.py @@ -24,7 +24,12 @@ class TestTree(unittest.TestCase): noder = Noder() # create fake args dict - args = {'': path, '--verbose': True} + args = { + '': path, + '--verbose': True, + '--format': 'native', + '--header': False, + } # print tree and wait for any errors cmd_tree(args, noder, top)