pull/11/head
deadc0de6 6 years ago
commit 7c2f41b1d5

@ -5,7 +5,7 @@ Copyright (c) 2017, deadc0de6
import sys import sys
__version__ = '0.4.6' __version__ = '0.5.4'
def main(): def main():

@ -22,16 +22,15 @@ class Catalog:
self.verbose = verbose # verbosity self.verbose = verbose # verbosity
self.force = force # force overwrite if exists self.force = force # force overwrite if exists
self.metanode = None self.metanode = None
# prefer json for git versioning
self.pickle = pickle self.pickle = pickle
def set_metanode(self, metanode): def set_metanode(self, metanode):
''' remove the metanode until tree is re-written ''' '''remove the metanode until tree is re-written'''
self.metanode = metanode self.metanode = metanode
self.metanode.parent = None self.metanode.parent = None
def restore(self): def restore(self):
''' restore the catalog ''' '''restore the catalog'''
if not self.path: if not self.path:
return None return None
if not os.path.exists(self.path): if not os.path.exists(self.path):
@ -41,7 +40,7 @@ class Catalog:
return self._restore_json(open(self.path, 'r').read()) return self._restore_json(open(self.path, 'r').read())
def save(self, node): def save(self, node):
''' save the catalog ''' '''save the catalog'''
if not self.path: if not self.path:
Logger.err('Path not defined') Logger.err('Path not defined')
return False return False
@ -49,7 +48,7 @@ class Catalog:
if d and not os.path.exists(d): if d and not os.path.exists(d):
os.makedirs(d) os.makedirs(d)
elif os.path.exists(self.path) and not self.force: elif os.path.exists(self.path) and not self.force:
if not utils.ask('Overwrite \"{}\"'.format(self.path)): if not utils.ask('Update catalog \"{}\"'.format(self.path)):
Logger.info('Catalog not saved') Logger.info('Catalog not saved')
return False return False
if d and not os.path.exists(d): if d and not os.path.exists(d):
@ -62,14 +61,22 @@ class Catalog:
return self._save_json(node) return self._save_json(node)
def _save_pickle(self, node): def _save_pickle(self, node):
''' pickle the catalog''' '''pickle the catalog'''
pickle.dump(node, open(self.path, 'wb')) pickle.dump(node, open(self.path, 'wb'))
if self.verbose: if self.verbose:
Logger.info('Catalog saved to pickle \"{}\"'.format(self.path)) Logger.info('Catalog saved to pickle \"{}\"'.format(self.path))
return True return True
def _restore_pickle(self):
    '''restore the pickled tree

    Reads the object pickled at self.path and returns it. When
    self.verbose is set, an info message naming the file is logged
    once loading succeeded.
    '''
    # NOTE(review): unpickling runs code embedded in the file, only
    # restore catalogs coming from a trusted location
    # the context manager releases the handle even if unpickling fails
    with open(self.path, 'rb') as f:
        root = pickle.load(f)
    if self.verbose:
        m = 'Catalog imported from pickle \"{}\"'.format(self.path)
        Logger.info(m)
    return root
def _save_json(self, node): def _save_json(self, node):
''' export the catalog in json ''' '''export the catalog in json'''
exp = JsonExporter(indent=2, sort_keys=True) exp = JsonExporter(indent=2, sort_keys=True)
with open(self.path, 'w') as f: with open(self.path, 'w') as f:
exp.write(node, f) exp.write(node, f)
@ -77,16 +84,8 @@ class Catalog:
Logger.info('Catalog saved to json \"{}\"'.format(self.path)) Logger.info('Catalog saved to json \"{}\"'.format(self.path))
return True return True
def _restore_pickle(self):
''' restore the pickled tree '''
root = pickle.load(open(self.path, 'rb'))
if self.verbose:
m = 'Catalog imported from pickle \"{}\"'.format(self.path)
Logger.info(m)
return root
def _restore_json(self, string): def _restore_json(self, string):
''' restore the tree from json ''' '''restore the tree from json'''
imp = JsonImporter() imp = JsonImporter()
root = imp.import_(string) root = imp.import_(string)
if self.verbose: if self.verbose:

@ -37,6 +37,7 @@ USAGE = """
Usage: Usage:
{1} index [--catalog=<path>] [--meta=<meta>...] [-acfuV] <name> <path> {1} index [--catalog=<path>] [--meta=<meta>...] [-acfuV] <name> <path>
{1} update [--catalog=<path>] [-acfuV] <name> <path>
{1} ls [--catalog=<path>] [-arVS] [<path>] {1} ls [--catalog=<path>] [-arVS] [<path>]
{1} find [--catalog=<path>] [-abV] <term> {1} find [--catalog=<path>] [-abV] <term>
{1} rm [--catalog=<path>] [-fV] <storage> {1} rm [--catalog=<path>] [-fV] <storage>
@ -49,42 +50,69 @@ Usage:
{1} --version {1} --version
Options: Options:
--catalog=<path> Path to the catalog [default: {2}]. --catalog=<path> Path to the catalog [default: {2}].
--meta=<meta> Additional attribute to store [default: ]. --meta=<meta> Additional attribute to store [default: ].
-u --subsize Store size of folders [default: False]. -u --subsize Store size of directories [default: False].
-a --archive Handle archive file [default: False]. -a --archive Handle archive file [default: False].
-f --force Force overwrite [default: False]. -f --force Do not ask when updating the catalog [default: False].
-b --script Output script to manage found file(s) [default: False]. -b --script Output script to manage found file(s) [default: False].
-S --sortsize Sort by size, largest first [default: False]. -S --sortsize Sort by size, largest first [default: False].
-c --hash Calculate md5 hash [default: False]. -c --hash Calculate md5 hash [default: False].
-r --recursive Recursive [default: False]. -r --recursive Recursive [default: False].
-V --verbose Be verbose [default: False]. -V --verbose Be verbose [default: False].
-v --version Show version. -v --version Show version.
-h --help Show this screen. -h --help Show this screen.
""".format(BANNER, NAME, CATALOGPATH) """.format(BANNER, NAME, CATALOGPATH)
def cmd_index(args, noder, catalog, top): def cmd_index(args, noder, catalog, top, debug=False):
path = args['<path>'] path = args['<path>']
name = args['<name>'] name = args['<name>']
nohash = not args['--hash'] nohash = not args['--hash']
subsize = args['--subsize'] subsize = args['--subsize']
if not os.path.exists(path): if not os.path.exists(path):
Logger.err('\"{}\" does not exist'.format(path)) Logger.err('\"{}\" does not exist'.format(path))
return False return
if name in noder.get_storage_names(top): if name in noder.get_storage_names(top):
Logger.err('storage named \"{}\" already exist'.format(name)) if not ask('Overwrite storage \"{}\"'.format(name)):
return False Logger.err('storage named \"{}\" already exist'.format(name))
return
node = noder.get_storage_node(top, name)
node.parent = None
start = datetime.datetime.now() start = datetime.datetime.now()
walker = Walker(noder, nohash=nohash) walker = Walker(noder, nohash=nohash, debug=debug)
attr = noder.clean_storage_attr(args['--meta']) attr = noder.format_storage_attr(args['--meta'])
root = noder.storage_node(name, path, parent=top, attr=attr) root = noder.storage_node(name, path, parent=top, attr=attr)
_, cnt = walker.index(path, name, parent=root, parentpath=path) _, cnt = walker.index(path, root, name)
if subsize: if subsize:
noder.rec_size(root) noder.rec_size(root)
stop = datetime.datetime.now() stop = datetime.datetime.now()
Logger.info('Indexed {} file(s) in {}'.format(cnt, stop - start)) Logger.info('Indexed {} file(s) in {}'.format(cnt, stop - start))
catalog.save(top) if cnt > 0:
catalog.save(top)
def cmd_update(args, noder, catalog, top, debug=False):
    '''update an already indexed storage from the filesystem

    Reads <path>, <name>, --hash and --subsize from args. Logs an error
    and returns when the path does not exist or when no storage with
    that name is found under top. Otherwise the storage is re-indexed
    and the catalog is saved only if at least one change was counted.
    '''
    target = args['<path>']
    storage = args['<name>']
    skip_hash = not args['--hash']
    with_sizes = args['--subsize']
    if not os.path.exists(target):
        Logger.err('\"{}\" does not exist'.format(target))
        return
    root = noder.get_storage_node(top, storage)
    if not root:
        Logger.err('storage named \"{}\" does not exist'.format(storage))
        return
    began = datetime.datetime.now()
    walker = Walker(noder, nohash=skip_hash, debug=debug)
    cnt = walker.reindex(target, root, top)
    if with_sizes:
        noder.rec_size(root)
    elapsed = datetime.datetime.now() - began
    Logger.info('updated {} file(s) in {}'.format(cnt, elapsed))
    # nothing changed means nothing to write back
    if cnt > 0:
        catalog.save(top)
def cmd_ls(args, noder, top): def cmd_ls(args, noder, top):
@ -104,15 +132,14 @@ def cmd_ls(args, noder, top):
def cmd_rm(args, noder, catalog, top): def cmd_rm(args, noder, catalog, top):
what = args['<storage>'] name = args['<storage>']
storages = list(x.name for x in top.children) node = noder.get_storage_node(top, name)
if what in storages: if node:
node = next(filter(lambda x: x.name == what, top.children))
node.parent = None node.parent = None
if catalog.save(top): if catalog.save(top):
Logger.info('Storage \"{}\" removed'.format(what)) Logger.info('Storage \"{}\" removed'.format(name))
else: else:
Logger.err('Storage named \"{}\" does not exist'.format(what)) Logger.err('Storage named \"{}\" does not exist'.format(name))
return top return top
@ -201,7 +228,9 @@ def main():
# parse command # parse command
if args['index']: if args['index']:
cmd_index(args, noder, catalog, top) cmd_index(args, noder, catalog, top, debug=args['--verbose'])
if args['update']:
cmd_update(args, noder, catalog, top, debug=args['--verbose'])
elif args['find']: elif args['find']:
cmd_find(args, noder, top) cmd_find(args, noder, top)
elif args['tree']: elif args['tree']:
@ -221,7 +250,7 @@ def main():
if __name__ == '__main__': if __name__ == '__main__':
''' entry point ''' '''entry point'''
if main(): if main():
sys.exit(0) sys.exit(0)
sys.exit(1) sys.exit(1)

@ -29,22 +29,25 @@ class Decomp:
'zip': self._zip} 'zip': self._zip}
def get_format(self): def get_format(self):
'''return list of supported extensions'''
return list(self.ext.keys()) return list(self.ext.keys())
def get_names(self, path): def get_names(self, path):
''' get tree of compressed archive ''' '''get tree of compressed archive'''
ext = os.path.splitext(path)[1][1:] ext = os.path.splitext(path)[1][1:]
if ext in list(self.ext.keys()): if ext in list(self.ext.keys()):
return self.ext[ext](path) return self.ext[ext](path)
return None return None
def _tar(self, path): def _tar(self, path):
'''return list of file names in tar'''
if not tarfile.is_tarfile(path): if not tarfile.is_tarfile(path):
return None return None
tar = tarfile.open(path, "r") tar = tarfile.open(path, "r")
return tar.getnames() return tar.getnames()
def _zip(self, path): def _zip(self, path):
'''return list of file names in zip'''
if not zipfile.is_zipfile(path): if not zipfile.is_zipfile(path):
return None return None
z = zipfile.ZipFile(path) z = zipfile.ZipFile(path)

@ -29,7 +29,7 @@ class Logger:
# node specific output # node specific output
###################################################################### ######################################################################
def storage(pre, name, attr): def storage(pre, name, attr):
''' print a storage node ''' '''print a storage node'''
end = '' end = ''
if attr: if attr:
end = ' {}({}){}'.format(Logger.GRAY, attr, Logger.RESET) end = ' {}({}){}'.format(Logger.GRAY, attr, Logger.RESET)
@ -38,13 +38,13 @@ class Logger:
sys.stdout.write('{}\n'.format(s)) sys.stdout.write('{}\n'.format(s))
def file(pre, name, attr): def file(pre, name, attr):
''' print a file node ''' '''print a file node'''
s = '{}{}'.format(pre, name) s = '{}{}'.format(pre, name)
s += ' {}[{}]{}'.format(Logger.GRAY, attr, Logger.RESET) s += ' {}[{}]{}'.format(Logger.GRAY, attr, Logger.RESET)
sys.stdout.write('{}\n'.format(s)) sys.stdout.write('{}\n'.format(s))
def dir(pre, name, depth='', attr=None): def dir(pre, name, depth='', attr=None):
''' print a directory node ''' '''print a directory node'''
end = [] end = []
if depth != '': if depth != '':
end.append('nbfiles:{}'.format(depth)) end.append('nbfiles:{}'.format(depth))
@ -65,27 +65,28 @@ class Logger:
# generic output # generic output
###################################################################### ######################################################################
def out(string): def out(string):
''' to stdout ''' '''to stdout'''
sys.stdout.write('{}\n'.format(string)) sys.stdout.write('{}\n'.format(string))
def log(string): def log(string):
''' to stderr ''' '''to stderr'''
sys.stderr.write('{}\n'.format(string)) sys.stderr.write('{}\n'.format(string))
def info(string): def info(string):
''' to stderr in color ''' '''to stderr in color'''
s = '{}{}{}'.format(Logger.MAGENTA, string, Logger.RESET) s = '{}{}{}'.format(Logger.MAGENTA, string, Logger.RESET)
sys.stderr.write('{}\n'.format(s)) sys.stderr.write('{}\n'.format(s))
def err(string): def err(string):
''' to stderr in RED ''' '''to stderr in RED'''
s = '{}{}{}'.format(Logger.RED, string, Logger.RESET) s = '{}{}{}'.format(Logger.RED, string, Logger.RESET)
sys.stderr.write('{}\n'.format(s)) sys.stderr.write('{}\n'.format(s))
def progr(string): def progr(string):
''' print progress ''' '''print progress'''
sys.stderr.write('{}\r'.format(string)) sys.stderr.write('{}\r'.format(string))
sys.stderr.flush() sys.stderr.flush()
def bold(string): def bold(string):
'''make it bold'''
return '{}{}{}'.format(Logger.BOLD, string, Logger.RESET) return '{}{}{}'.format(Logger.BOLD, string, Logger.RESET)

@ -29,7 +29,7 @@ class Noder:
TOPNAME = 'top' TOPNAME = 'top'
METANAME = 'meta' METANAME = 'meta'
TYPE_TOP = 'top' # tip top ;-) TYPE_TOP = 'top'
TYPE_FILE = 'file' TYPE_FILE = 'file'
TYPE_DIR = 'dir' TYPE_DIR = 'dir'
TYPE_ARC = 'arc' TYPE_ARC = 'arc'
@ -44,36 +44,91 @@ class Noder:
if self.arc: if self.arc:
self.decomp = Decomp() self.decomp = Decomp()
def set_hashing(self, val):
self.hash = val
def get_storage_names(self, top): def get_storage_names(self, top):
''' return a list of all storage names ''' '''return a list of all storage names'''
return [x.name for x in list(top.children)] return [x.name for x in list(top.children)]
def clean_storage_attr(self, attr): def get_storage_node(self, top, name):
if not attr: '''return the storage node if any'''
return '' for n in top.children:
return ', '.join(attr) if n.type != self.TYPE_STORAGE:
continue
if n.name == name:
return n
return None
def get_node(self, top, path): def get_node(self, top, path, quiet=False):
''' get the node at path ''' '''get the node by internal tree path'''
r = anytree.resolver.Resolver('name') r = anytree.resolver.Resolver('name')
try: try:
return r.get(top, path) return r.get(top, path)
except anytree.resolver.ChildResolverError: except anytree.resolver.ChildResolverError:
Logger.err('No node at path \"{}\"'.format(path)) if not quiet:
Logger.err('No node at path \"{}\"'.format(path))
return None return None
def get_node_if_newer(self, top, path, maccess):
    '''return the node (if any) and if path is newer

    path is resolved below top after stripping leading separators.
    Returns a (node, newer) tuple:
      (None, True)  no node exists at that path
      (node, True)  the node has no usable maccess, or maccess is
                    greater than the stored one
      (node, False) the stored maccess is at least as recent
    '''
    treepath = path.lstrip(os.sep)
    node = self.get_node(top, treepath, quiet=True)
    if not node:
        # node does not exist
        return None, True
    # nodes may not carry the attribute at all, a plain attribute
    # access would raise instead of forcing the re-indexing
    old_maccess = getattr(node, 'maccess', None)
    if not old_maccess:
        # force re-indexing if no maccess
        return node, True
    if float(maccess) > float(old_maccess):
        return node, True
    return node, False
def get_meta_node(self, top):
    '''return the first direct child of top typed as meta, else None'''
    for child in top.children:
        if child.type == self.TYPE_META:
            return child
    return None
def rec_size(self, node):
    '''recursively traverse tree and store dir size

    A file node returns its own size untouched. For a directory or a
    storage node the sizes of all children are summed, stored in
    node.size and returned. Any other node type gets a size of 0.
    '''
    if self.verbose:
        Logger.info('getting directory size recursively')
    if node.type == self.TYPE_FILE:
        return node.size
    size = 0
    # storage nodes are summed like directories, otherwise their
    # size would stay at 0 whatever they contain
    if node.type in (self.TYPE_DIR, self.TYPE_STORAGE):
        for child in node.children:
            size += self.rec_size(child)
    node.size = size
    return size
###############################################################
# public helpers
###############################################################
def format_storage_attr(self, attr):
    '''join the storage attributes with ", " for saving

    An empty or missing attr gives an empty string.
    '''
    return ', '.join(attr) if attr else ''
def set_hashing(self, val):
    '''store val as the hashing switch (self.hash) used when indexing'''
    setattr(self, 'hash', val)
############################################################### ###############################################################
# node creation # node creation
############################################################### ###############################################################
def new_top_node(self): def new_top_node(self):
''' create a new top node''' '''create a new top node'''
return anytree.AnyNode(name=self.TOPNAME, type=self.TYPE_TOP) return anytree.AnyNode(name=self.TOPNAME, type=self.TYPE_TOP)
def update_metanode(self, meta): def update_metanode(self, meta):
''' create or update meta node information ''' '''create or update meta node information'''
epoch = int(time.time()) epoch = int(time.time())
if not meta: if not meta:
attr = {} attr = {}
@ -86,7 +141,7 @@ class Noder:
return meta return meta
def file_node(self, name, path, parent, storagepath): def file_node(self, name, path, parent, storagepath):
''' create a new node representing a file ''' '''create a new node representing a file'''
if not os.path.exists(path): if not os.path.exists(path):
Logger.err('File \"{}\" does not exist'.format(path)) Logger.err('File \"{}\" does not exist'.format(path))
return None return None
@ -102,8 +157,9 @@ class Noder:
relpath = os.path.join(os.path.basename(storagepath), relpath = os.path.join(os.path.basename(storagepath),
os.path.relpath(path, start=storagepath)) os.path.relpath(path, start=storagepath))
maccess = os.path.getmtime(path)
n = self._node(name, self.TYPE_FILE, relpath, parent, n = self._node(name, self.TYPE_FILE, relpath, parent,
size=st.st_size, md5=md5) size=st.st_size, md5=md5, maccess=maccess)
if self.arc: if self.arc:
ext = os.path.splitext(path)[1][1:] ext = os.path.splitext(path)[1][1:]
if ext in self.decomp.get_format(): if ext in self.decomp.get_format():
@ -112,13 +168,37 @@ class Noder:
return n return n
def dir_node(self, name, path, parent, storagepath): def dir_node(self, name, path, parent, storagepath):
''' create a new node representing a directory ''' '''create a new node representing a directory'''
path = os.path.abspath(path) path = os.path.abspath(path)
relpath = os.path.relpath(path, start=storagepath) relpath = os.path.relpath(path, start=storagepath)
return self._node(name, self.TYPE_DIR, relpath, parent) maccess = os.path.getmtime(path)
return self._node(name, self.TYPE_DIR, relpath,
parent, maccess=maccess)
def clean_not_flagged(self, top):
    '''remove any node not flagged and clean flags

    Walks the tree below top in pre-order, hands every file and
    directory node to _clean and returns how many were removed.
    '''
    kinds = (self.TYPE_FILE, self.TYPE_DIR)
    removed = 0
    for node in anytree.PreOrderIter(top):
        if node.type in kinds and self._clean(node):
            removed += 1
    return removed
def flag(self, node):
    '''mark the node by setting its flag attribute to True'''
    setattr(node, 'flag', True)
def _clean(self, node):
    '''remove node if not flagged

    A flagged node loses its flag and False is returned. A node with
    no flag, or with a falsy one, is detached from its parent and
    True is returned.
    '''
    flagged = self._has_attr(node, 'flag') and node.flag
    if flagged:
        del node.flag
        return False
    node.parent = None
    return True
def storage_node(self, name, path, parent, attr=None): def storage_node(self, name, path, parent, attr=None):
''' create a new node representing a storage ''' '''create a new node representing a storage'''
path = os.path.abspath(path) path = os.path.abspath(path)
free = psutil.disk_usage(path).free free = psutil.disk_usage(path).free
total = psutil.disk_usage(path).total total = psutil.disk_usage(path).total
@ -127,21 +207,24 @@ class Noder:
total=total, parent=parent, attr=attr, ts=epoch) total=total, parent=parent, attr=attr, ts=epoch)
def archive_node(self, name, path, parent, archive): def archive_node(self, name, path, parent, archive):
'''create a new node for archive data'''
return anytree.AnyNode(name=name, type=self.TYPE_ARC, relpath=path, return anytree.AnyNode(name=name, type=self.TYPE_ARC, relpath=path,
parent=parent, size=0, md5=None, parent=parent, size=0, md5=None,
archive=archive) archive=archive)
def _node(self, name, type, relpath, parent, size=None, md5=None): def _node(self, name, type, relpath, parent,
''' generic node creation ''' size=None, md5=None, maccess=None):
'''generic node creation'''
return anytree.AnyNode(name=name, type=type, relpath=relpath, return anytree.AnyNode(name=name, type=type, relpath=relpath,
parent=parent, size=size, md5=md5) parent=parent, size=size,
md5=md5, maccess=maccess)
############################################################### ###############################################################
# printing # printing
############################################################### ###############################################################
def _print_node(self, node, pre='', withpath=False, def _print_node(self, node, pre='', withpath=False,
withdepth=False, withstorage=False): withdepth=False, withstorage=False):
''' print a node ''' '''print a node'''
if node.type == self.TYPE_TOP: if node.type == self.TYPE_TOP:
Logger.out('{}{}'.format(pre, node.name)) Logger.out('{}{}'.format(pre, node.name))
elif node.type == self.TYPE_FILE: elif node.type == self.TYPE_FILE:
@ -175,7 +258,11 @@ class Noder:
elif node.type == self.TYPE_STORAGE: elif node.type == self.TYPE_STORAGE:
hf = utils.human(node.free) hf = utils.human(node.free)
ht = utils.human(node.total) ht = utils.human(node.total)
name = '{} (free:{}, total:{})'.format(node.name, hf, ht) dt = ''
if self._has_attr(node, 'ts'):
dt = ', date:'
dt += utils.epoch_to_str(node.ts)
name = '{} (free:{}, total:{}{})'.format(node.name, hf, ht, dt)
Logger.storage(pre, name, node.attr) Logger.storage(pre, name, node.attr)
elif node.type == self.TYPE_ARC: elif node.type == self.TYPE_ARC:
if self.arc: if self.arc:
@ -185,16 +272,22 @@ class Noder:
# Logger.out('{}{}'.format(pre, node.name)) # Logger.out('{}{}'.format(pre, node.name))
def print_tree(self, node, style=anytree.ContRoundStyle()): def print_tree(self, node, style=anytree.ContRoundStyle()):
''' print the tree similar to unix tool "tree" ''' '''print the tree similar to unix tool "tree"'''
rend = anytree.RenderTree(node, childiter=self._sort_tree) rend = anytree.RenderTree(node, childiter=self._sort_tree)
for pre, fill, node in rend: for pre, fill, node in rend:
self._print_node(node, pre=pre, withdepth=True) self._print_node(node, pre=pre, withdepth=True)
def to_dot(self, node, path='tree.dot'):
    '''export to dot for graphing

    Writes the tree rooted at node to a dot file at path, logs where
    it was created and returns the dot command line rendering that
    file to a png.
    '''
    exporter = anytree.exporter.DotExporter(node)
    exporter.to_dotfile(path)
    created = 'dot file created under \"{}\"'.format(path)
    Logger.info(created)
    command = 'dot {} -T png -o /tmp/tree.png'.format(path)
    return command
############################################################### ###############################################################
# searching # searching
############################################################### ###############################################################
def find_name(self, root, key, script=False): def find_name(self, root, key, script=False):
''' find files based on their names ''' '''find files based on their names'''
if self.verbose: if self.verbose:
Logger.info('searching for \"{}\"'.format(key)) Logger.info('searching for \"{}\"'.format(key))
self.term = key self.term = key
@ -214,7 +307,7 @@ class Noder:
return found return found
def _find_name(self, node): def _find_name(self, node):
''' callback for finding files ''' '''callback for finding files'''
if self.term.lower() in node.name.lower(): if self.term.lower() in node.name.lower():
return True return True
return False return False
@ -223,7 +316,7 @@ class Noder:
# climbing # climbing
############################################################### ###############################################################
def walk(self, root, path, rec=False): def walk(self, root, path, rec=False):
''' walk the tree for ls based on names ''' '''walk the tree for ls based on names'''
if self.verbose: if self.verbose:
Logger.info('walking path: \"{}\"'.format(path)) Logger.info('walking path: \"{}\"'.format(path))
r = anytree.resolver.Resolver('name') r = anytree.resolver.Resolver('name')
@ -248,7 +341,7 @@ class Noder:
# tree creation # tree creation
############################################################### ###############################################################
def _add_entry(self, name, top, resolv): def _add_entry(self, name, top, resolv):
''' add an entry to the tree ''' '''add an entry to the tree'''
entries = name.rstrip(os.sep).split(os.sep) entries = name.rstrip(os.sep).split(os.sep)
if len(entries) == 1: if len(entries) == 1:
self.archive_node(name, name, top, top.name) self.archive_node(name, name, top, top.name)
@ -262,7 +355,7 @@ class Noder:
self.archive_node(f, name, top, top.name) self.archive_node(f, name, top, top.name)
def list_to_tree(self, parent, names): def list_to_tree(self, parent, names):
''' convert list of files to a tree ''' '''convert list of files to a tree'''
if not names: if not names:
return return
r = anytree.resolver.Resolver('name') r = anytree.resolver.Resolver('name')
@ -274,7 +367,7 @@ class Noder:
# diverse # diverse
############################################################### ###############################################################
def _sort_tree(self, items): def _sort_tree(self, items):
''' sorting a list of items ''' '''sorting a list of items'''
return sorted(items, key=self._sort, reverse=self.sortsize) return sorted(items, key=self._sort, reverse=self.sortsize)
def _sort(self, x): def _sort(self, x):
@ -283,11 +376,11 @@ class Noder:
return self._sort_fs(x) return self._sort_fs(x)
def _sort_fs(self, n): def _sort_fs(self, n):
''' sorting nodes dir first and alpha ''' '''sorting nodes dir first and alpha'''
return (n.type, n.name.lstrip('\.').lower()) return (n.type, n.name.lstrip('\.').lower())
def _sort_size(self, n): def _sort_size(self, n):
''' sorting nodes by size ''' '''sorting nodes by size'''
try: try:
if not n.size: if not n.size:
return 0 return 0
@ -295,37 +388,9 @@ class Noder:
except AttributeError: except AttributeError:
return 0 return 0
def to_dot(self, node, path='tree.dot'):
''' export to dot for graphing '''
anytree.exporter.DotExporter(node).to_dotfile(path)
Logger.info('dot file created under \"{}\"'.format(path))
return 'dot {} -T png -o /tmp/tree.png'.format(path)
def _get_storage(self, node): def _get_storage(self, node):
''' recursively traverse up to find storage ''' '''recursively traverse up to find storage'''
return node.ancestors[1] return node.ancestors[1]
def get_meta_node(self, top): def _has_attr(self, node, attr):
''' return the meta node if any ''' return attr in node.__dict__.keys()
try:
return next(filter(lambda x: x.type == self.TYPE_META,
top.children))
except StopIteration:
return None
def rec_size(self, node):
''' recursively traverse tree and store dir size '''
if self.verbose:
Logger.info('getting folder size recursively')
if node.type == self.TYPE_FILE:
return node.size
size = 0
for i in node.children:
if node.type == self.TYPE_DIR:
size += self.rec_size(i)
if node.type == self.TYPE_STORAGE:
self.rec_size(i)
else:
continue
node.size = size
return size

@ -10,13 +10,14 @@ import hashlib
import sys import sys
import tempfile import tempfile
import subprocess import subprocess
import datetime
# local imports # local imports
from catcli.logger import Logger from catcli.logger import Logger
def md5sum(path): def md5sum(path):
''' calculate md5 sum of a file ''' '''calculate md5 sum of a file'''
p = os.path.realpath(path) p = os.path.realpath(path)
if not os.path.exists(p): if not os.path.exists(p):
Logger.err('\nunable to get md5sum on {}'.format(path)) Logger.err('\nunable to get md5sum on {}'.format(path))
@ -36,7 +37,7 @@ def md5sum(path):
def human(size): def human(size):
''' human readable size ''' '''human readable size'''
div = 1024. div = 1024.
suf = ['B', 'K', 'M', 'G', 'T', 'P'] suf = ['B', 'K', 'M', 'G', 'T', 'P']
if size < div: if size < div:
@ -48,14 +49,21 @@ def human(size):
return '{:.1f}{}'.format(size, suf[-1]) return '{:.1f}{}'.format(size, suf[-1])
def epoch_to_str(epoch):
    '''format an epoch as "YYYY-MM-DD HH:MM:SS" in local time

    epoch is passed through float(), so numbers and numeric strings
    are both accepted.
    '''
    moment = datetime.datetime.fromtimestamp(float(epoch))
    return moment.strftime('%Y-%m-%d %H:%M:%S')
def ask(question): def ask(question):
''' ask the user what to do ''' '''ask the user what to do'''
resp = input('{} [y|N] ? '.format(question)) resp = input('{} [y|N] ? '.format(question))
return resp.lower() == 'y' return resp.lower() == 'y'
def edit(string): def edit(string):
''' edit the information with the default EDITOR ''' '''edit the information with the default EDITOR'''
string = string.encode('utf-8') string = string.encode('utf-8')
EDITOR = os.environ.get('EDITOR', 'vim') EDITOR = os.environ.get('EDITOR', 'vim')
with tempfile.NamedTemporaryFile(prefix='catcli', suffix='.tmp') as f: with tempfile.NamedTemporaryFile(prefix='catcli', suffix='.tmp') as f:

@ -17,35 +17,114 @@ class Walker:
MAXLINE = 80 - 15 MAXLINE = 80 - 15
def __init__(self, noder, nohash=False): def __init__(self, noder, nohash=False, debug=False):
self.noder = noder self.noder = noder
self.noder.set_hashing(not nohash) self.noder.set_hashing(not nohash)
self.debug = debug
def index(self, path, name, parentpath=None, parent=None, isdir=False): def index(self, path, parent, name):
''' index a folder and store in tree ''' '''index a directory and store in tree'''
self._debug('indexing starting at {}'.format(path))
if not parent: if not parent:
parent = noder.dir_node(name, path, parent) parent = noder.dir_node(name, path, parent)
cnt = 0 cnt = 0
for (root, dirs, files) in os.walk(path): for (root, dirs, files) in os.walk(path):
for f in files: for f in files:
self._debug('found file {} under {}'.format(f, path))
sub = os.path.join(root, f) sub = os.path.join(root, f)
n = f self._log(f)
if len(n) > self.MAXLINE: self._debug('index file {}'.format(sub))
n = f[:self.MAXLINE] + '...'
Logger.progr('indexing: {:80}'.format(n))
self.noder.file_node(os.path.basename(f), sub, self.noder.file_node(os.path.basename(f), sub,
parent, parentpath) parent, path)
cnt += 1 cnt += 1
for d in dirs: for d in dirs:
self._debug('found dir {} under {}'.format(d, path))
base = os.path.basename(d) base = os.path.basename(d)
sub = os.path.join(root, d) sub = os.path.join(root, d)
dummy = self.noder.dir_node(base, sub, parent, parentpath) self._debug('index directory {}'.format(sub))
_, cnt2 = self.index(sub, base, dummy = self.noder.dir_node(base, sub, parent, path)
parent=dummy, parentpath=parentpath) cnt += 1
_, cnt2 = self.index(sub, dummy, base)
cnt += cnt2 cnt += cnt2
break break
# clean line self._log(None)
Logger.progr('{:80}'.format(' '))
return parent, cnt return parent, cnt
def reindex(self, path, parent, top):
    '''reindex a directory and store in tree

    path is the filesystem directory to scan, parent the tree node
    being refreshed and top the root of the whole tree. Returns the
    number of nodes re-indexed plus the number of stale nodes removed.
    '''
    cnt = self._reindex(path, parent, top)
    # only nodes below parent got flagged during the walk, cleaning
    # from top would also drop the nodes of every other storage
    cnt += self.noder.clean_not_flagged(parent)
    return cnt
def _reindex(self, path, parent, top):
    '''reindex a directory and store in tree

    Scans the direct content of path and refreshes the matching nodes
    below parent, recursing into every sub-directory. Every node that
    is kept or (re)created is flagged. Returns the number of file and
    directory nodes that were (re)created. top is only handed down to
    the recursive calls.
    '''
    self._debug('reindexing starting at {}'.format(path))
    cnt = 0
    for (root, dirs, files) in os.walk(path):
        for f in files:
            self._debug('found file {} under {}'.format(f, path))
            sub = os.path.join(root, f)
            maccess = os.path.getmtime(sub)
            reindex, n = self._need_reindex(parent, f, maccess)
            if not reindex:
                # unchanged: keep the existing node, flag it as seen
                self._debug('\tignore file {}'.format(sub))
                self.noder.flag(n)
                continue
            self._debug('\tre-index file {}'.format(sub))
            self._log(f)
            n = self.noder.file_node(os.path.basename(f), sub,
                                     parent, path)
            # NOTE(review): assumes file_node returned a node here,
            # confirm it cannot hand back None at this point
            self.noder.flag(n)
            cnt += 1
        for d in dirs:
            self._debug('found dir {} under {}'.format(d, path))
            base = os.path.basename(d)
            sub = os.path.join(root, d)
            maccess = os.path.getmtime(sub)
            reindex, dummy = self._need_reindex(parent, base, maccess)
            if reindex:
                self._debug('\tre-index directory {}'.format(sub))
                dummy = self.noder.dir_node(base, sub, parent, path)
                cnt += 1
            # flagged whether it was re-created or kept as is
            self.noder.flag(dummy)
            self._debug('reindexing deeper under {}'.format(sub))
            cnt2 = self._reindex(sub, dummy, top)
            cnt += cnt2
        # only the first os.walk level is handled in this call, the
        # recursion above takes care of the sub-directories
        break
    self._log(None)
    return cnt
def _need_reindex(self, top, path, maccess):
    '''test if node needs re-indexing

    Returns a (reindex, node) tuple. A path without a node needs
    indexing. An existing node that is not newer is left alone. An
    existing node that is newer is detached from its parent so that
    the caller can add it again.
    '''
    cnode, newer = self.noder.get_node_if_newer(top, path, maccess)
    if not cnode:
        self._debug('\tdoes not exist')
        return True, cnode
    if not newer:
        # ignore this node
        self._debug('\tis not newer')
        return False, cnode
    # remove this node and re-add
    self._debug('\tis newer')
    self._debug('\tremoving node {}'.format(cnode))
    cnode.parent = None
    self._debug('\tis to be re-indexed')
    return True, cnode
def _debug(self, string):
    '''log string through Logger.log, only when debug is enabled'''
    if self.debug:
        Logger.log(string)
def _log(self, string):
    '''show the indexing progress line

    Does nothing when debug is enabled. An empty or missing string
    overwrites the progress line with blanks. Anything longer than
    MAXLINE is cut and suffixed with "..." before being shown.
    '''
    if self.debug:
        return
    if string:
        shown = string
        if len(shown) > self.MAXLINE:
            shown = shown[:self.MAXLINE] + '...'
        Logger.progr('indexing: {:80}'.format(shown))
    else:
        # clean
        Logger.progr('{:80}'.format(' '))

@ -10,6 +10,7 @@ import string
import random import random
import tempfile import tempfile
import shutil import shutil
import subprocess
TMPSUFFIX = '.catcli' TMPSUFFIX = '.catcli'
@ -35,6 +36,22 @@ def clean(path):
else: else:
os.remove(path) os.remove(path)
def edit_file(path, newcontent):
    '''replace the content of the file "path" with "newcontent"

    the file is created when it does not exist yet (it is opened for
    writing by write_to_file); returns what write_to_file returns
    '''
    # the former exists/else test ran the very same write in both
    # branches, so the existence check served no purpose
    return write_to_file(path, newcontent)
def unix_tree(path):
    '''print a recursive listing of "path" by running "ls -R"

    does nothing when "path" does not exist or when the "ls" binary
    cannot be run on this platform; always returns None
    '''
    if not os.path.exists(path):
        return
    # "--" keeps a path starting with "-" from being parsed as an option
    cmd = ['ls', '-R', '--', path]
    try:
        subprocess.call(cmd)
    except OSError:
        # best effort only: the listing is informational and "ls" may
        # be missing (e.g. on a non-unix platform)
        pass
############################################################ ############################################################
# catcli specific # catcli specific
############################################################ ############################################################
@ -82,9 +99,21 @@ def create_rnd_file(path, filename, content=None):
if not content: if not content:
content = get_rnd_string(100) content = get_rnd_string(100)
fpath = os.path.join(path, filename) fpath = os.path.join(path, filename)
with open(fpath, 'w') as f: return write_to_file(fpath, content)
def write_to_file(path, content):
    '''write "content" to the file "path" and return "path"

    the file is opened in "w" mode: it is created if missing and any
    previous content is replaced
    '''
    with open(path, 'w') as f:
        f.write(content)
    return path
def read_from_file(path):
    '''return the text stored in "path", or an empty string if missing'''
    if os.path.exists(path):
        with open(path, 'r') as fh:
            return fh.read()
    return ''
############################################################ ############################################################

@ -6,6 +6,8 @@ Basic unittest for graph
""" """
import unittest import unittest
import tempfile
import os
from catcli.catcli import * from catcli.catcli import *
from catcli.noder import Noder from catcli.noder import Noder
@ -19,7 +21,7 @@ class TestGraph(unittest.TestCase):
def test_graph(self): def test_graph(self):
# init # init
path = 'fake' path = 'fake'
gpath = '/tmp/graph.dot' gpath = tempfile.gettempdir() + os.sep + 'graph.dot'
self.addCleanup(clean, path) self.addCleanup(clean, path)
self.addCleanup(clean, gpath) self.addCleanup(clean, gpath)
catalog = Catalog(path, force=True, verbose=False) catalog = Catalog(path, force=True, verbose=False)

@ -47,7 +47,7 @@ class TestIndexing(unittest.TestCase):
# create fake args # create fake args
tmpdirname = 'tmpdir' tmpdirname = 'tmpdir'
args = {'<path>': dirpath, '<name>': tmpdirname, args = {'<path>': dirpath, '<name>': tmpdirname,
'--hash': True, '--meta': 'some meta', '--hash': True, '--meta': ['some meta'],
'--subsize': True, '--verbose': True} '--subsize': True, '--verbose': True}
# index the directory # index the directory

@ -0,0 +1,151 @@
"""
author: deadc0de6 (https://github.com/deadc0de6)
Copyright (c) 2017, deadc0de6
Basic unittest for updating an index
"""
import unittest
from catcli.catcli import *
from catcli.noder import Noder
from catcli.walker import Walker
from catcli.catalog import Catalog
from tests.helpers import *
import anytree
class TestIndexing(unittest.TestCase):
    '''check that updating a catalog follows changes on the filesystem'''

    def test_index(self):
        '''index a tree, then update it after additions and removals'''
        # init
        workingdir = get_tempdir()
        catalogpath = create_rnd_file(workingdir, 'catalog.json', content='')
        self.addCleanup(clean, workingdir)

        dirpath = get_tempdir()
        self.addCleanup(clean, dirpath)

        # create 3 files
        f1 = create_rnd_file(dirpath, 'file1')
        f2 = create_rnd_file(dirpath, 'file2')
        f3 = create_rnd_file(dirpath, 'file3')

        # create 2 directories
        d1 = create_dir(dirpath, 'dir1')
        d2 = create_dir(dirpath, 'dir2')

        # fill directories with files
        d1f1 = create_rnd_file(d1, 'dir1file1')
        d1f2 = create_rnd_file(d1, 'dir1file2')
        d2f1 = create_rnd_file(d2, 'dir2file1')

        noder = Noder()
        top = noder.new_top_node()
        walker = Walker(noder)
        catalog = Catalog(catalogpath, force=True, verbose=False)

        # create fake args
        tmpdirname = 'tmpdir'
        args = {'<path>': dirpath, '<name>': tmpdirname,
                '--hash': True, '--meta': ['some meta'],
                '--subsize': True, '--verbose': True}

        # index the directory
        unix_tree(dirpath)
        cmd_index(args, noder, catalog, top, debug=True)
        self.assertNotEqual(os.stat(catalogpath).st_size, 0)

        # print catalog
        noder.print_tree(top)

        # add some files and directories
        new1 = create_rnd_file(d1, 'newf1')
        new2 = create_rnd_file(dirpath, 'newf2')
        new3 = create_dir(dirpath, 'newd3')
        new4 = create_dir(d2, 'newd4')
        new5 = create_rnd_file(new4, 'newf5')
        unix_tree(dirpath)

        # modify files
        edited = 'edited'
        edit_file(d1f1, edited)

        # update storage
        cmd_update(args, noder, catalog, top, debug=True)

        # print catalog
        noder.print_tree(top)

        # explore the top node to find all nodes
        self.assertEqual(len(top.children), 1)
        storage = top.children[0]
        self.assertEqual(len(storage.children), 7)

        # ensures files and directories are in
        names = [node.name for node in anytree.PreOrderIter(storage)]
        print(names)
        for path in (f1, f2, f3, d1, d1f1, d1f2, d2, d2f1,
                     new1, new2, new3, new4, new5):
            self.assertIn(os.path.basename(path), names)

        # number of children expected below each directory node
        # NOTE(review): new4 is created inside d2, so it presumably never
        # appears among storage.children and its count goes unchecked here
        # - confirm and iterate deeper if that check is wanted
        expected = {
            os.path.basename(d1): 3,
            os.path.basename(d2): 2,
            os.path.basename(new3): 0,
            os.path.basename(new4): 1,
        }
        for node in storage.children:
            if node.name in expected:
                self.assertEqual(len(node.children), expected[node.name])

        self.assertEqual(read_from_file(d1f1), edited)

        # remove some files
        clean(d1f1)
        clean(d2)
        clean(new2)
        clean(new4)

        # update storage
        cmd_update(args, noder, catalog, top, debug=True)

        # ensures files and directories are (not) in
        names = [node.name for node in anytree.PreOrderIter(storage)]
        print(names)
        for path in (f1, f2, f3, d1, d1f2, new1, new3):
            self.assertIn(os.path.basename(path), names)
        for path in (d1f1, d2, d2f1, new2, new4, new5):
            self.assertNotIn(os.path.basename(path), names)

        expected = {
            os.path.basename(d1): 2,
            os.path.basename(new3): 0,
        }
        for node in storage.children:
            if node.name in expected:
                self.assertEqual(len(node.children), expected[node.name])
def main():
    '''run the unit tests of this module'''
    unittest.main()


if __name__ == '__main__':
    main()
Loading…
Cancel
Save