You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

818 lines
29 KiB

"""
author: deadc0de6
Copyright (c) 2017, deadc0de6

Class that processes nodes in the catalog tree
"""
import os
import shutil
import time
from typing import List, Union, Tuple, Any, Optional, Dict
import anytree # type: ignore
from pyfzf.pyfzf import FzfPrompt # type: ignore
# local imports
from catcli.utils import size_to_str, epoch_to_str, md5sum, fix_badchars
from catcli.logger import Logger
from catcli.nodeprinter import NodePrinter
from catcli.decomp import Decomp
from catcli.version import __version__ as VERSION
from catcli.exceptions import CatcliException
class Noder:
    """
    handles node in the catalog tree
    The node types declared below are:
    * "top" node representing the top node (generic node)
    * "storage" node representing a storage
    * "dir" node representing a directory
    * "file" node representing a file
    * "arc" node representing archive data
    * "meta" node holding the catalog meta information
    """

    # names given to the two special nodes
    NAME_TOP = 'top'
    NAME_META = 'meta'

    # values stored in the "type" attribute of a node
    TYPE_TOP = 'top'
    TYPE_FILE = 'file'
    TYPE_DIR = 'dir'
    TYPE_ARC = 'arc'
    TYPE_STORAGE = 'storage'
    TYPE_META = 'meta'

    # CSV column names, in the order the fields are appended
    # when a node is converted to a CSV line
    CSV_HEADER = ('name,type,path,size,indexed_at,'
                  'maccess,md5,nbfiles,free_space,'
                  'total_space,meta')
def __init__(self, debug: bool = False,
             sortsize: bool = False,
             arc: bool = False) -> None:
    """
    @debug: debug mode
    @sortsize: sort nodes by size
    @arc: handle archive
    """
    # hashing is enabled by default
    self.hash = True
    self.debug = debug
    self.sortsize = sortsize
    self.arc = arc
    if self.arc:
        # the decompressor is only created when archive handling is on
        self.decomp = Decomp()
def get_storage_names(top: anytree.AnyNode) -> List[str]:
"""return a list of all storage names"""
return [ for x in list(top.children)]
def get_storage_node(self, top: anytree.AnyNode,
                     name: str, path: str = '') -> anytree.AnyNode:
    """
    return the storage node if any
    if path is submitted, it will update the media info
    @top: top node
    @name: name of the storage node to look for
    @path: existing path used to refresh free/total space
    returns the storage node or None if there is no match
    """
    found = None
    for node in top.children:
        if node.type != self.TYPE_STORAGE:
            continue
        if node.name == name:
            found = node
            break
    if found and path and os.path.exists(path):
        # query the disk once and reuse the result for both fields
        usage = shutil.disk_usage(path)
        found.free = usage.free
        found.total = usage.total
        found.ts = int(time.time())
    return found
@staticmethod
def get_node(top: anytree.AnyNode, path: str,
             quiet: bool = False) -> anytree.AnyNode:
    """
    get the node by internal tree path
    @top: node from which the lookup starts
    @path: path of the node; only its basename is resolved
    @quiet: do not log an error when nothing is found
    returns the node or None if it cannot be resolved
    """
    resolv = anytree.resolver.Resolver('name')
    # computed outside the try block so that it is always bound
    # when the error message below is built
    bpath = os.path.basename(path)
    try:
        return resolv.get(top, bpath)
    except anytree.resolver.ChildResolverError:
        if not quiet:
            Logger.err(f'No node at path \"{bpath}\"')
        return None
def get_node_if_changed(self,
                        top: anytree.AnyNode,
                        path: str,
                        treepath: str) -> Tuple[anytree.AnyNode, bool]:
    """
    return the node (if any) and if it has changed
    @top: top node (storage)
    @path: abs path to file
    @treepath: rel path from indexed directory
    returns a tuple (node or None, True if it must be re-indexed)
    """
    treepath = treepath.lstrip(os.sep)
    node = self.get_node(top, treepath, quiet=True)
    # node does not exist
    if not node:
        self._debug('\tchange: node does not exist')
        return None, True
    # an existing directory is never reported as changed
    if os.path.isdir(path):
        return node, False
    # force re-indexing if no maccess
    maccess = os.path.getmtime(path)
    if not self._has_attr(node, 'maccess') or \
            not node.maccess:
        self._debug('\tchange: no maccess found')
        return node, True
    # maccess changed
    old_maccess = node.maccess
    if float(maccess) != float(old_maccess):
        self._debug(f'\tchange: maccess changed for \"{path}\"')
        return node, True
    # test hash (only when hashing is on and a checksum was stored)
    if self.hash and node.md5:
        md5 = self._get_hash(path)
        if md5 and md5 != node.md5:
            msg = f'\tchange: checksum changed for \"{path}\"'
            self._debug(msg)
            return node, True
    self._debug(f'\tchange: no change for \"{path}\"')
    return node, False
def rec_size(self, node: anytree.AnyNode,
             store: bool = True) -> float:
    """
    recursively traverse tree and return size
    @node: node to start from
    @store: store the size in the node
    returns the size of a file node, or the sum of the sizes of
    the children for a directory or storage node
    """
    if node.type == self.TYPE_FILE:
        self._debug(f'getting node size for \"{node.name}\"')
        return float(node.size)
    msg = f'getting node size recursively for \"{node.name}\"'
    self._debug(msg)
    total: float = 0
    # only directories and storages aggregate their children
    if node.type in (self.TYPE_DIR, self.TYPE_STORAGE):
        for child in node.children:
            # keep the child size in its own variable: reusing the
            # accumulator here would discard the running total
            child_size = self.rec_size(child, store=store)
            if store:
                child.size = child_size
            total += child_size
    if store:
        node.size = total
    return total
# public helpers
def format_storage_attr(attr: Union[str, List[str]]) -> str:
"""format the storage attr for saving"""
if not attr:
return ''
if isinstance(attr, list):
return ', '.join(attr)
attr = attr.rstrip()
return attr
def set_hashing(self, val: bool) -> None:
    """
    hash files when indexing
    @val: True to enable hashing, False to disable it
    """
    self.hash = val
# node creation
def new_top_node(self) -> anytree.AnyNode:
    """create a new top node (no parent, named NAME_TOP)"""
    return anytree.AnyNode(name=self.NAME_TOP,
                           type=self.TYPE_TOP)
def new_file_node(self, name: str, path: str,
                  parent: str, storagepath: str) -> anytree.AnyNode:
    """
    create a new node representing a file
    @name: name of the node
    @path: path to the file on the filesystem
    @parent: parent node
    @storagepath: path prefix used to build the node relpath
    returns the new node or None if the file cannot be accessed
    """
    if not os.path.exists(path):
        Logger.err(f'File \"{path}\" does not exist')
        return None
    path = os.path.abspath(path)
    try:
        stat = os.lstat(path)
    except OSError as exc:
        Logger.err(f'OSError: {exc}')
        return None
    md5 = ''
    if self.hash:
        md5 = self._get_hash(path)
    relpath = os.sep.join([storagepath, name])

    maccess = os.path.getmtime(path)
    node = self._new_generic_node(name, self.TYPE_FILE,
                                  relpath, parent,
                                  size=stat.st_size,
                                  md5=md5,
                                  maccess=maccess)
    if self.arc:
        # when archive handling is on, the content of a supported
        # archive is attached below the file node
        ext = os.path.splitext(path)[1][1:]
        if ext.lower() in self.decomp.get_formats():
            self._debug(f'{path} is an archive')
            names = self.decomp.get_names(path)
            self.list_to_tree(node, names)
        else:
            self._debug(f'{path} is NOT an archive')
    return node
def new_dir_node(self, name: str, path: str,
                 parent: str, storagepath: str) -> anytree.AnyNode:
    """
    create a new node representing a directory
    @name: name of the node
    @path: path to the directory on the filesystem
    @parent: parent node
    @storagepath: path prefix used to build the node relpath
    """
    abspath = os.path.abspath(path)
    relpath = os.sep.join([storagepath, name])
    return self._new_generic_node(name, self.TYPE_DIR, relpath,
                                  parent,
                                  maccess=os.path.getmtime(abspath))
def new_storage_node(self, name: str,
                     path: str,
                     parent: str,
                     attr: Optional[str] = None) -> anytree.AnyNode:
    """
    create a new node representing a storage
    @name: name of the storage
    @path: path used to read the free and total disk space
    @parent: parent node
    @attr: optional attribute stored on the node
    """
    path = os.path.abspath(path)
    # query the disk once and reuse the result for both fields
    usage = shutil.disk_usage(path)
    epoch = int(time.time())
    return anytree.AnyNode(name=name, type=self.TYPE_STORAGE,
                           free=usage.free, total=usage.total,
                           parent=parent, attr=attr, ts=epoch)
def new_archive_node(self, name: str, path: str,
                     parent: str, archive: str) -> anytree.AnyNode:
    """
    create a new node for archive data
    @name: name of the node
    @path: stored as the node relpath
    @parent: parent node
    @archive: stored in the "archive" attribute of the node
    """
    return anytree.AnyNode(name=name, type=self.TYPE_ARC, relpath=path,
                           parent=parent, size=0, md5=None,
                           archive=archive)
@staticmethod
def _new_generic_node(name: str, nodetype: str,
                      relpath: str, parent: str,
                      size: float = 0,
                      md5: str = '',
                      maccess: float = 0) -> anytree.AnyNode:
    """
    generic node creation
    @name: name of the node
    @nodetype: value of the "type" attribute
    @relpath: value of the "relpath" attribute
    @parent: parent node
    @size: value of the "size" attribute
    @md5: value of the "md5" attribute
    @maccess: value of the "maccess" attribute
    """
    return anytree.AnyNode(name=name, type=nodetype, relpath=relpath,
                           parent=parent, size=size,
                           md5=md5, maccess=maccess)
# node management
def update_metanode(self, top: anytree.AnyNode) -> anytree.AnyNode:
    """
    create or update meta node information
    @top: top node
    returns the meta node; a newly created meta node is
    returned without a parent
    """
    meta = self._get_meta_node(top)
    epoch = int(time.time())
    if not meta:
        # creation info is only recorded once
        attr: Dict[str, Any] = {}
        attr['created'] = epoch
        attr['created_version'] = VERSION
        meta = anytree.AnyNode(name=self.NAME_META,
                               type=self.TYPE_META,
                               attr=attr)
    # access info is refreshed on every call
    meta.attr['access'] = epoch
    meta.attr['access_version'] = VERSION
    return meta
def _get_meta_node(self, top: anytree.AnyNode) -> anytree.AnyNode:
    """
    return the meta node if any
    @top: top node
    returns the first child of @top of type meta, or None
    """
    candidates = (child for child in top.children
                  if child.type == self.TYPE_META)
    return next(candidates, None)
def clean_not_flagged(self, top: anytree.AnyNode) -> int:
    """
    remove any node not flagged and clean flags
    @top: node from which the tree is traversed
    returns the number of removed nodes
    """
    cnt = 0
    for node in anytree.PreOrderIter(top):
        # only file and directory nodes are candidates for removal
        if node.type not in [self.TYPE_FILE, self.TYPE_DIR]:
            continue
        if self._clean(node):
            cnt += 1
    return cnt
def flag(node: anytree.AnyNode) -> None:
"""flag a node"""
node.flag = True
def _clean(self, node: anytree.AnyNode) -> bool:
    """
    remove node if not flagged
    @node: node to consider
    returns True if the node was detached from its parent;
    otherwise the flag is deleted and False is returned
    """
    flagged = self._has_attr(node, 'flag') and node.flag
    if not flagged:
        # detach the node from the tree
        node.parent = None
        return True
    del node.flag
    return False
# printing
def _node_to_csv(self, node: anytree.AnyNode,
                 sep: str = ',',
                 raw: bool = False) -> None:
    """
    print a node to csv
    @node: the node to consider
    @sep: CSV separator character
    @raw: print raw size rather than human readable
    """
    if not node:
        return
    if node.type == self.TYPE_TOP:
        # nothing is printed for the top node
        return

    out = []
    if node.type == self.TYPE_STORAGE:
        # handle storage
        out.append(node.name)   # name
        out.append(node.type)   # type
        out.append('')          # fake full path
        size = self.rec_size(node, store=False)
        out.append(size_to_str(size, raw=raw))  # size
        out.append(epoch_to_str(node.ts))  # indexed_at
        out.append('')  # fake maccess
        out.append('')  # fake md5
        out.append(str(len(node.children)))  # nbfiles
        # free_space
        out.append(size_to_str(node.free, raw=raw))
        # total_space
        out.append(size_to_str(node.total, raw=raw))
        # a storage can be created without attr (None), which
        # cannot be concatenated with the quotes below
        out.append(node.attr or '')  # meta
    else:
        # handle other nodes
        out.append(node.name.replace('"', '""'))  # name
        out.append(node.type)  # type
        parents = self._get_parents(node)
        storage = self._get_storage(node)
        fullpath = os.path.join(storage.name, parents)
        out.append(fullpath.replace('"', '""'))  # full path

        out.append(size_to_str(node.size, raw=raw))  # size
        out.append(epoch_to_str(storage.ts))  # indexed_at
        if self._has_attr(node, 'maccess'):
            out.append(epoch_to_str(node.maccess))  # maccess
        else:
            out.append('')  # fake maccess
        if node.md5:
            out.append(node.md5)  # md5
        else:
            out.append('')  # fake md5
        if node.type == self.TYPE_DIR:
            out.append(str(len(node.children)))  # nbfiles
        else:
            out.append('')  # fake nbfiles
        out.append('')  # fake free_space
        out.append('')  # fake total_space
        out.append('')  # fake meta

    line = sep.join(['"' + o + '"' for o in out])
    if len(line) > 0:
        # NOTE(review): the statement printing the line is missing
        # from the reviewed text; Logger.stdout_nocolor is assumed,
        # confirm against catcli.logger
        Logger.stdout_nocolor(line)
def _print_node_native(self, node: anytree.AnyNode,
                       pre: str = '',
                       withpath: bool = False,
                       withdepth: bool = False,
                       withstorage: bool = False,
                       recalcparent: bool = False,
                       raw: bool = False) -> None:
    """
    print a node
    @node: the node to print
    @pre: string to print before node
    @withpath: print the node path
    @withdepth: print the node depth info
    @withstorage: print the node storage it belongs to
    @recalcparent: get relpath from tree instead of relpath field
    @raw: print raw size rather than human readable
    """
    if node.type == self.TYPE_TOP:
        # top node
        # NOTE(review): the statement of this branch is missing from
        # the reviewed text; Logger.stdout_nocolor is assumed,
        # confirm against catcli.logger
        Logger.stdout_nocolor(f'{pre}{node.name}')
    elif node.type == self.TYPE_FILE:
        # node of type file
        name = node.name
        if withpath:
            if recalcparent:
                name = os.sep.join([self._get_parents(node.parent), name])
            else:
                name = node.relpath
        name = name.lstrip(os.sep)
        if withstorage:
            storage = self._get_storage(node)
        attr_str = ''
        if node.md5:
            attr_str = f', md5:{node.md5}'
        size = size_to_str(node.size, raw=raw)
        compl = f'size:{size}{attr_str}'
        if withstorage:
            content = Logger.get_bold_text(storage.name)
            compl += f', storage:{content}'
        NodePrinter.print_file_native(pre, name, compl)
    elif node.type == self.TYPE_DIR:
        # node of type directory
        name = node.name
        if withpath:
            if recalcparent:
                name = os.sep.join([self._get_parents(node.parent), name])
            else:
                name = node.relpath
        name = name.lstrip(os.sep)
        depth = 0
        if withdepth:
            # the "depth" shown is the number of direct children
            depth = len(node.children)
        if withstorage:
            storage = self._get_storage(node)
        attr: List[Tuple[str, str]] = []
        if node.size:
            attr.append(('totsize', size_to_str(node.size, raw=raw)))
        if withstorage:
            attr.append(('storage', Logger.get_bold_text(storage.name)))
        NodePrinter.print_dir_native(pre, name, depth=depth, attr=attr)
    elif node.type == self.TYPE_STORAGE:
        # node of type storage
        sztotal = size_to_str(node.total, raw=raw)
        szused = size_to_str(node.total - node.free, raw=raw)
        nbchildren = len(node.children)
        # avoid a division by zero when the total size is 0
        pcent = 0.0
        if node.total:
            pcent = node.free * 100 / node.total
        freepercent = f'{pcent:.1f}%'
        # get the date
        timestamp = ''
        if self._has_attr(node, 'ts'):
            timestamp = 'date:'
            timestamp += epoch_to_str(node.ts)
        # the children size
        recsize = self.rec_size(node, store=False)
        sizestr = size_to_str(recsize, raw=raw)
        disksize = 'totsize:' + f'{sizestr}'
        # format the output
        name = node.name
        # NOTE(review): only the 'nbfiles' and 'du' entries are visible
        # in the reviewed text; the other entries, their order and the
        # 'free:' label are reconstructed from the otherwise unused
        # locals, confirm
        args = [
            'nbfiles:' + f'{nbchildren}',
            disksize,
            f'free:{freepercent}',
            'du:' + f'{szused}/{sztotal}',
            timestamp]
        argsstring = ' | '.join(args)
        # NOTE(review): the printing call is missing from the reviewed
        # text; name and signature are assumed, confirm against
        # catcli.nodeprinter
        NodePrinter.print_storage_native(pre,
                                         name,
                                         argsstring,
                                         node.attr)
    elif node.type == self.TYPE_ARC:
        # archive node (only printed when archive handling is on)
        if self.arc:
            NodePrinter.print_archive_native(pre, node.name, node.archive)
    else:
        Logger.err(f'bad node encountered: {node}')
def print_tree(self, node: anytree.AnyNode,
               fmt: str = 'native',
               raw: bool = False) -> None:
    """
    print the tree in different format
    @node: start node
    @fmt: output format ('native', 'csv' or 'csv-with-header');
          nothing is printed for any other value
    @raw: print the raw size rather than human readable
    """
    if fmt == 'native':
        # "tree" style
        rend = anytree.RenderTree(node, childiter=self._sort_tree)
        for pre, _, thenode in rend:
            self._print_node_native(thenode, pre=pre,
                                    withdepth=True, raw=raw)
    elif fmt == 'csv':
        # csv output
        self._to_csv(node, raw=raw)
    elif fmt == 'csv-with-header':
        # csv output preceded by the header line
        # NOTE(review): the statement printing the header is missing
        # from the reviewed text; Logger.stdout_nocolor is assumed,
        # confirm against catcli.logger
        Logger.stdout_nocolor(self.CSV_HEADER)
        self._to_csv(node, raw=raw)
def _to_csv(self, node: anytree.AnyNode,
            raw: bool = False) -> None:
    """
    print the tree to csv
    @node: start node
    @raw: print the raw size rather than human readable
    """
    rendered = anytree.RenderTree(node, childiter=self._sort_tree)
    # each rendered row is (prefix, fill, node); only the node is used
    for row in rendered:
        self._node_to_csv(row[2], raw=raw)
@staticmethod
def _fzf_prompt(strings: Any) -> Any:
    """
    prompt with fzf
    @strings: the entries to choose from
    returns whatever the fzf prompt returns for the selection
    """
    return FzfPrompt().prompt(strings)
def _to_fzf(self, node: anytree.AnyNode, fmt: str) -> None:
    """
    fzf prompt with list and print selected node(s)
    @node: node to start with
    @fmt: output format for selected nodes
    """
    rendered = anytree.RenderTree(node, childiter=self._sort_tree)
    nodes = {}
    # construct node names list
    for _, _, rend in rendered:
        if not rend:
            continue
        parents = self._get_parents(rend)
        storage = self._get_storage(rend)
        fullpath = os.path.join(storage.name, parents)
        nodes[fullpath] = rend
    # prompt with fzf
    paths = self._fzf_prompt(nodes.keys())
    # print the resulting tree
    subfmt = fmt.replace('fzf-', '')
    for path in paths:
        # ignore empty or unknown selections
        if not path:
            continue
        if path not in nodes:
            continue
        rend = nodes[path]
        self.print_tree(rend, fmt=subfmt)
@staticmethod
def to_dot(node: anytree.AnyNode,
           path: str = '') -> str:
    """
    export to dot for graphing
    @node: start node
    @path: path of the dot file to write
    returns the command line converting the dot file to a png
    """
    # NOTE(review): the default value of @path reads as an empty
    # string in the reviewed text; it may have been a file name,
    # confirm before relying on the default
    anytree.exporter.DotExporter(node).to_dotfile(path)
    Logger.info(f'dot file created under \"{path}\"')
    return f'dot {path} -T png -o /tmp/tree.png'
# searching
def find_name(self, top: anytree.AnyNode,
              key: str,
              script: bool = False,
              only_dir: bool = False,
              startnode: anytree.AnyNode = None,
              parentfromtree: bool = False,
              fmt: str = 'native',
              raw: bool = False) -> List[anytree.AnyNode]:
    """
    find files based on their names
    @top: top node
    @key: term to search for
    @script: output script
    @only_dir: only search for directories
    @startnode: where to start the search; it is handed to
                get_node() as a path (presumably a path, confirm)
    @parentfromtree: get path from parent instead of stored relpath
    @fmt: output format
    @raw: raw size output
    returns the found nodes
    """
    self._debug(f'searching for \"{key}\"')

    # search for nodes based on path
    start = top
    if startnode:
        start = self.get_node(top, startnode)
        if start is None:
            # the start node cannot be resolved: nothing to search
            return []
    filterfunc = self._callback_find_name(key, only_dir)
    found = anytree.findall(start, filter_=filterfunc)
    nbfound = len(found)
    self._debug(f'found {nbfound} node(s)')

    # compile found nodes
    paths = {}
    for item in found:
        item = self._sanitize(item)
        if parentfromtree:
            paths[self._get_parents(item)] = item
        else:
            paths[item.relpath] = item

    if fmt.startswith('fzf'):
        # handle fzf mode: only keep (and print) the selection
        selected = self._fzf_prompt(paths.keys())
        newpaths = {}
        subfmt = fmt.replace('fzf-', '')
        for item in selected:
            if item not in paths:
                continue
            newpaths[item] = paths[item]
            self.print_tree(newpaths[item], fmt=subfmt)
        paths = newpaths
    elif fmt == 'native':
        for item in paths.values():
            # NOTE(review): only withpath=True is visible in the
            # reviewed text; the remaining keyword arguments are
            # reconstructed, confirm
            self._print_node_native(item, withpath=True,
                                    withdepth=True,
                                    withstorage=True,
                                    recalcparent=parentfromtree,
                                    raw=raw)
    elif fmt.startswith('csv'):
        if fmt == 'csv-with-header':
            # NOTE(review): Logger.stdout_nocolor is assumed for the
            # missing statement, confirm against catcli.logger
            Logger.stdout_nocolor(self.CSV_HEADER)
        for item in paths.values():
            self._node_to_csv(item, raw=raw)

    # execute script if any
    if script:
        tmp = ['${source}/' + x for x in paths]
        tmpstr = ' '.join(tmp)
        cmd = f'op=file; source=/media/mnt; $op {tmpstr}'
        # NOTE(review): the statement using cmd is missing from the
        # reviewed text; printing it through Logger.info is assumed
        Logger.info(cmd)

    return list(paths.values())
def _callback_find_name(self, term: str, only_dir: bool) -> Any:
    """
    callback for finding files
    @term: term to look for in the node name (case insensitive);
           an empty term matches every candidate node
    @only_dir: only match directories
    returns a function taking a node and returning True on match
    """
    def find_name(node: anytree.AnyNode) -> bool:
        # storage, top and meta nodes never match
        if node.type in (self.TYPE_STORAGE,
                         self.TYPE_TOP,
                         self.TYPE_META):
            return False
        if only_dir and node.type != self.TYPE_DIR:
            # ignore non directory
            return False

        # filter
        if not term:
            return True
        return term.lower() in node.name.lower()

    return find_name
# ls
def list(self, top: anytree.AnyNode,
         path: str,
         rec: bool = False,
         fmt: str = 'native',
         raw: bool = False) -> List[anytree.AnyNode]:
    """
    list nodes for "ls"
    @top: top node
    @path: path to search for
    @rec: recursive walk
    @fmt: output format
    @raw: print raw size
    returns the found nodes (empty list if nothing matches)
    """
    self._debug(f'walking path: \"{path}\" from {top}')

    resolv = anytree.resolver.Resolver('name')
    found = []
    try:
        # resolve the path in the tree
        found = resolv.glob(top, path)
        if len(found) < 1:
            # nothing found
            self._debug('nothing found')
            return []

        if rec:
            # print the entire tree
            self.print_tree(found[0].parent, fmt=fmt, raw=raw)
            return found

        # sort found nodes
        found = sorted(found, key=self._sort, reverse=self.sortsize)

        # print the parent (nothing is printed in fzf mode)
        if fmt == 'native':
            # NOTE(review): the whole call is missing from the
            # reviewed text; it is reconstructed to mirror the csv
            # branch below, confirm the keyword arguments
            self._print_node_native(found[0].parent,
                                    withpath=False,
                                    withdepth=True,
                                    raw=raw)
        elif fmt.startswith('csv'):
            self._node_to_csv(found[0].parent, raw=raw)

        # print all found nodes
        if fmt == 'csv-with-header':
            # NOTE(review): Logger.stdout_nocolor is assumed for the
            # missing statement, confirm against catcli.logger
            Logger.stdout_nocolor(self.CSV_HEADER)
        for item in found:
            if fmt == 'native':
                # NOTE(review): the arguments after pre='- ' are
                # reconstructed, confirm
                self._print_node_native(item, withpath=False,
                                        pre='- ',
                                        withdepth=True,
                                        raw=raw)
            elif fmt.startswith('csv'):
                self._node_to_csv(item, raw=raw)
            elif fmt.startswith('fzf'):
                self._to_fzf(item, fmt)

    except anytree.resolver.ChildResolverError:
        # an unresolvable path simply yields what was found so far
        pass
    return found
# tree creation
def _add_entry(self, name: str,
               top: anytree.AnyNode,
               resolv: Any) -> None:
    """
    add an entry to the tree
    @name: path of the entry, components separated by os.sep
    @top: node below which the entry is added; its name is
          recorded as the archive of the new node
    @resolv: resolver used to look up the parent of the entry
    """
    entries = name.rstrip(os.sep).split(os.sep)
    if len(entries) == 1:
        # no parent component: attach directly below top
        self.new_archive_node(name, name, top, top.name)
        return
    sub = os.sep.join(entries[:-1])
    nodename = entries[-1]
    try:
        parent = resolv.get(top, sub)
        self.new_archive_node(nodename, name, parent, top.name)
    except anytree.resolver.ChildResolverError:
        # the parent cannot be resolved: attach below top instead
        self.new_archive_node(nodename, name, top, top.name)
def list_to_tree(self, parent: anytree.AnyNode, names: List[str]) -> None:
    """
    convert list of files to a tree
    @parent: node below which the entries are added
    @names: paths of the entries; nothing is done when empty
    """
    if not names:
        return
    resolv = anytree.resolver.Resolver('name')
    for name in names:
        self._add_entry(name.rstrip(os.sep), parent, resolv)
# diverse
def _sort_tree(self,
               items: List[anytree.AnyNode]) -> List[anytree.AnyNode]:
    """
    sorting a list of items
    @items: the nodes to sort
    returns a new list ordered with the _sort key, reversed when
    sorting by size is enabled
    """
    return sorted(items, key=self._sort, reverse=self.sortsize)
def _sort(self, lst: List[anytree.AnyNode]) -> Any:
    """
    return the sort key of a node
    @lst: despite its name, this is the single node handed over
          by sorted() when this method is used as key function
    returns the size key when sorting by size, the fs key otherwise
    """
    if self.sortsize:
        return self._sort_size(lst)
    return self._sort_fs(lst)
def _sort_fs(node: anytree.AnyNode) -> Tuple[str, str]:
"""sorting nodes dir first and alpha"""
return (node.type,'.').lower())
def _sort_size(node: anytree.AnyNode) -> float:
"""sorting nodes by size"""
if not node.size:
return 0
return float(node.size)
except AttributeError:
return 0
def _get_storage(self, node: anytree.AnyNode) -> anytree.AnyNode:
    """
    return the storage a node belongs to
    @node: the node to consider
    returns the node itself if it is a storage, otherwise its
    second ancestor
    """
    if node.type == self.TYPE_STORAGE:
        return node
    # presumably ancestors[0] is the top node and ancestors[1]
    # the storage; this relies on the layout of the tree
    return node.ancestors[1]
def _has_attr(node: anytree.AnyNode, attr: str) -> bool:
"""return True if node has attr as attribute"""
return attr in node.__dict__.keys()
def _get_parents(self, node: anytree.AnyNode) -> str:
    """
    get all parents recursively
    @node: the node to consider
    returns the names from below the storage down to @node joined
    with os.sep, or an empty string for a storage or top node
    """
    if node.type in (self.TYPE_STORAGE, self.TYPE_TOP):
        # the path stops at the storage (or top) node
        return ''
    parent = self._get_parents(node.parent)
    if parent:
        return os.sep.join([parent, node.name])
    return str(node.name)
@staticmethod
def _get_hash(path: str) -> str:
    """
    return md5 hash of node
    @path: path of the file to hash
    returns the checksum or an empty string on failure
    """
    try:
        return md5sum(path)
    except CatcliException as exc:
        # NOTE(review): the statement using exc is missing from the
        # reviewed text; logging the error is assumed
        Logger.err(str(exc))
        return ''
@staticmethod
def _sanitize(node: anytree.AnyNode) -> anytree.AnyNode:
    """
    sanitize node strings
    @node: the node to sanitize (modified in place)
    returns the same node with its name and relpath passed
    through fix_badchars
    """
    node.name = fix_badchars(node.name)
    node.relpath = fix_badchars(node.relpath)
    return node
def _debug(self, string: str) -> None:
"""print debug"""
if not self.debug: