You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
catcli/catcli/walker.py

134 lines
4.7 KiB
Python

"""
author: deadc0de6 (https://github.com/deadc0de6)
Copyright (c) 2017, deadc0de6
Catcli filesystem indexer
"""
import os
# local imports
from catcli.logger import Logger
class Walker:
    '''
    walk a directory tree on the filesystem and record it through a noder

    Every directory level is listed once; sub-directories are handled by
    explicit recursion so that each one gets its own parent node and its
    own storage path.
    '''

    # longest name printed on the progress line before it gets truncated
    MAXLINE = 80 - 15

    def __init__(self, noder, nohash=False, debug=False):
        '''
        @noder: object used to create and update nodes; this class calls its
                set_hashing, dir_node, file_node, flag, clean_not_flagged
                and get_node_if_changed methods
        @nohash: the noder is told to hash only when this is False
        @debug: when True debug messages are logged and the progress line
                is silenced
        '''
        self.noder = noder
        self.nohash = nohash
        self.noder.set_hashing(not self.nohash)
        self.debug = debug

    def index(self, path, parent, name, storagepath=''):
        '''
        index a directory and store in tree

        @path: directory to index
        @parent: node receiving the entries; when falsy a directory node
                 is first created for path through the noder
        @name: name used for that created directory node
        @storagepath: path handed to the noder for every created node,
                      extended with the directory name at each recursion
        returns a tuple (parent node, number of nodes created below it)
        '''
        self._debug('indexing starting at {}'.format(path))
        if not parent:
            parent = self.noder.dir_node(name, path, parent)

        cnt = 0
        root, dirs, files = self._top_level(path)
        for f in files:
            self._debug('found file {} under {}'.format(f, path))
            sub = os.path.join(root, f)
            self._log(f)
            self._debug('index file {}'.format(sub))
            self.noder.file_node(f, sub, parent, storagepath)
            cnt += 1
        for d in dirs:
            self._debug('found dir {} under {}'.format(d, path))
            sub = os.path.join(root, d)
            self._debug('index directory {}'.format(sub))
            dummy = self.noder.dir_node(d, sub, parent, storagepath)
            cnt += 1
            nstoragepath = self._child_storagepath(storagepath, d)
            # recurse by hand so the new directory node becomes the parent
            _, cnt2 = self.index(sub, dummy, d, nstoragepath)
            cnt += cnt2
        self._log(None)
        return parent, cnt

    def reindex(self, path, parent, top):
        '''
        reindex a directory and store in tree

        @path: directory to re-index
        @parent: node holding the previously indexed entries of path
        @top: forwarded as-is to the recursive re-indexing
        returns the number of nodes re-created plus the value returned by
        noder.clean_not_flagged(parent) (presumably the number of stale
        nodes removed - to be confirmed against the noder)
        '''
        cnt = self._reindex(path, parent, top, '')
        cnt += self.noder.clean_not_flagged(parent)
        return cnt

    def _reindex(self, path, parent, top, storagepath):
        '''
        reindex a directory and store in tree

        Entries reported as unchanged are only flagged through the noder,
        new or changed ones are created again and flagged as well.

        @top: only passed along to the recursive calls
        returns the number of nodes that were created
        '''
        self._debug('reindexing starting at {}'.format(path))
        cnt = 0
        root, dirs, files = self._top_level(path)
        for f in files:
            self._debug('found file {} under {}'.format(f, path))
            sub = os.path.join(root, f)
            reindex, n = self._need_reindex(parent, sub)
            if not reindex:
                self._debug('\tignore file {}'.format(sub))
                self.noder.flag(n)
                continue
            self._debug('\tre-index file {}'.format(sub))
            self._log(f)
            n = self.noder.file_node(f, sub, parent, storagepath)
            self.noder.flag(n)
            cnt += 1
        for d in dirs:
            self._debug('found dir {} under {}'.format(d, path))
            sub = os.path.join(root, d)
            reindex, dummy = self._need_reindex(parent, sub)
            if reindex:
                self._debug('\tre-index directory {}'.format(sub))
                dummy = self.noder.dir_node(d, sub, parent, storagepath)
                cnt += 1
            # flag the directory whether it was kept or re-created
            self.noder.flag(dummy)
            self._debug('reindexing deeper under {}'.format(sub))
            nstoragepath = self._child_storagepath(storagepath, d)
            cnt2 = self._reindex(sub, dummy, top, nstoragepath)
            cnt += cnt2
        self._log(None)
        return cnt

    def _need_reindex(self, top, path):
        '''
        test if node needs re-indexing

        returns a tuple (needs re-indexing, node returned by the noder);
        a node reported as changed gets its parent attribute set to None
        before being returned
        '''
        cnode, changed = self.noder.get_node_if_changed(top, path)
        if not cnode:
            self._debug('\tdoes not exist')
            return True, cnode
        if not changed:
            # ignore this node
            self._debug('\thas not changed')
            return False, cnode
        # remove this node and re-add
        self._debug('\thas changed')
        self._debug('\tremoving node {}'.format(cnode))
        cnode.parent = None
        self._debug('\tis to be re-indexed')
        return True, cnode

    @staticmethod
    def _top_level(path):
        '''return (root, dirs, files) for the first level of path only'''
        # os.walk yields nothing when path cannot be listed, in which
        # case there is simply nothing to iterate over
        return next(os.walk(path), (path, [], []))

    @staticmethod
    def _child_storagepath(storagepath, name):
        '''return the storage path of directory name below storagepath'''
        if not storagepath:
            # first level: no leading separator
            return name
        return os.sep.join([storagepath, name])

    def _debug(self, string):
        '''log string, only when debug is enabled'''
        if not self.debug:
            return
        Logger.log(string)

    def _log(self, string):
        '''
        print the progress line, only when debug is disabled

        a falsy string blanks the line; strings longer than MAXLINE
        are truncated and suffixed with "..."
        '''
        if self.debug:
            return
        if not string:
            # clean
            Logger.progr('{:80}'.format(' '))
            return
        if len(string) > self.MAXLINE:
            string = string[:self.MAXLINE] + '...'
        Logger.progr('indexing: {:80}'.format(string))