Source code for dataprocessor.pipes.scan

# coding=utf-8
"""Scan directories as nodes."""
import os
from glob import glob

from ..nodes import get, validate_link
from ..utility import path_expand, boolenize


[docs]def directory(node_list, root, whitelist, followlinks=False): """Scan nodes from all directories under the directory 'root'. If one directory has properties of both of 'run' and 'project', type of the directory is set to 'run'. Parameters ---------- root : str Scan directories recursively under the directory `root`. whitelist : list of str or str Run node has one or more file or directory which satisfies run_node_dir/`whitelist`. And project nodes satisfy project_dir/run_node_dir/`whitelist`. str can be specified by wildcard. followlinks : {'False', 'True'}, optional Whether scan in symbolic link. Be aware that setting this to True may lead to infinite recursion. Returns ------- node_list Examples -------- >>> # Initialize node_list. >>> node_list = directory([], "scandir_path", ["data/hoge*", "*foo*"]) >>> # Rescan node_list. >>> node_list = [ ... {'path': '/tmp/scan_dir/run0', ... 'parents': [], # empty ... 'children': [], # empty ... 'name': 'run0', ... 'type': 'run'}] >>> node_list = directory([], "scandir_path", ["*.conf"]) """ root = path_expand(root) followlinks = boolenize(followlinks) if isinstance(whitelist, str): whitelist = [whitelist] scan_nodelist = [] for path, dirs, files in os.walk(root, followlinks=followlinks): dirs.sort() node_type = None parents = [] children = [] if not get(node_list, path) is None: continue for child in dirs: for white in whitelist: if glob(os.path.join(path, child, white)): node_type = "project" children.append(os.path.join(path, child)) break for white in whitelist: if glob(os.path.join(path, white)): node_type = "run" parents.append(os.path.dirname(path)) break if not node_type: continue scan_nodelist.append({"path": path, "parents": parents, "children": children, "type": node_type, "name": os.path.basename(path), }) origin_len = len(node_list) node_list = node_list + scan_nodelist for node in node_list[origin_len:]: validate_link(node_list, node, silent=True) return node_list
[docs]def register(pipe_dics): pipe_dics["scan_directory"] = { "func": directory, "args": [("root", {"help": "path of root directory"}), ("whitelist", {"help": "whitelist of file which exists in run directory", "nargs": "+", }), ], "kwds": [("followlinks", {"help": "whether scan in symbolic link"})], "desc": "Scan nodes from all directories under the directory 'root'.", }