# Source code for dataprocessor.dataframe

# coding=utf-8

from pandas import DataFrame, Series
from . import utility
from .exception import DataProcessorError


def get_projects(node_list):
    """Get projects in dataframe format.

    Parameters
    ----------
    node_list : list
        nodes to search; a node counts as a project when its "type" entry
        equals "project"

    Returns
    -------
    projects_dataframe : pandas.DataFrame
        one row per project node, without the columns that are missing for
        every project; empty when no node carries a "type" entry

    """
    df = DataFrame(node_list)
    # An empty node list (or nodes without any "type" entry) produces a frame
    # that has no "type" column; answer "no projects" instead of a KeyError.
    if "type" not in df.columns:
        return DataFrame()
    projects = df[df["type"] == "project"]
    return projects.dropna(how="all", axis=1)
def _coerce_numeric_columns(frame):
    """Convert object columns of `frame` to numeric dtype where possible.

    Replacement for ``DataFrame.convert_objects(convert_numeric=True)``,
    which is no longer available in pandas >= 1.0.  Only object columns are
    touched; a column is left as it is when none of its values can be read
    as a number.

    """
    # Local import so the block does not depend on a new module-level import.
    from pandas import to_numeric

    converted = frame.copy()
    for column in converted.columns:
        values = converted[column]
        if values.dtype != object:
            continue
        try:
            numeric = to_numeric(values, errors="coerce")
        except (TypeError, ValueError):
            # e.g. columns holding lists; keep them untouched
            continue
        if numeric.dtype != object and not numeric.isnull().all():
            converted[column] = numeric
    return converted


def get_project(node_list, project_path, properties=("comment", "tags"),
                index="path"):
    """Get project in dataframe format.

    Every node whose "parents" entry contains `project_path` becomes one row.
    The entries of the node's "configure" dict become columns (converted to
    float where possible), followed by the requested `properties`.

    Parameters
    ----------
    node_list : list
        nodes to search; the rows are built from their "parents",
        "configure", "name" and "path" entries and from `properties`
    project_path : str
        the path of project
    properties : sequence of str (optional)
        properties inherited from node_list into project dataframe
        "name" and "path" are inherited always.
        The given sequence is not modified.
        (Default=("comment", "tags"))
    index : str or list of str (optional)
        index of project dataframe
        if it is `None`, the index of DataFrame(node_list) will be inherited
        (Default="path")

    Returns
    -------
    project : pandas.DataFrame
        columns that are missing for every row are dropped

    Raises
    ------
    DataProcessorError
        if no node has `project_path` among its parents

    """
    project_path = utility.path_expand(project_path)

    # Work on a copy: appending to the argument itself would mutate the
    # caller's list (or a shared default) across calls.
    properties = list(properties)
    for item in ("name", "path"):
        if item not in properties:
            properties.append(item)

    def _belongs_to_project(parents):
        """Tell whether `project_path` is listed in a node's parents."""
        try:
            return project_path in parents
        except TypeError:
            # nodes without parents show up as NaN in the frame
            return False

    df = DataFrame(node_list)
    if "parents" in df.columns:
        runs_pre = df[df["parents"].apply(_belongs_to_project)]
    else:
        # empty node list, or no node has a "parents" entry at all
        runs_pre = DataFrame()
    if len(runs_pre) == 0:
        raise DataProcessorError("There is no project of specified path :"
                                 + project_path)

    def _conv(val):
        """
        Convert each lines to a pandas.Series

        See also #160
        """
        record = {}
        configure = val["configure"] if "configure" in val else None
        if isinstance(configure, dict):
            for key, value in configure.items():
                try:
                    record[key] = float(value)
                except (TypeError, ValueError):
                    # non-numeric values (text, None, lists, ...) are kept
                    record[key] = value
        for prop in properties:
            record[prop] = val[prop]
        return Series(record, dtype=object)

    runs = runs_pre.apply(_conv, axis=1)
    runs = _coerce_numeric_columns(runs)
    if index:
        runs = runs.set_index(index)
    return runs.dropna(how="all", axis=1)