HDF5 文件读取 ORM

发布时间: 2024-01-02 10:07:11  作者: 一枚码农
点击查看代码

class BaseNode:
    """Basic node of the HDF5 file object-relational model.

    A node wraps one object of the HDF5 file (addressed by its path) and keeps
    its attributes, its dataset value and the names of its direct children.
    Child objects are attached to their parent node as instance attributes
    named after the HDF5 member, so a model can be walked with plain attribute
    access (``model.group.dataset.values``).

    NOTE: children are attached with ``setattr``; an HDF5 member whose name
    equals an existing attribute of the node (``attrs``, ``values``,
    ``children``, ...) therefore replaces that attribute.
    """
    # HDF5 file handle shared by every node. It is always read through the
    # class, so it must be assigned before any node is constructed.
    _fp = None
    # Member names that must not be turned into model nodes.
    _prohibit_groups = set()

    def __init__(self, uri='.'):
        """Load attributes, dataset value and child names of the object at *uri*.

        :param uri: path of the HDF5 object inside the shared file handle.
        """
        self._uri = uri
        self.attrs = {}       # HDF5 attributes of the object, copied into a dict
        self.values = None    # dataset content, ``None`` when not a dataset
        self.children = []    # names of the direct members, empty when not a group
        self._get_attrs()
        self._get_dataset()
        self._get_children()

    def _get_dataset(self):
        """Store the dataset content in ``values`` (``None`` for non-datasets)."""
        obj = self.__class__._fp[self._uri]
        # Indexing with an empty tuple reads the dataset content.
        self.values = obj[()] if isinstance(obj, h5py.Dataset) else None

    def _get_attrs(self):
        """Copy the HDF5 attributes of the object into ``attrs``."""
        self.attrs = dict(self.__class__._fp[self._uri].attrs)

    def _get_children(self):
        """Store the names of the direct members in ``children`` (groups only)."""
        obj = self.__class__._fp[self._uri]
        self.children = list(obj.keys()) if isinstance(obj, h5py.Group) else []

    def _create_relation(self, curr_node):
        """Create the object-relational model below this node.

        Starting at *curr_node*, every reachable member is wrapped in a
        ``BaseNode`` and attached to the node of its parent group as an
        attribute named after the member. Members whose name is listed in
        ``_prohibit_groups`` are skipped together with everything below them.

        The traversal is iterative (explicit stack) so that deeply nested files
        cannot hit the recursion limit.

        :param curr_node: name (path relative to the file handle) of the member
                          to start from; ``'.'`` means this node itself, in
                          which case no extra attribute is created for it.
        """
        fp = self.__class__._fp
        prohibited = self.__class__._prohibit_groups
        if not curr_node or curr_node in prohibited:
            return

        # Each entry is (parent node, member name, member path). Entries are
        # popped from the end, i.e. the last member of a group is visited first
        # and its whole subtree is built before its siblings.
        pending = [(self, curr_node, curr_node)]
        while pending:
            parent, name, uri = pending.pop()
            if name in prohibited:
                continue

            if name == '.':
                # The start node stands for the current group: reuse it.
                node = parent
            else:
                node = BaseNode(uri)
                setattr(parent, name, node)

            obj = fp[uri]
            # Only groups have members to descend into; a non-group start node
            # simply ends the traversal.
            if isinstance(obj, h5py.Group):
                pending.extend((node, child, f"{uri}/{child}") for child in obj.keys())

    def filter(self):
        """Filter the node data (not implemented yet, currently returns ``None``).

        Symbol definition
          -equal: __eq or =
          -more than: __rq or >
          -less than: __lq or <
          -contains: __contains

        Example:

        >> p = pd.DataFrame({'x': [1,2,3,4,5],
                              'y': [True, False, False, True, True],
                              'z': ['fire', 'snow', 'sky', 'sea', 'rain']})
        >> p
               x      y     z
            0  1   True  fire
            1  2  False  snow
            2  3  False   sky
            3  4   True   sea
            4  5   True  rain

        :return:
        """
        ...


class Hdf5ReaderModel(BaseNode):
    """Read-only object-relational model of an HDF5 file.

    The complete model is built inside the constructor and the file is closed
    again before the constructor returns.

    NOTE: the file handle and the prohibited names are stored on ``BaseNode``
    itself, so they are shared by all nodes and are overwritten whenever
    another model is constructed.
    """

    def __init__(self, path: str, *, prohibit_groups: Set[str] = None, current_group: str = '.'):
        """hdf5 file ORM(read)

        :param path: hdf5 file path.
        :param prohibit_groups: Names of the members for which no model node
                                must be created. Defaults to an empty set.
        :param current_group: Specify the starting group for establishing model object relationships.
                              By default, it is created from the file root group.
        :raises AssertionError: if *path* is not an HDF5 file.
        """
        if not h5py.is_hdf5(path):
            # Raised explicitly instead of using ``assert`` so the check still
            # runs under ``python -O``; exception type and message are kept.
            raise AssertionError('not a hdf5 file')
        if prohibit_groups is None:
            prohibit_groups = set()

        self._path = Path(path)
        # Open read-only explicitly: this class only reads.
        self._fp = h5py.File(self._path, 'r')
        try:
            BaseNode._fp = self._fp
            BaseNode._prohibit_groups = prohibit_groups
            # Loads attributes / children of the root object ('.').
            super().__init__()
            self._create_relation(current_group)
        finally:
            # Close the file even when building the model fails.
            self._fp.close()