Source code for smoothcrawler_cluster.model._data

"""*Data for inner usage*

This module provides data objects for internal use in *SmoothCrawler-Cluster*.

*New in version 0.2.0.*
"""
from abc import ABCMeta, abstractmethod
from typing import Callable


class CrawlerName:
    """*Data about crawler's name*

    Keeps the separate pieces of a crawler instance's name: the cluster group, the base name,
    the index separation and the ID. Every setter converts the assigned value with ``str()``;
    every attribute is ``None`` until it has been assigned.
    """

    _group: str = None
    _base_name: str = None
    _crawler_id: str = None
    _index_sep: str = None

    def __repr__(self):
        """Return a debugging representation made of the class name, the group and the entire
        crawler name (the same text *__str__* gives).
        """
        class_name = self.__class__.__name__
        full_name = str(self)
        return f"<{class_name} object(group: {self.group}, name: {full_name})>"

    def __str__(self):
        """Return the entire crawler name: *base_name*, *index_separation* and *id* joined
        together in that order.
        """
        name_parts = (self.base_name, self.index_separation, self.id)
        return "".join(map(str, name_parts))

    @property
    def group(self) -> str:
        """:obj:`str`: Property with both getter and setter. The cluster group which the crawler
        instance is in.
        """
        return self._group

    @group.setter
    def group(self, group: str) -> None:
        self._group = str(group)

    @property
    def base_name(self) -> str:
        """:obj:`str`: Property with both getter and setter. The name part of the crawler
        instance. The value may be repeated among crawlers, but the combination of *base_name*
        and *id* should be unique so that the cluster could identify the instance.

        For example, in the crawler name ``sc-crawler_1`` the *base_name* is ``sc-crawler``, the
        index separation is ``_`` and the ID is ``1``.
        """
        return self._base_name

    @base_name.setter
    def base_name(self, name: str) -> None:
        self._base_name = str(name)

    @property
    def id(self) -> str:
        """:obj:`str`: Property with both getter and setter. The identity of the crawler
        instance. It MUST be unique so that, combined with *base_name* and *index_separation*,
        it forms an entire crawler name the cluster could identify.
        """
        return self._crawler_id

    @id.setter
    def id(self, crawler_id: str) -> None:
        self._crawler_id = str(crawler_id)

    @property
    def index_separation(self) -> str:
        """:obj:`str`: Property with both getter and setter. The separator used for parsing the
        index info out of the crawler instance's name.
        """
        return self._index_sep

    @index_separation.setter
    def index_separation(self, index_sep: str) -> None:
        self._index_sep = str(index_sep)
class TimeInterval:
    """*Data about each different time interval*

    Every setter converts the assigned value with ``float()``; every attribute is ``None``
    until it has been assigned.
    """

    _check_task: float = None
    _check_crawler_state: float = None
    _check_standby_id: float = None

    @property
    def check_task(self) -> float:
        """:obj:`float`: Property with both getter and setter. The interval of checking whether
        the current crawler receives any task or not. This property is for **Runner**.
        """
        return self._check_task

    @check_task.setter
    def check_task(self, wait: float) -> None:
        interval = float(wait)
        self._check_task = interval

    @property
    def check_crawler_state(self) -> float:
        """:obj:`float`: Property with both getter and setter. The interval of checking whether
        any one of all the current crawlers is dead or not. This property is for the primary
        **Backup Runner**.
        """
        return self._check_crawler_state

    @check_crawler_state.setter
    def check_crawler_state(self, wait: float) -> None:
        interval = float(wait)
        self._check_crawler_state = interval

    @property
    def check_standby_id(self) -> float:
        """:obj:`float`: Property with both getter and setter. The interval of checking whether
        the current crawler could be the primary backup one or not. This property is for the
        secondary **BackupRunner**.
        """
        return self._check_standby_id

    @check_standby_id.setter
    def check_standby_id(self, wait: float) -> None:
        interval = float(wait)
        self._check_standby_id = interval
class TimerThreshold:
    """*Data about threshold of time*

    The setter converts the assigned value with ``int()``; the attribute is ``None`` until it
    has been assigned.
    """

    _reset_timeout: int = None

    @property
    def reset_timeout(self) -> int:
        """:obj:`int`: Property with both getter and setter. The threshold of resetting the
        timeout record used for checking all the current crawlers' heartbeat state. This
        property is for the primary **Backup Runner**.
        """
        return self._reset_timeout

    @reset_timeout.setter
    def reset_timeout(self, reset_timeout: int) -> None:
        threshold = int(reset_timeout)
        self._reset_timeout = threshold
class CrawlerTimer:
    """*Data about crawler's time attributes, e.g., time interval, threshold, etc.*

    Both setters validate the type of the assigned object and raise ``TypeError`` when it does
    not match; both attributes are ``None`` until they have been assigned.
    """

    _interval: TimeInterval = None
    _threshold: TimerThreshold = None

    @property
    def time_interval(self) -> TimeInterval:
        """:obj:`TimeInterval`: Property with both getter and setter. This property is a
        **TimeInterval** object. Please refer to :ref:`TimeInterval <InnerData_TimeInterval>`
        to get more info.

        Raises:
            TypeError: The assigned value is not a **TimeInterval** instance.
        """
        return self._interval

    @time_interval.setter
    def time_interval(self, interval: TimeInterval) -> None:
        if not isinstance(interval, TimeInterval):
            raise TypeError("Property *time_interval* only support *TimeInterval* type object.")
        self._interval = interval

    @property
    def threshold(self) -> TimerThreshold:
        """:obj:`TimerThreshold`: Property with both getter and setter. This property is a
        **TimerThreshold** object. Please refer to
        :ref:`TimerThreshold <InnerData_TimerThreshold>` to get more info.

        Raises:
            TypeError: The assigned value is not a **TimerThreshold** instance.
        """
        return self._threshold

    @threshold.setter
    def threshold(self, threshold: TimerThreshold) -> None:
        if not isinstance(threshold, TimerThreshold):
            # The message names this property and its expected type; it previously repeated the
            # *time_interval* / *TimeInterval* wording, which pointed at the wrong attribute.
            raise TypeError("Property *threshold* only support *TimerThreshold* type object.")
        self._threshold = threshold
class MetaDataOpt:
    """*Data about callback functions of getting and setting meta-data objects*

    Each setter accepts only a callable object and raises ``TypeError`` otherwise; each
    attribute is ``None`` until it has been assigned.
    """

    _get_callback: Callable = None
    _set_callback: Callable = None
    _exist_callback: Callable = None

    @property
    def get_callback(self) -> Callable:
        """:obj:`Callable`: Property with both getter and setter. The callback function about
        getting a meta-data object.
        """
        return self._get_callback

    @get_callback.setter
    def get_callback(self, callback: Callable) -> None:
        if isinstance(callback, Callable):
            self._get_callback = callback
            return
        raise TypeError("Property *get_callback* only support callable object.")

    @property
    def set_callback(self) -> Callable:
        """:obj:`Callable`: Property with both getter and setter. The callback function about
        setting a meta-data object.
        """
        return self._set_callback

    @set_callback.setter
    def set_callback(self, callback: Callable) -> None:
        if isinstance(callback, Callable):
            self._set_callback = callback
            return
        raise TypeError("Property *set_callback* only support callable object.")

    @property
    def exist_callback(self) -> Callable:
        """:obj:`Callable`: Property with both getter and setter. The callback function about
        checking whether a specific meta-data object exists or not.
        """
        return self._exist_callback

    @exist_callback.setter
    def exist_callback(self, callback: Callable) -> None:
        if isinstance(callback, Callable):
            self._exist_callback = callback
            return
        raise TypeError("Property *exist_callback* only support callable object.")


class BaseNode(metaclass=ABCMeta):
    """*Framework module to define some attributes of node in Zookeeper*

    A node of Zookeeper. Subclasses must implement both the getter and the setter of *path*
    and *value*.
    """

    @property
    @abstractmethod
    def path(self) -> str:
        """:obj:`str`: Property with both a getter and setter for the path of node in Zookeeper."""

    @path.setter
    @abstractmethod
    def path(self, val: str) -> None:
        """Set the path of node in Zookeeper."""

    @property
    @abstractmethod
    def value(self) -> str:
        """:obj:`str`: Property with both a getter and setter for the value of the path. It may
        need to deserialize the data if it needs.
        """

    @value.setter
    @abstractmethod
    def value(self, val: str) -> None:
        """Set the value of the path."""


class BasePath(metaclass=ABCMeta):
    """*Base class to define all meta-data paths*

    All the path properties of meta-data objects. The node naming properties are implemented
    here; the full paths are left abstract for subclasses.
    """

    def __init__(self, name: str, group: str):
        """
        Args:
            name (str): The name of current crawler instance.
            group (str): The group what current crawler instance is in.
        """
        self._name = name
        self._group = group

    @property
    def state_node_str(self) -> str:
        """:obj:`str`: Property with only getter. The node naming of meta-data **GroupState** and
        **NodeState** path.
        """
        return "state"

    @property
    @abstractmethod
    def group_state(self) -> str:
        """:obj:`str`: Property with only getter. The path of meta-data **GroupState**."""

    @property
    @abstractmethod
    def node_state(self) -> str:
        """:obj:`str`: Property with only getter. The path of meta-data **NodeState**."""

    @property
    def task_node_str(self) -> str:
        """:obj:`str`: Property with only getter. The node naming of meta-data **Task** path."""
        return "task"

    @property
    @abstractmethod
    def task(self) -> str:
        """:obj:`str`: Property with only getter. The path of meta-data **Task**."""

    @property
    def heartbeat_node_str(self) -> str:
        """:obj:`str`: Property with only getter. The node naming of meta-data **Heartbeat**
        path.
        """
        return "heartbeat"

    @property
    @abstractmethod
    def heartbeat(self) -> str:
        """:obj:`str`: Property with only getter. The path of meta-data **Heartbeat**."""


class MetaDataPath(BasePath):
    """*Implementation of all meta-data paths*

    Builds every path as ``<parent node>/<node naming>``, where the parent node comes from
    :meth:`generate_parent_node`.
    """

    @property
    def group_state(self) -> str:
        """:obj:`str`: Property with only getter. The path of meta-data **GroupState**."""
        parent_node = self.generate_parent_node(self._group, is_group=True)
        return f"{parent_node}/{self.state_node_str}"

    @property
    def node_state(self) -> str:
        """:obj:`str`: Property with only getter. The path of meta-data **NodeState**."""
        parent_node = self.generate_parent_node(self._name)
        return f"{parent_node}/{self.state_node_str}"

    @property
    def task(self) -> str:
        """:obj:`str`: Property with only getter. The path of meta-data **Task**."""
        parent_node = self.generate_parent_node(self._name)
        return f"{parent_node}/{self.task_node_str}"

    @property
    def heartbeat(self) -> str:
        """:obj:`str`: Property with only getter. The path of meta-data **Heartbeat**."""
        parent_node = self.generate_parent_node(self._name)
        return f"{parent_node}/{self.heartbeat_node_str}"

    @classmethod
    def generate_parent_node(cls, parent_name: str, is_group: bool = False) -> str:
        """Generate node path of Zookeeper with fixed format.

        Args:
            parent_name (str): The crawler name.
            is_group (bool): If it's True, generate node path for _group_ type meta-data.

        Returns:
            str: A Zookeeper node path, ``group/<parent_name>`` when *is_group* is true and
            ``node/<parent_name>`` otherwise.
        """
        prefix = "group" if is_group else "node"
        return f"{prefix}/{parent_name}"