Apache SINGA
A distributed deep learning platform.
 All Classes Namespaces Files Functions Variables Typedefs Enumerator Macros
cluster_rt.h
1 #ifndef INCLUDE_UTILS_CLUSTER_RT_H_
2 #define INCLUDE_UTILS_CLUSTER_RT_H_
3 #include <glog/logging.h>
4 #include <string>
5 #include <vector>
6 #include <utility>
7 #include <zookeeper/zookeeper.h>
8 
9 using std::string;
10 using std::vector;
11 
12 namespace singa {
13 
/**
 * Signature of the callback handed to ClusterRuntime::sWatchSGroup.
 *
 * The callback receives a single opaque pointer; presumably this is the
 * `ctx` value that was registered together with the callback (confirm
 * against the implementation).
 */
typedef void (*rt_callback)(void *context);

/**
 * Interface of the cluster runtime service used by servers and workers.
 *
 * Every operation in this base class is a stub that does nothing and
 * returns false; concrete runtimes override them.
 */
class ClusterRuntime {
 public:
  ClusterRuntime() {}
  virtual ~ClusterRuntime() {}

  /**
   * Initialize the runtime instance.
   *
   * @return false in this base implementation.
   */
  virtual bool Init() { return false; }

  /**
   * Server: watch all workers in a server group; will be notified when
   * all workers have left.
   *
   * @param gid group id
   * @param sid server id
   * @param fn  callback to invoke on notification
   * @param ctx opaque pointer kept alongside the callback
   * @return false in this base implementation.
   */
  virtual bool sWatchSGroup(int gid, int sid, rt_callback fn, void *ctx) { return false; }

  /**
   * Worker: join a server group.
   *
   * @param gid     group id
   * @param wid     worker id
   * @param s_group server group to join
   * @return false in this base implementation.
   */
  virtual bool wJoinSGroup(int gid, int wid, int s_group) { return false; }

  /**
   * Worker: leave a server group.
   *
   * @param gid     group id
   * @param wid     worker id
   * @param s_group server group to leave
   * @return false in this base implementation.
   */
  virtual bool wLeaveSGroup(int gid, int wid, int s_group) { return false; }
};
48 
49 
50 
51 class ZKClusterRT : public ClusterRuntime{
52  public:
53  ZKClusterRT(string host, int timeout = 30000);
54  ~ZKClusterRT();
55  bool Init();
56  bool sWatchSGroup(int gid, int sid, rt_callback fn, void *ctx);
57  bool wJoinSGroup(int gid, int wid, int s_group);
58  bool wLeaveSGroup(int gid, int wid, int s_group);
59  static void watcherGlobal(zhandle_t * zh, int type, int state, const char *path, void *watcherCtx);
60 
61  private:
62  static void childChanges(zhandle_t *zh, int type, int state, const char *path, void *watcherCtx);
63  string getSGroupPath(int gid);
64  string getWorkerPath(int gid, int wid);
65 
66  struct RTCallback{
67  rt_callback fn;
68  void* ctx;
69  };
70 
71  string host_;
72  int timeout_;
73  zhandle_t *zkhandle_;
74  vector<RTCallback *> cb_vec_;
75 
76  const int MAX_BUF_LEN = 50;
77  const int RETRY_NUM = 10;
78  const int SLEEP_SEC = 1;
79  const string ZK_P_SINGA = "/singa";
80  const string ZK_P_STATUS = "/status";
81 };
82 
83 } // namespace singa
84 
85 #endif // INCLUDE_UTILS_CLUSTER_RT_H_
bool wJoinSGroup(int gid, int wid, int s_group)
Worker: join a server group.
virtual bool wLeaveSGroup(int gid, int wid, int s_group)
Worker: leave a server group.
Definition: cluster_rt.h:46
Definition: cluster_rt.h:51
bool wLeaveSGroup(int gid, int wid, int s_group)
Worker: leave a server group.
Definition: cluster_rt.h:23
virtual bool Init()
Initialize the runtime instance.
Definition: cluster_rt.h:31
virtual bool sWatchSGroup(int gid, int sid, rt_callback fn, void *ctx)
Server: watch all workers in a server group, will be notified when all workers have left...
Definition: cluster_rt.h:36
virtual bool wJoinSGroup(int gid, int wid, int s_group)
Worker: join a server group.
Definition: cluster_rt.h:41
bool sWatchSGroup(int gid, int sid, rt_callback fn, void *ctx)
Server: watch all workers in a server group, will be notified when all workers have left...
bool Init()
Initialize the runtime instance.