Kea  1.5.0
communication_state.cc
Go to the documentation of this file.
1 // Copyright (C) 2018 Internet Systems Consortium, Inc. ("ISC")
2 //
3 // This Source Code Form is subject to the terms of the Mozilla Public
4 // License, v. 2.0. If a copy of the MPL was not distributed with this
5 // file, You can obtain one at http://mozilla.org/MPL/2.0/.
6 
7 #include <config.h>
8 
9 #include <communication_state.h>
10 #include <ha_service_states.h>
11 #include <exceptions/exceptions.h>
12 #include <dhcp/dhcp4.h>
13 #include <dhcp/dhcp6.h>
14 #include <dhcp/option_int.h>
15 #include <dhcp/pkt4.h>
16 #include <dhcp/pkt6.h>
17 #include <http/date_time.h>
18 #include <boost/bind.hpp>
19 #include <boost/pointer_cast.hpp>
20 #include <sstream>
21 #include <utility>
22 
23 using namespace isc::asiolink;
24 using namespace isc::dhcp;
25 using namespace isc::http;
26 using namespace boost::posix_time;
27 
namespace {

/// @brief Clock skew, in seconds, beyond which (in either direction) a
/// warning about the skew may be issued.
constexpr long WARN_CLOCK_SKEW = 30;

/// @brief Clock skew, in seconds, beyond which (in either direction)
/// @c clockSkewShouldTerminate reports that the limit was exceeded.
constexpr long TERM_CLOCK_SKEW = 60;

/// @brief Minimum time, in seconds, which must elapse between two
/// consecutive clock skew warnings.
constexpr long MIN_TIME_SINCE_CLOCK_SKEW_WARN = 60;

}
40 
41 namespace isc {
42 namespace ha {
43 
44 CommunicationState::CommunicationState(const IOServicePtr& io_service,
45  const HAConfigPtr& config)
46  : io_service_(io_service), config_(config), timer_(), interval_(0),
47  poke_time_(boost::posix_time::microsec_clock::universal_time()),
48  heartbeat_impl_(0), partner_state_(-1), clock_skew_(0, 0, 0, 0),
49  last_clock_skew_warn_() {
50 }
51 
53  stopHeartbeat();
54 }
55 
56 void
57 CommunicationState::setPartnerState(const std::string& state) {
58  if (state == "hot-standby") {
60  } else if (state == "load-balancing") {
62  } else if (state == "partner-down") {
64  } else if (state == "ready") {
66  } else if (state == "syncing") {
68  } else if (state == "terminated") {
70  } else if (state == "waiting") {
72  } else if (state == "unavailable") {
74  } else {
75  isc_throw(BadValue, "unsupported HA partner state returned "
76  << state);
77  }
78 }
79 
80 void
82  const boost::function<void()>& heartbeat_impl) {
83  startHeartbeatInternal(interval, heartbeat_impl);
84 }
85 
86 void
88  const boost::function<void()>& heartbeat_impl) {
89  bool settings_modified = false;
90 
91  // If we're setting the heartbeat for the first time, it should
92  // be non-null.
93  if (heartbeat_impl) {
94  settings_modified = true;
95  heartbeat_impl_ = heartbeat_impl;
96 
97  } else if (!heartbeat_impl_) {
98  // The heartbeat is re-scheduled but we have no historic implementation
99  // pointer we could re-use. This is a programmatic issue.
100  isc_throw(BadValue, "unable to start heartbeat when pointer"
101  " to the heartbeat implementation is not specified");
102  }
103 
104  // If we're setting the heartbeat for the first time, the interval
105  // should be greater than 0.
106  if (interval != 0) {
107  settings_modified |= (interval_ != interval);
108  interval_ = interval;
109 
110  } else if (interval_ <= 0) {
111  // The heartbeat is re-scheduled but we have no historic interval
112  // which we could re-use. This is a programmatic issue.
113  heartbeat_impl_ = 0;
114  isc_throw(BadValue, "unable to start heartbeat when interval"
115  " for the heartbeat timer is not specified");
116  }
117 
118  if (!timer_) {
119  timer_.reset(new IntervalTimer(*io_service_));
120  }
121 
122  if (settings_modified) {
123  timer_->setup(heartbeat_impl_, interval_, IntervalTimer::ONE_SHOT);
124  }
125 }
126 
127 void
129  if (timer_) {
130  timer_->cancel();
131  timer_.reset();
132  interval_ = 0;
133  heartbeat_impl_ = 0;
134  }
135 }
136 
137 void
139  // Remember previous poke time.
140  boost::posix_time::ptime prev_poke_time = poke_time_;
141  // Set poke time to the current time.
142  poke_time_ = boost::posix_time::microsec_clock::universal_time();
143 
144  // If we have been tracking the unanswered DHCP messages directed to the
145  // partner, we need to clear any gathered information because the connection
146  // seems to be (re)established.
148 
149  if (timer_) {
150  // Check the duration since last poke. If it is less than a second, we don't
151  // want to reschedule the timer. The only case when the poke time duration is
152  // lower than 1s is when we're performing lease updates. In order to avoid the
153  // overhead of re-scheduling the timer too frequently we reschedule it only if the
154  // duration is 1s or more. This matches the time resolution for heartbeats.
155  boost::posix_time::time_duration duration_since_poke = poke_time_ - prev_poke_time;
156  if (duration_since_poke.total_seconds() > 0) {
157  // A poke causes the timer to be re-scheduled to prevent it
158  // from triggering a heartbeat shortly after confirming the
159  // connection is ok, based on the lease update or another
160  // command.
162  }
163  }
164 }
165 
166 int64_t
168  ptime now = boost::posix_time::microsec_clock::universal_time();
169  time_duration duration = now - poke_time_;
170  return (duration.total_milliseconds());
171 }
172 
173 bool
175  return (getDurationInMillisecs() > config_->getMaxResponseDelay());
176 }
177 
178 bool
180  // First check if the clock skew is beyond the threshold.
181  if (isClockSkewGreater(WARN_CLOCK_SKEW)) {
182 
183  // In order to prevent to frequent warnings we provide a gating mechanism
184  // which doesn't allow for issuing a warning earlier than 60 seconds after
185  // the previous one.
186 
187  // Find the current time and the duration since last warning.
188  ptime now = boost::posix_time::microsec_clock::universal_time();
189  time_duration since_warn_duration = now - last_clock_skew_warn_;
190 
191  // If the last warning was issued more than 60 seconds ago or it is a
192  // first warning, we need to update the last warning timestamp and return
193  // true to indicate that new warning should be issued.
194  if (last_clock_skew_warn_.is_not_a_date_time() ||
195  (since_warn_duration.total_seconds() > MIN_TIME_SINCE_CLOCK_SKEW_WARN)) {
196  last_clock_skew_warn_ = now;
197  return (true);
198  }
199  }
200 
201  // The warning should not be issued.
202  return (false);
203 }
204 
205 bool
207  // Issue a warning if the clock skew is greater than 60s.
208  return (isClockSkewGreater(TERM_CLOCK_SKEW));
209 }
210 
211 bool
212 CommunicationState::isClockSkewGreater(const long seconds) const {
213  return ((clock_skew_.total_seconds() > seconds) ||
214  (clock_skew_.total_seconds() < -seconds));
215 }
216 
217 void
218 CommunicationState::setPartnerTime(const std::string& time_text) {
219  HttpDateTime partner_time = HttpDateTime().fromRfc1123(time_text);
220  HttpDateTime current_time = HttpDateTime();
221 
222  clock_skew_ = partner_time.getPtime() - current_time.getPtime();
223 }
224 
225 std::string
227  std::ostringstream s;
228 
229  // If negative clock skew, the partner's time is behind our time.
230  if (clock_skew_.is_negative()) {
231  s << clock_skew_.invert_sign().total_seconds() << "s behind";
232 
233  } else {
234  // Partner's time is ahead of ours.
235  s << clock_skew_.total_seconds() << "s ahead";
236  }
237 
238  return (s.str());
239 }
240 
242  const HAConfigPtr& config)
243  : CommunicationState(io_service, config), unacked_clients_() {
244 }
245 
246 void
247 CommunicationState4::analyzeMessage(const boost::shared_ptr<dhcp::Pkt>& message) {
248  // The DHCP message must successfully cast to a Pkt4 object.
249  Pkt4Ptr msg = boost::dynamic_pointer_cast<Pkt4>(message);
250  if (!msg) {
251  isc_throw(BadValue, "DHCP message to be analyzed is not a DHCPv4 message");
252  }
253 
254  // Check value of the "secs" field by comparing it with the configured
255  // threshold.
256  uint16_t secs = msg->getSecs();
257 
258  // It was observed that some Windows clients may send swapped bytes in the
259  // "secs" field. When the second byte is 0 and the first byte is non-zero
260  // we consider bytes to be swapped and so we correct them.
261  if ((secs > 255) && ((secs & 0xFF) == 0)) {
262  secs = ((secs >> 8) | (secs << 8));
263  }
264 
265  // Check the value of the "secs" field. If it is below the threshold there
266  // is nothing to do. The "secs" field holds a value in seconds, hence we
267  // have to multiple by 1000 to get a value in milliseconds.
268  if (secs * 1000 <= config_->getMaxAckDelay()) {
269  return;
270  }
271 
272  // The "secs" value is above the threshold so we should count it as unacked
273  // request, but we will first have to check if there is such request already
274  // recorded.
275  auto existing_requests = unacked_clients_.equal_range(msg->getHWAddr()->hwaddr_);
276 
277  // Client identifier will be stored together with the hardware address. It
278  // may remain empty if the client hasn't specified it.
279  std::vector<uint8_t> client_id;
280  OptionPtr opt_client_id = msg->getOption(DHO_DHCP_CLIENT_IDENTIFIER);
281  if (opt_client_id) {
282  client_id = opt_client_id->getData();
283  }
284 
285  // Iterate over the requests we found so far and see if we have a match with
286  // the client identifier (this includes empty client identifiers).
287  for (auto r = existing_requests.first; r != existing_requests.second; ++r) {
288  if (r->second == client_id) {
289  // There is a match so we have already recorded this client as
290  // unacked.
291  return;
292  }
293  }
294 
295  // New unacked client detected, so record the required information.
296  unacked_clients_.insert(std::make_pair(msg->getHWAddr()->hwaddr_, client_id));
297 }
298 
299 bool
301  return ((config_->getMaxUnackedClients() == 0) ||
302  (unacked_clients_.size() > config_->getMaxUnackedClients()));
303 }
304 
305 void
307  unacked_clients_.clear();
308 }
309 
311  const HAConfigPtr& config)
312  : CommunicationState(io_service, config), unacked_clients_() {
313 }
314 
315 void
316 CommunicationState6::analyzeMessage(const boost::shared_ptr<dhcp::Pkt>& message) {
317  // The DHCP message must successfully cast to a Pkt6 object.
318  Pkt6Ptr msg = boost::dynamic_pointer_cast<Pkt6>(message);
319  if (!msg) {
320  isc_throw(BadValue, "DHCP message to be analyzed is not a DHCPv6 message");
321  }
322 
323  // Check the value of the "elapsed time" option. If it is below the threshold
324  // there is nothing to do. The "elapsed time" option holds the time in
325  // 1/100 of second, hence we have to multiply by 10 to get a value in milliseconds.
326  OptionUint16Ptr elapsed_time = boost::dynamic_pointer_cast<
327  OptionUint16>(msg->getOption(D6O_ELAPSED_TIME));
328  if (!elapsed_time || elapsed_time->getValue() * 10 <= config_->getMaxAckDelay()) {
329  return;
330  }
331 
332  // Get the DUID of the client to see if it hasn't been recorded already.
333  OptionPtr duid = msg->getOption(D6O_CLIENTID);
334  if (duid && unacked_clients_.count(duid->getData()) == 0) {
335  // New unacked client detected, so record the required information.
336  unacked_clients_.insert(duid->getData());
337  }
338 }
339 
340 bool
342  return ((config_->getMaxUnackedClients() == 0) ||
343  (unacked_clients_.size() > config_->getMaxUnackedClients()));
344 }
345 
346 void
348  unacked_clients_.clear();
349 }
350 
351 } // end of namespace isc::ha
352 } // end of namespace isc
boost::function< void()> heartbeat_impl_
Pointer to the function providing heartbeat implementation.
const int HA_TERMINATED_ST
HA service terminated state.
CommunicationState6(const asiolink::IOServicePtr &io_service, const HAConfigPtr &config)
Constructor.
virtual bool failureDetected() const
Checks if the partner failure has been detected based on the DHCP traffic analysis.
virtual bool failureDetected() const
Checks if the partner failure has been detected based on the DHCP traffic analysis.
const int HA_HOT_STANDBY_ST
Hot standby state.
int64_t getDurationInMillisecs() const
Returns duration between the poke time and current time.
void setPartnerTime(const std::string &time_text)
Provide partner's notion of time so the new clock skew can be calculated.
Holds communication state between the two HA peers.
const int HA_PARTNER_DOWN_ST
Partner down state.
void startHeartbeatInternal(const long interval=0, const boost::function< void()> &heartbeat_impl=0)
Starts recurring heartbeat.
boost::shared_ptr< Option > OptionPtr
Definition: option.h:37
const int HA_LOAD_BALANCING_ST
Load balancing state.
bool isCommunicationInterrupted() const
Checks if communication with the partner is interrupted.
asiolink::IOServicePtr io_service_
Pointer to the common IO service instance.
Forward declaration to OptionInt.
const int HA_WAITING_ST
Server waiting state, i.e. waiting for another server to be ready.
long interval_
Interval specified for the heartbeat.
boost::posix_time::ptime poke_time_
Last poke time.
#define isc_throw(type, stream)
A shortcut macro to insert known values into exception arguments.
A generic exception that is thrown if a parameter given to a method is considered invalid in that con...
boost::shared_ptr< Pkt6 > Pkt6Ptr
A pointer to Pkt6 packet.
Definition: pkt6.h:28
boost::shared_ptr< OptionUint16 > OptionUint16Ptr
Definition: option_int.h:33
virtual void clearUnackedClients()
Removes information about clients which the partner server failed to respond to.
virtual void clearUnackedClients()=0
Removes information about clients which the partner server failed to respond to.
const int HA_READY_ST
Server ready state, i.e. synchronized database, can enable DHCP service.
boost::posix_time::ptime getPtime() const
Returns time encapsulated by this class.
Definition: date_time.h:58
int partner_state_
Last known state of the partner server.
OptionPtr getOption(uint16_t type) const
Returns shared_ptr to suboption of specific type.
Definition: option.cc:201
boost::shared_ptr< Pkt4 > Pkt4Ptr
A pointer to Pkt4 object.
Definition: pkt4.h:546
virtual void analyzeMessage(const boost::shared_ptr< dhcp::Pkt > &message)
Checks if the DHCPv6 message appears to be unanswered.
static HttpDateTime fromRfc1123(const std::string &time_string)
Creates an instance from a string containing time value formatted as specified in RFC 1123.
Definition: date_time.cc:45
void stopHeartbeat()
Stops recurring heartbeat.
This class parses and generates time values used in HTTP.
Definition: date_time.h:41
virtual void clearUnackedClients()
Removes information about clients which the partner server failed to respond to.
Defines the logger used by the top-level component of kea-dhcp-ddns.
void startHeartbeat(const long interval, const boost::function< void()> &heartbeat_impl)
Starts recurring heartbeat (public interface).
void poke()
Pokes the communication state.
std::set< std::vector< uint8_t > > unacked_clients_
Holds information about the clients which the partner server failed to respond to.
boost::posix_time::time_duration clock_skew_
Clock skew between the active servers.
CommunicationState4(const asiolink::IOServicePtr &io_service, const HAConfigPtr &config)
Constructor.
virtual void analyzeMessage(const boost::shared_ptr< dhcp::Pkt > &message)
Checks if the DHCPv4 message appears to be unanswered.
void setPartnerState(const std::string &state)
Sets partner state.
std::string logFormatClockSkew() const
Returns current clock skew value in the logger friendly format.
std::multimap< std::vector< uint8_t >, std::vector< uint8_t > > unacked_clients_
Holds information about the clients which the partner server failed to respond to.
virtual ~CommunicationState()
Destructor.
bool clockSkewShouldWarn()
Indicates whether the HA service should issue a warning about high clock skew between the active serv...
bool clockSkewShouldTerminate() const
Indicates whether the HA service should enter "terminated" state as a result of the clock skew exceed...
boost::posix_time::ptime last_clock_skew_warn_
Holds a time when last warning about too high clock skew was issued.
asiolink::IntervalTimerPtr timer_
Interval timer triggering heartbeat commands.
const int HA_SYNCING_ST
Synchronizing database state.
HAConfigPtr config_
High availability configuration.
boost::shared_ptr< HAConfig > HAConfigPtr
Pointer to the High Availability configuration structure.
Definition: ha_config.h:509
const int HA_UNAVAILABLE_ST
Special state indicating that this server is unable to communicate with the partner.
bool isClockSkewGreater(const long seconds) const
Checks if the clock skew is greater than the specified number of seconds.