summaryrefslogtreecommitdiffstats
path: root/net/tools/flip_server/epoll_server.h
diff options
context:
space:
mode:
authormbelshe@chromium.org <mbelshe@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2009-12-04 01:21:22 +0000
committermbelshe@chromium.org <mbelshe@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2009-12-04 01:21:22 +0000
commit6a4c749008dad827b0699e7de9eb458a0ed6d39f (patch)
tree38165061573b715fe3a737b9720e2be5f5dfbb62 /net/tools/flip_server/epoll_server.h
parent93ef176c40555a6f3a3895f745a977efe1b5037b (diff)
downloadchromium_src-6a4c749008dad827b0699e7de9eb458a0ed6d39f.zip
chromium_src-6a4c749008dad827b0699e7de9eb458a0ed6d39f.tar.gz
chromium_src-6a4c749008dad827b0699e7de9eb458a0ed6d39f.tar.bz2
Landing the open source version of the FLIP server.
BUG=none TEST=none Review URL: http://codereview.chromium.org/463009 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@33766 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'net/tools/flip_server/epoll_server.h')
-rw-r--r--net/tools/flip_server/epoll_server.h1071
1 files changed, 1071 insertions, 0 deletions
diff --git a/net/tools/flip_server/epoll_server.h b/net/tools/flip_server/epoll_server.h
new file mode 100644
index 0000000..5999aa4
--- /dev/null
+++ b/net/tools/flip_server/epoll_server.h
@@ -0,0 +1,1071 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef NET_TOOLS_FLIP_SERVER_EPOLL_SERVER_H_
+#define NET_TOOLS_FLIP_SERVER_EPOLL_SERVER_H_
+
+#include <fcntl.h>
+#include <sys/queue.h>
+#include <hash_map>
+#include <hash_set>
+#include <map>
+#include <string>
+#include <utility>
+#include <set>
+#include <vector>
+
+// #define GFE_GFE2_EPOLL_SERVER_EVENT_TRACING 1
+//
+// Defining GFE_GFE2_EPOLL_SERVER_EVENT_TRACING
+// causes code to exist which didn't before.
+// This code tracks each event generated by the epollserver,
+// as well as providing a per-fd-registered summary of
+// events. Note that enabling this code vastly slows
+// down operations, and uses substantially more
+// memory. For these reasons, it should only be enabled when doing
+// developer debugging at his/her workstation.
+//
+// A structure called 'EventRecorder' will exist when
+// the macro is defined. See the EventRecorder class interface
+// within the EpollServer class for more details.
+#ifdef GFE_GFE2_EPOLL_SERVER_EVENT_TRACING
+#include <iostream>
+#include "base/logging.h"
+#endif
+
+#include "base/scoped_ptr.h"
+#include "util/hash/hash.h"
+
+#ifdef CHROMIUM
+#include <sys/epoll.h>
+#else
+#include "net/base/epollstubs.h"
+#endif
+
+namespace gfe2 {
+
+class EpollServer;
+class EpollAlarmCallbackInterface;
+class ReadPipeCallback;
+class WatchDogToken;
+
+// Carries the event mask delivered to EpollCallbackInterface::OnEvent() and
+// carries the callback's ready-list request back out to the EpollServer.
+struct EpollEvent {
+  // Args:
+  //   events - the incoming event mask; stored in in_events.
+  //   (unnamed bool) - historically named is_epoll_wait. It is accepted so
+  //                    that existing two-argument callers keep compiling,
+  //                    but it is not stored: this struct has no field for
+  //                    it. Left unnamed to make that explicit and to avoid
+  //                    unused-parameter warnings.
+  EpollEvent(int events, bool /* is_epoll_wait */)
+      : in_events(events),
+        out_ready_mask(0) {
+  }
+
+  int in_events;       // incoming events
+  int out_ready_mask;  // the new event mask for ready list (0 means don't
+                       // get on the ready list). This field is always
+                       // initialized to 0 when the event is passed to
+                       // OnEvent.
+};
+
+// Callbacks which go into EpollServers are expected to derive from this class.
+// Every method is pure virtual; the constructor is protected, so only
+// subclasses can be instantiated.
+class EpollCallbackInterface {
+ public:
+ // Summary:
+ //   Called when the callback is registered into an EpollServer.
+ // Args:
+ //   eps - the poll server into which this callback was registered
+ //   fd - the file descriptor which was registered
+ //   event_mask - the event mask (composed of EPOLLIN, EPOLLOUT, etc)
+ //                which was registered (and will initially be used
+ //                in the epoll() calls)
+ virtual void OnRegistration(EpollServer* eps, int fd, int event_mask) = 0;
+
+ // Summary:
+ //   Called when the event_mask is modified (for a file-descriptor)
+ // Args:
+ //   fd - the file descriptor which was registered
+ //   event_mask - the event mask (composed of EPOLLIN, EPOLLOUT, etc)
+ //                which is now current (and will be used
+ //                in subsequent epoll() calls)
+ virtual void OnModification(int fd, int event_mask) = 0;
+
+ // Summary:
+ //   Called whenever an event occurs on the file-descriptor.
+ //   This is where the bulk of processing is expected to occur.
+ // Args:
+ //   fd - the file descriptor which was registered
+ //   event - a struct that contains the incoming event mask (in_events,
+ //           composed of EPOLLIN, EPOLLOUT, etc) and an output field
+ //           (out_ready_mask) through which OnEvent informs the
+ //           EpollServer whether to put this fd on the ready list.
+ //           NOTE(review): the EpollEvent constructor accepts an
+ //           "is_epoll_wait" flag but the struct does not store it, so
+ //           OnEvent cannot tell a true epoll_wait event from one that
+ //           came from the ready list.
+ virtual void OnEvent(int fd, EpollEvent* event) = 0;
+
+ // Summary:
+ //   Called when the file-descriptor is unregistered from the poll-server.
+ // Args:
+ //   fd - the file descriptor which was registered, and as of this call, is
+ //        now unregistered.
+ //   replaced - If true, this callback is being replaced by another,
+ //              otherwise it is simply being removed.
+ virtual void OnUnregistration(int fd, bool replaced) = 0;
+
+ // Summary:
+ //   Called when the epoll server is shutting down. This is different from
+ //   OnUnregistration because the subclass may want to clean up memory.
+ //   This is called in lieu of OnUnregistration.
+ // Args:
+ //   eps - the epoll server which is shutting down.
+ //   fd - the file descriptor which was registered.
+ virtual void OnShutdown(EpollServer* eps, int fd) = 0;
+
+ virtual ~EpollCallbackInterface() {}
+
+ protected:
+ EpollCallbackInterface() {}
+};
+
+////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////
+
+
+////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////
+
+class EpollServer {
+ public:
+ typedef EpollAlarmCallbackInterface AlarmCB;
+ typedef EpollCallbackInterface CB;
+
+ typedef multimap<int64, AlarmCB*> TimeToAlarmCBMap;
+ typedef TimeToAlarmCBMap::iterator AlarmRegToken;
+
+ // Summary:
+ // Constructor:
+ // By default, we don't wait any amount of time for events, and
+ // we suggest to the epoll-system that we're going to use on-the-order
+ // of 1024 FDs.
+ EpollServer();
+
+ ////////////////////////////////////////
+
+ // Destructor
+ virtual ~EpollServer();
+
+ ////////////////////////////////////////
+
+ // Summary
+ // Register a callback to be called whenever an event contained
+ // in the set of events included in event_mask occurs on the
+ // file-descriptor 'fd'
+ //
+ // Note that only one callback is allowed to be registered for
+ // any specific file-descriptor.
+ //
+ // If a callback is registered for a file-descriptor which has already
+ // been registered, then the previous callback is unregistered with
+ // the 'replaced' flag set to true. I.e. the previous callback's
+ // OnUnregistration() function is called like so:
+ // OnUnregistration(fd, true);
+ //
+ // The epoll server does NOT take on ownership of the callback: the callback
+ // creator is responsible for managing that memory.
+ //
+ // Args:
+ // fd - a valid file-descriptor
+ // cb - an instance of a subclass of EpollCallbackInterface
+ // event_mask - a combination of (EPOLLOUT, EPOLLIN.. etc) indicating
+ // the events for which the callback would like to be
+ // called.
+ virtual void RegisterFD(int fd, CB* cb, int event_mask);
+
+ ////////////////////////////////////////
+
+ // Summary:
+ // A shortcut for RegisterFD which sets things up such that the
+ // callback is called when 'fd' is available for writing.
+ // Args:
+ // fd - a valid file-descriptor
+ // cb - an instance of a subclass of EpollCallbackInterface
+ virtual void RegisterFDForWrite(int fd, CB* cb);
+
+ ////////////////////////////////////////
+
+ // Summary:
+ // A shortcut for RegisterFD which sets things up such that the
+ // callback is called when 'fd' is available for reading or writing.
+ // Args:
+ // fd - a valid file-descriptor
+ // cb - an instance of a subclass of EpollCallbackInterface
+ virtual void RegisterFDForReadWrite(int fd, CB* cb);
+
+ ////////////////////////////////////////
+
+ // Summary:
+ // A shortcut for RegisterFD which sets things up such that the
+ // callback is called when 'fd' is available for reading.
+ // Args:
+ // fd - a valid file-descriptor
+ // cb - an instance of a subclass of EpollCallbackInterface
+ virtual void RegisterFDForRead(int fd, CB* cb);
+
+ ////////////////////////////////////////
+
+ // Summary:
+ // Removes the FD and the associated callback from the pollserver.
+ // If the callback is registered with other FDs, they will continue
+ // to be processed using the callback without modification.
+ // If the file-descriptor specified is not registered in the
+ // epoll_server, then nothing happens as a result of this call.
+ // Args:
+ // fd - the file-descriptor which should no-longer be monitored.
+ virtual void UnregisterFD(int fd);
+
+ ////////////////////////////////////////
+
+ // Summary:
+ // Modifies the event mask for the file-descriptor, replacing
+ // the old event_mask with the new one specified here.
+ // If the file-descriptor specified is not registered in the
+ // epoll_server, then nothing happens as a result of this call.
+ // Args:
+ // fd - the fd whose event mask should be modified.
+ // event_mask - the new event mask.
+ virtual void ModifyCallback(int fd, int event_mask);
+
+ ////////////////////////////////////////
+
+ // Summary:
+ // Modifies the event mask for the file-descriptor such that we
+ // no longer request events when 'fd' is readable.
+ // If the file-descriptor specified is not registered in the
+ // epoll_server, then nothing happens as a result of this call.
+ // Args:
+ // fd - the fd whose event mask should be modified.
+ virtual void StopRead(int fd);
+
+ ////////////////////////////////////////
+
+ // Summary:
+ // Modifies the event mask for the file-descriptor such that we
+ // request events when 'fd' is readable.
+ // If the file-descriptor specified is not registered in the
+ // epoll_server, then nothing happens as a result of this call.
+ // Args:
+ // fd - the fd whose event mask should be modified.
+ virtual void StartRead(int fd);
+
+ ////////////////////////////////////////
+
+ // Summary:
+ // Modifies the event mask for the file-descriptor such that we
+ // no longer request events when 'fd' is writable.
+ // If the file-descriptor specified is not registered in the
+ // epoll_server, then nothing happens as a result of this call.
+ // Args:
+ // fd - the fd whose event mask should be modified.
+ virtual void StopWrite(int fd);
+
+ ////////////////////////////////////////
+
+ // Summary:
+ // Modifies the event mask for the file-descriptor such that we
+ // request events when 'fd' is writable.
+ // If the file-descriptor specified is not registered in the
+ // epoll_server, then nothing happens as a result of this call.
+ // Args:
+ // fd - the fd whose event mask should be modified.
+ virtual void StartWrite(int fd);
+
+ ////////////////////////////////////////
+
+ // Summary:
+ // Looks up the callback associated with the file-descriptor 'fd'.
+ // If a callback is associated with this file-descriptor, then
+ // its OnEvent() method is called with the file-descriptor 'fd',
+ // and event_mask 'event_mask'
+ //
+ // If no callback is registered for this file-descriptor, nothing
+ // will happen as a result of this call.
+ //
+ // This function is used internally by the EpollServer, but is
+ // available publicly so that events might be 'faked'. Calling
+ // this function with an fd and event_mask is equivalent (as far
+ // as the callback is concerned) to having a real event generated
+ // by epoll (except, of course, that read(), etc won't necessarily
+ // be able to read anything)
+ // Args:
+ // fd - the file-descriptor on which an event has occurred.
+ // event_mask - a bitmask representing the events which have occurred
+ // on/for this fd. This bitmask is composed of
+ // POLLIN, POLLOUT, etc.
+ //
+ void HandleEvent(int fd, int event_mask);
+
+ // Summary:
+ // Call this when you want the pollserver to
+ // wait for events and execute the callbacks associated with
+ // the file-descriptors on which those events have occurred.
+ // Depending on the value of timeout_in_us_, this may or may
+ // not return immediately. Please reference the set_timeout_in_us()
+ // function for the specific behaviour.
+ virtual void WaitForEventsAndExecuteCallbacks();
+
+ // Summary:
+ // When an fd is registered to use edge trigger notification, the ready
+ // list can be used to simulate level trigger semantics. Edge trigger
+ // registration doesn't send an initial event, and only rising edge (going
+ // from blocked to unblocked) events are sent. A callback can put itself on
+ // the ready list by calling SetFDReady() after calling RegisterFD(). The
+ // OnEvent method of all callbacks associated with the fds on the ready
+ // list will be called immediately after processing the events returned by
+ // epoll_wait(). The fd is removed from the ready list before the
+ // callback's OnEvent() method is invoked. To stay on the ready list, the
+ // OnEvent() (or some function in that call chain) must call SetFDReady
+ // again. When a fd is unregistered using UnregisterFD(), the fd is
+ // automatically removed from the ready list.
+ //
+ // When the callback for a edge triggered fd hits the falling edge (about
+ // to block, either because of it got an EAGAIN, or had a short read/write
+ // operation), it should remove itself from the ready list using
+ // SetFDNotReady() (since OnEvent cannot distinguish between invocation
+ // from the ready list vs from a normal epoll event). All four ready list
+ // methods are safe to be called within the context of the callbacks.
+ //
+ // Since the ready list invokes EpollCallbackInterface::OnEvent, only fds
+ // that are registered with the EpollServer will be put on the ready list.
+ // SetFDReady() and SetFDNotReady() will do nothing if the EpollServer
+ // doesn't know about the fd passed in.
+ //
+ // Since the ready list cannot reliably determine proper set of events
+ // which should be sent to the callback, SetFDReady() requests the caller
+ // to provide the ready list with the event mask, which will be used later
+ // when OnEvent() is invoked by the ready list. Hence, the event_mask
+ // passed to SetFDReady() does not affect the actual epoll registration of
+ // the fd with the kernel. If a fd is already put on the ready list, and
+ // SetFDReady() is called again for that fd with a different event_mask,
+ // the event_mask will be updated.
+ virtual void SetFDReady(int fd, int events_to_fake);
+
+ virtual void SetFDNotReady(int fd);
+
+ // Summary:
+ // IsFDReady(), ReadyListSize(), and VerifyReadyList are intended as
+ // debugging tools and for writing unit tests.
+ // IsFDReady() returns whether a fd is in the ready list.
+ // ReadyListSize() returns the number of fds on the ready list.
+ // VerifyReadyList() checks the consistency of internal data structure. It
+ // will CHECK if it finds an error.
+ virtual bool IsFDReady(int fd) const;
+
+ // Returns ready_list_size_ (stored as an int) converted to size_t.
+ size_t ReadyListSize() const {
+   return static_cast<size_t>(ready_list_size_);
+ }
+
+ void VerifyReadyList() const;
+
+ ////////////////////////////////////////
+
+ // Summary:
+ // Registers an alarm 'ac' to go off at time 'timeout_time_in_us'.
+ // If the callback returns a positive number from its OnAlarm() function,
+ // then the callback will be re-registered at that time, else the alarm
+ // owner is responsible for freeing up memory.
+ //
+ // Important: A given AlarmCB* can not be registered again if it is already
+ // registered. If a user wants to register a callback again it should first
+ // unregister the previous callback before calling RegisterAlarm again.
+ // Args:
+ // timeout_time_in_us - the absolute time at which the alarm should go off
+ // ac - the alarm which will be called.
+ virtual void RegisterAlarm(int64 timeout_time_in_us, AlarmCB* ac);
+
+ // Summary:
+ //   Convenience form of RegisterAlarm() that takes a relative delay:
+ //   'ac' is registered to go off at (ApproximateNowInUsec() + delta_in_us).
+ //   Because the base time is approximate (see ApproximateNowInUsec() for
+ //   how approximate), this is somewhat less accurate than an absolute
+ //   registration, but the error is never worse than the amount of time it
+ //   takes to process all events in one WaitForEvents. As with
+ //   RegisterAlarm(), if the callback returns a positive number from its
+ //   OnAlarm() function it will be re-registered at that time; otherwise
+ //   the alarm owner is responsible for freeing up memory.
+ //   The same thing may be accomplished by calling RegisterAlarm() with
+ //   ApproximateNowInUsec() directly.
+ //
+ //   Important: A given AlarmCB* can not be registered again if it is
+ //   already registered. To register a callback again, first unregister
+ //   the previous registration and then call RegisterAlarm again.
+ // Args:
+ //   delta_in_us - the delta in microseconds from ApproximateNowInUsec() at
+ //                 which point the alarm should go off.
+ //   ac - the alarm which will be called.
+ void RegisterAlarmApproximateDelta(int64 delta_in_us, AlarmCB* ac) {
+   const int64 timeout_time_in_us = ApproximateNowInUsec() + delta_in_us;
+   RegisterAlarm(timeout_time_in_us, ac);
+ }
+
+ ////////////////////////////////////////
+
+ // Summary:
+ // Unregister the alarm referred to by iterator_token; Callers should
+ // be warned that a token may have become already invalid when OnAlarm()
+ // is called, was unregistered, or OnShutdown was called on that alarm.
+ // Args:
+ // iterator_token - iterator to the alarm callback to unregister.
+ virtual void UnregisterAlarm(
+ const EpollServer::AlarmRegToken& iterator_token);
+
+ ////////////////////////////////////////
+
+ // Summary:
+ // returns the number of file-descriptors registered in this EpollServer.
+ // Returns:
+ // number of FDs registered (discounting the internal pipe used for Wake)
+ virtual int NumFDsRegistered() const;
+
+ // Summary:
+ // Force the epoll server to wake up (by writing to an internal pipe).
+ virtual void Wake();
+
+ // Summary:
+ // Wrapper around WallTimer's NowInUsec. We do this so that we can test
+ // EpollServer without using the system clock (and can avoid the flakiness
+ // that would ensue)
+ // Returns:
+ // the current time as number of microseconds since the Unix epoch.
+ virtual int64 NowInUsec() const;
+
+ // Summary:
+ //   A cheap substitute for NowInUsec(). Calling NowInUsec() many
+ //   thousands of times per WaitForEventsAndExecuteCallbacks call is
+ //   inefficient, so users may ask for an approximate time instead.
+ //
+ //   When WaitForEventsAndExecuteCallbacks is not an ancestor of the
+ //   caller's callstack, the result is as accurate as NowInUsec().
+ //   When it -is- an ancestor, the result is the time at which
+ //   WaitForEventsAndExecuteCallbacks started to process events or alarms.
+ //
+ //   In code terms: a non-zero recorded_now_in_us_ is returned as-is;
+ //   when it is zero the call falls through to NowInUsec().
+ //
+ //   Users should be encouraged to use this function.
+ // Returns:
+ //   the "approximate" current time as number of microseconds since the
+ //   Unix epoch.
+ virtual int64 ApproximateNowInUsec() const {
+   return (recorded_now_in_us_ == 0) ? this->NowInUsec()
+                                     : recorded_now_in_us_;
+ }
+
+ static string EventMaskToString(int event_mask);
+
+ // Summary:
+ // Logs the state of the epoll server with LOG(ERROR).
+ void LogStateOnCrash();
+
+ // Summary:
+ //   Stores a new timeout (in microseconds) in timeout_in_us_. Its effect
+ //   on WaitForEventsAndExecuteCallbacks() depends on the sign:
+ //     negative - returns only when an event has occurred;
+ //     zero     - returns immediately;
+ //     positive - returns when an event has occurred, or when
+ //                timeout_in_us microseconds have elapsed, whichever is
+ //                first.
+ // Args:
+ //   timeout_in_us - the new timeout; see above for the behaviour each
+ //                   range of values selects.
+ void set_timeout_in_us(int64 timeout_in_us) {
+   this->timeout_in_us_ = timeout_in_us;
+ }
+
+ ////////////////////////////////////////
+
+ // Summary:
+ //   Accessor for the current value of timeout_in_us_ (microseconds).
+ //   Returns int64, the same type as the member and as the argument of
+ //   set_timeout_in_us(), so that a large timeout is not truncated on the
+ //   way out.
+ int64 timeout_in_us() const { return timeout_in_us_; }
+
+ // Summary:
+ //   Accessor for in_shutdown_: true when the EpollServer is being
+ //   destroyed.
+ bool in_shutdown() const {
+   return in_shutdown_;
+ }
+
+ // Returns true if 'alarm' is present in all_alarms_, the set of alarm
+ // callbacks tracked by this server.
+ bool ContainsAlarm(EpollAlarmCallbackInterface* alarm) const {
+   return all_alarms_.count(alarm) != 0;
+ }
+
+ // Summary:
+ // A function for implementing the ready list. It invokes OnEvent for each
+ // of the fd in the ready list, and takes care of adding them back to the
+ // ready list if the callback requests it (by checking that out_ready_mask
+ // is non-zero).
+ void CallReadyListCallbacks();
+
+ // Granularity at which time moves when considering what alarms are on.
+ // See function: DoRoundingOnNow() on exact usage.
+ static const int kMinimumEffectiveAlarmQuantum;
+ protected:
+
+ // These have to be in the .h file so that we can override them in tests.
+ // Returns the result of fcntl(fd, F_GETFL, 0).
+ virtual inline int GetFlags(int fd) {
+   const int current_flags = fcntl(fd, F_GETFL, 0);
+   return current_flags;
+ }
+ // Applies 'flags' to 'fd' via fcntl(F_SETFL), always OR-ing in O_NONBLOCK
+ // first. Returns fcntl's result. Note that, unlike GetFlags(), this
+ // function is not virtual.
+ inline int SetFlags(int fd, int flags) {
+   const int nonblocking_flags = flags | O_NONBLOCK;
+   return fcntl(fd, F_SETFL, nonblocking_flags);
+ }
+
+ virtual void SetNonblocking (int fd);
+
+ // Thin virtual wrapper that forwards all four arguments to epoll_wait()
+ // and returns its result unchanged. It lives here so that unittests can
+ // override it in order to make effective mock EpollServer objects.
+ virtual int epoll_wait_impl(int epfd,
+                             struct epoll_event* events,
+                             int max_events,
+                             int timeout_in_ms) {
+   const int rv = epoll_wait(epfd, events, max_events, timeout_in_ms);
+   return rv;
+ }
+
+ // Internal per-fd record; never used by anything external to this class.
+ //
+ // Records are stored in a hash_set, whose iterators are const because
+ // the container must assume every bit of an element may take part in
+ // hashing and equality. Here only 'fd' does (see operator== below and
+ // CBAndEventMaskHash), so every other member is declared mutable: it can
+ // be updated through a hash_set iterator without compromising the
+ // integrity of the set.
+ struct CBAndEventMask {
+   // Builds an empty record: no callback, fd of -1, all masks zero, not in
+   // use, and with cleared list linkage.
+   CBAndEventMask()
+       : cb(NULL), fd(-1), event_mask(0), events_asserted(0),
+         events_to_fake(0), in_use(false) {
+     entry.le_next = NULL;
+     entry.le_prev = NULL;
+   }
+
+   // Builds a record for 'fd' with the given callback and registered event
+   // mask; the remaining members start out cleared, as in the default
+   // constructor.
+   CBAndEventMask(EpollCallbackInterface* cb,
+                  int event_mask,
+                  int fd)
+       : cb(cb),
+         fd(fd),
+         event_mask(event_mask),
+         events_asserted(0),
+         events_to_fake(0),
+         in_use(false) {
+     entry.le_next = NULL;
+     entry.le_prev = NULL;
+   }
+
+   // Equality required by hash_set: two records are equal when their fds
+   // match. Normally operator== would be a free-standing function, but
+   // since CBAndEventMask is a protected type that will never be a base
+   // class, it makes no difference.
+   bool operator==(const CBAndEventMask& other) const {
+     return other.fd == fd;
+   }
+
+   // A callback. If the fd is unregistered inside the callchain of
+   // OnEvent, the cb will be set to NULL.
+   mutable EpollCallbackInterface* cb;
+
+   // Linkage for the sys/queue.h LIST macros.
+   mutable LIST_ENTRY(CBAndEventMask) entry;
+   // File descriptor registered with the epoll server. This is the only
+   // member that takes part in hashing and equality, hence not mutable.
+   int fd;
+   // The current event_mask registered for this callback.
+   mutable int event_mask;
+   // The event_mask that was returned by epoll.
+   mutable int events_asserted;
+   // The event_mask for the ready list to use to call OnEvent.
+   mutable int events_to_fake;
+   // Toggled around calls to OnEvent to tell UnregisterFD to not erase the
+   // iterator because HandleEvent is using it.
+   mutable bool in_use;
+ };
+
+ // Hash functor handed to hash_set for CBAndEventMask elements. Only the
+ // fd contributes to the hash value.
+ struct CBAndEventMaskHash {
+   size_t operator()(const CBAndEventMask& cb_and_eventmask) const {
+     hash<int> fd_hasher;
+     return fd_hasher(cb_and_eventmask.fd);
+   }
+ };
+
+ typedef hash_set<CBAndEventMask, CBAndEventMaskHash> FDToCBMap;
+
+ // the following four functions are OS-specific, and are likely
+ // to be changed in a subclass if the poll/select method is changed
+ // from epoll.
+
+ // Summary:
+ // Deletes a file-descriptor from the set of FDs that should be
+ // monitored with epoll.
+ // Note that this only deals with modifying data relating -directly-
+ // with the epoll call-- it does not modify any data within the
+ // epoll_server.
+ // Args:
+ // fd - the file descriptor to-be-removed from the monitoring set
+ virtual void DelFD(int fd) const;
+
+ ////////////////////////////////////////
+
+ // Summary:
+ // Adds a file-descriptor to the set of FDs that should be
+ // monitored with epoll.
+ // Note that this only deals with modifying data relating -directly-
+ // with the epoll call.
+ // Args:
+ // fd - the file descriptor to-be-added to the monitoring set
+ // event_mask - the event mask (consisting of EPOLLIN, EPOLLOUT, etc
+ // OR'd together) which will be associated with this
+ // FD initially.
+ virtual void AddFD(int fd, int event_mask) const;
+
+ ////////////////////////////////////////
+
+ // Summary:
+ // Modifies a file-descriptor in the set of FDs that should be
+ // monitored with epoll.
+ // Note that this only deals with modifying data relating -directly-
+ // with the epoll call.
+ // Args:
+ // fd - the file descriptor to-be-added to the monitoring set
+ // event_mask - the event mask (consisting of EPOLLIN, EPOLLOUT, etc
+ // OR'd together) which will be associated with this
+ // FD after this call.
+ virtual void ModFD(int fd, int event_mask) const;
+
+ ////////////////////////////////////////
+
+ // Summary:
+ // Modifies the event mask associated with an FD in the set of
+ // data needed by epoll.
+ // Events are removed before they are added, thus, if ~0 is put
+ // in 'remove_event', whatever is put in 'add_event' will be
+ // the new event mask.
+ // If the file-descriptor specified is not registered in the
+ // epoll_server, then nothing happens as a result of this call.
+ // Args:
+ // fd - the file descriptor whose event mask is to be modified
+ // remove_event - the events which are to be removed from the current
+ // event_mask
+ // add_event - the events which are to be added to the current event_mask
+ //
+ //
+ virtual void ModifyFD(int fd, int remove_event, int add_event);
+
+ ////////////////////////////////////////
+
+ // Summary:
+ // Waits for events, and calls HandleEvent() for each
+ // fd, event pair discovered to possibly have an event.
+ // Note that a callback (B) may get a spurious event if
+ // another callback (A) has closed a file-descriptor N, and
+ // the callback (B) has a newly opened file-descriptor, which
+ // also happens to be N.
+ virtual void WaitForEventsAndCallHandleEvents(int64 timeout_in_us,
+ struct epoll_event events[],
+ int events_size);
+
+
+
+ // Summary:
+ // An internal function for implementing the ready list. It adds a fd's
+ // CBAndEventMask to the ready list. If the fd is already on the ready
+ // list, it is a no-op.
+ void AddToReadyList(CBAndEventMask* cb_and_mask);
+
+ // Summary:
+ // An internal function for implementing the ready list. It removes a fd's
+ // CBAndEventMask from the ready list. If the fd is not on the ready list,
+ // it is a no-op.
+ void RemoveFromReadyList(const CBAndEventMask& cb_and_mask);
+
+ // Summary:
+ // Calls any pending alarms that should go off and reregisters them if they
+ // were recurring.
+ virtual void CallAndReregisterAlarmEvents();
+
+ // The file-descriptor created for epolling
+ int epoll_fd_;
+
+ // The mapping of file-descriptor to CBAndEventMasks
+ FDToCBMap cb_map_;
+
+ // TODO(sushantj): Having this hash_set is avoidable. We currently have it
+ // only so that we can enforce stringent checks that a caller can not register
+ // the same alarm twice. One option is to have an implementation in which
+ // this hash_set is used only in the debug mode.
+ hash_set<AlarmCB*> all_alarms_;
+
+ TimeToAlarmCBMap alarm_map_;
+
+ // The amount of time in microseconds that we'll wait before returning
+ // from the WaitForEventsAndExecuteCallbacks() function.
+ // If this is positive, wait that many microseconds.
+ // If this is negative, wait forever, or for the first event that occurs
+ // If this is zero, never wait for an event.
+ int64 timeout_in_us_;
+
+ // This is nonzero only after the invocation of epoll_wait_impl within
+ // WaitForEventsAndCallHandleEvents and before the function
+ // WaitForEventsAndExecuteCallbacks returns. At all other times, this is
+ // zero. This enables us to have relatively accurate time returned from the
+ // ApproximateNowInUsec() function. See that function for more details.
+ int64 recorded_now_in_us_;
+
+ // This is used to implement CallAndReregisterAlarmEvents. This stores
+ // all alarms that were reregistered because OnAlarm() returned a
+ // value > 0 and the time at which they should be executed is less than
+ // the current time. By storing such alarms in this map we ensure
+ // that while calling CallAndReregisterAlarmEvents we do not call
+ // OnAlarm on any alarm in this set. This ensures that we do not
+ // go in an infinite loop.
+ hash_set<AlarmCB*> alarms_reregistered_and_should_be_skipped_;
+
+ LIST_HEAD(ReadyList, CBAndEventMask) ready_list_;
+ LIST_HEAD(TmpList, CBAndEventMask) tmp_list_;
+ int ready_list_size_;
+ // TODO(alyssar): make this into something that scales up.
+ static const int events_size_ = 256;
+ struct epoll_event events_[256];
+
+ // This controls the granularity for alarms.
+ // See function CallAndReregisterAlarmEvents()
+ // TODO(sushantj): Add test for this.
+ int64 DoRoundingOnNow(int64 now_in_us) const;
+
+#ifdef GFE_GFE2_EPOLL_SERVER_EVENT_TRACING
+ struct EventRecorder {
+ public:
+ // Starts with no records counted and a record threshold of 10000.
+ EventRecorder()
+     : num_records_(0),
+       record_threshold_(10000) {}
+
+ // Releases everything still held by the recorder by calling Clear().
+ ~EventRecorder() { Clear(); }
+
+ // Sets the record threshold. Once the number of recorded events passes
+ // the threshold, the collected data summary for all FDs is written to
+ // LOG(INFO) and all events are then cleared (see MaybeRecordAndClear()).
+ // The summary does not include the individual events; if you're
+ // interested in those, you'll have to get at them programmatically.
+ // Note that the definition of an 'event' is a bit 'hazy', as it includes
+ // the 'Unregistration' event, and perhaps others.
+ void set_record_threshold(int64 new_threshold) {
+   this->record_threshold_ = new_threshold;
+ }
+
+ // Deletes every object held in debug_events_ and then empties
+ // debug_events_, unregistered_fds_ and event_counts_.
+ void Clear() {
+   // Use an unsigned index: comparing a signed int against size() mixes
+   // signedness.
+   for (size_t i = 0; i < debug_events_.size(); ++i) {
+     delete debug_events_[i];
+   }
+   debug_events_.clear();
+   unregistered_fds_.clear();
+   event_counts_.clear();
+ }
+
+ // Counts one more record. When the threshold is positive and the count
+ // has gone above it, dumps the recorder to LOG(INFO), resets the count
+ // and clears all collected data.
+ void MaybeRecordAndClear() {
+   ++num_records_;
+   const bool threshold_enabled = record_threshold_ > 0;
+   if (!threshold_enabled || num_records_ <= record_threshold_) {
+     return;
+   }
+   LOG(INFO) << "\n" << *this;
+   num_records_ = 0;
+   Clear();
+ }
+
+ // Appends a heap-allocated FDMaskOutput describing (fd, mask, function)
+ // to debug_events_, then counts the record. The object is deleted later
+ // by Clear().
+ void RecordFDMaskEvent(int fd, int mask, const char* function) {
+   debug_events_.push_back(new FDMaskOutput(fd, mask, function));
+   MaybeRecordAndClear();
+ }
+
+ // Appends a heap-allocated EpollWaitOutput holding the timeout and the
+ // number of events generated to debug_events_, then counts the record.
+ // The object is deleted later by Clear().
+ void RecordEpollWaitEvent(int timeout_in_ms,
+                           int num_events_generated) {
+   debug_events_.push_back(
+       new EpollWaitOutput(timeout_in_ms, num_events_generated));
+   MaybeRecordAndClear();
+ }
+
+ // Folds 'event_mask' into the per-fd Events entry (created on first use
+ // by operator[]) and then counts the record.
+ void RecordEpollEvent(int fd, int event_mask) {
+   event_counts_[fd].AssignFromMask(event_mask);
+   MaybeRecordAndClear();
+ }
+
+ // Streams the recorder's contents to 'os' in three sections: the entries
+ // for unregistered fds, then the per-fd entries still in event_counts_,
+ // then every recorded debug event (one per line). Returns 'os'.
+ friend ostream& operator<<(ostream& os, const EventRecorder& er) {
+   // Unsigned indices: size() is unsigned, so an int index mixes
+   // signedness in the comparison.
+   for (size_t u = 0; u < er.unregistered_fds_.size(); ++u) {
+     // NOTE(review): the same element is streamed twice here, once after
+     // the "fd: " label and once on its own. Output kept as-is.
+     os << "fd: " << er.unregistered_fds_[u] << "\n";
+     os << er.unregistered_fds_[u];
+   }
+   for (hash_map<int, Events>::const_iterator it = er.event_counts_.begin();
+        it != er.event_counts_.end();
+        ++it) {
+     os << "fd: " << it->first << "\n";
+     os << it->second;
+   }
+   for (size_t d = 0; d < er.debug_events_.size(); ++d) {
+     os << *(er.debug_events_[d]) << "\n";
+   }
+   return os;
+ }
+
+ // Notes that 'fd' was unregistered. If counters exist for the fd, they
+ // are moved from event_counts_ to the unregistered_fds_ list (the fd
+ // number itself is not kept). The call is counted as an event whether
+ // or not the fd was being tracked.
+ void RecordUnregistration(int fd) {
+   hash_map<int, Events>::iterator entry = event_counts_.find(fd);
+   const bool is_tracked = (entry != event_counts_.end());
+   if (is_tracked) {
+     unregistered_fds_.push_back(entry->second);
+     event_counts_.erase(entry);
+   }
+   MaybeRecordAndClear();
+ }
+
+ protected:
+ // Abstract base for the individual trace entries kept in debug_events_.
+ // Concrete entries implement OutputToStream(); streaming an entry with
+ // operator<< forwards to that virtual function, so printing through a
+ // DebugOutput reference reaches the derived class's formatter.
+ class DebugOutput {
+  public:
+   friend ostream& operator<<(ostream& os, const DebugOutput& debug_output) {
+     // OutputToStream() takes a pointer, so pass the stream's address.
+     debug_output.OutputToStream(&os);
+     return os;
+   }
+   // Writes a textual description of this entry to '*os'.
+   virtual void OutputToStream(ostream* os) const = 0;
+   // Virtual because entries are deleted through DebugOutput pointers.
+   virtual ~DebugOutput() {}
+ };
+
+ // Trace entry carrying an fd, an event mask and a function name, as
+ // passed to RecordFDMaskEvent().
+ class FDMaskOutput : public DebugOutput {
+  public:
+   FDMaskOutput(int fd, int mask, const char* function)
+       : fd_(fd), mask_(mask), function_(function) {}
+
+   // Emits "func: <function>\tfd: <fd>", followed by "\tmask: <mask text>"
+   // only when the mask is non-zero.
+   virtual void OutputToStream(ostream* os) const {
+     ostream& out = *os;
+     out << "func: " << function_ << "\tfd: " << fd_;
+     if (mask_ == 0) {
+       return;
+     }
+     out << "\tmask: " << EventMaskToString(mask_);
+   }
+
+   int fd_;
+   int mask_;
+   const char* function_;  // Stored as given; the string is not copied.
+ };
+
+ // Trace entry carrying a timeout (in ms) and a number of generated
+ // events, as passed to RecordEpollWaitEvent().
+ class EpollWaitOutput : public DebugOutput {
+  public:
+   EpollWaitOutput(int timeout_in_ms, int num_events_generated)
+       : timeout_in_ms_(timeout_in_ms),
+         num_events_generated_(num_events_generated) {}
+
+   // Emits "timeout_in_ms: <t>\tnum_events_generated: <n>".
+   virtual void OutputToStream(ostream* os) const {
+     ostream& out = *os;
+     out << "timeout_in_ms: " << timeout_in_ms_;
+     out << "\tnum_events_generated: " << num_events_generated_;
+   }
+
+  protected:
+   int timeout_in_ms_;
+   int num_events_generated_;
+ };
+
+ // Tally of how many recorded event masks contained each epoll flag.
+ // One instance is kept per fd in event_counts_.
+ struct Events {
+   // All counters start at zero.
+   Events()
+       : epoll_in(0),
+         epoll_pri(0),
+         epoll_out(0),
+         epoll_rdnorm(0),
+         epoll_rdband(0),
+         epoll_wrnorm(0),
+         epoll_wrband(0),
+         epoll_msg(0),
+         epoll_err(0),
+         epoll_hup(0),
+         epoll_oneshot(0),
+         epoll_et(0) {}
+
+   // Despite the name this accumulates: every flag that is set in
+   // 'event_mask' bumps the matching counter by one.
+   void AssignFromMask(int event_mask) {
+     epoll_in += (event_mask & EPOLLIN) ? 1 : 0;
+     epoll_pri += (event_mask & EPOLLPRI) ? 1 : 0;
+     epoll_out += (event_mask & EPOLLOUT) ? 1 : 0;
+     epoll_rdnorm += (event_mask & EPOLLRDNORM) ? 1 : 0;
+     epoll_rdband += (event_mask & EPOLLRDBAND) ? 1 : 0;
+     epoll_wrnorm += (event_mask & EPOLLWRNORM) ? 1 : 0;
+     epoll_wrband += (event_mask & EPOLLWRBAND) ? 1 : 0;
+     epoll_msg += (event_mask & EPOLLMSG) ? 1 : 0;
+     epoll_err += (event_mask & EPOLLERR) ? 1 : 0;
+     epoll_hup += (event_mask & EPOLLHUP) ? 1 : 0;
+     epoll_oneshot += (event_mask & EPOLLONESHOT) ? 1 : 0;
+     epoll_et += (event_mask & EPOLLET) ? 1 : 0;
+   }
+
+   // Writes "<label><count>\n" to 'os'; writes nothing for a zero count.
+   static void PrintIfCounted(ostream& os, const char* label,
+                              unsigned int count) {
+     if (count != 0) {
+       os << label << count << "\n";
+     }
+   }
+
+   // Prints one tab-indented line per non-zero counter, in the order the
+   // counters are declared. Returns 'os' to allow chaining.
+   friend ostream& operator<<(ostream& os, const Events& ev) {
+     PrintIfCounted(os, "\t EPOLLIN: ", ev.epoll_in);
+     PrintIfCounted(os, "\t EPOLLPRI: ", ev.epoll_pri);
+     PrintIfCounted(os, "\t EPOLLOUT: ", ev.epoll_out);
+     PrintIfCounted(os, "\t EPOLLRDNORM: ", ev.epoll_rdnorm);
+     PrintIfCounted(os, "\t EPOLLRDBAND: ", ev.epoll_rdband);
+     PrintIfCounted(os, "\t EPOLLWRNORM: ", ev.epoll_wrnorm);
+     PrintIfCounted(os, "\t EPOLLWRBAND: ", ev.epoll_wrband);
+     PrintIfCounted(os, "\t EPOLLMSG: ", ev.epoll_msg);
+     PrintIfCounted(os, "\t EPOLLERR: ", ev.epoll_err);
+     PrintIfCounted(os, "\t EPOLLHUP: ", ev.epoll_hup);
+     PrintIfCounted(os, "\t EPOLLONESHOT: ", ev.epoll_oneshot);
+     PrintIfCounted(os, "\t EPOLLET: ", ev.epoll_et);
+     return os;
+   }
+
+   unsigned int epoll_in;
+   unsigned int epoll_pri;
+   unsigned int epoll_out;
+   unsigned int epoll_rdnorm;
+   unsigned int epoll_rdband;
+   unsigned int epoll_wrnorm;
+   unsigned int epoll_wrband;
+   unsigned int epoll_msg;
+   unsigned int epoll_err;
+   unsigned int epoll_hup;
+   unsigned int epoll_oneshot;
+   unsigned int epoll_et;
+ };
+
+ // Individual trace entries in the order they were recorded. The
+ // pointers are allocated by RecordFDMaskEvent()/RecordEpollWaitEvent()
+ // and deleted in Clear().
+ vector<DebugOutput*> debug_events_;
+ // Event counts of fds that were unregistered; the fd number itself is
+ // not kept (see RecordUnregistration()).
+ vector<Events> unregistered_fds_;
+ // Event counts per fd, updated by RecordEpollEvent().
+ hash_map<int, Events> event_counts_;
+ // Incremented by every MaybeRecordAndClear() call and reset to 0 when
+ // that call flushes to LOG(INFO).
+ int64 num_records_;
+ // A flush happens once num_records_ exceeds this value; values <= 0
+ // disable flushing (see MaybeRecordAndClear()).
+ int64 record_threshold_;
+ };
+
+ // Discards all recorded event data by forwarding to
+ // EventRecorder::Clear().
+ void ClearEventRecords() {
+ event_recorder_.Clear();
+ }
+ // Streams the recorder's current contents (via EventRecorder's
+ // operator<<) to '*os'. 'os' is dereferenced unconditionally, so it
+ // must not be NULL.
+ void WriteEventRecords(ostream* os) const {
+   ostream& out = *os;
+   out << event_recorder_;
+ }
+
+ // Event-trace state for this server. Declared mutable, so it can be
+ // modified from const member functions as well.
+ mutable EventRecorder event_recorder_;
+
+#endif
+
+ private:
+ // Helper functions used in the destructor.
+ void CleanupFDToCBMap();
+ void CleanupTimeToAlarmCBMap();
+
+ // The callback registered to the fds below. As the purpose of their
+ // registration is only to wake the epoll server, it just clears the pipe
+ // and returns.
+ scoped_ptr<ReadPipeCallback> wake_cb_;
+
+ // A pipe owned by the epoll server. The server will be registered to listen
+ // on read_fd_ and can be woken by Wake(), which writes to write_fd_.
+ int read_fd_;
+ int write_fd_;
+
+ // Recursion guard for WaitForEventsAndExecuteCallbacks(). That function
+ // checks this flag on entry. If it is already true (i.e. the call is
+ // nested), the function either logs to ERROR and returns without doing
+ // any work, or aborts the program (in DEBUG mode). Otherwise it sets the
+ // flag to true, does its work, and sets it back to false when done.
+ // This catches unwanted recursion.
+ bool in_wait_for_events_and_execute_callbacks_;
+
+ // True while the EpollServer is being destroyed.
+ bool in_shutdown_;
+
+ DISALLOW_COPY_AND_ASSIGN(EpollServer);
+};
+
+// Interface for alarm callbacks used with an EpollServer. Every method is
+// pure virtual; the constructor is protected, so the interface can only be
+// instantiated through a subclass.
+class EpollAlarmCallbackInterface {
+ public:
+ // Summary:
+ // Called when an alarm times out. Invalidates an AlarmRegToken.
+ // WARNING: If a token was saved to refer to an alarm callback, OnAlarm must
+ // delete it, as the reference is no longer valid.
+ // Returns:
+ // the unix time (in microseconds) at which this alarm should be signaled
+ // again, or 0 if the alarm should be removed.
+ virtual int64 OnAlarm() = 0;
+
+ // Summary:
+ // Called when an alarm is registered. Invalidates an AlarmRegToken.
+ // Args:
+ // token: the iterator to the alarm registered in the alarm map.
+ // WARNING: this token becomes invalid when the alarm fires, is
+ // unregistered, or OnShutdown is called on that alarm.
+ // eps: the epoll server the alarm is registered with.
+ virtual void OnRegistration(const EpollServer::AlarmRegToken& token,
+ EpollServer* eps) = 0;
+
+ // Summary:
+ // Called when an alarm is unregistered.
+ // WARNING: It is not valid to unregister a callback and then use the token
+ // that was saved to refer to the callback.
+ virtual void OnUnregistration() = 0;
+
+ // Summary:
+ // Called when the epoll server is shutting down.
+ // Invalidates the AlarmRegToken that was given when this alarm was
+ // registered.
+ // Args:
+ // eps: the epoll server that is shutting down.
+ virtual void OnShutdown(EpollServer* eps) = 0;
+
+ virtual ~EpollAlarmCallbackInterface() {}
+
+ protected:
+ // Protected: only subclasses can construct the interface.
+ EpollAlarmCallbackInterface() {}
+};
+
+// A simple alarm which unregisters itself on destruction.
+//
+// PLEASE NOTE:
+// Any class overriding these functions must either call the implementation
+// of the parent class, or it must otherwise make sure that the 'registered_'
+// boolean and the token, 'token_', are updated appropriately.
+class EpollAlarm : public EpollAlarmCallbackInterface {
+ public:
+ EpollAlarm();
+
+ virtual ~EpollAlarm();
+
+ // Marks the alarm as unregistered and returns 0. The return value may be
+ // safely ignored by subclasses.
+ virtual int64 OnAlarm();
+
+ // Marks the alarm as registered, and stores the token.
+ virtual void OnRegistration(const EpollServer::AlarmRegToken& token,
+ EpollServer* eps);
+
+ // Marks the alarm as unregistered.
+ virtual void OnUnregistration();
+
+ // Marks the alarm as unregistered.
+ virtual void OnShutdown(EpollServer* eps);
+
+ // If the alarm was registered, unregister it.
+ void UnregisterIfRegistered();
+
+ // Returns the current value of the 'registered_' flag.
+ bool registered() const { return registered_; }
+
+ // Returns the stored epoll server pointer.
+ const EpollServer* eps() const { return eps_; }
+
+ private:
+ // Token stored by OnRegistration().
+ EpollServer::AlarmRegToken token_;
+ // NOTE(review): presumably the server passed to OnRegistration(); the
+ // implementation is not visible in this header -- confirm.
+ EpollServer* eps_;
+ // Registration state maintained by the On*() callbacks above.
+ bool registered_;
+};
+
+} // namespace gfe2
+
+#endif // NET_TOOLS_FLIP_SERVER_EPOLL_SERVER_H_
+