diff options
author | mbelshe@chromium.org <mbelshe@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2009-12-04 01:21:22 +0000 |
---|---|---|
committer | mbelshe@chromium.org <mbelshe@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2009-12-04 01:21:22 +0000 |
commit | 6a4c749008dad827b0699e7de9eb458a0ed6d39f (patch) | |
tree | 38165061573b715fe3a737b9720e2be5f5dfbb62 /net | |
parent | 93ef176c40555a6f3a3895f745a977efe1b5037b (diff) | |
download | chromium_src-6a4c749008dad827b0699e7de9eb458a0ed6d39f.zip chromium_src-6a4c749008dad827b0699e7de9eb458a0ed6d39f.tar.gz chromium_src-6a4c749008dad827b0699e7de9eb458a0ed6d39f.tar.bz2 |
Landing the open source version of the FLIP server.
BUG=none
TEST=none
Review URL: http://codereview.chromium.org/463009
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@33766 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'net')
24 files changed, 10105 insertions, 0 deletions
diff --git a/net/tools/flip_server/balsa_enums.h b/net/tools/flip_server/balsa_enums.h new file mode 100644 index 0000000..342b3bf --- /dev/null +++ b/net/tools/flip_server/balsa_enums.h @@ -0,0 +1,111 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef NET_TOOLS_FLIP_SERVER_BALSA_ENUMS_H_ +#define NET_TOOLS_FLIP_SERVER_BALSA_ENUMS_H_ + +namespace gfe2 { + +struct BalsaFrameEnums { + enum ParseState { + ERROR, + READING_HEADER_AND_FIRSTLINE, + READING_CHUNK_LENGTH, + READING_CHUNK_EXTENSION, + READING_CHUNK_DATA, + READING_CHUNK_TERM, + READING_LAST_CHUNK_TERM, + READING_TRAILER, + READING_UNTIL_CLOSE, + READING_CONTENT, + MESSAGE_FULLY_READ, + NUM_STATES, + }; + + enum ErrorCode { + NO_ERROR = 0, // A sentinel value for convenience, none of the callbacks + // should ever see this error code. + // Header parsing errors + // Note that adding one to many of the RESPONSE errors yields the + // appropriate REQUEST error. + // Particularly, when parsing the first line of a request or response, + // there are three sequences of non-whitespace regardless of whether or + // not it is a request or response. These are listed below, in order. + // + // firstline_a firstline_b firstline_c + // REQ: method request_uri version + // RESP: version statuscode reason + // + // As you can see, the first token is the 'method' field for a request, + // and 'version' field for a response. We call the first non whitespace + // token firstline_a, the second firstline_b, and the third token + // followed by [^\r\n]*) firstline_c. + // + // This organization is important, as it lets us determine the error code + // to use without a branch based on is_response. 
Instead, we simply add + // is_request to the response error code-- If is_request is true, then + // we'll get the request error code, thanks to the fact that the error + // code numbers are organized to ensure that response error codes always + // precede request error codes. + // | Triggered + // | while processing + // | this NONWS + // | sequence... + NO_STATUS_LINE_IN_RESPONSE, // | + NO_REQUEST_LINE_IN_REQUEST, // | + FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION, // | firstline_a + FAILED_TO_FIND_WS_AFTER_REQUEST_METHOD, // | firstline_a + FAILED_TO_FIND_WS_AFTER_RESPONSE_STATUSCODE, // | firstline_b + FAILED_TO_FIND_WS_AFTER_REQUEST_REQUEST_URI, // | firstline_b + FAILED_TO_FIND_NL_AFTER_RESPONSE_REASON_PHRASE, // | firstline_c + FAILED_TO_FIND_NL_AFTER_REQUEST_HTTP_VERSION, // | firstline_c + + FAILED_CONVERTING_STATUS_CODE_TO_INT, + REQUEST_URI_TOO_LONG, // Request URI greater than kMaxUrlLen. + + HEADERS_TOO_LONG, + UNPARSABLE_CONTENT_LENGTH, + // Warning: there may be a body but there was no content-length/chunked + // encoding + MAYBE_BODY_BUT_NO_CONTENT_LENGTH, + + // This is used if a body is required for a request. + REQUIRED_BODY_BUT_NO_CONTENT_LENGTH, + + HEADER_MISSING_COLON, + + // Chunking errors + INVALID_CHUNK_LENGTH, + CHUNK_LENGTH_OVERFLOW, + + // Other errors. + CALLED_BYTES_SPLICED_WHEN_UNSAFE_TO_DO_SO, + CALLED_BYTES_SPLICED_AND_EXCEEDED_SAFE_SPLICE_AMOUNT, + MULTIPLE_CONTENT_LENGTH_KEYS, + MULTIPLE_TRANSFER_ENCODING_KEYS, + UNKNOWN_TRANSFER_ENCODING, + INVALID_HEADER_FORMAT, + + // A detected internal inconsistency was found. 
+ INTERNAL_LOGIC_ERROR, + + NUM_ERROR_CODES + }; + static const char* ParseStateToString(ParseState error_code); + static const char* ErrorCodeToString(ErrorCode error_code); +}; + +struct BalsaHeadersEnums { + enum ContentLengthStatus { + INVALID_CONTENT_LENGTH, + CONTENT_LENGTH_OVERFLOW, + NO_CONTENT_LENGTH, + VALID_CONTENT_LENGTH, + }; +}; + +} // namespace gfe2 + +#endif // NET_TOOLS_FLIP_SERVER_BALSA_ENUMS_H_ + diff --git a/net/tools/flip_server/balsa_frame.cc b/net/tools/flip_server/balsa_frame.cc new file mode 100644 index 0000000..39695cd --- /dev/null +++ b/net/tools/flip_server/balsa_frame.cc @@ -0,0 +1,1571 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "net/tools/flip_server/balsa_frame.h" + +#include <assert.h> +#include <emmintrin.h> +#include <strings.h> + +#include <limits> +#include <iostream> +#include <string> +#include <utility> +#include <vector> + +#include "base/logging.h" +#include "base/port.h" +#include "net/tools/flip_server/balsa_enums.h" +#include "net/tools/flip_server/balsa_headers.h" +#include "net/tools/flip_server/balsa_visitor_interface.h" +#include "net/tools/flip_server/buffer_interface.h" +#include "net/tools/flip_server/simple_buffer.h" +#ifdef CHROMIUM +#else +#include "strings/split.h" +#include "strings/stringpiece.h" // for StringPiece +#include "strings/stringpiece_utils.h" +#endif + +namespace gfe2 { + +// Constants holding some header names for headers which can affect the way the +// HTTP message is framed, and so must be processed specially: +static const char kContentLength[] = "content-length"; +static const size_t kContentLengthSize = sizeof(kContentLength) - 1; +static const char kTransferEncoding[] = "transfer-encoding"; +static const size_t kTransferEncodingSize = sizeof(kTransferEncoding) - 1; + +void BalsaFrame::Reset() { + last_char_was_slash_r_ = false; + 
saw_non_newline_char_ = false; + start_was_space_ = true; + chunk_length_character_extracted_ = false; + // is_request_ = true; // not reset between messages. + // request_was_head_ = false; // not reset between messages. + // max_header_length_ = 4096; // not reset between messages. + // max_request_uri_length_ = 2048; // not reset between messages. + // visitor_ = &do_nothing_visitor_; // not reset between messages. + chunk_length_remaining_ = 0; + content_length_remaining_ = 0; + last_slash_n_loc_ = NULL; + last_recorded_slash_n_loc_ = NULL; + last_slash_n_idx_ = 0; + term_chars_ = 0; + parse_state_ = BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE; + last_error_ = BalsaFrameEnums::NO_ERROR; + lines_.clear(); + if (headers_ != NULL) { + headers_->Clear(); + } +} + +const char* BalsaFrameEnums::ParseStateToString( + BalsaFrameEnums::ParseState error_code) { + switch (error_code) { + case ERROR: + return "ERROR"; + case READING_HEADER_AND_FIRSTLINE: + return "READING_HEADER_AND_FIRSTLINE"; + case READING_CHUNK_LENGTH: + return "READING_CHUNK_LENGTH"; + case READING_CHUNK_EXTENSION: + return "READING_CHUNK_EXTENSION"; + case READING_CHUNK_DATA: + return "READING_CHUNK_DATA"; + case READING_CHUNK_TERM: + return "READING_CHUNK_TERM"; + case READING_LAST_CHUNK_TERM: + return "READING_LAST_CHUNK_TERM"; + case READING_TRAILER: + return "READING_TRAILER"; + case READING_UNTIL_CLOSE: + return "READING_UNTIL_CLOSE"; + case READING_CONTENT: + return "READING_CONTENT"; + case MESSAGE_FULLY_READ: + return "MESSAGE_FULLY_READ"; + case NUM_STATES: + return "UNKNOWN_STATE"; + } + return "UNKNOWN_STATE"; +} + +const char* BalsaFrameEnums::ErrorCodeToString( + BalsaFrameEnums::ErrorCode error_code) { + switch (error_code) { + case NO_ERROR: + return "NO_ERROR"; + case NO_STATUS_LINE_IN_RESPONSE: + return "NO_STATUS_LINE_IN_RESPONSE"; + case NO_REQUEST_LINE_IN_REQUEST: + return "NO_REQUEST_LINE_IN_REQUEST"; + case FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION: + return 
"FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION"; + case FAILED_TO_FIND_WS_AFTER_REQUEST_METHOD: + return "FAILED_TO_FIND_WS_AFTER_REQUEST_METHOD"; + case FAILED_TO_FIND_WS_AFTER_RESPONSE_STATUSCODE: + return "FAILED_TO_FIND_WS_AFTER_RESPONSE_STATUSCODE"; + case FAILED_TO_FIND_WS_AFTER_REQUEST_REQUEST_URI: + return "FAILED_TO_FIND_WS_AFTER_REQUEST_REQUEST_URI"; + case FAILED_TO_FIND_NL_AFTER_RESPONSE_REASON_PHRASE: + return "FAILED_TO_FIND_NL_AFTER_RESPONSE_REASON_PHRASE"; + case FAILED_TO_FIND_NL_AFTER_REQUEST_HTTP_VERSION: + return "FAILED_TO_FIND_NL_AFTER_REQUEST_HTTP_VERSION"; + case FAILED_CONVERTING_STATUS_CODE_TO_INT: + return "FAILED_CONVERTING_STATUS_CODE_TO_INT"; + case REQUEST_URI_TOO_LONG: + return "REQUEST_URI_TOO_LONG"; + case HEADERS_TOO_LONG: + return "HEADERS_TOO_LONG"; + case UNPARSABLE_CONTENT_LENGTH: + return "UNPARSABLE_CONTENT_LENGTH"; + case MAYBE_BODY_BUT_NO_CONTENT_LENGTH: + return "MAYBE_BODY_BUT_NO_CONTENT_LENGTH"; + case REQUIRED_BODY_BUT_NO_CONTENT_LENGTH: + return "REQUIRED_BODY_BUT_NO_CONTENT_LENGTH"; + case HEADER_MISSING_COLON: + return "HEADER_MISSING_COLON"; + case INVALID_CHUNK_LENGTH: + return "INVALID_CHUNK_LENGTH"; + case CHUNK_LENGTH_OVERFLOW: + return "CHUNK_LENGTH_OVERFLOW"; + case CALLED_BYTES_SPLICED_WHEN_UNSAFE_TO_DO_SO: + return "CALLED_BYTES_SPLICED_WHEN_UNSAFE_TO_DO_SO"; + case CALLED_BYTES_SPLICED_AND_EXCEEDED_SAFE_SPLICE_AMOUNT: + return "CALLED_BYTES_SPLICED_AND_EXCEEDED_SAFE_SPLICE_AMOUNT"; + case MULTIPLE_CONTENT_LENGTH_KEYS: + return "MULTIPLE_CONTENT_LENGTH_KEYS"; + case MULTIPLE_TRANSFER_ENCODING_KEYS: + return "MULTIPLE_TRANSFER_ENCODING_KEYS"; + case UNKNOWN_TRANSFER_ENCODING: + return "UNKNOWN_TRANSFER_ENCODING"; + case INVALID_HEADER_FORMAT: + return "INVALID_HEADER_FORMAT"; + case INTERNAL_LOGIC_ERROR: + return "INTERNAL_LOGIC_ERROR"; + case NUM_ERROR_CODES: + return "UNKNOWN_ERROR"; + } + return "UNKNOWN_ERROR"; +} + +// Summary: +// Parses the first line of either a request or response. 
+// Note that in the case of a detected warning, error_code will be set +// but the function will not return false. +// Exactly zero or one warning or error (but not both) may be detected +// by this function. +// Note that this function will not write the data of the first-line +// into the header's buffer (that should already have been done elsewhere). +// +// Pre-conditions: +// begin != end +// *begin should be a character which is > ' '. This implies that there +// is at least one non-whitespace character between [begin, end). +// headers is a valid pointer to a BalsaHeaders class. +// error_code is a valid pointer to a BalsaFrameEnums::ErrorCode value. +// Entire first line must exist between [begin, end) +// Exactly zero or one newlines -may- exist between [begin, end) +// [begin, end) should exist in the header's buffer. +// +// Side-effects: +// headers will be modified +// error_code may be modified if either a warning or error is detected +// +// Returns: +// True if no error (as opposed to warning) is detected. +// False if an error (as opposed to warning) is detected. + +// +// If there is indeed non-whitespace in the line, then the following +// will take care of this for you: +// while (*begin <= ' ') ++begin; +// ParseHTTPFirstLine(begin, end, is_request, max_request_uri_length, +// &headers, &error_code); +// +bool ParseHTTPFirstLine(const char* begin, + const char* end, + bool is_request, + size_t max_request_uri_length, + BalsaHeaders* headers, + BalsaFrameEnums::ErrorCode* error_code) { + const char* current = begin; + // HTTP firstlines all have the following structure: + // LWS NONWS LWS NONWS LWS NONWS NOTCRLF CRLF + // [\t \r\n]+ [^\t ]+ [\t ]+ [^\t ]+ [\t ]+ [^\t ]+ [^\r\n]+ "\r\n" + // ws1 nws1 ws2 nws2 ws3 nws3 ws4 + // | [-------) [-------) [----------------) + // REQ: method request_uri version + // RESP: version statuscode reason + // + // The first NONWS->LWS component we'll call firstline_a. + // The second firstline_b, and the third firstline_c. 
+ // + // firstline_a goes from nws1 to (but not including) ws2 + // firstline_b goes from nws2 to (but not including) ws3 + // firstline_c goes from nws3 to (but not including) ws4 + // + // In the code: + // ws1 == whitespace_1_idx_ + // nws1 == non_whitespace_1_idx_ + // ws2 == whitespace_2_idx_ + // nws2 == non_whitespace_2_idx_ + // ws3 == whitespace_3_idx_ + // nws3 == non_whitespace_3_idx_ + // ws4 == whitespace_4_idx_ + + // Kill all whitespace (including '\r\n') at the end of the line. + --end; + if (*end != '\n') { + *error_code = BalsaFrameEnums::INTERNAL_LOGIC_ERROR; + LOG(DFATAL) << "INTERNAL_LOGIC_ERROR Headers: \n" + << headers->OriginalHeadersForDebugging(); + return false; + } + while (begin < end && *end <= ' ') { + --end; + } + DCHECK(*end != '\n'); + if (*end == '\n') { + *error_code = BalsaFrameEnums::INTERNAL_LOGIC_ERROR; + LOG(DFATAL) << "INTERNAL_LOGIC_ERROR Headers: \n" + << headers->OriginalHeadersForDebugging(); + return false; + } + ++end; + + // The two following statements should not be possible. + if (end == begin) { + *error_code = BalsaFrameEnums::INTERNAL_LOGIC_ERROR; + LOG(DFATAL) << "INTERNAL_LOGIC_ERROR Headers: \n" + << headers->OriginalHeadersForDebugging(); + return false; + } + + // whitespace_1_idx_ + headers->whitespace_1_idx_ = current - begin; + // This loop is commented out as it is never used in current code. This is + // true only because we don't begin parsing the headers at all until we've + // encountered a non whitespace character at the beginning of the stream, at + // which point we begin our demarcation of header-start. If we did -not- do + // this (for instance, only looked for [\r\n] instead of (< ' ')), this loop + // would be necessary for the proper functioning of this parsing. 
+ // This is left here as this function may (in the future) be refactored out + // of the BalsaFrame class so that it may be shared between code in + // BalsaFrame and BalsaHeaders (where it would be used in some variant of the + // set_first_line() function (at which point it would be necessary). +#if 0 + while (*current <= ' ') { + ++current; + } +#endif + // non_whitespace_1_idx_ + headers->non_whitespace_1_idx_ = current - begin; + do { + // The first time through, we're guaranteed that the current character + // won't be a whitespace (else the loop above wouldn't have terminated). + // That implies that we're guaranteed to get at least one non-whitespace + // character if we get into this loop at all. + ++current; + if (current == end) { + headers->whitespace_2_idx_ = current - begin; + headers->non_whitespace_2_idx_ = current - begin; + headers->whitespace_3_idx_ = current - begin; + headers->non_whitespace_3_idx_ = current - begin; + headers->whitespace_4_idx_ = current - begin; + // FAILED_TO_FIND_WS_AFTER_REQUEST_METHOD for request + // FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION for response + *error_code = + static_cast<BalsaFrameEnums::ErrorCode>( + BalsaFrameEnums::FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION + + is_request); + if (!is_request) { // FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION + return false; + } + goto output_exhausted; + } + } while (*current > ' '); + // whitespace_2_idx_ + headers->whitespace_2_idx_ = current - begin; + do { + ++current; + // Note that due to the loop which consumes all of the whitespace + // at the end of the line, current can never == end while in this function. 
+ } while (*current <= ' '); + // non_whitespace_2_idx_ + headers->non_whitespace_2_idx_ = current - begin; + do { + ++current; + if (current == end) { + headers->whitespace_3_idx_ = current - begin; + headers->non_whitespace_3_idx_ = current - begin; + headers->whitespace_4_idx_ = current - begin; + // FAILED_TO_FIND_WS_AFTER_REQUEST_REQUEST_URI for request + // FAILED_TO_FIND_WS_AFTER_RESPONSE_STATUSCODE for response + *error_code = + static_cast<BalsaFrameEnums::ErrorCode>( + BalsaFrameEnums::FAILED_TO_FIND_WS_AFTER_RESPONSE_STATUSCODE + + is_request); + goto output_exhausted; + } + } while (*current > ' '); + // whitespace_3_idx_ + headers->whitespace_3_idx_ = current - begin; + do { + ++current; + // Note that due to the loop which consumes all of the whitespace + // at the end of the line, current can never == end while in this function. + } while (*current <= ' '); + // non_whitespace_3_idx_ + headers->non_whitespace_3_idx_ = current - begin; + headers->whitespace_4_idx_ = end - begin; + + output_exhausted: + // Note that we don't fail the parse immediately when parsing of the + // firstline fails. Depending on the protocol type, we may want to accept + // a firstline with only one or two elements, e.g., for HTTP/0.9: + // GET\r\n + // or + // GET /\r\n + // should be parsed without issue (though the visitor should know that + // parsing the entire line was not exactly as it should be). + // + // Eventually, these errors may be removed altogether, as the visitor can + // detect them on its own by examining the size of the various fields. + // headers->set_first_line(non_whitespace_1_idx_, current); + + if (is_request) { + if ((headers->whitespace_3_idx_ - headers->non_whitespace_2_idx_) > + max_request_uri_length) { + // For requests, we need at least the method. We could assume that a + // blank URI means "/". If version isn't stated, it should be assumed + // to be HTTP/0.9 by the visitor. 
+ *error_code = BalsaFrameEnums::REQUEST_URI_TOO_LONG; + return false; + } + } else { + headers->parsed_response_code_ = 0; + { + const char* parsed_response_code_current = + begin + headers->non_whitespace_2_idx_; + const char* parsed_response_code_end = begin + headers->whitespace_3_idx_; + const size_t kMaxDiv10 = numeric_limits<size_t>::max() / 10; + + // Convert a string of [0-9]* into an int. + // Note that this allows for the conversion of response codes which + // are outside the bounds of normal HTTP response codes (no checking + // is done to ensure that these are valid-- they're merely parsed)! + while (parsed_response_code_current < parsed_response_code_end) { + if (*parsed_response_code_current < '0' || + *parsed_response_code_current > '9') { + *error_code = BalsaFrameEnums::FAILED_CONVERTING_STATUS_CODE_TO_INT; + return false; + } + size_t status_code_x_10 = headers->parsed_response_code_ * 10; + uint8 c = *parsed_response_code_current - '0'; + if ((headers->parsed_response_code_ > kMaxDiv10) || + (numeric_limits<size_t>::max() - status_code_x_10) < c) { + // overflow. + *error_code = BalsaFrameEnums::FAILED_CONVERTING_STATUS_CODE_TO_INT; + return false; + } + headers->parsed_response_code_ = status_code_x_10 + c; + ++parsed_response_code_current; + } + } + } + return true; +} + +// begin - beginning of the firstline +// end - end of the firstline +// +// A precondition for this function is that there is non-whitespace between +// [begin, end). If this precondition is not met, the function will not perform +// as expected (and bad things may happen, and it will eat your first, second, +// and third unborn children!). +// +// Another precondition for this function is that [begin, end) includes +// at most one newline, which must be at the end of the line. 
+void BalsaFrame::ProcessFirstLine(const char* begin, const char* end) { + BalsaFrameEnums::ErrorCode previous_error = last_error_; + if (!ParseHTTPFirstLine(begin, + end, + is_request_, + max_request_uri_length_, + headers_, + &last_error_)) { + parse_state_ = BalsaFrameEnums::ERROR; + visitor_->HandleHeaderError(this); + return; + } + if (previous_error != last_error_) { + visitor_->HandleHeaderWarning(this); + } + + if (is_request_) { + int version_length = + headers_->whitespace_4_idx_ - headers_->non_whitespace_3_idx_; + visitor_->ProcessRequestFirstLine( + begin + headers_->non_whitespace_1_idx_, + headers_->whitespace_4_idx_ - headers_->non_whitespace_1_idx_, + begin + headers_->non_whitespace_1_idx_, + headers_->whitespace_2_idx_ - headers_->non_whitespace_1_idx_, + begin + headers_->non_whitespace_2_idx_, + headers_->whitespace_3_idx_ - headers_->non_whitespace_2_idx_, + begin + headers_->non_whitespace_3_idx_, + version_length); + if (version_length == 0) + parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ; + } else { + visitor_->ProcessResponseFirstLine( + begin + headers_->non_whitespace_1_idx_, + headers_->whitespace_4_idx_ - headers_->non_whitespace_1_idx_, + begin + headers_->non_whitespace_1_idx_, + headers_->whitespace_2_idx_ - headers_->non_whitespace_1_idx_, + begin + headers_->non_whitespace_2_idx_, + headers_->whitespace_3_idx_ - headers_->non_whitespace_2_idx_, + begin + headers_->non_whitespace_3_idx_, + headers_->whitespace_4_idx_ - headers_->non_whitespace_3_idx_); + } +} + +// 'stream_begin' points to the first character of the headers buffer. +// 'line_begin' points to the first character of the line. +// 'current' points to a char which is ':'. +// 'line_end' points to the position of '\n' + 1. +// 'line_begin' points to the position of first character of line. 
+void BalsaFrame::CleanUpKeyValueWhitespace( + const char* stream_begin, + const char* line_begin, + const char* current, + const char* line_end, + HeaderLineDescription* current_header_line) { + const char* colon_loc = current; + DCHECK_LT(colon_loc, line_end); + DCHECK_EQ(':', *colon_loc); + DCHECK_EQ(':', *current); + DCHECK_GE(' ', *line_end) << "\"" << string(line_begin, line_end) << "\""; + + // TODO(fenix): Investigate whether or not the bounds tests in the + // while loops here are redundant, and if so, remove them. + --current; + while (current > line_begin && *current <= ' ') --current; + current += (current != colon_loc); + current_header_line->key_end_idx = current - stream_begin; + + current = colon_loc; + DCHECK_EQ(':', *current); + ++current; + while (current < line_end && *current <= ' ') ++current; + current_header_line->value_begin_idx = current - stream_begin; + + DCHECK_GE(current_header_line->key_end_idx, + current_header_line->first_char_idx); + DCHECK_GE(current_header_line->value_begin_idx, + current_header_line->key_end_idx); + DCHECK_GE(current_header_line->last_char_idx, + current_header_line->value_begin_idx); +} + +inline void BalsaFrame::FindColonsAndParseIntoKeyValue() { + DCHECK(!lines_.empty()); + const char* stream_begin = headers_->OriginalHeaderStreamBegin(); + // The last line is always just a newline (and is uninteresting). + const Lines::size_type lines_size_m1 = lines_.size() - 1; +#if __SSE2__ + const __v16qi colons = { ':', ':', ':', ':', ':', ':', ':', ':', + ':', ':', ':', ':', ':', ':', ':', ':'}; + const char* header_lines_end_m16 = headers_->OriginalHeaderStreamEnd() - 16; +#endif // __SSE2__ + const char* current = stream_begin + lines_[1].first; + // This code is a bit more subtle than it may appear at first glance. + // This code looks for a colon in the current line... but it also looks + // beyond the current line. 
If there is no colon in the current line, then + // for each subsequent line (until the colon which -has- been found is + // associated with a line), no searching for a colon will be performed. In + // this way, we minimize the amount of bytes we have scanned for a colon. + for (Lines::size_type i = 1; i < lines_size_m1;) { + const char* line_begin = stream_begin + lines_[i].first; + + // Here we handle possible continuations. Note that we do not replace + // the '\n' in the line before a continuation (at least, as of now), + // which implies that any code which looks for a value must deal with + // "\r\n", etc -within- the line (and not just at the end of it). + for (++i; i < lines_size_m1; ++i) { + const char c = *(stream_begin + lines_[i].first); + if (c > ' ') { + // Not a continuation, so stop. Note that if the 'original' i = 1, + // and the next line is not a continuation, we'll end up with i = 2 + // when we break. This handles the incrementing of i for the outer + // loop. + break; + } + } + const char* line_end = stream_begin + lines_[i - 1].second; + DCHECK_LT(line_begin - stream_begin, line_end - stream_begin); + + // We cleanup the whitespace at the end of the line before doing anything + // else of interest as it allows us to do nothing when irregularly formatted + // headers are parsed (e.g. those with only keys, only values, or no colon). + // + // We're guaranteed to have *line_end > ' ' while line_end >= line_begin. + --line_end; + DCHECK_EQ('\n', *line_end) << "\"" << string(line_begin, line_end) << "\""; + while (*line_end <= ' ' && line_end > line_begin) { + --line_end; + } + ++line_end; + DCHECK_GE(' ', *line_end); + DCHECK_LT(line_begin, line_end); + + // We use '0' for the block idx, because we're always writing to the first + // block from the framer (we do this because the framer requires that the + // entire header sequence be in a contiguous buffer). 
+ headers_->header_lines_.push_back( + HeaderLineDescription(line_begin - stream_begin, + line_end - stream_begin, + line_end - stream_begin, + line_end - stream_begin, + 0)); + if (current >= line_end) { + last_error_ = BalsaFrameEnums::HEADER_MISSING_COLON; + visitor_->HandleHeaderWarning(this); + // Then the next colon will not be found within this header line-- time + // to try again with another header-line. + continue; + } else if (current < line_begin) { + // When this condition is true, the last detected colon was part of a + // previous line. We reset to the beginning of the line as we don't care + // about the presence of any colon before the beginning of the current + // line. + current = line_begin; + } +#if __SSE2__ + while (current < header_lines_end_m16) { + __m128i header_bytes = + _mm_loadu_si128(reinterpret_cast<const __m128i *>(current)); + __m128i colon_cmp = + _mm_cmpeq_epi8(header_bytes, reinterpret_cast<__m128i>(colons)); + int colon_msk = _mm_movemask_epi8(colon_cmp); + if (colon_msk == 0) { + current += 16; + continue; + } + current += (ffs(colon_msk) - 1); + if (current > line_end) { + break; + } + goto found_colon; + } +#endif // __SSE2__ + for (; current < line_end; ++current) { + if (*current != ':') { + continue; + } + goto found_colon; + } + // If we've gotten to here, then there was no colon + // in the line. The arguments we passed into the construction + // for the HeaderLineDescription object should be OK-- it assumes + // that the entire content is 'key' by default (which is true, as + // there was no colon, there can be no value). Note that this is a + // construct which is technically not allowed by the spec. 
+ last_error_ = BalsaFrameEnums::HEADER_MISSING_COLON; + visitor_->HandleHeaderWarning(this); + continue; + found_colon: + DCHECK_EQ(*current, ':'); + DCHECK_LE(current - stream_begin, line_end - stream_begin); + DCHECK_LE(stream_begin - stream_begin, current - stream_begin); + + HeaderLineDescription& current_header_line = headers_->header_lines_.back(); + current_header_line.key_end_idx = current - stream_begin; + current_header_line.value_begin_idx = current_header_line.key_end_idx; + if (current < line_end) { + ++current_header_line.key_end_idx; + + CleanUpKeyValueWhitespace(stream_begin, + line_begin, + current, + line_end, + &current_header_line); + } + } +} + +void BalsaFrame::ProcessContentLengthLine( + HeaderLines::size_type line_idx, + BalsaHeadersEnums::ContentLengthStatus* status, + size_t* length) { + const HeaderLineDescription& header_line = headers_->header_lines_[line_idx]; + const char* stream_begin = headers_->OriginalHeaderStreamBegin(); + const char* line_end = stream_begin + header_line.last_char_idx; + const char* value_begin = (stream_begin + header_line.value_begin_idx); + + if (value_begin >= line_end) { + // There is no non-whitespace value data. +#if DEBUGFRAMER + LOG(INFO) << "invalid content-length -- no non-whitespace value data"; +#endif + *status = BalsaHeadersEnums::INVALID_CONTENT_LENGTH; + return; + } + + *length = 0; + while (value_begin < line_end) { + if (*value_begin < '0' || *value_begin > '9') { + // bad! content-length found, and couldn't parse all of it! 
+ *status = BalsaHeadersEnums::INVALID_CONTENT_LENGTH; +#if DEBUGFRAMER + LOG(INFO) << "invalid content-length - non numeric character detected"; +#endif // DEBUGFRAMER + return; + } + const size_t kMaxDiv10 = numeric_limits<size_t>::max() / 10; + size_t length_x_10 = *length * 10; + const char c = *value_begin - '0'; + if (*length > kMaxDiv10 || + (numeric_limits<size_t>::max() - length_x_10) < c) { + *status = BalsaHeadersEnums::CONTENT_LENGTH_OVERFLOW; +#if DEBUGFRAMER + LOG(INFO) << "content-length overflow"; +#endif // DEBUGFRAMER + return; + } + *length = length_x_10 + c; + ++value_begin; + } +#if DEBUGFRAMER + LOG(INFO) << "content_length parsed: " << *length; +#endif // DEBUGFRAMER + *status = BalsaHeadersEnums::VALID_CONTENT_LENGTH; +} + +void BalsaFrame::ProcessTransferEncodingLine(HeaderLines::size_type line_idx) { + const HeaderLineDescription& header_line = headers_->header_lines_[line_idx]; + const char* stream_begin = headers_->OriginalHeaderStreamBegin(); + const char* line_end = stream_begin + header_line.last_char_idx; + const char* value_begin = stream_begin + header_line.value_begin_idx; + size_t value_length = line_end - value_begin; + + if ((value_length == 7) && + !strncasecmp(value_begin, "chunked", 7)) { + headers_->transfer_encoding_is_chunked_ = true; + } else if ((value_length == 8) && + !strncasecmp(value_begin, "identity", 8)) { + headers_->transfer_encoding_is_chunked_ = false; + } else { + last_error_ = BalsaFrameEnums::UNKNOWN_TRANSFER_ENCODING; + parse_state_ = BalsaFrameEnums::ERROR; + visitor_->HandleHeaderError(this); + return; + } +} + +namespace { +bool SplitStringPiece(StringPiece original, char delim, + StringPiece* before, StringPiece* after) { + const char* p = original.data(); + const char* end = p + original.size(); + + while (p != end) { + if (*p == delim) { + ++p; + } else { + const char* start = p; + while (++p != end && *p != delim) { + // Skip to the next occurrence of the delimiter. 
+ } + *before = StringPiece(start, p - start); + if (p != end) + *after = StringPiece(p + 1, end - (p + 1)); + else + *after = StringPiece(""); + StringPieceUtils::RemoveWhitespaceContext(before); + StringPieceUtils::RemoveWhitespaceContext(after); + return true; + } + } + + *before = original; + *after = ""; + return false; +} + +// TODO(phython): Fix this function to properly deal with quoted values. +// E.g. ";;foo", "\";;\"", or \"aa; +// The last example, the semi-colon is a separator between extensions. +void ProcessChunkExtensionsManual(StringPiece all_extensions, + BalsaHeaders* extensions) { + StringPiece extension; + StringPiece remaining; + StringPieceUtils::RemoveWhitespaceContext(&all_extensions); + SplitStringPiece(all_extensions, ';', &extension, &remaining); + while (!extension.empty()) { + StringPiece key; + StringPiece value; + SplitStringPiece(extension, '=', &key, &value); + if (!value.empty()) { + // Strip quotation marks if they exist. + if (!value.empty() && value[0] == '"') + value.remove_prefix(1); + if (!value.empty() && value[value.length() - 1] == '"') + value.remove_suffix(1); + } + + extensions->AppendHeader(key, value); + + StringPieceUtils::RemoveWhitespaceContext(&remaining); + SplitStringPiece(remaining, ';', &extension, &remaining); + } +} + +// TODO(phython): Fix this function to properly deal with quoted values. +// E.g. ";;foo", "\";;\"", or \"aa; +// The last example, the semi-colon is a separator between extensions. +void ProcessChunkExtensionsGoogle3(const char* input, size_t size, + BalsaHeaders* extensions) { + vector<StringPiece> key_values; + SplitStringPieceToVector(StringPiece(input, size), ";", &key_values, true); + for (int i = 0; i < key_values.size(); ++i) { + StringPiece key = key_values[i].substr(0, key_values[i].find('=')); + StringPiece value; + if (key.length() < key_values[i].length()) { + value = key_values[i].substr(key.length() + 1); + // Remove any leading and trailing whitespace. 
+ StringPieceUtils::RemoveWhitespaceContext(&value); + + // Strip quotation marks if they exist. + if (!value.empty() && value[0] == '"') + value.remove_prefix(1); + if (!value.empty() && value[value.length() - 1] == '"') + value.remove_suffix(1); + } + + // Strip the key whitespace after checking that there is a value. + StringPieceUtils::RemoveWhitespaceContext(&key); + extensions->AppendHeader(key, value); + } +} + +} // anonymous namespace + +void BalsaFrame::ProcessChunkExtensions(const char* input, size_t size, + BalsaHeaders* extensions) { +#if 0 + ProcessChunkExtensionsGoogle3(input, size, extensions); +#else + ProcessChunkExtensionsManual(StringPiece(input, size), extensions); +#endif +} + +void BalsaFrame::ProcessHeaderLines() { + HeaderLines::size_type content_length_idx = 0; + HeaderLines::size_type transfer_encoding_idx = 0; + + DCHECK(!lines_.empty()); +#if DEBUGFRAMER + LOG(INFO) << "******@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@**********\n"; +#endif // DEBUGFRAMER + + // There is no need to attempt to process headers if no header lines exist. + // There are at least two lines in the message which are not header lines. + // These two non-header lines are the first line of the message, and the + // last line of the message (which is an empty line). + // Thus, we test to see if we have more than two lines total before attempting + // to parse any header lines. + if (lines_.size() > 2) { + const char* stream_begin = headers_->OriginalHeaderStreamBegin(); + + // Then, for the rest of the header data, we parse these into key-value + // pairs. + FindColonsAndParseIntoKeyValue(); + // At this point, we've parsed all of the headers. Time to look for those + // headers which we require for framing. 
+ const HeaderLines::size_type + header_lines_size = headers_->header_lines_.size(); + for (HeaderLines::size_type i = 0; i < header_lines_size; ++i) { + const HeaderLineDescription& current_header_line = + headers_->header_lines_[i]; + const char* key_begin = + (stream_begin + current_header_line.first_char_idx); + const char* key_end = (stream_begin + current_header_line.key_end_idx); + const size_t key_len = key_end - key_begin; + const char c = *key_begin; +#if DEBUGFRAMER + LOG(INFO) << "[" << i << "]: " << string(key_begin, key_len) + << " c: '" << c << "' key_len: " << key_len; +#endif // DEBUGFRAMER + // If a header begins with either lowercase or uppercase 'c' or 't', then + // the header may be one of content-length, connection, content-encoding + // or transfer-encoding. These headers are special, as they change the way + // that the message is framed, and so the framer is required to search + // for them. + + + if (c == 'c' || c == 'C') { + if ((key_len == kContentLengthSize) && + 0 == strncasecmp(key_begin, kContentLength, kContentLengthSize)) { + BalsaHeadersEnums::ContentLengthStatus content_length_status = + BalsaHeadersEnums::NO_CONTENT_LENGTH; + size_t length = 0; + ProcessContentLengthLine(i, &content_length_status, &length); + if (content_length_idx != 0) { // then we've already seen one! 
+ // A repeated content-length header is tolerated only when it has the + // same status and (if valid) the same value as the first one seen. + if ((headers_->content_length_status_ != content_length_status) || + ((headers_->content_length_status_ == + BalsaHeadersEnums::VALID_CONTENT_LENGTH) && + length != headers_->content_length_)) { + last_error_ = BalsaFrameEnums::MULTIPLE_CONTENT_LENGTH_KEYS; + parse_state_ = BalsaFrameEnums::ERROR; + visitor_->HandleHeaderError(this); + return; + } + continue; + } else { + // Stored as index + 1 so that 0 keeps meaning "not seen yet". + content_length_idx = i + 1; + headers_->content_length_status_ = content_length_status; + headers_->content_length_ = length; + content_length_remaining_ = length; + } + + } + } else if (c == 't' || c == 'T') { + if ((key_len == kTransferEncodingSize) && + 0 == strncasecmp(key_begin, kTransferEncoding, + kTransferEncodingSize)) { + if (transfer_encoding_idx != 0) { + last_error_ = BalsaFrameEnums::MULTIPLE_TRANSFER_ENCODING_KEYS; + parse_state_ = BalsaFrameEnums::ERROR; + visitor_->HandleHeaderError(this); + return; + } + // Also index + 1; 0 means no transfer-encoding header was seen. + transfer_encoding_idx = i + 1; + } + } else if (i == 0 && (key_len == 0 || c == ' ')) { + last_error_ = BalsaFrameEnums::INVALID_HEADER_FORMAT; + parse_state_ = BalsaFrameEnums::ERROR; + visitor_->HandleHeaderError(this); + return; + } + } + // NOTE(review): this check runs before ProcessTransferEncodingLine() + // below, which is the only place in this file that sets + // transfer_encoding_is_chunked_ for the message being parsed. Confirm + // whether the content-length reset was meant to run after that call. + if (headers_->transfer_encoding_is_chunked_) { + headers_->content_length_ = 0; + headers_->content_length_status_ = BalsaHeadersEnums::NO_CONTENT_LENGTH; + content_length_remaining_ = 0; + } + if (transfer_encoding_idx != 0) { + // Undo the +1 bias to get the real header line index. + ProcessTransferEncodingLine(transfer_encoding_idx - 1); + } + } +} + +void BalsaFrame::AssignParseStateAfterHeadersHaveBeenParsed() { + // For responses, can't have a body if the request was a HEAD, or if it is + // one of these response-codes. rfc2616 section 4.3 + parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ; + if (is_request_ || + !(request_was_head_ || + (headers_->parsed_response_code_ >= 100 && + headers_->parsed_response_code_ < 200) || + (headers_->parsed_response_code_ == 204) || + (headers_->parsed_response_code_ == 304))) { + // Then we can have a body.
+ if (headers_->transfer_encoding_is_chunked_) { + // Note that + // if ( Transfer-Encoding: chunked && Content-length: ) + // then Transfer-Encoding: chunked trumps. + // This is as specified in the spec. + // rfc2616 section 4.4.3 + parse_state_ = BalsaFrameEnums::READING_CHUNK_LENGTH; + } else { + // Errors parsing content-length definitely can cause + // protocol errors/warnings + switch (headers_->content_length_status_) { + // If we have a content-length, and it is parsed + // properly, there are two options. + // 1) zero content, in which case the message is done, and + // 2) nonzero content, in which case we have to + // consume the body. + case BalsaHeadersEnums::VALID_CONTENT_LENGTH: + if (headers_->content_length_ == 0) { + parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ; + } else { + parse_state_ = BalsaFrameEnums::READING_CONTENT; + } + break; + case BalsaHeadersEnums::CONTENT_LENGTH_OVERFLOW: + case BalsaHeadersEnums::INVALID_CONTENT_LENGTH: + // If there were characters left-over after parsing the + // content length, we should flag an error and stop. + parse_state_ = BalsaFrameEnums::ERROR; + last_error_ = BalsaFrameEnums::UNPARSABLE_CONTENT_LENGTH; + visitor_->HandleHeaderError(this); + break; + // We can have: no transfer-encoding, no content length, and no + // connection: close... + // Unfortunately, this case doesn't seem to be covered in the spec. + // We'll assume that the safest thing to do here is what the google + // binaries before 2008 already do, which is to assume that + // everything until the connection is closed is body. + case BalsaHeadersEnums::NO_CONTENT_LENGTH: + if (is_request_) { + StringPiece method = headers_->request_method(); + // POSTs and PUTs should have a detectable body length. If they + // do not we consider it an error. 
+ if ((method.size() == 4 && + strncmp(method.data(), "POST", 4) == 0) || + (method.size() == 3 && + strncmp(method.data(), "PUT", 3) == 0)) { + parse_state_ = BalsaFrameEnums::ERROR; + last_error_ = + BalsaFrameEnums::REQUIRED_BODY_BUT_NO_CONTENT_LENGTH; + visitor_->HandleHeaderError(this); + break; + } + parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ; + } else { + parse_state_ = BalsaFrameEnums::READING_UNTIL_CLOSE; + last_error_ = BalsaFrameEnums::MAYBE_BODY_BUT_NO_CONTENT_LENGTH; + visitor_->HandleHeaderWarning(this); + } + break; + // The COV_NF_... statements here provide hints to the apparatus + // which computes coverage reports/ratios that this code is never + // intended to be executed, and should technically be impossible. + // COV_NF_START + default: + LOG(FATAL) << "Saw a content_length_status: " + << headers_->content_length_status_ << " which is unknown."; + // COV_NF_END + } + } + } +} + +size_t BalsaFrame::ProcessHeaders(const char* message_start, + size_t message_length) { + const char* const original_message_start = message_start; + const char* const message_end = message_start + message_length; + const char* message_current = message_start; + const char* checkpoint = message_start; + + if (message_length == 0) { + goto bottom; + } + + while (message_current < message_end) { + size_t base_idx = headers_->GetReadableBytesFromHeaderStream(); + + // Yes, we could use strchr (assuming null termination), or + // memchr, but as it turns out that is slower than this tight loop + // for the input that we see. 
+ if (!saw_non_newline_char_) { + // Skip any leading '\r' / '\n' bytes. The first other byte must be + // greater than ' ' or the message is rejected; otherwise it marks the + // start of the first line. + do { + const char c = *message_current; + if (c != '\r' && c != '\n') { + if (c <= ' ') { + parse_state_ = BalsaFrameEnums::ERROR; + last_error_ = BalsaFrameEnums::NO_REQUEST_LINE_IN_REQUEST; + visitor_->HandleHeaderError(this); + goto bottom; + } else { + saw_non_newline_char_ = true; + checkpoint = message_start = message_current; + goto read_real_message; + } + } + ++message_current; + } while (message_current < message_end); + goto bottom; // this is necessary to skip 'last_char_was_slash_r' checks + } else { + read_real_message: + // Note that SSE2 can be enabled on certain piii platforms. +#if __SSE2__ + { + const char* const message_end_m16 = message_end - 16; + __v16qi newlines = { '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n', + '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n' }; + while (message_current < message_end_m16) { + // What this does (using compiler intrinsics): + // + // Load 16 '\n's into an xmm register + // Load 16 bytes of current message into an xmm register + // Do byte-wise equals on those two xmm registers + // Take the most significant bit of each byte, and put that into the + // first 16 bits of a mask + // If the mask is zero, no '\n' found. increment by 16 and try again + // Else scan forward to find the first set bit.
+ // Increment current by the index of the first set bit + // (ffs returns index of first set bit + 1) + __m128i msg_bytes = + _mm_loadu_si128(const_cast<__m128i *>( + reinterpret_cast<const __m128i *>(message_current))); + __m128i newline_cmp = + _mm_cmpeq_epi8(msg_bytes, reinterpret_cast<__m128i>(newlines)); + int newline_msk = _mm_movemask_epi8(newline_cmp); + if (newline_msk == 0) { + message_current += 16; + continue; + } + message_current += (ffs(newline_msk) - 1); + const size_t relative_idx = message_current - message_start; + const size_t message_current_idx = 1 + base_idx + relative_idx; + lines_.push_back(make_pair(last_slash_n_idx_, message_current_idx)); + if (lines_.size() == 1) { + headers_->WriteFromFramer(checkpoint, + 1 + message_current - checkpoint); + checkpoint = message_current + 1; + const char* begin = headers_->OriginalHeaderStreamBegin(); +#if DEBUGFRAMER + LOG(INFO) << "First line " << string(begin, lines_[0].second); + LOG(INFO) << "is_request_: " << is_request_; +#endif + ProcessFirstLine(begin, begin + lines_[0].second); + if (parse_state_ == BalsaFrameEnums::MESSAGE_FULLY_READ) + goto process_lines; + else if (parse_state_ == BalsaFrameEnums::ERROR) + goto bottom; + } + const size_t chars_since_last_slash_n = (message_current_idx - + last_slash_n_idx_); + last_slash_n_idx_ = message_current_idx; + if (chars_since_last_slash_n > 2) { + // We have a slash-n, but the last slash n was + // more than 2 characters away from this. Thus, we know + // that this cannot be an end-of-header. 
+ ++message_current; + continue; + } + if ((chars_since_last_slash_n == 1) || + (((message_current > message_start) && + (*(message_current - 1) == '\r')) || + (last_char_was_slash_r_))) { + goto process_lines; + } + ++message_current; + } + } +#endif // __SSE2__ + while (message_current < message_end) { + if (*message_current != '\n') { + ++message_current; + continue; + } + const size_t relative_idx = message_current - message_start; + const size_t message_current_idx = 1 + base_idx + relative_idx; + lines_.push_back(make_pair(last_slash_n_idx_, message_current_idx)); + if (lines_.size() == 1) { + headers_->WriteFromFramer(checkpoint, + 1 + message_current - checkpoint); + checkpoint = message_current + 1; + const char* begin = headers_->OriginalHeaderStreamBegin(); +#if DEBUGFRAMER + LOG(INFO) << "First line " << string(begin, lines_[0].second); + LOG(INFO) << "is_request_: " << is_request_; +#endif + ProcessFirstLine(begin, begin + lines_[0].second); + if (parse_state_ == BalsaFrameEnums::MESSAGE_FULLY_READ) + goto process_lines; + else if (parse_state_ == BalsaFrameEnums::ERROR) + goto bottom; + } + const size_t chars_since_last_slash_n = (message_current_idx - + last_slash_n_idx_); + last_slash_n_idx_ = message_current_idx; + if (chars_since_last_slash_n > 2) { + // false positive. 
+ ++message_current; + continue; + } + if ((chars_since_last_slash_n == 1) || + (((message_current > message_start) && + (*(message_current - 1) == '\r')) || + (last_char_was_slash_r_))) { + goto process_lines; + } + ++message_current; + } + } + continue; + process_lines: + ++message_current; + DCHECK(message_current >= message_start); + if (message_current > message_start) { + headers_->WriteFromFramer(checkpoint, message_current - checkpoint); + } + + // Check if we have exceeded maximum headers length + // Although we check for this limit before and after we call this function + // we check it here as well to make sure that in case the visitor changed + // the max_header_length_ (for example after processing the first line) + // we handle it gracefully. + if (headers_->GetReadableBytesFromHeaderStream() > max_header_length_) { + parse_state_ = BalsaFrameEnums::ERROR; + last_error_ = BalsaFrameEnums::HEADERS_TOO_LONG; + visitor_->HandleHeaderError(this); + goto bottom; + } + + // Since we know that we won't be writing any more bytes of the header, + // we tell that to the headers object. The headers object may make + // more efficient allocation decisions when this is signaled. + headers_->DoneWritingFromFramer(); + { + const char* readable_ptr = NULL; + size_t readable_size = 0; + headers_->GetReadablePtrFromHeaderStream(&readable_ptr, &readable_size); + visitor_->ProcessHeaderInput(readable_ptr, readable_size); + } + + // Ok, now that we've written everything into our header buffer, it is + // time to process the header lines (extract proper values for headers + // which are important for framing). 
+ ProcessHeaderLines(); + if (parse_state_ == BalsaFrameEnums::ERROR) { + goto bottom; + } + AssignParseStateAfterHeadersHaveBeenParsed(); + if (parse_state_ == BalsaFrameEnums::ERROR) { + goto bottom; + } + visitor_->ProcessHeaders(*headers_); + visitor_->HeaderDone(); + if (parse_state_ == BalsaFrameEnums::MESSAGE_FULLY_READ) { + visitor_->MessageDone(); + } + goto bottom; + } + // If we've gotten to here, it means that we've consumed all of the + // available input. We need to record whether or not the last character we + // saw was a '\r' so that a subsequent call to ProcessInput correctly finds + // a header framing that is split across the two calls. + last_char_was_slash_r_ = (*(message_end - 1) == '\r'); + DCHECK(message_current >= message_start); + if (message_current > message_start) { + headers_->WriteFromFramer(checkpoint, message_current - checkpoint); + } + bottom: + return message_current - original_message_start; +} + + +size_t BalsaFrame::BytesSafeToSplice() const { + switch (parse_state_) { + case BalsaFrameEnums::READING_CHUNK_DATA: + return chunk_length_remaining_; + case BalsaFrameEnums::READING_UNTIL_CLOSE: + return numeric_limits<size_t>::max(); + case BalsaFrameEnums::READING_CONTENT: + return content_length_remaining_; + default: + return 0; + } +} + +void BalsaFrame::BytesSpliced(size_t bytes_spliced) { + switch (parse_state_) { + case BalsaFrameEnums::READING_CHUNK_DATA: + if (chunk_length_remaining_ >= bytes_spliced) { + chunk_length_remaining_ -= bytes_spliced; + if (chunk_length_remaining_ == 0) { + parse_state_ = BalsaFrameEnums::READING_CHUNK_TERM; + } + return; + } else { + last_error_ = + BalsaFrameEnums::CALLED_BYTES_SPLICED_AND_EXCEEDED_SAFE_SPLICE_AMOUNT; + goto error_exit; + } + + case BalsaFrameEnums::READING_UNTIL_CLOSE: + return; + + case BalsaFrameEnums::READING_CONTENT: + if (content_length_remaining_ >= bytes_spliced) { + content_length_remaining_ -= bytes_spliced; + if (content_length_remaining_ == 0) { + parse_state_ = 
BalsaFrameEnums::MESSAGE_FULLY_READ; + visitor_->MessageDone(); + } + return; + } else { + last_error_ = + BalsaFrameEnums::CALLED_BYTES_SPLICED_AND_EXCEEDED_SAFE_SPLICE_AMOUNT; + goto error_exit; + } + + default: + last_error_ = BalsaFrameEnums::CALLED_BYTES_SPLICED_WHEN_UNSAFE_TO_DO_SO; + goto error_exit; + } + + error_exit: + parse_state_ = BalsaFrameEnums::ERROR; + visitor_->HandleBodyError(this); +}; + +// You may note that the state-machine contained within this function has both +// switch and goto labels for nearly the same thing. For instance, the +// following two labels refer to the same code block: +// label_reading_chunk_data: +// case BalsaFrameEnums::READING_CHUNK_DATA: +// The 'case' statement is required for the switch statement which occurs when +// ProcessInput is invoked. The goto label is required as the state-machine +// does not use a computed goto in any subsequent operations. +// +// Since several states exit the state machine for various reasons, there is +// also one label at the bottom of the function. When it is appropriate to +// return from the function, that part of the state machine instead issues a +// goto bottom; This results in less code duplication, and makes debugging +// easier (as you can add a statement to a section of code which is guaranteed +// to be invoked when the function is exiting. +size_t BalsaFrame::ProcessInput(const char* input, size_t size) { + const char* current = input; + const char* on_entry = current; + const char* end = current + size; +#if DEBUGFRAMER + LOG(INFO) << "\n==============" + << BalsaFrameEnums::ParseStateToString(parse_state_) + << "===============\n"; +#endif // DEBUGFRAMER + + DCHECK(headers_ != NULL); + if (headers_ == NULL) return 0; + + if (parse_state_ == BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE) { + const size_t header_length = headers_->GetReadableBytesFromHeaderStream(); + // Yes, we still have to check this here as the user can change the + // max_header_length amount! 
+ // Also it is possible that we have reached the maximum allowed header size, + // and we have more to consume (remember we are still inside + // READING_HEADER_AND_FIRSTLINE) in which case we directly declare an error. + if (header_length > max_header_length_ || + (header_length == max_header_length_ && size > 0)) { + parse_state_ = BalsaFrameEnums::ERROR; + last_error_ = BalsaFrameEnums::HEADERS_TOO_LONG; + visitor_->HandleHeaderError(this); + goto bottom; + } + size_t bytes_to_process = max_header_length_ - header_length; + if (bytes_to_process > size) { + bytes_to_process = size; + } + current += ProcessHeaders(input, bytes_to_process); + // If we are still reading headers check if we have crossed the headers + // limit. Note that we check for >= as opposed to >. This is because if + // header_length_after equals max_header_length_ and we are still in the + // parse_state_ BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE we know for + // sure that the headers limit will be crossed later on + if ((parse_state_ == BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE)) { + // Note that headers_ is valid only if we are still reading headers. + const size_t header_length_after = + headers_->GetReadableBytesFromHeaderStream(); + if (header_length_after >= max_header_length_) { + parse_state_ = BalsaFrameEnums::ERROR; + last_error_ = BalsaFrameEnums::HEADERS_TOO_LONG; + visitor_->HandleHeaderError(this); + } + } + goto bottom; + } else if (parse_state_ == BalsaFrameEnums::MESSAGE_FULLY_READ || + parse_state_ == BalsaFrameEnums::ERROR) { + // Can do nothing more 'till we're reset. + goto bottom; + } + + while (current < end) { + switch (parse_state_) { + label_reading_chunk_length: + case BalsaFrameEnums::READING_CHUNK_LENGTH: + // In this state we read the chunk length. + // Note that once we hit a character which is not in: + // [0-9;A-Fa-f\n], we transition to a different state. + // + { + // If we used strtol, etc, we'd have to buffer this line. 
+ // This is more annoying than simply doing the conversion + // here. This code accounts for overflow. + // Table values: 0..15 is the value of a hex digit, -2 marks a byte + // that ends the length field (tab, LF, CR, space, ';'), and -1 marks + // any other byte (treated as an invalid chunk length). + static const signed char buf[] = { + // %0 %1 %2 %3 %4 %5 %6 %7 %8 \t \n %b %c \r %e %f + -1, -1, -1, -1, -1, -1, -1, -1, -1, -2, -2, -1, -1, -2, -1, -1, + // %10 %11 %12 %13 %14 %15 %16 %17 %18 %19 %1a %1b %1c %1d %1e %1f + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + // ' ' %21 %22 %23 %24 %25 %26 %27 %28 %29 %2a %2b %2c %2d %2e %2f + -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + // %30 %31 %32 %33 %34 %35 %36 %37 %38 %39 %3a ';' %3c %3d %3e %3f + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -2, -1, -1, -1, -1, + // %40 'A' 'B' 'C' 'D' 'E' 'F' %47 %48 %49 %4a %4b %4c %4d %4e %4f + -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, + // %50 %51 %52 %53 %54 %55 %56 %57 %58 %59 %5a %5b %5c %5d %5e %5f + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + // %60 'a' 'b' 'c' 'd' 'e' 'f' %67 %68 %69 %6a %6b %6c %6d %6e %6f + -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, + // %70 %71 %72 %73 %74 %75 %76 %77 %78 %79 %7a %7b %7c %7d %7e %7f + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + }; + // valid cases: + // "09123\n" // -> 09123 + // "09123\r\n" // -> 09123 + // "09123 \n" // -> 09123 + // "09123 \r\n" // -> 09123 + // "09123 12312\n" // -> 09123 + // "09123 12312\r\n" // -> 09123 + // "09123; foo=bar\n" // -> 09123 + // "09123; foo=bar\r\n" // -> 09123 + // "FFFFFFFFFFFFFFFF\r\n" // -> FFFFFFFFFFFFFFFF + // "FFFFFFFFFFFFFFFF 22\r\n" // -> FFFFFFFFFFFFFFFF + // invalid cases: + // "[ \t]+[^\n]*\n" + // "FFFFFFFFFFFFFFFFF\r\n" (would overflow) + // "\r\n" + // "\n" + while (current < end) { + const char c = *current; + ++current; + // buf only covers 7-bit values. A byte >= 0x80 would index outside + // the table (before it where char is signed, past it where char is + // unsigned), so map such bytes to -1 (invalid) instead of reading + // out of bounds. + const unsigned char table_idx = static_cast<unsigned char>(c); + const signed char addition = + (table_idx < sizeof(buf)) ? buf[table_idx] : -1; + if (addition >= 0) { + chunk_length_character_extracted_ = true; + size_t length_x_16 = chunk_length_remaining_ * 16; + const size_t kMaxDiv16 = numeric_limits<size_t>::max() / 16; + if
((chunk_length_remaining_ > kMaxDiv16) || + (numeric_limits<size_t>::max() - length_x_16) < addition) { + // overflow -- asked for a chunk-length greater than 2^64 - 1!! + parse_state_ = BalsaFrameEnums::ERROR; + last_error_ = BalsaFrameEnums::CHUNK_LENGTH_OVERFLOW; + visitor_->ProcessBodyInput(on_entry, current - on_entry); + visitor_->HandleChunkingError(this); + goto bottom; + } + chunk_length_remaining_ = length_x_16 + addition; + continue; + } + + if (!chunk_length_character_extracted_ || addition == -1) { + // ^[0-9;A-Fa-f][ \t\n] -- was not matched, either because no + // characters were converted, or an unexpected character was + // seen. + parse_state_ = BalsaFrameEnums::ERROR; + last_error_ = BalsaFrameEnums::INVALID_CHUNK_LENGTH; + visitor_->ProcessBodyInput(on_entry, current - on_entry); + visitor_->HandleChunkingError(this); + goto bottom; + } + + --current; + parse_state_ = BalsaFrameEnums::READING_CHUNK_EXTENSION; + visitor_->ProcessChunkLength(chunk_length_remaining_); + goto label_reading_chunk_extension; + } + } + visitor_->ProcessBodyInput(on_entry, current - on_entry); + goto bottom; // case BalsaFrameEnums::READING_CHUNK_LENGTH + + label_reading_chunk_extension: + case BalsaFrameEnums::READING_CHUNK_EXTENSION: + { + // TODO(phython): Convert this scanning to be 16 bytes at a time if + // there is data to be read. + const char* extensions_start = current; + size_t extensions_length = 0; + while (current < end) { + const char c = *current; + if (c == '\r' || c == '\n') { + extensions_length = + (extensions_start == current) ? 
+ 0 : + current - extensions_start - 1; + } + + ++current; + if (c == '\n') { + chunk_length_character_extracted_ = false; + visitor_->ProcessChunkExtensions( + extensions_start, extensions_length); + if (chunk_length_remaining_ != 0) { + parse_state_ = BalsaFrameEnums::READING_CHUNK_DATA; + goto label_reading_chunk_data; + } + HeaderFramingFound('\n'); + parse_state_ = BalsaFrameEnums::READING_LAST_CHUNK_TERM; + goto label_reading_last_chunk_term; + } + } + visitor_->ProcessChunkExtensions( + extensions_start, extensions_length); + } + + visitor_->ProcessBodyInput(on_entry, current - on_entry); + goto bottom; // case BalsaFrameEnums::READING_CHUNK_EXTENSION + + label_reading_chunk_data: + case BalsaFrameEnums::READING_CHUNK_DATA: + while (current < end) { + if (chunk_length_remaining_ == 0) { + break; + } + // read in the chunk + size_t bytes_remaining = end - current; + size_t consumed_bytes = (chunk_length_remaining_ < bytes_remaining) ? + chunk_length_remaining_ : bytes_remaining; + const char* tmp_current = current + consumed_bytes; + visitor_->ProcessBodyInput(on_entry, tmp_current - on_entry); + visitor_->ProcessBodyData(current, consumed_bytes); + on_entry = current = tmp_current; + chunk_length_remaining_ -= consumed_bytes; + } + if (chunk_length_remaining_ == 0) { + parse_state_ = BalsaFrameEnums::READING_CHUNK_TERM; + goto label_reading_chunk_term; + } + visitor_->ProcessBodyInput(on_entry, current - on_entry); + goto bottom; // case BalsaFrameEnums::READING_CHUNK_DATA + + label_reading_chunk_term: + case BalsaFrameEnums::READING_CHUNK_TERM: + while (current < end) { + const char c = *current; + ++current; + + if (c == '\n') { + parse_state_ = BalsaFrameEnums::READING_CHUNK_LENGTH; + goto label_reading_chunk_length; + } + } + visitor_->ProcessBodyInput(on_entry, current - on_entry); + goto bottom; // case BalsaFrameEnums::READING_CHUNK_TERM + + label_reading_last_chunk_term: + case BalsaFrameEnums::READING_LAST_CHUNK_TERM: + while (current < end) { + 
const char c = *current; + + if (!HeaderFramingFound(c)) { + // If not, however, since the spec only suggests that the + // client SHOULD indicate the presence of trailers, we get to + // *test* that they did or didn't. + // If all of the bytes we've seen since: + // OPTIONAL_WS 0 OPTIONAL_STUFF CRLF + // are either '\r', or '\n', then we can assume that we don't yet + // know if we need to parse headers, or if the next byte will make + // the HeaderFramingFound condition (above) true. + if (HeaderFramingMayBeFound()) { + // If true, then we have seen only characters '\r' or '\n'. + ++current; + + // Lets try again! There is no state change here. + continue; + } else { + // If (!HeaderFramingMayBeFound()), then we know that we must be + // reading the first non CRLF character of a trailer. + parse_state_ = BalsaFrameEnums::READING_TRAILER; + visitor_->ProcessBodyInput(on_entry, current - on_entry); + on_entry = current; + goto label_reading_trailer; + } + } else { + // If we've found a "\r\n\r\n", then the message + // is done. 
+ ++current; + parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ; + visitor_->ProcessBodyInput(on_entry, current - on_entry); + visitor_->MessageDone(); + goto bottom; + } + break; // from while loop + } + visitor_->ProcessBodyInput(on_entry, current - on_entry); + goto bottom; // case BalsaFrameEnums::READING_LAST_CHUNK_TERM + + label_reading_trailer: + case BalsaFrameEnums::READING_TRAILER: + // Consume trailer bytes one at a time until HeaderFramingFound(c) + // returns true; the bytes are handed to the visitor unparsed. + while (current < end) { + const char c = *current; + ++current; + // TODO(fenix): If we ever care about trailers as part of framing, + // deal with them here (see below for part of the 'solution') + // if (LineFramingFound(c)) { + // trailer_lines_.push_back(make_pair(start_of_line_, + // trailer_length_ - 1)); + // start_of_line_ = trailer_length_; + // } + if (HeaderFramingFound(c)) { + // ProcessTrailers(visitor_, &trailers_); + parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ; + visitor_->ProcessTrailerInput(on_entry, current - on_entry); + visitor_->MessageDone(); + goto bottom; + } + } + visitor_->ProcessTrailerInput(on_entry, current - on_entry); + break; // case BalsaFrameEnums::READING_TRAILER + + // Note that there is no label: + // 'label_reading_until_close' + // here. This is because the state-machine exits immediately after + // reading the headers instead of transitioning here (as it would + // do if it was consuming all the data it could, all the time).
+ case BalsaFrameEnums::READING_UNTIL_CLOSE: + { + const size_t bytes_remaining = end - current; + if (bytes_remaining > 0) { + visitor_->ProcessBodyInput(current, bytes_remaining); + visitor_->ProcessBodyData(current, bytes_remaining); + current += bytes_remaining; + } + } + goto bottom; // case BalsaFrameEnums::READING_UNTIL_CLOSE + + // label_reading_content: + case BalsaFrameEnums::READING_CONTENT: +#if DEBUGFRAMER + LOG(INFO) << "ReadingContent: " << content_length_remaining_; +#endif // DEBUGFRAMER + while (content_length_remaining_ && current < end) { + // read in the content + const size_t bytes_remaining = end - current; + const size_t consumed_bytes = + (content_length_remaining_ < bytes_remaining) ? + content_length_remaining_ : bytes_remaining; + visitor_->ProcessBodyInput(current, consumed_bytes); + visitor_->ProcessBodyData(current, consumed_bytes); + current += consumed_bytes; + content_length_remaining_ -= consumed_bytes; + } + if (content_length_remaining_ == 0) { + parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ; + visitor_->MessageDone(); + } + goto bottom; // case BalsaFrameEnums::READING_CONTENT + + default: + // The state-machine should never be in a state that isn't handled + // above. This is a glaring logic error, and we should do something + // drastic to ensure that this gets looked-at and fixed. 
+ LOG(FATAL) << "Unknown state: " << parse_state_ // COV_NF_LINE + << " memory corruption?!"; // COV_NF_LINE + } + } + bottom: +#if DEBUGFRAMER + LOG(INFO) << "\n^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n\n" + << string(input, current) + << "\n$$$$$$$$$$$$$$" + << BalsaFrameEnums::ParseStateToString(parse_state_) + << "$$$$$$$$$$$$$$$" + << " consumed: " << (current - input); + if (Error()) { + LOG(INFO) << BalsaFrameEnums::ErrorCodeToString(ErrorCode()); + } +#endif // DEBUGFRAMER + return current - input; +} + +const int32 BalsaFrame::kValidTerm1; +const int32 BalsaFrame::kValidTerm1Mask; +const int32 BalsaFrame::kValidTerm2; +const int32 BalsaFrame::kValidTerm2Mask; + +} // namespace gfe2 + diff --git a/net/tools/flip_server/balsa_frame.h b/net/tools/flip_server/balsa_frame.h new file mode 100644 index 0000000..25e9e54 --- /dev/null +++ b/net/tools/flip_server/balsa_frame.h @@ -0,0 +1,283 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef NET_TOOLS_FLIP_SERVER_BALSA_FRAME_H_ +#define NET_TOOLS_FLIP_SERVER_BALSA_FRAME_H_ + +#include <strings.h> + +#include <string> +#include <utility> +#include <vector> + +#include "base/port.h" +#include "net/tools/flip_server/balsa_enums.h" +#include "net/tools/flip_server/balsa_headers.h" +#include "net/tools/flip_server/balsa_visitor_interface.h" +#include "net/tools/flip_server/buffer_interface.h" +#include "net/tools/flip_server/http_message_constants.h" +#include "net/tools/flip_server/simple_buffer.h" + +// For additional debug output, uncomment the following: +// #define DEBUGFRAMER 1 + +namespace gfe2 { + +// BalsaFrame is a 'Model' of a framer (haha). +// It exists as a proof of concept headers framer. 
+class BalsaFrame { + public: + typedef vector<pair<size_t, size_t> > Lines; + + typedef BalsaHeaders::HeaderLineDescription HeaderLineDescription; + typedef BalsaHeaders::HeaderLines HeaderLines; + typedef BalsaHeaders::HeaderTokenList HeaderTokenList; + + // TODO(fenix): get rid of the 'kValidTerm*' stuff by using the 'since last + // index' strategy. Note that this implies getting rid of the HeaderFramed() + + static const int32 kValidTerm1 = '\n' << 16 | + '\r' << 8 | + '\n'; + static const int32 kValidTerm1Mask = 0xFF << 16 | + 0xFF << 8 | + 0xFF; + static const int32 kValidTerm2 = '\n' << 8 | + '\n'; + static const int32 kValidTerm2Mask = 0xFF << 8 | + 0xFF; + BalsaFrame() : + last_char_was_slash_r_(false), + saw_non_newline_char_(false), + start_was_space_(true), + chunk_length_character_extracted_(false), + is_request_(true), + request_was_head_(false), + max_header_length_(16 * 1024), + max_request_uri_length_(2048), + visitor_(&do_nothing_visitor_), + chunk_length_remaining_(0), + content_length_remaining_(0), + last_slash_n_loc_(NULL), + last_recorded_slash_n_loc_(NULL), + last_slash_n_idx_(0), + term_chars_(0), + parse_state_(BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE), + last_error_(BalsaFrameEnums::NO_ERROR), + headers_(NULL) {} + + ~BalsaFrame() {} + + // Reset reinitializes all the member variables of the framer and clears the + // attached header object (but doesn't change the pointer value headers_). + void Reset(); + + const BalsaHeaders* const_balsa_headers() const { return headers_; } + BalsaHeaders* balsa_headers() { return headers_; } + // The method set_balsa_headers clears the headers provided and attaches them + // to the framer. This is a required step before the framer will process any + // input message data. + // To detach the header object from the framer, use set_balsa_headers(NULL). 
+ void set_balsa_headers(BalsaHeaders* headers) { + if (headers_ != headers) { + headers_ = headers; + } + if (headers_) { + // Clear the headers if they are non-null, even if the new headers are + // the same as the old. + headers_->Clear(); + } + } + + // Sets the visitor. Passing NULL installs the built-in do-nothing visitor, + // so visitor_ is never left NULL. + void set_balsa_visitor(BalsaVisitorInterface* visitor) { + visitor_ = visitor; + if (visitor_ == NULL) { + visitor_ = &do_nothing_visitor_; + } + } + + // Accessors for is_request_ (true by default) and request_was_head_ (false + // by default). The comments on the member declarations state that neither + // flag is reset in Reset(). + void set_is_request(bool is_request) { is_request_ = is_request; } + + bool is_request() const { + return is_request_; + } + + void set_request_was_head(bool request_was_head) { + request_was_head_ = request_was_head; + } + + bool request_was_head() const { + return request_was_head_; + } + + // Accessors for the two length limits. The constructor defaults them to + // 16 * 1024 (max_header_length_) and 2048 (max_request_uri_length_). + void set_max_header_length(size_t max_header_length) { + max_header_length_ = max_header_length; + } + + size_t max_header_length() const { + return max_header_length_; + } + + void set_max_request_uri_length(size_t max_request_uri_length) { + max_request_uri_length_ = max_request_uri_length; + } + + size_t max_request_uri_length() const { + return max_request_uri_length_; + } + + + // True when parse_state_ is MESSAGE_FULLY_READ. + bool MessageFullyRead() { + return parse_state_ == BalsaFrameEnums::MESSAGE_FULLY_READ; + } + + BalsaFrameEnums::ParseState ParseState() const { return parse_state_; } + + + // True when parse_state_ is ERROR. ErrorCode() returns last_error_. + bool Error() { + return parse_state_ == BalsaFrameEnums::ERROR; + } + + BalsaFrameEnums::ErrorCode ErrorCode() const { return last_error_; } + + // Both return the attached header object (the same headers_ pointer that + // balsa_headers() returns). + const BalsaHeaders* headers() const { return headers_; } + BalsaHeaders* mutable_headers() { return headers_; } + + size_t BytesSafeToSplice() const; + void BytesSpliced(size_t bytes_spliced); + + // Returns the number of bytes of input that were consumed. + size_t ProcessInput(const char* input, size_t size); + + // Parses input and puts the key, value chunk extensions into extensions. + // TODO(phython): Find a better data structure to put the extensions into. 
+ static void ProcessChunkExtensions(const char* input, size_t size, + BalsaHeaders* extensions); + + protected: + // The utils object needs access to the ParseTokenList in order to do its + // job. + friend class BalsaHeadersTokenUtils; + + inline void ProcessContentLengthLine( + size_t line_idx, + BalsaHeadersEnums::ContentLengthStatus* status, + size_t* length); + + inline void ProcessTransferEncodingLine(size_t line_idx); + + void ProcessFirstLine(const char* begin, + const char* end); + + void CleanUpKeyValueWhitespace( + const char* stream_begin, + const char* line_begin, + const char* current, + const char* line_end, + HeaderLineDescription* current_header_line); + + void FindColonsAndParseIntoKeyValue(); + + void ProcessHeaderLines(); + + inline size_t ProcessHeaders(const char* message_start, + size_t message_length); + + void AssignParseStateAfterHeadersHaveBeenParsed(); + + inline bool LineFramingFound(char current_char) { + return current_char == '\n'; + } + + // TODO(fenix): get rid of the following function and its uses (and + // replace with something more efficient) + inline bool HeaderFramingFound(char current_char) { + // Note that the 'if (current_char == '\n' ...)' test exists to ensure that + // the HeaderFramingMayBeFound test works properly. In benchmarking done on + // 2/13/2008, the 'if' actually speeds up performance of the function + // anyway.. + if (current_char == '\n' || current_char == '\r') { + term_chars_ <<= 8; + // This is necessary IFF architecture has > 8 bit char. Alas, I'm + // paranoid. 
+ term_chars_ |= current_char & 0xFF; + + if ((term_chars_ & kValidTerm1Mask) == kValidTerm1) { + term_chars_ = 0; + return true; + } + if ((term_chars_ & kValidTerm2Mask) == kValidTerm2) { + term_chars_ = 0; + return true; + } + } else { + term_chars_ = 0; + } + return false; + } + + inline bool HeaderFramingMayBeFound() const { + return term_chars_ != 0; + } + + private: + class DoNothingBalsaVisitor : public BalsaVisitorInterface { + virtual void ProcessBodyInput(const char *input, size_t size) {} + virtual void ProcessBodyData(const char *input, size_t size) {} + virtual void ProcessHeaderInput(const char *input, size_t size) {} + virtual void ProcessTrailerInput(const char *input, size_t size) {} + virtual void ProcessHeaders(const BalsaHeaders& headers) {} + virtual void ProcessRequestFirstLine(const char* line_input, + size_t line_length, + const char* method_input, + size_t method_length, + const char* request_uri_input, + size_t request_uri_length, + const char* version_input, + size_t version_length) {} + virtual void ProcessResponseFirstLine(const char *line_input, + size_t line_length, + const char *version_input, + size_t version_length, + const char *status_input, + size_t status_length, + const char *reason_input, + size_t reason_length) {} + virtual void ProcessChunkLength(size_t chunk_length) {} + virtual void ProcessChunkExtensions(const char *input, size_t size) {} + virtual void HeaderDone() {} + virtual void MessageDone() {} + virtual void HandleHeaderError(BalsaFrame* framer) {} + virtual void HandleHeaderWarning(BalsaFrame* framer) {} + virtual void HandleChunkingError(BalsaFrame* framer) {} + virtual void HandleBodyError(BalsaFrame* framer) {} + }; + + bool last_char_was_slash_r_; + bool saw_non_newline_char_; + bool start_was_space_; + bool chunk_length_character_extracted_; + bool is_request_; // This is not reset in Reset() + bool request_was_head_; // This is not reset in Reset() + size_t max_header_length_; // This is not reset in 
Reset() + size_t max_request_uri_length_; // This is not reset in Reset() + BalsaVisitorInterface* visitor_; + size_t chunk_length_remaining_; + size_t content_length_remaining_; + const char* last_slash_n_loc_; + const char* last_recorded_slash_n_loc_; + size_t last_slash_n_idx_; + uint32 term_chars_; + BalsaFrameEnums::ParseState parse_state_; + BalsaFrameEnums::ErrorCode last_error_; + + Lines lines_; + + BalsaHeaders* headers_; // This is not reset to NULL in Reset(). + DoNothingBalsaVisitor do_nothing_visitor_; +}; + +} // namespace gfe2; + +#endif // NET_TOOLS_FLIP_SERVER_BALSA_FRAME_H_ + diff --git a/net/tools/flip_server/balsa_headers.cc b/net/tools/flip_server/balsa_headers.cc new file mode 100644 index 0000000..c957a82 --- /dev/null +++ b/net/tools/flip_server/balsa_headers.cc @@ -0,0 +1,748 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "net/tools/flip_server/balsa_headers.h" + +#include <emmintrin.h> + +#include <algorithm> +#include <hash_set> +#include <string> +#include <utility> +#include <vector> + +#include "base/googleinit.h" +#include "base/port.h" +#include "net/tools/flip_server/balsa_enums.h" +#include "net/tools/flip_server/buffer_interface.h" +#include "net/tools/flip_server/simple_buffer.h" +#include "strings/stringpiece.h" +#include "strings/strutil.h" +// #include "util/gtl/iterator_adaptors-inl.h" +// #include "util/gtl/map-util.h" + +namespace { + +const char kContentLength[] = "Content-Length"; +const char kTransferEncoding[] = "Transfer-Encoding"; +const char kSpaceChar = ' '; + +hash_set<StringPiece, StringPieceCaseHash, StringPieceCaseEqual> +g_multivalued_headers; + +void InitMultivaluedHeaders() { + g_multivalued_headers.insert("accept"); + g_multivalued_headers.insert("accept-charset"); + g_multivalued_headers.insert("accept-encoding"); + g_multivalued_headers.insert("accept-language"); + 
g_multivalued_headers.insert("accept-ranges"); + g_multivalued_headers.insert("allow"); + g_multivalued_headers.insert("cache-control"); + g_multivalued_headers.insert("connection"); + g_multivalued_headers.insert("content-encoding"); + g_multivalued_headers.insert("content-language"); + g_multivalued_headers.insert("expect"); + g_multivalued_headers.insert("if-match"); + g_multivalued_headers.insert("if-none-match"); + g_multivalued_headers.insert("pragma"); + g_multivalued_headers.insert("proxy-authenticate"); + g_multivalued_headers.insert("te"); + g_multivalued_headers.insert("trailer"); + g_multivalued_headers.insert("transfer-encoding"); + g_multivalued_headers.insert("upgrade"); + g_multivalued_headers.insert("vary"); + g_multivalued_headers.insert("via"); + g_multivalued_headers.insert("warning"); + g_multivalued_headers.insert("www-authenticate"); + // Not mentioned in RFC 2616, but it can have multiple values. + g_multivalued_headers.insert("set-cookie"); +} + +REGISTER_MODULE_INITIALIZER(multivalued_headers, InitMultivaluedHeaders()); + +} // namespace + +namespace gfe2 { + +const size_t BalsaBuffer::kDefaultBlocksize; + +void BalsaHeaders::Clear() { + balsa_buffer_.Clear(); + transfer_encoding_is_chunked_ = false; + content_length_ = 0; + content_length_status_ = BalsaHeadersEnums::NO_CONTENT_LENGTH; + parsed_response_code_ = 0; + firstline_buffer_base_idx_ = 0; + whitespace_1_idx_ = 0; + non_whitespace_1_idx_ = 0; + whitespace_2_idx_ = 0; + non_whitespace_2_idx_ = 0; + whitespace_3_idx_ = 0; + non_whitespace_3_idx_ = 0; + whitespace_4_idx_ = 0; + end_of_firstline_idx_ = 0; + header_lines_.clear(); +} + +void BalsaHeaders::Swap(BalsaHeaders* other) { + // Protect against swapping with self. 
+ if (this == other) return; + + balsa_buffer_.Swap(&other->balsa_buffer_); + + bool tmp_bool = transfer_encoding_is_chunked_; + transfer_encoding_is_chunked_ = other->transfer_encoding_is_chunked_; + other->transfer_encoding_is_chunked_ = tmp_bool; + + size_t tmp_size_t = content_length_; + content_length_ = other->content_length_; + other->content_length_ = tmp_size_t; + + BalsaHeadersEnums::ContentLengthStatus tmp_status = + content_length_status_; + content_length_status_ = other->content_length_status_; + other->content_length_status_ = tmp_status; + + tmp_size_t = parsed_response_code_; + parsed_response_code_ = other->parsed_response_code_; + other->parsed_response_code_ = tmp_size_t; + + BalsaBuffer::Blocks::size_type tmp_blk_idx = firstline_buffer_base_idx_; + firstline_buffer_base_idx_ = other->firstline_buffer_base_idx_; + other->firstline_buffer_base_idx_ = tmp_blk_idx; + + tmp_size_t = whitespace_1_idx_; + whitespace_1_idx_ = other->whitespace_1_idx_; + other->whitespace_1_idx_ = tmp_size_t; + + tmp_size_t = non_whitespace_1_idx_; + non_whitespace_1_idx_ = other->non_whitespace_1_idx_; + other->non_whitespace_1_idx_ = tmp_size_t; + + tmp_size_t = whitespace_2_idx_; + whitespace_2_idx_ = other->whitespace_2_idx_; + other->whitespace_2_idx_ = tmp_size_t; + + tmp_size_t = non_whitespace_2_idx_; + non_whitespace_2_idx_ = other->non_whitespace_2_idx_; + other->non_whitespace_2_idx_ = tmp_size_t; + + tmp_size_t = whitespace_3_idx_; + whitespace_3_idx_ = other->whitespace_3_idx_; + other->whitespace_3_idx_ = tmp_size_t; + + tmp_size_t = non_whitespace_3_idx_; + non_whitespace_3_idx_ = other->non_whitespace_3_idx_; + other->non_whitespace_3_idx_ = tmp_size_t; + + tmp_size_t = whitespace_4_idx_; + whitespace_4_idx_ = other->whitespace_4_idx_; + other->whitespace_4_idx_ = tmp_size_t; + + tmp_size_t = end_of_firstline_idx_; + end_of_firstline_idx_ = other->end_of_firstline_idx_; + other->end_of_firstline_idx_ = tmp_size_t; + + swap(header_lines_, 
other->header_lines_); +} + +void BalsaHeaders::CopyFrom(const BalsaHeaders& other) { + // Protect against copying with self. + if (this == &other) return; + + balsa_buffer_.CopyFrom(other.balsa_buffer_); + transfer_encoding_is_chunked_ = other.transfer_encoding_is_chunked_; + content_length_ = other.content_length_; + content_length_status_ = other.content_length_status_; + parsed_response_code_ = other.parsed_response_code_; + firstline_buffer_base_idx_ = other.firstline_buffer_base_idx_; + whitespace_1_idx_ = other.whitespace_1_idx_; + non_whitespace_1_idx_ = other.non_whitespace_1_idx_; + whitespace_2_idx_ = other.whitespace_2_idx_; + non_whitespace_2_idx_ = other.non_whitespace_2_idx_; + whitespace_3_idx_ = other.whitespace_3_idx_; + non_whitespace_3_idx_ = other.non_whitespace_3_idx_; + whitespace_4_idx_ = other.whitespace_4_idx_; + end_of_firstline_idx_ = other.end_of_firstline_idx_; + header_lines_ = other.header_lines_; +} + +void BalsaHeaders::AddAndMakeDescription(const StringPiece& key, + const StringPiece& value, + HeaderLineDescription* d) { + CHECK(d != NULL); + // + 2 to size for ": " + size_t line_size = key.size() + 2 + value.size(); + BalsaBuffer::Blocks::size_type block_buffer_idx = 0; + char* storage = balsa_buffer_.Reserve(line_size, &block_buffer_idx); + size_t base_idx = storage - GetPtr(block_buffer_idx); + + char* cur_loc = storage; + memcpy(cur_loc, key.data(), key.size()); + cur_loc += key.size(); + *cur_loc = ':'; + ++cur_loc; + *cur_loc = ' '; + ++cur_loc; + memcpy(cur_loc, value.data(), value.size()); + *d = HeaderLineDescription(base_idx, + base_idx + key.size(), + base_idx + key.size() + 2, + base_idx + key.size() + 2 + value.size(), + block_buffer_idx); +} + +void BalsaHeaders::AppendOrPrependAndMakeDescription(const StringPiece& key, + const StringPiece& value, + bool append, + HeaderLineDescription* d) { + // Figure out how much space we need to reserve for the new header size. 
+ size_t old_value_size = d->last_char_idx - d->value_begin_idx; + if (old_value_size == 0) { + AddAndMakeDescription(key, value, d); + return; + } + StringPiece old_value(GetPtr(d->buffer_base_idx) + d->value_begin_idx, + old_value_size); + + BalsaBuffer::Blocks::size_type block_buffer_idx = 0; + // + 3 because we potentially need to add ": ", and "," to the line. + size_t new_size = key.size() + 3 + old_value_size + value.size(); + char* storage = balsa_buffer_.Reserve(new_size, &block_buffer_idx); + size_t base_idx = storage - GetPtr(block_buffer_idx); + + StringPiece first_value = old_value; + StringPiece second_value = value; + if (!append) { // !append == prepend + first_value = value; + second_value = old_value; + } + char* cur_loc = storage; + memcpy(cur_loc, key.data(), key.size()); + cur_loc += key.size(); + *cur_loc = ':'; + ++cur_loc; + *cur_loc = ' '; + ++cur_loc; + memcpy(cur_loc, first_value.data(), first_value.size()); + cur_loc += first_value.size(); + *cur_loc = ','; + ++cur_loc; + memcpy(cur_loc, second_value.data(), second_value.size()); + + *d = HeaderLineDescription(base_idx, + base_idx + key.size(), + base_idx + key.size() + 2, + base_idx + new_size, + block_buffer_idx); +} + +// Removes all keys value pairs with key 'key' starting at 'start'. +void BalsaHeaders::RemoveAllOfHeaderStartingAt(const StringPiece& key, + HeaderLines::iterator start) { + while (start != header_lines_.end()) { + start->skip = true; + ++start; + start = GetHeaderLinesIterator(key, start); + } +} + +void BalsaHeaders::HackHeader(const StringPiece& key, + const StringPiece& value) { + // See TODO in balsa_headers.h + const HeaderLines::iterator end = header_lines_.end(); + const HeaderLines::iterator begin = header_lines_.begin(); + HeaderLines::iterator i = GetHeaderLinesIteratorNoSkip(key, begin); + if (i != end) { + // First, remove all of the header lines including this one. 
We want to + // remove before replacing, in case our replacement ends up being appended + // at the end (and thus would be removed by this call) + RemoveAllOfHeaderStartingAt(key, i); + // Now add the replacement, at this location. + AddAndMakeDescription(key, value, &(*i)); + return; + } + AppendHeader(key, value); +} + +void BalsaHeaders::HackAppendToHeader(const StringPiece& key, + const StringPiece& append_value) { + // See TODO in balsa_headers.h + const HeaderLines::iterator end = header_lines_.end(); + const HeaderLines::iterator begin = header_lines_.begin(); + + HeaderLines::iterator i = GetHeaderLinesIterator(key, begin); + if (i == end) { + HackHeader(key, append_value); + return; + } + + AppendOrPrependAndMakeDescription(key, append_value, true, &(*i)); +} + +void BalsaHeaders::ReplaceOrAppendHeader(const StringPiece& key, + const StringPiece& value) { + const HeaderLines::iterator end = header_lines_.end(); + const HeaderLines::iterator begin = header_lines_.begin(); + HeaderLines::iterator i = GetHeaderLinesIterator(key, begin); + if (i != end) { + // First, remove all of the header lines including this one. We want to + // remove before replacing, in case our replacement ends up being appended + // at the end (and thus would be removed by this call) + RemoveAllOfHeaderStartingAt(key, i); + // Now, take the first instance and replace it. This will remove the + // 'skipped' tag if the replacement is done in-place. 
+ AddAndMakeDescription(key, value, &(*i)); + return; + } + AppendHeader(key, value); +} + +void BalsaHeaders::AppendHeader(const StringPiece& key, + const StringPiece& value) { + HeaderLineDescription hld; + AddAndMakeDescription(key, value, &hld); + header_lines_.push_back(hld); +} + +void BalsaHeaders::AppendToHeader(const StringPiece& key, + const StringPiece& value) { + AppendOrPrependToHeader(key, value, true); +} + +void BalsaHeaders::PrependToHeader(const StringPiece& key, + const StringPiece& value) { + AppendOrPrependToHeader(key, value, false); +} + +StringPiece BalsaHeaders::GetValueFromHeaderLineDescription( + const HeaderLineDescription& line) const { + DCHECK_GE(line.last_char_idx, line.value_begin_idx); + return StringPiece(GetPtr(line.buffer_base_idx) + line.value_begin_idx, + line.last_char_idx - line.value_begin_idx); +} + +const StringPiece BalsaHeaders::GetHeader(const StringPiece& key) const { + DCHECK(!IsMultivaluedHeader(key)) + << "Header '" << key << "' may consist of multiple lines. 
Do not " + << "use BalsaHeaders::GetHeader() or you may be missing some of its " + << "values."; + const HeaderLines::const_iterator end = header_lines_.end(); + const HeaderLines::const_iterator begin = header_lines_.begin(); + HeaderLines::const_iterator i = GetConstHeaderLinesIterator(key, begin); + if (i == end) { + return StringPiece(NULL, 0); + } + return GetValueFromHeaderLineDescription(*i); +} + +BalsaHeaders::const_header_lines_iterator BalsaHeaders::GetHeaderPosition( + const StringPiece& key) const { + const HeaderLines::const_iterator end = header_lines_.end(); + const HeaderLines::const_iterator begin = header_lines_.begin(); + HeaderLines::const_iterator i = GetConstHeaderLinesIterator(key, begin); + if (i == end) { + return header_lines_end(); + } + + return const_header_lines_iterator(this, (i - begin)); +} + +BalsaHeaders::const_header_lines_key_iterator BalsaHeaders::GetIteratorForKey( + const StringPiece& key) const { + HeaderLines::const_iterator i = + GetConstHeaderLinesIterator(key, header_lines_.begin()); + if (i == header_lines_.end()) { + return header_lines_key_end(); + } + + const HeaderLines::const_iterator begin = header_lines_.begin(); + return const_header_lines_key_iterator(this, (i - begin), key); +} + +void BalsaHeaders::AppendOrPrependToHeader(const StringPiece& key, + const StringPiece& value, + bool append) { + HeaderLines::iterator i = GetHeaderLinesIterator(key, header_lines_.begin()); + if (i == header_lines_.end()) { + // The header did not exist already. Instead of appending to an existing + // header simply append the key/value pair to the headers. + AppendHeader(key, value); + return; + } + HeaderLineDescription hld = *i; + + AppendOrPrependAndMakeDescription(key, value, append, &hld); + + // Invalidate the old header line and add the new one. 
+ i->skip = true; + header_lines_.push_back(hld); +} + +BalsaHeaders::HeaderLines::const_iterator +BalsaHeaders::GetConstHeaderLinesIterator( + const StringPiece& key, + BalsaHeaders::HeaderLines::const_iterator start) const { + const HeaderLines::const_iterator end = header_lines_.end(); + for (HeaderLines::const_iterator i = start; i != end; ++i) { + const HeaderLineDescription& line = *i; + if (line.skip) { + continue; + } + const size_t key_len = line.key_end_idx - line.first_char_idx; + + if (key_len != key.size()) { + continue; + } + if (strncasecmp(GetPtr(line.buffer_base_idx) + line.first_char_idx, + key.data(), key_len) == 0) { + DCHECK_GE(line.last_char_idx, line.value_begin_idx); + return i; + } + } + return end; +} + +BalsaHeaders::HeaderLines::iterator BalsaHeaders::GetHeaderLinesIteratorNoSkip( + const StringPiece& key, + BalsaHeaders::HeaderLines::iterator start) { + const HeaderLines::iterator end = header_lines_.end(); + for (HeaderLines::iterator i = start; i != end; ++i) { + const HeaderLineDescription& line = *i; + const size_t key_len = line.key_end_idx - line.first_char_idx; + + if (key_len != key.size()) { + continue; + } + if (strncasecmp(GetPtr(line.buffer_base_idx) + line.first_char_idx, + key.data(), key_len) == 0) { + DCHECK_GE(line.last_char_idx, line.value_begin_idx); + return i; + } + } + return end; +} + +BalsaHeaders::HeaderLines::iterator BalsaHeaders::GetHeaderLinesIterator( + const StringPiece& key, + BalsaHeaders::HeaderLines::iterator start) { + const HeaderLines::iterator end = header_lines_.end(); + for (HeaderLines::iterator i = start; i != end; ++i) { + const HeaderLineDescription& line = *i; + if (line.skip) { + continue; + } + const size_t key_len = line.key_end_idx - line.first_char_idx; + + if (key_len != key.size()) { + continue; + } + if (strncasecmp(GetPtr(line.buffer_base_idx) + line.first_char_idx, + key.data(), key_len) == 0) { + DCHECK_GE(line.last_char_idx, line.value_begin_idx); + return i; + } + } + return 
end; +} + +void BalsaHeaders::GetAllOfHeader( + const StringPiece& key, vector<StringPiece>* out) const { + for (const_header_lines_key_iterator it = GetIteratorForKey(key); + it != header_lines_end(); ++it) { + out->push_back(it->second); + } +} + +bool BalsaHeaders::HasNonEmptyHeader(const StringPiece& key) const { + for (const_header_lines_key_iterator it = GetIteratorForKey(key); + it != header_lines_key_end(); ++it) { + if (!it->second.empty()) + return true; + } + return false; +} + +void BalsaHeaders::GetAllOfHeaderAsString(const StringPiece& key, + string* out) const { + const_header_lines_iterator it = header_lines_begin(); + const_header_lines_iterator end = header_lines_end(); + + for (; it != end; ++it) { + if (key == it->first) { + if (!out->empty()) { + out->append(","); + } + out->append(string(it->second.data(), it->second.size())); + } + } +} + +// static +bool BalsaHeaders::IsMultivaluedHeader(const StringPiece& header) { + return g_multivalued_headers.find(header) != g_multivalued_headers.end(); +} + +void BalsaHeaders::RemoveAllOfHeader(const StringPiece& key) { + HeaderLines::iterator it = GetHeaderLinesIterator(key, header_lines_.begin()); + RemoveAllOfHeaderStartingAt(key, it); +} + +void BalsaHeaders::RemoveAllHeadersWithPrefix(const StringPiece& key) { + for (HeaderLines::size_type i = 0; i < header_lines_.size(); ++i) { + if (header_lines_[i].skip) { + continue; + } + HeaderLineDescription& line = header_lines_[i]; + const size_t key_len = line.key_end_idx - line.first_char_idx; + if (key_len < key.size()) { + // If the key given to us is longer than this header, don't consider it. 
+ continue; + } + if (!strncasecmp(GetPtr(line.buffer_base_idx) + line.first_char_idx, + key.data(), key.size())) { + line.skip = true; + } + } +} + +size_t BalsaHeaders::GetMemoryUsedLowerBound() const { + return (sizeof(*this) + + balsa_buffer_.GetTotalBufferBlockSize() + + header_lines_.capacity() * sizeof(HeaderLineDescription)); +} + +size_t BalsaHeaders::GetSizeForWriteBuffer() const { + // First add the space required for the first line + CRLF + size_t write_buf_size = whitespace_4_idx_ - non_whitespace_1_idx_ + 2; + // Then add the space needed for each header line to write out + CRLF. + const HeaderLines::size_type end = header_lines_.size(); + for (HeaderLines::size_type i = 0; i < end; ++i) { + const HeaderLineDescription& line = header_lines_[i]; + if (!line.skip) { + // Add the key size and ": ". + write_buf_size += line.key_end_idx - line.first_char_idx + 2; + // Add the value size and the CRLF + write_buf_size += line.last_char_idx - line.value_begin_idx + 2; + } + } + // Finally tag on the terminal CRLF. + return write_buf_size + 2; +} + +void BalsaHeaders::DumpToString(string* str) const { + const StringPiece firstline = first_line(); + const int buffer_length = + OriginalHeaderStreamEnd() - OriginalHeaderStreamBegin(); + // First check whether the header object is empty. + if (firstline.empty() && buffer_length == 0) { + str->append("\n<empty header>\n"); + return; + } + + // Then check whether the header is in a partially parsed state. If so, just + // dump the raw data. + if (balsa_buffer_.can_write_to_contiguous_buffer()) { + StringAppendF(str, "\n<incomplete header len: %d>\n%.*s\n", + buffer_length, buffer_length, OriginalHeaderStreamBegin()); + return; + } + + // If the header is complete, then just dump them with the logical key value + // pair. 
+ str->reserve(str->size() + GetSizeForWriteBuffer()); + StringAppendF(str, "\n %.*s\n", firstline.size(), firstline.data()); + BalsaHeaders::const_header_lines_iterator i = header_lines_begin(); + for (; i != header_lines_end(); ++i) { + StringAppendF(str, " %.*s: %.*s\n", + i->first.size(), i->first.data(), + i->second.size(), i->second.data()); + } +} + +void BalsaHeaders::SetFirstLine(const StringPiece& line) { + StringPiece new_line = balsa_buffer_.Write(line, &firstline_buffer_base_idx_); + whitespace_1_idx_ = new_line.data() - GetPtr(firstline_buffer_base_idx_); + non_whitespace_1_idx_ = whitespace_1_idx_; + whitespace_4_idx_ = whitespace_1_idx_ + line.size(); + whitespace_2_idx_ = whitespace_4_idx_; + non_whitespace_2_idx_ = whitespace_4_idx_; + whitespace_3_idx_ = whitespace_4_idx_; + non_whitespace_3_idx_ = whitespace_4_idx_; + end_of_firstline_idx_ = whitespace_4_idx_; +} + +void BalsaHeaders::SetContentLength(size_t length) { + // If the content-length is already the one we want, don't do anything. + if (content_length_status_ == BalsaHeadersEnums::VALID_CONTENT_LENGTH && + content_length_ == length) { + return; + } + const StringPiece content_length(kContentLength, sizeof(kContentLength) - 1); + // If header state indicates that there is either a content length or + // transfer encoding header, remove them before adding the new content + // length. There is always the possibility that client can manually add + // either header directly and cause content_length_status_ or + // transfer_encoding_is_chunked_ to be inconsistent with the actual header. + // In the interest of efficiency, however, we will assume that clients will + // use the header object correctly and thus we will not scan the all headers + // each time this function is called. 
+ if (content_length_status_ != BalsaHeadersEnums::NO_CONTENT_LENGTH) { + RemoveAllOfHeader(content_length); + } else if (transfer_encoding_is_chunked_) { + const StringPiece transfer_encoding(kTransferEncoding, + sizeof(kTransferEncoding) - 1); + RemoveAllOfHeader(transfer_encoding); + transfer_encoding_is_chunked_ = false; + } + content_length_status_ = BalsaHeadersEnums::VALID_CONTENT_LENGTH; + content_length_ = length; + // FastUInt64ToBuffer is supposed to use a maximum of kFastToBufferSize bytes. + char buffer[kFastToBufferSize]; + const char* endp = FastUInt64ToBufferLeft(length, buffer); + const StringPiece length_str(buffer, endp - buffer); + AppendHeader(content_length, length_str); +} + +void BalsaHeaders::SetChunkEncoding(bool chunk_encode) { + if (transfer_encoding_is_chunked_ == chunk_encode) { + return; + } + if (content_length_status_ != BalsaHeadersEnums::NO_CONTENT_LENGTH && + chunk_encode) { + // Want to change to chunk encoding, but have content length. Arguably we + // can leave this step out, since transfer-encoding overrides + // content-length. + const StringPiece content_length(kContentLength, + sizeof(kContentLength) - 1); + RemoveAllOfHeader(content_length); + content_length_status_ = BalsaHeadersEnums::NO_CONTENT_LENGTH; + content_length_ = 0; + } + const StringPiece transfer_encoding(kTransferEncoding, + sizeof(kTransferEncoding) - 1); + if (chunk_encode) { + const char kChunked[] = "chunked"; + const StringPiece chunked(kChunked, sizeof(kChunked) - 1); + AppendHeader(transfer_encoding, chunked); + } else { + RemoveAllOfHeader(transfer_encoding); + } + transfer_encoding_is_chunked_ = chunk_encode; +} + +// See the comment about this function in the header file for a +// warning about its usage. 
+void BalsaHeaders::SetFirstlineFromStringPieces( + const StringPiece& firstline_a, + const StringPiece& firstline_b, + const StringPiece& firstline_c) { + size_t line_size = (firstline_a.size() + + firstline_b.size() + + firstline_c.size() + + 2); + char* storage = balsa_buffer_.Reserve(line_size, &firstline_buffer_base_idx_); + char* cur_loc = storage; + + memcpy(cur_loc, firstline_a.data(), firstline_a.size()); + cur_loc += firstline_a.size(); + + *cur_loc = ' '; + ++cur_loc; + + memcpy(cur_loc, firstline_b.data(), firstline_b.size()); + cur_loc += firstline_b.size(); + + *cur_loc = ' '; + ++cur_loc; + + memcpy(cur_loc, firstline_c.data(), firstline_c.size()); + + whitespace_1_idx_ = storage - GetPtr(firstline_buffer_base_idx_); + non_whitespace_1_idx_ = whitespace_1_idx_; + whitespace_2_idx_ = non_whitespace_1_idx_ + firstline_a.size(); + non_whitespace_2_idx_ = whitespace_2_idx_ + 1; + whitespace_3_idx_ = non_whitespace_2_idx_ + firstline_b.size(); + non_whitespace_3_idx_ = whitespace_3_idx_ + 1; + whitespace_4_idx_ = non_whitespace_3_idx_ + firstline_c.size(); + end_of_firstline_idx_ = whitespace_4_idx_; +} + +void BalsaHeaders::SetRequestMethod(const StringPiece& method) { + // This is the first of the three parts of the firstline. + if (method.size() <= (whitespace_2_idx_ - non_whitespace_1_idx_)) { + non_whitespace_1_idx_ = whitespace_2_idx_ - method.size(); + char* stream_begin = GetPtr(firstline_buffer_base_idx_); + memcpy(stream_begin + non_whitespace_1_idx_, + method.data(), + method.size()); + } else { + // The new method is too large to fit in the space available for the old + // one, so we have to reformat the firstline. + SetFirstlineFromStringPieces(method, request_uri(), request_version()); + } +} + +void BalsaHeaders::SetResponseVersion(const StringPiece& version) { + // Note: There is no difference between request_method() and + // response_Version(). Thus, a function to set one is equivalent to a + // function to set the other. 
We maintain two functions for this as it is + // much more descriptive, and makes code more understandable. + SetRequestMethod(version); +} + +void BalsaHeaders::SetRequestUri(const StringPiece& uri) { + SetFirstlineFromStringPieces(request_method(), uri, request_version()); +} + +void BalsaHeaders::SetResponseCode(const StringPiece& code) { + // Note: There is no difference between request_uri() and response_code(). + // Thus, a function to set one is equivalent to a function to set the other. + // We maintain two functions for this as it is much more descriptive, and + // makes code more understandable. + SetRequestUri(code); +} + +void BalsaHeaders::SetParsedResponseCodeAndUpdateFirstline( + size_t parsed_response_code) { + char buffer[kFastToBufferSize]; + const char* endp = FastUInt32ToBufferLeft(parsed_response_code, buffer); + parsed_response_code_ = parsed_response_code; + + SetResponseCode(StringPiece(buffer, endp - buffer)); +} + +void BalsaHeaders::SetRequestVersion(const StringPiece& version) { + // This is the last of the three parts of the firstline. + // Since whitespace_3_idx and non_whitespace_3_idx may point to the same + // place, we ensure below that any available space includes space for a + // literal space (' ') character between the second component and the third + // component. If the space between whitespace_3_idx_ and + // end_of_firstline_idx_ is >= to version.size() + 1 (for the space), then we + // can update the firstline in-place. 
+ char* stream_begin = GetPtr(firstline_buffer_base_idx_); + if (version.size() + 1 <= end_of_firstline_idx_ - whitespace_3_idx_) { + *(stream_begin + whitespace_3_idx_) = kSpaceChar; + non_whitespace_3_idx_ = whitespace_3_idx_ + 1; + whitespace_4_idx_ = non_whitespace_3_idx_ + version.size(); + memcpy(stream_begin + non_whitespace_3_idx_, + version.data(), + version.size()); + } else { + // The new version is too large to fit in the space available for the old + // one, so we have to reformat the firstline. + SetFirstlineFromStringPieces(request_method(), request_uri(), version); + } +} + +void BalsaHeaders::SetResponseReasonPhrase(const StringPiece& reason) { + // Note: There is no difference between request_version() and + // response_reason_phrase(). Thus, a function to set one is equivalent to a + // function to set the other. We maintain two functions for this as it is + // much more descriptive, and makes code more understandable. + SetRequestVersion(reason); +} + +} // namespace gfe2 + diff --git a/net/tools/flip_server/balsa_headers.h b/net/tools/flip_server/balsa_headers.h new file mode 100644 index 0000000..8685ac5 --- /dev/null +++ b/net/tools/flip_server/balsa_headers.h @@ -0,0 +1,1306 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef NET_TOOLS_FLIP_SERVER_BALSA_HEADERS_H_ +#define NET_TOOLS_FLIP_SERVER_BALSA_HEADERS_H_ + +#include <iostream> +#include <iterator> +#include <string> +#include <utility> +#include <vector> + +#include "base/port.h" +#include "net/tools/flip_server/balsa_enums.h" +#ifdef CHROMIUM +#include "base/string_piece.h" + +void SplitStringPieceToVector(StringPiece, char*, vector<StringPiece>*, bool) { + ... 
+} + +struct StringPieceCaseHash { + size_t operator()(const StringPiece& sp) const { + // based on __stl_string_hash in http://www.sgi.com/tech/stl/string + unsigned long hash_val = 0; + for (StringPiece::const_iterator it = sp.begin(); + it != sp.end(); ++it) { + hash_val = 5 * hash_val + ascii_tolower(*it); + } + return static_cast<size_t>(hash_val); + } +}; + +struct StringPieceCaseEqual { + bool operator()(const StringPiece& piece1, const StringPiece& piece2) const { + return StringPieceUtils::EqualIgnoreCase(piece1, piece2); + } +}; + +#else +#include "strings/stringpiece.h" +#include "strings/stringpiece_utils.h" +#endif + +namespace gfe2 { + +// WARNING: +// Note that -no- char* returned by any function in this +// file is null-terminated. + +// This class exists to service the specific needs of BalsaHeaders. +// +// Functional goals: +// 1) provide a backing-store for all of the StringPieces that BalsaHeaders +// returns. Every StringPiece returned from BalsaHeaders should remain +// valid until the BalsaHeader's object is cleared, or the header-line is +// erased. +// 2) provide a backing-store for BalsaFrame, which requires contiguous memory +// for its fast-path parsing functions. Note that the cost of copying is +// less than the cost of requiring the parser to do slow-path parsing, as +// it would have to check for bounds every byte, instead of every 16 bytes. +// +// This class is optimized for the case where headers are stored in one of two +// buffers. It doesn't make a lot of effort to densely pack memory-- in fact, +// it -may- be somewhat memory inefficient. This possible inefficiency allows a +// certain simplicity of implementation and speed which makes it worthwhile. +// If, in the future, better memory density is required, it should be possible +// to reuse the abstraction presented by this object to achieve those goals. +// +// In the most common use-case, this memory inefficiency should be relatively +// small. 
+// +// Alternate implementations of BalsaBuffer may include: +// - vector of strings, one per header line (similar to HTTPHeaders) +// - densely packed strings: +// - keep a sorted array/map of free-space linked lists or numbers. +// - use the entry that most closely first your needs. +// - at this point, perhaps just use a vector of strings, and let +// the allocator do the right thing. +// +class BalsaBuffer { + public: + static const size_t kDefaultBlocksize = 4096; + // We have two friends here. These exist as friends as we + // want to allow access to the constructors for the test + // class and the Balsa* classes. We put this into the + // header file as we want this class to be inlined into the + // BalsaHeaders implementation, yet be testable. + friend class BalsaBufferTestSpouse; + friend class BalsaHeaders; + + // The BufferBlock is a structure used internally by the + // BalsaBuffer class to store the base buffer pointers to + // each block, as well as the important metadata for buffer + // sizes and bytes free. + struct BufferBlock { + public: + char* buffer; + size_t buffer_size; + size_t bytes_free; + + size_t bytes_used() const { + return buffer_size - bytes_free; + } + char* start_of_unused_bytes() const { + return buffer + bytes_used(); + } + + BufferBlock() : buffer(NULL), buffer_size(0), bytes_free(0) {} + ~BufferBlock() {} + + BufferBlock(char* buf, size_t size, size_t free) : + buffer(buf), buffer_size(size), bytes_free(free) {} + // Yes we want this to be copyable (it gets stuck into vectors). + // For this reason, we don't use scoped ptrs, etc. here-- it + // is more efficient to manage this memory externally to this + // object. + }; + + typedef vector<BufferBlock> Blocks; + + ~BalsaBuffer() { + CleanupBlocksStartingFrom(0); + } + + // Returns the total amount of memory used by the buffer blocks. 
+ size_t GetTotalBufferBlockSize() const { + size_t buffer_size = 0; + for (Blocks::const_iterator iter = blocks_.begin(); + iter != blocks_.end(); + ++iter) { + buffer_size += iter->buffer_size; + } + return buffer_size; + } + + const char* GetPtr(Blocks::size_type block_idx) const { + DCHECK_LT(block_idx, blocks_.size()) + << block_idx << ", " << blocks_.size(); + return blocks_[block_idx].buffer; + } + + char* GetPtr(Blocks::size_type block_idx) { + DCHECK_LT(block_idx, blocks_.size()) + << block_idx << ", " << blocks_.size(); + return blocks_[block_idx].buffer; + } + + // This function is different from Write(), as it ensures that the data + // stored via subsequent calls to this function are all contiguous (and in + // the order in which these writes happened). This is essentially the same + // as a string append. + // + // You may call this function at any time between object + // construction/Clear(), and the calling of the + // NoMoreWriteToContiguousBuffer() function. + // + // You must not call this function after the NoMoreWriteToContiguousBuffer() + // function is called, unless a Clear() has been called since. + // If you do, the program will abort(). + // + // This condition is placed upon this code so that calls to Write() can + // append to the buffer in the first block safely, and without invaliding + // the StringPiece which it returns. + // + // This function's main intended user is the BalsaFrame class, which, + // for reasons of efficiency, requires that the buffer from which it parses + // the headers be contiguous. + // + void WriteToContiguousBuffer(const StringPiece& sp) { + if (sp.empty()) { + return; + } + CHECK(can_write_to_contiguous_buffer_); + DCHECK_GE(blocks_.size(), 1); + if (blocks_[0].buffer == NULL && sp.size() <= blocksize_) { + blocks_[0] = AllocBlock(); + memcpy(blocks_[0].start_of_unused_bytes(), sp.data(), sp.size()); + } else if (blocks_[0].bytes_free < sp.size()) { + // the first block isn't big enough, resize it. 
+ const size_t old_storage_size_used = blocks_[0].bytes_used(); + const size_t new_storage_size = old_storage_size_used + sp.size(); + char* new_storage = new char[new_storage_size]; + char* old_storage = blocks_[0].buffer; + if (old_storage_size_used) { + memcpy(new_storage, old_storage, old_storage_size_used); + } + memcpy(new_storage + old_storage_size_used, sp.data(), sp.size()); + blocks_[0].buffer = new_storage; + blocks_[0].bytes_free = sp.size(); + blocks_[0].buffer_size = new_storage_size; + delete[] old_storage; + } else { + memcpy(blocks_[0].start_of_unused_bytes(), sp.data(), sp.size()); + } + blocks_[0].bytes_free -= sp.size(); + } + + void NoMoreWriteToContiguousBuffer() { + can_write_to_contiguous_buffer_ = false; + } + + // Takes a StringPiece and writes it to "permanent" storage, then returns a + // StringPiece which points to that data. If block_idx != NULL, it will be + // assigned the index of the block into which the data was stored. + // Note that the 'permanent' storage in which it stores data may be in + // the first block IFF the NoMoreWriteToContiguousBuffer function has + // been called since the last Clear/Construction. + StringPiece Write(const StringPiece& sp, + Blocks::size_type* block_buffer_idx) { + if (sp.empty()) { + return sp; + } + char* storage = Reserve(sp.size(), block_buffer_idx); + memcpy(storage, sp.data(), sp.size()); + return StringPiece(storage, sp.size()); + } + + // Reserves "permanent" storage of the size indicated. Returns a pointer to + // the beginning of that storage, and assigns the index of the block used to + // block_buffer_idx. This function uses the first block IFF the + // NoMoreWriteToContiguousBuffer function has been called since the last + // Clear/Construction. + char* Reserve(size_t size, + Blocks::size_type* block_buffer_idx) { + // There should always be a 'first_block', even if it + // contains nothing. 
+ DCHECK_GE(blocks_.size(), 1); + BufferBlock* block = NULL; + Blocks::size_type block_idx = can_write_to_contiguous_buffer_ ? 1 : 0; + for (; block_idx < blocks_.size(); ++block_idx) { + if (blocks_[block_idx].bytes_free >= size) { + block = &blocks_[block_idx]; + break; + } + } + if (block == NULL) { + if (blocksize_ < size) { + blocks_.push_back(AllocCustomBlock(size)); + } else { + blocks_.push_back(AllocBlock()); + } + block = &blocks_.back(); + } + + char* storage = block->start_of_unused_bytes(); + block->bytes_free -= size; + if (block_buffer_idx) { + *block_buffer_idx = block_idx; + } + return storage; + } + + void Clear() { + CHECK(!blocks_.empty()); + if (blocksize_ == blocks_[0].buffer_size) { + CleanupBlocksStartingFrom(1); + blocks_[0].bytes_free = blocks_[0].buffer_size; + } else { + CleanupBlocksStartingFrom(0); + blocks_.push_back(AllocBlock()); + } + DCHECK_GE(blocks_.size(), 1); + can_write_to_contiguous_buffer_ = true; + } + + void Swap(BalsaBuffer* b) { + blocks_.swap(b->blocks_); + swap(can_write_to_contiguous_buffer_, b->can_write_to_contiguous_buffer_); + swap(blocksize_, b->blocksize_); + } + + void CopyFrom(const BalsaBuffer& b) { + CleanupBlocksStartingFrom(0); + blocks_.resize(b.blocks_.size()); + for (Blocks::size_type i = 0; i < blocks_.size(); ++i) { + blocks_[i] = CopyBlock(b.blocks_[i]); + } + blocksize_ = b.blocksize_; + can_write_to_contiguous_buffer_ = b.can_write_to_contiguous_buffer_; + } + + const char* StartOfFirstBlock() const { + return blocks_[0].buffer; + } + + const char* EndOfFirstBlock() const { + return blocks_[0].buffer + blocks_[0].bytes_used(); + } + + bool can_write_to_contiguous_buffer() const { + return can_write_to_contiguous_buffer_; + } + size_t blocksize() const { return blocksize_; } + Blocks::size_type num_blocks() const { return blocks_.size(); } + size_t buffer_size(size_t idx) const { return blocks_[idx].buffer_size; } + size_t bytes_used(size_t idx) const { return blocks_[idx].bytes_used(); } + + 
protected: + BalsaBuffer() : + blocksize_(kDefaultBlocksize), can_write_to_contiguous_buffer_(true) { + blocks_.push_back(AllocBlock()); + } + + explicit BalsaBuffer(size_t blocksize) : + blocksize_(blocksize), can_write_to_contiguous_buffer_(true) { + blocks_.push_back(AllocBlock()); + } + + BufferBlock AllocBlock() { + return AllocCustomBlock(blocksize_); + } + + BufferBlock AllocCustomBlock(size_t blocksize) { + return BufferBlock(new char[blocksize], blocksize, blocksize); + } + + BufferBlock CopyBlock(const BufferBlock& b) { + BufferBlock block = b; + if (b.buffer == NULL) { + return block; + } + + block.buffer = new char[b.buffer_size]; + memcpy(block.buffer, b.buffer, b.bytes_used()); + return block; + } + + // Cleans up the object. + // The block at start_idx, and all subsequent blocks + // will be cleared and have associated memory deleted. + void CleanupBlocksStartingFrom(Blocks::size_type start_idx) { + for (Blocks::size_type i = start_idx; i < blocks_.size(); ++i) { + delete[] blocks_[i].buffer; + } + blocks_.resize(start_idx); + } + + // A container of BufferBlocks + Blocks blocks_; + + // The default allocation size for a block. + // In general, blocksize_ bytes will be allocated for + // each buffer. + size_t blocksize_; + + // If set to true, then the first block cannot be used for Write() calls as + // the WriteToContiguous... function will modify the base pointer for this + // block, and the Write() calls need to be sure that the base pointer will + // not be changing in order to provide the user with StringPieces which + // continue to be valid. + bool can_write_to_contiguous_buffer_; +}; + +//////////////////////////////////////////////////////////////////////////////// + +// All of the functions in the BalsaHeaders class use string pieces, by either +// using the StringPiece class, or giving an explicit size and char* (as these +// are the native representation for these string pieces). +// This is done for several reasons. 
//  1) This minimizes copying/allocation/deallocation as compared to using
//  string parameters.
//  2) This reduces the number of strlen() calls done (as the length of any
//  string passed in is relatively likely to be known at compile time), and
//  for those strings passed back we obviate the need for a strlen() to
//  determine the size of new storage allocations if a new allocation is
//  required.
//  3) This class attempts to store all of its data in two linear buffers in
//  order to enhance the speed of parsing and writing out to a buffer. As a
//  result, many string pieces are -not- terminated by '\0', and are not
//  c-strings.  Since this is the case, we must delineate the length of the
//  string explicitly via a length.
//
//  WARNING:  The side effect of using StringPiece is that if the underlying
//  buffer changes (due to modifying the headers) the StringPieces which point
//  to the data which was modified may now contain "garbage", and should not
//  be dereferenced.
//  For example, if you fetch some component of the first-line (request or
//  response), and then you modify the first line, the StringPieces you
//  originally received from the original first-line may no longer be valid.
//
//  StringPieces pointing to pieces of header lines which have not been
//  erased() or modified should be valid until the object is cleared or
//  destroyed.

class BalsaHeaders {
 public:
  // Describes where one header line lives inside a BalsaBuffer block. All
  // *_idx offsets are relative to the base pointer of the block selected by
  // buffer_base_idx. The key occupies [first_char_idx, key_end_idx) and the
  // value occupies [value_begin_idx, last_char_idx).
  struct HeaderLineDescription {
    // Creates a description with explicit offsets; the line starts out
    // visible (skip == false).
    HeaderLineDescription(size_t first_character_index,
                          size_t key_end_index,
                          size_t value_begin_index,
                          size_t last_character_index,
                          size_t buffer_base_index) :
        first_char_idx(first_character_index),
        key_end_idx(key_end_index),
        value_begin_idx(value_begin_index),
        last_char_idx(last_character_index),
        buffer_base_idx(buffer_base_index),
        skip(false) {}

    // Creates a zeroed description; the line starts out visible.
    HeaderLineDescription() :
        first_char_idx(0),
        key_end_idx(0),
        value_begin_idx(0),
        last_char_idx(0),
        buffer_base_idx(0),
        skip(false) {}

    size_t first_char_idx;   // Offset of the first character of the key.
    size_t key_end_idx;      // Offset one past the last character of the key.
    size_t value_begin_idx;  // Offset of the first character of the value.
    size_t last_char_idx;    // Offset one past the last character of the value.
    // Index of the BalsaBuffer block which holds this line.
    BalsaBuffer::Blocks::size_type buffer_base_idx;
    // True once the line has been erased; skipped lines are passed over when
    // iterating and when writing the headers out.
    bool skip;
  };

  typedef vector<StringPiece> HeaderTokenList;
  // The first-line parser is a friend of this class.
  friend bool gfe2::ParseHTTPFirstLine(const char* begin,
                                       const char* end,
                                       bool is_request,
                                       size_t max_request_uri_length,
                                       BalsaHeaders* headers,
                                       BalsaFrameEnums::ErrorCode* error_code);

 protected:
  typedef vector<HeaderLineDescription> HeaderLines;

  // Why these base classes (iterator_base, reverse_iterator_base)?  Well, if
  // we do want to export both iterator and const_iterator types (currently we
  // only have const_iterator), then this is useful to avoid code duplication.
  // Additionally, having this base class makes comparisons of iterators of
  // different types (they're different types to ensure that operator= and
  // constructors do not work in the places where they're expected to not work)
  // work properly. There could be as many as 4 iterator types, all based on
  // the same data as iterator_base... so it makes sense to simply have some
  // base classes.
+ + class iterator_base { + public: + friend class BalsaHeaders; + friend class reverse_iterator_base; + typedef pair<StringPiece, StringPiece> StringPiecePair; + typedef StringPiecePair value_type; + typedef value_type& reference; + typedef value_type* pointer; + + typedef std::forward_iterator_tag iterator_category; + typedef ptrdiff_t difference_type; + + typedef iterator_base self; + + // default constructor. + iterator_base() : headers_(NULL), idx_(0) { } + + // copy constructor. + iterator_base(const iterator_base& it) + : headers_(it.headers_), + idx_(it.idx_) {} + + reference operator*() const { + return Lookup(idx_); + } + + pointer operator->() const { + return &(this->operator*()); + } + + bool operator==(const self& it) const { + return idx_ == it.idx_; + } + + bool operator<(const self& it) const { + return idx_ < it.idx_; + } + + bool operator<=(const self& it) const { + return idx_ <= it.idx_; + } + + bool operator!=(const self& it) const { + return !(*this == it); + } + + bool operator>(const self& it) const { + return it < *this; + } + + bool operator>=(const self& it) const { + return it <= *this; + } + + // This mainly exists so that we can have interesting output for + // unittesting. The EXPECT_EQ, EXPECT_NE functions require that + // operator<< work for the classes it sees. It would be better if there + // was an additional traits-like system for the gUnit output... but oh + // well. 
+ friend ostream& operator<<(ostream& os, const iterator_base& it) { + os << "[" << it.headers_ << ", " << it.idx_ << "]"; + return os; + } + + protected: + iterator_base(const BalsaHeaders* headers, HeaderLines::size_type index) : + headers_(headers), + idx_(index) {} + + void increment() { + const HeaderLines& header_lines = headers_->header_lines_; + const HeaderLines::size_type header_lines_size = header_lines.size(); + const HeaderLines::size_type original_idx = idx_; + do { + ++idx_; + } while (idx_ < header_lines_size && header_lines[idx_].skip == true); + // The condition below exists so that ++(end() - 1) == end(), even + // if there are only 'skip == true' elements between the end() iterator + // and the end of the vector of HeaderLineDescriptions. + // TODO(fenix): refactor this list so that we don't have to do + // linear scanning through skipped headers (and this condition is + // then unnecessary) + if (idx_ == header_lines_size) { + idx_ = original_idx + 1; + } + } + + void decrement() { + const HeaderLines& header_lines = headers_->header_lines_; + const HeaderLines::size_type header_lines_size = header_lines.size(); + const HeaderLines::size_type original_idx = idx_; + do { + --idx_; + } while (idx_ >= 0 && + idx_ < header_lines_size && + header_lines[idx_].skip == true); + // The condition below exists so that --(rbegin() + 1) == rbegin(), even + // if there are only 'skip == true' elements between the rbegin() iterator + // and the beginning of the vector of HeaderLineDescriptions. 
+ // TODO(fenix): refactor this list so that we don't have to do + // linear scanning through skipped headers (and this condition is + // then unnecessary) + if (idx_ < 0 || idx_ > header_lines_size) { + idx_ = original_idx - 1; + } + } + + reference Lookup(HeaderLines::size_type index) const { + DCHECK_LT(index, headers_->header_lines_.size()); + const HeaderLineDescription& line = headers_->header_lines_[index]; + const char* stream_begin = headers_->GetPtr(line.buffer_base_idx); + value_ = value_type( + StringPiece(stream_begin + line.first_char_idx, + line.key_end_idx - line.first_char_idx), + StringPiece(stream_begin + line.value_begin_idx, + line.last_char_idx - line.value_begin_idx)); + DCHECK_GE(line.key_end_idx, line.first_char_idx); + DCHECK_GE(line.last_char_idx, line.value_begin_idx); + return value_; + } + + const BalsaHeaders* headers_; + HeaderLines::size_type idx_; + mutable StringPiecePair value_; + }; + + class reverse_iterator_base : public iterator_base { + public: + typedef reverse_iterator_base self; + typedef iterator_base::reference reference; + typedef iterator_base::pointer pointer; + using iterator_base::headers_; + using iterator_base::idx_; + + reverse_iterator_base() : iterator_base() {} + + // This constructor is no explicit purposely. 
+ reverse_iterator_base(const iterator_base& it) : // NOLINT + iterator_base(it) { + } + + self& operator=(const iterator_base& it) { + idx_ = it.idx_; + headers_ = it.headers_; + return *this; + } + + self& operator=(const reverse_iterator_base& it) { + idx_ = it.idx_; + headers_ = it.headers_; + return *this; + } + + reference operator*() const { + return Lookup(idx_ - 1); + } + + pointer operator->() const { + return &(this->operator*()); + } + + reverse_iterator_base(const reverse_iterator_base& it) : + iterator_base(it) { } + + protected: + void increment() { + --idx_; + iterator_base::decrement(); + ++idx_; + } + + void decrement() { + ++idx_; + iterator_base::increment(); + --idx_; + } + + reverse_iterator_base(const BalsaHeaders* headers, + HeaderLines::size_type index) : + iterator_base(headers, index) {} + }; + + public: + class const_header_lines_iterator : public iterator_base { + friend class BalsaHeaders; + public: + typedef const_header_lines_iterator self; + const_header_lines_iterator() : iterator_base() {} + + const_header_lines_iterator(const const_header_lines_iterator& it) : + iterator_base(it.headers_, it.idx_) {} + + self& operator++() { + iterator_base::increment(); + return *this; + } + + self& operator--() { + iterator_base::decrement(); + return *this; + } + protected: + const_header_lines_iterator(const BalsaHeaders* headers, + HeaderLines::size_type index) : + iterator_base(headers, index) {} + }; + + class const_reverse_header_lines_iterator : public reverse_iterator_base { + public: + typedef const_reverse_header_lines_iterator self; + const_reverse_header_lines_iterator() : reverse_iterator_base() {} + + const_reverse_header_lines_iterator( + const const_header_lines_iterator& it) : + reverse_iterator_base(it.headers_, it.idx_) {} + + const_reverse_header_lines_iterator( + const const_reverse_header_lines_iterator& it) : + reverse_iterator_base(it.headers_, it.idx_) {} + + const_header_lines_iterator base() { + return 
const_header_lines_iterator(headers_, idx_); + } + + self& operator++() { + reverse_iterator_base::increment(); + return *this; + } + + self& operator--() { + reverse_iterator_base::decrement(); + return *this; + } + protected: + const_reverse_header_lines_iterator(const BalsaHeaders* headers, + HeaderLines::size_type index) : + reverse_iterator_base(headers, index) {} + + friend class BalsaHeaders; + }; + + // An iterator that only stops at lines with a particular key. + // See also GetIteratorForKey. + // + // Check against header_lines_key_end() to determine when iteration is + // finished. header_lines_end() will also work. + class const_header_lines_key_iterator : public iterator_base { + friend class BalsaHeaders; + public: + typedef const_header_lines_key_iterator self; + + self& operator++() { + do { + iterator_base::increment(); + } while (!AtEnd() && + !StringPieceUtils::EqualIgnoreCase(key_, (**this).first)); + return *this; + } + + void operator++(int ignore) { + ++(*this); + } + + // Only forward-iteration makes sense, so no operator-- defined. + + private: + const_header_lines_key_iterator(const BalsaHeaders* headers, + HeaderLines::size_type index, + const StringPiece& key) + : iterator_base(headers, index), + key_(key) { + } + + // Should only be used for creating an end iterator. + const_header_lines_key_iterator(const BalsaHeaders* headers, + HeaderLines::size_type index) + : iterator_base(headers, index) { + } + + bool AtEnd() const { + return *this >= headers_->header_lines_end(); + } + + StringPiece key_; + }; + + // TODO(fenix): Revisit the amount of bytes initially allocated to the second + // block of the balsa_buffer_. It may make sense to pre-allocate some amount + // (roughly the amount we'd append in new headers such as X-User-Ip, etc.) 
  // Creates an empty header object: a buffer with 4096-byte blocks, no
  // content length, and all first-line indices set to zero.
  BalsaHeaders() :
      balsa_buffer_(4096),
      content_length_(0),
      content_length_status_(BalsaHeadersEnums::NO_CONTENT_LENGTH),
      parsed_response_code_(0),
      firstline_buffer_base_idx_(0),
      whitespace_1_idx_(0),
      non_whitespace_1_idx_(0),
      whitespace_2_idx_(0),
      non_whitespace_2_idx_(0),
      whitespace_3_idx_(0),
      non_whitespace_3_idx_(0),
      whitespace_4_idx_(0),
      end_of_firstline_idx_(0),
      transfer_encoding_is_chunked_(false) { }

  // Forward iteration over the header lines. Both the const and the
  // non-const overloads return const iterators.
  const_header_lines_iterator header_lines_begin() {
    return HeaderLinesBeginHelper<const_header_lines_iterator>();
  }

  const_header_lines_iterator header_lines_begin() const {
    return HeaderLinesBeginHelper<const_header_lines_iterator>();
  }

  const_header_lines_iterator header_lines_end() {
    return HeaderLinesEndHelper<const_header_lines_iterator>();
  }

  const_header_lines_iterator header_lines_end() const {
    return HeaderLinesEndHelper<const_header_lines_iterator>();
  }

  // Reverse iteration: rbegin() wraps header_lines_end() and rend() wraps
  // header_lines_begin().
  const_reverse_header_lines_iterator header_lines_rbegin() {
    return const_reverse_header_lines_iterator(header_lines_end());
  }

  const_reverse_header_lines_iterator header_lines_rbegin() const {
    return const_reverse_header_lines_iterator(header_lines_end());
  }

  const_reverse_header_lines_iterator header_lines_rend() {
    return const_reverse_header_lines_iterator(header_lines_begin());
  }

  const_reverse_header_lines_iterator header_lines_rend() const {
    return const_reverse_header_lines_iterator(header_lines_begin());
  }

  // End marker for iterators obtained from GetIteratorForKey().
  const_header_lines_key_iterator header_lines_key_end() const {
    return HeaderLinesEndHelper<const_header_lines_key_iterator>();
  }

  // Erases the header line 'it' refers to by marking it as skipped; the
  // line's storage is not released. 'it' must belong to this object and
  // refer to an existing line (checked in debug builds only).
  void erase(const const_header_lines_iterator& it) {
    DCHECK_EQ(it.headers_, this);
    DCHECK_LT(it.idx_, header_lines_.size());
    // NOTE(review): idx_ is a HeaderLines::size_type (unsigned), so this
    // check can never fail.
    DCHECK_GE(it.idx_, 0);
    header_lines_[it.idx_].skip = true;
  }

  void Clear();

  void Swap(BalsaHeaders* other);

  void CopyFrom(const BalsaHeaders& other);

  // Exists until it is allowable to turn on header-order
  // computation in the X-Google-GFE-Frontline-Info header, instead
  // of requiring GFEv2 to output in the same header order in
  // which it has received data.
  // TODO(fenix): remove this function and rename all occurrences
  // of it in the code to AppendHeader when the condition above
  // has been satisfied.
  void HackHeader(const StringPiece& key, const StringPiece& value);

  // Same as AppendToHeader, except that it will attempt to preserve
  // header ordering.
  // Note that this will always append to an existing header, if available,
  // without moving the header around, or collapsing multiple header lines
  // with the same key together. For this reason, it only 'attempts' to
  // preserve header ordering.
  // TODO(fenix): remove this function and rename all occurrences
  // of it in the code to AppendToHeader when the condition above
  // has been satisfied.
  void HackAppendToHeader(const StringPiece& key, const StringPiece& value);

  // Replaces header entries with key 'key' if they exist, or appends
  // a new header if none exist.  See 'AppendHeader' below for additional
  // comments about ContentLength and TransferEncoding headers. Note that this
  // will allocate new storage every time that it is called.
  // TODO(fenix): modify this function to reuse existing storage
  // if it is available.
  void ReplaceOrAppendHeader(const StringPiece& key, const StringPiece& value);

  // Append a new header entry to the header object. Clients who wish to append
  // Content-Length header should use SetContentLength() method instead of
  // adding the content length header using AppendHeader (manually adding the
  // content length header will not update the content_length_ and
  // content_length_status_ values).
  // Similarly, clients who wish to add or remove the transfer encoding header
  // in order to apply or remove chunked encoding should use SetChunkEncoding()
  // instead.
  void AppendHeader(const StringPiece& key, const StringPiece& value);

  // Appends ',value' to an existing header named 'key'.  If no header with the
  // correct key exists, it will call AppendHeader(key, value).  Calling this
  // function on a key which exists several times in the headers will produce
  // unpredictable results.
  void AppendToHeader(const StringPiece& key, const StringPiece& value);

  // Prepends 'value,' to an existing header named 'key'.  If no header with the
  // correct key exists, it will call AppendHeader(key, value).  Calling this
  // function on a key which exists several times in the headers will produce
  // unpredictable results.
  void PrependToHeader(const StringPiece& key, const StringPiece& value);

  const StringPiece GetHeader(const StringPiece& key) const;

  // Iterates over all currently valid header lines, appending their
  // values into the vector 'out', in top-to-bottom order.
  // Header-lines which have been erased are not currently valid, and
  // will not have their values appended. Empty values will be
  // represented as empty string. If 'key' doesn't exist in the headers at
  // all, out will not be changed. We do not clear the vector out
  // before adding new entries. If there are header lines with matching
  // key but empty value then they are also added to the vector out.
  // (Basically empty values are not treated in any special manner).
  //
  // Example:
  // Input header:
  // "GET / HTTP/1.0\r\n"
  //    "key1: v1\r\n"
  //    "key1: \r\n"
  //    "key1:\r\n"
  //    "key1:  v1\r\n"
  //    "key1:v2\r\n"
  //
  //  vector out is initially: ["foo"]
  //  vector out after GetAllOfHeader("key1", &out) is:
  // ["foo", "v1", "", "", "v1", "v2"]

  void GetAllOfHeader(const StringPiece& key, vector<StringPiece>* out) const;

  // Joins all values for key into a comma-separated string in out.
  // More efficient than calling JoinStrings on result of GetAllOfHeader if
  // you don't need the intermediate vector<StringPiece>.
  void GetAllOfHeaderAsString(const StringPiece& key, string* out) const;

  // Returns true if RFC 2616 Section 14 indicates that header can
  // have multiple values.
  static bool IsMultivaluedHeader(const StringPiece& header);

  // Determine if a given header is present.
  inline bool HasHeader(const StringPiece& key) const {
    return (GetConstHeaderLinesIterator(key, header_lines_.begin()) !=
            header_lines_.end());
  }

  // Returns true iff any header 'key' exists with non-empty value.
  bool HasNonEmptyHeader(const StringPiece& key) const;

  const_header_lines_iterator GetHeaderPosition(const StringPiece& key) const;

  // Returns a forward-only iterator that only stops at lines matching key.
  // String backing 'key' must remain valid for lifetime of iterator.
  //
  // Check returned iterator against header_lines_key_end() to determine when
  // iteration is finished.
  const_header_lines_key_iterator GetIteratorForKey(
      const StringPiece& key) const;

  void RemoveAllOfHeader(const StringPiece& key);

  // Removes all headers starting with 'key' [case insensitive]
  void RemoveAllHeadersWithPrefix(const StringPiece& key);

  // Returns the lower bound of memory used by this header object, including
  // all internal buffers and data structures. Some of the memory used cannot
  // be directly measured. For example, memory used for bookkeeping by
  // standard containers.
  size_t GetMemoryUsedLowerBound() const;

  // Returns the upper bound on the required buffer space to fully write out
  // the header object (this includes the first line, all header lines, and
  // the final CRLF that marks the ending of the header).
  size_t GetSizeForWriteBuffer() const;

  // The following WriteHeader* methods are template member functions that
  // place one requirement on the Buffer class: it must implement a Write
  // method that takes a pointer and a length. The buffer passed in is not
  // required to be stretchable. For non-stretchable buffers, the user must
  // call GetSizeForWriteBuffer() to find out the upper bound on the output
  // buffer space required to make sure that the entire header is serialized.
  // BalsaHeaders will not check that there is adequate space in the buffer
  // object during the write.

  // Writes the entire header and the final CRLF that marks the end of the HTTP
  // header section to the buffer. After this method returns, no more header
  // data should be written to the buffer.
  template <typename Buffer>
  void WriteHeaderAndEndingToBuffer(Buffer* buffer) const {
    WriteToBuffer(buffer);
    WriteHeaderEndingToBuffer(buffer);
  }

  // Writes the final CRLF to the buffer to terminate the HTTP header section.
  // After this method returns, no more header data should be written to the
  // buffer.
  template <typename Buffer>
  static void WriteHeaderEndingToBuffer(Buffer* buffer) {
    buffer->Write("\r\n", 2);
  }

  // Writes the entire header to the buffer without the CRLF that terminates
  // the HTTP header. This lets users append additional header lines using
  // WriteHeaderLineToBuffer and then terminate the header with
  // WriteHeaderEndingToBuffer as the header is serialized to the
  // buffer, without having to first copy the header.
  template <typename Buffer>
  void WriteToBuffer(Buffer* buffer) const {
    // write the first line.
    const size_t firstline_len = whitespace_4_idx_ - non_whitespace_1_idx_;
    const char* stream_begin = GetPtr(firstline_buffer_base_idx_);
    buffer->Write(stream_begin + non_whitespace_1_idx_, firstline_len);
    buffer->Write("\r\n", 2);
    // write every header line which has not been erased.
    const HeaderLines::size_type end = header_lines_.size();
    for (HeaderLines::size_type i = 0; i < end; ++i) {
      const HeaderLineDescription& line = header_lines_[i];
      if (line.skip) {
        continue;
      }
      const char* line_ptr = GetPtr(line.buffer_base_idx);
      WriteHeaderLineToBuffer(
          buffer,
          StringPiece(line_ptr + line.first_char_idx,
                      line.key_end_idx - line.first_char_idx),
          StringPiece(line_ptr + line.value_begin_idx,
                      line.last_char_idx - line.value_begin_idx));
    }
  }

  // Takes a header line in the form of a key/value pair and append it to the
  // buffer. This function should be called after WriteToBuffer to
  // append additional header lines to the header without copying the header.
  // When the user is done with appending to the buffer,
  // WriteHeaderEndingToBuffer must be used to terminate the HTTP
  // header in the buffer. This method is a no-op if key is empty.
  template <typename Buffer>
  static void WriteHeaderLineToBuffer(Buffer* buffer,
                                      const StringPiece& key,
                                      const StringPiece& value) {
    // if the key is empty, we don't want to write the rest because it
    // will not be a well-formed header line.
    if (key.size() > 0) {
      buffer->Write(key.data(), key.size());
      buffer->Write(": ", 2);
      buffer->Write(value.data(), value.size());
      buffer->Write("\r\n", 2);
    }
  }

  // Dump the textual representation of the header object to a string, which
  // is suitable for writing out to logs. All CRLF will be printed out as \n.
  // This function can be called on a header object in any state. Raw header
  // data will be printed out if the header object is not completely parsed,
  // e.g., when there was an error in the middle of parsing.
+ // The header content is appended to the string; the original content is not + // cleared. + void DumpToString(string* str) const; + + const StringPiece first_line() const { + DCHECK_GE(whitespace_4_idx_, non_whitespace_1_idx_); + return StringPiece(BeginningOfFirstLine() + non_whitespace_1_idx_, + whitespace_4_idx_ - non_whitespace_1_idx_); + } + + // Returns the parsed value of the response code if it has been parsed. + // Guaranteed to return 0 when unparsed (though it is a much better idea to + // verify that the BalsaFrame had no errors while parsing). + // This may return response codes which are outside the normal bounds of + // HTTP response codes-- it is up to the user of this class to ensure that + // the response code is one which is interpretable. + size_t parsed_response_code() const { return parsed_response_code_; } + + const StringPiece request_method() const { + DCHECK_GE(whitespace_2_idx_, non_whitespace_1_idx_); + return StringPiece(BeginningOfFirstLine() + non_whitespace_1_idx_, + whitespace_2_idx_ - non_whitespace_1_idx_); + } + + const StringPiece response_version() const { + // Note: There is no difference between request_method() and + // response_version(). They both could be called + // GetFirstTokenFromFirstline()... but that wouldn't be anywhere near as + // descriptive. + return request_method(); + } + + const StringPiece request_uri() const { + DCHECK_GE(whitespace_3_idx_, non_whitespace_2_idx_); + return StringPiece(BeginningOfFirstLine() + non_whitespace_2_idx_, + whitespace_3_idx_ - non_whitespace_2_idx_); + } + + const StringPiece response_code() const { + // Note: There is no difference between request_uri() and response_code(). + // They both could be called GetSecondtTokenFromFirstline(), but, as noted + // in an earlier comment, that wouldn't be as descriptive. 
+ return request_uri(); + } + + const StringPiece request_version() const { + DCHECK_GE(whitespace_4_idx_, non_whitespace_3_idx_); + return StringPiece(BeginningOfFirstLine() + non_whitespace_3_idx_, + whitespace_4_idx_ - non_whitespace_3_idx_); + } + + const StringPiece response_reason_phrase() const { + // Note: There is no difference between request_version() and + // response_reason_phrase(). They both could be called + // GetThirdTokenFromFirstline(), but, as noted in an earlier comment, that + // wouldn't be as descriptive. + return request_version(); + } + + // Note that SetFirstLine will not update the internal indices for the + // various bits of the first-line (and may set them all to zero). + // If you'd like to use the accessors for the various bits of the firstline, + // then you should use the Set* functions, or SetFirstlineFromStringPieces, + // below, instead. + // + void SetFirstlineFromStringPieces(const StringPiece& firstline_a, + const StringPiece& firstline_b, + const StringPiece& firstline_c); + + void SetRequestFirstlineFromStringPieces(const StringPiece& method, + const StringPiece& uri, + const StringPiece& version) { + SetFirstlineFromStringPieces(method, uri, version); + } + + void SetResponseFirstlineFromStringPieces(const StringPiece& version, + const StringPiece& code, + const StringPiece& reason_phrase) { + SetFirstlineFromStringPieces(version, code, reason_phrase); + } + + // These functions are exactly the same, except that their names are + // different. This is done so that the code using this class is more + // expressive. 
+ void SetRequestMethod(const StringPiece& method); + void SetResponseVersion(const StringPiece& version); + + void SetRequestUri(const StringPiece& uri); + void SetResponseCode(const StringPiece& code); + void set_parsed_response_code(size_t parsed_response_code) { + parsed_response_code_ = parsed_response_code; + } + void SetParsedResponseCodeAndUpdateFirstline(size_t parsed_response_code); + + // These functions are exactly the same, except that their names are + // different. This is done so that the code using this class is more + // expressive. + void SetRequestVersion(const StringPiece& version); + void SetResponseReasonPhrase(const StringPiece& reason_phrase); + + // The biggest problem with SetFirstLine is that we don't want to use a + // separate buffer for it. The second biggest problem with it is that the + // first biggest problem requires that we store offsets into a buffer instead + // of pointers into a buffer. Cuteness aside, SetFirstLine doesn't parse + // the individual fields of the firstline, and so accessors to those fields + // will not work properly after calling SetFirstLine. If you want those + // accessors to work, use the Set* functions above this one. + // SetFirstLine is still useful, however, if all you care about is correct + // serialization with the rest of the header object. + void SetFirstLine(const StringPiece& line); + + // Simple accessors to some of the internal state + bool transfer_encoding_is_chunked() const { + return transfer_encoding_is_chunked_; + } + + static bool ResponseCodeImpliesNoBody(int code) { + // From HTTP spec section 6.1.1 all 1xx responses must not have a body, + // as well as 204 No Content and 304 Not Modified. + return ((code >= 100) && (code <= 199)) || (code == 204) || (code == 304); + } + + // Note: never check this for requests. Nothing bad will happen if you do, + // but spec does not allow requests framed by connection close. + // TODO(vitaliyl): refactor. 
+ bool is_framed_by_connection_close() const { + // We declare that response is framed by connection close if it has no + // content-length, no transfer encoding, and is allowed to have a body by + // the HTTP spec. + // parsed_response_code_ is 0 for requests, so ResponseCodeImpliesNoBody + // will return false. + return (content_length_status_ == BalsaHeadersEnums::NO_CONTENT_LENGTH) && + !transfer_encoding_is_chunked_ && + !ResponseCodeImpliesNoBody(parsed_response_code_); + } + + size_t content_length() const { return content_length_; } + BalsaHeadersEnums::ContentLengthStatus content_length_status() const { + return content_length_status_; + } + + // SetContentLength and SetChunkEncoding modifies the header object to use + // content-length and transfer-encoding headers in a consistent manner. They + // set all internal flags and status so client can get a consistent view from + // various accessors. + void SetContentLength(size_t length); + void SetChunkEncoding(bool chunk_encode); + + protected: + friend class BalsaFrame; + friend class FlipFrame; + friend class HTTPMessage; + friend class BalsaHeadersTokenUtils; + + const char* BeginningOfFirstLine() const { + return GetPtr(firstline_buffer_base_idx_); + } + + char* GetPtr(BalsaBuffer::Blocks::size_type block_idx) { + return balsa_buffer_.GetPtr(block_idx); + } + + const char* GetPtr(BalsaBuffer::Blocks::size_type block_idx) const { + return balsa_buffer_.GetPtr(block_idx); + } + + void WriteFromFramer(const char* ptr, size_t size) { + balsa_buffer_.WriteToContiguousBuffer(StringPiece(ptr, size)); + } + + void DoneWritingFromFramer() { + balsa_buffer_.NoMoreWriteToContiguousBuffer(); + } + + const char* OriginalHeaderStreamBegin() const { + return balsa_buffer_.StartOfFirstBlock(); + } + + const char* OriginalHeaderStreamEnd() const { + return balsa_buffer_.EndOfFirstBlock(); + } + + size_t GetReadableBytesFromHeaderStream() const { + return OriginalHeaderStreamEnd() - OriginalHeaderStreamBegin(); + } + + 
void GetReadablePtrFromHeaderStream(const char** p, size_t* s) { + *p = OriginalHeaderStreamBegin(); + *s = GetReadableBytesFromHeaderStream(); + } + + StringPiece GetValueFromHeaderLineDescription( + const HeaderLineDescription& line) const; + + void AddAndMakeDescription(const StringPiece& key, + const StringPiece& value, + HeaderLineDescription* d); + + void AppendOrPrependAndMakeDescription(const StringPiece& key, + const StringPiece& value, + bool append, + HeaderLineDescription* d); + + // Removes all header lines with the given key starting at start. + void RemoveAllOfHeaderStartingAt(const StringPiece& key, + HeaderLines::iterator start); + + // If the 'key' does not exist in the headers, calls + // AppendHeader(key, value). Otherwise if append is true, appends ',value' + // to the first existing header with key 'key'. If append is false, prepends + // 'value,' to the first existing header with key 'key'. + void AppendOrPrependToHeader(const StringPiece& key, + const StringPiece& value, + bool append); + + HeaderLines::const_iterator GetConstHeaderLinesIterator( + const StringPiece& key, + HeaderLines::const_iterator start) const; + + HeaderLines::iterator GetHeaderLinesIteratorNoSkip( + const StringPiece& key, + HeaderLines::iterator start); + + HeaderLines::iterator GetHeaderLinesIterator( + const StringPiece& key, + HeaderLines::iterator start); + + template <typename IteratorType> + const IteratorType HeaderLinesBeginHelper() const { + if (header_lines_.empty()) { + return IteratorType(this, 0); + } + const HeaderLines::size_type header_lines_size = header_lines_.size(); + for (HeaderLines::size_type i = 0; i < header_lines_size; ++i) { + if (header_lines_[i].skip == false) { + return IteratorType(this, i); + } + } + return IteratorType(this, 0); + } + + template <typename IteratorType> + const IteratorType HeaderLinesEndHelper() const { + if (header_lines_.empty()) { + return IteratorType(this, 0); + } + const HeaderLines::size_type header_lines_size = 
header_lines_.size(); + HeaderLines::size_type i = header_lines_size; + do { + --i; + if (header_lines_[i].skip == false) { + return IteratorType(this, i + 1); + } + } while (i != 0); + return IteratorType(this, 0); + } + + // At the moment, this function will always return the original headers. + // In the future, it may not do so after erasing header lines, modifying + // header lines, or modifying the first line. + // For this reason, it is strongly suggested that use of this function is + // only acceptable for the purpose of debugging parse errors seen by the + // BalsaFrame class. + StringPiece OriginalHeadersForDebugging() const { + return StringPiece(OriginalHeaderStreamBegin(), + OriginalHeaderStreamEnd() - OriginalHeaderStreamBegin()); + } + + BalsaBuffer balsa_buffer_; + + size_t content_length_; + BalsaHeadersEnums::ContentLengthStatus content_length_status_; + size_t parsed_response_code_; + // HTTP firstlines all have the following structure: + // LWS NONWS LWS NONWS LWS NONWS NOTCRLF CRLF + // [\t \r\n]+ [^\t ]+ [\t ]+ [^\t ]+ [\t ]+ [^\t ]+ [^\r\n]+ "\r\n" + // ws1 nws1 ws2 nws2 ws3 nws3 ws4 + // | [-------) [-------) [----------------) + // REQ: method request_uri version + // RESP: version statuscode reason + // + // The first NONWS->LWS component we'll call firstline_a. + // The second firstline_b, and the third firstline_c. 
+ // + // firstline_a goes from nws1 to (but not including) ws2 + // firstline_b goes from nws2 to (but not including) ws3 + // firstline_c goes from nws3 to (but not including) ws4 + // + // In the code: + // ws1 == whitespace_1_idx_ + // nws1 == non_whitespace_1_idx_ + // ws2 == whitespace_2_idx_ + // nws2 == non_whitespace_2_idx_ + // ws3 == whitespace_3_idx_ + // nws3 == non_whitespace_3_idx_ + // ws4 == whitespace_4_idx_ + BalsaBuffer::Blocks::size_type firstline_buffer_base_idx_; + size_t whitespace_1_idx_; + size_t non_whitespace_1_idx_; + size_t whitespace_2_idx_; + size_t non_whitespace_2_idx_; + size_t whitespace_3_idx_; + size_t non_whitespace_3_idx_; + size_t whitespace_4_idx_; + size_t end_of_firstline_idx_; + + bool transfer_encoding_is_chunked_; + + HeaderLines header_lines_; +}; + +} // namespace gfe2 + +#endif // NET_TOOLS_FLIP_SERVER_BALSA_HEADERS_H_ + diff --git a/net/tools/flip_server/balsa_headers_token_utils.cc b/net/tools/flip_server/balsa_headers_token_utils.cc new file mode 100644 index 0000000..5e27bd0 --- /dev/null +++ b/net/tools/flip_server/balsa_headers_token_utils.cc @@ -0,0 +1,143 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "net/tools/flip_server/balsa_headers_token_utils.h" + +#include "strings/stringpiece_utils.h" + +namespace gfe2 { + +inline void BalsaHeadersTokenUtils::TokenizeHeaderLine( + const BalsaHeaders& headers, + const BalsaHeaders::HeaderLineDescription& header_line, + BalsaHeaders::HeaderTokenList* tokens) { + CHECK(tokens); + + // Find where this line is stored + const char* stream_begin = headers.GetPtr(header_line.buffer_base_idx); + + // Determine the boundaries of the value + const char* value_begin = stream_begin + header_line.value_begin_idx; + const char* line_end = stream_begin + header_line.last_char_idx; + + // Tokenize + ParseTokenList(value_begin, line_end, tokens); +} + +void BalsaHeadersTokenUtils::RemoveLastTokenFromHeaderValue( + const StringPiece& key, BalsaHeaders* headers) { + BalsaHeaders::HeaderLines::iterator it = + headers->GetHeaderLinesIterator(key, headers->header_lines_.begin()); + if (it == headers->header_lines_.end()) { + DLOG(WARNING) << "Attempting to remove last token from a non-existent " + << "header \"" << key << "\""; + return; + } + + // Find the last line with that key. + BalsaHeaders::HeaderLines::iterator header_line; + do { + header_line = it; + it = headers->GetHeaderLinesIterator(key, it + 1); + } + while (it != headers->header_lines_.end()); + + // Tokenize just that line. + BalsaHeaders::HeaderTokenList tokens; + TokenizeHeaderLine(*headers, *header_line, &tokens); + + if (tokens.empty()) { + DLOG(WARNING) << "Attempting to remove a token from an empty header value " + << "for header \"" << key << "\""; + header_line->skip = true; // remove the whole line + } else if (tokens.size() == 1) { + header_line->skip = true; // remove the whole line + } else { + // Shrink the line size and leave the extra data in the buffer. 
+ const StringPiece& new_last_token = tokens[tokens.size() - 2]; + const char* last_char_address = + new_last_token.data() + new_last_token.size() - 1; + const char* stream_begin = headers->GetPtr(header_line->buffer_base_idx); + + header_line->last_char_idx = last_char_address - stream_begin + 1; + } +} + +bool BalsaHeadersTokenUtils::CheckHeaderForLastToken( + const BalsaHeaders& headers, + const StringPiece& key, + const StringPiece& token) { + BalsaHeaders::const_header_lines_key_iterator it = + headers.GetIteratorForKey(key); + if (it == headers.header_lines_key_end()) + return false; + + // Find the last line + BalsaHeaders::const_header_lines_key_iterator header_line = it; + do { + header_line = it; + ++it; + } + while (it != headers.header_lines_key_end()); + + // Tokenize just that line + BalsaHeaders::HeaderTokenList tokens; + ParseTokenList(header_line->second.begin(), header_line->second.end(), + &tokens); + + return !tokens.empty() && + StringPieceUtils::StartsWithIgnoreCase(tokens.back(), token); +} + +void BalsaHeadersTokenUtils::TokenizeHeaderValue( + const BalsaHeaders& headers, + const StringPiece& key, + BalsaHeaders::HeaderTokenList* tokens) { + CHECK(tokens); + tokens->clear(); + + // We may have more then 1 line with the same header key. Tokenize them all + // and stick all the tokens into the same list. + for (BalsaHeaders::const_header_lines_key_iterator header_line = + headers.GetIteratorForKey(key); + header_line != headers.header_lines_key_end(); ++header_line) { + ParseTokenList(header_line->second.begin(), header_line->second.end(), + tokens); + } +} + +void BalsaHeadersTokenUtils::ParseTokenList( + const char* start, + const char* end, + BalsaHeaders::HeaderTokenList* tokens) { + if (start == end) { + return; + } + while (true) { + // search for first nonwhitespace, non separator char. + while (*start == ',' || *start <= ' ') { + ++start; + if (start == end) { + return; + } + } + // found. marked. 
+ const char* nws = start; + + // search for next whitspace or separator char. + while (*start != ',' && *start > ' ') { + ++start; + if (start == end) { + if (nws != start) { + tokens->push_back(StringPiece(nws, start - nws)); + } + return; + } + } + tokens->push_back(StringPiece(nws, start - nws)); + } +} + +} // namespace gfe2 + diff --git a/net/tools/flip_server/balsa_headers_token_utils.h b/net/tools/flip_server/balsa_headers_token_utils.h new file mode 100644 index 0000000..450c089 --- /dev/null +++ b/net/tools/flip_server/balsa_headers_token_utils.h @@ -0,0 +1,61 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Utility class that performs basic operations on header value tokens: parsing +// them out, checking for presense of certain tokens, and removing them. + +#ifndef NET_TOOLS_FLIP_SERVER_BALSA_HEADERS_TOKEN_UTILS_H_ +#define NET_TOOLS_FLIP_SERVER_BALSA_HEADERS_TOKEN_UTILS_H_ + +#include "net/tools/flip_server/balsa_headers.h" +#include "strings/stringpiece.h" + +namespace gfe2 { + +class BalsaHeadersTokenUtils { + public: + // All the functions below respect multiple header lines with the same key. + + // Checks whether the last header token matches a given value. Useful to + // check the outer-most content or transfer-encoding, for example. In the + // presence of multiple header lines with given key, the last token of the + // last line is compared. + static bool CheckHeaderForLastToken(const BalsaHeaders& headers, + const StringPiece& key, + const StringPiece& token); + + // Tokenizes header value for a given key. In the presence of multiple lines + // with that key, all of them will be tokenized and tokens will be added to + // the list in the order in which they are encountered. 
+ static void TokenizeHeaderValue(const BalsaHeaders& headers, + const StringPiece& key, + BalsaHeaders::HeaderTokenList* tokens); + + // Removes the last token from the header value. In the presence of multiple + // header lines with given key, will remove the last token of the last line. + // Can be useful if the last encoding has to be removed. + static void RemoveLastTokenFromHeaderValue(const StringPiece& key, + BalsaHeaders* headers); + + // Given a pointer to the beginning and the end of the header value + // in some buffer, populates tokens list with beginning and end indices + // of all tokens present in the value string. + static void ParseTokenList(const char* start, + const char* end, + BalsaHeaders::HeaderTokenList* tokens); + + private: + // Helper function to tokenize a header line once we have its description. + static void TokenizeHeaderLine( + const BalsaHeaders& headers, + const BalsaHeaders::HeaderLineDescription& line, + BalsaHeaders::HeaderTokenList* tokens); + + BalsaHeadersTokenUtils(); // Prohibit instantiation +}; + +} // namespace gfe2 + +#endif // NET_TOOLS_FLIP_SERVER_BALSA_HEADERS_TOKEN_UTILS_H_ + diff --git a/net/tools/flip_server/balsa_visitor_interface.h b/net/tools/flip_server/balsa_visitor_interface.h new file mode 100644 index 0000000..be6fa98 --- /dev/null +++ b/net/tools/flip_server/balsa_visitor_interface.h @@ -0,0 +1,181 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef NET_TOOLS_FLIP_SERVER_BALSA_VISITOR_INTERFACE_H_ +#define NET_TOOLS_FLIP_SERVER_BALSA_VISITOR_INTERFACE_H_ + +#include <cstddef> + +namespace gfe2 { + +class BalsaFrame; +class BalsaHeaders; + +// By default the BalsaFrame instantiates a class derived from this interface +// which does absolutely nothing. 
If you'd prefer to have interesting +// functionality execute when any of the below functions are called by the +// BalsaFrame, then you should subclass it, and set an instantiation of your +// subclass as the current visitor for the BalsaFrame class using +// BalsaFrame::set_visitor(). +class BalsaVisitorInterface { + public: + virtual ~BalsaVisitorInterface() {} + + // Summary: + // This is how the BalsaFrame passes you the raw input which it knows to + // be a part of the body. To be clear, every byte of the Balsa which isn't + // part of the header (or it's framing), or trailers will be passed through + // this function. This includes data as well as chunking framing. + // Arguments: + // input - contains the bytes available for read. + // size - contains the number of bytes it is safe to read from input. + virtual void ProcessBodyInput(const char *input, size_t size) = 0; + + // Summary: + // This is like ProcessBodyInput, but it will only include those parts of + // the body which would be stored by a program such as wget, i.e. the bytes + // indicating chunking (it will have been omitted). Trailers will not be + // passed in through this function-- they'll be passed in through + // ProcessTrailers. + // Arguments: + // input - contains the bytes available for read. + // size - contains the number of bytes it is safe to read from input. + virtual void ProcessBodyData(const char *input, size_t size) = 0; + + // Summary: + // BalsaFrame passes the raw header data through this function. This is + // not cleaned up in any way. + // Arguments: + // input - contains the bytes available for read. + // size - contains the number of bytes it is safe to read from input. + virtual void ProcessHeaderInput(const char *input, size_t size) = 0; + + // Summary: + // BalsaFrame passes the raw trailer data through this function. This is + // not cleaned up in any way. Note that trailers only occur in a message + // if there was a chunked encoding, and not always then. 
+ // + // Arguments: + // input - contains the bytes available for read. + // size - contains the number of bytes it is safe to read from input. + virtual void ProcessTrailerInput(const char *input, size_t size) = 0; + + // Summary: + // Since the BalsaFrame already has to parse the headers in order to + // determine proper framing, it might as well pass the parsed and + // cleaned-up results to whatever might need it. This function exists for + // that purpose-- parsed headers are passed into this function. + // Arguments: + // headers - contains the parsed headers in the order in which + // they occurred in the header. + virtual void ProcessHeaders(const BalsaHeaders& headers) = 0; + + // Summary: + // Called when the first line of the message is parsed, in this case, for a + // request. + // Arguments: + // line_input - pointer to the beginning of the first line string. + // line_length - length of the first line string. (i.e. the number of + // bytes it is safe to read from line_input) + // method_input - pointer to the beginning of the method string + // method_length - length of the method string (i.e. the number + // of bytes it is safe to read from method_input) + // request_uri_input - pointer to the beginning of the request uri + // string. + // request_uri_length - length of the request uri string (i.e. the number + // of bytes it is safe to read from request_uri_input) + // version_input - pointer to the beginning of the version string. + // version_length - length of the version string (i.e. the number + // of bytes it is safe to read from version_input) + virtual void ProcessRequestFirstLine(const char* line_input, + size_t line_length, + const char* method_input, + size_t method_length, + const char* request_uri_input, + size_t request_uri_length, + const char* version_input, + size_t version_length) = 0; + + // Summary: + // Called when the first line of the message is parsed, in this case, for a + // response. 
+ // Arguments: + // line_input - pointer to the beginning of the first line string. + // line_length - length of the first line string. (i.e. the number of + // bytes it is safe to read from line_input) + // version_input - pointer to the beginning of the version string. + // version_length - length of the version string (i.e. the number + // of bytes it is safe to read from version_input) + // status_input - pointer to the beginning of the status string + // status_length - length of the status string (i.e. the number + // of bytes it is safe to read from status_input) + // reason_input - pointer to the beginning of the reason string + // reason_length - length of the reason string (i.e. the number + // of bytes it is safe to read from reason_input) + virtual void ProcessResponseFirstLine(const char *line_input, + size_t line_length, + const char *version_input, + size_t version_length, + const char *status_input, + size_t status_length, + const char *reason_input, + size_t reason_length) = 0; + + // Called when a chunk length is parsed. + // Arguments: + // chunk_length - the length of the next incoming chunk. + virtual void ProcessChunkLength(size_t chunk_length) = 0; + + // Summary: + // BalsaFrame passes the raw chunk extension data through this function. + // The data is not cleaned up at all, use + // BalsaFrame::ProcessChunkExtentions to get the parsed and cleaned up + // chunk extensions. + // + // Arguments: + // input - contains the bytes available for read. + // size - contains the number of bytes it is safe to read from input. + virtual void ProcessChunkExtensions(const char* input, size_t size) = 0; + + // Summary: + // Called when the header is framed and processed. + virtual void HeaderDone() = 0; + + // Summary: + // Called when the message is framed and processed. + virtual void MessageDone() = 0; + + // Summary: + // Called when an error is detected while parsing the header. + // Arguments: + // framer - the framer in which an error occurred. 
+ virtual void HandleHeaderError(BalsaFrame* framer) = 0; + + // Summary: + // Called when something meriting a warning is detected while + // parsing the header. + // Arguments: + // framer - the framer in which an error occurred. + virtual void HandleHeaderWarning(BalsaFrame* framer) = 0; + + // Summary: + // Called when an error is detected while parsing a chunk. + // Arguments: + // framer - the framer in which an error occurred. + virtual void HandleChunkingError(BalsaFrame* framer) = 0; + + // Summary: + // Called when an error is detected while handling the entity-body. + // Currently, this can only be called when there is an error + // with the BytesSpliced() function, but in the future other interesting + // errors could occur. + // Arguments: + // framer - the framer in which an error occurred. + virtual void HandleBodyError(BalsaFrame* framer) = 0; +}; + +} // namespace gfe2 + +#endif // NET_TOOLS_FLIP_SERVER_BALSA_VISITOR_INTERFACE_H_ + diff --git a/net/tools/flip_server/buffer_interface.h b/net/tools/flip_server/buffer_interface.h new file mode 100644 index 0000000..7f38fa1 --- /dev/null +++ b/net/tools/flip_server/buffer_interface.h @@ -0,0 +1,121 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef NET_TOOLS_FLIP_SERVER_BUFFER_INTERFACE_H__ +#define NET_TOOLS_FLIP_SERVER_BUFFER_INTERFACE_H__ + +namespace gfe2 { + +class BufferInterface { + public: + + // Returns the number of bytes which can be read from the buffer. There is no + // guarantee that the bytes are contiguous. + virtual int ReadableBytes() const = 0; + + // Summary: + // returns the size of this buffer + // Returns: + // size of this buffer. + virtual int BufferSize() const = 0; + + // Summary: + // returns the number of bytes free in this buffer. + // Returns: + // number of bytes free. + virtual int BytesFree() const = 0; + + // Summary: + // Returns true if empty. 
+ // Returns: + // true - if empty + // false - otherwise + virtual bool Empty() const = 0; + + // Summary: + // Returns true if the buffer is full. + virtual bool Full() const = 0; + + // Summary: + // returns the number of characters written. + // appends up-to-'size' bytes to the buffer. + // Args: + // bytes - bytes which are read, and copied into the buffer. + // size - number of bytes which are read and copied. + // this number shall be >= 0. + virtual int Write(const char* bytes, int size) = 0; + + // Summary: + // Gets a pointer which can be written to (assigned to). + // this pointer (and size) can be used in functions like + // recv() or read(), etc. + // If *size is zero upon returning from this function, then it + // is unsafe to dereference *ptr. + // Args: + // ptr - assigned a pointer to which we can write + // size - the amount of data (in bytes) that it is safe to write to ptr. + virtual void GetWritablePtr(char **ptr, int* size) const = 0; + + // Summary: + // Gets a pointer which can be read from + // this pointer (and size) can be used in functions like + // send() or write(), etc. + // If *size is zero upon returning from this function, then it + // is unsafe to dereference *ptr. + // Args: + // ptr - assigned a pointer from which we may read + // size - the amount of data (in bytes) that it is safe to read + virtual void GetReadablePtr(char **ptr, int* size) const = 0; + + // Summary: + // Reads bytes out of the buffer, and writes them into 'bytes'. + // Returns the number of bytes read. + // Consumes bytes from the buffer (possibly, but not necessarily + // rendering them free) + // Args: + // bytes - the pointer into which bytes are read from this buffer + // and written into + // size - number of bytes which are read and copied. + // this number shall be >= 0. 
+ // Returns: + // the number of bytes read into 'bytes' + virtual int Read(char* bytes, int size) = 0; + + // Summary: + // removes all data from the buffer + virtual void Clear() = 0; + + // Summary: + // reserves contiguous writable empty space in the buffer of size bytes. + // Returns true if the reservation is successful. + // If a derived class chooses not to implement reservation, its + // implementation should return false. + virtual bool Reserve(int size) = 0; + + // Summary: + // removes the oldest 'amount_to_advance' characters from this buffer. + // Args: + // amount_to_advance - .. this should be self-explanatory =) + // this number shall be >= 0. + virtual void AdvanceReadablePtr(int amount_to_advance) = 0; + + // Summary: + // Moves the internal pointers around such that the + // amount of data specified here is expected to + // already be resident (as if it was Written) + // Args: + // amount_to_advance - self explanatory. + // this number shall be >= 0. + virtual void AdvanceWritablePtr(int amount_to_advance) = 0; + + virtual ~BufferInterface() {} + + protected: + BufferInterface() {} +}; + +} // namespace gfe2 + +#endif // NET_TOOLS_FLIP_SERVER_BUFFER_INTERFACE_H__ + diff --git a/net/tools/flip_server/create_listener.cc b/net/tools/flip_server/create_listener.cc new file mode 100644 index 0000000..1a1998d --- /dev/null +++ b/net/tools/flip_server/create_listener.cc @@ -0,0 +1,179 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include <arpa/inet.h> // for inet_ntop +#include <errno.h> // for strerror +#include <netdb.h> // for getaddrinfo and getnameinfo +#include <netinet/in.h> // for IPPROTO_*, etc. 
+#include <stdlib.h> // for EXIT_FAILURE +#include <sys/socket.h> // for getaddrinfo and getnameinfo +#include <sys/types.h> // " +#include <unistd.h> // for exit() + +#include "net/tools/flip_server/create_listener.h" + +#include "base/logging.h" + +namespace gfe2 { + +// used to ensure we delete the addrinfo structure +// alloc'd by getaddrinfo +class AddrinfoGuard { + protected: + struct addrinfo * addrinfo_ptr_; + public: + + explicit AddrinfoGuard(struct addrinfo* addrinfo_ptr) : + addrinfo_ptr_(addrinfo_ptr) {} + + ~AddrinfoGuard() { + freeaddrinfo(addrinfo_ptr_); + } +}; + +//////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////// + +// Summary: +// Closes a socket, with option to attempt it multiple times. +// Why do this? Well, if the system-call gets interrupted, close +// can fail with EINTR. In that case you should just retry.. Unfortunately, +// we can't be sure that errno is properly set since we're using a +// multithreaded approach in the filter proxy, so we should just retry. +// Args: +// fd - the socket to close +// tries - the number of tries to close the socket. +// Returns: +// true - if socket was closed +// false - if socket was NOT closed. +// Side-effects: +// sets *fd to -1 if socket was closed. +// +bool CloseSocket(int *fd, int tries) { + for (int i = 0; i < tries; ++i) { + if (!close(*fd)) { + *fd = -1; + return true; + } + } + return false; +} + +//////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////// + +// see header for documentation of this function. +void CreateListeningSocket(const string& host, + const string& port, + bool is_numeric_host_address, + int backlog, + int * listen_fd, + bool reuseaddr, + bool reuseport, + ostream* error_stream) { + // start out by assuming things will fail. 
+ *listen_fd = -1; + + const char* node = NULL; + const char* service = NULL; + + if (!host.empty()) node = host.c_str(); + if (!port.empty()) service = port.c_str(); + + struct addrinfo *results = 0; + struct addrinfo hints; + memset(&hints, 0, sizeof(hints)); + + if (is_numeric_host_address) { + hints.ai_flags = AI_NUMERICHOST; // iff you know the name is numeric. + } + hints.ai_flags |= AI_PASSIVE; + + hints.ai_family = PF_INET; // we know it'll be IPv4, but if we didn't + // hints.ai_family = PF_UNSPEC; // know we'd use this. <--- + hints.ai_socktype = SOCK_STREAM; + + int err = 0; + if ((err=getaddrinfo(node, service, &hints, &results))) { + // gai_strerror -is- threadsafe, so we get to use it here. + *error_stream << "getaddrinfo " << " for (" << host << ":" << port + << ") " << gai_strerror(err) << "\n"; + return; + } + // this will delete the addrinfo memory when we return from this function. + AddrinfoGuard addrinfo_guard(results); + + int sock = socket(results->ai_family, + results->ai_socktype, + results->ai_protocol); + if (sock == -1) { + *error_stream << "Unable to create socket for (" << host << ":" + << port << "): " << strerror(errno) << "\n"; + return; + } + + if (reuseaddr) { + // set SO_REUSEADDR on the listening socket. + int on = 1; + int rc; + rc = setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, + reinterpret_cast<char *>(&on), sizeof(on)); + if (rc < 0) { + close(sock); + LOG(FATAL) << "setsockopt() failed fd=" << listen_fd << "\n"; + } + } +#ifndef SO_REUSEPORT +#define SO_REUSEPORT 15 +#endif + if (reuseport) { + // set SO_REUSEADDR on the listening socket. 
+ int on = 1; + int rc; + rc = setsockopt(sock, SOL_SOCKET, SO_REUSEPORT, + reinterpret_cast<char *>(&on), sizeof(on)); + if (rc < 0) { + close(sock); + LOG(FATAL) << "setsockopt() failed fd=" << listen_fd << "\n"; + } + } + + if (bind(sock, results->ai_addr, results->ai_addrlen)) { + *error_stream << "Bind was unsuccessful for (" << host << ":" + << port << "): " << strerror(errno) << "\n"; + // if we knew that we were not multithreaded, we could do the following: + // " : " << strerror(errno) << "\n"; + if (CloseSocket(&sock, 100)) { + return; + } else { + // couldn't even close the dang socket?! + *error_stream << "Unable to close the socket.. Considering this a fatal " + "error, and exiting\n"; + exit(EXIT_FAILURE); + } + } + + if (listen(sock, backlog)) { + // listen was unsuccessful. + *error_stream << "Listen was unsuccessful for (" << host << ":" + << port << "): " << strerror(errno) << "\n"; + // if we knew that we were not multithreaded, we could do the following: + // " : " << strerror(errno) << "\n"; + + if (CloseSocket(&sock, 100)) { + sock = -1; + return; + } else { + // couldn't even close the dang socket?! + *error_stream << "Unable to close the socket.. Considering this a fatal " + "error, and exiting\n"; + exit(EXIT_FAILURE); + } + } + // If we've gotten to here, Yeay! Success! + *listen_fd = sock; +} + +} // namespace gfe2 + diff --git a/net/tools/flip_server/create_listener.h b/net/tools/flip_server/create_listener.h new file mode 100644 index 0000000..3e6c9bb --- /dev/null +++ b/net/tools/flip_server/create_listener.h @@ -0,0 +1,45 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef NET_TOOLS_FLIP_SERVER_CREATE_LISTENER_H__ +#define NET_TOOLS_FLIP_SERVER_CREATE_LISTENER_H__ + +#include <iostream> +#include <string> + +namespace gfe2 { + +// Summary: +// creates a socket for listening, and bind()s and listen()s it. +// Args: +// host - hostname or numeric address, or empty-string if you want +// to bind to listen on all addresses +// port - a port number or service name. By service name I mean a +// -real- service name, not a Google service name. I'd suggest +// you just stick to a numeric representation like "80" +// is_numeric_host_address - +// if you know that the host address has already been looked-up, +// and will be provided in numeric form like "130.207.244.244", +// then you can set this to true, and it will save you the time +// of a DNS lookup. +// backlog - passed into listen. This is the number of pending incoming +// connections a socket which is listening may have acquired before +// the OS starts rejecting new incoming connections. +// listen_fd - this will be assigned a positive value if the socket is +// successfully created, else it will be assigned -1. +// error_stream - in the case of errors, output describing the error will +// be written into error_stream. +void CreateListeningSocket(const string& host, + const string& port, + bool is_numeric_host_address, + int backlog, + int * listen_fd, + bool reuseaddr, + bool reuseport, + ostream* error_stream); + +} // namespace gfe2 + +#endif // NET_TOOLS_FLIP_SERVER_CREATE_LISTENER_H__ + diff --git a/net/tools/flip_server/create_listener_test.cc b/net/tools/flip_server/create_listener_test.cc new file mode 100644 index 0000000..760169f --- /dev/null +++ b/net/tools/flip_server/create_listener_test.cc @@ -0,0 +1,61 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include <assert.h> + +#include <vector> +#include <string> +#include <algorithm> +#include <map> +#include <iostream> + +#include "net/tools/flip_server/create_listener.h" + +//////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////// + +struct TestCase { + string host; + string port; + bool valid; + TestCase(const string& host, const string& port, bool valid) : + host(host), port(port), valid(valid) { + } +}; + +int main(int argc, char ** argv) { + vector<TestCase> tests; + tests.push_back(TestCase("" , "8090" , true )); + tests.push_back(TestCase("invalid" , "80" , false)); // bad host spec. + tests.push_back(TestCase("127.0.0.1", "invalid", false)); // bad port spec. + tests.push_back(TestCase("127.0.0.2", "80" , false)); // priviledged port. + tests.push_back(TestCase("127.0.0.2", "8080" , true )); + tests.push_back(TestCase("127.0.0.2", "8080" , false)); // already bound. + tests.push_back(TestCase("" , "" , false)); // bad port spec. + + // create sockets and bind on all indicated interface/port combinations. + for (unsigned int i = 0; i < tests.size(); ++i) { + cerr << "test " << i << "..."; + const TestCase& test = tests[i]; + + int socket = -2; + CreateListeningSocket(test.host, + test.port, + true, + 5, + &socket, + true, + &cerr); + assert(socket != -2); + if (test.valid) { + assert(socket != -1); + } + cerr << "...done\n"; + // it would be good to invoke a seperate process (perhaps "nc"?) to + // talk to this process in order to verify that the listen worked. + } + + return EXIT_SUCCESS; +} + diff --git a/net/tools/flip_server/epoll_server.cc b/net/tools/flip_server/epoll_server.cc new file mode 100644 index 0000000..2597ada --- /dev/null +++ b/net/tools/flip_server/epoll_server.cc @@ -0,0 +1,822 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "net/tools/flip_server/epoll_server.h" + +#include <stdlib.h> // for abort +#include <errno.h> // for errno and strerror_r +#include <algorithm> +#include <iostream> +#include <utility> +#include <vector> + +#include "base/logging.h" +#include "base/timer.h" + +// Design notes: An efficient implementation of ready list has the following +// desirable properties: +// +// A. O(1) insertion into/removal from the list in any location. +// B. Once the callback is found by hash lookup using the fd, the lookup of +// corresponding entry in the list is O(1). +// C. Safe insertion into/removal from the list during list iteration. (The +// ready list's purpose is to enable completely event driven I/O model. +// Thus, all the interesting bits happen in the callback. It is critical +// to not place any restriction on the API during list iteration. +// +// The current implementation achieves these goals with the following design: +// +// - The ready list is constructed as a doubly linked list to enable O(1) +// insertion/removal (see man 3 queue). +// - The forward and backward links are directly embedded inside the +// CBAndEventMask struct. This enables O(1) lookup in the list for a given +// callback. (Techincally, we could've used std::list of hash_set::iterator, +// and keep a list::iterator in CBAndEventMask to achieve the same effect. +// However, iterators have two problems: no way to portably invalidate them, +// and no way to tell whether an iterator is singular or not. The only way to +// overcome these issues is to keep bools in both places, but that throws off +// memory alignment (up to 7 wasted bytes for each bool). The extra level of +// indirection will also likely be less cache friendly. 
Direct manipulation +// of link pointers makes it easier to retrieve the CBAndEventMask from the +// list, easier to check whether an CBAndEventMask is in the list, uses less +// memory (save 32 bytes/fd), and does not affect cache usage (we need to +// read in the struct to use the callback anyway).) +// - Embed the fd directly into CBAndEventMask and switch to using hash_set. +// This removes the need to store hash_map::iterator in the list just so that +// we can get both the fd and the callback. +// - The ready list is "one shot": each entry is removed before OnEvent is +// called. This removes the mutation-while-iterating problem. +// - Use two lists to keep track of callbacks. The ready_list_ is the one used +// for registration. Before iteration, the ready_list_ is swapped into the +// tmp_list_. Once iteration is done, tmp_list_ will be empty, and +// ready_list_ will have all the new ready fds. + +// The size we use for buffers passed to strerror_r +static const int kErrorBufferSize = 256; + +namespace gfe2 { + +// Clears the pipe and returns. Used for waking the epoll server up. +class ReadPipeCallback : public EpollCallbackInterface { + public: + void OnEvent(int fd, EpollEvent* event) { + DCHECK(event->in_events == EPOLLIN); + int data; + int data_read = 1; + // Read until the pipe is empty. 
+ while (data_read > 0) { + data_read = read(fd, &data, sizeof(data)); + } + } + void OnShutdown(EpollServer *eps, int fd) {} + void OnRegistration(EpollServer*, int, int) {} + void OnModification(int, int) {} // COV_NF_LINE + void OnUnregistration(int, bool) {} // COV_NF_LINE +}; + +//////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////// + +EpollServer::EpollServer() + : epoll_fd_(epoll_create(1024)), + timeout_in_us_(0), + recorded_now_in_us_(0), + ready_list_size_(0), + wake_cb_(new ReadPipeCallback), + read_fd_(-1), + write_fd_(-1), + in_wait_for_events_and_execute_callbacks_(false), + in_shutdown_(false) { + // ensure that the epoll_fd_ is valid. + CHECK_NE(epoll_fd_, -1); + LIST_INIT(&ready_list_); + LIST_INIT(&tmp_list_); + + int pipe_fds[2]; + if (pipe(pipe_fds) < 0) { + // Unfortunately, it is impossible to test any such initialization in + // a constructor (as virtual methods do not yet work). + // This -could- be solved by moving initialization to an outside + // call... + int saved_errno = errno; + char buf[kErrorBufferSize]; + LOG(FATAL) << "Error " << saved_errno + << " in pipe(): " << strerror_r(saved_errno, buf, sizeof(buf)); + } + read_fd_ = pipe_fds[0]; + write_fd_ = pipe_fds[1]; + RegisterFD(read_fd_, wake_cb_.get(), EPOLLIN); +} + +void EpollServer::CleanupFDToCBMap() { + FDToCBMap::iterator cb_iter = cb_map_.begin(); + while (cb_iter != cb_map_.end()) { + int fd = cb_iter->fd; + CB* cb = cb_iter->cb; + + cb_iter->in_use = true; + if (cb) { + cb->OnShutdown(this, fd); + } + + cb_map_.erase(cb_iter); + cb_iter = cb_map_.begin(); + } +} + +void EpollServer::CleanupTimeToAlarmCBMap() { + TimeToAlarmCBMap::iterator erase_it; + + // Call OnShutdown() on alarms. 
Note that the structure of the loop + // is similar to the structure of loop in the function HandleAlarms() + for (TimeToAlarmCBMap::iterator i = alarm_map_.begin(); + i != alarm_map_.end(); + ) { + // Note that OnShutdown() can call UnregisterAlarm() on + // other iterators. OnShutdown() should not call UnregisterAlarm() + // on self because by definition the iterator is not valid any more. + i->second->OnShutdown(this); + erase_it = i; + ++i; + alarm_map_.erase(erase_it); + } +} + +EpollServer::~EpollServer() { + DCHECK_EQ(in_shutdown_, false); + in_shutdown_ = true; +#ifdef GFE_GFE2_EPOLL_SERVER_EVENT_TRACING + LOG(INFO) << "\n" << event_recorder_; +#endif + VLOG(2) << "Shutting down epoll server "; + CleanupFDToCBMap(); + + LIST_INIT(&ready_list_); + LIST_INIT(&tmp_list_); + + CleanupTimeToAlarmCBMap(); + + close(read_fd_); + close(write_fd_); + close(epoll_fd_); +} + +// Whether a CBAandEventMask is on the ready list is determined by a non-NULL +// le_prev pointer (le_next being NULL indicates end of list). +inline void EpollServer::AddToReadyList(CBAndEventMask* cb_and_mask) { + if (cb_and_mask->entry.le_prev == NULL) { + LIST_INSERT_HEAD(&ready_list_, cb_and_mask, entry); + ++ready_list_size_; + } +} + +inline void EpollServer::RemoveFromReadyList( + const CBAndEventMask& cb_and_mask) { + if (cb_and_mask.entry.le_prev != NULL) { + LIST_REMOVE(&cb_and_mask, entry); + // Clean up all the ready list states. Don't bother with the other fields + // as they are initialized when the CBAandEventMask is added to the ready + // list. This saves a few cycles in the inner loop. 
+ cb_and_mask.entry.le_prev = NULL; + --ready_list_size_; + if (ready_list_size_ == 0) { + DCHECK(ready_list_.lh_first == NULL); + DCHECK(tmp_list_.lh_first == NULL); + } + } +} + +void EpollServer::RegisterFD(int fd, CB* cb, int event_mask) { + CHECK(cb); + VLOG(3) << "RegisterFD fd=" << fd << " event_mask=" << event_mask; + FDToCBMap::iterator fd_i = cb_map_.find(CBAndEventMask(NULL, 0, fd)); + if (cb_map_.end() != fd_i) { + // do we just abort, or do we just unregister the other guy? + // for now, lets just unregister the other guy. + + // unregister any callback that may already be registered for this FD. + CB* other_cb = fd_i->cb; + if (other_cb) { + // Must remove from the ready list before erasing. + RemoveFromReadyList(*fd_i); + other_cb->OnUnregistration(fd, true); + ModFD(fd, event_mask); + } else { + // already unregistered, so just recycle the node. + AddFD(fd, event_mask); + } + fd_i->cb = cb; + fd_i->event_mask = event_mask; + fd_i->events_to_fake = 0; + } else { + AddFD(fd, event_mask); + cb_map_.insert(CBAndEventMask(cb, event_mask, fd)); + } + + + // set the FD to be non-blocking. + SetNonblocking(fd); + + cb->OnRegistration(this, fd, event_mask); +} + +void EpollServer::SetNonblocking(int fd) { + int flags = GetFlags(fd); + if (flags == -1) { + int saved_errno = errno; + char buf[kErrorBufferSize]; + LOG(FATAL) << "Error " << saved_errno + << " doing fcntl(" << fd << ", F_GETFL, 0): " + << strerror_r(saved_errno, buf, sizeof(buf)); + } + if (!(flags & O_NONBLOCK)) { + int saved_flags = flags; + flags = SetFlags(fd, flags | O_NONBLOCK); + if (flags == -1) { + // bad. 
+ int saved_errno = errno; + char buf[kErrorBufferSize]; + LOG(FATAL) << "Error " << saved_errno + << " doing fcntl(" << fd << ", F_SETFL, " << saved_flags << "): " + << strerror_r(saved_errno, buf, sizeof(buf)); + } + } +} + +void EpollServer::RegisterFDForWrite(int fd, CB* cb) { + RegisterFD(fd, cb, EPOLLOUT); +} + +void EpollServer::RegisterFDForReadWrite(int fd, CB* cb) { + RegisterFD(fd, cb, EPOLLIN | EPOLLOUT); +} + +void EpollServer::RegisterFDForRead(int fd, CB* cb) { + RegisterFD(fd, cb, EPOLLIN); +} + +void EpollServer::UnregisterFD(int fd) { + FDToCBMap::iterator fd_i = cb_map_.find(CBAndEventMask(NULL, 0, fd)); + if (cb_map_.end() == fd_i || fd_i->cb == NULL) { + // Doesn't exist in server, or has gone through UnregisterFD once and still + // inside the callchain of OnEvent. + return; + } +#ifdef GFE_GFE2_EPOLL_SERVER_EVENT_TRACING + event_recorder_.RecordUnregistration(fd); +#endif + CB* cb = fd_i->cb; + // Since the links are embedded within the struct, we must remove it from the + // list before erasing it from the hash_set. + RemoveFromReadyList(*fd_i); + DelFD(fd); + cb->OnUnregistration(fd, false); + // fd_i->cb is NULL if that fd is unregistered inside the callchain of + // OnEvent. Since the EpollServer needs a valid CBAndEventMask after OnEvent + // returns in order to add it to the ready list, we cannot have UnregisterFD + // erase the entry if it is in use. Thus, a NULL fd_i->cb is used as a + // condition that tells the EpollServer that this entry is unused at a later + // point. + if (!fd_i->in_use) { + cb_map_.erase(fd_i); + } else { + // Remove all trace of the registration, and just keep the node alive long + // enough so the code that calls OnEvent doesn't have to worry about + // figuring out whether the CBAndEventMask is valid or not. 
+ fd_i->cb = NULL; + fd_i->event_mask = 0; + fd_i->events_to_fake = 0; + } +} + +void EpollServer::ModifyCallback(int fd, int event_mask) { + ModifyFD(fd, ~0, event_mask); +} + +void EpollServer::StopRead(int fd) { + ModifyFD(fd, EPOLLIN, 0); +} + +void EpollServer::StartRead(int fd) { + ModifyFD(fd, 0, EPOLLIN); +} + +void EpollServer::StopWrite(int fd) { + ModifyFD(fd, EPOLLOUT, 0); +} + +void EpollServer::StartWrite(int fd) { + ModifyFD(fd, 0, EPOLLOUT); +} + +void EpollServer::HandleEvent(int fd, int event_mask) { +#ifdef GFE_GFE2_EPOLL_SERVER_EVENT_TRACING + event_recorder_.RecordEpollEvent(fd, event_mask); +#endif + FDToCBMap::iterator fd_i = cb_map_.find(CBAndEventMask(NULL, 0, fd)); + if (fd_i == cb_map_.end() || fd_i->cb == NULL) { + // Ignore the event. + // This could occur if epoll() returns a set of events, and + // while processing event A (earlier) we removed the callback + // for event B (and are now processing event B). + return; + } + fd_i->events_asserted = event_mask; + CBAndEventMask* cb_and_mask = const_cast<CBAndEventMask*>(&*fd_i); + AddToReadyList(cb_and_mask); +} + +class TrueFalseGuard { + public: + explicit TrueFalseGuard(bool* guarded_bool) : guarded_bool_(guarded_bool) { + DCHECK(guarded_bool_ != NULL); + DCHECK(*guarded_bool_ == false); + *guarded_bool_ = true; + } + ~TrueFalseGuard() { + *guarded_bool_ = false; + } + private: + bool* guarded_bool_; +}; + +void EpollServer::WaitForEventsAndExecuteCallbacks() { + if (in_wait_for_events_and_execute_callbacks_) { + LOG(DFATAL) << + "Attempting to call WaitForEventsAndExecuteCallbacks" + " when an ancestor to the current function is already" + " WaitForEventsAndExecuteCallbacks!"; + // The line below is actually tested, but in coverage mode, + // we never see it. + return; // COV_NF_LINE + } + TrueFalseGuard recursion_guard(&in_wait_for_events_and_execute_callbacks_); + if (alarm_map_.empty()) { + // no alarms, this is business as usual. 
+ WaitForEventsAndCallHandleEvents(timeout_in_us_, + events_, + events_size_); + recorded_now_in_us_ = 0; + return; + } + + // store the 'now'. If we recomputed 'now' every iteration + // down below, then we might never exit that loop-- any + // long-running alarms might install other long-running + // alarms, etc. By storing it here now, we ensure that + // a more reasonable amount of work is done here. + int64 now_in_us = NowInUsec(); + + // Get the first timeout from the alarm_map where it is + // stored in absolute time. + int64 next_alarm_time_in_us = alarm_map_.begin()->first; + VLOG(4) << "next_alarm_time = " << next_alarm_time_in_us + << " now = " << now_in_us + << " timeout_in_us = " << timeout_in_us_; + + int64 wait_time_in_us; + int64 alarm_timeout_in_us = next_alarm_time_in_us - now_in_us; + + // If the next alarm is sooner than the default timeout, or if there is no + // timeout (timeout_in_us_ == -1), wake up when the alarm should fire. + // Otherwise use the default timeout. + if (alarm_timeout_in_us < timeout_in_us_ || timeout_in_us_ < 0) { + wait_time_in_us = std::max(alarm_timeout_in_us, static_cast<int64>(0)); + } else { + wait_time_in_us = timeout_in_us_; + } + + VLOG(4) << "wait_time_in_us = " << wait_time_in_us; + + // wait for events. + + WaitForEventsAndCallHandleEvents(wait_time_in_us, + events_, + events_size_); + CallAndReregisterAlarmEvents(); + recorded_now_in_us_ = 0; +} + +void EpollServer::SetFDReady(int fd, int events_to_fake) { + FDToCBMap::iterator fd_i = cb_map_.find(CBAndEventMask(NULL, 0, fd)); + if (cb_map_.end() != fd_i && fd_i->cb != NULL) { + // This const_cast is necessary for LIST_HEAD_INSERT to work. Declaring + // entry mutable is insufficient because LIST_HEAD_INSERT assigns the + // forward pointer of the list head to the current cb_and_mask, and the + // compiler complains that it can't assign a const T* to a T*. 
+ CBAndEventMask* cb_and_mask = const_cast<CBAndEventMask*>(&*fd_i); + // Note that there is no clearly correct behavior here when + // cb_and_mask->events_to_fake != 0 and this function is called. + // Of the two operations: + // cb_and_mask->events_to_fake = events_to_fake + // cb_and_mask->events_to_fake |= events_to_fake + // the first was picked because it discourages users from calling + // SetFDReady repeatedly to build up the correct event set as it is more + // efficient to call SetFDReady once with the correct, final mask. + cb_and_mask->events_to_fake = events_to_fake; + AddToReadyList(cb_and_mask); + } +} + +void EpollServer::SetFDNotReady(int fd) { + FDToCBMap::iterator fd_i = cb_map_.find(CBAndEventMask(NULL, 0, fd)); + if (cb_map_.end() != fd_i) { + RemoveFromReadyList(*fd_i); + } +} + +bool EpollServer::IsFDReady(int fd) const { + FDToCBMap::const_iterator fd_i = cb_map_.find(CBAndEventMask(NULL, 0, fd)); + return (cb_map_.end() != fd_i && + fd_i->cb != NULL && + fd_i->entry.le_prev != NULL); +} + +void EpollServer::VerifyReadyList() const { + int count = 0; + CBAndEventMask* cur = ready_list_.lh_first; + for (; cur; cur = cur->entry.le_next) { + ++count; + } + for (cur = tmp_list_.lh_first; cur; cur = cur->entry.le_next) { + ++count; + } + CHECK_EQ(ready_list_size_, count) << "Ready list size does not match count"; +} + +void EpollServer::RegisterAlarm(int64 timeout_time_in_us, AlarmCB* ac) { + CHECK(ac); + if (ContainsAlarm(ac)) { + LOG(FATAL) << "Alarm already exists " << ac; + } + VLOG(4) << "RegisteringAlarm at : " << timeout_time_in_us; + + TimeToAlarmCBMap::iterator alarm_iter = + alarm_map_.insert(make_pair(timeout_time_in_us, ac)); + + all_alarms_.insert(ac); + // Pass the iterator to the EpollAlarmCallbackInterface. + ac->OnRegistration(alarm_iter, this); +} + +// Unregister a specific alarm callback: iterator_token must be a +// valid iterator. The caller must ensure the validity of the iterator. 
+void EpollServer::UnregisterAlarm(const AlarmRegToken& iterator_token) { + AlarmCB* cb = iterator_token->second; + alarm_map_.erase(iterator_token); + all_alarms_.erase(cb); + cb->OnUnregistration(); +} + +int EpollServer::NumFDsRegistered() const { + DCHECK(cb_map_.size() >= 1); + // Omit the internal FD (read_fd_) + return cb_map_.size() - 1; +} + +void EpollServer::Wake() { + char data = 'd'; // 'd' is for data. It's good enough for me. + write(write_fd_, &data, 1); +} + +int64 EpollServer::NowInUsec() const { +#ifdef CHROMIUM + return Time::Now().ToInternalValue(); +#else + return WallTimer::NowInUsec(); +#endif +} + +string EpollServer::EventMaskToString(int event_mask) { + string s; + if (event_mask & EPOLLIN) s += "EPOLLIN "; + if (event_mask & EPOLLPRI) s += "EPOLLPRI "; + if (event_mask & EPOLLOUT) s += "EPOLLOUT "; + if (event_mask & EPOLLRDNORM) s += "EPOLLRDNORM "; + if (event_mask & EPOLLRDBAND) s += "EPOLLRDBAND "; + if (event_mask & EPOLLWRNORM) s += "EPOLLWRNORM "; + if (event_mask & EPOLLWRBAND) s += "EPOLLWRBAND "; + if (event_mask & EPOLLMSG) s += "EPOLLMSG "; + if (event_mask & EPOLLERR) s += "EPOLLERR "; + if (event_mask & EPOLLHUP) s += "EPOLLHUP "; + if (event_mask & EPOLLONESHOT) s += "EPOLLONESHOT "; + if (event_mask & EPOLLET) s += "EPOLLET "; + return s; +} + +void EpollServer::LogStateOnCrash() { + LOG(ERROR) << "----------------------Epoll Server---------------------------"; + LOG(ERROR) << "Epoll server " << this << " polling on fd " << epoll_fd_; + LOG(ERROR) << "timeout_in_us_: " << timeout_in_us_; + + // Log sessions with alarms. 
+ LOG(ERROR) << alarm_map_.size() << " alarms registered."; + for (TimeToAlarmCBMap::iterator it = alarm_map_.begin(); + it != alarm_map_.end(); + ++it) { + const bool skipped = + alarms_reregistered_and_should_be_skipped_.find(it->second) + != alarms_reregistered_and_should_be_skipped_.end(); + LOG(ERROR) << "Alarm " << it->second << " registered at time " << it->first + << " and should be skipped = " << skipped; + } + + LOG(ERROR) << cb_map_.size() << " fd callbacks registered."; + for (FDToCBMap::iterator it = cb_map_.begin(); + it != cb_map_.end(); + ++it) { + LOG(ERROR) << "fd: " << it->fd << " with mask " << it->event_mask + << " registered with cb: " << it->cb; + } + LOG(ERROR) << "----------------------/Epoll Server--------------------------"; +} + + + +//////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////// + +void EpollServer::DelFD(int fd) const { + struct epoll_event ee; + memset(&ee, 0, sizeof(ee)); +#ifdef GFE_GFE2_EPOLL_SERVER_EVENT_TRACING + event_recorder_.RecordFDMaskEvent(fd, 0, "DelFD"); +#endif + if (epoll_ctl(epoll_fd_, EPOLL_CTL_DEL, fd, &ee)) { + int saved_errno = errno; + char buf[kErrorBufferSize]; + LOG(FATAL) << "Epoll set removal error for fd " << fd << ": " + << strerror_r(saved_errno, buf, sizeof(buf)); + } +} + +//////////////////////////////////////// + +void EpollServer::AddFD(int fd, int event_mask) const { + struct epoll_event ee; + memset(&ee, 0, sizeof(ee)); + ee.events = event_mask | EPOLLERR | EPOLLHUP; + ee.data.fd = fd; +#ifdef GFE_GFE2_EPOLL_SERVER_EVENT_TRACING + event_recorder_.RecordFDMaskEvent(fd, ee.events, "AddFD"); +#endif + if (epoll_ctl(epoll_fd_, EPOLL_CTL_ADD, fd, &ee)) { + int saved_errno = errno; + char buf[kErrorBufferSize]; + LOG(FATAL) << "Epoll set insertion error for fd " << fd << ": " + << strerror_r(saved_errno, buf, sizeof(buf)); + } +} + +//////////////////////////////////////// + +void 
EpollServer::ModFD(int fd, int event_mask) const { + struct epoll_event ee; + memset(&ee, 0, sizeof(ee)); + ee.events = event_mask | EPOLLERR | EPOLLHUP; + ee.data.fd = fd; +#ifdef GFE_GFE2_EPOLL_SERVER_EVENT_TRACING + event_recorder_.RecordFDMaskEvent(fd, ee.events, "ModFD"); +#endif + VLOG(3) << "modifying fd= " << fd << " " + << EventMaskToString(ee.events); + if (epoll_ctl(epoll_fd_, EPOLL_CTL_MOD, fd, &ee)) { + int saved_errno = errno; + char buf[kErrorBufferSize]; + LOG(FATAL) << "Epoll set modification error for fd " << fd << ": " + << strerror_r(saved_errno, buf, sizeof(buf)); + } +} + +//////////////////////////////////////// + +void EpollServer::ModifyFD(int fd, int remove_event, int add_event) { + FDToCBMap::iterator fd_i = cb_map_.find(CBAndEventMask(NULL, 0, fd)); + if (cb_map_.end() == fd_i) { + VLOG(2) << "Didn't find the fd " << fd << "in internal structures"; + return; + } + + if (fd_i->cb != NULL) { + int & event_mask = fd_i->event_mask; + VLOG(3) << "fd= " << fd + << " event_mask before: " << EventMaskToString(event_mask); + event_mask &= ~remove_event; + event_mask |= add_event; + + VLOG(3) << " event_mask after: " << EventMaskToString(event_mask); + + ModFD(fd, event_mask); + + fd_i->cb->OnModification(fd, event_mask); + } +} + +void EpollServer::WaitForEventsAndCallHandleEvents(int64 timeout_in_us, + struct epoll_event events[], + int events_size) { + if (timeout_in_us == 0 || ready_list_.lh_first != NULL) { + // If ready list is not empty, then don't sleep at all. + timeout_in_us = 0; + } else if (timeout_in_us < 0) { + LOG(INFO) << "Negative epoll timeout: " << timeout_in_us + << "us; epoll will wait forever for events."; + // If timeout_in_us is < 0 we are supposed to Wait forever. This means we + // should set timeout_in_us to -1000 so we will + // Wait(-1000/1000) == Wait(-1) == Wait forever. + timeout_in_us = -1000; + } else { + // If timeout is specified, and the ready list is empty. 
+ if (timeout_in_us < 1000) { + timeout_in_us = 1000; + } + } + const int timeout_in_ms = timeout_in_us / 1000; + int nfds = epoll_wait_impl(epoll_fd_, + events, + events_size, + timeout_in_ms); + VLOG(3) << "nfds=" << nfds; + if (nfds == events_size) { + LOG_EVERY_N(INFO, 1000) + << "nfds=" << nfds << " as we only handle " << events_size + << " events at a time, so events[] should be larger."; + } + +#ifdef GFE_GFE2_EPOLL_SERVER_EVENT_TRACING + event_recorder_.RecordEpollWaitEvent(timeout_in_ms, nfds); +#endif + + // If you're wondering why the NowInUsec() is recorded here, the answer is + // simple: If we did it before the epoll_wait_impl, then the max error for + // the ApproximateNowInUs() call would be as large as the maximum length of + // epoll_wait, which can be arbitrarily long. Since this would make + // ApproximateNowInUs() worthless, we instead record the time -after- we've + // done epoll_wait, which guarantees that the maximum error is the amount of + // time it takes to process all the events generated by epoll_wait. + recorded_now_in_us_ = NowInUsec(); + if (nfds > 0) { + for (int i = 0; i < nfds; ++i) { + int event_mask = events[i].events; + int fd = events[i].data.fd; + HandleEvent(fd, event_mask); + } + } else if (nfds < 0) { + // Catch interrupted syscall and just ignore it and move on. + if (errno != EINTR && errno != 0) { + int saved_errno = errno; + char buf[kErrorBufferSize]; + LOG(FATAL) << "Error " << saved_errno << " in epoll_wait: " + << strerror_r(saved_errno, buf, sizeof(buf)); + } + } + + // Now run through the ready list. + if (ready_list_.lh_first) { + CallReadyListCallbacks(); + } +} + +void EpollServer::CallReadyListCallbacks() { + // Check pre-conditions. + DCHECK(tmp_list_.lh_first == NULL); + // Swap out the ready_list_ into the tmp_list_ before traversing the list to + // enable SetFDReady() to just push new items into the ready_list_. 
+ std::swap(ready_list_.lh_first, tmp_list_.lh_first); + if (tmp_list_.lh_first) { + tmp_list_.lh_first->entry.le_prev = &tmp_list_.lh_first; + EpollEvent event(0, false); + while (tmp_list_.lh_first != NULL) { + DCHECK_GT(ready_list_size_, 0); + CBAndEventMask* cb_and_mask = tmp_list_.lh_first; + RemoveFromReadyList(*cb_and_mask); + + event.out_ready_mask = 0; + event.in_events = + cb_and_mask->events_asserted | cb_and_mask->events_to_fake; + // TODO(fenix): get rid of the two separate fields in cb_and_mask. + cb_and_mask->events_asserted = 0; + cb_and_mask->events_to_fake = 0; + { + // OnEvent() may call UnRegister, so we set in_use, here. Any + // UnRegister call will now simply set the cb to NULL instead of + // invalidating the cb_and_mask object (by deleting the object in the + // map to which cb_and_mask refers) + TrueFalseGuard in_use_guard(&(cb_and_mask->in_use)); + cb_and_mask->cb->OnEvent(cb_and_mask->fd, &event); + } + + // Since OnEvent may have called UnregisterFD, we must check here that + // the callback is still valid. If it isn't, then UnregisterFD *was* + // called, and we should now get rid of the object. + if (cb_and_mask->cb == NULL) { + cb_map_.erase(*cb_and_mask); + } else if (event.out_ready_mask != 0) { + cb_and_mask->events_to_fake = event.out_ready_mask; + AddToReadyList(cb_and_mask); + } + } + } + DCHECK(tmp_list_.lh_first == NULL); +} + +const int EpollServer::kMinimumEffectiveAlarmQuantum = 1000; + +// Alarms may be up to kMinimumEffectiveAlarmQuantum -1 us late. +inline int64 EpollServer::DoRoundingOnNow(int64 now_in_us) const { + now_in_us /= kMinimumEffectiveAlarmQuantum; + now_in_us *= kMinimumEffectiveAlarmQuantum; + now_in_us += (2 * kMinimumEffectiveAlarmQuantum - 1); + return now_in_us; +} + +void EpollServer::CallAndReregisterAlarmEvents() { + int64 now_in_us = recorded_now_in_us_; + DCHECK_NE(0, recorded_now_in_us_); + now_in_us = DoRoundingOnNow(now_in_us); + + TimeToAlarmCBMap::iterator erase_it; + + // execute alarms. 
+ for (TimeToAlarmCBMap::iterator i = alarm_map_.begin(); + i != alarm_map_.end(); + ) { + if (i->first > now_in_us) { + break; + } + AlarmCB* cb = i->second; + // Execute the OnAlarm() only if we did not register + // it in this loop itself. + const bool added_in_this_round = + alarms_reregistered_and_should_be_skipped_.find(cb) + != alarms_reregistered_and_should_be_skipped_.end(); + if (added_in_this_round) { + ++i; + continue; + } + all_alarms_.erase(cb); + const int64 new_timeout_time_in_us = cb->OnAlarm(); + + erase_it = i; + ++i; + alarm_map_.erase(erase_it); + + if (new_timeout_time_in_us > 0) { + // We add to hash_set only if the new timeout is <= now_in_us. + // if timeout is > now_in_us then we have no fear that this alarm + // can be reexecuted in this loop, and hence we do not need to + // worry about a recursive loop. + DVLOG(3) << "Reregistering alarm " + << " " << cb + << " " << new_timeout_time_in_us + << " " << now_in_us; + if (new_timeout_time_in_us <= now_in_us) { + alarms_reregistered_and_should_be_skipped_.insert(cb); + } + RegisterAlarm(new_timeout_time_in_us, cb); + } + } + alarms_reregistered_and_should_be_skipped_.clear(); +} + +EpollAlarm::EpollAlarm() : eps_(NULL), registered_(false) { +} + +EpollAlarm::~EpollAlarm() { + UnregisterIfRegistered(); +} + +int64 EpollAlarm::OnAlarm() { + registered_ = false; + return 0; +} + +void EpollAlarm::OnRegistration(const EpollServer::AlarmRegToken& token, + EpollServer* eps) { + DCHECK_EQ(false, registered_); + + token_ = token; + eps_ = eps; + registered_ = true; +} + +void EpollAlarm::OnUnregistration() { + registered_ = false; +} + +void EpollAlarm::OnShutdown(EpollServer* eps) { + registered_ = false; + eps_ = NULL; +} + +// If the alarm was registered, unregister it. 
+void EpollAlarm::UnregisterIfRegistered() { + if (!registered_) { + return; + } + eps_->UnregisterAlarm(token_); +} + +} // namespace gfe2 + diff --git a/net/tools/flip_server/epoll_server.h b/net/tools/flip_server/epoll_server.h new file mode 100644 index 0000000..5999aa4 --- /dev/null +++ b/net/tools/flip_server/epoll_server.h @@ -0,0 +1,1071 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef NET_TOOLS_FLIP_SERVER_EPOLL_SERVER_H_ +#define NET_TOOLS_FLIP_SERVER_EPOLL_SERVER_H_ + +#include <fcntl.h> +#include <sys/queue.h> +#include <hash_map> +#include <hash_set> +#include <map> +#include <string> +#include <utility> +#include <set> +#include <vector> + +// #define GFE_GFE2_EPOLL_SERVER_EVENT_TRACING 1 +// +// Defining GFE_GFE2_EPOLL_SERVER_EVENT_TRACING +// causes code to exist which didn't before. +// This code tracks each event generated by the epollserver, +// as well as providing a per-fd-registered summary of +// events. Note that enabling this code vastly slows +// down operations, and uses substantially more +// memory. For these reasons, it should only be enabled when doing +// developer debugging at his/her workstation. +// +// A structure called 'EventRecorder' will exist when +// the macro is defined. See the EventRecorder class interface +// within the EpollServer class for more details. 
+#ifdef GFE_GFE2_EPOLL_SERVER_EVENT_TRACING +#include <iostream> +#include "base/logging.h" +#endif + +#include "base/scoped_ptr.h" +#include "util/hash/hash.h" + +#ifdef CHROMIUM +#include <sys/epoll.h> +#else +#include "net/base/epollstubs.h" +#endif + +namespace gfe2 { + +class EpollServer; +class EpollAlarmCallbackInterface; +class ReadPipeCallback; +class WatchDogToken; + +struct EpollEvent { + EpollEvent(int events, bool is_epoll_wait) + : in_events(events), + out_ready_mask(0) { + } + + int in_events; // incoming events + int out_ready_mask; // the new event mask for ready list (0 means don't + // get on the ready list). This field is always + // initialized to 0 when the event is passed to + // OnEvent. +}; + +// Callbacks which go into EpollServers are expected to derive from this class. +class EpollCallbackInterface { + public: + // Summary: + // Called when the callback is registered into an EpollServer. + // Args: + // eps - the poll server into which this callback was registered + // fd - the file descriptor which was registered + // event_mask - the event mask (composed of EPOLLIN, EPOLLOUT, etc) + // which was registered (and will initially be used + // in the epoll() calls) + virtual void OnRegistration(EpollServer* eps, int fd, int event_mask) = 0; + + // Summary: + // Called when the event_mask is modified (for a file-descriptor) + // Args: + // fd - the file descriptor which was registered + // event_mask - the event mask (composed of EPOLLIN, EPOLLOUT, etc) + // which is now current (and will be used + // in subsequent epoll() calls) + virtual void OnModification(int fd, int event_mask) = 0; + + // Summary: + // Called whenever an event occurs on the file-descriptor. + // This is where the bulk of processing is expected to occur. 
+ // Args: + // fd - the file descriptor which was registered + // event - a struct that contains the event mask (composed of EPOLLIN, + // EPOLLOUT, etc) in in_events, and an output parameter + // (out_ready_mask) for OnEvent to inform the EpollServer whether + // to put this fd on the ready list. EpollEvent does not store + // whether the event came from epoll_wait or from the ready list: + // its constructor accepts is_epoll_wait but does not keep it. + virtual void OnEvent(int fd, EpollEvent* event) = 0; + + // Summary: + // Called when the file-descriptor is unregistered from the poll-server. + // Args: + // fd - the file descriptor which was registered, and as of this call, is + // now unregistered. + // replaced - If true, this callback is being replaced by another, otherwise + // it is simply being removed. + virtual void OnUnregistration(int fd, bool replaced) = 0; + + // Summary: + // Called when the epoll server is shutting down. This is different from + // OnUnregistration because the subclass may want to clean up memory. + // This is called in lieu of OnUnregistration. + // Args: + // fd - the file descriptor which was registered. + virtual void OnShutdown(EpollServer* eps, int fd) = 0; + + virtual ~EpollCallbackInterface() {} + + protected: + EpollCallbackInterface() {} +}; + +//////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////// + + +//////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////// + +class EpollServer { + public: + typedef EpollAlarmCallbackInterface AlarmCB; + typedef EpollCallbackInterface CB; + + typedef multimap<int64, AlarmCB*> TimeToAlarmCBMap; + typedef TimeToAlarmCBMap::iterator AlarmRegToken; + + // Summary: + // Constructor: + // By default, we don't wait any amount of time for events, and + // we suggest to the epoll-system that we're going to use on-the-order + // of 1024 FDs. 
+ EpollServer(); + + //////////////////////////////////////// + + // Destructor + virtual ~EpollServer(); + + //////////////////////////////////////// + + // Summary + // Register a callback to be called whenever an event contained + // in the set of events included in event_mask occurs on the + // file-descriptor 'fd' + // + // Note that only one callback is allowed to be registered for + // any specific file-decriptor. + // + // If a callback is registered for a file-descriptor which has already + // been registered, then the previous callback is unregistered with + // the 'replaced' flag set to true. I.e. the previous callback's + // OnUnregistration() function is called like so: + // OnUnregistration(fd, true); + // + // The epoll server does NOT take on ownership of the callback: the callback + // creator is responsible for managing that memory. + // + // Args: + // fd - a valid file-descriptor + // cb - an instance of a subclass of EpollCallbackInterface + // event_mask - a combination of (EPOLLOUT, EPOLLIN.. etc) indicating + // the events for which the callback would like to be + // called. + virtual void RegisterFD(int fd, CB* cb, int event_mask); + + //////////////////////////////////////// + + // Summary: + // A shortcut for RegisterFD which sets things up such that the + // callback is called when 'fd' is available for writing. + // Args: + // fd - a valid file-descriptor + // cb - an instance of a subclass of EpollCallbackInterface + virtual void RegisterFDForWrite(int fd, CB* cb); + + //////////////////////////////////////// + + // Summary: + // A shortcut for RegisterFD which sets things up such that the + // callback is called when 'fd' is available for reading or writing. 
+ // Args: + // fd - a valid file-descriptor + // cb - an instance of a subclass of EpollCallbackInterface + virtual void RegisterFDForReadWrite(int fd, CB* cb); + + //////////////////////////////////////// + + // Summary: + // A shortcut for RegisterFD which sets things up such that the + // callback is called when 'fd' is available for reading. + // Args: + // fd - a valid file-descriptor + // cb - an instance of a subclass of EpollCallbackInterface + virtual void RegisterFDForRead(int fd, CB* cb); + + //////////////////////////////////////// + + // Summary: + // Removes the FD and the associated callback from the pollserver. + // If the callback is registered with other FDs, they will continue + // to be processed using the callback without modification. + // If the file-descriptor specified is not registered in the + // epoll_server, then nothing happens as a result of this call. + // Args: + // fd - the file-descriptor which should no-longer be monitored. + virtual void UnregisterFD(int fd); + + //////////////////////////////////////// + + // Summary: + // Modifies the event mask for the file-descriptor, replacing + // the old event_mask with the new one specified here. + // If the file-descriptor specified is not registered in the + // epoll_server, then nothing happens as a result of this call. + // Args: + // fd - the fd whose event mask should be modified. + // event_mask - the new event mask. + virtual void ModifyCallback(int fd, int event_mask); + + //////////////////////////////////////// + + // Summary: + // Modifies the event mask for the file-descriptor such that we + // no longer request events when 'fd' is readable. + // If the file-descriptor specified is not registered in the + // epoll_server, then nothing happens as a result of this call. + // Args: + // fd - the fd whose event mask should be modified. 
+ virtual void StopRead(int fd); + + //////////////////////////////////////// + + // Summary: + // Modifies the event mask for the file-descriptor such that we + // request events when 'fd' is readable. + // If the file-descriptor specified is not registered in the + // epoll_server, then nothing happens as a result of this call. + // Args: + // fd - the fd whose event mask should be modified. + virtual void StartRead(int fd); + + //////////////////////////////////////// + + // Summary: + // Modifies the event mask for the file-descriptor such that we + // no longer request events when 'fd' is writable. + // If the file-descriptor specified is not registered in the + // epoll_server, then nothing happens as a result of this call. + // Args: + // fd - the fd whose event mask should be modified. + virtual void StopWrite(int fd); + + //////////////////////////////////////// + + // Summary: + // Modifies the event mask for the file-descriptor such that we + // request events when 'fd' is writable. + // If the file-descriptor specified is not registered in the + // epoll_server, then nothing happens as a result of this call. + // Args: + // fd - the fd whose event mask should be modified. + virtual void StartWrite(int fd); + + //////////////////////////////////////// + + // Summary: + // Looks up the callback associated with the file-desriptor 'fd'. + // If a callback is associated with this file-descriptor, then + // it's OnEvent() method is called with the file-descriptor 'fd', + // and event_mask 'event_mask' + // + // If no callback is registered for this file-descriptor, nothing + // will happen as a result of this call. + // + // This function is used internally by the EpollServer, but is + // available publically so that events might be 'faked'. 
Calling + // this function with an fd and event_mask is equivalent (as far + // as the callback is concerned) to having a real event generated + // by epoll (except, of course, that read(), etc won't necessarily + // be able to read anything) + // Args: + // fd - the file-descriptor on which an event has occured. + // event_mask - a bitmask representing the events which have occured + // on/for this fd. This bitmask is composed of + // POLLIN, POLLOUT, etc. + // + void HandleEvent(int fd, int event_mask); + + // Summary: + // Call this when you want the pollserver to + // wait for events and execute the callbacks associated with + // the file-descriptors on which those events have occured. + // Depending on the value of timeout_in_us_, this may or may + // not return immediately. Please reference the set_timeout() + // function for the specific behaviour. + virtual void WaitForEventsAndExecuteCallbacks(); + + // Summary: + // When an fd is registered to use edge trigger notification, the ready + // list can be used to simulate level trigger semantics. Edge trigger + // registration doesn't send an initial event, and only rising edge (going + // from blocked to unblocked) events are sent. A callback can put itself on + // the ready list by calling SetFDReady() after calling RegisterFD(). The + // OnEvent method of all callbacks associated with the fds on the ready + // list will be called immediately after processing the events returned by + // epoll_wait(). The fd is removed from the ready list before the + // callback's OnEvent() method is invoked. To stay on the ready list, the + // OnEvent() (or some function in that call chain) must call SetFDReady + // again. When a fd is unregistered using UnregisterFD(), the fd is + // automatically removed from the ready list. 
+ // + // When the callback for a edge triggered fd hits the falling edge (about + // to block, either because of it got an EAGAIN, or had a short read/write + // operation), it should remove itself from the ready list using + // SetFDNotReady() (since OnEvent cannot distinguish between invocation + // from the ready list vs from a normal epoll event). All four ready list + // methods are safe to be called within the context of the callbacks. + // + // Since the ready list invokes EpollCallbackInterface::OnEvent, only fds + // that are registered with the EpollServer will be put on the ready list. + // SetFDReady() and SetFDNotReady() will do nothing if the EpollServer + // doesn't know about the fd passed in. + // + // Since the ready list cannot reliably determine proper set of events + // which should be sent to the callback, SetFDReady() requests the caller + // to provide the ready list with the event mask, which will be used later + // when OnEvent() is invoked by the ready list. Hence, the event_mask + // passedto SetFDReady() does not affect the actual epoll registration of + // the fd with the kernel. If a fd is already put on the ready list, and + // SetFDReady() is called again for that fd with a different event_mask, + // the event_mask will be updated. + virtual void SetFDReady(int fd, int events_to_fake); + + virtual void SetFDNotReady(int fd); + + // Summary: + // IsFDReady(), ReadyListSize(), and VerifyReadyList are intended as + // debugging tools and for writing unit tests. + // ISFDReady() returns whether a fd is in the ready list. + // ReadyListSize() returns the number of fds on the ready list. + // VerifyReadyList() checks the consistency of internal data structure. It + // will CHECK if it finds an error. 
+ virtual bool IsFDReady(int fd) const; + + size_t ReadyListSize() const { return ready_list_size_; } + + void VerifyReadyList() const; + + //////////////////////////////////////// + + // Summary: + // Registers an alarm 'ac' to go off at time 'timeout_time_in_us'. + // If the callback returns a positive number from its OnAlarm() function, + // then the callback will be re-registered at that time, else the alarm + // owner is responsible for freeing up memory. + // + // Important: A give AlarmCB* can not be registered again if it is already + // registered. If a user wants to register a callback again it should first + // unregister the previous callback before calling RegisterAlarm again. + // Args: + // timeout_time_in_us - the absolute time at which the alarm should go off + // ac - the alarm which will be called. + virtual void RegisterAlarm(int64 timeout_time_in_us, AlarmCB* ac); + + // Summary: + // Registers an alarm 'ac' to go off at time: (ApproximateNowInUs() + + // delta_in_us). While this is somewhat less accurate (see the description + // for ApproximateNowInUs() to see how 'approximate'), the error is never + // worse than the amount of time it takes to process all events in one + // WaitForEvents. As with 'RegisterAlarm()', if the callback returns a + // positive number from its OnAlarm() function, then the callback will be + // re-registered at that time, else the alarm owner is responsible for + // freeing up memory. + // Note that this function is purely a convienence. The + // same thing may be accomplished by using RegisterAlarm with + // ApproximateNowInUs() directly. + // + // Important: A give AlarmCB* can not be registered again if it is already + // registered. If a user wants to register a callback again it should first + // unregister the previous callback before calling RegisterAlarm again. + // Args: + // delta_in_us - the delta in microseconds from the ApproximateTimeInUs() at + // which point the alarm should go off. 
+ // ac - the alarm which will be called. + void RegisterAlarmApproximateDelta(int64 delta_in_us, AlarmCB* ac) { + RegisterAlarm(ApproximateNowInUsec() + delta_in_us, ac); + } + + //////////////////////////////////////// + + // Summary: + // Unregister the alarm referred to by iterator_token; Callers should + // be warned that a token may have become already invalid when OnAlarm() + // is called, was unregistered, or OnShutdown was called on that alarm. + // Args: + // iterator_token - iterator to the alarm callback to unregister. + virtual void UnregisterAlarm( + const EpollServer::AlarmRegToken& iterator_token); + + //////////////////////////////////////// + + // Summary: + // returns the number of file-descriptors registered in this EpollServer. + // Returns: + // number of FDs registered (discounting the internal pipe used for Wake) + virtual int NumFDsRegistered() const; + + // Summary: + // Force the epoll server to wake up (by writing to an internal pipe). + virtual void Wake(); + + // Summary: + // Wrapper around WallTimer's NowInUsec. We do this so that we can test + // EpollServer without using the system clock (and can avoid the flakiness + // that would ensue) + // Returns: + // the current time as number of microseconds since the Unix epoch. + virtual int64 NowInUsec() const; + + // Summary: + // Since calling NowInUsec() many thousands of times per + // WaitForEventsAndExecuteCallbacks function call is, to say the least, + // inefficient, we allow users to use an approximate time instead. The + // time returned from this function is as accurate as NowInUsec() when + // WaitForEventsAndExecuteCallbacks is not an ancestor of the caller's + // callstack. + // However, when WaitForEventsAndExecuteCallbacks -is- an ancestor, then + // this function returns the time at which the + // WaitForEventsAndExecuteCallbacks function started to process events or + // alarms. 
+ // + // Essentially, this function makes available a fast and mostly accurate + // mechanism for getting the time for any function handling an event or + // alarm. When functions which are not handling callbacks or alarms call + // this function, they get the slow and "absolutely" accurate time. + // + // Users should be encouraged to use this function. + // Returns: + // the "approximate" current time as number of microseconds since the Unix + // epoch. + virtual int64 ApproximateNowInUsec() const { + if (recorded_now_in_us_ != 0) { + return recorded_now_in_us_; + } + return this->NowInUsec(); + } + + static string EventMaskToString(int event_mask); + + // Summary: + // Logs the state of the epoll server with LOG(ERROR). + void LogStateOnCrash(); + + // Summary: + // Set the timeout to the value specified. + // If the timeout is set to a negative number, + // WaitForEventsAndExecuteCallbacks() will only return when an event has + // occured + // If the timeout is set to zero, + // WaitForEventsAndExecuteCallbacks() will return immediately + // If the timeout is set to a positive number, + // WaitForEventsAndExecuteCallbacks() will return when an event has + // occured, or when timeout_in_us microseconds has elapsed, whichever + // is first. + // Args: + // timeout_in_us - value specified depending on behaviour desired. + // See above. + void set_timeout_in_us(int64 timeout_in_us) { + timeout_in_us_ = timeout_in_us; + } + + //////////////////////////////////////// + + // Summary: + // Accessor for the current value of timeout_in_us. + int timeout_in_us() const { return timeout_in_us_; } + + // Summary: + // Returns true when the EpollServer() is being destroyed. + bool in_shutdown() const { return in_shutdown_; } + + bool ContainsAlarm(EpollAlarmCallbackInterface* alarm) const { + return all_alarms_.find(alarm) != all_alarms_.end(); + } + + // Summary: + // A function for implementing the ready list. 
It invokes OnEvent for each + // of the fds in the ready list, and takes care of adding them back to the + // ready list if the callback requests it (by checking that out_ready_mask + // is non-zero). + void CallReadyListCallbacks(); + + // Granularity at which time moves when considering what alarms are on. + // See function: DoRoundingOnNow() on exact usage. + static const int kMinimumEffectiveAlarmQuantum; + protected: + + // These have to be in the .h file so that we can override them in tests. + // Note that SetFlags() always ORs O_NONBLOCK into the flags it is given, + // and that, unlike GetFlags(), it is not declared virtual. + virtual inline int GetFlags(int fd) { return fcntl(fd, F_GETFL, 0); } + inline int SetFlags(int fd, int flags) { + return fcntl(fd, F_SETFL, flags | O_NONBLOCK); + } + + virtual void SetNonblocking (int fd); + + // This exists here so that we can override this function in unittests + // in order to make effective mock EpollServer objects. + // By default it simply forwards its arguments to epoll_wait(). + virtual int epoll_wait_impl(int epfd, + struct epoll_event* events, + int max_events, + int timeout_in_ms) { + return epoll_wait(epfd, events, max_events, timeout_in_ms); + } + + // This struct is used internally, and is never used by anything external + // to this class. Some of its members are declared mutable to get around the + // restriction imposed by hash_set. Since hash_set knows nothing about the + // objects it stores, it has to assume that every bit of the object is used + // in the hash function and equal_to comparison. Thus hash_set::iterator is a + // const iterator. In this case, the only thing that must stay constant is + // fd. Everything else is just along for the ride and changing it doesn't + // compromise the hash_set integrity. 
+ struct CBAndEventMask { + CBAndEventMask() + : cb(NULL), + fd(-1), + event_mask(0), + events_asserted(0), + events_to_fake(0), + in_use(false) { + entry.le_next = NULL; + entry.le_prev = NULL; + } + + CBAndEventMask(EpollCallbackInterface* cb, + int event_mask, + int fd) + : cb(cb), fd(fd), event_mask(event_mask), events_asserted(0), + events_to_fake(0), in_use(false) { + entry.le_next = NULL; + entry.le_prev = NULL; + } + + // Required operator for hash_set. Normally operator== should be a free + // standing function. However, since CBAndEventMask is a protected type and + // it will never be a base class, it makes no difference. + // Two objects compare equal when their fds are equal; no other member is + // considered. + bool operator==(const CBAndEventMask& cb_and_mask) const { + return fd == cb_and_mask.fd; + } + // A callback. If the fd is unregistered inside the callchain of OnEvent, + // the cb will be set to NULL. + mutable EpollCallbackInterface* cb; + + // List linkage; both constructors set its next/prev pointers to NULL. + mutable LIST_ENTRY(CBAndEventMask) entry; + // file descriptor registered with the epoll server. + int fd; + // the current event_mask registered for this callback. + mutable int event_mask; + // the event_mask that was returned by epoll + mutable int events_asserted; + // the event_mask for the ready list to use to call OnEvent. + mutable int events_to_fake; + // toggle around calls to OnEvent to tell UnregisterFD to not erase the + // iterator because HandleEvent is using it. + mutable bool in_use; + }; + + // Custom hash function to be used by hash_set. + // Hashes only the fd, matching CBAndEventMask::operator==. + struct CBAndEventMaskHash { + size_t operator()(const CBAndEventMask& cb_and_eventmask) const { + return hash<int>()(cb_and_eventmask.fd); + } + }; + + typedef hash_set<CBAndEventMask, CBAndEventMaskHash> FDToCBMap; + + // the following four functions are OS-specific, and are likely + // to be changed in a subclass if the poll/select method is changed + // from epoll. + + // Summary: + // Deletes a file-descriptor from the set of FDs that should be + // monitored with epoll. 
+ // Note that this only deals with modifying data relating -directly- + // with the epoll call-- it does not modify any data within the + // epoll_server. + // Args: + // fd - the file descriptor to-be-removed from the monitoring set + virtual void DelFD(int fd) const; + + //////////////////////////////////////// + + // Summary: + // Adds a file-descriptor to the set of FDs that should be + // monitored with epoll. + // Note that this only deals with modifying data relating -directly- + // with the epoll call. + // Args: + // fd - the file descriptor to-be-added to the monitoring set + // event_mask - the event mask (consisting of EPOLLIN, EPOLLOUT, etc + // OR'd together) which will be associated with this + // FD initially. + virtual void AddFD(int fd, int event_mask) const; + + //////////////////////////////////////// + + // Summary: + // Modifies a file-descriptor in the set of FDs that should be + // monitored with epoll. + // Note that this only deals with modifying data relating -directly- + // with the epoll call. + // Args: + // fd - the file descriptor to-be-added to the monitoring set + // event_mask - the event mask (consisting of EPOLLIN, EPOLLOUT, etc + // OR'd together) which will be associated with this + // FD after this call. + virtual void ModFD(int fd, int event_mask) const; + + //////////////////////////////////////// + + // Summary: + // Modified the event mask associated with an FD in the set of + // data needed by epoll. + // Events are removed before they are added, thus, if ~0 is put + // in 'remove_event', whatever is put in 'add_event' will be + // the new event mask. + // If the file-descriptor specified is not registered in the + // epoll_server, then nothing happens as a result of this call. 
+ // Args: + // fd - the file descriptor whose event mask is to be modified + // remove_event - the events which are to be removed from the current + // event_mask + // add_event - the events which are to be added to the current event_mask + // + // + virtual void ModifyFD(int fd, int remove_event, int add_event); + + //////////////////////////////////////// + + // Summary: + // Waits for events, and calls HandleEvents() for each + // fd, event pair discovered to possibly have an event. + // Note that a callback (B) may get a spurious event if + // another callback (A) has closed a file-descriptor N, and + // the callback (B) has a newly opened file-descriptor, which + // also happens to be N. + virtual void WaitForEventsAndCallHandleEvents(int64 timeout_in_us, + struct epoll_event events[], + int events_size); + + + + // Summary: + // An internal function for implementing the ready list. It adds a fd's + // CBAndEventMask to the ready list. If the fd is already on the ready + // list, it is a no-op. + void AddToReadyList(CBAndEventMask* cb_and_mask); + + // Summary: + // An internal function for implementing the ready list. It remove a fd's + // CBAndEventMask from the ready list. If the fd is not on the ready list, + // it is a no-op. + void RemoveFromReadyList(const CBAndEventMask& cb_and_mask); + + // Summary: + // Calls any pending alarms that should go off and reregisters them if they + // were recurring. + virtual void CallAndReregisterAlarmEvents(); + + // The file-descriptor created for epolling + int epoll_fd_; + + // The mapping of file-descriptor to CBAndEventMasks + FDToCBMap cb_map_; + + // TOOD(sushantj): Having this hash_set is avoidable. We currently have it + // only so that we can enforce stringent checks that a caller can not register + // the same alarm twice. One option is to have an implementation in which + // this hash_set is used only in the debug mode. 
+ hash_set<AlarmCB*> all_alarms_; + + TimeToAlarmCBMap alarm_map_; + + // The amount of time in microseconds that we'll wait before returning + // from the WaitForEventsAndExecuteCallbacks() function. + // If this is positive, wait that many microseconds. + // If this is negative, wait forever, or for the first event that occurs + // If this is zero, never wait for an event. + int64 timeout_in_us_; + + // This is nonzero only after the invocation of epoll_wait_impl within + // WaitForEventsAndCallHandleEvents and before the function + // WaitForEventsAndExecuteCallbacks returns. At all other times, this is + // zero. This enables us to have relatively accurate time returned from the + // ApproximateNowInUs() function. See that function for more details. + int64 recorded_now_in_us_; + + // This is used to implement CallAndReregisterAlarmEvents. This stores + // all alarms that were reregistered because OnAlarm() returned a + // value > 0 and the time at which they should be executed is less that + // the current time. By storing such alarms in this map we ensure + // that while calling CallAndReregisterAlarmEvents we do not call + // OnAlarm on any alarm in this set. This ensures that we do not + // go in an infinite loop. + hash_set<AlarmCB*> alarms_reregistered_and_should_be_skipped_; + + LIST_HEAD(ReadyList, CBAndEventMask) ready_list_; + LIST_HEAD(TmpList, CBAndEventMask) tmp_list_; + int ready_list_size_; + // TODO(alyssar): make this into something that scales up. + static const int events_size_ = 256; + struct epoll_event events_[256]; + + // These controls the granularity for alarms + // See function CallAndReregisterAlarmEvents() + // TODO(sushantj): Add test for this. 
+ int64 DoRoundingOnNow(int64 now_in_us) const; + +#ifdef GFE_GFE2_EPOLL_SERVER_EVENT_TRACING + struct EventRecorder { + public: + EventRecorder() : num_records_(0), record_threshold_(10000) {} + + ~EventRecorder() { + Clear(); + } + + // When a number of events equals the record threshold, + // the collected data summary for all FDs will be written + // to LOG(INFO). Note that this does not include the + // individual events (if you'reinterested in those, you'll + // have to get at them programmatically). + // After any such flushing to LOG(INFO) all events will + // be cleared. + // Note that the definition of an 'event' is a bit 'hazy', + // as it includes the 'Unregistration' event, and perhaps + // others. + void set_record_threshold(int64 new_threshold) { + record_threshold_ = new_threshold; + } + + void Clear() { + for (int i = 0; i < debug_events_.size(); ++i) { + delete debug_events_[i]; + } + debug_events_.clear(); + unregistered_fds_.clear(); + event_counts_.clear(); + } + + void MaybeRecordAndClear() { + ++num_records_; + if ((num_records_ > record_threshold_) && + (record_threshold_ > 0)) { + LOG(INFO) << "\n" << *this; + num_records_ = 0; + Clear(); + } + } + + void RecordFDMaskEvent(int fd, int mask, const char* function) { + FDMaskOutput* fdmo = new FDMaskOutput(fd, mask, function); + debug_events_.push_back(fdmo); + MaybeRecordAndClear(); + } + + void RecordEpollWaitEvent(int timeout_in_ms, + int num_events_generated) { + EpollWaitOutput* ewo = new EpollWaitOutput(timeout_in_ms, + num_events_generated); + debug_events_.push_back(ewo); + MaybeRecordAndClear(); + } + + void RecordEpollEvent(int fd, int event_mask) { + Events& events_for_fd = event_counts_[fd]; + events_for_fd.AssignFromMask(event_mask); + MaybeRecordAndClear(); + } + + friend ostream& operator<<(ostream& os, const EventRecorder& er) { + for (int i = 0; i < er.unregistered_fds_.size(); ++i) { + os << "fd: " << er.unregistered_fds_[i] << "\n"; + os << er.unregistered_fds_[i]; + } + for 
(hash_map<int, Events>::const_iterator i = er.event_counts_.begin(); + i != er.event_counts_.end(); + ++i) { + os << "fd: " << i->first << "\n"; + os << i->second; + } + for (int i = 0; i < er.debug_events_.size(); ++i) { + os << *(er.debug_events_[i]) << "\n"; + } + return os; + } + + void RecordUnregistration(int fd) { + hash_map<int, Events>::iterator i = event_counts_.find(fd); + if (i != event_counts_.end()) { + unregistered_fds_.push_back(i->second); + event_counts_.erase(i); + } + MaybeRecordAndClear(); + } + + protected: + class DebugOutput { + public: + friend ostream& operator<<(ostream& os, const DebugOutput& debug_output) { + debug_output.OutputToStream(os); + return os; + } + virtual void OutputToStream(ostream* os) const = 0; + virtual ~DebugOutput() {} + }; + + class FDMaskOutput : public DebugOutput { + public: + FDMaskOutput(int fd, int mask, const char* function) : + fd_(fd), mask_(mask), function_(function) {} + virtual void OutputToStream(ostream* os) const { + (*os) << "func: " << function_ + << "\tfd: " << fd_; + if (mask_ != 0) { + (*os) << "\tmask: " << EventMaskToString(mask_); + } + } + int fd_; + int mask_; + const char* function_; + }; + + class EpollWaitOutput : public DebugOutput { + public: + EpollWaitOutput(int timeout_in_ms, + int num_events_generated) : + timeout_in_ms_(timeout_in_ms), + num_events_generated_(num_events_generated) {} + virtual void OutputToStream(ostream* os) const { + (*os) << "timeout_in_ms: " << timeout_in_ms_ + << "\tnum_events_generated: " << num_events_generated_; + } + protected: + int timeout_in_ms_; + int num_events_generated_; + }; + + struct Events { + Events() : + epoll_in(0), + epoll_pri(0), + epoll_out(0), + epoll_rdnorm(0), + epoll_rdband(0), + epoll_wrnorm(0), + epoll_wrband(0), + epoll_msg(0), + epoll_err(0), + epoll_hup(0), + epoll_oneshot(0), + epoll_et(0) {} + + void AssignFromMask(int event_mask) { + if (event_mask & EPOLLIN) ++epoll_in; + if (event_mask & EPOLLPRI) ++epoll_pri; + if 
(event_mask & EPOLLOUT) ++epoll_out; + if (event_mask & EPOLLRDNORM) ++epoll_rdnorm; + if (event_mask & EPOLLRDBAND) ++epoll_rdband; + if (event_mask & EPOLLWRNORM) ++epoll_wrnorm; + if (event_mask & EPOLLWRBAND) ++epoll_wrband; + if (event_mask & EPOLLMSG) ++epoll_msg; + if (event_mask & EPOLLERR) ++epoll_err; + if (event_mask & EPOLLHUP) ++epoll_hup; + if (event_mask & EPOLLONESHOT) ++epoll_oneshot; + if (event_mask & EPOLLET) ++epoll_et; + }; + + friend ostream& operator<<(ostream& os, const Events& ev) { + if (ev.epoll_in) { + os << "\t EPOLLIN: " << ev.epoll_in << "\n"; + } + if (ev.epoll_pri) { + os << "\t EPOLLPRI: " << ev.epoll_pri << "\n"; + } + if (ev.epoll_out) { + os << "\t EPOLLOUT: " << ev.epoll_out << "\n"; + } + if (ev.epoll_rdnorm) { + os << "\t EPOLLRDNORM: " << ev.epoll_rdnorm << "\n"; + } + if (ev.epoll_rdband) { + os << "\t EPOLLRDBAND: " << ev.epoll_rdband << "\n"; + } + if (ev.epoll_wrnorm) { + os << "\t EPOLLWRNORM: " << ev.epoll_wrnorm << "\n"; + } + if (ev.epoll_wrband) { + os << "\t EPOLLWRBAND: " << ev.epoll_wrband << "\n"; + } + if (ev.epoll_msg) { + os << "\t EPOLLMSG: " << ev.epoll_msg << "\n"; + } + if (ev.epoll_err) { + os << "\t EPOLLERR: " << ev.epoll_err << "\n"; + } + if (ev.epoll_hup) { + os << "\t EPOLLHUP: " << ev.epoll_hup << "\n"; + } + if (ev.epoll_oneshot) { + os << "\t EPOLLONESHOT: " << ev.epoll_oneshot << "\n"; + } + if (ev.epoll_et) { + os << "\t EPOLLET: " << ev.epoll_et << "\n"; + } + return os; + } + + unsigned int epoll_in; + unsigned int epoll_pri; + unsigned int epoll_out; + unsigned int epoll_rdnorm; + unsigned int epoll_rdband; + unsigned int epoll_wrnorm; + unsigned int epoll_wrband; + unsigned int epoll_msg; + unsigned int epoll_err; + unsigned int epoll_hup; + unsigned int epoll_oneshot; + unsigned int epoll_et; + }; + + vector<DebugOutput*> debug_events_; + vector<Events> unregistered_fds_; + hash_map<int, Events> event_counts_; + int64 num_records_; + int64 record_threshold_; + }; + + void 
ClearEventRecords() { + event_recorder_.Clear(); + } + void WriteEventRecords(ostream* os) const { + (*os) << event_recorder_; + } + + mutable EventRecorder event_recorder_; + +#endif + + private: + // Helper functions used in the destructor. + void CleanupFDToCBMap(); + void CleanupTimeToAlarmCBMap(); + + // The callback registered to the fds below. As the purpose of their + // registration is to wake the epoll server it just clears the pipe and + // returns. + scoped_ptr<ReadPipeCallback> wake_cb_; + + // A pipe owned by the epoll server. The server will be registered to listen + // on read_fd_ and can be woken by Wake() which writes to write_fd_. + int read_fd_; + int write_fd_; + + // This boolean is checked to see if it is false at the top of the + // WaitForEventsAndExecuteCallbacks function. If not, then it either returns + // without doing work, and logs to ERROR, or aborts the program (in + // DEBUG mode). If so, then it sets the bool to true, does work, and + // sets it back to false when done. This catches unwanted recursion. + bool in_wait_for_events_and_execute_callbacks_; + + // Returns true when the EpollServer() is being destroyed. + bool in_shutdown_; + + DISALLOW_COPY_AND_ASSIGN(EpollServer); +}; + +class EpollAlarmCallbackInterface { + public: + // Summary: + // Called when an alarm times out. Invalidates an AlarmRegToken. + // WARNING: If a token was saved to refer to an alarm callback, OnAlarm must + // delete it, as the reference is no longer valid. + // Returns: + // the unix time (in microseconds) at which this alarm should be signaled + // again, or 0 if the alarm should be removed. + virtual int64 OnAlarm() = 0; + + // Summary: + // Called when an alarm is registered. Invalidates an AlarmRegToken. + // Args: + // token: the iterator to the alarm registered in the alarm map. + // WARNING: this token becomes invalid when the alarm fires, is + // unregistered, or OnShutdown is called on that alarm. 
+ // eps: the epoll server the alarm is registered with. + virtual void OnRegistration(const EpollServer::AlarmRegToken& token, + EpollServer* eps) = 0; + + // Summary: + // Called when an alarm is unregistered. + // WARNING: It is not valid to unregister a callback and then use the token + // that was saved to refer to the callback. + virtual void OnUnregistration() = 0; + + // Summary: + // Called when the epoll server is shutting down. + // Invalidates the AlarmRegToken that was given when this alarm was + // registered. + virtual void OnShutdown(EpollServer* eps) = 0; + + virtual ~EpollAlarmCallbackInterface() {} + + protected: + EpollAlarmCallbackInterface() {} +}; + +// A simple alarm which unregisters itself on destruction. +// +// PLEASE NOTE: +// Any classes overriding these functions must either call the implementation +// of the parent class, or it must otherwise make sure that the 'registered_' +// boolean and the token, 'token_', are updated appropriately. +class EpollAlarm : public EpollAlarmCallbackInterface { + public: + EpollAlarm(); + + virtual ~EpollAlarm(); + + // Marks the alarm as unregistered and returns 0. The return value may be + // safely ignored by subclasses. + virtual int64 OnAlarm(); + + // Marks the alarm as registered, and stores the token. + virtual void OnRegistration(const EpollServer::AlarmRegToken& token, + EpollServer* eps); + + // Marks the alarm as unregistered. + virtual void OnUnregistration(); + + // Marks the alarm as unregistered. + virtual void OnShutdown(EpollServer* eps); + + // If the alarm was registered, unregister it. 
+ void UnregisterIfRegistered(); + + bool registered() const { return registered_; } + + const EpollServer* eps() const { return eps_; } + + private: + EpollServer::AlarmRegToken token_; + EpollServer* eps_; + bool registered_; +}; + +} // namespace gfe2 + +#endif // NET_TOOLS_FLIP_SERVER_EPOLL_SERVER_H_ + diff --git a/net/tools/flip_server/flip_in_mem_edsm_server.cc b/net/tools/flip_server/flip_in_mem_edsm_server.cc new file mode 100644 index 0000000..7eab376 --- /dev/null +++ b/net/tools/flip_server/flip_in_mem_edsm_server.cc @@ -0,0 +1,2248 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include <dirent.h> +#include <linux/tcp.h> // For TCP_NODELAY +#include <sys/socket.h> +#include <sys/types.h> +#include <unistd.h> + +#include <deque> +#include <iostream> +#include <limits> +#include <vector> +#include <list> + +#include "base/google.h" +#include "base/logging.h" + +// Used to get time. Can be replaced. 
+#include "base/timer.h" + +// All "open-sourcable" +#include "net/flip/flip_frame_builder.h" +#include "net/flip/flip_framer.h" +#include "net/flip/flip_protocol.h" +#include "net/tools/flip_server/balsa_enums.h" +#include "net/tools/flip_server/balsa_frame.h" +#include "net/tools/flip_server/balsa_headers.h" +#include "net/tools/flip_server/balsa_visitor_interface.h" +#include "net/tools/flip_server/buffer_interface.h" +#include "net/tools/flip_server/create_listener.h" +#include "net/tools/flip_server/epoll_server.h" +#include "net/tools/flip_server/loadtime_measurement.h" +#include "net/tools/flip_server/ring_buffer.h" +#include "net/tools/flip_server/simple_buffer.h" +#include "net/tools/flip_server/url_to_filename_encoder.h" +#include "net/tools/flip_server/url_utilities.h" +#include "strings/split.h" +#include "thread/thread.h" +#include "third_party/openssl/ssl.h" + +//////////////////////////////////////////////////////////////////////////////// + +DEFINE_bool(use_ssl, true, + "If set to true, then the server will act as an SSL server for both" + " HTTP and FLIP"); +DEFINE_string(ssl_cert_name, "cert.pem", "The name of the cert .pem file"); +DEFINE_string(ssl_key_name, "key.pem", "The name of the key .pem file"); +DEFINE_int32(response_count_until_close, 1000 * 1000, + "The number of responses given before the server closes the" + " connection"); +DEFINE_bool(no_nagle, true, "If true, then disables the nagle algorithm"); +DEFINE_int32(accepts_per_wake, 0, + "The number of times that accept() will be called when the " + " alarm goes off when the accept_using_alarm flag is set to true." 
+ " If set to 0, accept() will be performed until the accept queue" + " is completely drained and the accept() call returns an error"); +DEFINE_int32(flip_port, 10040, "The port on which the flip server listens"); +DEFINE_int32(port, 16002, "The port on which the http server listens"); +DEFINE_int32(accept_backlog_size, 1024, + "The size of the TCP accept backlog"); +DEFINE_string(cache_base_dir, ".", "The directory where cache locates"); +DEFINE_bool(need_to_encode_url, true, "If true, then encode url to filename"); +DEFINE_bool(reuseport, false, + "If set to false a single socket will be used. If set to true" + " then a new socket will be created for each accept thread." + " Note that this only works with kernels that support" + " SO_REUSEPORT"); + +DEFINE_double(server_think_time_in_s, 0, + " The amount of time the server delays before sending back the" + "reply"); +DEFINE_bool(use_xsub, false, + "Does the server send X-Subresource headers"); +DEFINE_bool(use_xac, false, + "Does the server send X-Associated-Content headers"); +DEFINE_bool(use_cwnd_opener, false, + "Does the server advance cwnd by sending no-op packets"); +DEFINE_bool(use_compression, false, + "Does the server compress data frames"); + +DEFINE_string(urls_file, "experimental/users/fenix/flip/urls.txt", + "The path to the urls file which includes the urls for testing"); +DEFINE_string(pageload_html_file, + "experimental/users/fenix/flip/loadtime_measurement.html", + "The path to the html that does the pageload in iframe"); +DEFINE_bool(record_mode, false, + "If set to true, record requests in file named as fd used"); +DEFINE_string(record_path, ".", "The path to save the record files"); + + +//////////////////////////////////////////////////////////////////////////////// + +using gfe_flip::CONTROL_FLAG_NONE; +using gfe_flip::DATA_FLAG_COMPRESSED; +using gfe_flip::DATA_FLAG_FIN; +using gfe_flip::FIN_STREAM; +using gfe_flip::FlipControlFrame; +using gfe_flip::FlipFrame; +using 
gfe_flip::FlipDataFlags; +using gfe_flip::FlipDataFrame; +using gfe_flip::FlipFrameBuilder; +using gfe_flip::FlipFramer; +using gfe_flip::FlipFramerVisitorInterface; +using gfe_flip::FlipHeaderBlock; +using gfe_flip::FlipFinStreamControlFrame; +using gfe_flip::FlipStreamId; +using gfe_flip::FlipSynReplyControlFrame; +using gfe_flip::FlipSynStreamControlFrame; +using gfe_flip::SYN_REPLY; +using gfe_flip::SYN_STREAM; +using gfe2::BalsaFrame; +using gfe2::BalsaFrameEnums; +using gfe2::BalsaHeaders; +using gfe2::BalsaHeadersEnums; +using gfe2::BalsaVisitorInterface; +using gfe2::EpollAlarmCallbackInterface; +using gfe2::EpollCallbackInterface; +using gfe2::EpollEvent; +using gfe2::EpollServer; +using gfe2::RingBuffer; +using gfe2::SimpleBuffer; +using net::UrlUtilities; + +//////////////////////////////////////////////////////////////////////////////// + +// Creates a socket with domain, type and protocol parameters. +// Assigns the return value of socket() to *fd. +// Returns errno if an error occurs, else returns zero. +int CreateSocket(int domain, int type, int protocol, int *fd) { + CHECK(fd != NULL); + *fd = ::socket(domain, type, protocol); + return (*fd == -1) ? errno : 0; +} + +//////////////////////////////////////////////////////////////////////////////// + +// Sets an FD to be nonblocking. 
+void SetNonBlocking(int fd) { + DCHECK(fd >= 0); + + int fcntl_return = fcntl(fd, F_GETFL, 0); + CHECK_NE(fcntl_return, -1) + << "error doing fcntl(fd, F_GETFL, 0) fd: " << fd + << " errno=" << errno; + + if (fcntl_return & O_NONBLOCK) + return; + + fcntl_return = fcntl(fd, F_SETFL, fcntl_return | O_NONBLOCK); + CHECK_NE(fcntl_return, -1) + << "error doing fcntl(fd, F_SETFL, fcntl_return) fd: " << fd + << " errno=" << errno; +} + +//////////////////////////////////////////////////////////////////////////////// + +LoadtimeMeasurement global_loadtime_measurement(FLAGS_urls_file, + FLAGS_pageload_html_file); + +//////////////////////////////////////////////////////////////////////////////// + +struct GlobalSSLState { + SSL_METHOD* ssl_method; + SSL_CTX* ssl_ctx; +}; + +//////////////////////////////////////////////////////////////////////////////// + +GlobalSSLState* global_ssl_state = NULL; + +//////////////////////////////////////////////////////////////////////////////// + +// SSL stuff +void flip_init_ssl(GlobalSSLState* state) { + SSL_library_init(); + SSL_load_error_strings(); + + state->ssl_method = TLSv1_server_method(); + state->ssl_ctx = SSL_CTX_new(state->ssl_method); + if (!state->ssl_ctx) { + LOG(FATAL) << "Unable to create SSL context"; + } + if (SSL_CTX_use_certificate_file(state->ssl_ctx, + FLAGS_ssl_cert_name.c_str(), + SSL_FILETYPE_PEM) <= 0) { + LOG(FATAL) << "Unable to use cert.pem as SSL cert."; + } + if (SSL_CTX_use_PrivateKey_file(state->ssl_ctx, + FLAGS_ssl_key_name.c_str(), + SSL_FILETYPE_PEM) <= 0) { + LOG(FATAL) << "Unable to use key.pem as SSL key."; + } + if (!SSL_CTX_check_private_key(state->ssl_ctx)) { + LOG(FATAL) << "The cert.pem and key.pem files don't match"; + } +} + +SSL* flip_new_ssl(SSL_CTX* ssl_ctx) { + SSL* ssl = SSL_new(ssl_ctx); + SSL_set_accept_state(ssl); + return ssl; +} + +//////////////////////////////////////////////////////////////////////////////// + +const int kInitialDataSendersThreshold = 128; +const int 
kNormalSegmentSize = (4 * 1460) - FlipFrame::size(); + +//////////////////////////////////////////////////////////////////////////////// + +class DataFrame { + public: + const char* data; + size_t size; + bool delete_when_done; + size_t index; + DataFrame() : data(NULL), size(0), delete_when_done(false), index(0) {} + void MaybeDelete() { + if (delete_when_done) { + delete[] data; + } + } +}; + +//////////////////////////////////////////////////////////////////////////////// + +class StoreBodyAndHeadersVisitor: public BalsaVisitorInterface { + public: + BalsaHeaders headers; + string body; + bool error_; + + virtual void ProcessBodyInput(const char *input, size_t size) {} + virtual void ProcessBodyData(const char *input, size_t size) { + body.append(input, size); + } + virtual void ProcessHeaderInput(const char *input, size_t size) {} + virtual void ProcessTrailerInput(const char *input, size_t size) {} + virtual void ProcessHeaders(const BalsaHeaders& headers) { + // nothing to do here-- we're assuming that the BalsaFrame has + // been handed our headers. 
+ } + virtual void ProcessRequestFirstLine(const char* line_input, + size_t line_length, + const char* method_input, + size_t method_length, + const char* request_uri_input, + size_t request_uri_length, + const char* version_input, + size_t version_length) {} + virtual void ProcessResponseFirstLine(const char *line_input, + size_t line_length, + const char *version_input, + size_t version_length, + const char *status_input, + size_t status_length, + const char *reason_input, + size_t reason_length) {} + virtual void ProcessChunkLength(size_t chunk_length) {} + virtual void ProcessChunkExtensions(const char *input, size_t size) {} + virtual void HeaderDone() {} + virtual void MessageDone() {} + virtual void HandleHeaderError(BalsaFrame* framer) { HandleError(); } + virtual void HandleHeaderWarning(BalsaFrame* framer) { HandleError(); } + virtual void HandleChunkingError(BalsaFrame* framer) { HandleError(); } + virtual void HandleBodyError(BalsaFrame* framer) { HandleError(); } + + void HandleError() { error_ = true; } +}; + +//////////////////////////////////////////////////////////////////////////////// + +struct FileData { + void CopyFrom(const FileData& file_data) { + headers = new BalsaHeaders; + headers->CopyFrom(*(file_data.headers)); + filename = file_data.filename; + related_files = file_data.related_files; + body = file_data.body; + } + FileData(BalsaHeaders* h, const string& b) : headers(h), body(b) {} + FileData() {} + BalsaHeaders* headers; + string filename; + vector< pair<int, string> > related_files; // priority, filename + string body; +}; + +//////////////////////////////////////////////////////////////////////////////// + +class MemCacheIter { + public: + MemCacheIter() : + file_data(NULL), + priority(0), + transformed_header(false), + body_bytes_consumed(0), + stream_id(0), + max_segment_size(kInitialDataSendersThreshold), + bytes_sent(0) {} + explicit MemCacheIter(FileData* fd) : + file_data(fd), + priority(0), + transformed_header(false), + 
body_bytes_consumed(0), + stream_id(0), + max_segment_size(kInitialDataSendersThreshold), + bytes_sent(0) {} + FileData* file_data; + int priority; + bool transformed_header; + size_t body_bytes_consumed; + uint32 stream_id; + uint32 max_segment_size; + size_t bytes_sent; +}; + +//////////////////////////////////////////////////////////////////////////////// + +class MemoryCache { + public: + typedef map<string, FileData> Files; + + public: + Files files_; + string cwd_; + + void CloneFrom(const MemoryCache& mc) { + for (Files::const_iterator i = mc.files_.begin(); + i != mc.files_.end(); + ++i) { + Files::iterator out_i = + files_.insert(make_pair(i->first, FileData())).first; + out_i->second.CopyFrom(i->second); + cwd_ = mc.cwd_; + } + } + + void AddFiles() { + LOG(INFO) << "Adding files!"; + deque<string> paths; + cwd_ = FLAGS_cache_base_dir; + paths.push_back(cwd_ + "/GET_"); + DIR* current_dir = NULL; + while (!paths.empty()) { + while (current_dir == NULL && !paths.empty()) { + string current_dir_name = paths.front(); + VLOG(1) << "Attempting to open dir: \"" << current_dir_name << "\""; + current_dir = opendir(current_dir_name.c_str()); + paths.pop_front(); + + if (current_dir == NULL) { + perror("Unable to open directory. "); + current_dir_name.clear(); + continue; + } + + if (current_dir) { + VLOG(1) << "Succeeded opening"; + for (struct dirent* dir_data = readdir(current_dir); + dir_data != NULL; + dir_data = readdir(current_dir)) { + string current_entry_name = + current_dir_name + "/" + dir_data->d_name; + if (dir_data->d_type == DT_REG) { + VLOG(1) << "Found file: " << current_entry_name; + ReadAndStoreFileContents(current_entry_name.c_str()); + } else if (dir_data->d_type == DT_DIR) { + VLOG(1) << "Found subdir: " << current_entry_name; + if (string(dir_data->d_name) != "." 
&& + string(dir_data->d_name) != "..") { + VLOG(1) << "Adding to search path: " << current_entry_name; + paths.push_front(current_entry_name); + } + } + } + VLOG(1) << "Oops, no data left. Closing dir."; + closedir(current_dir); + current_dir = NULL; + } + } + } + } + + void ReadToString(const char* filename, string* output) { + output->clear(); + int fd = open(filename, 0, "r"); + if (fd == -1) + return; + char buffer[4096]; + ssize_t read_status = read(fd, buffer, sizeof(buffer)); + while (read_status > 0) { + output->append(buffer, static_cast<size_t>(read_status)); + do { + read_status = read(fd, buffer, sizeof(buffer)); + } while (read_status <= 0 && errno == EINTR); + } + close(fd); + } + + void ReadAndStoreFileContents(const char* filename) { + StoreBodyAndHeadersVisitor visitor; + BalsaFrame framer; + framer.set_balsa_visitor(&visitor); + framer.set_balsa_headers(&(visitor.headers)); + string filename_contents; + ReadToString(filename, &filename_contents); + + // Ugly hack to make everything look like 1.1. + if (filename_contents.find("HTTP/1.0") == 0) + filename_contents[7] = '1'; + + size_t pos = 0; + size_t old_pos = 0; + while (true) { + old_pos = pos; + pos += framer.ProcessInput(filename_contents.data() + pos, + filename_contents.size() - pos); + if (framer.Error() || pos == old_pos) { + LOG(ERROR) << "Unable to make forward progress, or error" + " framing file: " << filename; + if (framer.Error()) { + LOG(INFO) << "********************************************ERROR!"; + return; + } + return; + } + if (framer.MessageFullyRead()) { + // If no Content-Length or Transfer-Encoding was captured in the + // file, then the rest of the data is the body. Many of the captures + // from within Chrome don't have content-lengths. 
+ if (!visitor.body.length()) + visitor.body = filename_contents.substr(pos); + break; + } + } + visitor.headers.RemoveAllOfHeader("content-length"); + visitor.headers.RemoveAllOfHeader("transfer-encoding"); + visitor.headers.RemoveAllOfHeader("connection"); + visitor.headers.AppendHeader("transfer-encoding", "chunked"); + visitor.headers.AppendHeader("connection", "keep-alive"); + + // Experiment with changing headers for forcing use of cached + // versions of content. + // TODO(mbelshe) REMOVE ME +#if 0 + // TODO(mbelshe): append current date. + visitor.headers.RemoveAllOfHeader("date"); + if (visitor.headers.HasHeader("expires")) { + visitor.headers.RemoveAllOfHeader("expires"); + visitor.headers.AppendHeader("expires", + "Fri, 30 Aug, 2019 12:00:00 GMT"); + } +#endif + BalsaHeaders* headers = new BalsaHeaders; + headers->CopyFrom(visitor.headers); + string filename_stripped = string(filename).substr(cwd_.size() + 1); +// LOG(INFO) << "Adding file (" << visitor.body.length() << " bytes): " +// << filename_stripped; + files_[filename_stripped] = FileData(); + FileData& fd = files_[filename_stripped]; + fd = FileData(headers, visitor.body); + fd.filename = string(filename_stripped, + filename_stripped.find_first_of('/')); + if (headers->HasHeader("X-Associated-Content")) { + string content = headers->GetHeader("X-Associated-Content").ToString(); + vector<string> urls_and_priorities; + SplitStringUsing(content, "||", &urls_and_priorities); + VLOG(1) << "Examining X-Associated-Content header"; + for (int i = 0; i < urls_and_priorities.size(); ++i) { + const string& url_and_priority_pair = urls_and_priorities[i]; + vector<string> url_and_priority; + SplitStringUsing(url_and_priority_pair, "??", &url_and_priority); + if (url_and_priority.size() >= 2) { + string& priority_string = url_and_priority[0]; + string& filename_string = url_and_priority[1]; + int priority; + char* last_eaten_char; + priority = strtol(priority_string.c_str(), &last_eaten_char, 0); + if 
(last_eaten_char == + priority_string.c_str() + priority_string.size()) { + pair<int, string> entry(priority, filename_string); + VLOG(1) << "Adding associated content: " << filename_string; + fd.related_files.push_back(entry); + } + } + } + } + } + + // Called at runtime to update learned headers + // |url| is a url which contains a referrer header. + // |referrer| is the referring URL + // Adds an X-Subresource or X-Associated-Content to |referer| for |url| + void UpdateHeaders(string referrer, string file_url) { + if (!FLAGS_use_xac && !FLAGS_use_xsub) + return; + + string referrer_host_path = + net::UrlToFilenameEncoder::Encode(referrer, "GET_/"); + + FileData* fd1 = GetFileData(string("GET_") + file_url); + if (!fd1) { + LOG(ERROR) << "Updating headers for unknown url: " << file_url; + return; + } + string url = fd1->headers->GetHeader("X-Original-Url").as_string(); + string content_type = fd1->headers->GetHeader("Content-Type").as_string(); + if (content_type.length() == 0) { + LOG(ERROR) << "Skipping subresource with unknown content-type"; + return; + } + + // Now, lets see if this is the same host or not + bool same_host = (UrlUtilities::GetUrlHost(referrer) == + UrlUtilities::GetUrlHost(url)); + + // This is a hacked algorithm for figuring out what priority + // to use with pushed content. 
+ int priority = 4; + if (content_type.find("css") != std::string::npos) + priority = 1; + else if (content_type.find("cript") != std::string::npos) + priority = 1; + else if (content_type.find("html") != std::string::npos) + priority = 2; + + LOG(ERROR) << "Attempting update for " << referrer_host_path; + + FileData* fd2 = GetFileData(referrer_host_path); + if (fd2 != NULL) { + // If they are on the same host, we'll use X-Associated-Content + string header_name; + string new_value; + string delimiter; + bool related_files = false; + if (same_host && FLAGS_use_xac) { + header_name = "X-Associated-Content"; + char pri_ch = priority + '0'; + new_value = pri_ch + string("??") + url; + delimiter = "||"; + related_files = true; + } else { + if (!FLAGS_use_xsub) + return; + header_name = "X-Subresource"; + new_value = content_type + "!!" + url; + delimiter = "!!"; + } + + if (fd2->headers->HasNonEmptyHeader(header_name)) { + string existing_header = + fd2->headers->GetHeader(header_name).as_string(); + if (existing_header.find(url) != string::npos) + return; // header already recorded + + // Don't let these lists grow too long for low pri stuff. + // TODO(mbelshe) We need better algorithms for this. + if (existing_header.length() > 256 && priority > 2) + return; + + new_value = existing_header + delimiter + new_value; + } + + LOG(INFO) << "Recording " << header_name << " for " << new_value; + fd2->headers->ReplaceOrAppendHeader(header_name, new_value); + + // Add it to the related files so that it will actually get sent out. 
+ if (related_files) { + pair<int, string> entry(4, file_url); + fd2->related_files.push_back(entry); + } + } else { + LOG(ERROR) << "Failed to update headers:"; + LOG(ERROR) << "FAIL url: " << url; + LOG(ERROR) << "FAIL ref: " << referrer_host_path; + } + } + + FileData* GetFileData(const string& filename) { + Files::iterator fi = files_.end(); + if (filename.compare(filename.length() - 5, 5, ".html", 5) == 0) { + string new_filename(filename.data(), filename.size() - 5); + new_filename += ".http"; + fi = files_.find(new_filename); + } + if (fi == files_.end()) + fi = files_.find(filename); + + if (fi == files_.end()) { + return NULL; + } + return &(fi->second); + } + + bool AssignFileData(const string& filename, MemCacheIter* mci) { + mci->file_data = GetFileData(filename); + if (mci->file_data == NULL) { + LOG(ERROR) << "Could not find file data for " << filename; + return false; + } + return true; + } +}; + +//////////////////////////////////////////////////////////////////////////////// + +class NotifierInterface { + public: + virtual ~NotifierInterface() {} + virtual void Notify() = 0; +}; + +//////////////////////////////////////////////////////////////////////////////// + +class SMInterface { + public: + virtual size_t ProcessInput(const char* data, size_t len) = 0; + virtual bool MessageFullyRead() const = 0; + virtual bool Error() const = 0; + virtual const char* ErrorAsString() const = 0; + virtual void Reset() = 0; + virtual void ResetForNewConnection() = 0; + + virtual void PostAcceptHook() = 0; + + virtual void NewStream(uint32 stream_id, uint32 priority, + const string& filename) = 0; + virtual void SendEOF(uint32 stream_id) = 0; + virtual void SendErrorNotFound(uint32 stream_id) = 0; + virtual size_t SendSynStream(uint32 stream_id, + const BalsaHeaders& headers) = 0; + virtual size_t SendSynReply(uint32 stream_id, + const BalsaHeaders& headers) = 0; + virtual void SendDataFrame(uint32 stream_id, const char* data, int64 len, + uint32 flags, bool 
compress) = 0; + virtual void GetOutput() = 0; + + virtual ~SMInterface() {} +}; + +//////////////////////////////////////////////////////////////////////////////// + +class SMServerConnection; +typedef SMInterface*(SMInterfaceFactory)(SMServerConnection* conn); + +//////////////////////////////////////////////////////////////////////////////// + +typedef list<DataFrame> OutputList; + +//////////////////////////////////////////////////////////////////////////////// + +class SMServerConnection; + +class SMServerConnectionPoolInterface { + public: + virtual ~SMServerConnectionPoolInterface() {} + // SMServerConnections will use this: + virtual void SMServerConnectionDone(SMServerConnection* conn) = 0; +}; + +//////////////////////////////////////////////////////////////////////////////// + +class SMServerConnection: public EpollCallbackInterface, + public NotifierInterface { + private: + SMServerConnection(SMInterfaceFactory* sm_interface_factory, + MemoryCache* memory_cache, + EpollServer* epoll_server) : + fd_(-1), + record_fd_(-1), + events_(0), + + registered_in_epoll_server_(false), + initialized_(false), + + connection_pool_(NULL), + epoll_server_(epoll_server), + + read_buffer_(4096*10), + memory_cache_(memory_cache), + sm_interface_(sm_interface_factory(this)), + + max_bytes_sent_per_dowrite_(128), + + ssl_(NULL) {} + + int fd_; + int record_fd_; + int events_; + + bool registered_in_epoll_server_; + bool initialized_; + + SMServerConnectionPoolInterface* connection_pool_; + EpollServer* epoll_server_; + + RingBuffer read_buffer_; + + OutputList output_list_; + MemoryCache* memory_cache_; + SMInterface* sm_interface_; + + size_t max_bytes_sent_per_dowrite_; + + SSL* ssl_; + public: + EpollServer* epoll_server() { return epoll_server_; } + OutputList* output_list() { return &output_list_; } + MemoryCache* memory_cache() { return memory_cache_; } + int record_fd() { return record_fd_; } + void close_record_fd() { + if (record_fd_ != -1) { + close(record_fd_); + 
record_fd_ = -1; + } + } + void ReadyToSend() { + epoll_server_->SetFDReady(fd_, EPOLLIN | EPOLLOUT); + } + void EnqueueDataFrame(const DataFrame& df) { + output_list_.push_back(df); + VLOG(2) << "EnqueueDataFrame. Setting FD ready."; + ReadyToSend(); + } + + public: + ~SMServerConnection() { + if (initialized()) { + Reset(); + } + } + static SMServerConnection* NewSMServerConnection(SMInterfaceFactory* smif, + MemoryCache* memory_cache, + EpollServer* epoll_server) { + return new SMServerConnection(smif, memory_cache, epoll_server); + } + + bool initialized() const { return initialized_; } + + void InitSMServerConnection(SMServerConnectionPoolInterface* connection_pool, + EpollServer* epoll_server, + int fd) { + if (initialized_) { + LOG(FATAL) << "Attempted to initialize already initialized server"; + return; + } + if (epoll_server_ && registered_in_epoll_server_ && fd_ != -1) { + epoll_server_->UnregisterFD(fd_); + } + if (fd_ != -1) { + VLOG(2) << "Closing pre-existing fd"; + close(fd_); + fd_ = -1; + } + if (FLAGS_record_mode) { + char record_file_name[1024]; + snprintf(record_file_name, 1024, "%s/%d_%lld", + FLAGS_record_path.c_str(), fd, GetCurrentTimeMillis()); + record_fd_ = open(record_file_name, O_CREAT|O_APPEND|O_WRONLY, S_IRWXU); + if (record_fd_ < 0) { + LOG(ERROR) << "Open record file for fd " << fd << " failed"; + record_fd_ = -1; + } + } + + fd_ = fd; + + registered_in_epoll_server_ = false; + initialized_ = true; + + connection_pool_ = connection_pool; + epoll_server_ = epoll_server; + + sm_interface_->Reset(); + read_buffer_.Clear(); + + epoll_server_->RegisterFD(fd_, this, EPOLLIN | EPOLLOUT | EPOLLET); + + if (global_ssl_state) { + ssl_ = flip_new_ssl(global_ssl_state->ssl_ctx); + SSL_set_fd(ssl_, fd_); + } + sm_interface_->PostAcceptHook(); + } + + int Send(const char* bytes, int len, int flags) { + return send(fd_, bytes, len, flags); + } + + // the following are from the EpollCallbackInterface + virtual void OnRegistration(EpollServer* eps, 
int fd, int event_mask) { + registered_in_epoll_server_ = true; + } + virtual void OnModification(int fd, int event_mask) { } + virtual void OnEvent(int fd, EpollEvent* event) { + events_ |= event->in_events; + HandleEvents(); + if (events_) { + event->out_ready_mask = events_; + events_ = 0; + } + } + virtual void OnUnregistration(int fd, bool replaced) { + registered_in_epoll_server_ = false; + } + virtual void OnShutdown(EpollServer* eps, int fd) { + Cleanup("OnShutdown"); + return; + } + + private: + void HandleEvents() { + VLOG(1) << "Received: " << EpollServer::EventMaskToString(events_); + if (events_ & EPOLLIN) { + if (!DoRead()) + goto handle_close_or_error; + } + + if (events_ & EPOLLOUT) { + if (!DoWrite()) + goto handle_close_or_error; + } + + if (events_ & (EPOLLHUP | EPOLLERR)) { + VLOG(2) << "!!!! Got HUP or ERR"; + goto handle_close_or_error; + } + return; + + handle_close_or_error: + Cleanup("HandleEvents"); + } + + bool DoRead() { + VLOG(2) << "DoRead()"; + if (fd_ == -1) { + VLOG(2) << "DoRead(): fd_ == -1. Invalid FD. 
Returning false"; + return false; + } + while (!read_buffer_.Full()) { + char* bytes; + int size; + read_buffer_.GetWritablePtr(&bytes, &size); + ssize_t bytes_read = 0; + if (ssl_) { + bytes_read = SSL_read(ssl_, bytes, size); + } else { + bytes_read = recv(fd_, bytes, size, MSG_DONTWAIT); + } + int stored_errno = errno; + if (bytes_read == -1) { + switch (stored_errno) { + case EAGAIN: + events_ &= ~EPOLLIN; + VLOG(2) << "Got EAGAIN while reading"; + goto done; + case EINTR: + VLOG(2) << "Got EINTR while reading"; + continue; + default: + VLOG(2) << "While calling recv, got error: " << stored_errno + << " " << strerror(stored_errno); + goto error_or_close; + } + } else if (bytes_read > 0) { + VLOG(2) << "Read: " << bytes_read << " bytes from fd: " << fd_; + read_buffer_.AdvanceWritablePtr(bytes_read); + if (!DoConsumeReadData()) { + goto error_or_close; + } + continue; + } else { // bytes_read == 0 + VLOG(2) << "0 bytes read with recv call."; + } + goto error_or_close; + } + done: + return true; + + error_or_close: + VLOG(2) << "DoRead(): error_or_close. Cleaning up, then returning false"; + Cleanup("DoRead"); + return false; + } + + bool DoConsumeReadData() { + char* bytes; + int size; + read_buffer_.GetReadablePtr(&bytes, &size); + while (size != 0) { + size_t bytes_consumed = sm_interface_->ProcessInput(bytes, size); + VLOG(2) << "consumed: " << bytes_consumed << " from socket fd: " << fd_; + if (bytes_consumed == 0) { + break; + } + read_buffer_.AdvanceReadablePtr(bytes_consumed); + if (sm_interface_->MessageFullyRead()) { + VLOG(2) << "HandleRequestFullyRead"; + HandleRequestFullyRead(); + sm_interface_->Reset(); + events_ |= EPOLLOUT; + } else if (sm_interface_->Error()) { + LOG(ERROR) << "Framer error detected: " + << sm_interface_->ErrorAsString(); + // this causes everything to be closed/cleaned up. 
+ events_ |= EPOLLOUT; + return false; + } + read_buffer_.GetReadablePtr(&bytes, &size); + } + return true; + } + + void WriteResponse() { + // this happens asynchronously from separate threads + // feeding files into the output buffer. + } + + void HandleRequestFullyRead() { + } + + void Notify() { + } + + bool DoWrite() { + size_t bytes_sent = 0; + int flags = MSG_NOSIGNAL | MSG_DONTWAIT; + if (fd_ == -1) { + VLOG(2) << "DoWrite: fd == -1. Returning false."; + return false; + } + if (output_list_.empty()) { + sm_interface_->GetOutput(); + if (output_list_.empty()) + events_ &= ~EPOLLOUT; + } + while (!output_list_.empty()) { + if (bytes_sent >= max_bytes_sent_per_dowrite_) { + events_ |= EPOLLOUT; + break; + } + if (output_list_.size() < 2) { + sm_interface_->GetOutput(); + } + DataFrame& data_frame = output_list_.front(); + const char* bytes = data_frame.data; + int size = data_frame.size; + bytes += data_frame.index; + size -= data_frame.index; + DCHECK_GE(size, 0); + if (size <= 0) { + data_frame.MaybeDelete(); + output_list_.pop_front(); + continue; + } + + flags = MSG_NOSIGNAL | MSG_DONTWAIT; + if (output_list_.size() > 1) { + flags |= MSG_MORE; + } + ssize_t bytes_written = 0; + if (ssl_) { + bytes_written = SSL_write(ssl_, bytes, size); + } else { + bytes_written = send(fd_, bytes, size, flags); + } + int stored_errno = errno; + if (bytes_written == -1) { + switch (stored_errno) { + case EAGAIN: + events_ &= ~EPOLLOUT; + VLOG(2) << " Got EAGAIN while writing"; + goto done; + case EINTR: + VLOG(2) << " Got EINTR while writing"; + continue; + default: + VLOG(2) << "While calling send, got error: " << stored_errno + << " " << strerror(stored_errno); + goto error_or_close; + } + } else if (bytes_written > 0) { + VLOG(1) << "Wrote: " << bytes_written << " bytes to socket fd: " + << fd_; + data_frame.index += bytes_written; + bytes_sent += bytes_written; + continue; + } + VLOG(2) << "0 bytes written to socket " << fd_ << " with send call."; + goto 
error_or_close; + } + done: + return true; + + error_or_close: + VLOG(2) << "DoWrite: error_or_close. Returning false after cleaning up"; + Cleanup("DoWrite"); + return false; + } + + friend ostream& operator<<(ostream& os, const SMServerConnection& c) { + os << &c << "\n"; + return os; + } + + void Reset() { + VLOG(2) << "Resetting"; + if (ssl_) { + SSL_shutdown(ssl_); + SSL_free(ssl_); + } + if (registered_in_epoll_server_) { + epoll_server_->UnregisterFD(fd_); + registered_in_epoll_server_ = false; + } + if (fd_ >= 0) { + VLOG(2) << "Closing connection"; + close(fd_); + fd_ = -1; + } + sm_interface_->ResetForNewConnection(); + read_buffer_.Clear(); + initialized_ = false; + events_ = 0; + output_list_.clear(); + } + + void Cleanup(const char* cleanup) { + VLOG(2) << "Cleaning up: " << cleanup; + if (!initialized_) { + return; + } + Reset(); + connection_pool_->SMServerConnectionDone(this); + } +}; + +//////////////////////////////////////////////////////////////////////////////// + +class OutputOrdering { + public: + typedef list<MemCacheIter> PriorityRing; + + typedef map<uint32, PriorityRing> PriorityMap; + + struct PriorityMapPointer { + PriorityMapPointer(): ring(NULL), alarm_enabled(false) {} + PriorityRing* ring; + PriorityRing::iterator it; + bool alarm_enabled; + EpollServer::AlarmRegToken alarm_token; + }; + typedef map<uint32, PriorityMapPointer> StreamIdToPriorityMap; + + StreamIdToPriorityMap stream_ids_; + PriorityMap priority_map_; + PriorityRing first_data_senders_; + uint32 first_data_senders_threshold_; // when you've passed this, you're no + // longer a first_data_sender... 
+ SMServerConnection* connection_; + EpollServer* epoll_server_; + + explicit OutputOrdering(SMServerConnection* connection) : + first_data_senders_threshold_(kInitialDataSendersThreshold), + connection_(connection), + epoll_server_(connection->epoll_server()) { + } + + void Reset() { + while (!stream_ids_.empty()) { + StreamIdToPriorityMap::iterator sitpmi = stream_ids_.begin(); + PriorityMapPointer& pmp = sitpmi->second; + if (pmp.alarm_enabled) { + epoll_server_->UnregisterAlarm(pmp.alarm_token); + } + stream_ids_.erase(sitpmi); + } + priority_map_.clear(); + first_data_senders_.clear(); + } + + bool ExistsInPriorityMaps(uint32 stream_id) { + StreamIdToPriorityMap::iterator sitpmi = stream_ids_.find(stream_id); + return sitpmi != stream_ids_.end(); + } + + struct BeginOutputtingAlarm : public EpollAlarmCallbackInterface { + public: + BeginOutputtingAlarm(OutputOrdering* oo, + OutputOrdering::PriorityMapPointer* pmp, + const MemCacheIter& mci) : + output_ordering_(oo), pmp_(pmp), mci_(mci), epoll_server_(NULL) {} + + int64 OnAlarm() { + OnUnregistration(); + output_ordering_->MoveToActive(pmp_, mci_); + VLOG(1) << "ON ALARM! 
Should now start to output..."; + delete this; + return 0; + } + void OnRegistration(const EpollServer::AlarmRegToken& tok, + EpollServer* eps) { + epoll_server_ = eps; + pmp_->alarm_token = tok; + pmp_->alarm_enabled = true; + } + void OnUnregistration() { + pmp_->alarm_enabled = false; + } + void OnShutdown(EpollServer* eps) { + OnUnregistration(); + } + ~BeginOutputtingAlarm() { + if (epoll_server_ && pmp_->alarm_enabled) + epoll_server_->UnregisterAlarm(pmp_->alarm_token); + } + private: + OutputOrdering* output_ordering_; + OutputOrdering::PriorityMapPointer* pmp_; + MemCacheIter mci_; + EpollServer* epoll_server_; + }; + + void MoveToActive(PriorityMapPointer* pmp, MemCacheIter mci) { + VLOG(1) <<"Moving to active!"; + first_data_senders_.push_back(mci); + pmp->ring = &first_data_senders_; + pmp->it = first_data_senders_.end(); + --pmp->it; + connection_->ReadyToSend(); + } + + void AddToOutputOrder(const MemCacheIter& mci) { + if (ExistsInPriorityMaps(mci.stream_id)) + LOG(FATAL) << "OOps, already was inserted here?!"; + + StreamIdToPriorityMap::iterator sitpmi; + sitpmi = stream_ids_.insert( + pair<uint32, PriorityMapPointer>(mci.stream_id, + PriorityMapPointer())).first; + PriorityMapPointer& pmp = sitpmi->second; + + BeginOutputtingAlarm* boa = new BeginOutputtingAlarm(this, &pmp, mci); + epoll_server_->RegisterAlarmApproximateDelta( + FLAGS_server_think_time_in_s * 1000000, boa); + } + + void SpliceToPriorityRing(PriorityRing::iterator pri) { + MemCacheIter& mci = *pri; + PriorityMap::iterator pmi = priority_map_.find(mci.priority); + if (pmi == priority_map_.end()) { + pmi = priority_map_.insert( + pair<uint32, PriorityRing>(mci.priority, PriorityRing())).first; + } + + pmi->second.splice(pmi->second.end(), + first_data_senders_, + pri); + StreamIdToPriorityMap::iterator sitpmi = stream_ids_.find(mci.stream_id); + sitpmi->second.ring = &(pmi->second); + } + + MemCacheIter* GetIter() { + while (!first_data_senders_.empty()) { + MemCacheIter& mci = 
first_data_senders_.front(); + if (mci.bytes_sent >= first_data_senders_threshold_) { + SpliceToPriorityRing(first_data_senders_.begin()); + } else { + first_data_senders_.splice(first_data_senders_.end(), + first_data_senders_, + first_data_senders_.begin()); + mci.max_segment_size = kInitialDataSendersThreshold; + return &mci; + } + } + while (!priority_map_.empty()) { + PriorityRing& first_ring = priority_map_.begin()->second; + if (first_ring.empty()) { + priority_map_.erase(priority_map_.begin()); + continue; + } + MemCacheIter& mci = first_ring.front(); + first_ring.splice(first_ring.end(), + first_ring, + first_ring.begin()); + mci.max_segment_size = kNormalSegmentSize; + return &mci; + } + return NULL; + } + + void RemoveStreamId(uint32 stream_id) { + StreamIdToPriorityMap::iterator sitpmi = stream_ids_.find(stream_id); + if (sitpmi == stream_ids_.end()) + return; + PriorityMapPointer& pmp = sitpmi->second; + if (pmp.alarm_enabled) { + epoll_server_->UnregisterAlarm(pmp.alarm_token); + } else { + pmp.ring->erase(pmp.it); + } + + stream_ids_.erase(sitpmi); + } +}; + +//////////////////////////////////////////////////////////////////////////////// + +class FlipSM : public FlipFramerVisitorInterface, public SMInterface { + private: + uint64 seq_num_; + FlipFramer* framer_; + + SMServerConnection* connection_; + OutputList* output_list_; + OutputOrdering output_ordering_; + MemoryCache* memory_cache_; + uint32 next_outgoing_stream_id_; + public: + explicit FlipSM(SMServerConnection* connection) : + seq_num_(0), + framer_(new FlipFramer), + connection_(connection), + output_list_(connection->output_list()), + output_ordering_(connection), + memory_cache_(connection->memory_cache()), + next_outgoing_stream_id_(2) { + framer_->set_visitor(this); + } + private: + virtual void OnError(FlipFramer* framer) { + /* do nothing with this right now */ + } + + virtual void OnControl(const FlipControlFrame* frame) { + FlipHeaderBlock headers; + bool parsed_headers = false; + 
switch (frame->type()) { + case SYN_STREAM: + { + parsed_headers = framer_->ParseHeaderBlock(frame, &headers); + VLOG(2) << "OnSyn(" << frame->stream_id() << ")"; + VLOG(2) << "headers parsed?: " << (parsed_headers? "yes": "no"); + if (parsed_headers) { + VLOG(2) << "# headers: " << headers.size(); + } + int j = 0; + for (FlipHeaderBlock::iterator i = headers.begin(); + i != headers.end(); + ++i) { + VLOG(2) << i->first << ": " << i->second; + if (FLAGS_record_mode && connection_->record_fd() > 0) { + // If record mode is enabled and corresponding server connection + // has file opened, then save the request headers into the file. + // All the requests from the same connection is save in one file. + // This file will be used to replay and generate FLIP requests + // load. + string header = i->first + ": " + i->second + "\n"; + ++j; + if (j == headers.size()) { + header += "\n"; // add an additional empty lime + } + int r = write( + connection_->record_fd(), header.c_str(), header.size()); + if (r < 0) { + perror("unable to write to record file:"); + } + } + } + + FlipHeaderBlock::iterator method = headers.find("method"); + FlipHeaderBlock::iterator url = headers.find("url"); + if (url == headers.end() || method == headers.end()) { + VLOG(2) << "didn't find method or url or method. Not creating stream"; + break; + } + + FlipHeaderBlock::iterator referer = headers.find("referer"); + if (referer != headers.end() && method->second == "GET") { + memory_cache_->UpdateHeaders(referer->second, url->second); + } + string uri = UrlUtilities::GetUrlPath(url->second); + string host = UrlUtilities::GetUrlHost(url->second); + // requests started with /testing are loadtime measurement related + // urls, use LoadtimeMeasurement class to handle them. 
+ if (uri.find("/testing") == 0) { + string output; + global_loadtime_measurement.ProcessRequest(uri, output); + SendOKResponse(frame->stream_id(), &output); + } else { + string filename; + if (FLAGS_need_to_encode_url) { + filename = net::UrlToFilenameEncoder::Encode( + "http://" + host + uri, method->second + "_/"); + } else { + filename = string(method->second + "_" + url->second); + } + + NewStream(frame->stream_id(), + reinterpret_cast<const FlipSynStreamControlFrame*>(frame)-> + priority(), + filename); + } + } + break; + + case SYN_REPLY: + parsed_headers = framer_->ParseHeaderBlock(frame, &headers); + VLOG(2) << "OnSynReply(" << frame->stream_id() << ")"; + break; + case FIN_STREAM: + VLOG(2) << "OnFin(" << frame->stream_id() << ")"; + output_ordering_.RemoveStreamId(frame->stream_id()); + + break; + default: + LOG(DFATAL) << "Unknown control frame type"; + } + } + virtual void OnStreamFrameData( + FlipStreamId stream_id, + const char* data, size_t len) { + VLOG(2) << "StreamData(" << stream_id << ", [" << len << "])"; + /* do nothing with this right now */ + } + virtual void OnLameDuck() { + /* do nothing with this right now */ + } + + public: + ~FlipSM() { + Reset(); + } + size_t ProcessInput(const char* data, size_t len) { + return framer_->ProcessInput(data, len); + } + + bool MessageFullyRead() const { + return framer_->MessageFullyRead(); + } + + bool Error() const { + return framer_->HasError(); + } + + const char* ErrorAsString() const { + return FlipFramer::ErrorCodeToString(framer_->error_code()); + } + + void Reset() {} + void ResetForNewConnection() { + // seq_num is not cleared, intentionally. + delete framer_; + framer_ = new FlipFramer; + framer_->set_visitor(this); + output_ordering_.Reset(); + next_outgoing_stream_id_ = 2; + } + + // Send a couple of NOOP packets to force opening of cwnd. 
+ void PostAcceptHook() { + if (!FLAGS_use_cwnd_opener) + return; + + // We send 2 because that is the initial cwnd, and also because + // we have to in order to get an ACK back from the client due to + // delayed ACK. + const int kPkts = 2; + + LOG(ERROR) << "Sending NOP FRAMES"; + + scoped_ptr<FlipControlFrame> frame(FlipFramer::CreateNopFrame()); + for (int i = 0; i < kPkts; ++i) { + char* bytes = frame->data(); + size_t size = FlipFrame::size(); + ssize_t bytes_written = connection_->Send(bytes, size, MSG_DONTWAIT); + if (bytes_written != size) { + LOG(ERROR) << "Trouble sending Nop packet! (" << errno << ")"; + if (errno == EAGAIN) + break; + } + } + } + + void AddAssociatedContent(FileData* file_data) { + for (int i = 0; i < file_data->related_files.size(); ++i) { + pair<int, string>& related_file = file_data->related_files[i]; + MemCacheIter mci; + string filename = "GET_"; + filename += related_file.second; + if (!memory_cache_->AssignFileData(filename, &mci)) { + VLOG(1) << "Unable to find associated content for: " << filename; + continue; + } + VLOG(1) << "Adding associated content: " << filename; + mci.stream_id = next_outgoing_stream_id_; + next_outgoing_stream_id_ += 2; + mci.priority = related_file.first; + AddToOutputOrder(mci); + } + } + + void NewStream(uint32 stream_id, uint32 priority, const string& filename) { + MemCacheIter mci; + mci.stream_id = stream_id; + mci.priority = priority; + if (!memory_cache_->AssignFileData(filename, &mci)) { + // error creating new stream. 
+ VLOG(2) << "Sending ErrorNotFound"; + SendErrorNotFound(stream_id); + } else { + AddToOutputOrder(mci); + if (FLAGS_use_xac) { + AddAssociatedContent(mci.file_data); + } + } + } + + void AddToOutputOrder(const MemCacheIter& mci) { + output_ordering_.AddToOutputOrder(mci); + } + + void SendEOF(uint32 stream_id) { + SendEOFImpl(stream_id); + } + + void SendErrorNotFound(uint32 stream_id) { + SendErrorNotFoundImpl(stream_id); + } + + void SendOKResponse(uint32 stream_id, string* output) { + SendOKResponseImpl(stream_id, output); + } + + size_t SendSynStream(uint32 stream_id, const BalsaHeaders& headers) { + return SendSynStreamImpl(stream_id, headers); + } + + size_t SendSynReply(uint32 stream_id, const BalsaHeaders& headers) { + return SendSynReplyImpl(stream_id, headers); + } + + void SendDataFrame(uint32 stream_id, const char* data, int64 len, + uint32 flags, bool compress) { + FlipDataFlags flip_flags = static_cast<FlipDataFlags>(flags); + SendDataFrameImpl(stream_id, data, len, flip_flags, compress); + } + + FlipFramer* flip_framer() { return framer_; } + + private: + void SendEOFImpl(uint32 stream_id) { + SendDataFrame(stream_id, NULL, 0, DATA_FLAG_FIN, false); + VLOG(2) << "Sending EOF: " << stream_id; + KillStream(stream_id); + } + + void SendErrorNotFoundImpl(uint32 stream_id) { + BalsaHeaders my_headers; + my_headers.SetFirstlineFromStringPieces("HTTP/1.1", "404", "Not Found"); + SendSynReplyImpl(stream_id, my_headers); + SendDataFrame(stream_id, "wtf?", 4, DATA_FLAG_FIN, false); + output_ordering_.RemoveStreamId(stream_id); + } + + void SendOKResponseImpl(uint32 stream_id, string* output) { + BalsaHeaders my_headers; + my_headers.SetFirstlineFromStringPieces("HTTP/1.1", "200", "OK"); + SendSynReplyImpl(stream_id, my_headers); + SendDataFrame( + stream_id, output->c_str(), output->size(), DATA_FLAG_FIN, false); + output_ordering_.RemoveStreamId(stream_id); + } + + void KillStream(uint32 stream_id) { + output_ordering_.RemoveStreamId(stream_id); + } + + 
void CopyHeaders(FlipHeaderBlock& dest, const BalsaHeaders& headers) { + for (BalsaHeaders::const_header_lines_iterator hi = + headers.header_lines_begin(); + hi != headers.header_lines_end(); + ++hi) { + FlipHeaderBlock::iterator fhi = dest.find(hi->first.ToString()); + if (fhi == dest.end()) { + dest[hi->first.ToString()] = hi->second.ToString(); + } else { + dest[hi->first.ToString()] = ( + string(fhi->second.data(), fhi->second.size()) + "," + + string(hi->second.data(), hi->second.size())); + } + } + + // These headers have no value + dest.erase("X-Associated-Content"); // TODO(mbelshe): case-sensitive + dest.erase("X-Original-Url"); // TODO(mbelshe): case-sensitive + } + + size_t SendSynStreamImpl(uint32 stream_id, const BalsaHeaders& headers) { + FlipHeaderBlock block; + block["method"] = headers.request_method().ToString(); + if (!headers.HasHeader("status")) + block["status"] = headers.response_code().ToString(); + if (!headers.HasHeader("version")) + block["version"] =headers.response_version().ToString(); + if (headers.HasHeader("X-Original-Url")) { + string original_url = headers.GetHeader("X-Original-Url").as_string(); + block["path"] = UrlUtilities::GetUrlPath(original_url); + } else { + block["path"] = headers.request_uri().ToString(); + } + CopyHeaders(block, headers); + + FlipSynStreamControlFrame* fsrcf = + framer_->CreateSynStream(stream_id, 0, CONTROL_FLAG_NONE, true, &block); + DataFrame df; + df.size = fsrcf->length() + FlipFrame::size(); + size_t df_size = df.size; + df.data = fsrcf->data(); + df.delete_when_done = true; + EnqueueDataFrame(df); + + VLOG(2) << "Sending SynStreamheader " << stream_id; + return df_size; + } + + size_t SendSynReplyImpl(uint32 stream_id, const BalsaHeaders& headers) { + FlipHeaderBlock block; + CopyHeaders(block, headers); + block["status"] = headers.response_code().ToString() + " " + + headers.response_reason_phrase().ToString(); + block["version"] = headers.response_version().ToString(); + + 
FlipSynReplyControlFrame* fsrcf = + framer_->CreateSynReply(stream_id, CONTROL_FLAG_NONE, true, &block); + DataFrame df; + df.size = fsrcf->length() + FlipFrame::size(); + size_t df_size = df.size; + df.data = fsrcf->data(); + df.delete_when_done = true; + EnqueueDataFrame(df); + + VLOG(2) << "Sending SynReplyheader " << stream_id; + return df_size; + } + + void SendDataFrameImpl(uint32 stream_id, const char* data, int64 len, + FlipDataFlags flags, bool compress) { + // Force compression off if disabled via command line. + if (!FLAGS_use_compression) + flags = static_cast<FlipDataFlags>(flags & ~DATA_FLAG_COMPRESSED); + + // TODO(mbelshe): We can't compress here - before going into the + // priority queue. Compression needs to be done + // with late binding. + FlipDataFrame* fdf = framer_->CreateDataFrame(stream_id, data, len, + flags); + DataFrame df; + df.size = fdf->length() + FlipFrame::size(); + df.data = fdf->data(); + df.delete_when_done = true; + EnqueueDataFrame(df); + + VLOG(2) << "Sending data frame" << stream_id << " [" << len << "]" + << " shrunk to " << fdf->length(); + } + + void EnqueueDataFrame(const DataFrame& df) { + connection_->EnqueueDataFrame(df); + } + + void GetOutput() { + while (output_list_->size() < 2) { + MemCacheIter* mci = output_ordering_.GetIter(); + if (mci == NULL) { + VLOG(2) << "GetOutput: nothing to output!?"; + return; + } + if (!mci->transformed_header) { + mci->transformed_header = true; + VLOG(2) << "GetOutput transformed header stream_id: [" + << mci->stream_id << "]"; + if ((mci->stream_id % 2) == 0) { + // this is a server initiated stream. + // Ideally, we'd do a 'syn-push' here, instead of a syn-reply. 
+ BalsaHeaders headers; + headers.CopyFrom(*(mci->file_data->headers)); + headers.ReplaceOrAppendHeader("status", "200"); + headers.ReplaceOrAppendHeader("version", "http/1.1"); + headers.SetRequestFirstlineFromStringPieces("PUSH", + mci->file_data->filename, + ""); + mci->bytes_sent = SendSynStream(mci->stream_id, headers); + } else { + BalsaHeaders headers; + headers.CopyFrom(*(mci->file_data->headers)); + mci->bytes_sent = SendSynReply(mci->stream_id, headers); + } + return; + } + if (mci->body_bytes_consumed >= mci->file_data->body.size()) { + VLOG(2) << "GetOutput remove_stream_id: [" << mci->stream_id << "]"; + SendEOF(mci->stream_id); + return; + } + size_t num_to_write = + mci->file_data->body.size() - mci->body_bytes_consumed; + if (num_to_write > mci->max_segment_size) + num_to_write = mci->max_segment_size; + + bool should_compress = false; + if (!mci->file_data->headers->HasHeader("content-encoding")) { + if (mci->file_data->headers->HasHeader("content-type")) { + string content_type = + mci->file_data->headers->GetHeader("content-type").ToString(); + if (content_type.find("image") == content_type.npos) + should_compress = true; + } + } + + SendDataFrame(mci->stream_id, + mci->file_data->body.data() + mci->body_bytes_consumed, + num_to_write, 0, should_compress); + VLOG(2) << "GetOutput SendDataFrame[" << mci->stream_id + << "]: " << num_to_write; + mci->body_bytes_consumed += num_to_write; + mci->bytes_sent += num_to_write; + } + } +}; + +//////////////////////////////////////////////////////////////////////////////// + +class HTTPSM : public BalsaVisitorInterface, public SMInterface { + private: + uint64 seq_num_; + BalsaFrame* framer_; + BalsaHeaders headers_; + uint32 stream_id_; + + SMServerConnection* connection_; + OutputList* output_list_; + OutputOrdering output_ordering_; + MemoryCache* memory_cache_; + public: + explicit HTTPSM(SMServerConnection* connection) : + seq_num_(0), + framer_(new BalsaFrame), + stream_id_(1), + 
connection_(connection), + output_list_(connection->output_list()), + output_ordering_(connection), + memory_cache_(connection->memory_cache()) { + framer_->set_balsa_visitor(this); + framer_->set_balsa_headers(&headers_); + } + private: + typedef map<string, uint32> ClientTokenMap; + private: + virtual void ProcessBodyInput(const char *input, size_t size) { + } + virtual void ProcessBodyData(const char *input, size_t size) { + // ignoring this. + } + virtual void ProcessHeaderInput(const char *input, size_t size) { + } + virtual void ProcessTrailerInput(const char *input, size_t size) {} + virtual void ProcessHeaders(const BalsaHeaders& headers) { + VLOG(2) << "Got new request!"; + // requests started with /testing are loadtime measurement related + // urls, use LoadtimeMeasurement class to handle them. + if (headers.request_uri().ToString().find("/testing") == 0) { + string output; + global_loadtime_measurement.ProcessRequest( + headers.request_uri().ToString(), output); + SendOKResponse(stream_id_, &output); + stream_id_ += 2; + } else { + string filename; + if (FLAGS_need_to_encode_url) { + filename = net::UrlToFilenameEncoder::Encode( + headers.GetHeader("Host").ToString() + + headers.request_uri().ToString(), + headers.request_method().ToString() + "_/"); + } else { + filename = headers.request_method().ToString() + "_" + + headers.request_uri().ToString(); + } + NewStream(stream_id_, 0, filename); + stream_id_ += 2; + } + } + virtual void ProcessRequestFirstLine(const char* line_input, + size_t line_length, + const char* method_input, + size_t method_length, + const char* request_uri_input, + size_t request_uri_length, + const char* version_input, + size_t version_length) {} + virtual void ProcessResponseFirstLine(const char *line_input, + size_t line_length, + const char *version_input, + size_t version_length, + const char *status_input, + size_t status_length, + const char *reason_input, + size_t reason_length) {} + virtual void ProcessChunkLength(size_t 
chunk_length) {} + virtual void ProcessChunkExtensions(const char *input, size_t size) {} + virtual void HeaderDone() {} + virtual void MessageDone() { + VLOG(2) << "MessageDone!"; + } + virtual void HandleHeaderError(BalsaFrame* framer) { + HandleError(); + } + virtual void HandleHeaderWarning(BalsaFrame* framer) {} + virtual void HandleChunkingError(BalsaFrame* framer) { + HandleError(); + } + virtual void HandleBodyError(BalsaFrame* framer) { + HandleError(); + } + + void HandleError() { + VLOG(2) << "Error detected"; + } + + public: + ~HTTPSM() { + Reset(); + } + size_t ProcessInput(const char* data, size_t len) { + return framer_->ProcessInput(data, len); + } + + bool MessageFullyRead() const { + return framer_->MessageFullyRead(); + } + + bool Error() const { + return framer_->Error(); + } + + const char* ErrorAsString() const { + return BalsaFrameEnums::ErrorCodeToString(framer_->ErrorCode()); + } + + void Reset() { + framer_->Reset(); + } + + void ResetForNewConnection() { + seq_num_ = 0; + output_ordering_.Reset(); + framer_->Reset(); + } + + void PostAcceptHook() { + } + + void NewStream(uint32 stream_id, uint32 priority, const string& filename) { + MemCacheIter mci; + mci.stream_id = stream_id; + mci.priority = priority; + if (!memory_cache_->AssignFileData(filename, &mci)) { + SendErrorNotFound(stream_id); + } else { + AddToOutputOrder(mci); + } + } + + void AddToOutputOrder(const MemCacheIter& mci) { + output_ordering_.AddToOutputOrder(mci); + } + + void SendEOF(uint32 stream_id) { + SendEOFImpl(stream_id); + } + + void SendErrorNotFound(uint32 stream_id) { + SendErrorNotFoundImpl(stream_id); + } + + void SendOKResponse(uint32 stream_id, string* output) { + SendOKResponseImpl(stream_id, output); + } + + size_t SendSynStream(uint32 stream_id, const BalsaHeaders& headers) { + return 0; + } + + size_t SendSynReply(uint32 stream_id, const BalsaHeaders& headers) { + return SendSynReplyImpl(stream_id, headers); + } + + void SendDataFrame(uint32 stream_id, 
const char* data, int64 len, + uint32 flags, bool compress) { + SendDataFrameImpl(stream_id, data, len, flags, compress); + } + + BalsaFrame* flip_framer() { return framer_; } + + private: + void SendEOFImpl(uint32 stream_id) { + DataFrame df; + df.data = "0\r\n\r\n"; + df.size = 5; + df.delete_when_done = false; + EnqueueDataFrame(df); + } + + void SendErrorNotFoundImpl(uint32 stream_id) { + BalsaHeaders my_headers; + my_headers.SetFirstlineFromStringPieces("HTTP/1.1", "404", "Not Found"); + my_headers.RemoveAllOfHeader("content-length"); + my_headers.HackHeader("transfer-encoding", "chunked"); + SendSynReplyImpl(stream_id, my_headers); + SendDataFrame(stream_id, "wtf?", 4, 0, false); + SendEOFImpl(stream_id); + output_ordering_.RemoveStreamId(stream_id); + } + + void SendOKResponseImpl(uint32 stream_id, string* output) { + BalsaHeaders my_headers; + my_headers.SetFirstlineFromStringPieces("HTTP/1.1", "200", "OK"); + my_headers.RemoveAllOfHeader("content-length"); + my_headers.HackHeader("transfer-encoding", "chunked"); + SendSynReplyImpl(stream_id, my_headers); + SendDataFrame(stream_id, output->c_str(), output->size(), 0, false); + SendEOFImpl(stream_id); + output_ordering_.RemoveStreamId(stream_id); + } + + size_t SendSynReplyImpl(uint32 stream_id, const BalsaHeaders& headers) { + SimpleBuffer sb; + headers.WriteHeaderAndEndingToBuffer(&sb); + DataFrame df; + df.size = sb.ReadableBytes(); + char* buffer = new char[df.size]; + df.data = buffer; + df.delete_when_done = true; + sb.Read(buffer, df.size); + VLOG(2) << "******************Sending HTTP Reply header " << stream_id; + size_t df_size = df.size; + EnqueueDataFrame(df); + return df_size; + } + + size_t SendSynStreamImpl(uint32 stream_id, const BalsaHeaders& headers) { + SimpleBuffer sb; + headers.WriteHeaderAndEndingToBuffer(&sb); + DataFrame df; + df.size = sb.ReadableBytes(); + char* buffer = new char[df.size]; + df.data = buffer; + df.delete_when_done = true; + sb.Read(buffer, df.size); + VLOG(2) << 
"******************Sending HTTP Reply header " << stream_id; + size_t df_size = df.size; + EnqueueDataFrame(df); + return df_size; + } + + void SendDataFrameImpl(uint32 stream_id, const char* data, int64 len, + uint32 flags, bool compress) { + char chunk_buf[128]; + snprintf(chunk_buf, sizeof(chunk_buf), "%x\r\n", (unsigned int)len); + string chunk_description(chunk_buf); + DataFrame df; + df.size = chunk_description.size() + len + 2; + char* buffer = new char[df.size]; + df.data = buffer; + df.delete_when_done = true; + memcpy(buffer, chunk_description.data(), chunk_description.size()); + memcpy(buffer + chunk_description.size(), data, len); + memcpy(buffer + chunk_description.size() + len, "\r\n", 2); + EnqueueDataFrame(df); + } + + void EnqueueDataFrame(const DataFrame& df) { + connection_->EnqueueDataFrame(df); + } + + void GetOutput() { + MemCacheIter* mci = output_ordering_.GetIter(); + if (mci == NULL) { + VLOG(2) << "GetOutput: nothing to output!?"; + return; + } + if (!mci->transformed_header) { + mci->bytes_sent = SendSynReply(mci->stream_id, + *(mci->file_data->headers)); + mci->transformed_header = true; + VLOG(2) << "GetOutput transformed header stream_id: [" + << mci->stream_id << "]"; + return; + } + if (mci->body_bytes_consumed >= mci->file_data->body.size()) { + SendEOF(mci->stream_id); + output_ordering_.RemoveStreamId(mci->stream_id); + VLOG(2) << "GetOutput remove_stream_id: [" << mci->stream_id << "]"; + return; + } + size_t num_to_write = + mci->file_data->body.size() - mci->body_bytes_consumed; + if (num_to_write > mci->max_segment_size) + num_to_write = mci->max_segment_size; + SendDataFrame(mci->stream_id, + mci->file_data->body.data() + mci->body_bytes_consumed, + num_to_write, 0, true); + VLOG(2) << "GetOutput SendDataFrame[" << mci->stream_id + << "]: " << num_to_write; + mci->body_bytes_consumed += num_to_write; + mci->bytes_sent += num_to_write; + } +}; + +//////////////////////////////////////////////////////////////////////////////// 
+ +class SMAcceptorThread : public Thread, + public EpollCallbackInterface, + public SMServerConnectionPoolInterface { + EpollServer epoll_server_; + int listen_fd_; + int accepts_per_wake_; + + vector<SMServerConnection*> unused_server_connections_; + vector<SMServerConnection*> tmp_unused_server_connections_; + vector<SMServerConnection*> allocated_server_connections_; + Notification quitting_; + SMInterfaceFactory* sm_interface_factory_; + MemoryCache* memory_cache_; + Mutex m_; + public: + + SMAcceptorThread(int listen_fd, + int accepts_per_wake, + SMInterfaceFactory* smif, + MemoryCache* memory_cache) : + listen_fd_(listen_fd), + accepts_per_wake_(accepts_per_wake), + quitting_(false), + sm_interface_factory_(smif), + memory_cache_(memory_cache) { + Thread::SetJoinable(true); + } + + ~SMAcceptorThread() { + for (vector<SMServerConnection*>::iterator i = + allocated_server_connections_.begin(); + i != allocated_server_connections_.end(); + ++i) { + delete *i; + } + } + + SMServerConnection* NewConnection() { + SMServerConnection* server = + SMServerConnection::NewSMServerConnection(sm_interface_factory_, + memory_cache_, + &epoll_server_); + allocated_server_connections_.push_back(server); + VLOG(3) << "Making new server: " << server; + return server; + } + + SMServerConnection* FindOrMakeNewSMServerConnection() { + if (unused_server_connections_.empty()) { + return NewConnection(); + } + SMServerConnection* retval = unused_server_connections_.back(); + unused_server_connections_.pop_back(); + return retval; + } + + + void InitWorker() { + epoll_server_.RegisterFD(listen_fd_, this, EPOLLIN | EPOLLET); + } + + void HandleConnection(int client_fd) { + SMServerConnection* server_connection = FindOrMakeNewSMServerConnection(); + if (server_connection == NULL) { + VLOG(2) << "Closing " << client_fd; + close(client_fd); + return; + } + server_connection->InitSMServerConnection(this, + &epoll_server_, + client_fd); + } + + void AcceptFromListenFD() { + if 
(accepts_per_wake_ > 0) { + for (int i = 0; i < accepts_per_wake_; ++i) { + struct sockaddr address; + socklen_t socklen = sizeof(address); + int fd = accept(listen_fd_, &address, &socklen); + if (fd == -1) { + VLOG(2) << "accept fail(" << listen_fd_ << "): " << errno; + break; + } + VLOG(2) << "********************Accepted fd: " << fd << "\n\n\n"; + HandleConnection(fd); + } + } else { + while (true) { + struct sockaddr address; + socklen_t socklen = sizeof(address); + int fd = accept(listen_fd_, &address, &socklen); + if (fd == -1) { + VLOG(2) << "accept fail(" << listen_fd_ << "): " << errno; + break; + } + VLOG(2) << "********************Accepted fd: " << fd << "\n\n\n"; + HandleConnection(fd); + } + } + } + + // EpollCallbackInteface virtual functions. + virtual void OnRegistration(EpollServer* eps, int fd, int event_mask) { } + virtual void OnModification(int fd, int event_mask) { } + virtual void OnEvent(int fd, EpollEvent* event) { + if (event->in_events | EPOLLIN) { + VLOG(2) << "Accepting based upon epoll events"; + AcceptFromListenFD(); + } + } + virtual void OnUnregistration(int fd, bool replaced) { } + virtual void OnShutdown(EpollServer* eps, int fd) { } + + void Quit() { + quitting_.Notify(); + } + + void Run() { + while (!quitting_.HasBeenNotified()) { + epoll_server_.set_timeout_in_us(10 * 1000); // 10 ms + epoll_server_.WaitForEventsAndExecuteCallbacks(); + unused_server_connections_.insert(unused_server_connections_.end(), + tmp_unused_server_connections_.begin(), + tmp_unused_server_connections_.end()); + tmp_unused_server_connections_.clear(); + } + } + + // SMServerConnections will use this: + virtual void SMServerConnectionDone(SMServerConnection* sc) { + VLOG(3) << "Done with server connection: " << sc; + sc->close_record_fd(); + tmp_unused_server_connections_.push_back(sc); + } +}; + +//////////////////////////////////////////////////////////////////////////////// + +SMInterface* NewFlipSM(SMServerConnection* connection) { + return new 
FlipSM(connection); +} + +SMInterface* NewHTTPSM(SMServerConnection* connection) { + return new HTTPSM(connection); +} + +//////////////////////////////////////////////////////////////////////////////// + +int CreateListeningSocket(int port, int backlog_size, + bool reuseport, bool no_nagle) { + int listening_socket = 0; + char port_buf[256]; + snprintf(port_buf, sizeof(port_buf), "%d", port); + cerr <<" Attempting to listen on port: " << port_buf << "\n"; + cerr <<" input port: " << port << "\n"; + gfe2::CreateListeningSocket("", + port_buf, + true, + backlog_size, + &listening_socket, + true, + reuseport, + &cerr); + SetNonBlocking(listening_socket); + if (no_nagle) { + // set SO_REUSEADDR on the listening socket. + int on = 1; + int rc; + rc = setsockopt(listening_socket, IPPROTO_TCP, TCP_NODELAY, + reinterpret_cast<char *>(&on), sizeof(on)); + if (rc < 0) { + close(listening_socket); + LOG(FATAL) << "setsockopt() failed fd=" << listening_socket << "\n"; + } + } + return listening_socket; +} + +//////////////////////////////////////////////////////////////////////////////// + +bool GotQuitFromStdin() { + // Make stdin nonblocking. Yes this is done each time. Oh well. 
+ fcntl(0, F_SETFL, O_NONBLOCK); + char c; + string maybequit; + while (read(0, &c, 1) > 0) { + maybequit += c; + } + if (maybequit.size()) { + VLOG(2) << "scanning string: \"" << maybequit << "\""; + } + return (maybequit.size() > 1 && + (maybequit.c_str()[0] == 'q' || + maybequit.c_str()[0] == 'Q')); +} + + +//////////////////////////////////////////////////////////////////////////////// + +const char* BoolToStr(bool b) { + if (b) + return "true"; + return "false"; +} + +//////////////////////////////////////////////////////////////////////////////// + +int main(int argc, char**argv) { + InitGoogleExceptChangeRootAndUser(argv[0], &argc, &argv, true); + + bool use_ssl = FLAGS_use_ssl; + int response_count_until_close = FLAGS_response_count_until_close; + int flip_port = FLAGS_flip_port; + int port = FLAGS_port; + int backlog_size = FLAGS_accept_backlog_size; + bool reuseport = FLAGS_reuseport; + bool no_nagle = FLAGS_no_nagle; + double server_think_time_in_s = FLAGS_server_think_time_in_s; + int accepts_per_wake = FLAGS_accepts_per_wake; + int num_threads = 1; + + MemoryCache flip_memory_cache; + flip_memory_cache.AddFiles(); + + MemoryCache http_memory_cache; + http_memory_cache.CloneFrom(flip_memory_cache); + + LOG(INFO) << + "Starting up with the following state: \n" + " use_ssl: " << use_ssl << "\n" + " response_count_until_close: " << response_count_until_close << "\n" + " port: " << port << "\n" + " flip_port: " << flip_port << "\n" + " backlog_size: " << backlog_size << "\n" + " reuseport: " << BoolToStr(reuseport) << "\n" + " no_nagle: " << BoolToStr(no_nagle) << "\n" + " server_think_time_in_s: " << server_think_time_in_s << "\n" + " accepts_per_wake: " << accepts_per_wake << "\n" + " num_threads: " << num_threads << "\n" + " use_xsub: " << BoolToStr(FLAGS_use_xsub) << "\n" + " use_xac: " << BoolToStr(FLAGS_use_xac) << "\n"; + + if (use_ssl) { + global_ssl_state = new GlobalSSLState; + flip_init_ssl(global_ssl_state); + } else { + global_ssl_state = NULL; 
+ } + EpollServer epoll_server; + vector<SMAcceptorThread*> sm_worker_threads_; + + { + // flip + int listen_fd = -1; + + if (reuseport || listen_fd == -1) { + listen_fd = CreateListeningSocket(flip_port, backlog_size, + reuseport, no_nagle); + if (listen_fd < 0) { + LOG(FATAL) << "Unable to open listening socket on flip_port: " + << flip_port; + } else { + LOG(INFO) << "Listening for flip on port: " << flip_port; + } + } + sm_worker_threads_.push_back( + new SMAcceptorThread(listen_fd, + accepts_per_wake, + &NewFlipSM, + &flip_memory_cache)); + // Note that flip_memory_cache is not threadsafe, it is merely + // thread compatible. Thus, if ever we are to spawn multiple threads, + // we either must make the MemoryCache threadsafe, or use + // a separate MemoryCache for each thread. + // + // The latter is what is currently being done as we spawn + // two threads (one for flip, one for http). + sm_worker_threads_.back()->InitWorker(); + sm_worker_threads_.back()->Start(); + } + + { + // http + int listen_fd = -1; + if (reuseport || listen_fd == -1) { + listen_fd = CreateListeningSocket(port, backlog_size, + reuseport, no_nagle); + if (listen_fd < 0) { + LOG(FATAL) << "Unable to open listening socket on port: " << port; + } else { + LOG(INFO) << "Listening for HTTP on port: " << port; + } + } + sm_worker_threads_.push_back( + new SMAcceptorThread(listen_fd, + accepts_per_wake, + &NewHTTPSM, + &http_memory_cache)); + // Note that flip_memory_cache is not threadsafe, it is merely + // thread compatible. Thus, if ever we are to spawn multiple threads, + // we either must make the MemoryCache threadsafe, or use + // a separate MemoryCache for each thread. + // + // The latter is what is currently being done as we spawn + // two threads (one for flip, one for http). 
+ sm_worker_threads_.back()->InitWorker(); + sm_worker_threads_.back()->Start(); + } + + while (true) { + if (GotQuitFromStdin()) { + for (int i = 0; i < sm_worker_threads_.size(); ++i) { + sm_worker_threads_[i]->Quit(); + } + for (int i = 0; i < sm_worker_threads_.size(); ++i) { + sm_worker_threads_[i]->Join(); + } + return 0; + } + usleep(1000*10); // 10 ms + } + return 0; +} + diff --git a/net/tools/flip_server/http_message_constants.cc b/net/tools/flip_server/http_message_constants.cc new file mode 100644 index 0000000..8d3b7f8 --- /dev/null +++ b/net/tools/flip_server/http_message_constants.cc @@ -0,0 +1,146 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "net/tools/flip_server/http_message_constants.h" + +namespace gfe2 { + +const char* get_http_status_message(int status_message) { + switch (status_message) { + case 100: + return "Continue"; + case 101: + return "Switching Protocols"; + case 200: + return "OK"; + case 201: + return "Created"; + case 202: + return "Accepted"; + case 203: + return "Non-Authoritative Information"; + case 204: + return "No Content"; + case 205: + return "Reset Content"; + case 206: + return "Partial Content"; + case 300: + return "Multiple Choices"; + case 301: + return "Moved Permanently"; + case 302: + return "Found"; + case 303: + return "See Other"; + case 304: + return "Not Modified"; + case 305: + return "Use Proxy"; + case 307: + return "Temporary Redirect"; + case 400: + return "Bad Request"; + case 401: + return "Unauthorized"; + case 402: + return "Payment Required"; + case 403: + return "Forbidden"; + case 404: + return "Not Found"; + case 405: + return "Method Not Allowed"; + case 406: + return "Not Acceptable"; + case 407: + return "Proxy Authentication Required"; + case 408: + return "Request Time-out"; + case 409: + return "Conflict"; + case 410: + return "Gone"; + case 411: + 
return "Length Required"; + case 412: + return "Precondition Failed"; + case 413: + return "Request Entity Too Large"; + case 414: + return "Request-URI Too Large"; + case 415: + return "Unsupported Media Type"; + case 416: + return "Requested range not satisfiable"; + case 417: + return "Expectation Failed"; + case 500: + return "Internal Server Error"; + case 501: + return "Not Implemented"; + case 502: + return "Bad Gateway"; + case 503: + return "Service Unavailable"; + case 504: + return "Gateway Time-out"; + case 505: + return "HTTP Version not supported"; + } + return "unknown"; +} + +//////////////////////////////////////////////////////////////////////////////// + +const int http_status_codes[] = { + 100, + 101, + 200, + 201, + 202, + 203, + 204, + 205, + 206, + 300, + 301, + 302, + 303, + 304, + 305, + 307, + 400, + 401, + 402, + 403, + 404, + 405, + 406, + 407, + 408, + 409, + 410, + 411, + 412, + 413, + 414, + 415, + 416, + 417, + 500, + 501, + 502, + 503, + 504, + 505 +}; + +//////////////////////////////////////////////////////////////////////////////// + +const int http_status_code_count = sizeof(http_status_codes) / + sizeof(http_status_codes[0]); + +} // namespace gfe2 + diff --git a/net/tools/flip_server/http_message_constants.h b/net/tools/flip_server/http_message_constants.h new file mode 100644 index 0000000..fdee50b --- /dev/null +++ b/net/tools/flip_server/http_message_constants.h @@ -0,0 +1,17 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef NET_TOOLS_FLIP_SERVER_HTTP_MESSAGE_CONSTANTS_H__ +#define NET_TOOLS_FLIP_SERVER_HTTP_MESSAGE_CONSTANTS_H__ + +namespace gfe2 { + +const char* get_http_status_message(int status_message); +extern const int http_status_codes[]; +extern const int http_status_code_count; + +} // namespace gfe2 + +#endif // NET_TOOLS_FLIP_SERVER_HTTP_MESSAGE_CONSTANTS_H__ + diff --git a/net/tools/flip_server/loadtime_measurement.h b/net/tools/flip_server/loadtime_measurement.h new file mode 100644 index 0000000..e57bca7 --- /dev/null +++ b/net/tools/flip_server/loadtime_measurement.h @@ -0,0 +1,120 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef NET_TOOLS_FLIP_SERVER_LOADTIME_MEASUREMENT_H__ +#define NET_TOOLS_FLIP_SERVER_LOADTIME_MEASUREMENT_H__ + +#include <fcntl.h> +#include <stdio.h> +#include <sys/types.h> +#include <unistd.h> + +#include <map> +#include <string> +#include <vector> + +// Class to handle loadtime measure related urls, which all start with testing +// The in memory server has a singleton object of this class. It includes a +// html file containing javascript to go through a list of urls and upload the +// loadtime. The users can modify urls.txt to define the urls they want to +// measure and start with downloading the html file from browser. 
+class LoadtimeMeasurement { + public: + LoadtimeMeasurement(const string& urls_file, + const string& pageload_html_file) + : num_urls_(0), pageload_html_file_(pageload_html_file) { + string urls_string; + read_file_to_string(urls_file.c_str(), &urls_string); + split_string(urls_string, '\n', &urls_); + num_urls_ = urls_.size(); + } + + // This is the entry function for all the loadtime measure related urls + // It handles the request to html file, get_total_iteration to get number + // of urls in the urls file, get each url, report the loadtime for + // each url, and the test is completed. + void ProcessRequest(const string& uri, string& output) { + string action = uri.substr(9); // remove "/testing/" from uri to get action + if (pageload_html_file_.find(action) != string::npos) { + read_file_to_string(pageload_html_file_.c_str(), &output); + return; + } + if (action.find("get_total_iteration") == 0) { + char buffer[16]; + snprintf(buffer, 16, "%d", num_urls_); + output.append(buffer, strlen(buffer)); + return; + } + if (action.find("geturl") == 0) { + size_t b = action.find_first_of('='); + if (b != string::npos) { + int num = atoi(action.substr(b + 1).c_str()); + if (num < num_urls_) { + output.append(urls_[num]); + } + } + return; + } + if (action.find("test_complete") == 0) { + for (map<string, int>::const_iterator it = loadtimes_.begin(); + it != loadtimes_.end(); ++it) { + LOG(INFO) << it->first << " " << it->second; + } + loadtimes_.clear(); + output.append("OK"); + return; + } + if (action.find("record_page_load") == 0) { + vector<string> query; + split_string(action, '?', &query); + vector<string> params; + split_string(query[1], '&', ¶ms); + vector<string> url; + vector<string> loadtime; + split_string(params[1], '=', &url); + split_string(params[2], '=', &loadtime); + loadtimes_[url[1]] = atoi(loadtime[1].c_str()); + output.append("OK"); + return; + } + } + + private: + void read_file_to_string(const char* filename, string* output) { + output->clear(); 
+ int fd = open(filename, 0, "r"); + if (fd == -1) + return; + char buffer[4096]; + ssize_t read_status = read(fd, buffer, sizeof(buffer)); + while (read_status > 0) { + output->append(buffer, static_cast<size_t>(read_status)); + do { + read_status = read(fd, buffer, sizeof(buffer)); + } while (read_status <= 0 && errno == EINTR); + } + close(fd); + } + + void split_string(string& str, char sepa, vector<string>* sub_strs) { + size_t b = 0; + size_t e = str.find_first_of(sepa, b); + while (e != string::npos && e > b) { + sub_strs->push_back(str.substr(b, e - b)); + b = e + 1; + e = str.find_first_of(sepa, b); + } + if (b < str.size()) { + sub_strs->push_back(str.substr(b)); + } + } + + int num_urls_; + vector<string> urls_; + map<string, int> loadtimes_; + const string pageload_html_file_; +}; + +#endif // NET_TOOLS_FLIP_SERVER_LOADTIME_MEASUREMENT_H__ + diff --git a/net/tools/flip_server/ring_buffer.cc b/net/tools/flip_server/ring_buffer.cc new file mode 100644 index 0000000..9f702a0 --- /dev/null +++ b/net/tools/flip_server/ring_buffer.cc @@ -0,0 +1,265 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "net/tools/flip_server/ring_buffer.h" +#include "base/logging.h" + +namespace gfe2 { + +RingBuffer::RingBuffer(int buffer_size) + : buffer_(new char[buffer_size]), + buffer_size_(buffer_size), + bytes_used_(0), + read_idx_(0), + write_idx_(0) { +} + +//////////////////////////////////////////////////////////////////////////////// + +int RingBuffer::ReadableBytes() const { + return bytes_used_; +} + +//////////////////////////////////////////////////////////////////////////////// + +int RingBuffer::BufferSize() const { + return buffer_size_; +} + +//////////////////////////////////////////////////////////////////////////////// + +int RingBuffer::BytesFree() const { + return BufferSize() - ReadableBytes(); +} + +//////////////////////////////////////////////////////////////////////////////// + +// Returns the number of characters written. +// Appends up-to-'size' bytes to the ringbuffer. +int RingBuffer::Write(const char* bytes, int size) { + CHECK_GE(size, 0); +#if 1 + char* wptr; + int wsize; + GetWritablePtr(&wptr, &wsize); + int bytes_remaining = size; + int bytes_written = 0; + + while (wsize && bytes_remaining) { + if (wsize > bytes_remaining) { + wsize = bytes_remaining; + } + memcpy(wptr, bytes + bytes_written, wsize); + bytes_written += wsize; + bytes_remaining -= wsize; + AdvanceWritablePtr(wsize); + GetWritablePtr(&wptr, &wsize); + } + return bytes_written; +#else + const char* p = bytes; + + int bytes_to_write = size; + int bytes_available = BytesFree(); + if (bytes_available < bytes_to_write) { + bytes_to_write = bytes_available; + } + const char* end = bytes + bytes_to_write; + + while (p != end) { + this->buffer_[this->write_idx_] = *p; + ++p; + ++this->write_idx_; + if (this->write_idx_ >= this->buffer_size_) { + this->write_idx_ = 0; + } + } + bytes_used_ += bytes_to_write; + return bytes_to_write; +#endif +} + +//////////////////////////////////////////////////////////////////////////////// + +// Sets *ptr to the beginning of writable 
memory, and sets *size to the size +// available for writing using this pointer. +void RingBuffer::GetWritablePtr(char** ptr, int* size) const { + *ptr = buffer_.get() + write_idx_; + + if (bytes_used_ == buffer_size_) { + *size = 0; + } else if (read_idx_ > write_idx_) { + *size = read_idx_ - write_idx_; + } else { + *size = buffer_size_ - write_idx_; + } +} + +//////////////////////////////////////////////////////////////////////////////// + +// Sets *ptr to the beginning of readable memory, and sets *size to the size +// available for reading using this pointer. +void RingBuffer::GetReadablePtr(char** ptr, int* size) const { + *ptr = buffer_.get() + read_idx_; + + if (bytes_used_ == 0) { + *size = 0; + } else if (write_idx_ > read_idx_) { + *size = write_idx_ - read_idx_; + } else { + *size = buffer_size_ - read_idx_; + } +} + +//////////////////////////////////////////////////////////////////////////////// + +// returns the number of bytes read into +int RingBuffer::Read(char* bytes, int size) { + CHECK_GE(size, 0); +#if 1 + char* rptr; + int rsize; + GetReadablePtr(&rptr, &rsize); + int bytes_remaining = size; + int bytes_read = 0; + + while (rsize && bytes_remaining) { + if (rsize > bytes_remaining) { + rsize = bytes_remaining; + } + memcpy(bytes + bytes_read, rptr, rsize); + bytes_read += rsize; + bytes_remaining -= rsize; + AdvanceReadablePtr(rsize); + GetReadablePtr(&rptr, &rsize); + } + return bytes_read; +#else + char* p = bytes; + int bytes_to_read = size; + int bytes_used = ReadableBytes(); + if (bytes_used < bytes_to_read) { + bytes_to_read = bytes_used; + } + char* end = bytes + bytes_to_read; + + while (p != end) { + *p = this->buffer_[this->read_idx_]; + ++p; + ++this->read_idx_; + if (this->read_idx_ >= this->buffer_size_) { + this->read_idx_ = 0; + } + } + this->bytes_used_ -= bytes_to_read; + return bytes_to_read; +#endif +} + +//////////////////////////////////////////////////////////////////////////////// + +void RingBuffer::Clear() { + 
bytes_used_ = 0; + write_idx_ = 0; + read_idx_ = 0; +} + +//////////////////////////////////////////////////////////////////////////////// + +bool RingBuffer::Reserve(int size) { + DCHECK(size > 0); + char* write_ptr = NULL; + int write_size = 0; + GetWritablePtr(&write_ptr, &write_size); + + if (write_size < size) { + char* read_ptr = NULL; + int read_size = 0; + GetReadablePtr(&read_ptr, &read_size); + if (size <= BytesFree()) { + // The fact that the total Free size is big enough but writable size is + // not means that the writeable region is broken into two pieces: only + // possible if the read_idx < write_idx. If write_idx < read_idx, then + // the writeable region must be contiguous: [write_idx, read_idx). There + // is no work to be done for the latter. + DCHECK(read_idx_ <= write_idx_); + DCHECK(read_size == ReadableBytes()); + if (read_idx_ < write_idx_) { + // Writeable area fragmented, consolidate it. + memmove(buffer_.get(), read_ptr, read_size); + read_idx_ = 0; + write_idx_ = read_size; + } else if (read_idx_ == write_idx_) { + // No unconsumed data in the buffer, simply reset the indexes. 
+ DCHECK(ReadableBytes() == 0); + read_idx_ = 0; + write_idx_ = 0; + } + } else { + Resize(ReadableBytes() + size); + } + } + DCHECK_LE(size, buffer_size_ - write_idx_); + return true; +} + +//////////////////////////////////////////////////////////////////////////////// + +void RingBuffer::AdvanceReadablePtr(int amount_to_consume) { + CHECK_GE(amount_to_consume, 0); + if (amount_to_consume >= bytes_used_) { + Clear(); + return; + } + read_idx_ += amount_to_consume; + read_idx_ %= buffer_size_; + bytes_used_ -= amount_to_consume; +} + +//////////////////////////////////////////////////////////////////////////////// + +void RingBuffer::AdvanceWritablePtr(int amount_to_produce) { + CHECK_GE(amount_to_produce, 0); + CHECK_LE(amount_to_produce, BytesFree()); + write_idx_ += amount_to_produce; + write_idx_ %= buffer_size_; + bytes_used_ += amount_to_produce; +} + +//////////////////////////////////////////////////////////////////////////////// + +void RingBuffer::Resize(int buffer_size) { + CHECK_GE(buffer_size, 0); + if (buffer_size == buffer_size_) return; + + char* new_buffer = new char[buffer_size]; + if (buffer_size < bytes_used_) { + // consume the oldest data. + AdvanceReadablePtr(bytes_used_ - buffer_size); + } + + int bytes_written = 0; + int bytes_used = bytes_used_; + while (true) { + int size; + char* ptr; + GetReadablePtr(&ptr, &size); + if (size == 0) break; + if (size > buffer_size) { + size = buffer_size; + } + memcpy(new_buffer + bytes_written, ptr, size); + bytes_written += size; + AdvanceReadablePtr(size); + } + buffer_.reset(new_buffer); + + buffer_size_ = buffer_size; + bytes_used_ = bytes_used; + read_idx_ = 0; + write_idx_ = bytes_used_ % buffer_size_; +} + +} // namespace gfe2 + diff --git a/net/tools/flip_server/ring_buffer.h b/net/tools/flip_server/ring_buffer.h new file mode 100644 index 0000000..75b1756 --- /dev/null +++ b/net/tools/flip_server/ring_buffer.h @@ -0,0 +1,112 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef NET_TOOLS_FLIP_SERVER_RING_BUFFER_H__ +#define NET_TOOLS_FLIP_SERVER_RING_BUFFER_H__ + +#include "base/scoped_ptr.h" +#include "net/tools/flip_server/buffer_interface.h" + +namespace gfe2 { + +// The ring buffer is a circular buffer, that is, reads or writes may wrap +// around the end of the linear memory contained by the class (and back to +// the beginning). This is a good choice when you want to use a fixed amount +// of buffering and don't want to be moving memory around a lot. +// +// What is the penalty for using this over a normal, linear buffer? +// Reading all the data may take two operations, and +// writing all the data may take two operations. +// +// In the proxy, this class is used as a fixed size buffer between +// clients and servers (so that the memory size is constrained). + +class RingBuffer : public BufferInterface { + public: + explicit RingBuffer(int buffer_size); + virtual ~RingBuffer() { } + + // Resize the buffer to the size specified here. If the buffer_size passed + // in here is smaller than the amount of data in the buffer, then the oldest + // data will be dropped, but all other data will be saved. + // This means: If the buffer size is increasing, all data that was resident + // in the buffer prior to this call will be resident after this call. + void Resize(int buffer_size); + + // The following functions all override pure virtual functions + // in BufferInterface. See buffer_interface.h for a description + // of what they do if the function isn't documented here. + virtual int ReadableBytes() const; + virtual int BufferSize() const; + virtual int BytesFree() const; + + virtual bool Empty() const { return ReadableBytes() == 0; } + virtual bool Full() const { return ReadableBytes() == BufferSize(); } + + // returns the number of characters written. + // appends up-to-'size' bytes to the ringbuffer. 
+ virtual int Write(const char * bytes, int size); + + // Stores a pointer into the ring buffer in *ptr, and stores the number of + // characters which are allowed to be written in *size. + // If there are no writable bytes available, then *size will contain 0. + virtual void GetWritablePtr(char** ptr, int* size) const; + + // Stores a pointer into the ring buffer in *ptr, and stores the number of + // characters which are allowed to be read in *size. + // If there are no readable bytes available, then *size will contain 0. + virtual void GetReadablePtr(char** ptr, int* size) const; + + // Returns the number of bytes read into 'bytes'. + virtual int Read(char* bytes, int size); + + // Removes all data from the ring buffer. + virtual void Clear(); + + // Reserves contiguous writable empty space in the buffer of size bytes. + // Since the point of this class is to have a fixed size buffer, be careful + // not to inadvertently resize the buffer using Reserve(). If the reserve + // size is <= BytesFree(), it is guaranteed that the buffer size will not + // change. + // This can be an expensive operation, it may new a buffer copy all existing + // data and delete the old data. Even if the existing buffer does not need + // to be resized, unread data may still need to be non-destructively copied + // to consolidate fragmented free space. If the size requested is less than + // or equal to BytesFree(), it is guaranteed that the buffer size will not + // change. + virtual bool Reserve(int size); + + // Removes the oldest 'amount_to_advance' characters. + // If amount_to_consume > ReadableBytes(), this performs a Clear() instead. + virtual void AdvanceReadablePtr(int amount_to_advance); + + // Moves the internal pointers around such that the amount of data specified + // here is expected to already be resident (as if it was Written). 
+ virtual void AdvanceWritablePtr(int amount_to_advance); + + protected: + int read_idx() const { return read_idx_; } + int write_idx() const { return write_idx_; } + int bytes_used() const { return bytes_used_; } + int buffer_size() const { return buffer_size_; } + const char* buffer() const { return buffer_.get(); } + + int set_read_idx(int idx) { return read_idx_ = idx; } + int set_write_idx(int idx) { return write_idx_ = idx; } + + private: + scoped_array<char> buffer_; + int buffer_size_; + int bytes_used_; + int read_idx_; + int write_idx_; + + RingBuffer(const RingBuffer&); + void operator=(const RingBuffer&); +}; + +} // namespace gfe2 + +#endif // NET_TOOLS_FLIP_SERVER_RING_BUFFER_H__ + diff --git a/net/tools/flip_server/simple_buffer.cc b/net/tools/flip_server/simple_buffer.cc new file mode 100644 index 0000000..dc28e4c --- /dev/null +++ b/net/tools/flip_server/simple_buffer.cc @@ -0,0 +1,204 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "net/tools/flip_server/simple_buffer.h" +#include "base/logging.h" + +// Some of the following member functions are marked inlined, even though they +// are virtual. This may seem counter-intuitive, since virtual functions are +// generally not eligible for inlining. Profiling results indicate that these +// large amount of runtime is spent on virtual function dispatch on these +// simple functions. They are virtual because of the interface this class +// inherits from. However, it is very unlikely that anyone will sub-class +// SimpleBuffer and change their implementation. To get rid of this baggage, +// internal implementation (e.g., Write) explicitly use SimpleBuffer:: to +// qualify the method calls, thus disabling the virtual dispatch and enable +// inlining. 
+ +namespace gfe2 { + +static const int kInitialSimpleBufferSize = 10; + +SimpleBuffer::SimpleBuffer() + : storage_(new char[kInitialSimpleBufferSize]), + write_idx_(0), + read_idx_(0), + storage_size_(kInitialSimpleBufferSize) { +} + +SimpleBuffer::SimpleBuffer(int size) + : write_idx_(0), + read_idx_(0), + storage_size_(size) { + // Callers may try to allocate overly large blocks, but negative sizes are + // obviously wrong. + CHECK_GE(size, 0); + storage_ = new char[size]; +} + +//////////////////////////////////////////////////////////////////////////////// + +int SimpleBuffer::ReadableBytes() const { + return write_idx_ - read_idx_; +} + +//////////////////////////////////////////////////////////////////////////////// + +string SimpleBuffer::str() const { + string s; + char * readable_ptr; + int readable_size; + GetReadablePtr(&readable_ptr, &readable_size); + s.append(readable_ptr, readable_ptr + readable_size); + return s; +} + +//////////////////////////////////////////////////////////////////////////////// + +int SimpleBuffer::BufferSize() const { + return storage_size_; +} + +//////////////////////////////////////////////////////////////////////////////// + +inline int SimpleBuffer::BytesFree() const { + return (storage_size_ - write_idx_); +} + +//////////////////////////////////////////////////////////////////////////////// + +bool SimpleBuffer::Empty() const { + return (read_idx_ == write_idx_); +} + +//////////////////////////////////////////////////////////////////////////////// + +bool SimpleBuffer::Full() const { + return ((write_idx_ == storage_size_) && (read_idx_ != write_idx_)); +} + +//////////////////////////////////////////////////////////////////////////////// + +// returns the number of characters written. +// appends up-to-'size' bytes to the simplebuffer. 
+int SimpleBuffer::Write(const char* bytes, int size) { + bool has_room = ((storage_size_ - write_idx_) >= size); + if (!has_room) { + (void)Reserve(size); + } + memcpy(storage_ + write_idx_, bytes, size); + SimpleBuffer::AdvanceWritablePtr(size); + return size; +} + +//////////////////////////////////////////////////////////////////////////////// + +// stores a pointer into the simple buffer in *ptr, +// and stores the number of characters which are allowed +// to be written in *size. +inline void SimpleBuffer::GetWritablePtr(char **ptr, int* size) const { + *ptr = storage_ + write_idx_; + *size = SimpleBuffer::BytesFree(); +} + +//////////////////////////////////////////////////////////////////////////////// + +// stores a pointer into the simple buffer in *ptr, +// and stores the number of characters which are allowed +// to be read in *size. +void SimpleBuffer::GetReadablePtr(char **ptr, int* size) const { + *ptr = storage_ + read_idx_; + *size = write_idx_ - read_idx_; +} + +//////////////////////////////////////////////////////////////////////////////// + +// returns the number of bytes read into 'bytes' +int SimpleBuffer::Read(char* bytes, int size) { + char * read_ptr = NULL; + int read_size = 0; + GetReadablePtr(&read_ptr, &read_size); + if (read_size > size) { + read_size = size; + } + memcpy(bytes, read_ptr, read_size); + AdvanceReadablePtr(read_size); + return read_size; +} + +//////////////////////////////////////////////////////////////////////////////// + +// removes all data from the simple buffer +void SimpleBuffer::Clear() { + read_idx_ = write_idx_ = 0; +} + +//////////////////////////////////////////////////////////////////////////////// + +// Attempts to reserve a contiguous block of buffer space by either reclaiming +// old data that is already read, and reallocate large storage as needed. 
+bool SimpleBuffer::Reserve(int size) { + if (size > 0 && BytesFree() < size) { + char * read_ptr = NULL; + int read_size = 0; + GetReadablePtr(&read_ptr, &read_size); + + if (read_size + size <= BufferSize()) { + // Can reclaim space from already read bytes by shifting + memmove(storage_, read_ptr, read_size); + read_idx_ = 0; + write_idx_ = read_size; + CHECK_GE(BytesFree(), size); + } else { + // what we need is to have at least size bytes available for writing. + // This implies that the buffer needs to be at least size bytes + + // read_size bytes long. Since we want linear time extensions in the case + // that we're extending this thing repeatedly, we should extend to twice + // the current size (if that is big enough), or the size + read_size + // bytes, whichever is larger. + int new_storage_size = 2 * storage_size_; + if (new_storage_size < size + read_size) { + new_storage_size = size + read_size; + } + + // have to extend the thing + char* new_storage = new char[new_storage_size]; + + // copy still useful info to the new buffer. + memcpy(new_storage, read_ptr, read_size); + // reset pointers. + read_idx_ = 0; + write_idx_ = read_size; + delete[] storage_; + storage_ = new_storage; + storage_size_ = new_storage_size; + } + } + return true; +} + +//////////////////////////////////////////////////////////////////////////////// + +// removes the oldest 'amount_to_consume' characters. 
+void SimpleBuffer::AdvanceReadablePtr(int amount_to_advance) { + read_idx_ += amount_to_advance; + if (read_idx_ > storage_size_) { + read_idx_ = storage_size_; + } +} + +//////////////////////////////////////////////////////////////////////////////// + +// Moves the internal pointers around such that the +// amount of data specified here is expected to +// already be resident (as if it was Written) +inline void SimpleBuffer::AdvanceWritablePtr(int amount_to_advance) { + write_idx_ += amount_to_advance; + if (write_idx_ > storage_size_) { + write_idx_ = storage_size_; + } +} + +} // namespace gfe2 + diff --git a/net/tools/flip_server/simple_buffer.h b/net/tools/flip_server/simple_buffer.h new file mode 100644 index 0000000..20d52748 --- /dev/null +++ b/net/tools/flip_server/simple_buffer.h @@ -0,0 +1,94 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef NET_TOOLS_FLIP_SERVER_SIMPLE_BUFFER_H__ +#define NET_TOOLS_FLIP_SERVER_SIMPLE_BUFFER_H__ + +#include <string> + +#include "net/tools/flip_server/buffer_interface.h" + +namespace gfe2 { + +class SimpleBuffer : public BufferInterface { + public: + SimpleBuffer(); + explicit SimpleBuffer(int size); + virtual ~SimpleBuffer() { + delete[] storage_; + } + + string str() const; + + typedef char * iterator; + typedef const char * const_iterator; + + iterator begin() { return storage_ + read_idx_; } + const_iterator begin() const { return storage_ + read_idx_; } + + iterator end() { return storage_ + write_idx_; } + const_iterator end() const { return storage_ + write_idx_; } + + // The following functions all override pure virtual functions + // in BufferInterface. See buffer_interface.h for a description + // of what they do. 
+ virtual int ReadableBytes() const; + virtual int BufferSize() const; + virtual int BytesFree() const; + + virtual bool Empty() const; + virtual bool Full() const; + + virtual int Write(const char* bytes, int size); + + virtual void GetWritablePtr(char **ptr, int* size) const; + + virtual void GetReadablePtr(char **ptr, int* size) const; + + virtual int Read(char* bytes, int size); + + virtual void Clear(); + + // This can be an expensive operation: costing a new/delete, and copying of + // all existing data. Even if the existing buffer does not need to be + // resized, unread data may still need to be non-destructively copied to + // consolidate fragmented free space. + virtual bool Reserve(int size); + + virtual void AdvanceReadablePtr(int amount_to_advance); + + virtual void AdvanceWritablePtr(int amount_to_advance); + + void Swap(SimpleBuffer* other) { + char* tmp = storage_; + storage_ = other->storage_; + other->storage_ = tmp; + + int tmp_int = write_idx_; + write_idx_ = other->write_idx_; + other->write_idx_ = tmp_int; + + tmp_int = read_idx_; + read_idx_ = other->read_idx_; + other->read_idx_ = tmp_int; + + tmp_int = storage_size_; + storage_size_ = other->storage_size_; + other->storage_size_ = tmp_int; + } + + protected: + char* storage_; + int write_idx_; + int read_idx_; + int storage_size_; + + private: + //DISALLOW_COPY_AND_ASSIGN(SimpleBuffer); +}; + +} // namespace gfe2 + +#endif // NET_TOOLS_FLIP_SERVER_SIMPLE_BUFFER_H__ + diff --git a/net/tools/flip_server/url_to_filename_encoder.h b/net/tools/flip_server/url_to_filename_encoder.h new file mode 100644 index 0000000..25ddbd3 --- /dev/null +++ b/net/tools/flip_server/url_to_filename_encoder.h @@ -0,0 +1,127 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef NET_TOOLS_FLIP_SERVER_URL_TO_FILE_ENCODER_H__ +#define NET_TOOLS_FLIP_SERVER_URL_TO_FILE_ENCODER_H__ + +#include <string> +#include "net/tools/flip_server/url_utilities.h" + +namespace net { + +// Helper class for converting a URL into a filename. +class UrlToFilenameEncoder { + public: + // Given a |url| and a |base_path|, returns a string which represents this + // |url|. + static string Encode(const std::string& url, std::string base_path) { + std::string clean_url(url); + if (clean_url.length() && clean_url[clean_url.length()-1] == '/') + clean_url.append("index.html"); + + std::string host = UrlUtilities::GetUrlHost(clean_url); + std::string filename(base_path); + filename = filename.append(host + "/"); + + std::string url_filename = UrlUtilities::GetUrlPath(clean_url); + // Strip the leading '/' + if (url_filename[0] == '/') + url_filename = url_filename.substr(1); + + // replace '/' with '\' + ConvertToSlashes(url_filename); + + // strip double slashes ("\\") + StripDoubleSlashes(url_filename); + + // Save path as filesystem-safe characters + url_filename = Escape(url_filename); + filename = filename.append(url_filename); + +#ifndef WIN32 + // Last step - convert to native slashes! + const std::string slash("/"); + const std::string backslash("\\"); + ReplaceAll(filename, backslash, slash); +#endif + + return filename; + } + + private: + static const int kMaximumSubdirectoryLength = 128; + + + // Escape the given input |path| and chop any individual components + // of the path which are greater than kMaximumSubdirectoryLength characters + // into two chunks. + static std::string Escape(const std::string& path) { + std::string output; + + // Note: We also chop paths into medium sized 'chunks'. + // This is due to the incompetence of the windows + // filesystem, which still hasn't figured out how + // to deal with long filenames. 
+ int last_slash = 0; + for (size_t index = 0; index < path.length(); index++) { + char ch = path[index]; + if (ch == 0x5C) + last_slash = index; + if ((ch == 0x2D) || // hyphen + (ch == 0x5C) || (ch == 0x5F) || // backslash, underscore + ((0x30 <= ch) && (ch <= 0x39)) || // Digits [0-9] + ((0x41 <= ch) && (ch <= 0x5A)) || // Uppercase [A-Z] + ((0x61 <= ch) && (ch <= 0x7A))) { // Lowercase [a-z] + output.append(&path[index],1); + } else { + char encoded[3]; + encoded[0] = 'x'; + encoded[1] = ch / 16; + encoded[1] += (encoded[1] >= 10) ? 'A' - 10 : '0'; + encoded[2] = ch % 16; + encoded[2] += (encoded[2] >= 10) ? 'A' - 10 : '0'; + output.append(encoded, 3); + } + if (index - last_slash > kMaximumSubdirectoryLength) { +#ifdef WIN32 + char slash = '\\'; +#else + char slash = '/'; +#endif + output.append(&slash, 1); + last_slash = index; + } + } + return output; + } + + // Replace all instances of |from| within |str| as |to|. + static void ReplaceAll(std::string& str, const std::string& from, + const std::string& to) { + std::string::size_type pos(0); + while ((pos = str.find(from, pos)) != std::string::npos) { + str.replace(pos, from.size(), to); + pos += from.size(); + } + } + + // Replace all instances of "/" with "\" in |path|. + static void ConvertToSlashes(std::string& path) { + const std::string slash("/"); + const std::string backslash("\\"); + ReplaceAll(path, slash, backslash); + } + + // Replace all instances of "\\" with "%5C%5C" in |path|. 
+ static void StripDoubleSlashes(std::string& path) { + const std::string doubleslash("\\\\"); + const std::string escaped_doubleslash("%5C%5C"); + ReplaceAll(path, doubleslash, escaped_doubleslash); + } +}; + +} // namespace net + +#endif // NET_TOOLS_FLIP_SERVER_URL_TO_FILE_ENCODER_H__ + diff --git a/net/tools/flip_server/url_utilities.h b/net/tools/flip_server/url_utilities.h new file mode 100644 index 0000000..488753c --- /dev/null +++ b/net/tools/flip_server/url_utilities.h @@ -0,0 +1,69 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef NET_TOOLS_FLIP_SERVER_URL_UTILITIES_H__ +#define NET_TOOLS_FLIP_SERVER_URL_UTILITIES_H__ + +#include <string> + +namespace net { + +struct UrlUtilities { + // Get the host from an url + static string GetUrlHost(const string& url) { + size_t b = url.find("//"); + if (b == string::npos) + b = 0; + else + b += 2; + size_t next_slash = url.find_first_of('/', b); + size_t next_colon = url.find_first_of(':', b); + if (next_slash != string::npos + && next_colon != string::npos + && next_colon < next_slash) { + return string(url, b, next_colon - b); + } + if (next_slash == string::npos) { + if (next_colon != string::npos) { + return string(url, next_colon - b); + } else { + next_slash = url.size(); + } + } + return string(url, b, next_slash - b); + } + + // Get the host + path portion of an url + // e.g http://www.foo.com/path + // returns www.foo.com/path + static string GetUrlHostPath(const string& url) { + size_t b = url.find("//"); + if (b == string::npos) + b = 0; + else + b += 2; + return string(url, b); + } + + // Get the path portion of an url + // e.g http://www.foo.com/path + // returns /path + static string GetUrlPath(const string& url) { + size_t b = url.find("//"); + if (b == string::npos) + b = 0; + else + b += 2; + b = url.find("/", b+1); + if (b == string::npos) + return "/"; + + return 
string(url, b); + } +}; + +} // namespace net + +#endif // NET_TOOLS_FLIP_SERVER_URL_UTILITIES_H__ + |