// Copyright (c) 2009 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. // Fuzz testing for EncodedProgram serialized format and assembly. // // We would like some assurance that if an EncodedProgram is malformed we will // not crash. The EncodedProgram could be malformed either due to malicious // attack to due to an error in patch generation. // // We try a lot of arbitrary modifications to the serialized form and make sure // that the outcome is not a crash. #include <string> #include "base/path_service.h" #include "base/file_util.h" #include "base/string_util.h" #include "base/test/test_suite.h" #include "courgette/courgette.h" #include "courgette/streams.h" #include "testing/gtest/include/gtest/gtest.h" class DecodeFuzzTest : public testing::Test { public: void FuzzExe(const char *) const; private: virtual void SetUp() { PathService::Get(base::DIR_SOURCE_ROOT, &testdata_dir_); testdata_dir_ = testdata_dir_.AppendASCII("courgette"); testdata_dir_ = testdata_dir_.AppendASCII("testdata"); } virtual void TearDown() { } void FuzzByte(const std::string& buffer, const std::string& output, size_t index) const; void FuzzBits(const std::string& buffer, const std::string& output, size_t index, int bits_to_flip) const; // Returns true if could assemble, false if rejected. bool TryAssemble(const std::string& buffer, std::string* output) const; // Returns contents of |file_name| as uninterprested bytes stored in a string. std::string FileContents(const char* file_name) const; // Full path name of testdata directory FilePath testdata_dir_; }; // Reads a test file into a string. std::string DecodeFuzzTest::FileContents(const char* file_name) const { FilePath file_path = testdata_dir_.AppendASCII(file_name); std::string file_contents; if (!file_util::ReadFileToString(file_path, &file_contents)) { EXPECT_TRUE(!"Could not read test data"); } return file_contents; } // Loads an executable and does fuzz testing in the serialized format. void DecodeFuzzTest::FuzzExe(const char* file_name) const { std::string file1 = FileContents(file_name); const void* original_buffer = file1.c_str(); size_t original_length = file1.size(); courgette::AssemblyProgram* program = NULL; const courgette::Status parse_status = courgette::ParseWin32X86PE(original_buffer, original_length, &program); EXPECT_EQ(courgette::C_OK, parse_status); courgette::EncodedProgram* encoded = NULL; const courgette::Status encode_status = Encode(program, &encoded); EXPECT_EQ(courgette::C_OK, encode_status); DeleteAssemblyProgram(program); courgette::SinkStreamSet sinks; const courgette::Status write_status = WriteEncodedProgram(encoded, &sinks); EXPECT_EQ(courgette::C_OK, write_status); DeleteEncodedProgram(encoded); courgette::SinkStream sink; bool can_collect = sinks.CopyTo(&sink); EXPECT_TRUE(can_collect); size_t length = sink.Length(); std::string base_buffer(reinterpret_cast<const char*>(sink.Buffer()), length); std::string base_output; bool ok = TryAssemble(base_buffer, &base_output); EXPECT_EQ(true, ok); // Now we have a good serialized EncodedProgram in |base_buffer|. Time to // fuzz. // More intense fuzzing on the first part because it contains more control // information like substeam lengths. size_t position = 0; for ( ; position < 100 && position < length; position += 1) { FuzzByte(base_buffer, base_output, position); } // We would love to fuzz every position, but it takes too long. for ( ; position < length; position += 900) { FuzzByte(base_buffer, base_output, position); } } // FuzzByte tries to break the EncodedProgram deserializer and assembler. It // takes a good serialization of and EncodedProgram, flips some bits, and checks // that the behaviour is reasonable. It has testing checks for unreasonable // behaviours. void DecodeFuzzTest::FuzzByte(const std::string& base_buffer, const std::string& base_output, size_t index) const { printf("Fuzzing position %d\n", static_cast<int>(index)); // The following 10 values are a compromize between run time and coverage of // the 255 'wrong' values at this byte position. // 0xFF flips all the bits. FuzzBits(base_buffer, base_output, index, 0xFF); // 0x7F flips the most bits without changing Varint32 framing. FuzzBits(base_buffer, base_output, index, 0x7F); // These all flip one bit. FuzzBits(base_buffer, base_output, index, 0x80); FuzzBits(base_buffer, base_output, index, 0x40); FuzzBits(base_buffer, base_output, index, 0x20); FuzzBits(base_buffer, base_output, index, 0x10); FuzzBits(base_buffer, base_output, index, 0x08); FuzzBits(base_buffer, base_output, index, 0x04); FuzzBits(base_buffer, base_output, index, 0x02); FuzzBits(base_buffer, base_output, index, 0x01); } // FuzzBits tries to break the EncodedProgram deserializer and assembler. It // takes a good serialization of and EncodedProgram, flips some bits, and checks // that the behaviour is reasonable. // // There are EXPECT calls to check for unreasonable behaviour. These are // somewhat arbitrary in that the parameters cannot easily be derived from first // principles. They may need updating as the serialized format evolves. void DecodeFuzzTest::FuzzBits(const std::string& base_buffer, const std::string& base_output, size_t index, int bits_to_flip) const { std::string modified_buffer = base_buffer; std::string modified_output; modified_buffer[index] ^= bits_to_flip; bool ok = TryAssemble(modified_buffer, &modified_output); if (ok) { // We normally expect TryAssemble to fail. But sometimes it succeeds. // What could have happened? We changed one byte in the serialized form: // // * If we changed one of the copied bytes, we would see a single byte // change in the output. // * If we changed an address table element, all the references to that // address would be different. // * If we changed a copy count, we would run out of data in some stream, // or leave data remaining, so should not be here. // * If we changed an origin address, it could affect all relocations based // off that address. If no relocations were based off the address then // there will be no changes. // * If we changed an origin address, it could cause some abs32 relocs to // shift from one page to the next, changing the number and layout of // blocks in the base relocation table. // Generated length could vary slightly due to base relocation table layout. // In the worst case the number of base relocation blocks doubles, approx // 12/4096 or 0.3% size of file. size_t base_length = base_output.length(); size_t modified_length = modified_output.length(); ptrdiff_t diff = base_length - modified_length; if (diff < -200 || diff > 200) { EXPECT_EQ(base_length, modified_length); } size_t changed_byte_count = 0; for (size_t i = 0; i < base_length && i < modified_length; ++i) { changed_byte_count += (base_output[i] != modified_output[i]); } if (index > 60) { // Beyond the origin addresses ... EXPECT_NE(0U, changed_byte_count); // ... we expect some difference. } // Currently all changes are smaller than this number: EXPECT_GE(45000U, changed_byte_count); } } bool DecodeFuzzTest::TryAssemble(const std::string& buffer, std::string* output) const { courgette::EncodedProgram *encoded = NULL; bool result = false; courgette::SourceStreamSet sources; bool can_get_source_streams = sources.Init(buffer.c_str(), buffer.length()); if (can_get_source_streams) { const courgette::Status read_status = ReadEncodedProgram(&sources, &encoded); if (read_status == courgette::C_OK) { courgette::SinkStream assembled; const courgette::Status assemble_status = Assemble(encoded, &assembled); if (assemble_status == courgette::C_OK) { const void* assembled_buffer = assembled.Buffer(); size_t assembled_length = assembled.Length(); output->clear(); output->assign(reinterpret_cast<const char*>(assembled_buffer), assembled_length); result = true; } } } DeleteEncodedProgram(encoded); return result; } TEST_F(DecodeFuzzTest, All) { FuzzExe("setup1.exe"); } int main(int argc, char** argv) { return TestSuite(argc, argv).Run(); }