From e1d6a597b92bb42fa6414bb90613433c38694682 Mon Sep 17 00:00:00 2001
From: "bryner@chromium.org"
Date: Fri, 3 Sep 2010 21:02:15 +0000
Subject: Add a term feature extractor for client-side phishing detection.

This class creates features for n-grams in the page text that appear in the
phishing classification model. It will eventually operate on the plain text
that is extracted by RenderView::CaptureText().

To make it harder for phishers to enumerate the terms in the classification
model, they will be supplied as SHA-256 hashes rather than plain text. The
term feature extractor hashes the words in the document in order to check
whether they match the model. Since this is potentially expensive, the term
feature extractor limits how long it will run on each iteration, similar to
the PhishingDOMFeatureExtractor.

TEST=PhishingTermFeatureExtractorTest
BUG=none

Review URL: http://codereview.chromium.org/3214002

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@58537 0039d316-1c4b-4281-b951-d872f2087c98
---
 base/sha2.cc | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'base/sha2.cc')

diff --git a/base/sha2.cc b/base/sha2.cc
index b6d6d56..47d381b 100644
--- a/base/sha2.cc
+++ b/base/sha2.cc
@@ -4,6 +4,7 @@
 
 #include "base/sha2.h"
 
+#include "base/stl_util-inl.h"
 #include "base/third_party/nss/blapi.h"
 #include "base/third_party/nss/sha256.h"
 
@@ -19,4 +20,12 @@ void SHA256HashString(const std::string& str, void* output, size_t len) {
                 static_cast<unsigned int>(len));
 }
 
+// Returns the SHA-256 hash of |str| as a SHA256_LENGTH-byte std::string.
+// The length passed down is the size of the output buffer, not of |str|.
+std::string SHA256HashString(const std::string& str) {
+  std::string output(SHA256_LENGTH, 0);
+  SHA256HashString(str, string_as_array(&output), output.size());
+  return output;
+}
+
 }  // namespace base
-- 
cgit v1.1