summaryrefslogtreecommitdiffstats
path: root/pdf
diff options
context:
space:
mode:
authorthestig <thestig@chromium.org>2014-09-12 12:44:36 -0700
committerCommit bot <commit-bot@chromium.org>2014-09-12 19:48:12 +0000
commit974086e8a7ca04b9bb947380bb2ac684c028edc1 (patch)
tree10b2d933918a06331cd46e592dfbd6fe84ddaade /pdf
parenteef1b5914f37b0fb0edbd1c340a60332d5453453 (diff)
downloadchromium_src-974086e8a7ca04b9bb947380bb2ac684c028edc1.zip
chromium_src-974086e8a7ca04b9bb947380bb2ac684c028edc1.tar.gz
chromium_src-974086e8a7ca04b9bb947380bb2ac684c028edc1.tar.bz2
PDF: Manipulate the selected text buffer to deal with special 0xfffe hyphens.
Also format text to match Adobe Reader. BUG=307523 TEST=Manual, see bug (and duplicates for more examples) Review URL: https://codereview.chromium.org/566773005 Cr-Commit-Position: refs/heads/master@{#294638}
Diffstat (limited to 'pdf')
-rw-r--r--pdf/pdfium/pdfium_engine.cc105
1 files changed, 69 insertions, 36 deletions
diff --git a/pdf/pdfium/pdfium_engine.cc b/pdf/pdfium/pdfium_engine.cc
index 28fca44..6457478 100644
--- a/pdf/pdfium/pdfium_engine.cc
+++ b/pdf/pdfium/pdfium_engine.cc
@@ -45,6 +45,8 @@
namespace chrome_pdf {
+namespace {
+
#define kPageShadowTop 3
#define kPageShadowBottom 7
#define kPageShadowLeft 5
@@ -458,46 +460,75 @@ void CalculateNonScaledClipBoxOffset(const pp::Rect& content_rect, int rotation,
}
}
-// Do an in-place transformation of objects on |page|. Translate all objects on
-// |page| in |source_clip_box| by (|offset_x|, |offset_y|) and scale them by
-// |scale_factor|.
-//
-// |page| Handle to the page. Returned by FPDF_LoadPage function.
-// |source_clip_box| specifies the source clip box positions, relative to
-// origin at left-bottom.
-// |scale_factor| specifies the scale factor that should be applied to page
-// objects.
-// |offset_x| and |offset_y| specifies the translation offsets for the page
-// objects, relative to origin at left-bottom.
-
-void TransformPageObjects(FPDF_PAGE page, const ClipBox& source_clip_box,
- const double scale_factor, double offset_x,
- double offset_y) {
- const int obj_count = FPDFPage_CountObject(page);
-
- // Create a new clip path.
- FPDF_CLIPPATH clip_path = FPDF_CreateClipPath(
- source_clip_box.left + offset_x, source_clip_box.bottom + offset_y,
- source_clip_box.right + offset_x, source_clip_box.top + offset_y);
-
- for (int obj_idx = 0; obj_idx < obj_count; ++obj_idx) {
- FPDF_PAGEOBJECT page_obj = FPDFPage_GetObject(page, obj_idx);
- FPDFPageObj_Transform(page_obj, scale_factor, 0, 0, scale_factor,
- offset_x, offset_y);
- FPDFPageObj_TransformClipPath(page_obj, scale_factor, 0, 0, scale_factor,
- offset_x, offset_y);
+// This formats a string with special 0xfffe end-of-line hyphens the same way
+// as Adobe Reader. When a hyphen is encountered, the next non-CR/LF whitespace
+// becomes CR+LF and the hyphen is erased. If there is no whitespace between
+// two hyphens, the latter hyphen is erased and ignored.
+void FormatStringWithHyphens(base::string16* text) {
+ // First pass marks all the hyphen positions.
+ struct HyphenPosition {
+ HyphenPosition() : position(0), next_whitespace_position(0) {}
+ size_t position;
+ size_t next_whitespace_position; // 0 for none
+ };
+ std::vector<HyphenPosition> hyphen_positions;
+ HyphenPosition current_hyphen_position;
+ bool current_hyphen_position_is_valid = false;
+ const base::char16 kPdfiumHyphenEOL = 0xfffe;
+
+ for (size_t i = 0; i < text->size(); ++i) {
+ const base::char16& current_char = (*text)[i];
+ if (current_char == kPdfiumHyphenEOL) {
+ if (current_hyphen_position_is_valid)
+ hyphen_positions.push_back(current_hyphen_position);
+ current_hyphen_position = HyphenPosition();
+ current_hyphen_position.position = i;
+ current_hyphen_position_is_valid = true;
+ } else if (IsWhitespace(current_char)) {
+ if (current_hyphen_position_is_valid) {
+ if (current_char != L'\r' && current_char != L'\n')
+ current_hyphen_position.next_whitespace_position = i;
+ hyphen_positions.push_back(current_hyphen_position);
+ current_hyphen_position_is_valid = false;
+ }
+ }
}
- FPDFPage_TransformAnnots(page, scale_factor, 0, 0, scale_factor,
- offset_x, offset_y);
- FPDFPage_GenerateContent(page);
+ if (current_hyphen_position_is_valid)
+ hyphen_positions.push_back(current_hyphen_position);
- // Add a extra clip path to the new pdf page here.
- FPDFPage_InsertClipPath(page, clip_path);
-
- // Destroy the clip path.
- FPDF_DestroyClipPath(clip_path);
+ // With all the hyphen positions, do the search and replace.
+ while (!hyphen_positions.empty()) {
+ static const base::char16 kCr[] = {L'\r', L'\0'};
+ const HyphenPosition& position = hyphen_positions.back();
+ if (position.next_whitespace_position != 0) {
+ (*text)[position.next_whitespace_position] = L'\n';
+ text->insert(position.next_whitespace_position, kCr);
+ }
+ text->erase(position.position, 1);
+ hyphen_positions.pop_back();
+ }
+
+ // Adobe Reader also get rid of trailing spaces right before a CRLF.
+ static const base::char16 kSpaceCrCn[] = {L' ', L'\r', L'\n', L'\0'};
+ static const base::char16 kCrCn[] = {L'\r', L'\n', L'\0'};
+ ReplaceSubstringsAfterOffset(text, 0, kSpaceCrCn, kCrCn);
+}
+
+// Replace CR/LF with just LF on POSIX.
+void FormatStringForOS(base::string16* text) {
+#if defined(OS_POSIX)
+ static const base::char16 kCr[] = {L'\r', L'\0'};
+ static const base::char16 kBlank[] = {L'\0'};
+ base::ReplaceChars(*text, kCr, kBlank, text);
+#elif defined(OS_WIN)
+ // Do nothing
+#else
+ NOTIMPLEMENTED();
+#endif
}
+} // namespace
+
bool InitializeSDK(void* data) {
FPDF_InitLibrary(data);
@@ -1882,6 +1913,8 @@ std::string PDFiumEngine::GetSelectedText() {
}
}
+ FormatStringWithHyphens(&result);
+ FormatStringForOS(&result);
return base::UTF16ToUTF8(result);
}