summaryrefslogtreecommitdiffstats
path: root/third_party/dom_distiller_js/package/proto/dom_distiller.proto
blob: c89867e0ff4ea24537d023914c1cf59f3f71e012 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// This schema uses proto2 syntax; every field below carries an explicit
// `optional` or `repeated` label.
syntax = "proto2";

// Proto package that scopes all message names declared in this file.
package dom_distiller.proto;
// Generate code against the lite protobuf runtime (smaller footprint, no
// descriptor/reflection support in the generated classes).
option optimize_for = LITE_RUNTIME;
// Java package used for the generated classes.
option java_package = "com.dom_distiller.proto";
// Name of the single outer Java class that wraps all generated messages.
option java_outer_classname = "DomDistillerProtos";

// Wrapper message around a single optional string field, `html`.
// NOTE(review): presumably the distilled article markup; confirm against the
// code that populates this message.
message DistilledContent {
  // HTML payload as a string; unset when the producer supplies none.
  optional string html = 1;
}

// Three optional string fields naming the next, previous, and canonical
// pages. Each field is independently optional.
// NOTE(review): the values are presumably URLs; whether they are absolute or
// relative is not stated in this file. Confirm with the producer.
message PaginationInfo {
  // Reference to the following page, if any.
  optional string next_page = 1;
  // Reference to the preceding page, if any.
  optional string prev_page = 2;
  // Reference to the canonical page, if any.
  optional string canonical_page = 3;
}

// Article-level fields: three optional time strings, an optional section,
// and a list of authors. Embedded in MarkupInfo via its `article` field.
// NOTE(review): the *_time fields are plain strings; their format (e.g.
// ISO 8601) is not specified in this file. Confirm before parsing them.
message MarkupArticle {
  // Publication time, carried as an unparsed string.
  optional string published_time = 1;
  // Last-modification time, carried as an unparsed string.
  optional string modified_time = 2;
  // Expiration time, carried as an unparsed string.
  optional string expiration_time = 3;
  // Section label for the article.
  optional string section = 4;
  // Zero or more author entries (repeated field).
  repeated string authors = 5;
}

// Describes one image: two URL strings, a type, a caption, and integer
// dimensions. Used as the element type of MarkupInfo.images.
message MarkupImage {
  // URL of the image.
  optional string url = 1;
  // Secure URL of the image.
  // NOTE(review): presumably an https:// variant of `url`; confirm.
  optional string secure_url = 2;
  // Free-form type string; its vocabulary is not defined in this file.
  // NOTE(review): possibly a MIME type; confirm with the producer.
  optional string type = 3;
  // Caption text associated with the image.
  optional string caption = 4;
  // Image width as a signed 32-bit integer.
  // NOTE(review): presumably pixels; confirm.
  optional int32 width = 5;
  // Image height as a signed 32-bit integer.
  // NOTE(review): presumably pixels; confirm.
  optional int32 height = 6;
}

// Page-level string fields plus an optional MarkupArticle and zero or more
// MarkupImage entries. Embedded in DomDistillerResult via `markup_info`.
// NOTE(review): this message has a singular `author` while MarkupArticle has
// a repeated `authors`; how the two relate is not documented in this file.
message MarkupInfo {
  // Title string.
  optional string title = 1;
  // Free-form type string; its vocabulary is not defined in this file.
  optional string type = 2;
  // URL string.
  optional string url = 3;
  // Description text.
  optional string description = 4;
  // Publisher name.
  optional string publisher = 5;
  // Copyright text.
  optional string copyright = 6;
  // Single author string (see the note on the message about `authors`).
  optional string author = 7;
  // Article-specific fields; unset when the producer supplies none.
  optional MarkupArticle article = 8;
  // Zero or more image descriptions (repeated field).
  repeated MarkupImage images = 9;
}

// Per-stage timing values plus a total, each an optional double.
// NOTE(review): the time unit (e.g. milliseconds) is not stated in this
// file, nor is whether `total_time` equals the sum of the other fields.
// Confirm both with the producer before comparing values.
message TimingInfo {
  // Time attributed to markup parsing.
  optional double markup_parsing_time = 1;
  // Time attributed to document construction.
  optional double document_construction_time = 2;
  // Time attributed to article processing.
  optional double article_processing_time = 3;
  // Time attributed to formatting.
  optional double formatting_time = 4;
  // Overall time.
  optional double total_time = 5;
}

// Wrapper message around a single optional string field, `log`.
// Embedded in DomDistillerResult via `debug_info`.
message DebugInfo {
  // Log text as one string.
  // NOTE(review): presumably tied to DomDistillerOptions.debug_level; confirm.
  optional string log = 1;
}

// Wrapper message around a single optional int32 field, `word_count`.
// Embedded in DomDistillerResult via `statistics_info`.
message StatisticsInfo {
  // Word count as a signed 32-bit integer.
  // NOTE(review): which text is counted is not stated in this file.
  optional int32 word_count = 1;
}

// Top-level result message. Aggregates the other result-side messages
// declared in this file (DistilledContent, PaginationInfo, MarkupInfo,
// TimingInfo, DebugInfo, StatisticsInfo) together with a few scalar fields.
// Every sub-message is optional, so readers must check presence.
message DomDistillerResult {
  // Title string.
  // NOTE(review): MarkupInfo also has a `title`; which one takes precedence
  // is not documented in this file.
  optional string title = 1;
  // Distilled content wrapper (holds the `html` string).
  optional DistilledContent distilled_content = 2;
  // Next/previous/canonical page references.
  optional PaginationInfo pagination_info = 3;
  // Zero or more image URL strings (repeated field).
  repeated string image_urls = 4;
  // Page-level markup fields, article fields, and image descriptions.
  optional MarkupInfo markup_info = 5;
  // Per-stage timing values.
  optional TimingInfo timing_info = 6;
  // Debug log wrapper.
  optional DebugInfo debug_info = 7;
  // Statistics wrapper (holds `word_count`).
  optional StatisticsInfo statistics_info = 8;
  // Text direction as a free-form string.
  // NOTE(review): presumably values such as "ltr" / "rtl" / "auto"; the
  // accepted set is not defined in this file. Confirm with the producer.
  optional string text_direction = 9;
}

// Options message with two optional fields: a text-only extraction switch
// and a debug verbosity level. Both are optional, so an unset field reads as
// the proto2 default (false / 0).
message DomDistillerOptions {
  // Whether to extract only the text (or to include the containing html).
  optional bool extract_text_only = 1;

  // How much debug output to dump to window.console.
  // (0): Logs nothing
  // (1): Text Node data for each stage of processing
  // (2): (1) and some node visibility information
  // (3): (2) and extracted paging information
  // NOTE(review): behavior for values outside 0-3 is not described here.
  optional int32 debug_level = 2;
}