#!/usr/bin/env python
# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

# NOTE(review): the HTML markup inside the string literals of this file was
# reconstructed from the values the assertions expect (attribute dicts,
# warning texts and their line/column positions). Confirm the fixtures
# against document_parser before relying on byte-exact contents.

import unittest

from document_parser import ParseDocument, RemoveTitle


# Fixture for testWholeDocument. The expected warnings cite (line, column)
# positions inside this literal, so its line layout is significant: do not
# add or remove lines without updating those assertions.
# NOTE(review): the title='' on "Bananas" and title='hello' on "Oranges" are
# inferred from the assertions (Bananas is absent from the structure and the
# Oranges entry is named 'hello') -- confirm against document_parser.
_WHOLE_DOCUMENT = '''
Preamble before heading.

<h1 id='main' class='header'>Main header</h1>
Some intro to the content.

<h2 id='banana' class='header' title=''>Bananas</h2>
Something about bananas.

<h2 id='orange' title='hello'>Oranges</h2>
Something about oranges.

<h3 id='valencia'>Valencia Oranges</h3>
A description of valencia oranges.

<h3 id='seville'>Seville Oranges</h3>
A description of seville oranges.

<h2>Grapefruit</h3>
Grapefruit closed a h2 with a h3. This should be a warning.

<h1 id='not-main'>Not the main header</h1>
But it should still show up in the TOC as though it were an h2.

<h2>Not <h3>a banana</h2>
The embedded h3 should be ignored.

<h4>It's a h4</h4>
h4 are part of the document structure, but this is not inside a h3.

<h3>Plantains</h3>
Now I'm just getting lazy.

<h4>Another h4</h4>
This h4 is inside a h3 so will show up.

<h5>Header 5</h5>
Header 5s are not parsed.
'''


# _WHOLE_DOCUMENT with only the first <h1>...</h1> span cut out; everything
# around the removed span (including its trailing newline) is left in place.
_WHOLE_DOCUMENT_WITHOUT_TITLE = '''
Preamble before heading.


Some intro to the content.

<h2 id='banana' class='header' title=''>Bananas</h2>
Something about bananas.

<h2 id='orange' title='hello'>Oranges</h2>
Something about oranges.

<h3 id='valencia'>Valencia Oranges</h3>
A description of valencia oranges.

<h3 id='seville'>Seville Oranges</h3>
A description of seville oranges.

<h2>Grapefruit</h3>
Grapefruit closed a h2 with a h3. This should be a warning.

<h1 id='not-main'>Not the main header</h1>
But it should still show up in the TOC as though it were an h2.

<h2>Not <h3>a banana</h2>
The embedded h3 should be ignored.

<h4>It's a h4</h4>
h4 are part of the document structure, but this is not inside a h3.

<h3>Plantains</h3>
Now I'm just getting lazy.

<h4>Another h4</h4>
This h4 is inside a h3 so will show up.

<h5>Header 5</h5>
Header 5s are not parsed.
'''


class DocumentParserUnittest(unittest.TestCase):
  """Tests for document_parser.ParseDocument and document_parser.RemoveTitle.
  """

  def testEmptyDocument(self):
    """An empty document has no title or sections; a title may be expected."""
    self.assertEqual(('', 'No opening <h1> was found'), RemoveTitle(''))

    result = ParseDocument('')
    self.assertEqual(None, result.title)
    self.assertEqual(None, result.title_attributes)
    self.assertEqual([], result.sections)
    self.assertEqual([], result.warnings)

    result = ParseDocument('', expect_title=True)
    self.assertEqual('', result.title)
    self.assertEqual({}, result.title_attributes)
    self.assertEqual([], result.sections)
    self.assertEqual(['Expected a title'], result.warnings)

  def testRemoveTitle(self):
    """RemoveTitle returns (document, warning) for malformed or valid titles.

    On failure the document comes back unchanged together with a message;
    on success the first title is cut out and the message is None.
    """
    no_closing_tag = '<h1>No closing tag'
    self.assertEqual((no_closing_tag, 'No closing </h1> was found'),
                     RemoveTitle(no_closing_tag))

    # NOTE(review): the trailing </h1> is reconstructed -- confirm.
    no_opening_tag = 'No opening tag</h1>'
    self.assertEqual((no_opening_tag, 'No opening <h1> was found'),
                     RemoveTitle(no_opening_tag))

    tags_wrong_order = '</h1>Tags in wrong order<h1>'
    self.assertEqual((tags_wrong_order, 'The </h1> appeared before the <h1>'),
                     RemoveTitle(tags_wrong_order))

    # Only the first title is removed.
    multiple_titles = '<h1>First header</h1> and <h1>Second header</h1>'
    self.assertEqual((' and <h1>Second header</h1>', None),
                     RemoveTitle(multiple_titles))

    # Tag matching is expected to ignore case.
    upper_case = '<H1>Upper case header tag</H1> hi'
    self.assertEqual((' hi', None), RemoveTitle(upper_case))
    mixed_case = '<H1>Mixed case header tag</h1> hi'
    self.assertEqual((' hi', None), RemoveTitle(mixed_case))

  def testOnlyTitleDocument(self):
    """A document holding just a title warns unless the title is expected."""
    document = '<h1 id="header">heading</h1>'
    self.assertEqual(('', None), RemoveTitle(document))

    result = ParseDocument(document)
    self.assertEqual(None, result.title)
    self.assertEqual(None, result.title_attributes)
    self.assertEqual([], result.sections)
    self.assertEqual(['Found unexpected title "heading"'], result.warnings)

    result = ParseDocument(document, expect_title=True)
    self.assertEqual('heading', result.title)
    self.assertEqual({'id': 'header'}, result.title_attributes)
    self.assertEqual([], result.sections)
    self.assertEqual([], result.warnings)

  def testWholeDocument(self):
    """Checks title, warnings and nested structure for _WHOLE_DOCUMENT."""
    self.assertEqual((_WHOLE_DOCUMENT_WITHOUT_TITLE, None),
                     RemoveTitle(_WHOLE_DOCUMENT))
    result = ParseDocument(_WHOLE_DOCUMENT, expect_title=True)
    self.assertEqual('Main header', result.title)
    self.assertEqual({'id': 'main', 'class': 'header'},
                     result.title_attributes)
    self.assertEqual([
      'Found closing </h3> while processing a <h2> (line 19, column 15)',
      'Found multiple <h1> tags. Subsequent <h1> tags will be classified as '
          '<h2> for the purpose of the structure (line 22, column 1)',
      'Found <h3> in the middle of processing a <h2> (line 25, column 9)',
      # TODO(kalman): Re-enable this warning once the reference pages have
      # their references fixed.
      #'Found <h4> without any preceding <h3> (line 28, column 1)',
    ], result.warnings)

    # The non-trivial table of contents assertions...
    self.assertEqual(1, len(result.sections))
    entries = result.sections[0].structure

    self.assertEqual(4, len(entries), entries)
    entry0, entry1, entry2, entry3 = entries

    self.assertEqual('hello', entry0.name)
    self.assertEqual({'id': 'orange'}, entry0.attributes)
    self.assertEqual(2, len(entry0.entries))
    entry0_0, entry0_1 = entry0.entries

    self.assertEqual('Valencia Oranges', entry0_0.name)
    self.assertEqual({'id': 'valencia'}, entry0_0.attributes)
    self.assertEqual([], entry0_0.entries)
    self.assertEqual('Seville Oranges', entry0_1.name)
    self.assertEqual({'id': 'seville'}, entry0_1.attributes)
    self.assertEqual([], entry0_1.entries)

    self.assertEqual('Grapefruit', entry1.name)
    self.assertEqual({}, entry1.attributes)
    self.assertEqual([], entry1.entries)

    self.assertEqual('Not the main header', entry2.name)
    self.assertEqual({'id': 'not-main'}, entry2.attributes)
    self.assertEqual([], entry2.entries)

    self.assertEqual('Not a banana', entry3.name)
    self.assertEqual({}, entry3.attributes)
    self.assertEqual(2, len(entry3.entries))
    entry3_1, entry3_2 = entry3.entries

    self.assertEqual('It\'s a h4', entry3_1.name)
    self.assertEqual({}, entry3_1.attributes)
    self.assertEqual([], entry3_1.entries)

    self.assertEqual('Plantains', entry3_2.name)
    self.assertEqual({}, entry3_2.attributes)
    self.assertEqual(1, len(entry3_2.entries))
    entry3_2_1, = entry3_2.entries

    self.assertEqual('Another h4', entry3_2_1.name)
    self.assertEqual({}, entry3_2_1.attributes)
    self.assertEqual([], entry3_2_1.entries)

  def testSingleExplicitSection(self):
    """One explicit section yields one section, title inside or outside it."""
    def test(document):
      result = ParseDocument(document, expect_title=True)
      self.assertEqual([], result.warnings)
      self.assertEqual('Header', result.title)
      self.assertEqual(1, len(result.sections))
      section0, = result.sections
      entry0, = section0.structure
      self.assertEqual('An inner header', entry0.name)

    # A single section, one with the title inside the section, the other out.
    # NOTE(review): the <section> tag name is reconstructed -- confirm.
    test('<h1>Header</h1>'
         '<section>'
         'Just a single section here.'
         '<h2>An inner header</h2>'
         '</section>')
    test('<section>'
         'Another single section here.'
         '<h1>Header</h1>'
         '<h2>An inner header</h2>'
         '</section>')

  def testMultipleSections(self):
    """Content outside any explicit section forms a section of its own."""
    result = ParseDocument(
        '<h1>Header</h1>'
        '<h2>First header</h2>'
        'This content outside a section is the first section.'
        '<section>'
        'Second section'
        '<h2>Second header</h2>'
        '</section>'
        '<section>'
        'Third section'
        '<h2>Third header</h2>'
        '</section>',
        expect_title=True)
    self.assertEqual([], result.warnings)
    self.assertEqual('Header', result.title)
    self.assertEqual(3, len(result.sections))
    section0, section1, section2 = result.sections

    def assert_single_header(section, name):
      self.assertEqual(1, len(section.structure))
      self.assertEqual(name, section.structure[0].name)

    assert_single_header(section0, 'First header')
    assert_single_header(section1, 'Second header')
    assert_single_header(section2, 'Third header')


if __name__ == '__main__':
  unittest.main()