// Copyright 2015 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

/**
 * Parser for a simple grammar that describes a tree structure using a
 * function-like "a(b(c,d))" syntax. Original intended usage: to have
 * browsertests specify an arbitrary tree of iframes, loaded from various
 * sites, without having to write a .html page for each level or do crazy feats
 * of data: url escaping. But there's nothing really iframe-specific here. See
 * below for some examples of the grammar and the parser output.
 *
 * @example Basic syntax: an identifier followed by arg list.
 * TreeParserUtil.parse('abc ()');  // returns { value: 'abc', children: [] }
 *
 * @example The arg list is optional. Dots are legal in ids.
 * TreeParserUtil.parse('b.com');  // returns { value: 'b.com', children: [] }
 *
 * @example Commas separate children in the arg list.
 * // returns { value: 'b', children: [
 * //           { value: 'c', children: [] },
 * //           { value: 'd', children: [] }
 * //         ]}
 * TreeParserUtil.parse('b (c, d)');
 *
 * @example Children can have children, and so on.
 * // returns { value: 'e', children: [
 * //           { value: 'f', children: [
 * //             { value: 'g', children: [
 * //               { value: 'h', children: [] },
 * //               { value: 'i', children: [
 * //                 { value: 'j', children: [] }
 * //               ]},
 * //             ]}
 * //           ]}
 * //         ]}
 * TreeParserUtil.parse('e(f(g(h(),i(j))))');
 *
 * @example flatten() converts a [sub]tree back to a string.
 * var tree = TreeParserUtil.parse('b.com (c.com(e.com), d.com)');
 * TreeParserUtil.flatten(tree.children[0]);  // returns 'c.com(e.com())'
 */
var TreeParserUtil = (function() {
  'use strict';

  // A token is an identifier only if it consists ENTIRELY of letters, digits,
  // dots and hyphens. The anchors matter: without them any token that merely
  // contains one legal character (e.g. 'a$b') would be accepted.
  var IDENTIFIER_PATTERN = /^[a-zA-Z0-9.-]+$/;

  /**
   * Parses an input string into a tree. See class comment for examples.
   *
   * @param {string} input The text to parse.
   * @returns {{value: string, children: Array}} A tree node; every entry of
   *     |children| is itself a node of the same form.
   * @throws {Error} If the input is empty, malformed, or has tokens left over
   *     after the top-level node has been parsed.
   */
  function parse(input) {
    var tokenStream = lex(input);
    var result = takeIdAndChild(tokenStream);

    // The take* helpers consume tokens from the front of the stream, so
    // anything still present here is trailing garbage.
    if (tokenStream.length !== 0) {
      throw new Error('Expected end of stream, but found "' +
                      tokenStream[0] + '".');
    }
    return result;
  }

  /**
   * Inverse of |parse|. Converts a parsed tree object into a string. Can be
   * used to forward a subtree as an argument to a nested document.
   *
   * The output always carries an explicit (possibly empty) argument list and
   * no whitespace, e.g. 'c.com(e.com())'.
   *
   * @param {{value: string, children: Array}} tree The node to serialize.
   * @returns {string} The string form of |tree| and all of its descendants.
   */
  function flatten(tree) {
    return tree.value + '(' + tree.children.map(flatten).join(',') + ')';
  }

  /**
   * Lexer function to convert an input string into a token stream. Splits the
   * input along whitespace, parens and commas. Whitespace is removed, while
   * parens and commas are preserved as standalone tokens.
   *
   * @param {string} input The input string.
   * @returns {Array<string>} The resulting token stream.
   */
  function lex(input) {
    // The capturing group makes split() keep the separators in its output;
    // trimming then reduces whitespace separators (and the empty strings
    // found between adjacent separators) to '', which the filter discards.
    return input.split(/(\s+|\(|\)|,)/)
        .map(function(piece) { return piece.trim(); })
        .filter(function(piece) { return piece !== ''; });
  }

  /**
   * Consumes from the stream an identifier and optional child list, returning
   * its parsed representation.
   *
   * @param {Array<string>} tokenStream Remaining tokens; consumed in place.
   * @returns {{value: string, children: Array}} The parsed node.
   */
  function takeIdAndChild(tokenStream) {
    // Order matters: the identifier precedes its child list in the stream.
    var value = takeIdentifier(tokenStream);
    var children = takeChildList(tokenStream);
    return { value: value, children: children };
  }

  /**
   * Consumes from the stream an identifier, returning its value (as a string).
   *
   * @param {Array<string>} tokenStream Remaining tokens; consumed in place.
   * @returns {string} The identifier.
   * @throws {Error} If the stream is empty or the next token is not made up
   *     solely of letters, digits, dots and hyphens.
   */
  function takeIdentifier(tokenStream) {
    if (tokenStream.length === 0)
      throw new Error('Expected an identifier, but found end-of-stream.');

    var token = tokenStream.shift();
    if (!IDENTIFIER_PATTERN.test(token))
      throw new Error('Expected an identifier, but found "' + token + '".');
    return token;
  }

  /**
   * Consumes an optional child list from the token stream, returning a list of
   * the parsed children.
   *
   * @param {Array<string>} tokenStream Remaining tokens; consumed in place.
   * @returns {Array} The parsed child nodes; empty when there is no list or
   *     the list is '()'.
   * @throws {Error} If an opened list is not properly closed with ')'.
   */
  function takeChildList(tokenStream) {
    // Remove the next token from the stream if it matches |token|.
    function tryToEatA(token) {
      if (tokenStream[0] === token) {
        tokenStream.shift();
        return true;
      }
      return false;
    }

    // Bare identifier case, as in 'b' in the input 'a (b, c)'.
    if (!tryToEatA('('))
      return [];

    // Empty list case, as in 'b' in the input 'a (b (), c)'.
    if (tryToEatA(')'))
      return [];

    // List with at least one entry.
    var result = [ takeIdAndChild(tokenStream) ];

    // Additional entries allowed with commas.
    while (tryToEatA(',')) {
      result.push(takeIdAndChild(tokenStream));
    }

    // End of list.
    if (tryToEatA(')'))
      return result;

    if (tokenStream.length === 0)
      throw new Error('Expected ")" or ",", but found end-of-stream.');
    throw new Error('Expected ")" or ",", but found "' +
                    tokenStream[0] + '".');
  }

  return {
    parse: parse,
    flatten: flatten
  };
})();