diff options
author | tbreisacher@chromium.org <tbreisacher@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2012-02-28 19:22:59 +0000 |
---|---|---|
committer | tbreisacher@chromium.org <tbreisacher@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2012-02-28 19:22:59 +0000 |
commit | 2915cd620ee1348fc43f3326abc0b7252f4b9d0b (patch) | |
tree | 88a682d501775b41278df7780a5591067e300257 /third_party | |
parent | 887e4ac50ad9ac62e39d26e2518cf8a975fd2205 (diff) | |
download | chromium_src-2915cd620ee1348fc43f3326abc0b7252f4b9d0b.zip chromium_src-2915cd620ee1348fc43f3326abc0b7252f4b9d0b.tar.gz chromium_src-2915cd620ee1348fc43f3326abc0b7252f4b9d0b.tar.bz2 |
PRESUBMIT check for JavaScript style errors
See https://groups.google.com/a/chromium.org/group/chromium-dev/browse_thread/thread/97b5dc28d9e5109b/a5bd070bb7f0a4b9
BUG=none
TEST=modify any .js file; `git commit` it; run `git cl presubmit`; look at the errors
Review URL: https://chromiumcodereview.appspot.com/9288045
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@124007 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'third_party')
57 files changed, 13329 insertions, 0 deletions
diff --git a/third_party/closure_linter/AUTHORS b/third_party/closure_linter/AUTHORS new file mode 100644 index 0000000..2f72bd6 --- /dev/null +++ b/third_party/closure_linter/AUTHORS @@ -0,0 +1,6 @@ +# This is a list of contributors to the Closure Linter. + +# Names should be added to this file like so: +# Name or Organization <email address> + +Google Inc. diff --git a/third_party/closure_linter/LICENSE b/third_party/closure_linter/LICENSE new file mode 100644 index 0000000..d9a10c0 --- /dev/null +++ b/third_party/closure_linter/LICENSE @@ -0,0 +1,176 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS diff --git a/third_party/closure_linter/README b/third_party/closure_linter/README new file mode 100644 index 0000000..4a21b2d --- /dev/null +++ b/third_party/closure_linter/README @@ -0,0 +1,9 @@ +This repository contains the Closure Linter - a style checker for JavaScript. + +To install the application, run + python ./setup.py install + +After installing, you get two helper applications installed into /usr/local/bin: + + gjslint.py - runs the linter and checks for errors + fixjsstyle.py - tries to fix errors automatically diff --git a/third_party/closure_linter/README.chromium b/third_party/closure_linter/README.chromium new file mode 100644 index 0000000..1fc156d --- /dev/null +++ b/third_party/closure_linter/README.chromium @@ -0,0 +1,15 @@ +Name: closure-linter +URL: http://code.google.com/p/closure-linter/ +Version: 2.3.4 +Date: 28 Feb 2012 +Revision: 16 +License: Apache 2.0 +Security Critical: no + +Description: + The Closure Linter enforces the guidelines set by the Google JavaScript Style + Guide. The linter handles style issues so that you can focus on the code. + +Local modifications: + Removed closure_linter/testdata/ + Added Apache License notice to closure_linter/common/tokens_test.py diff --git a/third_party/closure_linter/closure_linter/__init__.py b/third_party/closure_linter/closure_linter/__init__.py new file mode 100755 index 0000000..1798c8c --- /dev/null +++ b/third_party/closure_linter/closure_linter/__init__.py @@ -0,0 +1,16 @@ +#!/usr/bin/env python +# Copyright 2008 The Closure Linter Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Package indicator for gjslint.""" diff --git a/third_party/closure_linter/closure_linter/checker.py b/third_party/closure_linter/closure_linter/checker.py new file mode 100755 index 0000000..5c0fa81 --- /dev/null +++ b/third_party/closure_linter/closure_linter/checker.py @@ -0,0 +1,130 @@ +#!/usr/bin/env python +# +# Copyright 2007 The Closure Linter Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Core methods for checking JS files for common style guide violations.""" + +__author__ = ('robbyw@google.com (Robert Walker)', + 'ajp@google.com (Andy Perelson)') + +import gflags as flags + +from closure_linter import checkerbase +from closure_linter import closurizednamespacesinfo +from closure_linter import ecmametadatapass +from closure_linter import javascriptlintrules +from closure_linter import javascriptstatetracker +from closure_linter.common import lintrunner + +flags.DEFINE_list('limited_doc_files', ['dummy.js', 'externs.js'], + 'List of files with relaxed documentation checks. Will not ' + 'report errors for missing documentation, some missing ' + 'descriptions, or methods whose @return tags don\'t have a ' + 'matching return statement.') +flags.DEFINE_list('closurized_namespaces', '', + 'Namespace prefixes, used for testing of' + 'goog.provide/require') +flags.DEFINE_list('ignored_extra_namespaces', '', + 'Fully qualified namespaces that should be not be reported ' + 'as extra by the linter.') + + +class JavaScriptStyleChecker(checkerbase.CheckerBase): + """Checker that applies JavaScriptLintRules.""" + + def __init__(self, error_handler): + """Initialize an JavaScriptStyleChecker object. + + Args: + error_handler: Error handler to pass all errors to. + """ + self._namespaces_info = None + if flags.FLAGS.closurized_namespaces: + self._namespaces_info = ( + closurizednamespacesinfo.ClosurizedNamespacesInfo( + flags.FLAGS.closurized_namespaces, + flags.FLAGS.ignored_extra_namespaces)) + + checkerbase.CheckerBase.__init__( + self, + error_handler=error_handler, + lint_rules=javascriptlintrules.JavaScriptLintRules( + self._namespaces_info), + state_tracker=javascriptstatetracker.JavaScriptStateTracker(), + metadata_pass=ecmametadatapass.EcmaMetaDataPass(), + limited_doc_files=flags.FLAGS.limited_doc_files) + + def _CheckTokens(self, token, parse_error, debug_tokens): + """Checks a token stream for lint warnings/errors. + + Adds a separate pass for computing dependency information based on + goog.require and goog.provide statements prior to the main linting pass. + + Args: + token: The first token in the token stream. + parse_error: A ParseError if any errors occurred. + debug_tokens: Whether every token should be printed as it is encountered + during the pass. + + Returns: + A boolean indicating whether the full token stream could be checked or if + checking failed prematurely. + """ + # To maximize the amount of errors that get reported before a parse error + # is displayed, don't run the dependency pass if a parse error exists. + if self._namespaces_info and not parse_error: + self._namespaces_info.Reset() + result = (self._ExecutePass(token, self._DependencyPass) and + self._ExecutePass(token, self._LintPass, + debug_tokens=debug_tokens)) + else: + result = self._ExecutePass(token, self._LintPass, parse_error, + debug_tokens) + + if not result: + return False + + self._lint_rules.Finalize(self._state_tracker, self._tokenizer.mode) + + self._error_handler.FinishFile() + return True + + def _DependencyPass(self, token): + """Processes an invidual token for dependency information. + + Used to encapsulate the logic needed to process an individual token so that + it can be passed to _ExecutePass. + + Args: + token: The token to process. + """ + self._namespaces_info.ProcessToken(token, self._state_tracker) + + +class GJsLintRunner(lintrunner.LintRunner): + """Wrapper class to run GJsLint.""" + + def Run(self, filenames, error_handler): + """Run GJsLint on the given filenames. + + Args: + filenames: The filenames to check + error_handler: An ErrorHandler object. + """ + checker = JavaScriptStyleChecker(error_handler) + + # Check the list of files. + for filename in filenames: + checker.Check(filename) diff --git a/third_party/closure_linter/closure_linter/checkerbase.py b/third_party/closure_linter/closure_linter/checkerbase.py new file mode 100755 index 0000000..c7735a0 --- /dev/null +++ b/third_party/closure_linter/closure_linter/checkerbase.py @@ -0,0 +1,312 @@ +#!/usr/bin/env python +# +# Copyright 2008 The Closure Linter Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Base classes for writing checkers that operate on tokens.""" + +__author__ = ('robbyw@google.com (Robert Walker)', + 'ajp@google.com (Andy Perelson)', + 'jacobr@google.com (Jacob Richman)') + +import StringIO +import traceback + +import gflags as flags +from closure_linter import ecmametadatapass +from closure_linter import errorrules +from closure_linter import errors +from closure_linter import javascripttokenizer +from closure_linter.common import error +from closure_linter.common import htmlutil + +FLAGS = flags.FLAGS +flags.DEFINE_boolean('debug_tokens', False, + 'Whether to print all tokens for debugging.') + +flags.DEFINE_boolean('error_trace', False, + 'Whether to show error exceptions.') + + +class LintRulesBase(object): + """Base class for all classes defining the lint rules for a language.""" + + def __init__(self): + self.__checker = None + + def Initialize(self, checker, limited_doc_checks, is_html): + """Initializes to prepare to check a file. + + Args: + checker: Class to report errors to. + limited_doc_checks: Whether doc checking is relaxed for this file. + is_html: Whether the file is an HTML file with extracted contents. + """ + self.__checker = checker + self._limited_doc_checks = limited_doc_checks + self._is_html = is_html + + def _HandleError(self, code, message, token, position=None, + fix_data=None): + """Call the HandleError function for the checker we are associated with.""" + if errorrules.ShouldReportError(code): + self.__checker.HandleError(code, message, token, position, fix_data) + + def _SetLimitedDocChecks(self, limited_doc_checks): + """Sets whether doc checking is relaxed for this file. + + Args: + limited_doc_checks: Whether doc checking is relaxed for this file. + """ + self._limited_doc_checks = limited_doc_checks + + def CheckToken(self, token, parser_state): + """Checks a token, given the current parser_state, for warnings and errors. + + Args: + token: The current token under consideration. + parser_state: Object that indicates the parser state in the page. + + Raises: + TypeError: If not overridden. + """ + raise TypeError('Abstract method CheckToken not implemented') + + def Finalize(self, parser_state, tokenizer_mode): + """Perform all checks that need to occur after all lines are processed. + + Args: + parser_state: State of the parser after parsing all tokens + tokenizer_mode: Mode of the tokenizer after parsing the entire page + + Raises: + TypeError: If not overridden. + """ + raise TypeError('Abstract method Finalize not implemented') + + +class CheckerBase(object): + """This class handles checking a LintRules object against a file.""" + + def __init__(self, error_handler, lint_rules, state_tracker, + limited_doc_files=None, metadata_pass=None): + """Initialize a checker object. + + Args: + error_handler: Object that handles errors. + lint_rules: LintRules object defining lint errors given a token + and state_tracker object. + state_tracker: Object that tracks the current state in the token stream. + limited_doc_files: List of filenames that are not required to have + documentation comments. + metadata_pass: Object that builds metadata about the token stream. + """ + self._error_handler = error_handler + self._lint_rules = lint_rules + self._state_tracker = state_tracker + self._metadata_pass = metadata_pass + self._limited_doc_files = limited_doc_files + + # TODO(user): Factor out. A checker does not need to know about the + # tokenizer, only the token stream. + self._tokenizer = javascripttokenizer.JavaScriptTokenizer() + + self._has_errors = False + + def HandleError(self, code, message, token, position=None, + fix_data=None): + """Prints out the given error message including a line number. + + Args: + code: The error code. + message: The error to print. + token: The token where the error occurred, or None if it was a file-wide + issue. + position: The position of the error, defaults to None. + fix_data: Metadata used for fixing the error. + """ + self._has_errors = True + self._error_handler.HandleError( + error.Error(code, message, token, position, fix_data)) + + def HasErrors(self): + """Returns true if the style checker has found any errors. + + Returns: + True if the style checker has found any errors. + """ + return self._has_errors + + def Check(self, filename, source=None): + """Checks the file, printing warnings and errors as they are found. + + Args: + filename: The name of the file to check. + source: Optional. The contents of the file. Can be either a string or + file-like object. If omitted, contents will be read from disk from + the given filename. + """ + + if source is None: + try: + f = open(filename) + except IOError: + self._error_handler.HandleFile(filename, None) + self.HandleError(errors.FILE_NOT_FOUND, 'File not found', None) + self._error_handler.FinishFile() + return + else: + if type(source) in [str, unicode]: + f = StringIO.StringIO(source) + else: + f = source + + try: + if filename.endswith('.html') or filename.endswith('.htm'): + self.CheckLines(filename, htmlutil.GetScriptLines(f), True) + else: + self.CheckLines(filename, f, False) + finally: + f.close() + + def CheckLines(self, filename, lines_iter, is_html): + """Checks a file, given as an iterable of lines, for warnings and errors. + + Args: + filename: The name of the file to check. + lines_iter: An iterator that yields one line of the file at a time. + is_html: Whether the file being checked is an HTML file with extracted + contents. + + Returns: + A boolean indicating whether the full file could be checked or if checking + failed prematurely. + """ + limited_doc_checks = False + if self._limited_doc_files: + for limited_doc_filename in self._limited_doc_files: + if filename.endswith(limited_doc_filename): + limited_doc_checks = True + break + + lint_rules = self._lint_rules + lint_rules.Initialize(self, limited_doc_checks, is_html) + + token = self._tokenizer.TokenizeFile(lines_iter) + + parse_error = None + if self._metadata_pass: + try: + self._metadata_pass.Reset() + self._metadata_pass.Process(token) + except ecmametadatapass.ParseError, caught_parse_error: + if FLAGS.error_trace: + traceback.print_exc() + parse_error = caught_parse_error + except Exception: + print 'Internal error in %s' % filename + traceback.print_exc() + return False + + self._error_handler.HandleFile(filename, token) + + return self._CheckTokens(token, parse_error=parse_error, + debug_tokens=FLAGS.debug_tokens) + + def _CheckTokens(self, token, parse_error, debug_tokens): + """Checks a token stream for lint warnings/errors. + + Args: + token: The first token in the token stream to check. + parse_error: A ParseError if any errors occurred. + debug_tokens: Whether every token should be printed as it is encountered + during the pass. + + Returns: + A boolean indicating whether the full token stream could be checked or if + checking failed prematurely. + """ + result = self._ExecutePass(token, self._LintPass, parse_error, debug_tokens) + + if not result: + return False + + self._lint_rules.Finalize(self._state_tracker, self._tokenizer.mode) + self._error_handler.FinishFile() + return True + + def _LintPass(self, token): + """Checks an individual token for lint warnings/errors. + + Used to encapsulate the logic needed to check an individual token so that it + can be passed to _ExecutePass. + + Args: + token: The token to check. + """ + self._lint_rules.CheckToken(token, self._state_tracker) + + def _ExecutePass(self, token, pass_function, parse_error=None, + debug_tokens=False): + """Calls the given function for every token in the given token stream. + + As each token is passed to the given function, state is kept up to date and, + depending on the error_trace flag, errors are either caught and reported, or + allowed to bubble up so developers can see the full stack trace. If a parse + error is specified, the pass will proceed as normal until the token causing + the parse error is reached. + + Args: + token: The first token in the token stream. + pass_function: The function to call for each token in the token stream. + parse_error: A ParseError if any errors occurred. + debug_tokens: Whether every token should be printed as it is encountered + during the pass. + + Returns: + A boolean indicating whether the full token stream could be checked or if + checking failed prematurely. + + Raises: + Exception: If any error occurred while calling the given function. + """ + self._state_tracker.Reset() + while token: + if debug_tokens: + print token + + if parse_error and parse_error.token == token: + message = ('Error parsing file at token "%s". Unable to ' + 'check the rest of file.' % token.string) + self.HandleError(errors.FILE_DOES_NOT_PARSE, message, token) + self._error_handler.FinishFile() + return + + try: + self._state_tracker.HandleToken( + token, self._state_tracker.GetLastNonSpaceToken()) + pass_function(token) + self._state_tracker.HandleAfterToken(token) + except: + if FLAGS.error_trace: + raise + else: + self.HandleError(errors.FILE_DOES_NOT_PARSE, + ('Error parsing file at token "%s". Unable to ' + 'check the rest of file.' % token.string), + token) + self._error_handler.FinishFile() + return False + token = token.next + return True diff --git a/third_party/closure_linter/closure_linter/closurizednamespacesinfo.py b/third_party/closure_linter/closure_linter/closurizednamespacesinfo.py new file mode 100755 index 0000000..f2902dc --- /dev/null +++ b/third_party/closure_linter/closure_linter/closurizednamespacesinfo.py @@ -0,0 +1,500 @@ +#!/usr/bin/env python +# +# Copyright 2008 The Closure Linter Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Logic for computing dependency information for closurized JavaScript files. + +Closurized JavaScript files express dependencies using goog.require and +goog.provide statements. In order for the linter to detect when a statement is +missing or unnecessary, all identifiers in the JavaScript file must first be +processed to determine if they constitute the creation or usage of a dependency. +""" + + + +from closure_linter import javascripttokens +from closure_linter import tokenutil + +# pylint: disable-msg=C6409 +TokenType = javascripttokens.JavaScriptTokenType + +DEFAULT_EXTRA_NAMESPACES = [ + 'goog.testing.asserts', + 'goog.testing.jsunit', +] + +class ClosurizedNamespacesInfo(object): + """Dependency information for closurized JavaScript files. + + Processes token streams for dependency creation or usage and provides logic + for determining if a given require or provide statement is unnecessary or if + there are missing require or provide statements. + """ + + def __init__(self, closurized_namespaces, ignored_extra_namespaces): + """Initializes an instance the ClosurizedNamespacesInfo class. + + Args: + closurized_namespaces: A list of namespace prefixes that should be + processed for dependency information. Non-matching namespaces are + ignored. + ignored_extra_namespaces: A list of namespaces that should not be reported + as extra regardless of whether they are actually used. + """ + self._closurized_namespaces = closurized_namespaces + self._ignored_extra_namespaces = (ignored_extra_namespaces + + DEFAULT_EXTRA_NAMESPACES) + self.Reset() + + def Reset(self): + """Resets the internal state to prepare for processing a new file.""" + + # A list of goog.provide tokens in the order they appeared in the file. + self._provide_tokens = [] + + # A list of goog.require tokens in the order they appeared in the file. + self._require_tokens = [] + + # Namespaces that are already goog.provided. + self._provided_namespaces = [] + + # Namespaces that are already goog.required. + self._required_namespaces = [] + + # Note that created_namespaces and used_namespaces contain both namespaces + # and identifiers because there are many existing cases where a method or + # constant is provided directly instead of its namespace. Ideally, these + # two lists would only have to contain namespaces. + + # A list of tuples where the first element is the namespace of an identifier + # created in the file and the second is the identifier itself. + self._created_namespaces = [] + + # A list of tuples where the first element is the namespace of an identifier + # used in the file and the second is the identifier itself. + self._used_namespaces = [] + + # A list of seemingly-unnecessary namespaces that are goog.required() and + # annotated with @suppress {extraRequire}. + self._suppressed_requires = [] + + # A list of goog.provide tokens which are duplicates. + self._duplicate_provide_tokens = [] + + # A list of goog.require tokens which are duplicates. + self._duplicate_require_tokens = [] + + # Whether this file is in a goog.scope. Someday, we may add support + # for checking scopified namespaces, but for now let's just fail + # in a more reasonable way. + self._scopified_file = False + + # TODO(user): Handle the case where there are 2 different requires + # that can satisfy the same dependency, but only one is necessary. + + def GetProvidedNamespaces(self): + """Returns the namespaces which are already provided by this file. + + Returns: + A list of strings where each string is a 'namespace' corresponding to an + existing goog.provide statement in the file being checked. + """ + return list(self._provided_namespaces) + + def GetRequiredNamespaces(self): + """Returns the namespaces which are already required by this file. + + Returns: + A list of strings where each string is a 'namespace' corresponding to an + existing goog.require statement in the file being checked. + """ + return list(self._required_namespaces) + + def IsExtraProvide(self, token): + """Returns whether the given goog.provide token is unnecessary. + + Args: + token: A goog.provide token. + + Returns: + True if the given token corresponds to an unnecessary goog.provide + statement, otherwise False. + """ + if self._scopified_file: + return False + + namespace = tokenutil.Search(token, TokenType.STRING_TEXT).string + + base_namespace = namespace.split('.', 1)[0] + if base_namespace not in self._closurized_namespaces: + return False + + if token in self._duplicate_provide_tokens: + return True + + # TODO(user): There's probably a faster way to compute this. + for created_namespace, created_identifier in self._created_namespaces: + if namespace == created_namespace or namespace == created_identifier: + return False + + return True + + def IsExtraRequire(self, token): + """Returns whether the given goog.require token is unnecessary. + + Args: + token: A goog.require token. + + Returns: + True if the given token corresponds to an unnecessary goog.require + statement, otherwise False. + """ + if self._scopified_file: + return False + + namespace = tokenutil.Search(token, TokenType.STRING_TEXT).string + + base_namespace = namespace.split('.', 1)[0] + if base_namespace not in self._closurized_namespaces: + return False + + if namespace in self._ignored_extra_namespaces: + return False + + if token in self._duplicate_require_tokens: + return True + + if namespace in self._suppressed_requires: + return False + + # If the namespace contains a component that is initial caps, then that + # must be the last component of the namespace. + parts = namespace.split('.') + if len(parts) > 1 and parts[-2][0].isupper(): + return True + + # TODO(user): There's probably a faster way to compute this. + for used_namespace, used_identifier in self._used_namespaces: + if namespace == used_namespace or namespace == used_identifier: + return False + + return True + + def GetMissingProvides(self): + """Returns the set of missing provided namespaces for the current file. + + Returns: + Returns a set of strings where each string is a namespace that should be + provided by this file, but is not. + """ + if self._scopified_file: + return set() + + missing_provides = set() + for namespace, identifier in self._created_namespaces: + if (not self._IsPrivateIdentifier(identifier) and + namespace not in self._provided_namespaces and + identifier not in self._provided_namespaces and + namespace not in self._required_namespaces): + missing_provides.add(namespace) + + return missing_provides + + def GetMissingRequires(self): + """Returns the set of missing required namespaces for the current file. + + For each non-private identifier used in the file, find either a + goog.require, goog.provide or a created identifier that satisfies it. + goog.require statements can satisfy the identifier by requiring either the + namespace of the identifier or the identifier itself. goog.provide + statements can satisfy the identifier by providing the namespace of the + identifier. A created identifier can only satisfy the used identifier if + it matches it exactly (necessary since things can be defined on a + namespace in more than one file). Note that provided namespaces should be + a subset of created namespaces, but we check both because in some cases we + can't always detect the creation of the namespace. + + Returns: + Returns a set of strings where each string is a namespace that should be + required by this file, but is not. + """ + if self._scopified_file: + return set() + + external_dependencies = set(self._required_namespaces) + + # Assume goog namespace is always available. + external_dependencies.add('goog') + + created_identifiers = set() + for namespace, identifier in self._created_namespaces: + created_identifiers.add(identifier) + + missing_requires = set() + for namespace, identifier in self._used_namespaces: + if (not self._IsPrivateIdentifier(identifier) and + namespace not in external_dependencies and + namespace not in self._provided_namespaces and + identifier not in external_dependencies and + identifier not in created_identifiers): + missing_requires.add(namespace) + + return missing_requires + + def _IsPrivateIdentifier(self, identifier): + """Returns whether the given identifer is private.""" + pieces = identifier.split('.') + for piece in pieces: + if piece.endswith('_'): + return True + return False + + def IsFirstProvide(self, token): + """Returns whether token is the first provide token.""" + return self._provide_tokens and token == self._provide_tokens[0] + + def IsFirstRequire(self, token): + """Returns whether token is the first require token.""" + return self._require_tokens and token == self._require_tokens[0] + + def IsLastProvide(self, token): + """Returns whether token is the last provide token.""" + return self._provide_tokens and token == self._provide_tokens[-1] + + def IsLastRequire(self, token): + """Returns whether token is the last require token.""" + return self._require_tokens and token == self._require_tokens[-1] + + def ProcessToken(self, token, state_tracker): + """Processes the given token for dependency information. + + Args: + token: The token to process. + state_tracker: The JavaScript state tracker. + """ + + # Note that this method is in the critical path for the linter and has been + # optimized for performance in the following ways: + # - Tokens are checked by type first to minimize the number of function + # calls necessary to determine if action needs to be taken for the token. + # - The most common tokens types are checked for first. + # - The number of function calls has been minimized (thus the length of this + # function. + + if token.type == TokenType.IDENTIFIER: + # TODO(user): Consider saving the whole identifier in metadata. + whole_identifier_string = self._GetWholeIdentifierString(token) + if whole_identifier_string is None: + # We only want to process the identifier one time. If the whole string + # identifier is None, that means this token was part of a multi-token + # identifier, but it was not the first token of the identifier. + return + + # In the odd case that a goog.require is encountered inside a function, + # just ignore it (e.g. dynamic loading in test runners). + if token.string == 'goog.require' and not state_tracker.InFunction(): + self._require_tokens.append(token) + namespace = tokenutil.Search(token, TokenType.STRING_TEXT).string + if namespace in self._required_namespaces: + self._duplicate_require_tokens.append(token) + else: + self._required_namespaces.append(namespace) + + # If there is a suppression for the require, add a usage for it so it + # gets treated as a regular goog.require (i.e. still gets sorted). + jsdoc = state_tracker.GetDocComment() + if jsdoc and ('extraRequire' in jsdoc.suppressions): + self._suppressed_requires.append(namespace) + self._AddUsedNamespace(state_tracker, namespace) + + elif token.string == 'goog.provide': + self._provide_tokens.append(token) + namespace = tokenutil.Search(token, TokenType.STRING_TEXT).string + if namespace in self._provided_namespaces: + self._duplicate_provide_tokens.append(token) + else: + self._provided_namespaces.append(namespace) + + # If there is a suppression for the provide, add a creation for it so it + # gets treated as a regular goog.provide (i.e. still gets sorted). + jsdoc = state_tracker.GetDocComment() + if jsdoc and ('extraProvide' in jsdoc.suppressions): + self._AddCreatedNamespace(state_tracker, namespace) + + elif token.string == 'goog.scope': + self._scopified_file = True + + else: + jsdoc = state_tracker.GetDocComment() + if jsdoc and jsdoc.HasFlag('typedef'): + self._AddCreatedNamespace(state_tracker, whole_identifier_string, + self.GetClosurizedNamespace( + whole_identifier_string)) + else: + self._AddUsedNamespace(state_tracker, whole_identifier_string) + + elif token.type == TokenType.SIMPLE_LVALUE: + identifier = token.values['identifier'] + namespace = self.GetClosurizedNamespace(identifier) + if state_tracker.InFunction(): + self._AddUsedNamespace(state_tracker, identifier) + elif namespace and namespace != 'goog': + self._AddCreatedNamespace(state_tracker, identifier, namespace) + + elif token.type == TokenType.DOC_FLAG: + flag_type = token.attached_object.flag_type + is_interface = state_tracker.GetDocComment().HasFlag('interface') + if flag_type == 'implements' or (flag_type == 'extends' and is_interface): + # Interfaces should be goog.require'd. + doc_start = tokenutil.Search(token, TokenType.DOC_START_BRACE) + interface = tokenutil.Search(doc_start, TokenType.COMMENT) + self._AddUsedNamespace(state_tracker, interface.string) + + + def _GetWholeIdentifierString(self, token): + """Returns the whole identifier string for the given token. + + Checks the tokens after the current one to see if the token is one in a + sequence of tokens which are actually just one identifier (i.e. a line was + wrapped in the middle of an identifier). + + Args: + token: The token to check. + + Returns: + The whole identifier string or None if this token is not the first token + in a multi-token identifier. + """ + result = '' + + # Search backward to determine if this token is the first token of the + # identifier. If it is not the first token, return None to signal that this + # token should be ignored. + prev_token = token.previous + while prev_token: + if (prev_token.IsType(TokenType.IDENTIFIER) or + prev_token.IsType(TokenType.NORMAL) and prev_token.string == '.'): + return None + elif (not prev_token.IsType(TokenType.WHITESPACE) and + not prev_token.IsAnyType(TokenType.COMMENT_TYPES)): + break + prev_token = prev_token.previous + + # Search forward to find other parts of this identifier separated by white + # space. + next_token = token + while next_token: + if (next_token.IsType(TokenType.IDENTIFIER) or + next_token.IsType(TokenType.NORMAL) and next_token.string == '.'): + result += next_token.string + elif (not next_token.IsType(TokenType.WHITESPACE) and + not next_token.IsAnyType(TokenType.COMMENT_TYPES)): + break + next_token = next_token.next + + return result + + def _AddCreatedNamespace(self, state_tracker, identifier, namespace=None): + """Adds the namespace of an identifier to the list of created namespaces. + + If the identifier is annotated with a 'missingProvide' suppression, it is + not added. + + Args: + state_tracker: The JavaScriptStateTracker instance. + identifier: The identifier to add. + namespace: The namespace of the identifier or None if the identifier is + also the namespace. + """ + if not namespace: + namespace = identifier + + jsdoc = state_tracker.GetDocComment() + if jsdoc and 'missingProvide' in jsdoc.suppressions: + return + + self._created_namespaces.append([namespace, identifier]) + + def _AddUsedNamespace(self, state_tracker, identifier): + """Adds the namespace of an identifier to the list of used namespaces. + + If the identifier is annotated with a 'missingRequire' suppression, it is + not added. + + Args: + state_tracker: The JavaScriptStateTracker instance. + identifier: An identifier which has been used. + """ + jsdoc = state_tracker.GetDocComment() + if jsdoc and 'missingRequire' in jsdoc.suppressions: + return + + namespace = self.GetClosurizedNamespace(identifier) + if namespace: + self._used_namespaces.append([namespace, identifier]) + + def GetClosurizedNamespace(self, identifier): + """Given an identifier, returns the namespace that identifier is from. + + Args: + identifier: The identifier to extract a namespace from. + + Returns: + The namespace the given identifier resides in, or None if one could not + be found. + """ + if identifier.startswith('goog.global'): + # Ignore goog.global, since it is, by definition, global. + return None + + parts = identifier.split('.') + for namespace in self._closurized_namespaces: + if not identifier.startswith(namespace + '.'): + continue + + last_part = parts[-1] + if not last_part: + # TODO(robbyw): Handle this: it's a multi-line identifier. + return None + + # The namespace for a class is the shortest prefix ending in a class + # name, which starts with a capital letter but is not a capitalized word. + # + # We ultimately do not want to allow requiring or providing of inner + # classes/enums. Instead, a file should provide only the top-level class + # and users should require only that. + namespace = [] + for part in parts: + if part == 'prototype' or part.isupper(): + return '.'.join(namespace) + namespace.append(part) + if part[0].isupper(): + return '.'.join(namespace) + + # At this point, we know there's no class or enum, so the namespace is + # just the identifier with the last part removed. With the exception of + # apply, inherits, and call, which should also be stripped. + if parts[-1] in ('apply', 'inherits', 'call'): + parts.pop() + parts.pop() + + # If the last part ends with an underscore, it is a private variable, + # method, or enum. The namespace is whatever is before it. + if parts and parts[-1].endswith('_'): + parts.pop() + + return '.'.join(parts) + + return None diff --git a/third_party/closure_linter/closure_linter/closurizednamespacesinfo_test.py b/third_party/closure_linter/closure_linter/closurizednamespacesinfo_test.py new file mode 100755 index 0000000..cec3376 --- /dev/null +++ b/third_party/closure_linter/closure_linter/closurizednamespacesinfo_test.py @@ -0,0 +1,451 @@ +#!/usr/bin/env python +# +# Copyright 2010 The Closure Linter Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Unit tests for ClosurizedNamespacesInfo.""" + + + +import unittest as googletest +from closure_linter import closurizednamespacesinfo +from closure_linter import javascriptstatetracker +from closure_linter import javascripttokenizer +from closure_linter import javascripttokens +from closure_linter import tokenutil + +# pylint: disable-msg=C6409 +TokenType = javascripttokens.JavaScriptTokenType + + +class ClosurizedNamespacesInfoTest(googletest.TestCase): + """Tests for ClosurizedNamespacesInfo.""" + + _test_cases = { + 'goog.global.anything': None, + 'package.CONSTANT': 'package', + 'package.methodName': 'package', + 'package.subpackage.methodName': 'package.subpackage', + 'package.subpackage.methodName.apply': 'package.subpackage', + 'package.ClassName.something': 'package.ClassName', + 'package.ClassName.Enum.VALUE.methodName': 'package.ClassName', + 'package.ClassName.CONSTANT': 'package.ClassName', + 'package.namespace.CONSTANT.methodName': 'package.namespace', + 'package.ClassName.inherits': 'package.ClassName', + 'package.ClassName.apply': 'package.ClassName', + 'package.ClassName.methodName.apply': 'package.ClassName', + 'package.ClassName.methodName.call': 'package.ClassName', + 'package.ClassName.prototype.methodName': 'package.ClassName', + 'package.ClassName.privateMethod_': 'package.ClassName', + 'package.className.privateProperty_': 'package.className', + 'package.className.privateProperty_.methodName': 'package.className', + 'package.ClassName.PrivateEnum_': 'package.ClassName', + 'package.ClassName.prototype.methodName.apply': 'package.ClassName', + 'package.ClassName.property.subProperty': 'package.ClassName', + 'package.className.prototype.something.somethingElse': 'package.className' + } + + _tokenizer = javascripttokenizer.JavaScriptTokenizer() + + def testGetClosurizedNamespace(self): + """Tests that the correct namespace is returned for various identifiers.""" + namespaces_info = closurizednamespacesinfo.ClosurizedNamespacesInfo( + closurized_namespaces=['package'], ignored_extra_namespaces=[]) + for identifier, expected_namespace in self._test_cases.items(): + actual_namespace = namespaces_info.GetClosurizedNamespace(identifier) + self.assertEqual( + expected_namespace, + actual_namespace, + 'expected namespace "' + str(expected_namespace) + + '" for identifier "' + str(identifier) + '" but was "' + + str(actual_namespace) + '"') + + def testIgnoredExtraNamespaces(self): + """Tests that ignored_extra_namespaces are ignored.""" + token = self._GetRequireTokens('package.Something') + namespaces_info = closurizednamespacesinfo.ClosurizedNamespacesInfo( + closurized_namespaces=['package'], + ignored_extra_namespaces=['package.Something']) + + self.assertFalse(namespaces_info.IsExtraRequire(token), + 'Should be valid since it is in ignored namespaces.') + + namespaces_info = closurizednamespacesinfo.ClosurizedNamespacesInfo( + ['package'], []) + + self.assertTrue(namespaces_info.IsExtraRequire(token), + 'Should be invalid since it is not in ignored namespaces.') + + def testIsExtraProvide_created(self): + """Tests that provides for created namespaces are not extra.""" + input_lines = [ + 'goog.provide(\'package.Foo\');', + 'package.Foo = function() {};' + ] + token = self._tokenizer.TokenizeFile(input_lines) + namespaces_info = self._GetInitializedNamespacesInfo(token, ['package'], []) + + self.assertFalse(namespaces_info.IsExtraProvide(token), + 'Should not be extra since it is created.') + + def testIsExtraProvide_createdIdentifier(self): + """Tests that provides for created identifiers are not extra.""" + input_lines = [ + 'goog.provide(\'package.Foo.methodName\');', + 'package.Foo.methodName = function() {};' + ] + token = self._tokenizer.TokenizeFile(input_lines) + namespaces_info = self._GetInitializedNamespacesInfo(token, ['package'], []) + + self.assertFalse(namespaces_info.IsExtraProvide(token), + 'Should not be extra since it is created.') + + def testIsExtraProvide_notCreated(self): + """Tests that provides for non-created namespaces are extra.""" + input_lines = ['goog.provide(\'package.Foo\');'] + token = self._tokenizer.TokenizeFile(input_lines) + namespaces_info = self._GetInitializedNamespacesInfo(token, ['package'], []) + + self.assertTrue(namespaces_info.IsExtraProvide(token), + 'Should be extra since it is not created.') + + def testIsExtraProvide_duplicate(self): + """Tests that providing a namespace twice makes the second one extra.""" + input_lines = [ + 'goog.provide(\'package.Foo\');', + 'goog.provide(\'package.Foo\');', + 'package.Foo = function() {};' + ] + token = self._tokenizer.TokenizeFile(input_lines) + namespaces_info = self._GetInitializedNamespacesInfo(token, ['package'], []) + + # Advance to the second goog.provide token. + token = tokenutil.Search(token.next, TokenType.IDENTIFIER) + + self.assertTrue(namespaces_info.IsExtraProvide(token), + 'Should be extra since it is already provided.') + + def testIsExtraProvide_notClosurized(self): + """Tests that provides of non-closurized namespaces are not extra.""" + input_lines = ['goog.provide(\'notclosurized.Foo\');'] + token = self._tokenizer.TokenizeFile(input_lines) + namespaces_info = self._GetInitializedNamespacesInfo(token, ['package'], []) + + self.assertFalse(namespaces_info.IsExtraProvide(token), + 'Should not be extra since it is not closurized.') + + def testIsExtraRequire_used(self): + """Tests that requires for used namespaces are not extra.""" + input_lines = [ + 'goog.require(\'package.Foo\');', + 'var x = package.Foo.methodName();' + ] + token = self._tokenizer.TokenizeFile(input_lines) + namespaces_info = self._GetInitializedNamespacesInfo(token, ['package'], []) + + self.assertFalse(namespaces_info.IsExtraRequire(token), + 'Should not be extra since it is used.') + + def testIsExtraRequire_usedIdentifier(self): + """Tests that requires for used methods on classes are extra.""" + input_lines = [ + 'goog.require(\'package.Foo.methodName\');', + 'var x = package.Foo.methodName();' + ] + token = self._tokenizer.TokenizeFile(input_lines) + namespaces_info = self._GetInitializedNamespacesInfo(token, ['package'], []) + + self.assertTrue(namespaces_info.IsExtraRequire(token), + 'Should require the package, not the method specifically.') + + def testIsExtraRequire_notUsed(self): + """Tests that requires for unused namespaces are extra.""" + input_lines = ['goog.require(\'package.Foo\');'] + token = self._tokenizer.TokenizeFile(input_lines) + namespaces_info = self._GetInitializedNamespacesInfo(token, ['package'], []) + + self.assertTrue(namespaces_info.IsExtraRequire(token), + 'Should be extra since it is not used.') + + def testIsExtraRequire_notClosurized(self): + """Tests that requires of non-closurized namespaces are not extra.""" + input_lines = ['goog.require(\'notclosurized.Foo\');'] + token = self._tokenizer.TokenizeFile(input_lines) + namespaces_info = self._GetInitializedNamespacesInfo(token, ['package'], []) + + self.assertFalse(namespaces_info.IsExtraRequire(token), + 'Should not be extra since it is not closurized.') + + def testIsExtraRequire_objectOnClass(self): + """Tests that requiring an object on a class is extra.""" + input_lines = [ + 'goog.require(\'package.Foo.Enum\');', + 'var x = package.Foo.Enum.VALUE1;', + ] + token = self._tokenizer.TokenizeFile(input_lines) + namespaces_info = self._GetInitializedNamespacesInfo(token, ['package'], []) + + self.assertTrue(namespaces_info.IsExtraRequire(token), + 'The whole class, not the object, should be required.'); + + def testIsExtraRequire_constantOnClass(self): + """Tests that requiring a constant on a class is extra.""" + input_lines = [ + 'goog.require(\'package.Foo.CONSTANT\');', + 'var x = package.Foo.CONSTANT', + ] + token = self._tokenizer.TokenizeFile(input_lines) + namespaces_info = self._GetInitializedNamespacesInfo(token, ['package'], []) + + self.assertTrue(namespaces_info.IsExtraRequire(token), + 'The class, not the constant, should be required.'); + + def testIsExtraRequire_constantNotOnClass(self): + """Tests that requiring a constant not on a class is OK.""" + input_lines = [ + 'goog.require(\'package.subpackage.CONSTANT\');', + 'var x = package.subpackage.CONSTANT', + ] + token = self._tokenizer.TokenizeFile(input_lines) + namespaces_info = self._GetInitializedNamespacesInfo(token, ['package'], []) + + self.assertFalse(namespaces_info.IsExtraRequire(token), + 'Constants can be required except on classes.'); + + def testIsExtraRequire_methodNotOnClass(self): + """Tests that requiring a method not on a class is OK.""" + input_lines = [ + 'goog.require(\'package.subpackage.method\');', + 'var x = package.subpackage.method()', + ] + token = self._tokenizer.TokenizeFile(input_lines) + namespaces_info = self._GetInitializedNamespacesInfo(token, ['package'], []) + + self.assertFalse(namespaces_info.IsExtraRequire(token), + 'Methods can be required except on classes.'); + + def testIsExtraRequire_defaults(self): + """Tests that there are no warnings about extra requires for test utils""" + input_lines = ['goog.require(\'goog.testing.jsunit\');'] + token = self._tokenizer.TokenizeFile(input_lines) + namespaces_info = self._GetInitializedNamespacesInfo(token, ['goog'], []) + + self.assertFalse(namespaces_info.IsExtraRequire(token), + 'Should not be extra since it is for testing.') + + def testGetMissingProvides_provided(self): + """Tests that provided functions don't cause a missing provide.""" + input_lines = [ + 'goog.provide(\'package.Foo\');', + 'package.Foo = function() {};' + ] + token = self._tokenizer.TokenizeFile(input_lines) + namespaces_info = self._GetInitializedNamespacesInfo(token, ['package'], []) + + self.assertEquals(0, len(namespaces_info.GetMissingProvides())) + + def testGetMissingProvides_providedIdentifier(self): + """Tests that provided identifiers don't cause a missing provide.""" + input_lines = [ + 'goog.provide(\'package.Foo.methodName\');', + 'package.Foo.methodName = function() {};' + ] + token = self._tokenizer.TokenizeFile(input_lines) + namespaces_info = self._GetInitializedNamespacesInfo(token, ['package'], []) + + self.assertEquals(0, len(namespaces_info.GetMissingProvides())) + + def testGetMissingProvides_providedParentIdentifier(self): + """Tests that provided identifiers on a class don't cause a missing provide + on objects attached to that class.""" + input_lines = [ + 'goog.provide(\'package.foo.ClassName\');', + 'package.foo.ClassName.methodName = function() {};', + 'package.foo.ClassName.ObjectName = 1;', + ] + token = self._tokenizer.TokenizeFile(input_lines) + namespaces_info = self._GetInitializedNamespacesInfo(token, ['package'], []) + + self.assertEquals(0, len(namespaces_info.GetMissingProvides())) + + def testGetMissingProvides_unprovided(self): + """Tests that unprovided functions cause a missing provide.""" + input_lines = ['package.Foo = function() {};'] + token = self._tokenizer.TokenizeFile(input_lines) + namespaces_info = self._GetInitializedNamespacesInfo(token, ['package'], []) + + self.assertEquals(1, len(namespaces_info.GetMissingProvides())) + self.assertTrue('package.Foo' in namespaces_info.GetMissingProvides()) + + def testGetMissingProvides_privatefunction(self): + """Tests that unprovided private functions don't cause a missing provide.""" + input_lines = ['package.Foo_ = function() {};'] + token = self._tokenizer.TokenizeFile(input_lines) + namespaces_info = self._GetInitializedNamespacesInfo(token, ['package'], []) + + self.assertEquals(0, len(namespaces_info.GetMissingProvides())) + + def testGetMissingProvides_required(self): + """Tests that required namespaces don't cause a missing provide.""" + input_lines = [ + 'goog.require(\'package.Foo\');', + 'package.Foo.methodName = function() {};' + ] + token = self._tokenizer.TokenizeFile(input_lines) + namespaces_info = self._GetInitializedNamespacesInfo(token, ['package'], []) + + self.assertEquals(0, len(namespaces_info.GetMissingProvides())) + + def testGetMissingRequires_required(self): + """Tests that required namespaces don't cause a missing require.""" + input_lines = [ + 'goog.require(\'package.Foo\');', + 'package.Foo();' + ] + token = self._tokenizer.TokenizeFile(input_lines) + namespaces_info = self._GetInitializedNamespacesInfo(token, ['package'], []) + + self.assertEquals(0, len(namespaces_info.GetMissingProvides())) + + def testGetMissingRequires_requiredIdentifier(self): + """Tests that required namespaces satisfy identifiers on that namespace.""" + input_lines = [ + 'goog.require(\'package.Foo\');', + 'package.Foo.methodName();' + ] + token = self._tokenizer.TokenizeFile(input_lines) + namespaces_info = self._GetInitializedNamespacesInfo(token, ['package'], []) + + self.assertEquals(0, len(namespaces_info.GetMissingProvides())) + + def testGetMissingRequires_requiredParentClass(self): + """Tests that requiring a parent class of an object is sufficient to prevent + a missing require on that object.""" + input_lines = [ + 'goog.require(\'package.Foo\');', + 'package.Foo.methodName();', + 'package.Foo.methodName(package.Foo.ObjectName);' + ] + token = self._tokenizer.TokenizeFile(input_lines) + namespaces_info = self._GetInitializedNamespacesInfo(token, ['package'], []) + + self.assertEquals(0, len(namespaces_info.GetMissingRequires())) + + def testGetMissingRequires_unrequired(self): + """Tests that unrequired namespaces cause a missing require.""" + input_lines = ['package.Foo();'] + token = self._tokenizer.TokenizeFile(input_lines) + namespaces_info = self._GetInitializedNamespacesInfo(token, ['package'], []) + + self.assertEquals(1, len(namespaces_info.GetMissingRequires())) + self.assertTrue('package.Foo' in namespaces_info.GetMissingRequires()) + + def testGetMissingRequires_provided(self): + """Tests that provided namespaces satisfy identifiers on that namespace.""" + input_lines = [ + 'goog.provide(\'package.Foo\');', + 'package.Foo.methodName();' + ] + token = self._tokenizer.TokenizeFile(input_lines) + namespaces_info = self._GetInitializedNamespacesInfo(token, ['package'], []) + + self.assertEquals(0, len(namespaces_info.GetMissingRequires())) + + def testGetMissingRequires_created(self): + """Tests that created namespaces do not satisfy usage of an identifier.""" + input_lines = [ + 'package.Foo = function();', + 'package.Foo.methodName();' + ] + token = self._tokenizer.TokenizeFile(input_lines) + namespaces_info = self._GetInitializedNamespacesInfo(token, ['package'], []) + + self.assertEquals(1, len(namespaces_info.GetMissingRequires())) + self.assertTrue('package.Foo' in namespaces_info.GetMissingRequires()) + + def testGetMissingRequires_createdIdentifier(self): + """Tests that created identifiers satisfy usage of the identifier.""" + input_lines = [ + 'package.Foo.methodName = function();', + 'package.Foo.methodName();' + ] + token = self._tokenizer.TokenizeFile(input_lines) + namespaces_info = self._GetInitializedNamespacesInfo(token, ['package'], []) + + self.assertEquals(0, len(namespaces_info.GetMissingRequires())) + + def testGetMissingRequires_objectOnClass(self): + """Tests that we should require a class, not the object on the class.""" + input_lines = [ + 'goog.require(\'package.Foo.Enum\');', + 'var x = package.Foo.Enum.VALUE1;', + ] + token = self._tokenizer.TokenizeFile(input_lines) + namespaces_info = self._GetInitializedNamespacesInfo(token, ['package'], []) + + self.assertEquals(1, len(namespaces_info.GetMissingRequires()), + 'The whole class, not the object, should be required.'); + + def testIsFirstProvide(self): + """Tests operation of the isFirstProvide method.""" + input_lines = [ + 'goog.provide(\'package.Foo\');', + 'package.Foo.methodName();' + ] + token = self._tokenizer.TokenizeFile(input_lines) + namespaces_info = self._GetInitializedNamespacesInfo(token, ['package'], []) + + self.assertTrue(namespaces_info.IsFirstProvide(token)) + + def testGetWholeIdentifierString(self): + """Tests that created identifiers satisfy usage of the identifier.""" + input_lines = [ + 'package.Foo.', + ' veryLong.', + ' identifier;' + ] + token = self._tokenizer.TokenizeFile(input_lines) + namespaces_info = closurizednamespacesinfo.ClosurizedNamespacesInfo([], []) + + self.assertEquals('package.Foo.veryLong.identifier', + namespaces_info._GetWholeIdentifierString(token)) + self.assertEquals(None, + namespaces_info._GetWholeIdentifierString(token.next)) + + def _GetInitializedNamespacesInfo(self, token, closurized_namespaces, + ignored_extra_namespaces): + """Returns a namespaces info initialized with the given token stream.""" + namespaces_info = closurizednamespacesinfo.ClosurizedNamespacesInfo( + closurized_namespaces=closurized_namespaces, + ignored_extra_namespaces=ignored_extra_namespaces) + state_tracker = javascriptstatetracker.JavaScriptStateTracker() + + while token: + namespaces_info.ProcessToken(token, state_tracker) + token = token.next + + return namespaces_info + + def _GetProvideTokens(self, namespace): + """Returns a list of tokens for a goog.require of the given namespace.""" + line_text = 'goog.require(\'' + namespace + '\');\n' + return javascripttokenizer.JavaScriptTokenizer().TokenizeFile([line_text]) + + def _GetRequireTokens(self, namespace): + """Returns a list of tokens for a goog.require of the given namespace.""" + line_text = 'goog.require(\'' + namespace + '\');\n' + return javascripttokenizer.JavaScriptTokenizer().TokenizeFile([line_text]) + +if __name__ == '__main__': + googletest.main() diff --git a/third_party/closure_linter/closure_linter/common/__init__.py b/third_party/closure_linter/closure_linter/common/__init__.py new file mode 100755 index 0000000..5793043 --- /dev/null +++ b/third_party/closure_linter/closure_linter/common/__init__.py @@ -0,0 +1,16 @@ +#!/usr/bin/env python +# Copyright 2008 The Closure Linter Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Package indicator for gjslint.common.""" diff --git a/third_party/closure_linter/closure_linter/common/error.py b/third_party/closure_linter/closure_linter/common/error.py new file mode 100755 index 0000000..0e3b476 --- /dev/null +++ b/third_party/closure_linter/closure_linter/common/error.py @@ -0,0 +1,65 @@ +#!/usr/bin/env python +# +# Copyright 2007 The Closure Linter Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Error object commonly used in linters.""" + +__author__ = ('robbyw@google.com (Robert Walker)', + 'ajp@google.com (Andy Perelson)') + + +class Error(object): + """Object representing a style error.""" + + def __init__(self, code, message, token, position, fix_data): + """Initialize the error object. + + Args: + code: The numeric error code. + message: The error message string. + token: The tokens.Token where the error occurred. + position: The position of the error within the token. + fix_data: Data to be used in autofixing. Codes with fix_data are: + GOOG_REQUIRES_NOT_ALPHABETIZED - List of string value tokens that are + class names in goog.requires calls. + """ + self.code = code + self.message = message + self.token = token + self.position = position + if token: + self.start_index = token.start_index + else: + self.start_index = 0 + self.fix_data = fix_data + if self.position: + self.start_index += self.position.start + + def Compare(a, b): + """Compare two error objects, by source code order. + + Args: + a: First error object. + b: Second error object. + + Returns: + A Negative/0/Positive number when a is before/the same as/after b. + """ + line_diff = a.token.line_number - b.token.line_number + if line_diff: + return line_diff + + return a.start_index - b.start_index + Compare = staticmethod(Compare) diff --git a/third_party/closure_linter/closure_linter/common/erroraccumulator.py b/third_party/closure_linter/closure_linter/common/erroraccumulator.py new file mode 100755 index 0000000..55844ba --- /dev/null +++ b/third_party/closure_linter/closure_linter/common/erroraccumulator.py @@ -0,0 +1,46 @@ +#!/usr/bin/env python +# +# Copyright 2008 The Closure Linter Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Linter error handler class that accumulates an array of errors.""" + +__author__ = ('robbyw@google.com (Robert Walker)', + 'ajp@google.com (Andy Perelson)') + + +from closure_linter.common import errorhandler + + +class ErrorAccumulator(errorhandler.ErrorHandler): + """Error handler object that accumulates errors in a list.""" + + def __init__(self): + self._errors = [] + + def HandleError(self, error): + """Append the error to the list. + + Args: + error: The error object + """ + self._errors.append(error) + + def GetErrors(self): + """Returns the accumulated errors. + + Returns: + A sequence of errors. + """ + return self._errors diff --git a/third_party/closure_linter/closure_linter/common/errorhandler.py b/third_party/closure_linter/closure_linter/common/errorhandler.py new file mode 100755 index 0000000..764d54d --- /dev/null +++ b/third_party/closure_linter/closure_linter/common/errorhandler.py @@ -0,0 +1,61 @@ +#!/usr/bin/env python +# +# Copyright 2008 The Closure Linter Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Interface for a linter error handler. + +Error handlers aggregate a set of errors from multiple files and can optionally +perform some action based on the reported errors, for example, logging the error +or automatically fixing it. +""" + +__author__ = ('robbyw@google.com (Robert Walker)', + 'ajp@google.com (Andy Perelson)') + + +class ErrorHandler(object): + """Error handler interface.""" + + def __init__(self): + if self.__class__ == ErrorHandler: + raise NotImplementedError('class ErrorHandler is abstract') + + def HandleFile(self, filename, first_token): + """Notifies this ErrorHandler that subsequent errors are in filename. + + Args: + filename: The file being linted. + first_token: The first token of the file. + """ + + def HandleError(self, error): + """Append the error to the list. + + Args: + error: The error object + """ + + def FinishFile(self): + """Finishes handling the current file. + + Should be called after all errors in a file have been handled. + """ + + def GetErrors(self): + """Returns the accumulated errors. + + Returns: + A sequence of errors. + """ diff --git a/third_party/closure_linter/closure_linter/common/erroroutput.py b/third_party/closure_linter/closure_linter/common/erroroutput.py new file mode 100644 index 0000000..149738b --- /dev/null +++ b/third_party/closure_linter/closure_linter/common/erroroutput.py @@ -0,0 +1,52 @@ +#!/usr/bin/env python +# +# Copyright 2012 The Closure Linter Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Utility functions to format errors.""" + + +__author__ = ('robbyw@google.com (Robert Walker)', + 'ajp@google.com (Andy Perelson)', + 'nnaze@google.com (Nathan Naze)') + + +def GetUnixErrorOutput(filename, error, new_error=False): + """Get a output line for an error in UNIX format.""" + + line = '' + + if error.token: + line = '%d' % error.token.line_number + + error_code = '%04d' % error.code + if new_error: + error_code = 'New Error ' + error_code + return '%s:%s:(%s) %s' % (filename, line, error_code, error.message) + + +def GetErrorOutput(error, new_error=False): + """Get a output line for an error in regular format.""" + + line = '' + if error.token: + line = 'Line %d, ' % error.token.line_number + + code = 'E:%04d' % error.code + + error_message = error.message + if new_error: + error_message = 'New Error ' + error_message + + return '%s%s: %s' % (line, code, error.message) diff --git a/third_party/closure_linter/closure_linter/common/filetestcase.py b/third_party/closure_linter/closure_linter/common/filetestcase.py new file mode 100755 index 0000000..03b5ece --- /dev/null +++ b/third_party/closure_linter/closure_linter/common/filetestcase.py @@ -0,0 +1,108 @@ +#!/usr/bin/env python +# +# Copyright 2007 The Closure Linter Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Test case that runs a checker on a file, matching errors against annotations. + +Runs the given checker on the given file, accumulating all errors. The list +of errors is then matched against those annotated in the file. Based heavily +on devtools/javascript/gpylint/full_test.py. +""" + +__author__ = ('robbyw@google.com (Robert Walker)', + 'ajp@google.com (Andy Perelson)') + +import re + +import unittest as googletest +from closure_linter.common import erroraccumulator + + +class AnnotatedFileTestCase(googletest.TestCase): + """Test case to run a linter against a single file.""" + + # Matches an all caps letters + underscores error identifer + _MESSAGE = {'msg': '[A-Z][A-Z_]+'} + # Matches a //, followed by an optional line number with a +/-, followed by a + # list of message IDs. Used to extract expected messages from testdata files. + # TODO(robbyw): Generalize to use different commenting patterns. + _EXPECTED_RE = re.compile(r'\s*//\s*(?:(?P<line>[+-]?[0-9]+):)?' + r'\s*(?P<msgs>%(msg)s(?:,\s*%(msg)s)*)' % _MESSAGE) + + def __init__(self, filename, runner, converter): + """Create a single file lint test case. + + Args: + filename: Filename to test. + runner: Object implementing the LintRunner interface that lints a file. + converter: Function taking an error string and returning an error code. + """ + + googletest.TestCase.__init__(self, 'runTest') + self._filename = filename + self._messages = [] + self._runner = runner + self._converter = converter + + def shortDescription(self): + """Provides a description for the test.""" + return 'Run linter on %s' % self._filename + + def runTest(self): + """Runs the test.""" + try: + filename = self._filename + stream = open(filename) + except IOError, ex: + raise IOError('Could not find testdata resource for %s: %s' % + (self._filename, ex)) + + expected = self._GetExpectedMessages(stream) + got = self._ProcessFileAndGetMessages(filename) + self.assertEqual(expected, got) + + def _GetExpectedMessages(self, stream): + """Parse a file and get a sorted list of expected messages.""" + messages = [] + for i, line in enumerate(stream): + match = self._EXPECTED_RE.search(line) + if match: + line = match.group('line') + msg_ids = match.group('msgs') + if line is None: + line = i + 1 + elif line.startswith('+') or line.startswith('-'): + line = i + 1 + int(line) + else: + line = int(line) + for msg_id in msg_ids.split(','): + # Ignore a spurious message from the license preamble. + if msg_id != 'WITHOUT': + messages.append((line, self._converter(msg_id.strip()))) + stream.seek(0) + messages.sort() + return messages + + def _ProcessFileAndGetMessages(self, filename): + """Trap gpylint's output parse it to get messages added.""" + errors = erroraccumulator.ErrorAccumulator() + self._runner.Run([filename], errors) + + errors = errors.GetErrors() + + # Convert to expected tuple format. + error_msgs = [(error.token.line_number, error.code) for error in errors] + error_msgs.sort() + return error_msgs diff --git a/third_party/closure_linter/closure_linter/common/htmlutil.py b/third_party/closure_linter/closure_linter/common/htmlutil.py new file mode 100755 index 0000000..26d44c5 --- /dev/null +++ b/third_party/closure_linter/closure_linter/common/htmlutil.py @@ -0,0 +1,170 @@ +#!/usr/bin/env python +# +# Copyright 2007 The Closure Linter Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Utilities for dealing with HTML.""" + +__author__ = ('robbyw@google.com (Robert Walker)') + +import cStringIO +import formatter +import htmllib +import HTMLParser +import re + + +class ScriptExtractor(htmllib.HTMLParser): + """Subclass of HTMLParser that extracts script contents from an HTML file. + + Also inserts appropriate blank lines so that line numbers in the extracted + code match the line numbers in the original HTML. + """ + + def __init__(self): + """Initialize a ScriptExtractor.""" + htmllib.HTMLParser.__init__(self, formatter.NullFormatter()) + self._in_script = False + self._text = '' + + def start_script(self, attrs): + """Internal handler for the start of a script tag. + + Args: + attrs: The attributes of the script tag, as a list of tuples. + """ + for attribute in attrs: + if attribute[0].lower() == 'src': + # Skip script tags with a src specified. + return + self._in_script = True + + def end_script(self): + """Internal handler for the end of a script tag.""" + self._in_script = False + + def handle_data(self, data): + """Internal handler for character data. + + Args: + data: The character data from the HTML file. + """ + if self._in_script: + # If the last line contains whitespace only, i.e. is just there to + # properly align a </script> tag, strip the whitespace. + if data.rstrip(' \t') != data.rstrip(' \t\n\r\f'): + data = data.rstrip(' \t') + self._text += data + else: + self._AppendNewlines(data) + + def handle_comment(self, data): + """Internal handler for HTML comments. + + Args: + data: The text of the comment. + """ + self._AppendNewlines(data) + + def _AppendNewlines(self, data): + """Count the number of newlines in the given string and append them. + + This ensures line numbers are correct for reported errors. + + Args: + data: The data to count newlines in. + """ + # We append 'x' to both sides of the string to ensure that splitlines + # gives us an accurate count. + for i in xrange(len(('x' + data + 'x').splitlines()) - 1): + self._text += '\n' + + def GetScriptLines(self): + """Return the extracted script lines. + + Returns: + The extracted script lines as a list of strings. + """ + return self._text.splitlines() + + +def GetScriptLines(f): + """Extract script tag contents from the given HTML file. + + Args: + f: The HTML file. + + Returns: + Lines in the HTML file that are from script tags. + """ + extractor = ScriptExtractor() + + # The HTML parser chokes on text like Array.<!string>, so we patch + # that bug by replacing the < with < - escaping all text inside script + # tags would be better but it's a bit of a catch 22. + contents = f.read() + contents = re.sub(r'<([^\s\w/])', + lambda x: '<%s' % x.group(1), + contents) + + extractor.feed(contents) + extractor.close() + return extractor.GetScriptLines() + + +def StripTags(str): + """Returns the string with HTML tags stripped. + + Args: + str: An html string. + + Returns: + The html string with all tags stripped. If there was a parse error, returns + the text successfully parsed so far. + """ + # Brute force approach to stripping as much HTML as possible. If there is a + # parsing error, don't strip text before parse error position, and continue + # trying from there. + final_text = '' + finished = False + while not finished: + try: + strip = _HtmlStripper() + strip.feed(str) + strip.close() + str = strip.get_output() + final_text += str + finished = True + except HTMLParser.HTMLParseError, e: + final_text += str[:e.offset] + str = str[e.offset + 1:] + + return final_text + + +class _HtmlStripper(HTMLParser.HTMLParser): + """Simple class to strip tags from HTML. + + Does so by doing nothing when encountering tags, and appending character data + to a buffer when that is encountered. + """ + def __init__(self): + self.reset() + self.__output = cStringIO.StringIO() + + def handle_data(self, d): + self.__output.write(d) + + def get_output(self): + return self.__output.getvalue() diff --git a/third_party/closure_linter/closure_linter/common/lintrunner.py b/third_party/closure_linter/closure_linter/common/lintrunner.py new file mode 100755 index 0000000..07842c7 --- /dev/null +++ b/third_party/closure_linter/closure_linter/common/lintrunner.py @@ -0,0 +1,39 @@ +#!/usr/bin/env python +# +# Copyright 2008 The Closure Linter Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Interface for a lint running wrapper.""" + +__author__ = ('robbyw@google.com (Robert Walker)', + 'ajp@google.com (Andy Perelson)') + + +class LintRunner(object): + """Interface for a lint running wrapper.""" + + def __init__(self): + if self.__class__ == LintRunner: + raise NotImplementedError('class LintRunner is abstract') + + def Run(self, filenames, error_handler): + """Run a linter on the given filenames. + + Args: + filenames: The filenames to check + error_handler: An ErrorHandler object + + Returns: + The error handler, which may have been used to collect error info. + """ diff --git a/third_party/closure_linter/closure_linter/common/matcher.py b/third_party/closure_linter/closure_linter/common/matcher.py new file mode 100755 index 0000000..9b4402c --- /dev/null +++ b/third_party/closure_linter/closure_linter/common/matcher.py @@ -0,0 +1,60 @@ +#!/usr/bin/env python +# +# Copyright 2007 The Closure Linter Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Regular expression based JavaScript matcher classes.""" + +__author__ = ('robbyw@google.com (Robert Walker)', + 'ajp@google.com (Andy Perelson)') + +from closure_linter.common import position +from closure_linter.common import tokens + +# Shorthand +Token = tokens.Token +Position = position.Position + + +class Matcher(object): + """A token matcher. + + Specifies a pattern to match, the type of token it represents, what mode the + token changes to, and what mode the token applies to. + + Modes allow more advanced grammars to be incorporated, and are also necessary + to tokenize line by line. We can have different patterns apply to different + modes - i.e. looking for documentation while in comment mode. + + Attributes: + regex: The regular expression representing this matcher. + type: The type of token indicated by a successful match. + result_mode: The mode to move to after a successful match. + """ + + def __init__(self, regex, token_type, result_mode=None, line_start=False): + """Create a new matcher template. + + Args: + regex: The regular expression to match. + token_type: The type of token a successful match indicates. + result_mode: What mode to change to after a successful match. Defaults to + None, which means to not change the current mode. + line_start: Whether this matcher should only match string at the start + of a line. + """ + self.regex = regex + self.type = token_type + self.result_mode = result_mode + self.line_start = line_start diff --git a/third_party/closure_linter/closure_linter/common/position.py b/third_party/closure_linter/closure_linter/common/position.py new file mode 100755 index 0000000..cebf17e --- /dev/null +++ b/third_party/closure_linter/closure_linter/common/position.py @@ -0,0 +1,126 @@ +#!/usr/bin/env python +# +# Copyright 2008 The Closure Linter Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Classes to represent positions within strings.""" + +__author__ = ('robbyw@google.com (Robert Walker)', + 'ajp@google.com (Andy Perelson)') + + +class Position(object): + """Object representing a segment of a string. + + Attributes: + start: The index in to the string where the segment starts. + length: The length of the string segment. + """ + + def __init__(self, start, length): + """Initialize the position object. + + Args: + start: The start index. + length: The number of characters to include. + """ + self.start = start + self.length = length + + def Get(self, string): + """Returns this range of the given string. + + Args: + string: The string to slice. + + Returns: + The string within the range specified by this object. + """ + return string[self.start:self.start + self.length] + + def Set(self, target, source): + """Sets this range within the target string to the source string. + + Args: + target: The target string. + source: The source string. + + Returns: + The resulting string + """ + return target[:self.start] + source + target[self.start + self.length:] + + def AtEnd(string): + """Create a Position representing the end of the given string. + + Args: + string: The string to represent the end of. + + Returns: + The created Position object. + """ + return Position(len(string), 0) + AtEnd = staticmethod(AtEnd) + + def IsAtEnd(self, string): + """Returns whether this position is at the end of the given string. + + Args: + string: The string to test for the end of. + + Returns: + Whether this position is at the end of the given string. + """ + return self.start == len(string) and self.length == 0 + + def AtBeginning(): + """Create a Position representing the beginning of any string. + + Returns: + The created Position object. + """ + return Position(0, 0) + AtBeginning = staticmethod(AtBeginning) + + def IsAtBeginning(self): + """Returns whether this position is at the beginning of any string. + + Returns: + Whether this position is at the beginning of any string. + """ + return self.start == 0 and self.length == 0 + + def All(string): + """Create a Position representing the entire string. + + Args: + string: The string to represent the entirety of. + + Returns: + The created Position object. + """ + return Position(0, len(string)) + All = staticmethod(All) + + def Index(index): + """Returns a Position object for the specified index. + + Args: + index: The index to select, inclusively. + + Returns: + The created Position object. + """ + return Position(index, 1) + Index = staticmethod(Index) diff --git a/third_party/closure_linter/closure_linter/common/simplefileflags.py b/third_party/closure_linter/closure_linter/common/simplefileflags.py new file mode 100755 index 0000000..3402bef --- /dev/null +++ b/third_party/closure_linter/closure_linter/common/simplefileflags.py @@ -0,0 +1,190 @@ +#!/usr/bin/env python +# +# Copyright 2008 The Closure Linter Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Determines the list of files to be checked from command line arguments.""" + +__author__ = ('robbyw@google.com (Robert Walker)', + 'ajp@google.com (Andy Perelson)') + +import glob +import os +import re + +import gflags as flags + + +FLAGS = flags.FLAGS + +flags.DEFINE_multistring( + 'recurse', + None, + 'Recurse in to the subdirectories of the given path', + short_name='r') +flags.DEFINE_list( + 'exclude_directories', + ('_demos'), + 'Exclude the specified directories (only applicable along with -r or ' + '--presubmit)', + short_name='e') +flags.DEFINE_list( + 'exclude_files', + ('deps.js'), + 'Exclude the specified files', + short_name='x') + + +def MatchesSuffixes(filename, suffixes): + """Returns whether the given filename matches one of the given suffixes. + + Args: + filename: Filename to check. + suffixes: Sequence of suffixes to check. + + Returns: + Whether the given filename matches one of the given suffixes. + """ + suffix = filename[filename.rfind('.'):] + return suffix in suffixes + + +def _GetUserSpecifiedFiles(argv, suffixes): + """Returns files to be linted, specified directly on the command line. + + Can handle the '*' wildcard in filenames, but no other wildcards. + + Args: + argv: Sequence of command line arguments. The second and following arguments + are assumed to be files that should be linted. + suffixes: Expected suffixes for the file type being checked. + + Returns: + A sequence of files to be linted. + """ + files = argv[1:] or [] + all_files = [] + lint_files = [] + + # Perform any necessary globs. + for f in files: + if f.find('*') != -1: + for result in glob.glob(f): + all_files.append(result) + else: + all_files.append(f) + + for f in all_files: + if MatchesSuffixes(f, suffixes): + lint_files.append(f) + return lint_files + + +def _GetRecursiveFiles(suffixes): + """Returns files to be checked specified by the --recurse flag. + + Args: + suffixes: Expected suffixes for the file type being checked. + + Returns: + A list of files to be checked. + """ + lint_files = [] + # Perform any request recursion + if FLAGS.recurse: + for start in FLAGS.recurse: + for root, subdirs, files in os.walk(start): + for f in files: + if MatchesSuffixes(f, suffixes): + lint_files.append(os.path.join(root, f)) + return lint_files + + +def GetAllSpecifiedFiles(argv, suffixes): + """Returns all files specified by the user on the commandline. + + Args: + argv: Sequence of command line arguments. The second and following arguments + are assumed to be files that should be linted. + suffixes: Expected suffixes for the file type + + Returns: + A list of all files specified directly or indirectly (via flags) on the + command line by the user. + """ + files = _GetUserSpecifiedFiles(argv, suffixes) + + if FLAGS.recurse: + files += _GetRecursiveFiles(suffixes) + + return FilterFiles(files) + + +def FilterFiles(files): + """Filters the list of files to be linted be removing any excluded files. + + Filters out files excluded using --exclude_files and --exclude_directories. + + Args: + files: Sequence of files that needs filtering. + + Returns: + Filtered list of files to be linted. + """ + num_files = len(files) + + ignore_dirs_regexs = [] + for ignore in FLAGS.exclude_directories: + ignore_dirs_regexs.append(re.compile(r'(^|[\\/])%s[\\/]' % ignore)) + + result_files = [] + for f in files: + add_file = True + for exclude in FLAGS.exclude_files: + if f.endswith('/' + exclude) or f == exclude: + add_file = False + break + for ignore in ignore_dirs_regexs: + if ignore.search(f): + # Break out of ignore loop so we don't add to + # filtered files. + add_file = False + break + if add_file: + # Convert everything to absolute paths so we can easily remove duplicates + # using a set. + result_files.append(os.path.abspath(f)) + + skipped = num_files - len(result_files) + if skipped: + print 'Skipping %d file(s).' % skipped + + return set(result_files) + + +def GetFileList(argv, file_type, suffixes): + """Parse the flags and return the list of files to check. + + Args: + argv: Sequence of command line arguments. + suffixes: Sequence of acceptable suffixes for the file type. + + Returns: + The list of files to check. + """ + return sorted(GetAllSpecifiedFiles(argv, suffixes)) + + +def IsEmptyArgumentList(argv): + return not (len(argv[1:]) or FLAGS.recurse) diff --git a/third_party/closure_linter/closure_linter/common/tokenizer.py b/third_party/closure_linter/closure_linter/common/tokenizer.py new file mode 100755 index 0000000..0234720 --- /dev/null +++ b/third_party/closure_linter/closure_linter/common/tokenizer.py @@ -0,0 +1,184 @@ +#!/usr/bin/env python +# +# Copyright 2007 The Closure Linter Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Regular expression based lexer.""" + +__author__ = ('robbyw@google.com (Robert Walker)', + 'ajp@google.com (Andy Perelson)') + +from closure_linter.common import tokens + +# Shorthand +Type = tokens.TokenType + + +class Tokenizer(object): + """General purpose tokenizer. + + Attributes: + mode: The latest mode of the tokenizer. This allows patterns to distinguish + if they are mid-comment, mid-parameter list, etc. + matchers: Dictionary of modes to sequences of matchers that define the + patterns to check at any given time. + default_types: Dictionary of modes to types, defining what type to give + non-matched text when in the given mode. Defaults to Type.NORMAL. + """ + + def __init__(self, starting_mode, matchers, default_types): + """Initialize the tokenizer. + + Args: + starting_mode: Mode to start in. + matchers: Dictionary of modes to sequences of matchers that defines the + patterns to check at any given time. + default_types: Dictionary of modes to types, defining what type to give + non-matched text when in the given mode. Defaults to Type.NORMAL. + """ + self.__starting_mode = starting_mode + self.matchers = matchers + self.default_types = default_types + + def TokenizeFile(self, file): + """Tokenizes the given file. + + Args: + file: An iterable that yields one line of the file at a time. + + Returns: + The first token in the file + """ + # The current mode. + self.mode = self.__starting_mode + # The first token in the stream. + self.__first_token = None + # The last token added to the token stream. + self.__last_token = None + # The current line number. + self.__line_number = 0 + + for line in file: + self.__line_number += 1 + self.__TokenizeLine(line) + + return self.__first_token + + def _CreateToken(self, string, token_type, line, line_number, values=None): + """Creates a new Token object (or subclass). + + Args: + string: The string of input the token represents. + token_type: The type of token. + line: The text of the line this token is in. + line_number: The line number of the token. + values: A dict of named values within the token. For instance, a + function declaration may have a value called 'name' which captures the + name of the function. + + Returns: + The newly created Token object. + """ + return tokens.Token(string, token_type, line, line_number, values) + + def __TokenizeLine(self, line): + """Tokenizes the given line. + + Args: + line: The contents of the line. + """ + string = line.rstrip('\n\r\f') + line_number = self.__line_number + self.__start_index = 0 + + if not string: + self.__AddToken(self._CreateToken('', Type.BLANK_LINE, line, line_number)) + return + + normal_token = '' + index = 0 + while index < len(string): + for matcher in self.matchers[self.mode]: + if matcher.line_start and index > 0: + continue + + match = matcher.regex.match(string, index) + + if match: + if normal_token: + self.__AddToken( + self.__CreateNormalToken(self.mode, normal_token, line, + line_number)) + normal_token = '' + + # Add the match. + self.__AddToken(self._CreateToken(match.group(), matcher.type, line, + line_number, match.groupdict())) + + # Change the mode to the correct one for after this match. + self.mode = matcher.result_mode or self.mode + + # Shorten the string to be matched. + index = match.end() + + break + + else: + # If the for loop finishes naturally (i.e. no matches) we just add the + # first character to the string of consecutive non match characters. + # These will constitute a NORMAL token. + if string: + normal_token += string[index:index + 1] + index += 1 + + if normal_token: + self.__AddToken( + self.__CreateNormalToken(self.mode, normal_token, line, line_number)) + + def __CreateNormalToken(self, mode, string, line, line_number): + """Creates a normal token. + + Args: + mode: The current mode. + string: The string to tokenize. + line: The line of text. + line_number: The line number within the file. + + Returns: + A Token object, of the default type for the current mode. + """ + type = Type.NORMAL + if mode in self.default_types: + type = self.default_types[mode] + return self._CreateToken(string, type, line, line_number) + + def __AddToken(self, token): + """Add the given token to the token stream. + + Args: + token: The token to add. + """ + # Store the first token, or point the previous token to this one. + if not self.__first_token: + self.__first_token = token + else: + self.__last_token.next = token + + # Establish the doubly linked list + token.previous = self.__last_token + self.__last_token = token + + # Compute the character indices + token.start_index = self.__start_index + self.__start_index += token.length diff --git a/third_party/closure_linter/closure_linter/common/tokens.py b/third_party/closure_linter/closure_linter/common/tokens.py new file mode 100755 index 0000000..4c7d818 --- /dev/null +++ b/third_party/closure_linter/closure_linter/common/tokens.py @@ -0,0 +1,139 @@ +#!/usr/bin/env python +# +# Copyright 2008 The Closure Linter Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Classes to represent tokens and positions within them.""" + +__author__ = ('robbyw@google.com (Robert Walker)', + 'ajp@google.com (Andy Perelson)') + + +class TokenType(object): + """Token types common to all languages.""" + NORMAL = 'normal' + WHITESPACE = 'whitespace' + BLANK_LINE = 'blank line' + + +class Token(object): + """Token class for intelligent text splitting. + + The token class represents a string of characters and an identifying type. + + Attributes: + type: The type of token. + string: The characters the token comprises. + length: The length of the token. + line: The text of the line the token is found in. + line_number: The number of the line the token is found in. + values: Dictionary of values returned from the tokens regex match. + previous: The token before this one. + next: The token after this one. + start_index: The character index in the line where this token starts. + attached_object: Object containing more information about this token. + metadata: Object containing metadata about this token. Must be added by + a separate metadata pass. + """ + + def __init__(self, string, token_type, line, line_number, values=None): + """Creates a new Token object. + + Args: + string: The string of input the token contains. + token_type: The type of token. + line: The text of the line this token is in. + line_number: The line number of the token. + values: A dict of named values within the token. For instance, a + function declaration may have a value called 'name' which captures the + name of the function. + """ + self.type = token_type + self.string = string + self.length = len(string) + self.line = line + self.line_number = line_number + self.values = values + + # These parts can only be computed when the file is fully tokenized + self.previous = None + self.next = None + self.start_index = None + + # This part is set in statetracker.py + # TODO(robbyw): Wrap this in to metadata + self.attached_object = None + + # This part is set in *metadatapass.py + self.metadata = None + + def IsFirstInLine(self): + """Tests if this token is the first token in its line. + + Returns: + Whether the token is the first token in its line. + """ + return not self.previous or self.previous.line_number != self.line_number + + def IsLastInLine(self): + """Tests if this token is the last token in its line. + + Returns: + Whether the token is the last token in its line. + """ + return not self.next or self.next.line_number != self.line_number + + def IsType(self, token_type): + """Tests if this token is of the given type. + + Args: + token_type: The type to test for. + + Returns: + True if the type of this token matches the type passed in. + """ + return self.type == token_type + + def IsAnyType(self, *token_types): + """Tests if this token is any of the given types. + + Args: + token_types: The types to check. Also accepts a single array. + + Returns: + True if the type of this token is any of the types passed in. + """ + if not isinstance(token_types[0], basestring): + return self.type in token_types[0] + else: + return self.type in token_types + + def __repr__(self): + return '<Token: %s, "%s", %r, %d, %r>' % (self.type, self.string, + self.values, self.line_number, + self.metadata) + + def __iter__(self): + """Returns a token iterator.""" + node = self + while node: + yield node + node = node.next + + def __reversed__(self): + """Returns a reverse-direction token iterator.""" + node = self + while node: + yield node + node = node.previous diff --git a/third_party/closure_linter/closure_linter/common/tokens_test.py b/third_party/closure_linter/closure_linter/common/tokens_test.py new file mode 100644 index 0000000..79ac0ae --- /dev/null +++ b/third_party/closure_linter/closure_linter/common/tokens_test.py @@ -0,0 +1,113 @@ +#!/usr/bin/env python +# Copyright 2011 The Closure Linter Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + + + +import unittest as googletest +from closure_linter.common import tokens + + +def _CreateDummyToken(): + return tokens.Token('foo', None, 1, 1) + + +def _CreateDummyTokens(count): + dummy_tokens = [] + for _ in xrange(count): + dummy_tokens.append(_CreateDummyToken()) + return dummy_tokens + + +def _SetTokensAsNeighbors(neighbor_tokens): + for i in xrange(len(neighbor_tokens)): + prev_index = i - 1 + next_index = i + 1 + + if prev_index >= 0: + neighbor_tokens[i].previous = neighbor_tokens[prev_index] + + if next_index < len(neighbor_tokens): + neighbor_tokens[i].next = neighbor_tokens[next_index] + + +class TokensTest(googletest.TestCase): + + def testIsFirstInLine(self): + + # First token in file (has no previous). + self.assertTrue(_CreateDummyToken().IsFirstInLine()) + + a, b = _CreateDummyTokens(2) + _SetTokensAsNeighbors([a, b]) + + # Tokens on same line + a.line_number = 30 + b.line_number = 30 + + self.assertFalse(b.IsFirstInLine()) + + # Tokens on different lines + b.line_number = 31 + self.assertTrue(b.IsFirstInLine()) + + def testIsLastInLine(self): + # Last token in file (has no next). + self.assertTrue(_CreateDummyToken().IsLastInLine()) + + a, b = _CreateDummyTokens(2) + _SetTokensAsNeighbors([a, b]) + + # Tokens on same line + a.line_number = 30 + b.line_number = 30 + self.assertFalse(a.IsLastInLine()) + + b.line_number = 31 + self.assertTrue(a.IsLastInLine()) + + def testIsType(self): + a = tokens.Token('foo', 'fakeType1', 1, 1) + self.assertTrue(a.IsType('fakeType1')) + self.assertFalse(a.IsType('fakeType2')) + + def testIsAnyType(self): + a = tokens.Token('foo', 'fakeType1', 1, 1) + self.assertTrue(a.IsAnyType(['fakeType1', 'fakeType2'])) + self.assertFalse(a.IsAnyType(['fakeType3', 'fakeType4'])) + + def testRepr(self): + a = tokens.Token('foo', 'fakeType1', 1, 1) + self.assertEquals('<Token: fakeType1, "foo", None, 1, None>', str(a)) + + def testIter(self): + dummy_tokens = _CreateDummyTokens(5) + _SetTokensAsNeighbors(dummy_tokens) + a, b, c, d, e = dummy_tokens + + i = iter(a) + self.assertListEqual([a, b, c, d, e], list(i)) + + def testReverseIter(self): + dummy_tokens = _CreateDummyTokens(5) + _SetTokensAsNeighbors(dummy_tokens) + a, b, c, d, e = dummy_tokens + + ri = reversed(e) + self.assertListEqual([e, d, c, b, a], list(ri)) + + +if __name__ == '__main__': + googletest.main() diff --git a/third_party/closure_linter/closure_linter/ecmalintrules.py b/third_party/closure_linter/closure_linter/ecmalintrules.py new file mode 100755 index 0000000..1187f51 --- /dev/null +++ b/third_party/closure_linter/closure_linter/ecmalintrules.py @@ -0,0 +1,786 @@ +#!/usr/bin/env python +# +# Copyright 2008 The Closure Linter Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Core methods for checking EcmaScript files for common style guide violations. +""" + +__author__ = ('robbyw@google.com (Robert Walker)', + 'ajp@google.com (Andy Perelson)', + 'jacobr@google.com (Jacob Richman)') + +import re + +from closure_linter import checkerbase +from closure_linter import ecmametadatapass +from closure_linter import error_check +from closure_linter import errors +from closure_linter import indentation +from closure_linter import javascripttokens +from closure_linter import javascripttokenizer +from closure_linter import statetracker +from closure_linter import tokenutil +from closure_linter.common import error +from closure_linter.common import htmlutil +from closure_linter.common import lintrunner +from closure_linter.common import position +from closure_linter.common import tokens +import gflags as flags + +FLAGS = flags.FLAGS +flags.DEFINE_list('custom_jsdoc_tags', '', 'Extra jsdoc tags to allow') + +# TODO(robbyw): Check for extra parens on return statements +# TODO(robbyw): Check for 0px in strings +# TODO(robbyw): Ensure inline jsDoc is in {} +# TODO(robbyw): Check for valid JS types in parameter docs + +# Shorthand +Context = ecmametadatapass.EcmaContext +Error = error.Error +Modes = javascripttokenizer.JavaScriptModes +Position = position.Position +Rule = error_check.Rule +Type = javascripttokens.JavaScriptTokenType + +class EcmaScriptLintRules(checkerbase.LintRulesBase): + """EmcaScript lint style checking rules. + + Can be used to find common style errors in JavaScript, ActionScript and other + Ecma like scripting languages. Style checkers for Ecma scripting languages + should inherit from this style checker. + Please do not add any state to EcmaScriptLintRules or to any subclasses. + + All state should be added to the StateTracker subclass used for a particular + language. + """ + + # Static constants. + MAX_LINE_LENGTH = 80 + + MISSING_PARAMETER_SPACE = re.compile(r',\S') + + EXTRA_SPACE = re.compile('(\(\s|\s\))') + + ENDS_WITH_SPACE = re.compile('\s$') + + ILLEGAL_TAB = re.compile(r'\t') + + # Regex used to split up complex types to check for invalid use of ? and |. + TYPE_SPLIT = re.compile(r'[,<>()]') + + # Regex for form of author lines after the @author tag. + AUTHOR_SPEC = re.compile(r'(\s*)[^\s]+@[^(\s]+(\s*)\(.+\)') + + # Acceptable tokens to remove for line too long testing. + LONG_LINE_IGNORE = frozenset(['*', '//', '@see'] + + ['@%s' % tag for tag in statetracker.DocFlag.HAS_TYPE]) + + def __init__(self): + """Initialize this lint rule object.""" + checkerbase.LintRulesBase.__init__(self) + + def Initialize(self, checker, limited_doc_checks, is_html): + """Initialize this lint rule object before parsing a new file.""" + checkerbase.LintRulesBase.Initialize(self, checker, limited_doc_checks, + is_html) + self._indentation = indentation.IndentationRules() + + def HandleMissingParameterDoc(self, token, param_name): + """Handle errors associated with a parameter missing a @param tag.""" + raise TypeError('Abstract method HandleMissingParameterDoc not implemented') + + def _CheckLineLength(self, last_token, state): + """Checks whether the line is too long. + + Args: + last_token: The last token in the line. + """ + # Start from the last token so that we have the flag object attached to + # and DOC_FLAG tokens. + line_number = last_token.line_number + token = last_token + + # Build a representation of the string where spaces indicate potential + # line-break locations. + line = [] + while token and token.line_number == line_number: + if state.IsTypeToken(token): + line.insert(0, 'x' * len(token.string)) + elif token.type in (Type.IDENTIFIER, Type.NORMAL): + # Dots are acceptable places to wrap. + line.insert(0, token.string.replace('.', ' ')) + else: + line.insert(0, token.string) + token = token.previous + + line = ''.join(line) + line = line.rstrip('\n\r\f') + try: + length = len(unicode(line, 'utf-8')) + except: + # Unknown encoding. The line length may be wrong, as was originally the + # case for utf-8 (see bug 1735846). For now just accept the default + # length, but as we find problems we can either add test for other + # possible encodings or return without an error to protect against + # false positives at the cost of more false negatives. + length = len(line) + + if length > self.MAX_LINE_LENGTH: + + # If the line matches one of the exceptions, then it's ok. + for long_line_regexp in self.GetLongLineExceptions(): + if long_line_regexp.match(last_token.line): + return + + # If the line consists of only one "word", or multiple words but all + # except one are ignoreable, then it's ok. + parts = set(line.split()) + + # We allow two "words" (type and name) when the line contains @param + max = 1 + if '@param' in parts: + max = 2 + + # Custom tags like @requires may have url like descriptions, so ignore + # the tag, similar to how we handle @see. + custom_tags = set(['@%s' % f for f in FLAGS.custom_jsdoc_tags]) + if (len(parts.difference(self.LONG_LINE_IGNORE | custom_tags)) > max): + self._HandleError(errors.LINE_TOO_LONG, + 'Line too long (%d characters).' % len(line), last_token) + + def _CheckJsDocType(self, token): + """Checks the given type for style errors. + + Args: + token: The DOC_FLAG token for the flag whose type to check. + """ + flag = token.attached_object + type = flag.type + if type and type is not None and not type.isspace(): + pieces = self.TYPE_SPLIT.split(type) + if len(pieces) == 1 and type.count('|') == 1 and ( + type.endswith('|null') or type.startswith('null|')): + self._HandleError(errors.JSDOC_PREFER_QUESTION_TO_PIPE_NULL, + 'Prefer "?Type" to "Type|null": "%s"' % type, token) + + for p in pieces: + if p.count('|') and p.count('?'): + # TODO(robbyw): We should do actual parsing of JsDoc types. As is, + # this won't report an error for {number|Array.<string>?}, etc. + self._HandleError(errors.JSDOC_ILLEGAL_QUESTION_WITH_PIPE, + 'JsDoc types cannot contain both "?" and "|": "%s"' % p, token) + + if error_check.ShouldCheck(Rule.BRACES_AROUND_TYPE) and ( + flag.type_start_token.type != Type.DOC_START_BRACE or + flag.type_end_token.type != Type.DOC_END_BRACE): + self._HandleError(errors.MISSING_BRACES_AROUND_TYPE, + 'Type must always be surrounded by curly braces.', token) + + def _CheckForMissingSpaceBeforeToken(self, token): + """Checks for a missing space at the beginning of a token. + + Reports a MISSING_SPACE error if the token does not begin with a space or + the previous token doesn't end with a space and the previous token is on the + same line as the token. + + Args: + token: The token being checked + """ + # TODO(user): Check if too many spaces? + if (len(token.string) == len(token.string.lstrip()) and + token.previous and token.line_number == token.previous.line_number and + len(token.previous.string) - len(token.previous.string.rstrip()) == 0): + self._HandleError( + errors.MISSING_SPACE, + 'Missing space before "%s"' % token.string, + token, + Position.AtBeginning()) + + def _ExpectSpaceBeforeOperator(self, token): + """Returns whether a space should appear before the given operator token. + + Args: + token: The operator token. + + Returns: + Whether there should be a space before the token. + """ + if token.string == ',' or token.metadata.IsUnaryPostOperator(): + return False + + # Colons should appear in labels, object literals, the case of a switch + # statement, and ternary operator. Only want a space in the case of the + # ternary operator. + if (token.string == ':' and + token.metadata.context.type in (Context.LITERAL_ELEMENT, + Context.CASE_BLOCK, + Context.STATEMENT)): + return False + + if token.metadata.IsUnaryOperator() and token.IsFirstInLine(): + return False + + return True + + def CheckToken(self, token, state): + """Checks a token, given the current parser_state, for warnings and errors. + + Args: + token: The current token under consideration + state: parser_state object that indicates the current state in the page + """ + # Store some convenience variables + first_in_line = token.IsFirstInLine() + last_in_line = token.IsLastInLine() + last_non_space_token = state.GetLastNonSpaceToken() + + type = token.type + + # Process the line change. + if not self._is_html and error_check.ShouldCheck(Rule.INDENTATION): + # TODO(robbyw): Support checking indentation in HTML files. + indentation_errors = self._indentation.CheckToken(token, state) + for indentation_error in indentation_errors: + self._HandleError(*indentation_error) + + if last_in_line: + self._CheckLineLength(token, state) + + if type == Type.PARAMETERS: + # Find missing spaces in parameter lists. + if self.MISSING_PARAMETER_SPACE.search(token.string): + self._HandleError(errors.MISSING_SPACE, 'Missing space after ","', + token) + + # Find extra spaces at the beginning of parameter lists. Make sure + # we aren't at the beginning of a continuing multi-line list. + if not first_in_line: + space_count = len(token.string) - len(token.string.lstrip()) + if space_count: + self._HandleError(errors.EXTRA_SPACE, 'Extra space after "("', + token, Position(0, space_count)) + + elif (type == Type.START_BLOCK and + token.metadata.context.type == Context.BLOCK): + self._CheckForMissingSpaceBeforeToken(token) + + elif type == Type.END_BLOCK: + # This check is for object literal end block tokens, but there is no need + # to test that condition since a comma at the end of any other kind of + # block is undoubtedly a parse error. + last_code = token.metadata.last_code + if last_code.IsOperator(','): + self._HandleError(errors.COMMA_AT_END_OF_LITERAL, + 'Illegal comma at end of object literal', last_code, + Position.All(last_code.string)) + + if state.InFunction() and state.IsFunctionClose(): + is_immediately_called = (token.next and + token.next.type == Type.START_PAREN) + if state.InTopLevelFunction(): + # When the function was top-level and not immediately called, check + # that it's terminated by a semi-colon. + if state.InAssignedFunction(): + if not is_immediately_called and (last_in_line or + not token.next.type == Type.SEMICOLON): + self._HandleError(errors.MISSING_SEMICOLON_AFTER_FUNCTION, + 'Missing semicolon after function assigned to a variable', + token, Position.AtEnd(token.string)) + else: + if not last_in_line and token.next.type == Type.SEMICOLON: + self._HandleError(errors.ILLEGAL_SEMICOLON_AFTER_FUNCTION, + 'Illegal semicolon after function declaration', + token.next, Position.All(token.next.string)) + + if (state.InInterfaceMethod() and last_code.type != Type.START_BLOCK): + self._HandleError(errors.INTERFACE_METHOD_CANNOT_HAVE_CODE, + 'Interface methods cannot contain code', last_code) + + elif (state.IsBlockClose() and + token.next and token.next.type == Type.SEMICOLON): + self._HandleError(errors.REDUNDANT_SEMICOLON, + 'No semicolon is required to end a code block', + token.next, Position.All(token.next.string)) + + elif type == Type.SEMICOLON: + if token.previous and token.previous.type == Type.WHITESPACE: + self._HandleError(errors.EXTRA_SPACE, 'Extra space before ";"', + token.previous, Position.All(token.previous.string)) + + if token.next and token.next.line_number == token.line_number: + if token.metadata.context.type != Context.FOR_GROUP_BLOCK: + # TODO(robbyw): Error about no multi-statement lines. + pass + + elif token.next.type not in ( + Type.WHITESPACE, Type.SEMICOLON, Type.END_PAREN): + self._HandleError(errors.MISSING_SPACE, + 'Missing space after ";" in for statement', + token.next, + Position.AtBeginning()) + + last_code = token.metadata.last_code + if last_code and last_code.type == Type.SEMICOLON: + # Allow a single double semi colon in for loops for cases like: + # for (;;) { }. + # NOTE(user): This is not a perfect check, and will not throw an error + # for cases like: for (var i = 0;; i < n; i++) {}, but then your code + # probably won't work either. + for_token = tokenutil.CustomSearch(last_code, + lambda token: token.type == Type.KEYWORD and token.string == 'for', + end_func=lambda token: token.type == Type.SEMICOLON, + distance=None, + reverse=True) + + if not for_token: + self._HandleError(errors.REDUNDANT_SEMICOLON, 'Redundant semicolon', + token, Position.All(token.string)) + + elif type == Type.START_PAREN: + if token.previous and token.previous.type == Type.KEYWORD: + self._HandleError(errors.MISSING_SPACE, 'Missing space before "("', + token, Position.AtBeginning()) + elif token.previous and token.previous.type == Type.WHITESPACE: + before_space = token.previous.previous + if (before_space and before_space.line_number == token.line_number and + before_space.type == Type.IDENTIFIER): + self._HandleError(errors.EXTRA_SPACE, 'Extra space before "("', + token.previous, Position.All(token.previous.string)) + + elif type == Type.START_BRACKET: + self._HandleStartBracket(token, last_non_space_token) + elif type in (Type.END_PAREN, Type.END_BRACKET): + # Ensure there is no space before closing parentheses, except when + # it's in a for statement with an omitted section, or when it's at the + # beginning of a line. + if (token.previous and token.previous.type == Type.WHITESPACE and + not token.previous.IsFirstInLine() and + not (last_non_space_token and last_non_space_token.line_number == + token.line_number and + last_non_space_token.type == Type.SEMICOLON)): + self._HandleError(errors.EXTRA_SPACE, 'Extra space before "%s"' % + token.string, token.previous, Position.All(token.previous.string)) + + if token.type == Type.END_BRACKET: + last_code = token.metadata.last_code + if last_code.IsOperator(','): + self._HandleError(errors.COMMA_AT_END_OF_LITERAL, + 'Illegal comma at end of array literal', last_code, + Position.All(last_code.string)) + + elif type == Type.WHITESPACE: + if self.ILLEGAL_TAB.search(token.string): + if token.IsFirstInLine(): + if token.next: + self._HandleError(errors.ILLEGAL_TAB, + 'Illegal tab in whitespace before "%s"' % token.next.string, + token, Position.All(token.string)) + else: + self._HandleError(errors.ILLEGAL_TAB, + 'Illegal tab in whitespace', + token, Position.All(token.string)) + else: + self._HandleError(errors.ILLEGAL_TAB, + 'Illegal tab in whitespace after "%s"' % token.previous.string, + token, Position.All(token.string)) + + # Check whitespace length if it's not the first token of the line and + # if it's not immediately before a comment. + if last_in_line: + # Check for extra whitespace at the end of a line. + self._HandleError(errors.EXTRA_SPACE, 'Extra space at end of line', + token, Position.All(token.string)) + elif not first_in_line and not token.next.IsComment(): + if token.length > 1: + self._HandleError(errors.EXTRA_SPACE, 'Extra space after "%s"' % + token.previous.string, token, + Position(1, len(token.string) - 1)) + + elif type == Type.OPERATOR: + last_code = token.metadata.last_code + + if not self._ExpectSpaceBeforeOperator(token): + if (token.previous and token.previous.type == Type.WHITESPACE and + last_code and last_code.type in (Type.NORMAL, Type.IDENTIFIER)): + self._HandleError(errors.EXTRA_SPACE, + 'Extra space before "%s"' % token.string, token.previous, + Position.All(token.previous.string)) + + elif (token.previous and + not token.previous.IsComment() and + token.previous.type in Type.EXPRESSION_ENDER_TYPES): + self._HandleError(errors.MISSING_SPACE, + 'Missing space before "%s"' % token.string, token, + Position.AtBeginning()) + + # Check that binary operators are not used to start lines. + if ((not last_code or last_code.line_number != token.line_number) and + not token.metadata.IsUnaryOperator()): + self._HandleError(errors.LINE_STARTS_WITH_OPERATOR, + 'Binary operator should go on previous line "%s"' % token.string, + token) + + elif type == Type.DOC_FLAG: + flag = token.attached_object + + if flag.flag_type == 'bug': + # TODO(robbyw): Check for exactly 1 space on the left. + string = token.next.string.lstrip() + string = string.split(' ', 1)[0] + + if not string.isdigit(): + self._HandleError(errors.NO_BUG_NUMBER_AFTER_BUG_TAG, + '@bug should be followed by a bug number', token) + + elif flag.flag_type == 'suppress': + if flag.type is None: + # A syntactically invalid suppress tag will get tokenized as a normal + # flag, indicating an error. + self._HandleError(errors.INCORRECT_SUPPRESS_SYNTAX, + 'Invalid suppress syntax: should be @suppress {errortype}. ' + 'Spaces matter.', token) + else: + for suppress_type in flag.type.split('|'): + if suppress_type not in state.GetDocFlag().SUPPRESS_TYPES: + self._HandleError(errors.INVALID_SUPPRESS_TYPE, + 'Invalid suppression type: %s' % suppress_type, + token) + + elif (error_check.ShouldCheck(Rule.WELL_FORMED_AUTHOR) and + flag.flag_type == 'author'): + # TODO(user): In non strict mode check the author tag for as much as + # it exists, though the full form checked below isn't required. + string = token.next.string + result = self.AUTHOR_SPEC.match(string) + if not result: + self._HandleError(errors.INVALID_AUTHOR_TAG_DESCRIPTION, + 'Author tag line should be of the form: ' + '@author foo@somewhere.com (Your Name)', + token.next) + else: + # Check spacing between email address and name. Do this before + # checking earlier spacing so positions are easier to calculate for + # autofixing. + num_spaces = len(result.group(2)) + if num_spaces < 1: + self._HandleError(errors.MISSING_SPACE, + 'Missing space after email address', + token.next, Position(result.start(2), 0)) + elif num_spaces > 1: + self._HandleError(errors.EXTRA_SPACE, + 'Extra space after email address', + token.next, + Position(result.start(2) + 1, num_spaces - 1)) + + # Check for extra spaces before email address. Can't be too few, if + # not at least one we wouldn't match @author tag. + num_spaces = len(result.group(1)) + if num_spaces > 1: + self._HandleError(errors.EXTRA_SPACE, + 'Extra space before email address', + token.next, Position(1, num_spaces - 1)) + + elif (flag.flag_type in state.GetDocFlag().HAS_DESCRIPTION and + not self._limited_doc_checks): + if flag.flag_type == 'param': + if flag.name is None: + self._HandleError(errors.MISSING_JSDOC_PARAM_NAME, + 'Missing name in @param tag', token) + + if not flag.description or flag.description is None: + flag_name = token.type + if 'name' in token.values: + flag_name = '@' + token.values['name'] + self._HandleError(errors.MISSING_JSDOC_TAG_DESCRIPTION, + 'Missing description in %s tag' % flag_name, token) + else: + self._CheckForMissingSpaceBeforeToken(flag.description_start_token) + + # We want punctuation to be inside of any tags ending a description, + # so strip tags before checking description. See bug 1127192. Note + # that depending on how lines break, the real description end token + # may consist only of stripped html and the effective end token can + # be different. + end_token = flag.description_end_token + end_string = htmlutil.StripTags(end_token.string).strip() + while (end_string == '' and not + end_token.type in Type.FLAG_ENDING_TYPES): + end_token = end_token.previous + if end_token.type in Type.FLAG_DESCRIPTION_TYPES: + end_string = htmlutil.StripTags(end_token.string).rstrip() + + if not (end_string.endswith('.') or end_string.endswith('?') or + end_string.endswith('!')): + # Find the position for the missing punctuation, inside of any html + # tags. + desc_str = end_token.string.rstrip() + while desc_str.endswith('>'): + start_tag_index = desc_str.rfind('<') + if start_tag_index < 0: + break + desc_str = desc_str[:start_tag_index].rstrip() + end_position = Position(len(desc_str), 0) + + self._HandleError( + errors.JSDOC_TAG_DESCRIPTION_ENDS_WITH_INVALID_CHARACTER, + ('%s descriptions must end with valid punctuation such as a ' + 'period.' % token.string), + end_token, end_position) + + if flag.flag_type in state.GetDocFlag().HAS_TYPE: + if flag.type_start_token is not None: + self._CheckForMissingSpaceBeforeToken( + token.attached_object.type_start_token) + + if flag.type and flag.type != '' and not flag.type.isspace(): + self._CheckJsDocType(token) + + if type in (Type.DOC_FLAG, Type.DOC_INLINE_FLAG): + if (token.values['name'] not in state.GetDocFlag().LEGAL_DOC and + token.values['name'] not in FLAGS.custom_jsdoc_tags): + self._HandleError(errors.INVALID_JSDOC_TAG, + 'Invalid JsDoc tag: %s' % token.values['name'], token) + + if (error_check.ShouldCheck(Rule.NO_BRACES_AROUND_INHERIT_DOC) and + token.values['name'] == 'inheritDoc' and + type == Type.DOC_INLINE_FLAG): + self._HandleError(errors.UNNECESSARY_BRACES_AROUND_INHERIT_DOC, + 'Unnecessary braces around @inheritDoc', + token) + + elif type == Type.SIMPLE_LVALUE: + identifier = token.values['identifier'] + + if ((not state.InFunction() or state.InConstructor()) and + not state.InParentheses() and not state.InObjectLiteralDescendant()): + jsdoc = state.GetDocComment() + if not state.HasDocComment(identifier): + # Only test for documentation on identifiers with .s in them to + # avoid checking things like simple variables. We don't require + # documenting assignments to .prototype itself (bug 1880803). + if (not state.InConstructor() and + identifier.find('.') != -1 and not + identifier.endswith('.prototype') and not + self._limited_doc_checks): + comment = state.GetLastComment() + if not (comment and comment.lower().count('jsdoc inherited')): + self._HandleError(errors.MISSING_MEMBER_DOCUMENTATION, + "No docs found for member '%s'" % identifier, + token); + elif jsdoc and (not state.InConstructor() or + identifier.startswith('this.')): + # We are at the top level and the function/member is documented. + if identifier.endswith('_') and not identifier.endswith('__'): + # Can have a private class which inherits documentation from a + # public superclass. + # + # @inheritDoc is deprecated in favor of using @override, and they + if (jsdoc.HasFlag('override') and not jsdoc.HasFlag('constructor') + and not ('accessControls' in jsdoc.suppressions)): + self._HandleError(errors.INVALID_OVERRIDE_PRIVATE, + '%s should not override a private member.' % identifier, + jsdoc.GetFlag('override').flag_token) + if (jsdoc.HasFlag('inheritDoc') and not jsdoc.HasFlag('constructor') + and not ('accessControls' in jsdoc.suppressions)): + self._HandleError(errors.INVALID_INHERIT_DOC_PRIVATE, + '%s should not inherit from a private member.' % identifier, + jsdoc.GetFlag('inheritDoc').flag_token) + if (not jsdoc.HasFlag('private') and + not ('underscore' in jsdoc.suppressions) and not + ((jsdoc.HasFlag('inheritDoc') or jsdoc.HasFlag('override')) and + ('accessControls' in jsdoc.suppressions))): + self._HandleError(errors.MISSING_PRIVATE, + 'Member "%s" must have @private JsDoc.' % + identifier, token) + if jsdoc.HasFlag('private') and 'underscore' in jsdoc.suppressions: + self._HandleError(errors.UNNECESSARY_SUPPRESS, + '@suppress {underscore} is not necessary with @private', + jsdoc.suppressions['underscore']) + elif (jsdoc.HasFlag('private') and + not self.InExplicitlyTypedLanguage()): + # It is convention to hide public fields in some ECMA + # implementations from documentation using the @private tag. + self._HandleError(errors.EXTRA_PRIVATE, + 'Member "%s" must not have @private JsDoc' % + identifier, token) + + # These flags are only legal on localizable message definitions; + # such variables always begin with the prefix MSG_. + for f in ('desc', 'hidden', 'meaning'): + if (jsdoc.HasFlag(f) + and not identifier.startswith('MSG_') + and identifier.find('.MSG_') == -1): + self._HandleError(errors.INVALID_USE_OF_DESC_TAG, + 'Member "%s" should not have @%s JsDoc' % (identifier, f), + token) + + # Check for illegaly assigning live objects as prototype property values. + index = identifier.find('.prototype.') + # Ignore anything with additional .s after the prototype. + if index != -1 and identifier.find('.', index + 11) == -1: + equal_operator = tokenutil.SearchExcept(token, Type.NON_CODE_TYPES) + next_code = tokenutil.SearchExcept(equal_operator, Type.NON_CODE_TYPES) + if next_code and ( + next_code.type in (Type.START_BRACKET, Type.START_BLOCK) or + next_code.IsOperator('new')): + self._HandleError(errors.ILLEGAL_PROTOTYPE_MEMBER_VALUE, + 'Member %s cannot have a non-primitive value' % identifier, + token) + + elif type == Type.END_PARAMETERS: + # Find extra space at the end of parameter lists. We check the token + # prior to the current one when it is a closing paren. + if (token.previous and token.previous.type == Type.PARAMETERS + and self.ENDS_WITH_SPACE.search(token.previous.string)): + self._HandleError(errors.EXTRA_SPACE, 'Extra space before ")"', + token.previous) + + jsdoc = state.GetDocComment() + if state.GetFunction().is_interface: + if token.previous and token.previous.type == Type.PARAMETERS: + self._HandleError(errors.INTERFACE_CONSTRUCTOR_CANNOT_HAVE_PARAMS, + 'Interface constructor cannot have parameters', + token.previous) + elif (state.InTopLevel() and jsdoc and not jsdoc.HasFlag('see') + and not jsdoc.InheritsDocumentation() + and not state.InObjectLiteralDescendant() and not + jsdoc.IsInvalidated()): + distance, edit = jsdoc.CompareParameters(state.GetParams()) + if distance: + params_iter = iter(state.GetParams()) + docs_iter = iter(jsdoc.ordered_params) + + for op in edit: + if op == 'I': + # Insertion. + # Parsing doc comments is the same for all languages + # but some languages care about parameters that don't have + # doc comments and some languages don't care. + # Languages that don't allow variables to by typed such as + # JavaScript care but languages such as ActionScript or Java + # that allow variables to be typed don't care. + if not self._limited_doc_checks: + self.HandleMissingParameterDoc(token, params_iter.next()) + + elif op == 'D': + # Deletion + self._HandleError(errors.EXTRA_PARAMETER_DOCUMENTATION, + 'Found docs for non-existing parameter: "%s"' % + docs_iter.next(), token) + elif op == 'S': + # Substitution + if not self._limited_doc_checks: + self._HandleError(errors.WRONG_PARAMETER_DOCUMENTATION, + 'Parameter mismatch: got "%s", expected "%s"' % + (params_iter.next(), docs_iter.next()), token) + + else: + # Equality - just advance the iterators + params_iter.next() + docs_iter.next() + + elif type == Type.STRING_TEXT: + # If this is the first token after the start of the string, but it's at + # the end of a line, we know we have a multi-line string. + if token.previous.type in (Type.SINGLE_QUOTE_STRING_START, + Type.DOUBLE_QUOTE_STRING_START) and last_in_line: + self._HandleError(errors.MULTI_LINE_STRING, + 'Multi-line strings are not allowed', token) + + + # This check is orthogonal to the ones above, and repeats some types, so + # it is a plain if and not an elif. + if token.type in Type.COMMENT_TYPES: + if self.ILLEGAL_TAB.search(token.string): + self._HandleError(errors.ILLEGAL_TAB, + 'Illegal tab in comment "%s"' % token.string, token) + + trimmed = token.string.rstrip() + if last_in_line and token.string != trimmed: + # Check for extra whitespace at the end of a line. + self._HandleError(errors.EXTRA_SPACE, 'Extra space at end of line', + token, Position(len(trimmed), len(token.string) - len(trimmed))) + + # This check is also orthogonal since it is based on metadata. + if token.metadata.is_implied_semicolon: + self._HandleError(errors.MISSING_SEMICOLON, + 'Missing semicolon at end of line', token) + + def _HandleStartBracket(self, token, last_non_space_token): + """Handles a token that is an open bracket. + + Args: + token: The token to handle. + last_non_space_token: The last token that was not a space. + """ + if (not token.IsFirstInLine() and token.previous.type == Type.WHITESPACE and + last_non_space_token and + last_non_space_token.type in Type.EXPRESSION_ENDER_TYPES): + self._HandleError(errors.EXTRA_SPACE, 'Extra space before "["', + token.previous, Position.All(token.previous.string)) + # If the [ token is the first token in a line we shouldn't complain + # about a missing space before [. This is because some Ecma script + # languages allow syntax like: + # [Annotation] + # class MyClass {...} + # So we don't want to blindly warn about missing spaces before [. + # In the the future, when rules for computing exactly how many spaces + # lines should be indented are added, then we can return errors for + # [ tokens that are improperly indented. + # For example: + # var someVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryLongVariableName = + # [a,b,c]; + # should trigger a proper indentation warning message as [ is not indented + # by four spaces. + elif (not token.IsFirstInLine() and token.previous and + not token.previous.type in ( + [Type.WHITESPACE, Type.START_PAREN, Type.START_BRACKET] + + Type.EXPRESSION_ENDER_TYPES)): + self._HandleError(errors.MISSING_SPACE, 'Missing space before "["', + token, Position.AtBeginning()) + + def Finalize(self, state, tokenizer_mode): + last_non_space_token = state.GetLastNonSpaceToken() + # Check last line for ending with newline. + if state.GetLastLine() and not (state.GetLastLine().isspace() or + state.GetLastLine().rstrip('\n\r\f') != state.GetLastLine()): + self._HandleError( + errors.FILE_MISSING_NEWLINE, + 'File does not end with new line. (%s)' % state.GetLastLine(), + last_non_space_token) + + # Check that the mode is not mid comment, argument list, etc. + if not tokenizer_mode == Modes.TEXT_MODE: + self._HandleError( + errors.FILE_IN_BLOCK, + 'File ended in mode "%s".' % tokenizer_mode, + last_non_space_token) + + try: + self._indentation.Finalize() + except Exception, e: + self._HandleError( + errors.FILE_DOES_NOT_PARSE, + str(e), + last_non_space_token) + + def GetLongLineExceptions(self): + """Gets a list of regexps for lines which can be longer than the limit.""" + return [] + + def InExplicitlyTypedLanguage(self): + """Returns whether this ecma implementation is explicitly typed.""" + return False diff --git a/third_party/closure_linter/closure_linter/ecmametadatapass.py b/third_party/closure_linter/closure_linter/ecmametadatapass.py new file mode 100755 index 0000000..2c797b3 --- /dev/null +++ b/third_party/closure_linter/closure_linter/ecmametadatapass.py @@ -0,0 +1,521 @@ +#!/usr/bin/env python +# +# Copyright 2010 The Closure Linter Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Metadata pass for annotating tokens in EcmaScript files.""" + +__author__ = ('robbyw@google.com (Robert Walker)') + +from closure_linter import javascripttokens +from closure_linter import tokenutil + + +TokenType = javascripttokens.JavaScriptTokenType + + +class ParseError(Exception): + """Exception indicating a parse error at the given token. + + Attributes: + token: The token where the parse error occurred. + """ + + def __init__(self, token, message=None): + """Initialize a parse error at the given token with an optional message. + + Args: + token: The token where the parse error occurred. + message: A message describing the parse error. + """ + Exception.__init__(self, message) + self.token = token + + +class EcmaContext(object): + """Context object for EcmaScript languages. + + Attributes: + type: The context type. + start_token: The token where this context starts. + end_token: The token where this context ends. + parent: The parent context. + """ + + # The root context. + ROOT = 'root' + + # A block of code. + BLOCK = 'block' + + # A pseudo-block of code for a given case or default section. + CASE_BLOCK = 'case_block' + + # Block of statements in a for loop's parentheses. + FOR_GROUP_BLOCK = 'for_block' + + # An implied block of code for 1 line if, while, and for statements + IMPLIED_BLOCK = 'implied_block' + + # An index in to an array or object. + INDEX = 'index' + + # An array literal in []. + ARRAY_LITERAL = 'array_literal' + + # An object literal in {}. + OBJECT_LITERAL = 'object_literal' + + # An individual element in an array or object literal. + LITERAL_ELEMENT = 'literal_element' + + # The portion of a ternary statement between ? and : + TERNARY_TRUE = 'ternary_true' + + # The portion of a ternary statment after : + TERNARY_FALSE = 'ternary_false' + + # The entire switch statment. This will contain a GROUP with the variable + # and a BLOCK with the code. + + # Since that BLOCK is not a normal block, it can not contain statements except + # for case and default. + SWITCH = 'switch' + + # A normal comment. + COMMENT = 'comment' + + # A JsDoc comment. + DOC = 'doc' + + # An individual statement. + STATEMENT = 'statement' + + # Code within parentheses. + GROUP = 'group' + + # Parameter names in a function declaration. + PARAMETERS = 'parameters' + + # A set of variable declarations appearing after the 'var' keyword. + VAR = 'var' + + # Context types that are blocks. + BLOCK_TYPES = frozenset([ + ROOT, BLOCK, CASE_BLOCK, FOR_GROUP_BLOCK, IMPLIED_BLOCK]) + + def __init__(self, type, start_token, parent): + """Initializes the context object. + + Args: + type: The context type. + start_token: The token where this context starts. + parent: The parent context. + """ + self.type = type + self.start_token = start_token + self.end_token = None + self.parent = parent + + def __repr__(self): + """Returns a string representation of the context object.""" + stack = [] + context = self + while context: + stack.append(context.type) + context = context.parent + return 'Context(%s)' % ' > '.join(stack) + + +class EcmaMetaData(object): + """Token metadata for EcmaScript languages. + + Attributes: + last_code: The last code token to appear before this one. + context: The context this token appears in. + operator_type: The operator type, will be one of the *_OPERATOR constants + defined below. + """ + + UNARY_OPERATOR = 'unary' + + UNARY_POST_OPERATOR = 'unary_post' + + BINARY_OPERATOR = 'binary' + + TERNARY_OPERATOR = 'ternary' + + def __init__(self): + """Initializes a token metadata object.""" + self.last_code = None + self.context = None + self.operator_type = None + self.is_implied_semicolon = False + self.is_implied_block = False + self.is_implied_block_close = False + + def __repr__(self): + """Returns a string representation of the context object.""" + parts = ['%r' % self.context] + if self.operator_type: + parts.append('optype: %r' % self.operator_type) + if self.is_implied_semicolon: + parts.append('implied;') + return 'MetaData(%s)' % ', '.join(parts) + + def IsUnaryOperator(self): + return self.operator_type in (EcmaMetaData.UNARY_OPERATOR, + EcmaMetaData.UNARY_POST_OPERATOR) + + def IsUnaryPostOperator(self): + return self.operator_type == EcmaMetaData.UNARY_POST_OPERATOR + + +class EcmaMetaDataPass(object): + """A pass that iterates over all tokens and builds metadata about them.""" + + def __init__(self): + """Initialize the meta data pass object.""" + self.Reset() + + def Reset(self): + """Resets the metadata pass to prepare for the next file.""" + self._token = None + self._context = None + self._AddContext(EcmaContext.ROOT) + self._last_code = None + + def _CreateContext(self, type): + """Overridable by subclasses to create the appropriate context type.""" + return EcmaContext(type, self._token, self._context) + + def _CreateMetaData(self): + """Overridable by subclasses to create the appropriate metadata type.""" + return EcmaMetaData() + + def _AddContext(self, type): + """Adds a context of the given type to the context stack. + + Args: + type: The type of context to create + """ + self._context = self._CreateContext(type) + + def _PopContext(self): + """Moves up one level in the context stack. + + Returns: + The former context. + + Raises: + ParseError: If the root context is popped. + """ + top_context = self._context + top_context.end_token = self._token + self._context = top_context.parent + if self._context: + return top_context + else: + raise ParseError(self._token) + + def _PopContextType(self, *stop_types): + """Pops the context stack until a context of the given type is popped. + + Args: + stop_types: The types of context to pop to - stops at the first match. + + Returns: + The context object of the given type that was popped. + """ + last = None + while not last or last.type not in stop_types: + last = self._PopContext() + return last + + def _EndStatement(self): + """Process the end of a statement.""" + self._PopContextType(EcmaContext.STATEMENT) + if self._context.type == EcmaContext.IMPLIED_BLOCK: + self._token.metadata.is_implied_block_close = True + self._PopContext() + + def _ProcessContext(self): + """Process the context at the current token. + + Returns: + The context that should be assigned to the current token, or None if + the current context after this method should be used. + + Raises: + ParseError: When the token appears in an invalid context. + """ + token = self._token + token_type = token.type + + if self._context.type in EcmaContext.BLOCK_TYPES: + # Whenever we're in a block, we add a statement context. We make an + # exception for switch statements since they can only contain case: and + # default: and therefore don't directly contain statements. + # The block we add here may be immediately removed in some cases, but + # that causes no harm. + parent = self._context.parent + if not parent or parent.type != EcmaContext.SWITCH: + self._AddContext(EcmaContext.STATEMENT) + + elif self._context.type == EcmaContext.ARRAY_LITERAL: + self._AddContext(EcmaContext.LITERAL_ELEMENT) + + if token_type == TokenType.START_PAREN: + if self._last_code and self._last_code.IsKeyword('for'): + # for loops contain multiple statements in the group unlike while, + # switch, if, etc. + self._AddContext(EcmaContext.FOR_GROUP_BLOCK) + else: + self._AddContext(EcmaContext.GROUP) + + elif token_type == TokenType.END_PAREN: + result = self._PopContextType(EcmaContext.GROUP, + EcmaContext.FOR_GROUP_BLOCK) + keyword_token = result.start_token.metadata.last_code + # keyword_token will not exist if the open paren is the first line of the + # file, for example if all code is wrapped in an immediately executed + # annonymous function. + if keyword_token and keyword_token.string in ('if', 'for', 'while'): + next_code = tokenutil.SearchExcept(token, TokenType.NON_CODE_TYPES) + if next_code.type != TokenType.START_BLOCK: + # Check for do-while. + is_do_while = False + pre_keyword_token = keyword_token.metadata.last_code + if (pre_keyword_token and + pre_keyword_token.type == TokenType.END_BLOCK): + start_block_token = pre_keyword_token.metadata.context.start_token + is_do_while = start_block_token.metadata.last_code.string == 'do' + + # If it's not do-while, it's an implied block. + if not is_do_while: + self._AddContext(EcmaContext.IMPLIED_BLOCK) + token.metadata.is_implied_block = True + + return result + + # else (not else if) with no open brace after it should be considered the + # start of an implied block, similar to the case with if, for, and while + # above. + elif (token_type == TokenType.KEYWORD and + token.string == 'else'): + next_code = tokenutil.SearchExcept(token, TokenType.NON_CODE_TYPES) + if (next_code.type != TokenType.START_BLOCK and + (next_code.type != TokenType.KEYWORD or next_code.string != 'if')): + self._AddContext(EcmaContext.IMPLIED_BLOCK) + token.metadata.is_implied_block = True + + elif token_type == TokenType.START_PARAMETERS: + self._AddContext(EcmaContext.PARAMETERS) + + elif token_type == TokenType.END_PARAMETERS: + return self._PopContextType(EcmaContext.PARAMETERS) + + elif token_type == TokenType.START_BRACKET: + if (self._last_code and + self._last_code.type in TokenType.EXPRESSION_ENDER_TYPES): + self._AddContext(EcmaContext.INDEX) + else: + self._AddContext(EcmaContext.ARRAY_LITERAL) + + elif token_type == TokenType.END_BRACKET: + return self._PopContextType(EcmaContext.INDEX, EcmaContext.ARRAY_LITERAL) + + elif token_type == TokenType.START_BLOCK: + if (self._last_code.type in (TokenType.END_PAREN, + TokenType.END_PARAMETERS) or + self._last_code.IsKeyword('else') or + self._last_code.IsKeyword('do') or + self._last_code.IsKeyword('try') or + self._last_code.IsKeyword('finally') or + (self._last_code.IsOperator(':') and + self._last_code.metadata.context.type == EcmaContext.CASE_BLOCK)): + # else, do, try, and finally all might have no () before {. + # Also, handle the bizzare syntax case 10: {...}. + self._AddContext(EcmaContext.BLOCK) + else: + self._AddContext(EcmaContext.OBJECT_LITERAL) + + elif token_type == TokenType.END_BLOCK: + context = self._PopContextType(EcmaContext.BLOCK, + EcmaContext.OBJECT_LITERAL) + if self._context.type == EcmaContext.SWITCH: + # The end of the block also means the end of the switch statement it + # applies to. + return self._PopContext() + return context + + elif token.IsKeyword('switch'): + self._AddContext(EcmaContext.SWITCH) + + elif (token_type == TokenType.KEYWORD and + token.string in ('case', 'default')): + # Pop up to but not including the switch block. + while self._context.parent.type != EcmaContext.SWITCH: + self._PopContext() + + elif token.IsOperator('?'): + self._AddContext(EcmaContext.TERNARY_TRUE) + + elif token.IsOperator(':'): + if self._context.type == EcmaContext.OBJECT_LITERAL: + self._AddContext(EcmaContext.LITERAL_ELEMENT) + + elif self._context.type == EcmaContext.TERNARY_TRUE: + self._PopContext() + self._AddContext(EcmaContext.TERNARY_FALSE) + + # Handle nested ternary statements like: + # foo = bar ? baz ? 1 : 2 : 3 + # When we encounter the second ":" the context is + # ternary_false > ternary_true > statement > root + elif (self._context.type == EcmaContext.TERNARY_FALSE and + self._context.parent.type == EcmaContext.TERNARY_TRUE): + self._PopContext() # Leave current ternary false context. + self._PopContext() # Leave current parent ternary true + self._AddContext(EcmaContext.TERNARY_FALSE) + + elif self._context.parent.type == EcmaContext.SWITCH: + self._AddContext(EcmaContext.CASE_BLOCK) + + elif token.IsKeyword('var'): + self._AddContext(EcmaContext.VAR) + + elif token.IsOperator(','): + while self._context.type not in (EcmaContext.VAR, + EcmaContext.ARRAY_LITERAL, + EcmaContext.OBJECT_LITERAL, + EcmaContext.STATEMENT, + EcmaContext.PARAMETERS, + EcmaContext.GROUP): + self._PopContext() + + elif token_type == TokenType.SEMICOLON: + self._EndStatement() + + def Process(self, first_token): + """Processes the token stream starting with the given token.""" + self._token = first_token + while self._token: + self._ProcessToken() + + if self._token.IsCode(): + self._last_code = self._token + + self._token = self._token.next + + try: + self._PopContextType(self, EcmaContext.ROOT) + except ParseError: + # Ignore the "popped to root" error. + pass + + def _ProcessToken(self): + """Process the given token.""" + token = self._token + token.metadata = self._CreateMetaData() + context = (self._ProcessContext() or self._context) + token.metadata.context = context + token.metadata.last_code = self._last_code + + # Determine the operator type of the token, if applicable. + if token.type == TokenType.OPERATOR: + token.metadata.operator_type = self._GetOperatorType(token) + + # Determine if there is an implied semicolon after the token. + if token.type != TokenType.SEMICOLON: + next_code = tokenutil.SearchExcept(token, TokenType.NON_CODE_TYPES) + # A statement like if (x) does not need a semicolon after it + is_implied_block = self._context == EcmaContext.IMPLIED_BLOCK + is_last_code_in_line = token.IsCode() and ( + not next_code or next_code.line_number != token.line_number) + is_continued_identifier = (token.type == TokenType.IDENTIFIER and + token.string.endswith('.')) + is_continued_operator = (token.type == TokenType.OPERATOR and + not token.metadata.IsUnaryPostOperator()) + is_continued_dot = token.string == '.' + next_code_is_operator = next_code and next_code.type == TokenType.OPERATOR + next_code_is_dot = next_code and next_code.string == '.' + is_end_of_block = (token.type == TokenType.END_BLOCK and + token.metadata.context.type != EcmaContext.OBJECT_LITERAL) + is_multiline_string = token.type == TokenType.STRING_TEXT + next_code_is_block = next_code and next_code.type == TokenType.START_BLOCK + if (is_last_code_in_line and + self._StatementCouldEndInContext() and + not is_multiline_string and + not is_end_of_block and + not is_continued_identifier and + not is_continued_operator and + not is_continued_dot and + not next_code_is_dot and + not next_code_is_operator and + not is_implied_block and + not next_code_is_block): + token.metadata.is_implied_semicolon = True + self._EndStatement() + + def _StatementCouldEndInContext(self): + """Returns whether the current statement (if any) may end in this context.""" + # In the basic statement or variable declaration context, statement can + # always end in this context. + if self._context.type in (EcmaContext.STATEMENT, EcmaContext.VAR): + return True + + # End of a ternary false branch inside a statement can also be the + # end of the statement, for example: + # var x = foo ? foo.bar() : null + # In this case the statement ends after the null, when the context stack + # looks like ternary_false > var > statement > root. + if (self._context.type == EcmaContext.TERNARY_FALSE and + self._context.parent.type in (EcmaContext.STATEMENT, EcmaContext.VAR)): + return True + + # In all other contexts like object and array literals, ternary true, etc. + # the statement can't yet end. + return False + + def _GetOperatorType(self, token): + """Returns the operator type of the given operator token. + + Args: + token: The token to get arity for. + + Returns: + The type of the operator. One of the *_OPERATOR constants defined in + EcmaMetaData. + """ + if token.string == '?': + return EcmaMetaData.TERNARY_OPERATOR + + if token.string in TokenType.UNARY_OPERATORS: + return EcmaMetaData.UNARY_OPERATOR + + last_code = token.metadata.last_code + if not last_code or last_code.type == TokenType.END_BLOCK: + return EcmaMetaData.UNARY_OPERATOR + + if (token.string in TokenType.UNARY_POST_OPERATORS and + last_code.type in TokenType.EXPRESSION_ENDER_TYPES): + return EcmaMetaData.UNARY_POST_OPERATOR + + if (token.string in TokenType.UNARY_OK_OPERATORS and + last_code.type not in TokenType.EXPRESSION_ENDER_TYPES and + last_code.string not in TokenType.UNARY_POST_OPERATORS): + return EcmaMetaData.UNARY_OPERATOR + + return EcmaMetaData.BINARY_OPERATOR diff --git a/third_party/closure_linter/closure_linter/error_check.py b/third_party/closure_linter/closure_linter/error_check.py new file mode 100755 index 0000000..ed243e9 --- /dev/null +++ b/third_party/closure_linter/closure_linter/error_check.py @@ -0,0 +1,90 @@ +#!/usr/bin/env python +# +# Copyright 2011 The Closure Linter Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +"""Specific JSLint errors checker.""" + + + +import gflags as flags + +FLAGS = flags.FLAGS + + +class Rule(object): + """Different rules to check.""" + + # Documentations for specific rules goes in flag definition. + BLANK_LINES_AT_TOP_LEVEL = 'blank_lines_at_top_level' + INDENTATION = 'indentation' + WELL_FORMED_AUTHOR = 'well_formed_author' + NO_BRACES_AROUND_INHERIT_DOC = 'no_braces_around_inherit_doc' + BRACES_AROUND_TYPE = 'braces_around_type' + OPTIONAL_TYPE_MARKER = 'optional_type_marker' + UNUSED_PRIVATE_MEMBERS = 'unused_private_members' + + # Rule to raise all known errors. + ALL = 'all' + + # All rules that are to be checked when using the strict flag. E.g. the rules + # that are specific to the stricter Closure style. + CLOSURE_RULES = frozenset([BLANK_LINES_AT_TOP_LEVEL, + INDENTATION, + WELL_FORMED_AUTHOR, + NO_BRACES_AROUND_INHERIT_DOC, + BRACES_AROUND_TYPE, + OPTIONAL_TYPE_MARKER]) + + +flags.DEFINE_boolean('strict', False, + 'Whether to validate against the stricter Closure style. ' + 'This includes ' + (', '.join(Rule.CLOSURE_RULES)) + '.') +flags.DEFINE_multistring('jslint_error', [], + 'List of specific lint errors to check. Here is a list' + ' of accepted values:\n' + ' - ' + Rule.ALL + ': enables all following errors.\n' + ' - ' + Rule.BLANK_LINES_AT_TOP_LEVEL + ': validates' + 'number of blank lines between blocks at top level.\n' + ' - ' + Rule.INDENTATION + ': checks correct ' + 'indentation of code.\n' + ' - ' + Rule.WELL_FORMED_AUTHOR + ': validates the ' + '@author JsDoc tags.\n' + ' - ' + Rule.NO_BRACES_AROUND_INHERIT_DOC + ': ' + 'forbids braces around @inheritdoc JsDoc tags.\n' + ' - ' + Rule.BRACES_AROUND_TYPE + ': enforces braces ' + 'around types in JsDoc tags.\n' + ' - ' + Rule.OPTIONAL_TYPE_MARKER + ': checks correct ' + 'use of optional marker = in param types.\n' + ' - ' + Rule.UNUSED_PRIVATE_MEMBERS + ': checks for ' + 'unused private variables.\n') + + +def ShouldCheck(rule): + """Returns whether the optional rule should be checked. + + Computes different flags (strict, jslint_error, jslint_noerror) to find out if + this specific rule should be checked. + + Args: + rule: Name of the rule (see Rule). + + Returns: + True if the rule should be checked according to the flags, otherwise False. + """ + if rule in FLAGS.jslint_error or Rule.ALL in FLAGS.jslint_error: + return True + # Checks strict rules. + return FLAGS.strict and rule in Rule.CLOSURE_RULES diff --git a/third_party/closure_linter/closure_linter/error_fixer.py b/third_party/closure_linter/closure_linter/error_fixer.py new file mode 100755 index 0000000..92e2221 --- /dev/null +++ b/third_party/closure_linter/closure_linter/error_fixer.py @@ -0,0 +1,447 @@ +#!/usr/bin/env python +# +# Copyright 2007 The Closure Linter Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Main class responsible for automatically fixing simple style violations.""" + +__author__ = 'robbyw@google.com (Robert Walker)' + +import re + +import gflags as flags +from closure_linter import errors +from closure_linter import javascriptstatetracker +from closure_linter import javascripttokens +from closure_linter import requireprovidesorter +from closure_linter import tokenutil +from closure_linter.common import errorhandler + +# Shorthand +Token = javascripttokens.JavaScriptToken +Type = javascripttokens.JavaScriptTokenType + +END_OF_FLAG_TYPE = re.compile(r'(}?\s*)$') + +# Regex to represent common mistake inverting author name and email as +# @author User Name (user@company) +INVERTED_AUTHOR_SPEC = re.compile(r'(?P<leading_whitespace>\s*)' + '(?P<name>[^(]+)' + '(?P<whitespace_after_name>\s+)' + '\(' + '(?P<email>[^\s]+@[^)\s]+)' + '\)' + '(?P<trailing_characters>.*)') + +FLAGS = flags.FLAGS +flags.DEFINE_boolean('disable_indentation_fixing', False, + 'Whether to disable automatic fixing of indentation.') + + +class ErrorFixer(errorhandler.ErrorHandler): + """Object that fixes simple style errors.""" + + def __init__(self, external_file=None): + """Initialize the error fixer. + + Args: + external_file: If included, all output will be directed to this file + instead of overwriting the files the errors are found in. + """ + errorhandler.ErrorHandler.__init__(self) + + self._file_name = None + self._file_token = None + self._external_file = external_file + + def HandleFile(self, filename, first_token): + """Notifies this ErrorPrinter that subsequent errors are in filename. + + Args: + filename: The name of the file about to be checked. + first_token: The first token in the file. + """ + self._file_name = filename + self._file_token = first_token + self._file_fix_count = 0 + self._file_changed_lines = set() + + def _AddFix(self, tokens): + """Adds the fix to the internal count. + + Args: + tokens: The token or sequence of tokens changed to fix an error. + """ + self._file_fix_count += 1 + if hasattr(tokens, 'line_number'): + self._file_changed_lines.add(tokens.line_number) + else: + for token in tokens: + self._file_changed_lines.add(token.line_number) + + def HandleError(self, error): + """Attempts to fix the error. + + Args: + error: The error object + """ + code = error.code + token = error.token + + if code == errors.JSDOC_PREFER_QUESTION_TO_PIPE_NULL: + iterator = token.attached_object.type_start_token + if iterator.type == Type.DOC_START_BRACE or iterator.string.isspace(): + iterator = iterator.next + + leading_space = len(iterator.string) - len(iterator.string.lstrip()) + iterator.string = '%s?%s' % (' ' * leading_space, + iterator.string.lstrip()) + + # Cover the no outer brace case where the end token is part of the type. + while iterator and iterator != token.attached_object.type_end_token.next: + iterator.string = iterator.string.replace( + 'null|', '').replace('|null', '') + iterator = iterator.next + + # Create a new flag object with updated type info. + token.attached_object = javascriptstatetracker.JsDocFlag(token) + self._AddFix(token) + + elif code == errors.JSDOC_MISSING_OPTIONAL_TYPE: + iterator = token.attached_object.type_end_token + if iterator.type == Type.DOC_END_BRACE or iterator.string.isspace(): + iterator = iterator.previous + + ending_space = len(iterator.string) - len(iterator.string.rstrip()) + iterator.string = '%s=%s' % (iterator.string.rstrip(), + ' ' * ending_space) + + # Create a new flag object with updated type info. + token.attached_object = javascriptstatetracker.JsDocFlag(token) + self._AddFix(token) + + elif code in (errors.MISSING_SEMICOLON_AFTER_FUNCTION, + errors.MISSING_SEMICOLON): + semicolon_token = Token(';', Type.SEMICOLON, token.line, + token.line_number) + tokenutil.InsertTokenAfter(semicolon_token, token) + token.metadata.is_implied_semicolon = False + semicolon_token.metadata.is_implied_semicolon = False + self._AddFix(token) + + elif code in (errors.ILLEGAL_SEMICOLON_AFTER_FUNCTION, + errors.REDUNDANT_SEMICOLON, + errors.COMMA_AT_END_OF_LITERAL): + tokenutil.DeleteToken(token) + self._AddFix(token) + + elif code == errors.INVALID_JSDOC_TAG: + if token.string == '@returns': + token.string = '@return' + self._AddFix(token) + + elif code == errors.FILE_MISSING_NEWLINE: + # This error is fixed implicitly by the way we restore the file + self._AddFix(token) + + elif code == errors.MISSING_SPACE: + if error.position: + if error.position.IsAtBeginning(): + tokenutil.InsertSpaceTokenAfter(token.previous) + elif error.position.IsAtEnd(token.string): + tokenutil.InsertSpaceTokenAfter(token) + else: + token.string = error.position.Set(token.string, ' ') + self._AddFix(token) + + elif code == errors.EXTRA_SPACE: + if error.position: + token.string = error.position.Set(token.string, '') + self._AddFix(token) + + elif code == errors.JSDOC_TAG_DESCRIPTION_ENDS_WITH_INVALID_CHARACTER: + token.string = error.position.Set(token.string, '.') + self._AddFix(token) + + elif code == errors.MISSING_LINE: + if error.position.IsAtBeginning(): + tokenutil.InsertBlankLineAfter(token.previous) + else: + tokenutil.InsertBlankLineAfter(token) + self._AddFix(token) + + elif code == errors.EXTRA_LINE: + tokenutil.DeleteToken(token) + self._AddFix(token) + + elif code == errors.WRONG_BLANK_LINE_COUNT: + if not token.previous: + # TODO(user): Add an insertBefore method to tokenutil. + return + + num_lines = error.fix_data + should_delete = False + + if num_lines < 0: + num_lines *= -1 + should_delete = True + + for i in xrange(1, num_lines + 1): + if should_delete: + # TODO(user): DeleteToken should update line numbers. + tokenutil.DeleteToken(token.previous) + else: + tokenutil.InsertBlankLineAfter(token.previous) + self._AddFix(token) + + elif code == errors.UNNECESSARY_DOUBLE_QUOTED_STRING: + end_quote = tokenutil.Search(token, Type.DOUBLE_QUOTE_STRING_END) + if end_quote: + single_quote_start = Token( + "'", Type.SINGLE_QUOTE_STRING_START, token.line, token.line_number) + single_quote_end = Token( + "'", Type.SINGLE_QUOTE_STRING_START, end_quote.line, + token.line_number) + + tokenutil.InsertTokenAfter(single_quote_start, token) + tokenutil.InsertTokenAfter(single_quote_end, end_quote) + tokenutil.DeleteToken(token) + tokenutil.DeleteToken(end_quote) + self._AddFix([token, end_quote]) + + elif code == errors.MISSING_BRACES_AROUND_TYPE: + fixed_tokens = [] + start_token = token.attached_object.type_start_token + + if start_token.type != Type.DOC_START_BRACE: + leading_space = ( + len(start_token.string) - len(start_token.string.lstrip())) + if leading_space: + start_token = tokenutil.SplitToken(start_token, leading_space) + # Fix case where start and end token were the same. + if token.attached_object.type_end_token == start_token.previous: + token.attached_object.type_end_token = start_token + + new_token = Token('{', Type.DOC_START_BRACE, start_token.line, + start_token.line_number) + tokenutil.InsertTokenAfter(new_token, start_token.previous) + token.attached_object.type_start_token = new_token + fixed_tokens.append(new_token) + + end_token = token.attached_object.type_end_token + if end_token.type != Type.DOC_END_BRACE: + # If the start token was a brace, the end token will be a + # FLAG_ENDING_TYPE token, if there wasn't a starting brace then + # the end token is the last token of the actual type. + last_type = end_token + if not fixed_tokens: + last_type = end_token.previous + + while last_type.string.isspace(): + last_type = last_type.previous + + # If there was no starting brace then a lone end brace wouldn't have + # been type end token. Now that we've added any missing start brace, + # see if the last effective type token was an end brace. + if last_type.type != Type.DOC_END_BRACE: + trailing_space = (len(last_type.string) - + len(last_type.string.rstrip())) + if trailing_space: + tokenutil.SplitToken(last_type, + len(last_type.string) - trailing_space) + + new_token = Token('}', Type.DOC_END_BRACE, last_type.line, + last_type.line_number) + tokenutil.InsertTokenAfter(new_token, last_type) + token.attached_object.type_end_token = new_token + fixed_tokens.append(new_token) + + self._AddFix(fixed_tokens) + + elif code == errors.GOOG_REQUIRES_NOT_ALPHABETIZED: + require_start_token = error.fix_data + sorter = requireprovidesorter.RequireProvideSorter() + sorter.FixRequires(require_start_token) + + self._AddFix(require_start_token) + + elif code == errors.GOOG_PROVIDES_NOT_ALPHABETIZED: + provide_start_token = error.fix_data + sorter = requireprovidesorter.RequireProvideSorter() + sorter.FixProvides(provide_start_token) + + self._AddFix(provide_start_token) + + elif code == errors.UNNECESSARY_BRACES_AROUND_INHERIT_DOC: + if token.previous.string == '{' and token.next.string == '}': + tokenutil.DeleteToken(token.previous) + tokenutil.DeleteToken(token.next) + self._AddFix([token]) + + elif code == errors.INVALID_AUTHOR_TAG_DESCRIPTION: + match = INVERTED_AUTHOR_SPEC.match(token.string) + if match: + token.string = '%s%s%s(%s)%s' % (match.group('leading_whitespace'), + match.group('email'), + match.group('whitespace_after_name'), + match.group('name'), + match.group('trailing_characters')) + self._AddFix(token) + + elif (code == errors.WRONG_INDENTATION and + not FLAGS.disable_indentation_fixing): + token = tokenutil.GetFirstTokenInSameLine(token) + actual = error.position.start + expected = error.position.length + + if token.type in (Type.WHITESPACE, Type.PARAMETERS) and actual != 0: + token.string = token.string.lstrip() + (' ' * expected) + self._AddFix([token]) + else: + # We need to add indentation. + new_token = Token(' ' * expected, Type.WHITESPACE, + token.line, token.line_number) + # Note that we'll never need to add indentation at the first line, + # since it will always not be indented. Therefore it's safe to assume + # token.previous exists. + tokenutil.InsertTokenAfter(new_token, token.previous) + self._AddFix([token]) + + elif code in [errors.MALFORMED_END_OF_SCOPE_COMMENT, + errors.MISSING_END_OF_SCOPE_COMMENT]: + # Only fix cases where }); is found with no trailing content on the line + # other than a comment. Value of 'token' is set to } for this error. + if (token.type == Type.END_BLOCK and + token.next.type == Type.END_PAREN and + token.next.next.type == Type.SEMICOLON): + current_token = token.next.next.next + removed_tokens = [] + while current_token and current_token.line_number == token.line_number: + if current_token.IsAnyType(Type.WHITESPACE, + Type.START_SINGLE_LINE_COMMENT, + Type.COMMENT): + removed_tokens.append(current_token) + current_token = current_token.next + else: + return + + if removed_tokens: + tokenutil.DeleteTokens(removed_tokens[0], len(removed_tokens)) + + whitespace_token = Token(' ', Type.WHITESPACE, token.line, + token.line_number) + start_comment_token = Token('//', Type.START_SINGLE_LINE_COMMENT, + token.line, token.line_number) + comment_token = Token(' goog.scope', Type.COMMENT, token.line, + token.line_number) + insertion_tokens = [whitespace_token, start_comment_token, + comment_token] + + tokenutil.InsertTokensAfter(insertion_tokens, token.next.next) + self._AddFix(removed_tokens + insertion_tokens) + + elif code in [errors.EXTRA_GOOG_PROVIDE, errors.EXTRA_GOOG_REQUIRE]: + tokens_in_line = tokenutil.GetAllTokensInSameLine(token) + tokenutil.DeleteTokens(tokens_in_line[0], len(tokens_in_line)) + self._AddFix(tokens_in_line) + + elif code in [errors.MISSING_GOOG_PROVIDE, errors.MISSING_GOOG_REQUIRE]: + is_provide = code == errors.MISSING_GOOG_PROVIDE + is_require = code == errors.MISSING_GOOG_REQUIRE + + missing_namespaces = error.fix_data[0] + need_blank_line = error.fix_data[1] + + if need_blank_line is None: + # TODO(user): This happens when there are no existing + # goog.provide or goog.require statements to position new statements + # relative to. Consider handling this case with a heuristic. + return + + insert_location = token.previous + + # If inserting a missing require with no existing requires, insert a + # blank line first. + if need_blank_line and is_require: + tokenutil.InsertBlankLineAfter(insert_location) + insert_location = insert_location.next + + for missing_namespace in missing_namespaces: + new_tokens = self._GetNewRequireOrProvideTokens( + is_provide, missing_namespace, insert_location.line_number + 1) + tokenutil.InsertLineAfter(insert_location, new_tokens) + insert_location = new_tokens[-1] + self._AddFix(new_tokens) + + # If inserting a missing provide with no existing provides, insert a + # blank line after. + if need_blank_line and is_provide: + tokenutil.InsertBlankLineAfter(insert_location) + + def _GetNewRequireOrProvideTokens(self, is_provide, namespace, line_number): + """Returns a list of tokens to create a goog.require/provide statement. + + Args: + is_provide: True if getting tokens for a provide, False for require. + namespace: The required or provided namespaces to get tokens for. + line_number: The line number the new require or provide statement will be + on. + + Returns: + Tokens to create a new goog.require or goog.provide statement. + """ + string = 'goog.require' + if is_provide: + string = 'goog.provide' + line_text = string + '(\'' + namespace + '\');\n' + return [ + Token(string, Type.IDENTIFIER, line_text, line_number), + Token('(', Type.START_PAREN, line_text, line_number), + Token('\'', Type.SINGLE_QUOTE_STRING_START, line_text, line_number), + Token(namespace, Type.STRING_TEXT, line_text, line_number), + Token('\'', Type.SINGLE_QUOTE_STRING_END, line_text, line_number), + Token(')', Type.END_PAREN, line_text, line_number), + Token(';', Type.SEMICOLON, line_text, line_number) + ] + + def FinishFile(self): + """Called when the current file has finished style checking. + + Used to go back and fix any errors in the file. + """ + if self._file_fix_count: + f = self._external_file + if not f: + print 'Fixed %d errors in %s' % (self._file_fix_count, self._file_name) + f = open(self._file_name, 'w') + + token = self._file_token + char_count = 0 + while token: + f.write(token.string) + char_count += len(token.string) + + if token.IsLastInLine(): + f.write('\n') + if char_count > 80 and token.line_number in self._file_changed_lines: + print 'WARNING: Line %d of %s is now longer than 80 characters.' % ( + token.line_number, self._file_name) + + char_count = 0 + + token = token.next + + if not self._external_file: + # Close the file if we created it + f.close() diff --git a/third_party/closure_linter/closure_linter/errorrecord.py b/third_party/closure_linter/closure_linter/errorrecord.py new file mode 100644 index 0000000..6b253eb --- /dev/null +++ b/third_party/closure_linter/closure_linter/errorrecord.py @@ -0,0 +1,65 @@ +#!/usr/bin/env python +# Copyright 2012 The Closure Linter Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +"""A simple, pickle-serializable class to represent a lint error.""" + + + +import gflags as flags + +from closure_linter import errors +from closure_linter.common import erroroutput + +FLAGS = flags.FLAGS + + +class ErrorRecord(object): + """Record-keeping struct that can be serialized back from a process. + + Attributes: + path: Path to the file. + error_string: Error string for the user. + new_error: Whether this is a "new error" (see errors.NEW_ERRORS). + """ + + def __init__(self, path, error_string, new_error): + self.path = path + self.error_string = error_string + self.new_error = new_error + + +def MakeErrorRecord(path, error): + """Make an error record with correctly formatted error string. + + Errors are not able to be serialized (pickled) over processes because of + their pointers to the complex token/context graph. We use an intermediary + serializable class to pass back just the relevant information. + + Args: + path: Path of file the error was found in. + error: An error.Error instance. + + Returns: + _ErrorRecord instance. + """ + new_error = error.code in errors.NEW_ERRORS + + if FLAGS.unix_mode: + error_string = erroroutput.GetUnixErrorOutput(path, error, new_error) + else: + error_string = erroroutput.GetErrorOutput(error, new_error) + + return ErrorRecord(path, error_string, new_error) diff --git a/third_party/closure_linter/closure_linter/errorrules.py b/third_party/closure_linter/closure_linter/errorrules.py new file mode 100755 index 0000000..afb6fa9 --- /dev/null +++ b/third_party/closure_linter/closure_linter/errorrules.py @@ -0,0 +1,42 @@ +#!/usr/bin/env python +# +# Copyright 2010 The Closure Linter Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Linter error rules class for Closure Linter.""" + +__author__ = 'robbyw@google.com (Robert Walker)' + +import gflags as flags +from closure_linter import errors + + +FLAGS = flags.FLAGS +flags.DEFINE_boolean('jsdoc', True, + 'Whether to report errors for missing JsDoc.') + + +def ShouldReportError(error): + """Whether the given error should be reported. + + Returns: + True for all errors except missing documentation errors. For these, + it returns the value of the jsdoc flag. + """ + return FLAGS.jsdoc or error not in ( + errors.MISSING_PARAMETER_DOCUMENTATION, + errors.MISSING_RETURN_DOCUMENTATION, + errors.MISSING_MEMBER_DOCUMENTATION, + errors.MISSING_PRIVATE, + errors.MISSING_JSDOC_TAG_THIS) diff --git a/third_party/closure_linter/closure_linter/errors.py b/third_party/closure_linter/closure_linter/errors.py new file mode 100755 index 0000000..08c6dbe --- /dev/null +++ b/third_party/closure_linter/closure_linter/errors.py @@ -0,0 +1,142 @@ +#!/usr/bin/env python +# +# Copyright 2007 The Closure Linter Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Error codes for JavaScript style checker.""" + +__author__ = ('robbyw@google.com (Robert Walker)', + 'ajp@google.com (Andy Perelson)') + + +def ByName(name): + """Get the error code for the given error name. + + Args: + name: The name of the error + + Returns: + The error code + """ + return globals()[name] + + +# "File-fatal" errors - these errors stop further parsing of a single file +FILE_NOT_FOUND = -1 +FILE_DOES_NOT_PARSE = -2 + +# Spacing +EXTRA_SPACE = 1 +MISSING_SPACE = 2 +EXTRA_LINE = 3 +MISSING_LINE = 4 +ILLEGAL_TAB = 5 +WRONG_INDENTATION = 6 +WRONG_BLANK_LINE_COUNT = 7 + +# Semicolons +MISSING_SEMICOLON = 10 +MISSING_SEMICOLON_AFTER_FUNCTION = 11 +ILLEGAL_SEMICOLON_AFTER_FUNCTION = 12 +REDUNDANT_SEMICOLON = 13 + +# Miscellaneous +ILLEGAL_PROTOTYPE_MEMBER_VALUE = 100 +LINE_TOO_LONG = 110 +LINE_STARTS_WITH_OPERATOR = 120 +COMMA_AT_END_OF_LITERAL = 121 +MULTI_LINE_STRING = 130 +UNNECESSARY_DOUBLE_QUOTED_STRING = 131 +UNUSED_PRIVATE_MEMBER = 132 + +# Requires, provides +GOOG_REQUIRES_NOT_ALPHABETIZED = 140 +GOOG_PROVIDES_NOT_ALPHABETIZED = 141 +MISSING_GOOG_REQUIRE = 142 +MISSING_GOOG_PROVIDE = 143 +EXTRA_GOOG_REQUIRE = 144 +EXTRA_GOOG_PROVIDE = 145 + +# JsDoc +INVALID_JSDOC_TAG = 200 +INVALID_USE_OF_DESC_TAG = 201 +NO_BUG_NUMBER_AFTER_BUG_TAG = 202 +MISSING_PARAMETER_DOCUMENTATION = 210 +EXTRA_PARAMETER_DOCUMENTATION = 211 +WRONG_PARAMETER_DOCUMENTATION = 212 +MISSING_JSDOC_TAG_TYPE = 213 +MISSING_JSDOC_TAG_DESCRIPTION = 214 +MISSING_JSDOC_PARAM_NAME = 215 +OUT_OF_ORDER_JSDOC_TAG_TYPE = 216 +MISSING_RETURN_DOCUMENTATION = 217 +UNNECESSARY_RETURN_DOCUMENTATION = 218 +MISSING_BRACES_AROUND_TYPE = 219 +MISSING_MEMBER_DOCUMENTATION = 220 +MISSING_PRIVATE = 221 +EXTRA_PRIVATE = 222 +INVALID_OVERRIDE_PRIVATE = 223 +INVALID_INHERIT_DOC_PRIVATE = 224 +MISSING_JSDOC_TAG_THIS = 225 +UNNECESSARY_BRACES_AROUND_INHERIT_DOC = 226 +INVALID_AUTHOR_TAG_DESCRIPTION = 227 +JSDOC_PREFER_QUESTION_TO_PIPE_NULL = 230 +JSDOC_ILLEGAL_QUESTION_WITH_PIPE = 231 +JSDOC_MISSING_OPTIONAL_TYPE = 232 +JSDOC_MISSING_OPTIONAL_PREFIX = 233 +JSDOC_TAG_DESCRIPTION_ENDS_WITH_INVALID_CHARACTER = 240 +# TODO(robbyw): Split this in to more specific syntax problems. +INCORRECT_SUPPRESS_SYNTAX = 250 +INVALID_SUPPRESS_TYPE = 251 +UNNECESSARY_SUPPRESS = 252 + +# File ending +FILE_MISSING_NEWLINE = 300 +FILE_IN_BLOCK = 301 + +# Interfaces +INTERFACE_CONSTRUCTOR_CANNOT_HAVE_PARAMS = 400 +INTERFACE_METHOD_CANNOT_HAVE_CODE = 401 + +# Comments +MISSING_END_OF_SCOPE_COMMENT = 500 +MALFORMED_END_OF_SCOPE_COMMENT = 501 + +# ActionScript specific errors: +# TODO(user): move these errors to their own file and move all JavaScript +# specific errors to their own file as well. +# All ActionScript specific errors should have error number at least 1000. +FUNCTION_MISSING_RETURN_TYPE = 1132 +PARAMETER_MISSING_TYPE = 1133 +VAR_MISSING_TYPE = 1134 +PARAMETER_MISSING_DEFAULT_VALUE = 1135 +IMPORTS_NOT_ALPHABETIZED = 1140 +IMPORT_CONTAINS_WILDCARD = 1141 +UNUSED_IMPORT = 1142 +INVALID_TRACE_SEVERITY_LEVEL = 1250 +MISSING_TRACE_SEVERITY_LEVEL = 1251 +MISSING_TRACE_MESSAGE = 1252 +REMOVE_TRACE_BEFORE_SUBMIT = 1253 +REMOVE_COMMENT_BEFORE_SUBMIT = 1254 +# End of list of ActionScript specific errors. + +NEW_ERRORS = frozenset([ + # Errors added after 2.0.2: + WRONG_INDENTATION, + MISSING_SEMICOLON, + # Errors added after 2.3.4: + MISSING_END_OF_SCOPE_COMMENT, + MALFORMED_END_OF_SCOPE_COMMENT, + UNUSED_PRIVATE_MEMBER, + # Errors added after 2.3.5: + ]) diff --git a/third_party/closure_linter/closure_linter/fixjsstyle.py b/third_party/closure_linter/closure_linter/fixjsstyle.py new file mode 100755 index 0000000..c23f6b7 --- /dev/null +++ b/third_party/closure_linter/closure_linter/fixjsstyle.py @@ -0,0 +1,57 @@ +#!/usr/bin/env python +# python2.6 for command-line runs using p4lib. pylint: disable-msg=C6301 +# +# Copyright 2007 The Closure Linter Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Automatically fix simple style guide violations.""" + +__author__ = 'robbyw@google.com (Robert Walker)' + +import sys + +import gflags as flags +from closure_linter import checker +from closure_linter import error_fixer +from closure_linter.common import simplefileflags as fileflags + +FLAGS = flags.FLAGS +flags.DEFINE_list('additional_extensions', None, 'List of additional file ' + 'extensions (not js) that should be treated as ' + 'JavaScript files.') + + +def main(argv = None): + """Main function. + + Args: + argv: Sequence of command line arguments. + """ + if argv is None: + argv = flags.FLAGS(sys.argv) + + suffixes = ['.js'] + if FLAGS.additional_extensions: + suffixes += ['.%s' % ext for ext in FLAGS.additional_extensions] + + files = fileflags.GetFileList(argv, 'JavaScript', suffixes) + + style_checker = checker.JavaScriptStyleChecker(error_fixer.ErrorFixer()) + + # Check the list of files. + for filename in files: + style_checker.Check(filename) + +if __name__ == '__main__': + main() diff --git a/third_party/closure_linter/closure_linter/fixjsstyle_test.py b/third_party/closure_linter/closure_linter/fixjsstyle_test.py new file mode 100755 index 0000000..5096568 --- /dev/null +++ b/third_party/closure_linter/closure_linter/fixjsstyle_test.py @@ -0,0 +1,254 @@ +#!/usr/bin/env python +# +# Copyright 2008 The Closure Linter Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Medium tests for the gpylint auto-fixer.""" + +__author__ = 'robbyw@google.com (Robby Walker)' + +import StringIO + +import gflags as flags +import unittest as googletest +from closure_linter import checker +from closure_linter import error_fixer + +_RESOURCE_PREFIX = 'closure_linter/testdata' + +flags.FLAGS.strict = True +flags.FLAGS.limited_doc_files = ('dummy.js', 'externs.js') +flags.FLAGS.closurized_namespaces = ('goog', 'dummy') + + +class FixJsStyleTest(googletest.TestCase): + """Test case to for gjslint auto-fixing.""" + + def testFixJsStyle(self): + test_cases = [['fixjsstyle.in.js', 'fixjsstyle.out.js'], + ['indentation.js', 'fixjsstyle.indentation.out.js']] + for [running_input_file, running_output_file] in test_cases: + input_filename = None + golden_filename = None + current_filename = None + try: + input_filename = '%s/%s' % (_RESOURCE_PREFIX, running_input_file) + current_filename = input_filename + + golden_filename = '%s/%s' % (_RESOURCE_PREFIX, running_output_file) + current_filename = golden_filename + except IOError, ex: + raise IOError('Could not find testdata resource for %s: %s' % + (current_filename, ex)) + + if running_input_file == 'fixjsstyle.in.js': + with open(input_filename) as f: + for line in f: + # Go to last line. + pass + self.assertTrue(line == line.rstrip(), '%s file should not end ' + 'with a new line.' % (input_filename)) + + # Autofix the file, sending output to a fake file. + actual = StringIO.StringIO() + style_checker = checker.JavaScriptStyleChecker( + error_fixer.ErrorFixer(actual)) + style_checker.Check(input_filename) + + # Now compare the files. + actual.seek(0) + expected = open(golden_filename, 'r') + + self.assertEqual(actual.readlines(), expected.readlines()) + + def testMissingExtraAndUnsortedRequires(self): + """Tests handling of missing extra and unsorted goog.require statements.""" + original = [ + "goog.require('dummy.aa');", + "goog.require('dummy.Cc');", + "goog.require('dummy.Dd');", + "", + "var x = new dummy.Bb();", + "dummy.Cc.someMethod();", + "dummy.aa.someMethod();", + ] + + expected = [ + "goog.require('dummy.Bb');", + "goog.require('dummy.Cc');", + "goog.require('dummy.aa');", + "", + "var x = new dummy.Bb();", + "dummy.Cc.someMethod();", + "dummy.aa.someMethod();", + ] + + self._AssertFixes(original, expected) + + def testMissingExtraAndUnsortedProvides(self): + """Tests handling of missing extra and unsorted goog.provide statements.""" + original = [ + "goog.provide('dummy.aa');", + "goog.provide('dummy.Cc');", + "goog.provide('dummy.Dd');", + "", + "dummy.Cc = function() {};", + "dummy.Bb = function() {};", + "dummy.aa.someMethod = function();", + ] + + expected = [ + "goog.provide('dummy.Bb');", + "goog.provide('dummy.Cc');", + "goog.provide('dummy.aa');", + "", + "dummy.Cc = function() {};", + "dummy.Bb = function() {};", + "dummy.aa.someMethod = function();", + ] + + self._AssertFixes(original, expected) + + def testNoRequires(self): + """Tests positioning of missing requires without existing requires.""" + original = [ + "goog.provide('dummy.Something');", + "", + "dummy.Something = function() {};", + "", + "var x = new dummy.Bb();", + ] + + expected = [ + "goog.provide('dummy.Something');", + "", + "goog.require('dummy.Bb');", + "", + "dummy.Something = function() {};", + "", + "var x = new dummy.Bb();", + ] + + self._AssertFixes(original, expected) + + def testNoProvides(self): + """Tests positioning of missing provides without existing provides.""" + original = [ + "goog.require('dummy.Bb');", + "", + "dummy.Something = function() {};", + "", + "var x = new dummy.Bb();", + ] + + expected = [ + "goog.provide('dummy.Something');", + "", + "goog.require('dummy.Bb');", + "", + "dummy.Something = function() {};", + "", + "var x = new dummy.Bb();", + ] + + self._AssertFixes(original, expected) + + def testGoogScopeIndentation(self): + """Tests Handling a typical end-of-scope indentation fix.""" + original = [ + 'goog.scope(function() {', + ' // TODO(brain): Take over the world.', + '}); // goog.scope', + ] + + expected = [ + 'goog.scope(function() {', + '// TODO(brain): Take over the world.', + '}); // goog.scope', + ] + + self._AssertFixes(original, expected) + + def testMissingEndOfScopeComment(self): + """Tests Handling a missing comment at end of goog.scope.""" + original = [ + 'goog.scope(function() {', + '});', + ] + + expected = [ + 'goog.scope(function() {', + '}); // goog.scope', + ] + + self._AssertFixes(original, expected) + + def testMissingEndOfScopeCommentWithOtherComment(self): + """Tests handling an irrelevant comment at end of goog.scope.""" + original = [ + 'goog.scope(function() {', + "}); // I don't belong here!", + ] + + expected = [ + 'goog.scope(function() {', + '}); // goog.scope', + ] + + self._AssertFixes(original, expected) + + def testMalformedEndOfScopeComment(self): + """Tests Handling a malformed comment at end of goog.scope.""" + original = [ + 'goog.scope(function() {', + '}); // goog.scope FTW', + ] + + expected = [ + 'goog.scope(function() {', + '}); // goog.scope', + ] + + self._AssertFixes(original, expected) + + def _AssertFixes(self, original, expected): + """Asserts that the error fixer corrects original to expected.""" + original = self._GetHeader() + original + expected = self._GetHeader() + expected + + actual = StringIO.StringIO() + style_checker = checker.JavaScriptStyleChecker( + error_fixer.ErrorFixer(actual)) + style_checker.CheckLines('testing.js', original, False) + actual.seek(0) + + expected = [x + '\n' for x in expected] + + self.assertListEqual(actual.readlines(), expected) + + def _GetHeader(self): + """Returns a fake header for a JavaScript file.""" + return [ + "// Copyright 2011 Google Inc. All Rights Reserved.", + "", + "/**", + " * @fileoverview Fake file overview.", + " * @author fake@google.com (Fake Person)", + " */", + "" + ] + + +if __name__ == '__main__': + googletest.main() diff --git a/third_party/closure_linter/closure_linter/full_test.py b/third_party/closure_linter/closure_linter/full_test.py new file mode 100755 index 0000000..fde9c70 --- /dev/null +++ b/third_party/closure_linter/closure_linter/full_test.py @@ -0,0 +1,114 @@ +#!/usr/bin/env python +# +# Copyright 2007 The Closure Linter Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Full regression-type (Medium) tests for gjslint. + +Tests every error that can be thrown by gjslint. Based heavily on +devtools/javascript/gpylint/full_test.py +""" + +__author__ = ('robbyw@google.com (Robert Walker)', + 'ajp@google.com (Andy Perelson)') + +import re +import os +import sys +import unittest + +import gflags as flags +import unittest as googletest + +from closure_linter import checker +from closure_linter import errors +from closure_linter import error_check +from closure_linter.common import filetestcase + +_RESOURCE_PREFIX = 'closure_linter/testdata' + +flags.FLAGS.strict = True +flags.FLAGS.custom_jsdoc_tags = ('customtag', 'requires') +flags.FLAGS.closurized_namespaces = ('goog', 'dummy') +flags.FLAGS.limited_doc_files = ('externs.js', 'dummy.js', + 'limited_doc_checks.js') +flags.FLAGS.jslint_error = error_check.Rule.ALL + +# List of files under testdata to test. +# We need to list files explicitly since pyglib can't list directories. +# TODO(user): Figure out how to list the directory. +_TEST_FILES = [ + 'all_js_wrapped.js', + 'blank_lines.js', + 'ends_with_block.js', + 'externs.js', + 'externs_jsdoc.js', + 'goog_scope.js', + 'html_parse_error.html', + 'indentation.js', + 'interface.js', + 'jsdoc.js', + 'limited_doc_checks.js', + 'minimal.js', + 'other.js', + 'provide_blank.js', + 'provide_extra.js', + 'provide_missing.js', + 'require_all_caps.js', + 'require_blank.js', + 'require_extra.js', + 'require_function.js', + 'require_function_missing.js', + 'require_function_through_both.js', + 'require_function_through_namespace.js', + 'require_interface.js', + 'require_interface_base.js', + 'require_lower_case.js', + 'require_missing.js', + 'require_numeric.js', + 'require_provide_blank.js', + 'require_provide_ok.js', + 'require_provide_missing.js', + 'simple.html', + 'spaces.js', + 'tokenizer.js', + 'unparseable.js', + 'unused_private_members.js', + 'utf8.html' + ] + + +class GJsLintTestSuite(unittest.TestSuite): + """Test suite to run a GJsLintTest for each of several files. + + If sys.argv[1:] is non-empty, it is interpreted as a list of filenames in + testdata to test. Otherwise, _TEST_FILES is used. + """ + + def __init__(self, tests=()): + unittest.TestSuite.__init__(self, tests) + + argv = sys.argv and sys.argv[1:] or [] + if argv: + test_files = argv + else: + test_files = _TEST_FILES + for test_file in test_files: + resource_path = os.path.join(_RESOURCE_PREFIX, test_file) + self.addTest(filetestcase.AnnotatedFileTestCase(resource_path, + checker.GJsLintRunner(), errors.ByName)) + +if __name__ == '__main__': + # Don't let main parse args; it happens in the TestSuite. + googletest.main(argv=sys.argv[0:1], defaultTest='GJsLintTestSuite') diff --git a/third_party/closure_linter/closure_linter/gjslint.py b/third_party/closure_linter/closure_linter/gjslint.py new file mode 100755 index 0000000..dcfe09f --- /dev/null +++ b/third_party/closure_linter/closure_linter/gjslint.py @@ -0,0 +1,291 @@ +#!/usr/bin/env python +# python2.6 for command-line runs using p4lib. pylint: disable-msg=C6301 +# +# Copyright 2007 The Closure Linter Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Checks JavaScript files for common style guide violations. + +gjslint.py is designed to be used as a PRESUBMIT script to check for javascript +style guide violations. As of now, it checks for the following violations: + + * Missing and extra spaces + * Lines longer than 80 characters + * Missing newline at end of file + * Missing semicolon after function declaration + * Valid JsDoc including parameter matching + +Someday it will validate to the best of its ability against the entirety of the +JavaScript style guide. + +This file is a front end that parses arguments and flags. The core of the code +is in tokenizer.py and checker.py. +""" + +__author__ = ('robbyw@google.com (Robert Walker)', + 'ajp@google.com (Andy Perelson)') + +import functools +import itertools +import sys +import time + +import gflags as flags + +from closure_linter import checker +from closure_linter import errorrecord +from closure_linter.common import erroraccumulator +from closure_linter.common import simplefileflags as fileflags + +# Attempt import of multiprocessing (should be available in Python 2.6 and up). +try: + # pylint: disable-msg=C6204 + import multiprocessing +except ImportError: + multiprocessing = None + +FLAGS = flags.FLAGS +flags.DEFINE_boolean('unix_mode', False, + 'Whether to emit warnings in standard unix format.') +flags.DEFINE_boolean('beep', True, 'Whether to beep when errors are found.') +flags.DEFINE_boolean('time', False, 'Whether to emit timing statistics.') +flags.DEFINE_boolean('check_html', False, + 'Whether to check javascript in html files.') +flags.DEFINE_boolean('summary', False, + 'Whether to show an error count summary.') +flags.DEFINE_list('additional_extensions', None, 'List of additional file ' + 'extensions (not js) that should be treated as ' + 'JavaScript files.') +flags.DEFINE_boolean('multiprocess', False, + 'Whether to parallalize linting using the ' + 'multiprocessing module. Disabled by default.') + + +GJSLINT_ONLY_FLAGS = ['--unix_mode', '--beep', '--nobeep', '--time', + '--check_html', '--summary'] + + +def _MultiprocessCheckPaths(paths): + """Run _CheckPath over mutltiple processes. + + Tokenization, passes, and checks are expensive operations. Running in a + single process, they can only run on one CPU/core. Instead, + shard out linting over all CPUs with multiprocessing to parallelize. + + Args: + paths: paths to check. + + Yields: + errorrecord.ErrorRecords for any found errors. + """ + + pool = multiprocessing.Pool() + + for results in pool.imap(_CheckPath, paths): + for record in results: + yield record + + pool.close() + pool.join() + + +def _CheckPaths(paths): + """Run _CheckPath on all paths in one thread. + + Args: + paths: paths to check. + + Yields: + errorrecord.ErrorRecords for any found errors. + """ + + for path in paths: + results = _CheckPath(path) + for record in results: + yield record + + +def _CheckPath(path): + """Check a path and return any errors. + + Args: + path: paths to check. + + Returns: + A list of errorrecord.ErrorRecords for any found errors. + """ + + error_accumulator = erroraccumulator.ErrorAccumulator() + style_checker = checker.JavaScriptStyleChecker(error_accumulator) + style_checker.Check(path) + + # Return any errors as error records. + make_error_record = functools.partial(errorrecord.MakeErrorRecord, path) + return map(make_error_record, error_accumulator.GetErrors()) + + +def _GetFilePaths(argv): + suffixes = ['.js'] + if FLAGS.additional_extensions: + suffixes += ['.%s' % ext for ext in FLAGS.additional_extensions] + if FLAGS.check_html: + suffixes += ['.html', '.htm'] + return fileflags.GetFileList(argv, 'JavaScript', suffixes) + + +# Error printing functions + + +def _PrintFileSummary(paths, records): + """Print a detailed summary of the number of errors in each file.""" + + paths = list(paths) + paths.sort() + + for path in paths: + path_errors = [e for e in records if e.path == path] + print '%s: %d' % (path, len(path_errors)) + + +def _PrintFileSeparator(path): + print '----- FILE : %s -----' % path + + +def _PrintSummary(paths, error_records): + """Print a summary of the number of errors and files.""" + + error_count = len(error_records) + all_paths = set(paths) + all_paths_count = len(all_paths) + + if error_count is 0: + print '%d files checked, no errors found.' % all_paths_count + + new_error_count = len([e for e in error_records if e.new_error]) + + error_paths = set([e.path for e in error_records]) + error_paths_count = len(error_paths) + no_error_paths_count = all_paths_count - error_paths_count + + if error_count or new_error_count: + print ('Found %d errors, including %d new errors, in %d files ' + '(%d files OK).' % ( + error_count, + new_error_count, + error_paths_count, + no_error_paths_count)) + + +def _PrintErrorRecords(error_records): + """Print error records strings in the expected format.""" + + current_path = None + for record in error_records: + + if current_path != record.path: + current_path = record.path + if not FLAGS.unix_mode: + _PrintFileSeparator(current_path) + + print record.error_string + + +def _FormatTime(t): + """Formats a duration as a human-readable string. + + Args: + t: A duration in seconds. + + Returns: + A formatted duration string. + """ + if t < 1: + return '%dms' % round(t * 1000) + else: + return '%.2fs' % t + + +def main(argv = None): + """Main function. + + Args: + argv: Sequence of command line arguments. + """ + if argv is None: + argv = flags.FLAGS(sys.argv) + + if FLAGS.time: + start_time = time.time() + + suffixes = ['.js'] + if FLAGS.additional_extensions: + suffixes += ['.%s' % ext for ext in FLAGS.additional_extensions] + if FLAGS.check_html: + suffixes += ['.html', '.htm'] + paths = fileflags.GetFileList(argv, 'JavaScript', suffixes) + + if FLAGS.multiprocess: + records_iter = _MultiprocessCheckPaths(paths) + else: + records_iter = _CheckPaths(paths) + + records_iter, records_iter_copy = itertools.tee(records_iter, 2) + _PrintErrorRecords(records_iter_copy) + + error_records = list(records_iter) + _PrintSummary(paths, error_records) + + exit_code = 0 + + # If there are any errors + if error_records: + exit_code += 1 + + # If there are any new errors + if [r for r in error_records if r.new_error]: + exit_code += 2 + + if exit_code: + if FLAGS.summary: + _PrintFileSummary(paths, error_records) + + if FLAGS.beep: + # Make a beep noise. + sys.stdout.write(chr(7)) + + # Write out instructions for using fixjsstyle script to fix some of the + # reported errors. + fix_args = [] + for flag in sys.argv[1:]: + for f in GJSLINT_ONLY_FLAGS: + if flag.startswith(f): + break + else: + fix_args.append(flag) + + print """ +Some of the errors reported by GJsLint may be auto-fixable using the script +fixjsstyle. Please double check any changes it makes and report any bugs. The +script can be run by executing: + +fixjsstyle %s """ % ' '.join(fix_args) + + if FLAGS.time: + print 'Done in %s.' % _FormatTime(time.time() - start_time) + + sys.exit(exit_code) + + +if __name__ == '__main__': + main() diff --git a/third_party/closure_linter/closure_linter/indentation.py b/third_party/closure_linter/closure_linter/indentation.py new file mode 100755 index 0000000..cb97853 --- /dev/null +++ b/third_party/closure_linter/closure_linter/indentation.py @@ -0,0 +1,589 @@ +#!/usr/bin/env python +# +# Copyright 2010 The Closure Linter Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Methods for checking EcmaScript files for indentation issues.""" + +__author__ = ('robbyw@google.com (Robert Walker)') + +from closure_linter import ecmametadatapass +from closure_linter import errors +from closure_linter import javascripttokens +from closure_linter import tokenutil +from closure_linter.common import error +from closure_linter.common import position + +import gflags as flags + +flags.DEFINE_boolean('debug_indentation', False, + 'Whether to print debugging information for indentation.') + + +# Shorthand +Context = ecmametadatapass.EcmaContext +Error = error.Error +Position = position.Position +Type = javascripttokens.JavaScriptTokenType + + +# The general approach: +# +# 1. Build a stack of tokens that can affect indentation. +# For each token, we determine if it is a block or continuation token. +# Some tokens need to be temporarily overwritten in case they are removed +# before the end of the line. +# Much of the work here is determining which tokens to keep on the stack +# at each point. Operators, for example, should be removed once their +# expression or line is gone, while parentheses must stay until the matching +# end parentheses is found. +# +# 2. Given that stack, determine the allowable indentations. +# Due to flexible indentation rules in JavaScript, there may be many +# allowable indentations for each stack. We follows the general +# "no false positives" approach of GJsLint and build the most permissive +# set possible. + + +class TokenInfo(object): + """Stores information about a token. + + Attributes: + token: The token + is_block: Whether the token represents a block indentation. + is_transient: Whether the token should be automatically removed without + finding a matching end token. + overridden_by: TokenInfo for a token that overrides the indentation that + this token would require. + is_permanent_override: Whether the override on this token should persist + even after the overriding token is removed from the stack. For example: + x([ + 1], + 2); + needs this to be set so the last line is not required to be a continuation + indent. + line_number: The effective line number of this token. Will either be the + actual line number or the one before it in the case of a mis-wrapped + operator. + """ + + def __init__(self, token, is_block=False): + """Initializes a TokenInfo object. + + Args: + token: The token + is_block: Whether the token represents a block indentation. + """ + self.token = token + self.overridden_by = None + self.is_permanent_override = False + self.is_block = is_block + self.is_transient = not is_block and not token.type in ( + Type.START_PAREN, Type.START_PARAMETERS) + self.line_number = token.line_number + + def __repr__(self): + result = '\n %s' % self.token + if self.overridden_by: + result = '%s OVERRIDDEN [by "%s"]' % ( + result, self.overridden_by.token.string) + result += ' {is_block: %s, is_transient: %s}' % ( + self.is_block, self.is_transient) + return result + + +class IndentationRules(object): + """EmcaScript indentation rules. + + Can be used to find common indentation errors in JavaScript, ActionScript and + other Ecma like scripting languages. + """ + + def __init__(self): + """Initializes the IndentationRules checker.""" + self._stack = [] + + # Map from line number to number of characters it is off in indentation. + self._start_index_offset = {} + + def Finalize(self): + if self._stack: + old_stack = self._stack + self._stack = [] + raise Exception("INTERNAL ERROR: indentation stack is not empty: %r" % + old_stack) + + def CheckToken(self, token, state): + """Checks a token for indentation errors. + + Args: + token: The current token under consideration + state: Additional information about the current tree state + + Returns: + An error array [error code, error string, error token] if the token is + improperly indented, or None if indentation is correct. + """ + + token_type = token.type + indentation_errors = [] + stack = self._stack + is_first = self._IsFirstNonWhitespaceTokenInLine(token) + + # Add tokens that could decrease indentation before checking. + if token_type == Type.END_PAREN: + self._PopTo(Type.START_PAREN) + + elif token_type == Type.END_PARAMETERS: + self._PopTo(Type.START_PARAMETERS) + + elif token_type == Type.END_BRACKET: + self._PopTo(Type.START_BRACKET) + + elif token_type == Type.END_BLOCK: + start_token = self._PopTo(Type.START_BLOCK) + # Check for required goog.scope comment. + if start_token: + goog_scope = self._GoogScopeOrNone(start_token.token) + if goog_scope is not None: + if not token.line.endswith('; // goog.scope\n'): + if (token.line.find('//') > -1 and + token.line.find('goog.scope') > + token.line.find('//')): + indentation_errors.append([ + errors.MALFORMED_END_OF_SCOPE_COMMENT, + ('Malformed end of goog.scope comment. Please use the ' + 'exact following syntax to close the scope:\n' + '}); // goog.scope'), + token, + Position(token.start_index, token.length)]) + else: + indentation_errors.append([ + errors.MISSING_END_OF_SCOPE_COMMENT, + ('Missing comment for end of goog.scope which opened at line ' + '%d. End the scope with:\n' + '}); // goog.scope' % + (start_token.line_number)), + token, + Position(token.start_index, token.length)]) + + elif token_type == Type.KEYWORD and token.string in ('case', 'default'): + self._Add(self._PopTo(Type.START_BLOCK)) + + elif is_first and token.string == '.': + # This token should have been on the previous line, so treat it as if it + # was there. + info = TokenInfo(token) + info.line_number = token.line_number - 1 + self._Add(info) + + elif token_type == Type.SEMICOLON: + self._PopTransient() + + not_binary_operator = (token_type != Type.OPERATOR or + token.metadata.IsUnaryOperator()) + not_dot = token.string != '.' + if is_first and not_binary_operator and not_dot and token.type not in ( + Type.COMMENT, Type.DOC_PREFIX, Type.STRING_TEXT): + if flags.FLAGS.debug_indentation: + print 'Line #%d: stack %r' % (token.line_number, stack) + + # Ignore lines that start in JsDoc since we don't check them properly yet. + # TODO(robbyw): Support checking JsDoc indentation. + # Ignore lines that start as multi-line strings since indentation is N/A. + # Ignore lines that start with operators since we report that already. + # Ignore lines with tabs since we report that already. + expected = self._GetAllowableIndentations() + actual = self._GetActualIndentation(token) + + # Special case comments describing else, case, and default. Allow them + # to outdent to the parent block. + if token_type in Type.COMMENT_TYPES: + next_code = tokenutil.SearchExcept(token, Type.NON_CODE_TYPES) + if next_code and next_code.type == Type.END_BLOCK: + next_code = tokenutil.SearchExcept(next_code, Type.NON_CODE_TYPES) + if next_code and next_code.string in ('else', 'case', 'default'): + # TODO(robbyw): This almost certainly introduces false negatives. + expected |= self._AddToEach(expected, -2) + + if actual >= 0 and actual not in expected: + expected = sorted(expected) + indentation_errors.append([ + errors.WRONG_INDENTATION, + 'Wrong indentation: expected any of {%s} but got %d' % ( + ', '.join( + ['%d' % x for x in expected]), actual), + token, + Position(actual, expected[0])]) + self._start_index_offset[token.line_number] = expected[0] - actual + + # Add tokens that could increase indentation. + if token_type == Type.START_BRACKET: + self._Add(TokenInfo(token=token, + is_block=token.metadata.context.type == Context.ARRAY_LITERAL)) + + elif token_type == Type.START_BLOCK or token.metadata.is_implied_block: + self._Add(TokenInfo(token=token, is_block=True)) + + elif token_type in (Type.START_PAREN, Type.START_PARAMETERS): + self._Add(TokenInfo(token=token, is_block=False)) + + elif token_type == Type.KEYWORD and token.string == 'return': + self._Add(TokenInfo(token)) + + elif not token.IsLastInLine() and ( + token.IsAssignment() or token.IsOperator('?')): + self._Add(TokenInfo(token=token)) + + # Handle implied block closes. + if token.metadata.is_implied_block_close: + self._PopToImpliedBlock() + + # Add some tokens only if they appear at the end of the line. + is_last = self._IsLastCodeInLine(token) + if is_last: + if token_type == Type.OPERATOR: + if token.string == ':': + if (stack and stack[-1].token.string == '?'): + # When a ternary : is on a different line than its '?', it doesn't + # add indentation. + if (token.line_number == stack[-1].token.line_number): + self._Add(TokenInfo(token)) + elif token.metadata.context.type == Context.CASE_BLOCK: + # Pop transient tokens from say, line continuations, e.g., + # case x. + # y: + # Want to pop the transient 4 space continuation indent. + self._PopTransient() + # Starting the body of the case statement, which is a type of + # block. + self._Add(TokenInfo(token=token, is_block=True)) + elif token.metadata.context.type == Context.LITERAL_ELEMENT: + # When in an object literal, acts as operator indicating line + # continuations. + self._Add(TokenInfo(token)) + pass + else: + # ':' might also be a statement label, no effect on indentation in + # this case. + pass + + elif token.string != ',': + self._Add(TokenInfo(token)) + else: + # The token is a comma. + if token.metadata.context.type == Context.VAR: + self._Add(TokenInfo(token)) + elif token.metadata.context.type != Context.PARAMETERS: + self._PopTransient() + + elif (token.string.endswith('.') + and token_type in (Type.IDENTIFIER, Type.NORMAL)): + self._Add(TokenInfo(token)) + elif token_type == Type.PARAMETERS and token.string.endswith(','): + # Parameter lists. + self._Add(TokenInfo(token)) + elif token.metadata.is_implied_semicolon: + self._PopTransient() + elif token.IsAssignment(): + self._Add(TokenInfo(token)) + + return indentation_errors + + def _AddToEach(self, original, amount): + """Returns a new set with the given amount added to each element. + + Args: + original: The original set of numbers + amount: The amount to add to each element + + Returns: + A new set containing each element of the original set added to the amount. + """ + return set([x + amount for x in original]) + + _HARD_STOP_TYPES = (Type.START_PAREN, Type.START_PARAMETERS, + Type.START_BRACKET) + + _HARD_STOP_STRINGS = ('return', '?') + + def _IsHardStop(self, token): + """Determines if the given token can have a hard stop after it. + + Hard stops are indentations defined by the position of another token as in + indentation lined up with return, (, [, and ?. + """ + return (token.type in self._HARD_STOP_TYPES or + token.string in self._HARD_STOP_STRINGS or + token.IsAssignment()) + + def _GetAllowableIndentations(self): + """Computes the set of allowable indentations. + + Returns: + The set of allowable indentations, given the current stack. + """ + expected = set([0]) + hard_stops = set([]) + + # Whether the tokens are still in the same continuation, meaning additional + # indentation is optional. As an example: + # x = 5 + + # 6 + + # 7; + # The second '+' does not add any required indentation. + in_same_continuation = False + + for token_info in self._stack: + token = token_info.token + + # Handle normal additive indentation tokens. + if not token_info.overridden_by and token.string != 'return': + if token_info.is_block: + expected = self._AddToEach(expected, 2) + hard_stops = self._AddToEach(hard_stops, 2) + in_same_continuation = False + elif in_same_continuation: + expected |= self._AddToEach(expected, 4) + hard_stops |= self._AddToEach(hard_stops, 4) + else: + expected = self._AddToEach(expected, 4) + hard_stops |= self._AddToEach(hard_stops, 4) + in_same_continuation = True + + # Handle hard stops after (, [, return, =, and ? + if self._IsHardStop(token): + override_is_hard_stop = (token_info.overridden_by and + self._IsHardStop(token_info.overridden_by.token)) + if not override_is_hard_stop: + start_index = token.start_index + if token.line_number in self._start_index_offset: + start_index += self._start_index_offset[token.line_number] + if (token.type in (Type.START_PAREN, Type.START_PARAMETERS) and + not token_info.overridden_by): + hard_stops.add(start_index + 1) + + elif token.string == 'return' and not token_info.overridden_by: + hard_stops.add(start_index + 7) + + elif (token.type == Type.START_BRACKET): + hard_stops.add(start_index + 1) + + elif token.IsAssignment(): + hard_stops.add(start_index + len(token.string) + 1) + + elif token.IsOperator('?') and not token_info.overridden_by: + hard_stops.add(start_index + 2) + + return (expected | hard_stops) or set([0]) + + def _GetActualIndentation(self, token): + """Gets the actual indentation of the line containing the given token. + + Args: + token: Any token on the line. + + Returns: + The actual indentation of the line containing the given token. Returns + -1 if this line should be ignored due to the presence of tabs. + """ + # Move to the first token in the line + token = tokenutil.GetFirstTokenInSameLine(token) + + # If it is whitespace, it is the indentation. + if token.type == Type.WHITESPACE: + if token.string.find('\t') >= 0: + return -1 + else: + return len(token.string) + elif token.type == Type.PARAMETERS: + return len(token.string) - len(token.string.lstrip()) + else: + return 0 + + def _IsFirstNonWhitespaceTokenInLine(self, token): + """Determines if the given token is the first non-space token on its line. + + Args: + token: The token. + + Returns: + True if the token is the first non-whitespace token on its line. + """ + if token.type in (Type.WHITESPACE, Type.BLANK_LINE): + return False + if token.IsFirstInLine(): + return True + return (token.previous and token.previous.IsFirstInLine() and + token.previous.type == Type.WHITESPACE) + + def _IsLastCodeInLine(self, token): + """Determines if the given token is the last code token on its line. + + Args: + token: The token. + + Returns: + True if the token is the last code token on its line. + """ + if token.type in Type.NON_CODE_TYPES: + return False + start_token = token + while True: + token = token.next + if not token or token.line_number != start_token.line_number: + return True + if token.type not in Type.NON_CODE_TYPES: + return False + + def _GoogScopeOrNone(self, token): + """Determines if the given START_BLOCK is part of a goog.scope statement. + + Args: + token: A token of type START_BLOCK. + + Returns: + The goog.scope function call token, or None if such call doesn't exist. + """ + # Search for a goog.scope statement, which will be 5 tokens before the + # block. Illustration of the tokens found prior to the start block: + # goog.scope(function() { + # 5 4 3 21 ^ + + maybe_goog_scope = token + for unused_i in xrange(5): + maybe_goog_scope = (maybe_goog_scope.previous if maybe_goog_scope and + maybe_goog_scope.previous else None) + if maybe_goog_scope and maybe_goog_scope.string == 'goog.scope': + return maybe_goog_scope + + def _Add(self, token_info): + """Adds the given token info to the stack. + + Args: + token_info: The token information to add. + """ + if self._stack and self._stack[-1].token == token_info.token: + # Don't add the same token twice. + return + + if token_info.is_block or token_info.token.type == Type.START_PAREN: + token_info.overridden_by = self._GoogScopeOrNone(token_info.token) + index = 1 + while index <= len(self._stack): + stack_info = self._stack[-index] + stack_token = stack_info.token + + if stack_info.line_number == token_info.line_number: + # In general, tokens only override each other when they are on + # the same line. + stack_info.overridden_by = token_info + if (token_info.token.type == Type.START_BLOCK and + (stack_token.IsAssignment() or + stack_token.type in (Type.IDENTIFIER, Type.START_PAREN))): + # Multi-line blocks have lasting overrides, as in: + # callFn({ + # a: 10 + # }, + # 30); + close_block = token_info.token.metadata.context.end_token + stack_info.is_permanent_override = \ + close_block.line_number != token_info.token.line_number + elif (token_info.token.type == Type.START_BLOCK and + token_info.token.metadata.context.type == Context.BLOCK and + (stack_token.IsAssignment() or + stack_token.type == Type.IDENTIFIER)): + # When starting a function block, the override can transcend lines. + # For example + # long.long.name = function( + # a) { + # In this case the { and the = are on different lines. But the + # override should still apply. + stack_info.overridden_by = token_info + stack_info.is_permanent_override = True + else: + break + index += 1 + + self._stack.append(token_info) + + def _Pop(self): + """Pops the top token from the stack. + + Returns: + The popped token info. + """ + token_info = self._stack.pop() + if token_info.token.type not in (Type.START_BLOCK, Type.START_BRACKET): + # Remove any temporary overrides. + self._RemoveOverrides(token_info) + else: + # For braces and brackets, which can be object and array literals, remove + # overrides when the literal is closed on the same line. + token_check = token_info.token + same_type = token_check.type + goal_type = None + if token_info.token.type == Type.START_BRACKET: + goal_type = Type.END_BRACKET + else: + goal_type = Type.END_BLOCK + line_number = token_info.token.line_number + count = 0 + while token_check and token_check.line_number == line_number: + if token_check.type == goal_type: + count -= 1 + if not count: + self._RemoveOverrides(token_info) + break + if token_check.type == same_type: + count += 1 + token_check = token_check.next + return token_info + + def _PopToImpliedBlock(self): + """Pops the stack until an implied block token is found.""" + while not self._Pop().token.metadata.is_implied_block: + pass + + def _PopTo(self, stop_type): + """Pops the stack until a token of the given type is popped. + + Args: + stop_type: The type of token to pop to. + + Returns: + The token info of the given type that was popped. + """ + last = None + while True: + last = self._Pop() + if last.token.type == stop_type: + break + return last + + def _RemoveOverrides(self, token_info): + """Marks any token that was overridden by this token as active again. + + Args: + token_info: The token that is being removed from the stack. + """ + for stack_token in self._stack: + if (stack_token.overridden_by == token_info and + not stack_token.is_permanent_override): + stack_token.overridden_by = None + + def _PopTransient(self): + """Pops all transient tokens - i.e. not blocks, literals, or parens.""" + while self._stack and self._stack[-1].is_transient: + self._Pop() diff --git a/third_party/closure_linter/closure_linter/javascriptlintrules.py b/third_party/closure_linter/closure_linter/javascriptlintrules.py new file mode 100755 index 0000000..d72fd56 --- /dev/null +++ b/third_party/closure_linter/closure_linter/javascriptlintrules.py @@ -0,0 +1,538 @@ +#!/usr/bin/env python +# +# Copyright 2011 The Closure Linter Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Methods for checking JS files for common style guide violations. + +These style guide violations should only apply to JavaScript and not an Ecma +scripting languages. +""" + +__author__ = ('robbyw@google.com (Robert Walker)', + 'ajp@google.com (Andy Perelson)', + 'jacobr@google.com (Jacob Richman)') + +import re +from sets import Set +from closure_linter import ecmalintrules +from closure_linter import error_check +from closure_linter import errors +from closure_linter import javascripttokenizer +from closure_linter import javascripttokens +from closure_linter import requireprovidesorter +from closure_linter import tokenutil +from closure_linter.common import error +from closure_linter.common import position + +# Shorthand +Error = error.Error +Position = position.Position +Rule = error_check.Rule +Type = javascripttokens.JavaScriptTokenType + + +class JavaScriptLintRules(ecmalintrules.EcmaScriptLintRules): + """JavaScript lint rules that catch JavaScript specific style errors.""" + + def __init__(self, namespaces_info): + """Initializes a JavaScriptLintRules instance.""" + ecmalintrules.EcmaScriptLintRules.__init__(self) + self._namespaces_info = namespaces_info + self._declared_private_member_tokens = {} + self._declared_private_members = Set() + self._used_private_members = Set() + + def HandleMissingParameterDoc(self, token, param_name): + """Handle errors associated with a parameter missing a param tag.""" + self._HandleError(errors.MISSING_PARAMETER_DOCUMENTATION, + 'Missing docs for parameter: "%s"' % param_name, token) + + def __ContainsRecordType(self, token): + """Check whether the given token contains a record type. + + Args: + token: The token being checked + + Returns: + True if the token contains a record type, False otherwise. + """ + # If we see more than one left-brace in the string of an annotation token, + # then there's a record type in there. + return ( + token and token.type == Type.DOC_FLAG and + token.attached_object.type is not None and + token.attached_object.type.find('{') != token.string.rfind('{')) + + def CheckToken(self, token, state): + """Checks a token, given the current parser_state, for warnings and errors. + + Args: + token: The current token under consideration + state: parser_state object that indicates the current state in the page + """ + if self.__ContainsRecordType(token): + # We should bail out and not emit any warnings for this annotation. + # TODO(nicksantos): Support record types for real. + state.GetDocComment().Invalidate() + return + + # Call the base class's CheckToken function. + super(JavaScriptLintRules, self).CheckToken(token, state) + + # Store some convenience variables + namespaces_info = self._namespaces_info + + if error_check.ShouldCheck(Rule.UNUSED_PRIVATE_MEMBERS): + # Find all assignments to private members. + if token.type == Type.SIMPLE_LVALUE: + identifier = token.string + if identifier.endswith('_') and not identifier.endswith('__'): + doc_comment = state.GetDocComment() + suppressed = (doc_comment and doc_comment.HasFlag('suppress') and + doc_comment.GetFlag('suppress').type == 'underscore') + if not suppressed: + # Look for static members defined on a provided namespace. + namespace = namespaces_info.GetClosurizedNamespace(identifier) + provided_namespaces = namespaces_info.GetProvidedNamespaces() + + # Skip cases of this.something_.somethingElse_. + regex = re.compile('^this\.[a-zA-Z_]+$') + if namespace in provided_namespaces or regex.match(identifier): + variable = identifier.split('.')[-1] + self._declared_private_member_tokens[variable] = token + self._declared_private_members.add(variable) + elif not identifier.endswith('__'): + # Consider setting public members of private members to be a usage. + for piece in identifier.split('.'): + if piece.endswith('_'): + self._used_private_members.add(piece) + + # Find all usages of private members. + if token.type == Type.IDENTIFIER: + for piece in token.string.split('.'): + if piece.endswith('_'): + self._used_private_members.add(piece) + + if token.type == Type.DOC_FLAG: + flag = token.attached_object + + if flag.flag_type == 'param' and flag.name_token is not None: + self._CheckForMissingSpaceBeforeToken( + token.attached_object.name_token) + + if (error_check.ShouldCheck(Rule.OPTIONAL_TYPE_MARKER) and + flag.type is not None and flag.name is not None): + # Check for optional marker in type. + if (flag.type.endswith('=') and + not flag.name.startswith('opt_')): + self._HandleError(errors.JSDOC_MISSING_OPTIONAL_PREFIX, + 'Optional parameter name %s must be prefixed ' + 'with opt_.' % flag.name, + token) + elif (not flag.type.endswith('=') and + flag.name.startswith('opt_')): + self._HandleError(errors.JSDOC_MISSING_OPTIONAL_TYPE, + 'Optional parameter %s type must end with =.' % + flag.name, + token) + + if flag.flag_type in state.GetDocFlag().HAS_TYPE: + # Check for both missing type token and empty type braces '{}' + # Missing suppress types are reported separately and we allow enums + # without types. + if (flag.flag_type not in ('suppress', 'enum') and + (not flag.type or flag.type.isspace())): + self._HandleError(errors.MISSING_JSDOC_TAG_TYPE, + 'Missing type in %s tag' % token.string, token) + + elif flag.name_token and flag.type_end_token and tokenutil.Compare( + flag.type_end_token, flag.name_token) > 0: + self._HandleError( + errors.OUT_OF_ORDER_JSDOC_TAG_TYPE, + 'Type should be immediately after %s tag' % token.string, + token) + + elif token.type == Type.DOUBLE_QUOTE_STRING_START: + next_token = token.next + while next_token.type == Type.STRING_TEXT: + if javascripttokenizer.JavaScriptTokenizer.SINGLE_QUOTE.search( + next_token.string): + break + next_token = next_token.next + else: + self._HandleError( + errors.UNNECESSARY_DOUBLE_QUOTED_STRING, + 'Single-quoted string preferred over double-quoted string.', + token, + Position.All(token.string)) + + elif token.type == Type.END_DOC_COMMENT: + doc_comment = state.GetDocComment() + + # When @externs appears in a @fileoverview comment, it should trigger + # the same limited doc checks as a special filename like externs.js. + if doc_comment.HasFlag('fileoverview') and doc_comment.HasFlag('externs'): + self._SetLimitedDocChecks(True) + + if (error_check.ShouldCheck(Rule.BLANK_LINES_AT_TOP_LEVEL) and + not self._is_html and state.InTopLevel() and not state.InBlock()): + + # Check if we're in a fileoverview or constructor JsDoc. + is_constructor = ( + doc_comment.HasFlag('constructor') or + doc_comment.HasFlag('interface')) + is_file_overview = doc_comment.HasFlag('fileoverview') + + # If the comment is not a file overview, and it does not immediately + # precede some code, skip it. + # NOTE: The tokenutil methods are not used here because of their + # behavior at the top of a file. + next_token = token.next + if (not next_token or + (not is_file_overview and next_token.type in Type.NON_CODE_TYPES)): + return + + # Don't require extra blank lines around suppression of extra + # goog.require errors. + if (doc_comment.SuppressionOnly() and + next_token.type == Type.IDENTIFIER and + next_token.string in ['goog.provide', 'goog.require']): + return + + # Find the start of this block (include comments above the block, unless + # this is a file overview). + block_start = doc_comment.start_token + if not is_file_overview: + token = block_start.previous + while token and token.type in Type.COMMENT_TYPES: + block_start = token + token = token.previous + + # Count the number of blank lines before this block. + blank_lines = 0 + token = block_start.previous + while token and token.type in [Type.WHITESPACE, Type.BLANK_LINE]: + if token.type == Type.BLANK_LINE: + # A blank line. + blank_lines += 1 + elif token.type == Type.WHITESPACE and not token.line.strip(): + # A line with only whitespace on it. + blank_lines += 1 + token = token.previous + + # Log errors. + error_message = False + expected_blank_lines = 0 + + if is_file_overview and blank_lines == 0: + error_message = 'Should have a blank line before a file overview.' + expected_blank_lines = 1 + elif is_constructor and blank_lines != 3: + error_message = ( + 'Should have 3 blank lines before a constructor/interface.') + expected_blank_lines = 3 + elif not is_file_overview and not is_constructor and blank_lines != 2: + error_message = 'Should have 2 blank lines between top-level blocks.' + expected_blank_lines = 2 + + if error_message: + self._HandleError( + errors.WRONG_BLANK_LINE_COUNT, error_message, + block_start, Position.AtBeginning(), + expected_blank_lines - blank_lines) + + elif token.type == Type.END_BLOCK: + if state.InFunction() and state.IsFunctionClose(): + is_immediately_called = (token.next and + token.next.type == Type.START_PAREN) + + function = state.GetFunction() + if not self._limited_doc_checks: + if (function.has_return and function.doc and + not is_immediately_called and + not function.doc.HasFlag('return') and + not function.doc.InheritsDocumentation() and + not function.doc.HasFlag('constructor')): + # Check for proper documentation of return value. + self._HandleError( + errors.MISSING_RETURN_DOCUMENTATION, + 'Missing @return JsDoc in function with non-trivial return', + function.doc.end_token, Position.AtBeginning()) + elif (not function.has_return and + not function.has_throw and + function.doc and + function.doc.HasFlag('return') and + not state.InInterfaceMethod()): + return_flag = function.doc.GetFlag('return') + if (return_flag.type is None or ( + 'undefined' not in return_flag.type and + 'void' not in return_flag.type and + '*' not in return_flag.type)): + self._HandleError( + errors.UNNECESSARY_RETURN_DOCUMENTATION, + 'Found @return JsDoc on function that returns nothing', + return_flag.flag_token, Position.AtBeginning()) + + if state.InFunction() and state.IsFunctionClose(): + is_immediately_called = (token.next and + token.next.type == Type.START_PAREN) + if (function.has_this and function.doc and + not function.doc.HasFlag('this') and + not function.is_constructor and + not function.is_interface and + '.prototype.' not in function.name): + self._HandleError( + errors.MISSING_JSDOC_TAG_THIS, + 'Missing @this JsDoc in function referencing "this". (' + 'this usually means you are trying to reference "this" in ' + 'a static function, or you have forgotten to mark a ' + 'constructor with @constructor)', + function.doc.end_token, Position.AtBeginning()) + + elif token.type == Type.IDENTIFIER: + if token.string == 'goog.inherits' and not state.InFunction(): + if state.GetLastNonSpaceToken().line_number == token.line_number: + self._HandleError( + errors.MISSING_LINE, + 'Missing newline between constructor and goog.inherits', + token, + Position.AtBeginning()) + + extra_space = state.GetLastNonSpaceToken().next + while extra_space != token: + if extra_space.type == Type.BLANK_LINE: + self._HandleError( + errors.EXTRA_LINE, + 'Extra line between constructor and goog.inherits', + extra_space) + extra_space = extra_space.next + + # TODO(robbyw): Test the last function was a constructor. + # TODO(robbyw): Test correct @extends and @implements documentation. + + elif (token.string == 'goog.provide' and + not state.InFunction() and + namespaces_info is not None): + namespace = tokenutil.Search(token, Type.STRING_TEXT).string + + # Report extra goog.provide statement. + if namespaces_info.IsExtraProvide(token): + self._HandleError( + errors.EXTRA_GOOG_PROVIDE, + 'Unnecessary goog.provide: ' + namespace, + token, position=Position.AtBeginning()) + + if namespaces_info.IsLastProvide(token): + # Report missing provide statements after the last existing provide. + missing_provides = namespaces_info.GetMissingProvides() + if missing_provides: + self._ReportMissingProvides( + missing_provides, + tokenutil.GetLastTokenInSameLine(token).next, + False) + + # If there are no require statements, missing requires should be + # reported after the last provide. + if not namespaces_info.GetRequiredNamespaces(): + missing_requires = namespaces_info.GetMissingRequires() + if missing_requires: + self._ReportMissingRequires( + missing_requires, + tokenutil.GetLastTokenInSameLine(token).next, + True) + + elif (token.string == 'goog.require' and + not state.InFunction() and + namespaces_info is not None): + namespace = tokenutil.Search(token, Type.STRING_TEXT).string + + # If there are no provide statements, missing provides should be + # reported before the first require. + if (namespaces_info.IsFirstRequire(token) and + not namespaces_info.GetProvidedNamespaces()): + missing_provides = namespaces_info.GetMissingProvides() + if missing_provides: + self._ReportMissingProvides( + missing_provides, + tokenutil.GetFirstTokenInSameLine(token), + True) + + # Report extra goog.require statement. + if namespaces_info.IsExtraRequire(token): + self._HandleError( + errors.EXTRA_GOOG_REQUIRE, + 'Unnecessary goog.require: ' + namespace, + token, position=Position.AtBeginning()) + + # Report missing goog.require statements. + if namespaces_info.IsLastRequire(token): + missing_requires = namespaces_info.GetMissingRequires() + if missing_requires: + self._ReportMissingRequires( + missing_requires, + tokenutil.GetLastTokenInSameLine(token).next, + False) + + elif token.type == Type.OPERATOR: + last_in_line = token.IsLastInLine() + # If the token is unary and appears to be used in a unary context + # it's ok. Otherwise, if it's at the end of the line or immediately + # before a comment, it's ok. + # Don't report an error before a start bracket - it will be reported + # by that token's space checks. + if (not token.metadata.IsUnaryOperator() and not last_in_line + and not token.next.IsComment() + and not token.next.IsOperator(',') + and not token.next.type in (Type.WHITESPACE, Type.END_PAREN, + Type.END_BRACKET, Type.SEMICOLON, + Type.START_BRACKET)): + self._HandleError( + errors.MISSING_SPACE, + 'Missing space after "%s"' % token.string, + token, + Position.AtEnd(token.string)) + elif token.type == Type.WHITESPACE: + first_in_line = token.IsFirstInLine() + last_in_line = token.IsLastInLine() + # Check whitespace length if it's not the first token of the line and + # if it's not immediately before a comment. + if not last_in_line and not first_in_line and not token.next.IsComment(): + # Ensure there is no space after opening parentheses. + if (token.previous.type in (Type.START_PAREN, Type.START_BRACKET, + Type.FUNCTION_NAME) + or token.next.type == Type.START_PARAMETERS): + self._HandleError( + errors.EXTRA_SPACE, + 'Extra space after "%s"' % token.previous.string, + token, + Position.All(token.string)) + + def _ReportMissingProvides(self, missing_provides, token, need_blank_line): + """Reports missing provide statements to the error handler. + + Args: + missing_provides: A list of strings where each string is a namespace that + should be provided, but is not. + token: The token where the error was detected (also where the new provides + will be inserted. + need_blank_line: Whether a blank line needs to be inserted after the new + provides are inserted. May be True, False, or None, where None + indicates that the insert location is unknown. + """ + self._HandleError( + errors.MISSING_GOOG_PROVIDE, + 'Missing the following goog.provide statements:\n' + + '\n'.join(map(lambda x: 'goog.provide(\'%s\');' % x, + sorted(missing_provides))), + token, position=Position.AtBeginning(), + fix_data=(missing_provides, need_blank_line)) + + def _ReportMissingRequires(self, missing_requires, token, need_blank_line): + """Reports missing require statements to the error handler. + + Args: + missing_requires: A list of strings where each string is a namespace that + should be required, but is not. + token: The token where the error was detected (also where the new requires + will be inserted. + need_blank_line: Whether a blank line needs to be inserted before the new + requires are inserted. May be True, False, or None, where None + indicates that the insert location is unknown. + """ + self._HandleError( + errors.MISSING_GOOG_REQUIRE, + 'Missing the following goog.require statements:\n' + + '\n'.join(map(lambda x: 'goog.require(\'%s\');' % x, + sorted(missing_requires))), + token, position=Position.AtBeginning(), + fix_data=(missing_requires, need_blank_line)) + + def Finalize(self, state, tokenizer_mode): + """Perform all checks that need to occur after all lines are processed.""" + # Call the base class's Finalize function. + super(JavaScriptLintRules, self).Finalize(state, tokenizer_mode) + + if error_check.ShouldCheck(Rule.UNUSED_PRIVATE_MEMBERS): + # Report an error for any declared private member that was never used. + unused_private_members = (self._declared_private_members - + self._used_private_members) + + for variable in unused_private_members: + token = self._declared_private_member_tokens[variable] + self._HandleError(errors.UNUSED_PRIVATE_MEMBER, + 'Unused private member: %s.' % token.string, + token) + + # Clear state to prepare for the next file. + self._declared_private_member_tokens = {} + self._declared_private_members = Set() + self._used_private_members = Set() + + namespaces_info = self._namespaces_info + if namespaces_info is not None: + # If there are no provide or require statements, missing provides and + # requires should be reported on line 1. + if (not namespaces_info.GetProvidedNamespaces() and + not namespaces_info.GetRequiredNamespaces()): + missing_provides = namespaces_info.GetMissingProvides() + if missing_provides: + self._ReportMissingProvides( + missing_provides, state.GetFirstToken(), None) + + missing_requires = namespaces_info.GetMissingRequires() + if missing_requires: + self._ReportMissingRequires( + missing_requires, state.GetFirstToken(), None) + + self._CheckSortedRequiresProvides(state.GetFirstToken()) + + def _CheckSortedRequiresProvides(self, token): + """Checks that all goog.require and goog.provide statements are sorted. + + Note that this method needs to be run after missing statements are added to + preserve alphabetical order. + + Args: + token: The first token in the token stream. + """ + sorter = requireprovidesorter.RequireProvideSorter() + provides_result = sorter.CheckProvides(token) + if provides_result: + self._HandleError( + errors.GOOG_PROVIDES_NOT_ALPHABETIZED, + 'goog.provide classes must be alphabetized. The correct code is:\n' + + '\n'.join( + map(lambda x: 'goog.provide(\'%s\');' % x, provides_result[1])), + provides_result[0], + position=Position.AtBeginning(), + fix_data=provides_result[0]) + + requires_result = sorter.CheckRequires(token) + if requires_result: + self._HandleError( + errors.GOOG_REQUIRES_NOT_ALPHABETIZED, + 'goog.require classes must be alphabetized. The correct code is:\n' + + '\n'.join( + map(lambda x: 'goog.require(\'%s\');' % x, requires_result[1])), + requires_result[0], + position=Position.AtBeginning(), + fix_data=requires_result[0]) + + def GetLongLineExceptions(self): + """Gets a list of regexps for lines which can be longer than the limit.""" + return [ + re.compile('goog\.require\(.+\);?\s*$'), + re.compile('goog\.provide\(.+\);?\s*$') + ] diff --git a/third_party/closure_linter/closure_linter/javascriptstatetracker.py b/third_party/closure_linter/closure_linter/javascriptstatetracker.py new file mode 100755 index 0000000..2ce5c02 --- /dev/null +++ b/third_party/closure_linter/closure_linter/javascriptstatetracker.py @@ -0,0 +1,116 @@ +#!/usr/bin/env python +# +# Copyright 2008 The Closure Linter Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Parser for JavaScript files.""" + + + +from closure_linter import javascripttokens +from closure_linter import statetracker +from closure_linter import tokenutil + +# Shorthand +Type = javascripttokens.JavaScriptTokenType + + +class JsDocFlag(statetracker.DocFlag): + """Javascript doc flag object. + + Attribute: + flag_type: param, return, define, type, etc. + flag_token: The flag token. + type_start_token: The first token specifying the flag JS type, + including braces. + type_end_token: The last token specifying the flag JS type, + including braces. + type: The JavaScript type spec. + name_token: The token specifying the flag name. + name: The flag name + description_start_token: The first token in the description. + description_end_token: The end token in the description. + description: The description. + """ + + # Please keep these lists alphabetized. + + # Some projects use the following extensions to JsDoc. + # TODO(robbyw): determine which of these, if any, should be illegal. + EXTENDED_DOC = frozenset([ + 'class', 'code', 'desc', 'final', 'hidden', 'inheritDoc', 'link', + 'meaning', 'protected', 'notypecheck', 'throws']) + + LEGAL_DOC = EXTENDED_DOC | statetracker.DocFlag.LEGAL_DOC + + def __init__(self, flag_token): + """Creates the JsDocFlag object and attaches it to the given start token. + + Args: + flag_token: The starting token of the flag. + """ + statetracker.DocFlag.__init__(self, flag_token) + + +class JavaScriptStateTracker(statetracker.StateTracker): + """JavaScript state tracker. + + Inherits from the core EcmaScript StateTracker adding extra state tracking + functionality needed for JavaScript. + """ + + def __init__(self): + """Initializes a JavaScript token stream state tracker.""" + statetracker.StateTracker.__init__(self, JsDocFlag) + + def InTopLevel(self): + """Compute whether we are at the top level in the class. + + This function call is language specific. In some languages like + JavaScript, a function is top level if it is not inside any parenthesis. + In languages such as ActionScript, a function is top level if it is directly + within a class. + + Returns: + Whether we are at the top level in the class. + """ + return not self.InParentheses() + + def GetBlockType(self, token): + """Determine the block type given a START_BLOCK token. + + Code blocks come after parameters, keywords like else, and closing parens. + + Args: + token: The current token. Can be assumed to be type START_BLOCK + Returns: + Code block type for current token. + """ + last_code = tokenutil.SearchExcept(token, Type.NON_CODE_TYPES, None, + True) + if last_code.type in (Type.END_PARAMETERS, Type.END_PAREN, + Type.KEYWORD) and not last_code.IsKeyword('return'): + return self.CODE + else: + return self.OBJECT_LITERAL + + def HandleToken(self, token, last_non_space_token): + """Handles the given token and updates state. + + Args: + token: The token to handle. + last_non_space_token: + """ + super(JavaScriptStateTracker, self).HandleToken(token, + last_non_space_token) diff --git a/third_party/closure_linter/closure_linter/javascripttokenizer.py b/third_party/closure_linter/closure_linter/javascripttokenizer.py new file mode 100755 index 0000000..98f9184 --- /dev/null +++ b/third_party/closure_linter/closure_linter/javascripttokenizer.py @@ -0,0 +1,363 @@ +#!/usr/bin/env python +# +# Copyright 2007 The Closure Linter Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Regular expression based JavaScript parsing classes.""" + +__author__ = ('robbyw@google.com (Robert Walker)', + 'ajp@google.com (Andy Perelson)') + +import copy +import re + +from closure_linter import javascripttokens +from closure_linter.common import matcher +from closure_linter.common import tokenizer + +# Shorthand +Type = javascripttokens.JavaScriptTokenType +Matcher = matcher.Matcher + + +class JavaScriptModes(object): + """Enumeration of the different matcher modes used for JavaScript.""" + TEXT_MODE = 'text' + SINGLE_QUOTE_STRING_MODE = 'single_quote_string' + DOUBLE_QUOTE_STRING_MODE = 'double_quote_string' + BLOCK_COMMENT_MODE = 'block_comment' + DOC_COMMENT_MODE = 'doc_comment' + DOC_COMMENT_LEX_SPACES_MODE = 'doc_comment_spaces' + LINE_COMMENT_MODE = 'line_comment' + PARAMETER_MODE = 'parameter' + FUNCTION_MODE = 'function' + + +class JavaScriptTokenizer(tokenizer.Tokenizer): + """JavaScript tokenizer. + + Convert JavaScript code in to an array of tokens. + """ + + # Useful patterns for JavaScript parsing. + IDENTIFIER_CHAR = r'A-Za-z0-9_$.' + + # Number patterns based on: + # http://www.mozilla.org/js/language/js20-2000-07/formal/lexer-grammar.html + MANTISSA = r""" + (\d+(?!\.)) | # Matches '10' + (\d+\.(?!\d)) | # Matches '10.' + (\d*\.\d+) # Matches '.5' or '10.5' + """ + DECIMAL_LITERAL = r'(%s)([eE][-+]?\d+)?' % MANTISSA + HEX_LITERAL = r'0[xX][0-9a-fA-F]+' + NUMBER = re.compile(r""" + ((%s)|(%s)) + """ % (HEX_LITERAL, DECIMAL_LITERAL), re.VERBOSE) + + # Strings come in three parts - first we match the start of the string, then + # the contents, then the end. The contents consist of any character except a + # backslash or end of string, or a backslash followed by any character, or a + # backslash followed by end of line to support correct parsing of multi-line + # strings. + SINGLE_QUOTE = re.compile(r"'") + SINGLE_QUOTE_TEXT = re.compile(r"([^'\\]|\\(.|$))+") + DOUBLE_QUOTE = re.compile(r'"') + DOUBLE_QUOTE_TEXT = re.compile(r'([^"\\]|\\(.|$))+') + + START_SINGLE_LINE_COMMENT = re.compile(r'//') + END_OF_LINE_SINGLE_LINE_COMMENT = re.compile(r'//$') + + START_DOC_COMMENT = re.compile(r'/\*\*') + START_BLOCK_COMMENT = re.compile(r'/\*') + END_BLOCK_COMMENT = re.compile(r'\*/') + BLOCK_COMMENT_TEXT = re.compile(r'([^*]|\*(?!/))+') + + # Comment text is anything that we are not going to parse into another special + # token like (inline) flags or end comments. Complicated regex to match + # most normal characters, and '*', '{', '}', and '@' when we are sure that + # it is safe. Expression [^*{\s]@ must come first, or the other options will + # match everything before @, and we won't match @'s that aren't part of flags + # like in email addresses in the @author tag. + DOC_COMMENT_TEXT = re.compile(r'([^*{}\s]@|[^*{}@]|\*(?!/))+') + DOC_COMMENT_NO_SPACES_TEXT = re.compile(r'([^*{}\s]@|[^*{}@\s]|\*(?!/))+') + + # Match the prefix ' * ' that starts every line of jsdoc. Want to include + # spaces after the '*', but nothing else that occurs after a '*', and don't + # want to match the '*' in '*/'. + DOC_PREFIX = re.compile(r'\s*\*(\s+|(?!/))') + + START_BLOCK = re.compile('{') + END_BLOCK = re.compile('}') + + REGEX_CHARACTER_CLASS = r""" + \[ # Opening bracket + ([^\]\\]|\\.)* # Anything but a ] or \, + # or a backslash followed by anything + \] # Closing bracket + """ + # We ensure the regex is followed by one of the above tokens to avoid + # incorrectly parsing something like x / y / z as x REGEX(/ y /) z + POST_REGEX_LIST = [ + ';', ',', r'\.', r'\)', r'\]', '$', r'\/\/', r'\/\*', ':', '}'] + + REGEX = re.compile(r""" + / # opening slash + (?!\*) # not the start of a comment + (\\.|[^\[\/\\]|(%s))* # a backslash followed by anything, + # or anything but a / or [ or \, + # or a character class + / # closing slash + [gimsx]* # optional modifiers + (?=\s*(%s)) + """ % (REGEX_CHARACTER_CLASS, '|'.join(POST_REGEX_LIST)), + re.VERBOSE) + + ANYTHING = re.compile(r'.*') + PARAMETERS = re.compile(r'[^\)]+') + CLOSING_PAREN_WITH_SPACE = re.compile(r'\)\s*') + + FUNCTION_DECLARATION = re.compile(r'\bfunction\b') + + OPENING_PAREN = re.compile(r'\(') + CLOSING_PAREN = re.compile(r'\)') + + OPENING_BRACKET = re.compile(r'\[') + CLOSING_BRACKET = re.compile(r'\]') + + # We omit these JS keywords from the list: + # function - covered by FUNCTION_DECLARATION. + # delete, in, instanceof, new, typeof - included as operators. + # this - included in identifiers. + # null, undefined - not included, should go in some "special constant" list. + KEYWORD_LIST = ['break', 'case', 'catch', 'continue', 'default', 'do', 'else', + 'finally', 'for', 'if', 'return', 'switch', 'throw', 'try', 'var', + 'while', 'with'] + # Match a keyword string followed by a non-identifier character in order to + # not match something like doSomething as do + Something. + KEYWORD = re.compile('(%s)((?=[^%s])|$)' % ( + '|'.join(KEYWORD_LIST), IDENTIFIER_CHAR)) + + # List of regular expressions to match as operators. Some notes: for our + # purposes, the comma behaves similarly enough to a normal operator that we + # include it here. r'\bin\b' actually matches 'in' surrounded by boundary + # characters - this may not match some very esoteric uses of the in operator. + # Operators that are subsets of larger operators must come later in this list + # for proper matching, e.g., '>>' must come AFTER '>>>'. + OPERATOR_LIST = [',', r'\+\+', '===', '!==', '>>>=', '>>>', '==', '>=', '<=', + '!=', '<<=', '>>=', '<<', '>>', '>', '<', r'\+=', r'\+', + '--', '\^=', '-=', '-', '/=', '/', r'\*=', r'\*', '%=', '%', + '&&', r'\|\|', '&=', '&', r'\|=', r'\|', '=', '!', ':', '\?', + r'\bdelete\b', r'\bin\b', r'\binstanceof\b', r'\bnew\b', + r'\btypeof\b', r'\bvoid\b'] + OPERATOR = re.compile('|'.join(OPERATOR_LIST)) + + WHITESPACE = re.compile(r'\s+') + SEMICOLON = re.compile(r';') + # Technically JavaScript identifiers can't contain '.', but we treat a set of + # nested identifiers as a single identifier. + NESTED_IDENTIFIER = r'[a-zA-Z_$][%s.]*' % IDENTIFIER_CHAR + IDENTIFIER = re.compile(NESTED_IDENTIFIER) + + SIMPLE_LVALUE = re.compile(r""" + (?P<identifier>%s) # a valid identifier + (?=\s* # optional whitespace + \= # look ahead to equal sign + (?!=)) # not follwed by equal + """ % NESTED_IDENTIFIER, re.VERBOSE) + + # A doc flag is a @ sign followed by non-space characters that appears at the + # beginning of the line, after whitespace, or after a '{'. The look-behind + # check is necessary to not match someone@google.com as a flag. + DOC_FLAG = re.compile(r'(^|(?<=\s))@(?P<name>[a-zA-Z]+)') + # To properly parse parameter names, we need to tokenize whitespace into a + # token. + DOC_FLAG_LEX_SPACES = re.compile(r'(^|(?<=\s))@(?P<name>%s)\b' % + '|'.join(['param'])) + + DOC_INLINE_FLAG = re.compile(r'(?<={)@(?P<name>[a-zA-Z]+)') + + # Star followed by non-slash, i.e a star that does not end a comment. + # This is used for TYPE_GROUP below. + SAFE_STAR = r'(\*(?!/))' + + COMMON_DOC_MATCHERS = [ + # Find the end of the comment. + Matcher(END_BLOCK_COMMENT, Type.END_DOC_COMMENT, + JavaScriptModes.TEXT_MODE), + + # Tokenize documented flags like @private. + Matcher(DOC_INLINE_FLAG, Type.DOC_INLINE_FLAG), + Matcher(DOC_FLAG_LEX_SPACES, Type.DOC_FLAG, + JavaScriptModes.DOC_COMMENT_LEX_SPACES_MODE), + + # Encountering a doc flag should leave lex spaces mode. + Matcher(DOC_FLAG, Type.DOC_FLAG, JavaScriptModes.DOC_COMMENT_MODE), + + # Tokenize braces so we can find types. + Matcher(START_BLOCK, Type.DOC_START_BRACE), + Matcher(END_BLOCK, Type.DOC_END_BRACE), + Matcher(DOC_PREFIX, Type.DOC_PREFIX, None, True)] + + + # The token matcher groups work as follows: it is an list of Matcher objects. + # The matchers will be tried in this order, and the first to match will be + # returned. Hence the order is important because the matchers that come first + # overrule the matchers that come later. + JAVASCRIPT_MATCHERS = { + # Matchers for basic text mode. + JavaScriptModes.TEXT_MODE: [ + # Check a big group - strings, starting comments, and regexes - all + # of which could be intertwined. 'string with /regex/', + # /regex with 'string'/, /* comment with /regex/ and string */ (and so + # on) + Matcher(START_DOC_COMMENT, Type.START_DOC_COMMENT, + JavaScriptModes.DOC_COMMENT_MODE), + Matcher(START_BLOCK_COMMENT, Type.START_BLOCK_COMMENT, + JavaScriptModes.BLOCK_COMMENT_MODE), + Matcher(END_OF_LINE_SINGLE_LINE_COMMENT, + Type.START_SINGLE_LINE_COMMENT), + Matcher(START_SINGLE_LINE_COMMENT, Type.START_SINGLE_LINE_COMMENT, + JavaScriptModes.LINE_COMMENT_MODE), + Matcher(SINGLE_QUOTE, Type.SINGLE_QUOTE_STRING_START, + JavaScriptModes.SINGLE_QUOTE_STRING_MODE), + Matcher(DOUBLE_QUOTE, Type.DOUBLE_QUOTE_STRING_START, + JavaScriptModes.DOUBLE_QUOTE_STRING_MODE), + Matcher(REGEX, Type.REGEX), + + # Next we check for start blocks appearing outside any of the items + # above. + Matcher(START_BLOCK, Type.START_BLOCK), + Matcher(END_BLOCK, Type.END_BLOCK), + + # Then we search for function declarations. + Matcher(FUNCTION_DECLARATION, Type.FUNCTION_DECLARATION, + JavaScriptModes.FUNCTION_MODE), + + # Next, we convert non-function related parens to tokens. + Matcher(OPENING_PAREN, Type.START_PAREN), + Matcher(CLOSING_PAREN, Type.END_PAREN), + + # Next, we convert brackets to tokens. + Matcher(OPENING_BRACKET, Type.START_BRACKET), + Matcher(CLOSING_BRACKET, Type.END_BRACKET), + + # Find numbers. This has to happen before operators because scientific + # notation numbers can have + and - in them. + Matcher(NUMBER, Type.NUMBER), + + # Find operators and simple assignments + Matcher(SIMPLE_LVALUE, Type.SIMPLE_LVALUE), + Matcher(OPERATOR, Type.OPERATOR), + + # Find key words and whitespace. + Matcher(KEYWORD, Type.KEYWORD), + Matcher(WHITESPACE, Type.WHITESPACE), + + # Find identifiers. + Matcher(IDENTIFIER, Type.IDENTIFIER), + + # Finally, we convert semicolons to tokens. + Matcher(SEMICOLON, Type.SEMICOLON)], + + # Matchers for single quote strings. + JavaScriptModes.SINGLE_QUOTE_STRING_MODE: [ + Matcher(SINGLE_QUOTE_TEXT, Type.STRING_TEXT), + Matcher(SINGLE_QUOTE, Type.SINGLE_QUOTE_STRING_END, + JavaScriptModes.TEXT_MODE)], + + # Matchers for double quote strings. + JavaScriptModes.DOUBLE_QUOTE_STRING_MODE: [ + Matcher(DOUBLE_QUOTE_TEXT, Type.STRING_TEXT), + Matcher(DOUBLE_QUOTE, Type.DOUBLE_QUOTE_STRING_END, + JavaScriptModes.TEXT_MODE)], + + # Matchers for block comments. + JavaScriptModes.BLOCK_COMMENT_MODE: [ + # First we check for exiting a block comment. + Matcher(END_BLOCK_COMMENT, Type.END_BLOCK_COMMENT, + JavaScriptModes.TEXT_MODE), + + # Match non-comment-ending text.. + Matcher(BLOCK_COMMENT_TEXT, Type.COMMENT)], + + # Matchers for doc comments. + JavaScriptModes.DOC_COMMENT_MODE: COMMON_DOC_MATCHERS + [ + Matcher(DOC_COMMENT_TEXT, Type.COMMENT)], + + JavaScriptModes.DOC_COMMENT_LEX_SPACES_MODE: COMMON_DOC_MATCHERS + [ + Matcher(WHITESPACE, Type.COMMENT), + Matcher(DOC_COMMENT_NO_SPACES_TEXT, Type.COMMENT)], + + # Matchers for single line comments. + JavaScriptModes.LINE_COMMENT_MODE: [ + # We greedy match until the end of the line in line comment mode. + Matcher(ANYTHING, Type.COMMENT, JavaScriptModes.TEXT_MODE)], + + # Matchers for code after the function keyword. + JavaScriptModes.FUNCTION_MODE: [ + # Must match open paren before anything else and move into parameter + # mode, otherwise everything inside the parameter list is parsed + # incorrectly. + Matcher(OPENING_PAREN, Type.START_PARAMETERS, + JavaScriptModes.PARAMETER_MODE), + Matcher(WHITESPACE, Type.WHITESPACE), + Matcher(IDENTIFIER, Type.FUNCTION_NAME)], + + # Matchers for function parameters + JavaScriptModes.PARAMETER_MODE: [ + # When in function parameter mode, a closing paren is treated specially. + # Everything else is treated as lines of parameters. + Matcher(CLOSING_PAREN_WITH_SPACE, Type.END_PARAMETERS, + JavaScriptModes.TEXT_MODE), + Matcher(PARAMETERS, Type.PARAMETERS, JavaScriptModes.PARAMETER_MODE)]} + + # When text is not matched, it is given this default type based on mode. + # If unspecified in this map, the default default is Type.NORMAL. + JAVASCRIPT_DEFAULT_TYPES = { + JavaScriptModes.DOC_COMMENT_MODE: Type.COMMENT, + JavaScriptModes.DOC_COMMENT_LEX_SPACES_MODE: Type.COMMENT + } + + def __init__(self, parse_js_doc = True): + """Create a tokenizer object. + + Args: + parse_js_doc: Whether to do detailed parsing of javascript doc comments, + or simply treat them as normal comments. Defaults to parsing JsDoc. + """ + matchers = self.JAVASCRIPT_MATCHERS + if not parse_js_doc: + # Make a copy so the original doesn't get modified. + matchers = copy.deepcopy(matchers) + matchers[JavaScriptModes.DOC_COMMENT_MODE] = matchers[ + JavaScriptModes.BLOCK_COMMENT_MODE] + + tokenizer.Tokenizer.__init__(self, JavaScriptModes.TEXT_MODE, matchers, + self.JAVASCRIPT_DEFAULT_TYPES) + + def _CreateToken(self, string, token_type, line, line_number, values=None): + """Creates a new JavaScriptToken object. + + Args: + string: The string of input the token contains. + token_type: The type of token. + line: The text of the line this token is in. + line_number: The line number of the token. + values: A dict of named values within the token. For instance, a + function declaration may have a value called 'name' which captures the + name of the function. + """ + return javascripttokens.JavaScriptToken(string, token_type, line, + line_number, values) diff --git a/third_party/closure_linter/closure_linter/javascripttokens.py b/third_party/closure_linter/closure_linter/javascripttokens.py new file mode 100755 index 0000000..f46d4e1 --- /dev/null +++ b/third_party/closure_linter/closure_linter/javascripttokens.py @@ -0,0 +1,147 @@ +#!/usr/bin/env python +# +# Copyright 2008 The Closure Linter Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Classes to represent JavaScript tokens.""" + +__author__ = ('robbyw@google.com (Robert Walker)', + 'ajp@google.com (Andy Perelson)') + +from closure_linter.common import tokens + +class JavaScriptTokenType(tokens.TokenType): + """Enumeration of JavaScript token types, and useful sets of token types.""" + NUMBER = 'number' + START_SINGLE_LINE_COMMENT = '//' + START_BLOCK_COMMENT = '/*' + START_DOC_COMMENT = '/**' + END_BLOCK_COMMENT = '*/' + END_DOC_COMMENT = 'doc */' + COMMENT = 'comment' + SINGLE_QUOTE_STRING_START = "'string" + SINGLE_QUOTE_STRING_END = "string'" + DOUBLE_QUOTE_STRING_START = '"string' + DOUBLE_QUOTE_STRING_END = 'string"' + STRING_TEXT = 'string' + START_BLOCK = '{' + END_BLOCK = '}' + START_PAREN = '(' + END_PAREN = ')' + START_BRACKET = '[' + END_BRACKET = ']' + REGEX = '/regex/' + FUNCTION_DECLARATION = 'function(...)' + FUNCTION_NAME = 'function functionName(...)' + START_PARAMETERS = 'startparams(' + PARAMETERS = 'pa,ra,ms' + END_PARAMETERS = ')endparams' + SEMICOLON = ';' + DOC_FLAG = '@flag' + DOC_INLINE_FLAG = '{@flag ...}' + DOC_START_BRACE = 'doc {' + DOC_END_BRACE = 'doc }' + DOC_PREFIX = 'comment prefix: * ' + SIMPLE_LVALUE = 'lvalue=' + KEYWORD = 'keyword' + OPERATOR = 'operator' + IDENTIFIER = 'identifier' + + STRING_TYPES = frozenset([ + SINGLE_QUOTE_STRING_START, SINGLE_QUOTE_STRING_END, + DOUBLE_QUOTE_STRING_START, DOUBLE_QUOTE_STRING_END, STRING_TEXT]) + + COMMENT_TYPES = frozenset([START_SINGLE_LINE_COMMENT, COMMENT, + START_BLOCK_COMMENT, START_DOC_COMMENT, + END_BLOCK_COMMENT, END_DOC_COMMENT, + DOC_START_BRACE, DOC_END_BRACE, + DOC_FLAG, DOC_INLINE_FLAG, DOC_PREFIX]) + + FLAG_DESCRIPTION_TYPES = frozenset([ + DOC_INLINE_FLAG, COMMENT, DOC_START_BRACE, DOC_END_BRACE]) + + FLAG_ENDING_TYPES = frozenset([DOC_FLAG, END_DOC_COMMENT]) + + NON_CODE_TYPES = COMMENT_TYPES | frozenset([ + tokens.TokenType.WHITESPACE, tokens.TokenType.BLANK_LINE]) + + UNARY_OPERATORS = ['!', 'new', 'delete', 'typeof', 'void'] + + UNARY_OK_OPERATORS = ['--', '++', '-', '+'] + UNARY_OPERATORS + + UNARY_POST_OPERATORS = ['--', '++'] + + # An expression ender is any token that can end an object - i.e. we could have + # x.y or [1, 2], or (10 + 9) or {a: 10}. + EXPRESSION_ENDER_TYPES = [tokens.TokenType.NORMAL, IDENTIFIER, NUMBER, + SIMPLE_LVALUE, END_BRACKET, END_PAREN, END_BLOCK, + SINGLE_QUOTE_STRING_END, DOUBLE_QUOTE_STRING_END] + + +class JavaScriptToken(tokens.Token): + """JavaScript token subclass of Token, provides extra instance checks. + + The following token types have data in attached_object: + - All JsDoc flags: a parser.JsDocFlag object. + """ + + def IsKeyword(self, keyword): + """Tests if this token is the given keyword. + + Args: + keyword: The keyword to compare to. + + Returns: + True if this token is a keyword token with the given name. + """ + return self.type == JavaScriptTokenType.KEYWORD and self.string == keyword + + def IsOperator(self, operator): + """Tests if this token is the given operator. + + Args: + operator: The operator to compare to. + + Returns: + True if this token is a operator token with the given name. + """ + return self.type == JavaScriptTokenType.OPERATOR and self.string == operator + + def IsAssignment(self): + """Tests if this token is an assignment operator. + + Returns: + True if this token is an assignment operator. + """ + return (self.type == JavaScriptTokenType.OPERATOR and + self.string.endswith('=') and + self.string not in ('==', '!=', '>=', '<=', '===', '!==')) + + def IsComment(self): + """Tests if this token is any part of a comment. + + Returns: + True if this token is any part of a comment. + """ + return self.type in JavaScriptTokenType.COMMENT_TYPES + + def IsCode(self): + """Tests if this token is code, as opposed to a comment or whitespace.""" + return self.type not in JavaScriptTokenType.NON_CODE_TYPES + + def __repr__(self): + return '<JavaScriptToken: %d, %s, "%s", %r, %r>' % (self.line_number, + self.type, self.string, + self.values, + self.metadata) diff --git a/third_party/closure_linter/closure_linter/not_strict_test.py b/third_party/closure_linter/closure_linter/not_strict_test.py new file mode 100755 index 0000000..8df8efc --- /dev/null +++ b/third_party/closure_linter/closure_linter/not_strict_test.py @@ -0,0 +1,74 @@ +#!/usr/bin/env python +# +# Copyright 2011 The Closure Linter Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for gjslint --nostrict. + +Tests errors that can be thrown by gjslint when not in strict mode. +""" + + + +import os +import sys +import unittest + +import gflags as flags +import unittest as googletest + +from closure_linter import checker +from closure_linter import errors +from closure_linter.common import filetestcase + +_RESOURCE_PREFIX = 'closure_linter/testdata' + +flags.FLAGS.strict = False +flags.FLAGS.custom_jsdoc_tags = ('customtag', 'requires') +flags.FLAGS.closurized_namespaces = ('goog', 'dummy') +flags.FLAGS.limited_doc_files = ('externs.js', 'dummy.js', + 'limited_doc_checks.js') + + +# List of files under testdata to test. +# We need to list files explicitly since pyglib can't list directories. +_TEST_FILES = [ + 'not_strict.js' + ] + + +class GJsLintTestSuite(unittest.TestSuite): + """Test suite to run a GJsLintTest for each of several files. + + If sys.argv[1:] is non-empty, it is interpreted as a list of filenames in + testdata to test. Otherwise, _TEST_FILES is used. + """ + + def __init__(self, tests=()): + unittest.TestSuite.__init__(self, tests) + + argv = sys.argv and sys.argv[1:] or [] + if argv: + test_files = argv + else: + test_files = _TEST_FILES + for test_file in test_files: + resource_path = os.path.join(_RESOURCE_PREFIX, test_file) + self.addTest(filetestcase.AnnotatedFileTestCase(resource_path, + checker.GJsLintRunner(), + errors.ByName)) + +if __name__ == '__main__': + # Don't let main parse args; it happens in the TestSuite. + googletest.main(argv=sys.argv[0:1], defaultTest='GJsLintTestSuite') diff --git a/third_party/closure_linter/closure_linter/requireprovidesorter.py b/third_party/closure_linter/closure_linter/requireprovidesorter.py new file mode 100755 index 0000000..6dda3ae --- /dev/null +++ b/third_party/closure_linter/closure_linter/requireprovidesorter.py @@ -0,0 +1,272 @@ +#!/usr/bin/env python +# +# Copyright 2011 The Closure Linter Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Contains logic for sorting goog.provide and goog.require statements. + +Closurized JavaScript files use goog.provide and goog.require statements at the +top of the file to manage dependencies. These statements should be sorted +alphabetically, however, it is common for them to be accompanied by inline +comments or suppression annotations. In order to sort these statements without +disrupting their comments and annotations, the association between statements +and comments/annotations must be maintained while sorting. + + RequireProvideSorter: Handles checking/fixing of provide/require statements. +""" + + + +from closure_linter import javascripttokens +from closure_linter import tokenutil + +# Shorthand +Type = javascripttokens.JavaScriptTokenType + + +class RequireProvideSorter(object): + """Checks for and fixes alphabetization of provide and require statements. + + When alphabetizing, comments on the same line or comments directly above a + goog.provide or goog.require statement are associated with that statement and + stay with the statement as it gets sorted. + """ + + def CheckProvides(self, token): + """Checks alphabetization of goog.provide statements. + + Iterates over tokens in given token stream, identifies goog.provide tokens, + and checks that they occur in alphabetical order by the object being + provided. + + Args: + token: A token in the token stream before any goog.provide tokens. + + Returns: + A tuple containing the first provide token in the token stream and a list + of provided objects sorted alphabetically. For example: + + (JavaScriptToken, ['object.a', 'object.b', ...]) + + None is returned if all goog.provide statements are already sorted. + """ + provide_tokens = self._GetRequireOrProvideTokens(token, 'goog.provide') + provide_strings = self._GetRequireOrProvideTokenStrings(provide_tokens) + sorted_provide_strings = sorted(provide_strings) + if provide_strings != sorted_provide_strings: + return [provide_tokens[0], sorted_provide_strings] + return None + + def CheckRequires(self, token): + """Checks alphabetization of goog.require statements. + + Iterates over tokens in given token stream, identifies goog.require tokens, + and checks that they occur in alphabetical order by the dependency being + required. + + Args: + token: A token in the token stream before any goog.require tokens. + + Returns: + A tuple containing the first require token in the token stream and a list + of required dependencies sorted alphabetically. For example: + + (JavaScriptToken, ['object.a', 'object.b', ...]) + + None is returned if all goog.require statements are already sorted. + """ + require_tokens = self._GetRequireOrProvideTokens(token, 'goog.require') + require_strings = self._GetRequireOrProvideTokenStrings(require_tokens) + sorted_require_strings = sorted(require_strings) + if require_strings != sorted_require_strings: + return (require_tokens[0], sorted_require_strings) + return None + + def FixProvides(self, token): + """Sorts goog.provide statements in the given token stream alphabetically. + + Args: + token: The first token in the token stream. + """ + self._FixProvidesOrRequires( + self._GetRequireOrProvideTokens(token, 'goog.provide')) + + def FixRequires(self, token): + """Sorts goog.require statements in the given token stream alphabetically. + + Args: + token: The first token in the token stream. + """ + self._FixProvidesOrRequires( + self._GetRequireOrProvideTokens(token, 'goog.require')) + + def _FixProvidesOrRequires(self, tokens): + """Sorts goog.provide or goog.require statements. + + Args: + tokens: A list of goog.provide or goog.require tokens in the order they + appear in the token stream. i.e. the first token in this list must + be the first goog.provide or goog.require token. + """ + strings = self._GetRequireOrProvideTokenStrings(tokens) + sorted_strings = sorted(strings) + + # Make a separate pass to remove any blank lines between goog.require/ + # goog.provide tokens. + first_token = tokens[0] + last_token = tokens[-1] + i = last_token + while i != first_token: + if i.type is Type.BLANK_LINE: + tokenutil.DeleteToken(i) + i = i.previous + + # A map from required/provided object name to tokens that make up the line + # it was on, including any comments immediately before it or after it on the + # same line. + tokens_map = self._GetTokensMap(tokens) + + # Iterate over the map removing all tokens. + for name in tokens_map: + tokens_to_delete = tokens_map[name] + for i in tokens_to_delete: + tokenutil.DeleteToken(i) + + # Re-add all tokens in the map in alphabetical order. + insert_after = tokens[0].previous + for string in sorted_strings: + for i in tokens_map[string]: + tokenutil.InsertTokenAfter(i, insert_after) + insert_after = i + + def _GetRequireOrProvideTokens(self, token, token_string): + """Gets all goog.provide or goog.require tokens in the given token stream. + + Args: + token: The first token in the token stream. + token_string: One of 'goog.provide' or 'goog.require' to indicate which + tokens to find. + + Returns: + A list of goog.provide or goog.require tokens in the order they appear in + the token stream. + """ + tokens = [] + while token: + if token.type == Type.IDENTIFIER: + if token.string == token_string: + tokens.append(token) + elif token.string not in ['goog.require', 'goog.provide']: + # The goog.provide and goog.require identifiers are at the top of the + # file. So if any other identifier is encountered, return. + break + token = token.next + + return tokens + + def _GetRequireOrProvideTokenStrings(self, tokens): + """Gets a list of strings corresponding to the given list of tokens. + + The string will be the next string in the token stream after each token in + tokens. This is used to find the object being provided/required by a given + goog.provide or goog.require token. + + Args: + tokens: A list of goog.provide or goog.require tokens. + + Returns: + A list of object names that are being provided or required by the given + list of tokens. For example: + + ['object.a', 'object.c', 'object.b'] + """ + token_strings = [] + for token in tokens: + name = tokenutil.Search(token, Type.STRING_TEXT).string + token_strings.append(name) + return token_strings + + def _GetTokensMap(self, tokens): + """Gets a map from object name to tokens associated with that object. + + Starting from the goog.provide/goog.require token, searches backwards in the + token stream for any lines that start with a comment. These lines are + associated with the goog.provide/goog.require token. Also associates any + tokens on the same line as the goog.provide/goog.require token with that + token. + + Args: + tokens: A list of goog.provide or goog.require tokens. + + Returns: + A dictionary that maps object names to the tokens associated with the + goog.provide or goog.require of that object name. For example: + + { + 'object.a': [JavaScriptToken, JavaScriptToken, ...], + 'object.b': [...] + } + + The list of tokens includes any comment lines above the goog.provide or + goog.require statement and everything after the statement on the same + line. For example, all of the following would be associated with + 'object.a': + + /** @suppress {extraRequire} */ + goog.require('object.a'); // Some comment. + """ + tokens_map = {} + for token in tokens: + object_name = tokenutil.Search(token, Type.STRING_TEXT).string + # If the previous line starts with a comment, presume that the comment + # relates to the goog.require or goog.provide and keep them together when + # sorting. + first_token = token + previous_first_token = tokenutil.GetFirstTokenInPreviousLine(first_token) + while previous_first_token.IsAnyType(Type.COMMENT_TYPES): + first_token = previous_first_token + previous_first_token = tokenutil.GetFirstTokenInPreviousLine( + first_token) + + # Find the last token on the line. + last_token = tokenutil.GetLastTokenInSameLine(token) + + all_tokens = self._GetTokenList(first_token, last_token) + tokens_map[object_name] = all_tokens + return tokens_map + + def _GetTokenList(self, first_token, last_token): + """Gets a list of all tokens from first_token to last_token, inclusive. + + Args: + first_token: The first token to get. + last_token: The last token to get. + + Returns: + A list of all tokens between first_token and last_token, including both + first_token and last_token. + + Raises: + Exception: If the token stream ends before last_token is reached. + """ + token_list = [] + token = first_token + while token != last_token: + if not token: + raise Exception('ran out of tokens') + token_list.append(token) + token = token.next + token_list.append(last_token) + + return token_list diff --git a/third_party/closure_linter/closure_linter/requireprovidesorter_test.py b/third_party/closure_linter/closure_linter/requireprovidesorter_test.py new file mode 100644 index 0000000..d1d61dc --- /dev/null +++ b/third_party/closure_linter/closure_linter/requireprovidesorter_test.py @@ -0,0 +1,74 @@ +#!/usr/bin/env python +# +# Copyright 2012 The Closure Linter Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Unit tests for RequireProvideSorter.""" + + + +import unittest as googletest +from closure_linter import ecmametadatapass +from closure_linter import javascripttokenizer +from closure_linter import javascripttokens +from closure_linter import requireprovidesorter + +# pylint: disable-msg=C6409 +TokenType = javascripttokens.JavaScriptTokenType + + +class RequireProvideSorterTest(googletest.TestCase): + """Tests for RequireProvideSorter.""" + + _tokenizer = javascripttokenizer.JavaScriptTokenizer() + _metadata_pass = ecmametadatapass.EcmaMetaDataPass() + + def testFixRequires_removeBlankLines(self): + """Tests that blank lines are omitted in sorted goog.require statements.""" + input_lines = [ + 'goog.provide(\'package.subpackage.Whatever\');', + '', + 'goog.require(\'package.subpackage.ClassB\');', + '', + 'goog.require(\'package.subpackage.ClassA\');' + ] + expected_lines = [ + 'goog.provide(\'package.subpackage.Whatever\');', + '', + 'goog.require(\'package.subpackage.ClassA\');', + 'goog.require(\'package.subpackage.ClassB\');' + ] + token = self._tokenizer.TokenizeFile(input_lines) + self._metadata_pass.Reset() + self._metadata_pass.Process(token) + + sorter = requireprovidesorter.RequireProvideSorter() + sorter.FixRequires(token) + + self.assertEquals(expected_lines, self._GetLines(token)) + + def _GetLines(self, token): + """Returns an array of lines based on the specified token stream.""" + lines = [] + line = '' + while token: + line += token.string + if token.IsLastInLine(): + lines.append(line) + line = '' + token = token.next + return lines + +if __name__ == '__main__': + googletest.main() diff --git a/third_party/closure_linter/closure_linter/statetracker.py b/third_party/closure_linter/closure_linter/statetracker.py new file mode 100755 index 0000000..52e86a9 --- /dev/null +++ b/third_party/closure_linter/closure_linter/statetracker.py @@ -0,0 +1,1008 @@ +#!/usr/bin/env python +# +# Copyright 2007 The Closure Linter Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Light weight EcmaScript state tracker that reads tokens and tracks state.""" + +__author__ = ('robbyw@google.com (Robert Walker)', + 'ajp@google.com (Andy Perelson)') + +import re + +from closure_linter import javascripttokenizer +from closure_linter import javascripttokens +from closure_linter import tokenutil + +# Shorthand +Type = javascripttokens.JavaScriptTokenType + + +class DocFlag(object): + """Generic doc flag object. + + Attribute: + flag_type: param, return, define, type, etc. + flag_token: The flag token. + type_start_token: The first token specifying the flag type, + including braces. + type_end_token: The last token specifying the flag type, + including braces. + type: The type spec. + name_token: The token specifying the flag name. + name: The flag name + description_start_token: The first token in the description. + description_end_token: The end token in the description. + description: The description. + """ + + # Please keep these lists alphabetized. + + # The list of standard jsdoc tags is from + STANDARD_DOC = frozenset([ + 'author', + 'bug', + 'const', + 'constructor', + 'define', + 'deprecated', + 'enum', + 'export', + 'extends', + 'externs', + 'fileoverview', + 'implements', + 'implicitCast', + 'interface', + 'lends', + 'license', + 'noalias', + 'nocompile', + 'nosideeffects', + 'override', + 'owner', + 'param', + 'preserve', + 'private', + 'return', + 'see', + 'supported', + 'template', + 'this', + 'type', + 'typedef', + ]) + + ANNOTATION = frozenset(['preserveTry', 'suppress']) + + LEGAL_DOC = STANDARD_DOC | ANNOTATION + + # Includes all Closure Compiler @suppress types. + # Not all of these annotations are interpreted by Closure Linter. + # + # Specific cases: + # - accessControls is supported by the compiler at the expression + # and method level to suppress warnings about private/protected + # access (method level applies to all references in the method). + # The linter mimics the compiler behavior. + SUPPRESS_TYPES = frozenset([ + 'accessControls', + 'ambiguousFunctionDecl', + 'checkRegExp', + 'checkTypes', + 'checkVars', + 'const', + 'constantProperty', + 'deprecated', + 'duplicate', + 'es5Strict', + 'externsValidation', + 'extraProvide', + 'extraRequire', + 'fileoverviewTags', + 'globalThis', + 'internetExplorerChecks', + 'invalidCasts', + 'missingProperties', + 'missingProvide', + 'missingRequire', + 'nonStandardJsDocs', + 'strictModuleDepCheck', + 'tweakValidation', + 'typeInvalidation', + 'undefinedNames', + 'undefinedVars', + 'underscore', + 'unknownDefines', + 'uselessCode', + 'visibility', + 'with']) + + HAS_DESCRIPTION = frozenset([ + 'define', 'deprecated', 'desc', 'fileoverview', 'license', 'param', + 'preserve', 'return', 'supported']) + + HAS_TYPE = frozenset([ + 'define', 'enum', 'extends', 'implements', 'param', 'return', 'type', + 'suppress']) + + TYPE_ONLY = frozenset(['enum', 'extends', 'implements', 'suppress', 'type']) + + HAS_NAME = frozenset(['param']) + + EMPTY_COMMENT_LINE = re.compile(r'^\s*\*?\s*$') + EMPTY_STRING = re.compile(r'^\s*$') + + def __init__(self, flag_token): + """Creates the DocFlag object and attaches it to the given start token. + + Args: + flag_token: The starting token of the flag. + """ + self.flag_token = flag_token + self.flag_type = flag_token.string.strip().lstrip('@') + + # Extract type, if applicable. + self.type = None + self.type_start_token = None + self.type_end_token = None + if self.flag_type in self.HAS_TYPE: + brace = tokenutil.SearchUntil(flag_token, [Type.DOC_START_BRACE], + Type.FLAG_ENDING_TYPES) + if brace: + end_token, contents = _GetMatchingEndBraceAndContents(brace) + self.type = contents + self.type_start_token = brace + self.type_end_token = end_token + elif (self.flag_type in self.TYPE_ONLY and + flag_token.next.type not in Type.FLAG_ENDING_TYPES): + self.type_start_token = flag_token.next + self.type_end_token, self.type = _GetEndTokenAndContents( + self.type_start_token) + if self.type is not None: + self.type = self.type.strip() + + # Extract name, if applicable. + self.name_token = None + self.name = None + if self.flag_type in self.HAS_NAME: + # Handle bad case, name could be immediately after flag token. + self.name_token = _GetNextIdentifierToken(flag_token) + + # Handle good case, if found token is after type start, look for + # identifier after type end, since types contain identifiers. + if (self.type and self.name_token and + tokenutil.Compare(self.name_token, self.type_start_token) > 0): + self.name_token = _GetNextIdentifierToken(self.type_end_token) + + if self.name_token: + self.name = self.name_token.string + + # Extract description, if applicable. + self.description_start_token = None + self.description_end_token = None + self.description = None + if self.flag_type in self.HAS_DESCRIPTION: + search_start_token = flag_token + if self.name_token and self.type_end_token: + if tokenutil.Compare(self.type_end_token, self.name_token) > 0: + search_start_token = self.type_end_token + else: + search_start_token = self.name_token + elif self.name_token: + search_start_token = self.name_token + elif self.type: + search_start_token = self.type_end_token + + interesting_token = tokenutil.Search(search_start_token, + Type.FLAG_DESCRIPTION_TYPES | Type.FLAG_ENDING_TYPES) + if interesting_token.type in Type.FLAG_DESCRIPTION_TYPES: + self.description_start_token = interesting_token + self.description_end_token, self.description = ( + _GetEndTokenAndContents(interesting_token)) + + +class DocComment(object): + """JavaScript doc comment object. + + Attributes: + ordered_params: Ordered list of parameters documented. + start_token: The token that starts the doc comment. + end_token: The token that ends the doc comment. + suppressions: Map of suppression type to the token that added it. + """ + def __init__(self, start_token): + """Create the doc comment object. + + Args: + start_token: The first token in the doc comment. + """ + self.__params = {} + self.ordered_params = [] + self.__flags = {} + self.start_token = start_token + self.end_token = None + self.suppressions = {} + self.invalidated = False + + def Invalidate(self): + """Indicate that the JSDoc is well-formed but we had problems parsing it. + + This is a short-circuiting mechanism so that we don't emit false + positives about well-formed doc comments just because we don't support + hot new syntaxes. + """ + self.invalidated = True + + def IsInvalidated(self): + """Test whether Invalidate() has been called.""" + return self.invalidated + + def AddParam(self, name, param_type): + """Add a new documented parameter. + + Args: + name: The name of the parameter to document. + param_type: The parameter's declared JavaScript type. + """ + self.ordered_params.append(name) + self.__params[name] = param_type + + def AddSuppression(self, token): + """Add a new error suppression flag. + + Args: + token: The suppression flag token. + """ + #TODO(user): Error if no braces + brace = tokenutil.SearchUntil(token, [Type.DOC_START_BRACE], + [Type.DOC_FLAG]) + if brace: + end_token, contents = _GetMatchingEndBraceAndContents(brace) + for suppression in contents.split('|'): + self.suppressions[suppression] = token + + def SuppressionOnly(self): + """Returns whether this comment contains only suppression flags.""" + for flag_type in self.__flags.keys(): + if flag_type != 'suppress': + return False + return True + + def AddFlag(self, flag): + """Add a new document flag. + + Args: + flag: DocFlag object. + """ + self.__flags[flag.flag_type] = flag + + def InheritsDocumentation(self): + """Test if the jsdoc implies documentation inheritance. + + Returns: + True if documentation may be pulled off the superclass. + """ + return self.HasFlag('inheritDoc') or self.HasFlag('override') + + def HasFlag(self, flag_type): + """Test if the given flag has been set. + + Args: + flag_type: The type of the flag to check. + + Returns: + True if the flag is set. + """ + return flag_type in self.__flags + + def GetFlag(self, flag_type): + """Gets the last flag of the given type. + + Args: + flag_type: The type of the flag to get. + + Returns: + The last instance of the given flag type in this doc comment. + """ + return self.__flags[flag_type] + + def CompareParameters(self, params): + """Computes the edit distance and list from the function params to the docs. + + Uses the Levenshtein edit distance algorithm, with code modified from + http://en.wikibooks.org/wiki/Algorithm_implementation/Strings/Levenshtein_distance#Python + + Args: + params: The parameter list for the function declaration. + + Returns: + The edit distance, the edit list. + """ + source_len, target_len = len(self.ordered_params), len(params) + edit_lists = [[]] + distance = [[]] + for i in range(target_len+1): + edit_lists[0].append(['I'] * i) + distance[0].append(i) + + for j in range(1, source_len+1): + edit_lists.append([['D'] * j]) + distance.append([j]) + + for i in range(source_len): + for j in range(target_len): + cost = 1 + if self.ordered_params[i] == params[j]: + cost = 0 + + deletion = distance[i][j+1] + 1 + insertion = distance[i+1][j] + 1 + substitution = distance[i][j] + cost + + edit_list = None + best = None + if deletion <= insertion and deletion <= substitution: + # Deletion is best. + best = deletion + edit_list = list(edit_lists[i][j+1]) + edit_list.append('D') + + elif insertion <= substitution: + # Insertion is best. + best = insertion + edit_list = list(edit_lists[i+1][j]) + edit_list.append('I') + edit_lists[i+1].append(edit_list) + + else: + # Substitution is best. + best = substitution + edit_list = list(edit_lists[i][j]) + if cost: + edit_list.append('S') + else: + edit_list.append('=') + + edit_lists[i+1].append(edit_list) + distance[i+1].append(best) + + return distance[source_len][target_len], edit_lists[source_len][target_len] + + def __repr__(self): + """Returns a string representation of this object. + + Returns: + A string representation of this object. + """ + return '<DocComment: %s, %s>' % (str(self.__params), str(self.__flags)) + + +# +# Helper methods used by DocFlag and DocComment to parse out flag information. +# + + +def _GetMatchingEndBraceAndContents(start_brace): + """Returns the matching end brace and contents between the two braces. + + If any FLAG_ENDING_TYPE token is encountered before a matching end brace, then + that token is used as the matching ending token. Contents will have all + comment prefixes stripped out of them, and all comment prefixes in between the + start and end tokens will be split out into separate DOC_PREFIX tokens. + + Args: + start_brace: The DOC_START_BRACE token immediately before desired contents. + + Returns: + The matching ending token (DOC_END_BRACE or FLAG_ENDING_TYPE) and a string + of the contents between the matching tokens, minus any comment prefixes. + """ + open_count = 1 + close_count = 0 + contents = [] + + # We don't consider the start brace part of the type string. + token = start_brace.next + while open_count != close_count: + if token.type == Type.DOC_START_BRACE: + open_count += 1 + elif token.type == Type.DOC_END_BRACE: + close_count += 1 + + if token.type != Type.DOC_PREFIX: + contents.append(token.string) + + if token.type in Type.FLAG_ENDING_TYPES: + break + token = token.next + + #Don't include the end token (end brace, end doc comment, etc.) in type. + token = token.previous + contents = contents[:-1] + + return token, ''.join(contents) + + +def _GetNextIdentifierToken(start_token): + """Searches for and returns the first identifier at the beginning of a token. + + Searches each token after the start to see if it starts with an identifier. + If found, will split the token into at most 3 piecies: leading whitespace, + identifier, rest of token, returning the identifier token. If no identifier is + found returns None and changes no tokens. Search is abandoned when a + FLAG_ENDING_TYPE token is found. + + Args: + start_token: The token to start searching after. + + Returns: + The identifier token is found, None otherwise. + """ + token = start_token.next + + while token and not token.type in Type.FLAG_ENDING_TYPES: + match = javascripttokenizer.JavaScriptTokenizer.IDENTIFIER.match( + token.string) + if (match is not None and token.type == Type.COMMENT and + len(token.string) == len(match.group(0))): + return token + + token = token.next + + return None + + +def _GetEndTokenAndContents(start_token): + """Returns last content token and all contents before FLAG_ENDING_TYPE token. + + Comment prefixes are split into DOC_PREFIX tokens and stripped from the + returned contents. + + Args: + start_token: The token immediately before the first content token. + + Returns: + The last content token and a string of all contents including start and + end tokens, with comment prefixes stripped. + """ + iterator = start_token + last_line = iterator.line_number + last_token = None + contents = '' + doc_depth = 0 + while not iterator.type in Type.FLAG_ENDING_TYPES or doc_depth > 0: + if (iterator.IsFirstInLine() and + DocFlag.EMPTY_COMMENT_LINE.match(iterator.line)): + # If we have a blank comment line, consider that an implicit + # ending of the description. This handles a case like: + # + # * @return {boolean} True + # * + # * Note: This is a sentence. + # + # The note is not part of the @return description, but there was + # no definitive ending token. Rather there was a line containing + # only a doc comment prefix or whitespace. + break + + # b/2983692 + # don't prematurely match against a @flag if inside a doc flag + # need to think about what is the correct behavior for unterminated + # inline doc flags + if (iterator.type == Type.DOC_START_BRACE and + iterator.next.type == Type.DOC_INLINE_FLAG): + doc_depth += 1 + elif (iterator.type == Type.DOC_END_BRACE and + doc_depth > 0): + doc_depth -= 1 + + if iterator.type in Type.FLAG_DESCRIPTION_TYPES: + contents += iterator.string + last_token = iterator + + iterator = iterator.next + if iterator.line_number != last_line: + contents += '\n' + last_line = iterator.line_number + + end_token = last_token + if DocFlag.EMPTY_STRING.match(contents): + contents = None + else: + # Strip trailing newline. + contents = contents[:-1] + + return end_token, contents + + +class Function(object): + """Data about a JavaScript function. + + Attributes: + block_depth: Block depth the function began at. + doc: The DocComment associated with the function. + has_return: If the function has a return value. + has_this: If the function references the 'this' object. + is_assigned: If the function is part of an assignment. + is_constructor: If the function is a constructor. + name: The name of the function, whether given in the function keyword or + as the lvalue the function is assigned to. + """ + + def __init__(self, block_depth, is_assigned, doc, name): + self.block_depth = block_depth + self.is_assigned = is_assigned + self.is_constructor = doc and doc.HasFlag('constructor') + self.is_interface = doc and doc.HasFlag('interface') + self.has_return = False + self.has_throw = False + self.has_this = False + self.name = name + self.doc = doc + + +class StateTracker(object): + """EcmaScript state tracker. + + Tracks block depth, function names, etc. within an EcmaScript token stream. + """ + + OBJECT_LITERAL = 'o' + CODE = 'c' + + def __init__(self, doc_flag=DocFlag): + """Initializes a JavaScript token stream state tracker. + + Args: + doc_flag: An optional custom DocFlag used for validating + documentation flags. + """ + self._doc_flag = doc_flag + self.Reset() + + def Reset(self): + """Resets the state tracker to prepare for processing a new page.""" + self._block_depth = 0 + self._is_block_close = False + self._paren_depth = 0 + self._functions = [] + self._functions_by_name = {} + self._last_comment = None + self._doc_comment = None + self._cumulative_params = None + self._block_types = [] + self._last_non_space_token = None + self._last_line = None + self._first_token = None + self._documented_identifiers = set() + + def InFunction(self): + """Returns true if the current token is within a function. + + Returns: + True if the current token is within a function. + """ + return bool(self._functions) + + def InConstructor(self): + """Returns true if the current token is within a constructor. + + Returns: + True if the current token is within a constructor. + """ + return self.InFunction() and self._functions[-1].is_constructor + + def InInterfaceMethod(self): + """Returns true if the current token is within an interface method. + + Returns: + True if the current token is within an interface method. + """ + if self.InFunction(): + if self._functions[-1].is_interface: + return True + else: + name = self._functions[-1].name + prototype_index = name.find('.prototype.') + if prototype_index != -1: + class_function_name = name[0:prototype_index] + if (class_function_name in self._functions_by_name and + self._functions_by_name[class_function_name].is_interface): + return True + + return False + + def InTopLevelFunction(self): + """Returns true if the current token is within a top level function. + + Returns: + True if the current token is within a top level function. + """ + return len(self._functions) == 1 and self.InTopLevel() + + def InAssignedFunction(self): + """Returns true if the current token is within a function variable. + + Returns: + True if if the current token is within a function variable + """ + return self.InFunction() and self._functions[-1].is_assigned + + def IsFunctionOpen(self): + """Returns true if the current token is a function block open. + + Returns: + True if the current token is a function block open. + """ + return (self._functions and + self._functions[-1].block_depth == self._block_depth - 1) + + def IsFunctionClose(self): + """Returns true if the current token is a function block close. + + Returns: + True if the current token is a function block close. + """ + return (self._functions and + self._functions[-1].block_depth == self._block_depth) + + def InBlock(self): + """Returns true if the current token is within a block. + + Returns: + True if the current token is within a block. + """ + return bool(self._block_depth) + + def IsBlockClose(self): + """Returns true if the current token is a block close. + + Returns: + True if the current token is a block close. + """ + return self._is_block_close + + def InObjectLiteral(self): + """Returns true if the current token is within an object literal. + + Returns: + True if the current token is within an object literal. + """ + return self._block_depth and self._block_types[-1] == self.OBJECT_LITERAL + + def InObjectLiteralDescendant(self): + """Returns true if the current token has an object literal ancestor. + + Returns: + True if the current token has an object literal ancestor. + """ + return self.OBJECT_LITERAL in self._block_types + + def InParentheses(self): + """Returns true if the current token is within parentheses. + + Returns: + True if the current token is within parentheses. + """ + return bool(self._paren_depth) + + def InTopLevel(self): + """Whether we are at the top level in the class. + + This function call is language specific. In some languages like + JavaScript, a function is top level if it is not inside any parenthesis. + In languages such as ActionScript, a function is top level if it is directly + within a class. + """ + raise TypeError('Abstract method InTopLevel not implemented') + + def GetBlockType(self, token): + """Determine the block type given a START_BLOCK token. + + Code blocks come after parameters, keywords like else, and closing parens. + + Args: + token: The current token. Can be assumed to be type START_BLOCK. + Returns: + Code block type for current token. + """ + raise TypeError('Abstract method GetBlockType not implemented') + + def GetParams(self): + """Returns the accumulated input params as an array. + + In some EcmasSript languages, input params are specified like + (param:Type, param2:Type2, ...) + in other they are specified just as + (param, param2) + We handle both formats for specifying parameters here and leave + it to the compilers for each language to detect compile errors. + This allows more code to be reused between lint checkers for various + EcmaScript languages. + + Returns: + The accumulated input params as an array. + """ + params = [] + if self._cumulative_params: + params = re.compile(r'\s+').sub('', self._cumulative_params).split(',') + # Strip out the type from parameters of the form name:Type. + params = map(lambda param: param.split(':')[0], params) + + return params + + def GetLastComment(self): + """Return the last plain comment that could be used as documentation. + + Returns: + The last plain comment that could be used as documentation. + """ + return self._last_comment + + def GetDocComment(self): + """Return the most recent applicable documentation comment. + + Returns: + The last applicable documentation comment. + """ + return self._doc_comment + + def HasDocComment(self, identifier): + """Returns whether the identifier has been documented yet. + + Args: + identifier: The identifier. + + Returns: + Whether the identifier has been documented yet. + """ + return identifier in self._documented_identifiers + + def InDocComment(self): + """Returns whether the current token is in a doc comment. + + Returns: + Whether the current token is in a doc comment. + """ + return self._doc_comment and self._doc_comment.end_token is None + + def GetDocFlag(self): + """Returns the current documentation flags. + + Returns: + The current documentation flags. + """ + return self._doc_flag + + def IsTypeToken(self, t): + if self.InDocComment() and t.type not in (Type.START_DOC_COMMENT, + Type.DOC_FLAG, Type.DOC_INLINE_FLAG, Type.DOC_PREFIX): + f = tokenutil.SearchUntil(t, [Type.DOC_FLAG], [Type.START_DOC_COMMENT], + None, True) + if f and f.attached_object.type_start_token is not None: + return (tokenutil.Compare(t, f.attached_object.type_start_token) > 0 and + tokenutil.Compare(t, f.attached_object.type_end_token) < 0) + return False + + def GetFunction(self): + """Return the function the current code block is a part of. + + Returns: + The current Function object. + """ + if self._functions: + return self._functions[-1] + + def GetBlockDepth(self): + """Return the block depth. + + Returns: + The current block depth. + """ + return self._block_depth + + def GetLastNonSpaceToken(self): + """Return the last non whitespace token.""" + return self._last_non_space_token + + def GetLastLine(self): + """Return the last line.""" + return self._last_line + + def GetFirstToken(self): + """Return the very first token in the file.""" + return self._first_token + + def HandleToken(self, token, last_non_space_token): + """Handles the given token and updates state. + + Args: + token: The token to handle. + last_non_space_token: + """ + self._is_block_close = False + + if not self._first_token: + self._first_token = token + + # Track block depth. + type = token.type + if type == Type.START_BLOCK: + self._block_depth += 1 + + # Subclasses need to handle block start very differently because + # whether a block is a CODE or OBJECT_LITERAL block varies significantly + # by language. + self._block_types.append(self.GetBlockType(token)) + + # Track block depth. + elif type == Type.END_BLOCK: + self._is_block_close = not self.InObjectLiteral() + self._block_depth -= 1 + self._block_types.pop() + + # Track parentheses depth. + elif type == Type.START_PAREN: + self._paren_depth += 1 + + # Track parentheses depth. + elif type == Type.END_PAREN: + self._paren_depth -= 1 + + elif type == Type.COMMENT: + self._last_comment = token.string + + elif type == Type.START_DOC_COMMENT: + self._last_comment = None + self._doc_comment = DocComment(token) + + elif type == Type.END_DOC_COMMENT: + self._doc_comment.end_token = token + + elif type in (Type.DOC_FLAG, Type.DOC_INLINE_FLAG): + flag = self._doc_flag(token) + token.attached_object = flag + self._doc_comment.AddFlag(flag) + + if flag.flag_type == 'param' and flag.name: + self._doc_comment.AddParam(flag.name, flag.type) + elif flag.flag_type == 'suppress': + self._doc_comment.AddSuppression(token) + + elif type == Type.FUNCTION_DECLARATION: + last_code = tokenutil.SearchExcept(token, Type.NON_CODE_TYPES, None, + True) + doc = None + # Only functions outside of parens are eligible for documentation. + if not self._paren_depth: + doc = self._doc_comment + + name = '' + is_assigned = last_code and (last_code.IsOperator('=') or + last_code.IsOperator('||') or last_code.IsOperator('&&') or + (last_code.IsOperator(':') and not self.InObjectLiteral())) + if is_assigned: + # TODO(robbyw): This breaks for x[2] = ... + # Must use loop to find full function name in the case of line-wrapped + # declarations (bug 1220601) like: + # my.function.foo. + # bar = function() ... + identifier = tokenutil.Search(last_code, Type.SIMPLE_LVALUE, None, True) + while identifier and identifier.type in ( + Type.IDENTIFIER, Type.SIMPLE_LVALUE): + name = identifier.string + name + # Traverse behind us, skipping whitespace and comments. + while True: + identifier = identifier.previous + if not identifier or not identifier.type in Type.NON_CODE_TYPES: + break + + else: + next_token = tokenutil.SearchExcept(token, Type.NON_CODE_TYPES) + while next_token and next_token.IsType(Type.FUNCTION_NAME): + name += next_token.string + next_token = tokenutil.Search(next_token, Type.FUNCTION_NAME, 2) + + function = Function(self._block_depth, is_assigned, doc, name) + self._functions.append(function) + self._functions_by_name[name] = function + + elif type == Type.START_PARAMETERS: + self._cumulative_params = '' + + elif type == Type.PARAMETERS: + self._cumulative_params += token.string + + elif type == Type.KEYWORD and token.string == 'return': + next_token = tokenutil.SearchExcept(token, Type.NON_CODE_TYPES) + if not next_token.IsType(Type.SEMICOLON): + function = self.GetFunction() + if function: + function.has_return = True + + elif type == Type.KEYWORD and token.string == 'throw': + function = self.GetFunction() + if function: + function.has_throw = True + + elif type == Type.SIMPLE_LVALUE: + identifier = token.values['identifier'] + jsdoc = self.GetDocComment() + if jsdoc: + self._documented_identifiers.add(identifier) + + self._HandleIdentifier(identifier, True) + + elif type == Type.IDENTIFIER: + self._HandleIdentifier(token.string, False) + + # Detect documented non-assignments. + next_token = tokenutil.SearchExcept(token, Type.NON_CODE_TYPES) + if next_token.IsType(Type.SEMICOLON): + if (self._last_non_space_token and + self._last_non_space_token.IsType(Type.END_DOC_COMMENT)): + self._documented_identifiers.add(token.string) + + def _HandleIdentifier(self, identifier, is_assignment): + """Process the given identifier. + + Currently checks if it references 'this' and annotates the function + accordingly. + + Args: + identifier: The identifer to process. + is_assignment: Whether the identifer is being written to. + """ + if identifier == 'this' or identifier.startswith('this.'): + function = self.GetFunction() + if function: + function.has_this = True + + + def HandleAfterToken(self, token): + """Handle updating state after a token has been checked. + + This function should be used for destructive state changes such as + deleting a tracked object. + + Args: + token: The token to handle. + """ + type = token.type + if type == Type.SEMICOLON or type == Type.END_PAREN or ( + type == Type.END_BRACKET and + self._last_non_space_token.type not in ( + Type.SINGLE_QUOTE_STRING_END, Type.DOUBLE_QUOTE_STRING_END)): + # We end on any numeric array index, but keep going for string based + # array indices so that we pick up manually exported identifiers. + self._doc_comment = None + self._last_comment = None + + elif type == Type.END_BLOCK: + self._doc_comment = None + self._last_comment = None + + if self.InFunction() and self.IsFunctionClose(): + # TODO(robbyw): Detect the function's name for better errors. + self._functions.pop() + + elif type == Type.END_PARAMETERS and self._doc_comment: + self._doc_comment = None + self._last_comment = None + + if not token.IsAnyType(Type.WHITESPACE, Type.BLANK_LINE): + self._last_non_space_token = token + + self._last_line = token.line diff --git a/third_party/closure_linter/closure_linter/tokenutil.py b/third_party/closure_linter/closure_linter/tokenutil.py new file mode 100755 index 0000000..92ff16b --- /dev/null +++ b/third_party/closure_linter/closure_linter/tokenutil.py @@ -0,0 +1,374 @@ +#!/usr/bin/env python +# +# Copyright 2007 The Closure Linter Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Token utility functions.""" + +__author__ = ('robbyw@google.com (Robert Walker)', + 'ajp@google.com (Andy Perelson)') + +import copy + +from closure_linter import javascripttokens +from closure_linter.common import tokens + +# Shorthand +JavaScriptToken = javascripttokens.JavaScriptToken +Type = tokens.TokenType + + +def GetFirstTokenInSameLine(token): + """Returns the first token in the same line as token. + + Args: + token: Any token in the line. + + Returns: + The first token in the same line as token. + """ + while not token.IsFirstInLine(): + token = token.previous + return token + + +def GetFirstTokenInPreviousLine(token): + """Returns the first token in the previous line as token. + + Args: + token: Any token in the line. + + Returns: + The first token in the previous line as token, or None if token is on the + first line. + """ + first_in_line = GetFirstTokenInSameLine(token) + if first_in_line.previous: + return GetFirstTokenInSameLine(first_in_line.previous) + + return None + + +def GetLastTokenInSameLine(token): + """Returns the last token in the same line as token. + + Args: + token: Any token in the line. + + Returns: + The last token in the same line as token. + """ + while not token.IsLastInLine(): + token = token.next + return token + + +def GetAllTokensInSameLine(token): + """Returns all tokens in the same line as the given token. + + Args: + token: Any token in the line. + + Returns: + All tokens on the same line as the given token. + """ + first_token = GetFirstTokenInSameLine(token) + last_token = GetLastTokenInSameLine(token) + + tokens_in_line = [] + while first_token != last_token: + tokens_in_line.append(first_token) + first_token = first_token.next + tokens_in_line.append(last_token) + + return tokens_in_line + + +def CustomSearch(start_token, func, end_func=None, distance=None, + reverse=False): + """Returns the first token where func is True within distance of this token. + + Args: + start_token: The token to start searching from + func: The function to call to test a token for applicability + end_func: The function to call to test a token to determine whether to abort + the search. + distance: The number of tokens to look through before failing search. Must + be positive. If unspecified, will search until the end of the token + chain + reverse: When true, search the tokens before this one instead of the tokens + after it + + Returns: + The first token matching func within distance of this token, or None if no + such token is found. + """ + token = start_token + if reverse: + while token and (distance is None or distance > 0): + previous = token.previous + if previous: + if func(previous): + return previous + if end_func and end_func(previous): + return None + + token = previous + if distance is not None: + distance -= 1 + + else: + while token and (distance is None or distance > 0): + next_token = token.next + if next_token: + if func(next_token): + return next_token + if end_func and end_func(next_token): + return None + + token = next_token + if distance is not None: + distance -= 1 + + return None + + +def Search(start_token, token_types, distance=None, reverse=False): + """Returns the first token of type in token_types within distance. + + Args: + start_token: The token to start searching from + token_types: The allowable types of the token being searched for + distance: The number of tokens to look through before failing search. Must + be positive. If unspecified, will search until the end of the token + chain + reverse: When true, search the tokens before this one instead of the tokens + after it + + Returns: + The first token of any type in token_types within distance of this token, or + None if no such token is found. + """ + return CustomSearch(start_token, lambda token: token.IsAnyType(token_types), + None, distance, reverse) + + +def SearchExcept(start_token, token_types, distance=None, reverse=False): + """Returns the first token not of any type in token_types within distance. + + Args: + start_token: The token to start searching from + token_types: The unallowable types of the token being searched for + distance: The number of tokens to look through before failing search. Must + be positive. If unspecified, will search until the end of the token + chain + reverse: When true, search the tokens before this one instead of the tokens + after it + + Returns: + The first token of any type in token_types within distance of this token, or + None if no such token is found. + """ + return CustomSearch(start_token, + lambda token: not token.IsAnyType(token_types), + None, distance, reverse) + + +def SearchUntil(start_token, token_types, end_types, distance=None, + reverse=False): + """Returns the first token of type in token_types before a token of end_type. + + Args: + start_token: The token to start searching from. + token_types: The allowable types of the token being searched for. + end_types: Types of tokens to abort search if we find. + distance: The number of tokens to look through before failing search. Must + be positive. If unspecified, will search until the end of the token + chain + reverse: When true, search the tokens before this one instead of the tokens + after it + + Returns: + The first token of any type in token_types within distance of this token + before any tokens of type in end_type, or None if no such token is found. + """ + return CustomSearch(start_token, lambda token: token.IsAnyType(token_types), + lambda token: token.IsAnyType(end_types), + distance, reverse) + + +def DeleteToken(token): + """Deletes the given token from the linked list. + + Args: + token: The token to delete + """ + if token.previous: + token.previous.next = token.next + + if token.next: + token.next.previous = token.previous + + following_token = token.next + while following_token and following_token.metadata.last_code == token: + following_token.metadata.last_code = token.metadata.last_code + following_token = following_token.next + + +def DeleteTokens(token, token_count): + """Deletes the given number of tokens starting with the given token. + + Args: + token: The token to start deleting at. + token_count: The total number of tokens to delete. + """ + for i in xrange(1, token_count): + DeleteToken(token.next) + DeleteToken(token) + + +def InsertTokenAfter(new_token, token): + """Insert new_token after token. + + Args: + new_token: A token to be added to the stream + token: A token already in the stream + """ + new_token.previous = token + new_token.next = token.next + + new_token.metadata = copy.copy(token.metadata) + + if token.IsCode(): + new_token.metadata.last_code = token + + if new_token.IsCode(): + following_token = token.next + while following_token and following_token.metadata.last_code == token: + following_token.metadata.last_code = new_token + following_token = following_token.next + + token.next = new_token + if new_token.next: + new_token.next.previous = new_token + + if new_token.start_index is None: + if new_token.line_number == token.line_number: + new_token.start_index = token.start_index + len(token.string) + else: + new_token.start_index = 0 + + iterator = new_token.next + while iterator and iterator.line_number == new_token.line_number: + iterator.start_index += len(new_token.string) + iterator = iterator.next + + +def InsertTokensAfter(new_tokens, token): + """Insert multiple tokens after token. + + Args: + new_tokens: An array of tokens to be added to the stream + token: A token already in the stream + """ + # TODO(user): It would be nicer to have InsertTokenAfter defer to here + # instead of vice-versa. + current_token = token + for new_token in new_tokens: + InsertTokenAfter(new_token, current_token) + current_token = new_token + + +def InsertSpaceTokenAfter(token): + """Inserts a space token after the given token. + + Args: + token: The token to insert a space token after + + Returns: + A single space token + """ + space_token = JavaScriptToken(' ', Type.WHITESPACE, token.line, + token.line_number) + InsertTokenAfter(space_token, token) + + +def InsertBlankLineAfter(token): + """Inserts a blank line after the given token. + + Args: + token: The token to insert a blank line after + + Returns: + A single space token + """ + blank_token = JavaScriptToken('', Type.BLANK_LINE, '', + token.line_number + 1) + InsertLineAfter(token, [blank_token]) + + +def InsertLineAfter(token, new_tokens): + """Inserts a new line consisting of new_tokens after the given token. + + Args: + token: The token to insert after. + new_tokens: The tokens that will make up the new line. + """ + insert_location = token + for new_token in new_tokens: + InsertTokenAfter(new_token, insert_location) + insert_location = new_token + + # Update all subsequent line numbers. + next_token = new_tokens[-1].next + while next_token: + next_token.line_number += 1 + next_token = next_token.next + + +def SplitToken(token, position): + """Splits the token into two tokens at position. + + Args: + token: The token to split + position: The position to split at. Will be the beginning of second token. + + Returns: + The new second token. + """ + new_string = token.string[position:] + token.string = token.string[:position] + + new_token = JavaScriptToken(new_string, token.type, token.line, + token.line_number) + InsertTokenAfter(new_token, token) + + return new_token + + +def Compare(token1, token2): + """Compares two tokens and determines their relative order. + + Args: + token1: The first token to compare. + token2: The second token to compare. + + Returns: + A negative integer, zero, or a positive integer as the first token is + before, equal, or after the second in the token stream. + """ + if token2.line_number != token1.line_number: + return token1.line_number - token2.line_number + else: + return token1.start_index - token2.start_index diff --git a/third_party/closure_linter/setup.py b/third_party/closure_linter/setup.py new file mode 100755 index 0000000..aa2ca4b --- /dev/null +++ b/third_party/closure_linter/setup.py @@ -0,0 +1,38 @@ +#!/usr/bin/env python +# +# Copyright 2010 The Closure Linter Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +try: + from setuptools import setup +except ImportError: + from distutils.core import setup + +setup(name='closure_linter', + version='2.3.5', + description='Closure Linter', + license='Apache', + author='The Closure Linter Authors', + author_email='opensource@google.com', + url='http://code.google.com/p/closure-linter', + install_requires=['python-gflags'], + package_dir={'closure_linter': 'closure_linter'}, + packages=['closure_linter', 'closure_linter.common'], + entry_points = { + 'console_scripts': [ + 'gjslint = closure_linter.gjslint:main', + 'fixjsstyle = closure_linter.fixjsstyle:main' + ] + } +) diff --git a/third_party/python_gflags/AUTHORS b/third_party/python_gflags/AUTHORS new file mode 100644 index 0000000..887918b --- /dev/null +++ b/third_party/python_gflags/AUTHORS @@ -0,0 +1,2 @@ +google-gflags@googlegroups.com + diff --git a/third_party/python_gflags/COPYING b/third_party/python_gflags/COPYING new file mode 100644 index 0000000..d15b0c2 --- /dev/null +++ b/third_party/python_gflags/COPYING @@ -0,0 +1,28 @@ +Copyright (c) 2006, Google Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/third_party/python_gflags/ChangeLog b/third_party/python_gflags/ChangeLog new file mode 100644 index 0000000..87732a2 --- /dev/null +++ b/third_party/python_gflags/ChangeLog @@ -0,0 +1,62 @@ +Wed Jan 18 13:57:39 2012 Google Inc. <google-gflags@googlegroups.com> + + * python-gflags: version 2.0 + * No changes from version 1.8. + +Wed Jan 18 11:54:03 2012 Google Inc. <google-gflags@googlegroups.com> + + * python-gflags: version 1.8 + * Don't raise DuplicateFlag when re-importing a module (mmcdonald) + * Changed the 'official' python-gflags email in setup.py/etc + * Changed copyright text to reflect Google's relinquished ownership + +Tue Dec 20 17:10:41 2011 Google Inc. <opensource@google.com> + + * python-gflags: version 1.7 + * Prepare gflags for python 3.x, keeping 2.4 compatibility (twouters) + * If output is a tty, use terminal's width to wrap help-text (wiesmann) + * PORTING: Fix ImportError for non-Unix platforms (kdeus) + * PORTING: Run correctly when termios isn't available (shines) + * Add unicode support to flags (csilvers) + +Fri Jul 29 12:24:08 2011 Google Inc. <opensource@google.com> + + * python-gflags: version 1.6 + * Document FlagValues.UseGnuGetOpt (garymm) + * replace fchmod with chmod to work on python 2.4 (mshields) + * Fix bug in flag decl reporting for dup flags (craigcitro) + * Add multi_float, and tests for multi_float/int (simonf) + * Make flagfiles expand in place, to follow docs (dmlynch) + * Raise exception if --flagfile can't be read (tlim) + +Wed Jan 26 13:50:46 2011 Google Inc. <opensource@google.com> + + * python-gflags: version 1.5.1 + * Fix manifest and setup.py to include new files + +Mon Jan 24 16:58:10 2011 Google Inc. <opensource@google.com> + + * python-gflags: version 1.5 + * Add support for flag validators (olexiy) + * Better reporting on UnrecognizedFlagError (sorenj) + * Cache ArgumentParser, to save space (tmarek) + +Wed Oct 13 17:40:12 2010 Google Inc. <opensource@google.com> + + * python-gflags: version 1.4 + * Unregister per-command flags after running the command (dnr) + * Allow key-flags to work with special flags (salcianu) + * Allow printing flags of a specific module (mikecurtis) + * BUGFIX: Fix an error message for float flags (olexiy) + * BUGFIX: Can now import while defining flags (salcianu) + * BUGFIX: Fix flagfile parsing in python (chronos) + * DOC: Better explain the format of --helpxml output (salcianu) + * DOC: Better error message on parse failure (tstromberg) + * Better test coverage under python 2.2 (mshields) + * Added a Makefile for building the packages. + +Mon Jan 4 18:46:29 2010 Tim 'mithro' Ansell <mithro@mithis.com> + + * python-gflags: version 1.3 + * Fork from the C++ package (google-gflags 1.3) + * Add debian packaging diff --git a/third_party/python_gflags/MANIFEST.in b/third_party/python_gflags/MANIFEST.in new file mode 100644 index 0000000..17851bf --- /dev/null +++ b/third_party/python_gflags/MANIFEST.in @@ -0,0 +1,19 @@ +include AUTHORS +include COPYING +include ChangeLog +include MANIFEST.in +include Makefile +include NEWS +include README +include debian/README +include debian/changelog +include debian/compat +include debian/control +include debian/copyright +include debian/docs +include debian/rules +include gflags.py +include gflags2man.py +include gflags_validators.py +include setup.py +recursive-include tests *.py diff --git a/third_party/python_gflags/Makefile b/third_party/python_gflags/Makefile new file mode 100644 index 0000000..6627c32 --- /dev/null +++ b/third_party/python_gflags/Makefile @@ -0,0 +1,69 @@ + +prep: + @echo + # Install needed packages + sudo apt-get install subversion fakeroot python-setuptools python-subversion + # + @echo + # Check that the person has .pypirc + @if [ ! -e ~/.pypirc ]; then \ + echo "Please create a ~/.pypirc with the following contents:"; \ + echo "[server-login]"; \ + echo "username:google_opensource"; \ + echo "password:<see valentine>"; \ + fi + # + @echo + # FIXME(tansell): Check that the person has .dputrc for PPA + +clean: + # Clean up any build files. + python setup.py clean --all + # + # Clean up the debian stuff + fakeroot ./debian/rules clean + # + # Clean up everything else + rm MANIFEST || true + rm -rf build-* + # + # Clean up the egg files + rm -rf *egg* + # + # Remove dist + rm -rf dist + +dist: + # Generate the tarball based on MANIFEST.in + python setup.py sdist + # + # Build the debian packages + fakeroot ./debian/rules binary + mv ../python-gflags*.deb ./dist/ + # + # Build the python Egg + python setup.py bdist_egg + # + @echo + @echo "Files to upload:" + @echo "--------------------------" + @ls -l ./dist/ + +push: + # Send the updates to svn + # Upload the source package to code.google.com + - /home/build/opensource/tools/googlecode_upload.py \ + -p python-gflags ./dist/* + # + # Upload the package to PyPi + - python setup.py sdist upload + - python setup.py bdist_egg upload + # + # Upload the package to the ppa + # FIXME(tansell): dput should run here + +check: + # Run all the tests. + for test in tests/*.py; do PYTHONPATH=. python $$test || exit 1; done + +.PHONY: prep dist clean push check diff --git a/third_party/python_gflags/NEWS b/third_party/python_gflags/NEWS new file mode 100644 index 0000000..8aaa72b --- /dev/null +++ b/third_party/python_gflags/NEWS @@ -0,0 +1,78 @@ +== 18 January 2012 == + +[Prependum:] I just realized I should have named the new version 2.0, +to reflect the new ownership and status as a community run project. +Not too late, I guess. I've just released python-gflags 2.0, which is +identical to python-gflags 1.8 except for the version number. + +I've just released python-gflags 1.8. This fixes a bug, allowing +modules defining flags to be re-imported without raising duplicate +flag errors. + +Administrative note: In the coming weeks, I'll be stepping down as +maintainer for the python-gflags project, and as part of that Google +is relinquishing ownership of the project; it will now be entirely +community run. The remaining +[http://python-gflags.googlecode.com/svn/tags/python-gflags-1.8/ChangeLog changes] +in this release reflect that shift. + + +=== 20 December 2011 === + +I've just released python-gflags 1.7. The major change here is +improved unicode support, in both flag default values and +help-strings. We've also made big steps toward making gflags work +with python 3.x (while keeping 2.4 compatibility), and improving +--help output in the common case where output is a tty. + +For a full list of changes since last release, see the +[http://python-gflags.googlecode.com/svn/tags/python-gflags-1.7/ChangeLog ChangeLog]. + +=== 29 July 2011 === + +I've just released python-gflags 1.6. This release has only minor +changes, including support for multi_float flags. The full list of +changes is in the +[http://python-gflags.googlecode.com/svn/tags/python-gflags-1.6/ChangeLog ChangeLog]. + +The major change with this release is procedural: I've changed the +internal tools used to integrate Google-supplied patches for gflags +into the opensource release. These new tools should result in more +frequent updates with better change descriptions. They will also +result in future `ChangeLog` entries being much more verbose (for +better or for worse). + +=== 26 January 2011 === + +I've just released python-gflags 1.5.1. I had improperly packaged +python-gflags 1.5, so it probably doesn't work. All users who have +updated to python-gflags 1.5 are encouraged to update again to 1.5.1. + +=== 24 January 2011 === + +I've just released python-gflags 1.5. This release adds support for +flag verifiers: small functions you can associate with flags, that are +called whenever the flag value is set or modified, and can verify that +the new value is legal. It also has other, minor changes, described +in the +[http://python-gflags.googlecode.com/svn/tags/python-gflags-1.5/ChangeLog ChangeLog]. + +=== 11 October 2010 === + +I've just released python-gflags 1.4. This release has only minor +changes from 1.3, including support for printing flags of a specific +module, allowing key-flags to work with special flags, somewhat better +error messaging, and +[http://python-gflags.googlecode.com/svn/tags/python-gflags-1.4/ChangeLog so forth]. +If 1.3 is working well for you, there's no particular reason to upgrade. + +=== 4 January 2010 === + +I just released python-gflags 1.3. This is the first python-gflags +release; it is version 1.3 because this code is forked from the 1.3 +release of google-gflags. + +I don't have a tarball or .deb file up quite yet, so for now you will +have to get the source files by browsing under the 'source' +tag. Downloadable files will be available soon. + diff --git a/third_party/python_gflags/README b/third_party/python_gflags/README new file mode 100644 index 0000000..81daa7a --- /dev/null +++ b/third_party/python_gflags/README @@ -0,0 +1,23 @@ +This repository contains a python implementation of the Google commandline +flags module. + + GFlags defines a *distributed* command line system, replacing systems like + getopt(), optparse and manual argument processing. Rather than an application + having to define all flags in or near main(), each python module defines flags + that are useful to it. When one python module imports another, it gains + access to the other's flags. + + It includes the ability to define flag types (boolean, float, interger, list), + autogeneration of help (in both human and machine readable format) and reading + arguments from a file. It also includes the ability to automatically generate + man pages from the help flags. + +Documentation for implementation is at the top of gflags.py file. + +To install the python module, run + python ./setup.py install + +When you install this library, you also get a helper application, +gflags2man.py, installed into /usr/local/bin. You can run gflags2man.py to +create an instant man page, with all the commandline flags and their docs, for +any C++ or python program you've written using the gflags library. diff --git a/third_party/python_gflags/README.chromium b/third_party/python_gflags/README.chromium new file mode 100644 index 0000000..cc749e4 --- /dev/null +++ b/third_party/python_gflags/README.chromium @@ -0,0 +1,25 @@ +Name: python-gflags +URL: http://code.google.com/p/python-gflags/ +Version: 2.0 +Date: 15 Feb 2012 +Revision: 41 +License: BSD 3-Clause License +License File: COPYING +Security Critical: no + +Description: + This project is the python equivalent of google-gflags, a Google commandline + flag implementation for C++. It is intended to be used in situations where a + project wants to mimic the command-line flag handling of a C++ app that uses + google-gflags, or for a Python app that, via swig or some other means, is + linked with a C++ app that uses google-gflags. + + The gflags package contains a library that implements commandline flags + processing. As such it's a replacement for getopt(). It has increased + flexibility, including built-in support for Python types, and the ability to + define flags in the source file in which they're used. (This last is its + major difference from OptParse.) + +Local modifications: + Removed tests/ + Removed debian/ diff --git a/third_party/python_gflags/gflags.py b/third_party/python_gflags/gflags.py new file mode 100644 index 0000000..822256a --- /dev/null +++ b/third_party/python_gflags/gflags.py @@ -0,0 +1,2862 @@ +#!/usr/bin/env python +# +# Copyright (c) 2002, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# --- +# Author: Chad Lester +# Design and style contributions by: +# Amit Patel, Bogdan Cocosel, Daniel Dulitz, Eric Tiedemann, +# Eric Veach, Laurence Gonsalves, Matthew Springer +# Code reorganized a bit by Craig Silverstein + +"""This module is used to define and parse command line flags. + +This module defines a *distributed* flag-definition policy: rather than +an application having to define all flags in or near main(), each python +module defines flags that are useful to it. When one python module +imports another, it gains access to the other's flags. (This is +implemented by having all modules share a common, global registry object +containing all the flag information.) + +Flags are defined through the use of one of the DEFINE_xxx functions. +The specific function used determines how the flag is parsed, checked, +and optionally type-converted, when it's seen on the command line. + + +IMPLEMENTATION: DEFINE_* creates a 'Flag' object and registers it with a +'FlagValues' object (typically the global FlagValues FLAGS, defined +here). The 'FlagValues' object can scan the command line arguments and +pass flag arguments to the corresponding 'Flag' objects for +value-checking and type conversion. The converted flag values are +available as attributes of the 'FlagValues' object. + +Code can access the flag through a FlagValues object, for instance +gflags.FLAGS.myflag. Typically, the __main__ module passes the command +line arguments to gflags.FLAGS for parsing. + +At bottom, this module calls getopt(), so getopt functionality is +supported, including short- and long-style flags, and the use of -- to +terminate flags. + +Methods defined by the flag module will throw 'FlagsError' exceptions. +The exception argument will be a human-readable string. + + +FLAG TYPES: This is a list of the DEFINE_*'s that you can do. All flags +take a name, default value, help-string, and optional 'short' name +(one-letter name). Some flags have other arguments, which are described +with the flag. + +DEFINE_string: takes any input, and interprets it as a string. + +DEFINE_bool or +DEFINE_boolean: typically does not take an argument: say --myflag to + set FLAGS.myflag to true, or --nomyflag to set + FLAGS.myflag to false. Alternately, you can say + --myflag=true or --myflag=t or --myflag=1 or + --myflag=false or --myflag=f or --myflag=0 + +DEFINE_float: takes an input and interprets it as a floating point + number. Takes optional args lower_bound and upper_bound; + if the number specified on the command line is out of + range, it will raise a FlagError. + +DEFINE_integer: takes an input and interprets it as an integer. Takes + optional args lower_bound and upper_bound as for floats. + +DEFINE_enum: takes a list of strings which represents legal values. If + the command-line value is not in this list, raise a flag + error. Otherwise, assign to FLAGS.flag as a string. + +DEFINE_list: Takes a comma-separated list of strings on the commandline. + Stores them in a python list object. + +DEFINE_spaceseplist: Takes a space-separated list of strings on the + commandline. Stores them in a python list object. + Example: --myspacesepflag "foo bar baz" + +DEFINE_multistring: The same as DEFINE_string, except the flag can be + specified more than once on the commandline. The + result is a python list object (list of strings), + even if the flag is only on the command line once. + +DEFINE_multi_int: The same as DEFINE_integer, except the flag can be + specified more than once on the commandline. The + result is a python list object (list of ints), even if + the flag is only on the command line once. + + +SPECIAL FLAGS: There are a few flags that have special meaning: + --help prints a list of all the flags in a human-readable fashion + --helpshort prints a list of all key flags (see below). + --helpxml prints a list of all flags, in XML format. DO NOT parse + the output of --help and --helpshort. Instead, parse + the output of --helpxml. For more info, see + "OUTPUT FOR --helpxml" below. + --flagfile=foo read flags from file foo. + --undefok=f1,f2 ignore unrecognized option errors for f1,f2. + For boolean flags, you should use --undefok=boolflag, and + --boolflag and --noboolflag will be accepted. Do not use + --undefok=noboolflag. + -- as in getopt(), terminates flag-processing + + +FLAGS VALIDATORS: If your program: + - requires flag X to be specified + - needs flag Y to match a regular expression + - or requires any more general constraint to be satisfied +then validators are for you! + +Each validator represents a constraint over one flag, which is enforced +starting from the initial parsing of the flags and until the program +terminates. + +Also, lower_bound and upper_bound for numerical flags are enforced using flag +validators. + +Howto: +If you want to enforce a constraint over one flag, use + +gflags.RegisterValidator(flag_name, + checker, + message='Flag validation failed', + flag_values=FLAGS) + +After flag values are initially parsed, and after any change to the specified +flag, method checker(flag_value) will be executed. If constraint is not +satisfied, an IllegalFlagValue exception will be raised. See +RegisterValidator's docstring for a detailed explanation on how to construct +your own checker. + + +EXAMPLE USAGE: + +FLAGS = gflags.FLAGS + +gflags.DEFINE_integer('my_version', 0, 'Version number.') +gflags.DEFINE_string('filename', None, 'Input file name', short_name='f') + +gflags.RegisterValidator('my_version', + lambda value: value % 2 == 0, + message='--my_version must be divisible by 2') +gflags.MarkFlagAsRequired('filename') + + +NOTE ON --flagfile: + +Flags may be loaded from text files in addition to being specified on +the commandline. + +Any flags you don't feel like typing, throw them in a file, one flag per +line, for instance: + --myflag=myvalue + --nomyboolean_flag +You then specify your file with the special flag '--flagfile=somefile'. +You CAN recursively nest flagfile= tokens OR use multiple files on the +command line. Lines beginning with a single hash '#' or a double slash +'//' are comments in your flagfile. + +Any flagfile=<file> will be interpreted as having a relative path from +the current working directory rather than from the place the file was +included from: + myPythonScript.py --flagfile=config/somefile.cfg + +If somefile.cfg includes further --flagfile= directives, these will be +referenced relative to the original CWD, not from the directory the +including flagfile was found in! + +The caveat applies to people who are including a series of nested files +in a different dir than they are executing out of. Relative path names +are always from CWD, not from the directory of the parent include +flagfile. We do now support '~' expanded directory names. + +Absolute path names ALWAYS work! + + +EXAMPLE USAGE: + + + FLAGS = gflags.FLAGS + + # Flag names are globally defined! So in general, we need to be + # careful to pick names that are unlikely to be used by other libraries. + # If there is a conflict, we'll get an error at import time. + gflags.DEFINE_string('name', 'Mr. President', 'your name') + gflags.DEFINE_integer('age', None, 'your age in years', lower_bound=0) + gflags.DEFINE_boolean('debug', False, 'produces debugging output') + gflags.DEFINE_enum('gender', 'male', ['male', 'female'], 'your gender') + + def main(argv): + try: + argv = FLAGS(argv) # parse flags + except gflags.FlagsError, e: + print '%s\\nUsage: %s ARGS\\n%s' % (e, sys.argv[0], FLAGS) + sys.exit(1) + if FLAGS.debug: print 'non-flag arguments:', argv + print 'Happy Birthday', FLAGS.name + if FLAGS.age is not None: + print 'You are a %d year old %s' % (FLAGS.age, FLAGS.gender) + + if __name__ == '__main__': + main(sys.argv) + + +KEY FLAGS: + +As we already explained, each module gains access to all flags defined +by all the other modules it transitively imports. In the case of +non-trivial scripts, this means a lot of flags ... For documentation +purposes, it is good to identify the flags that are key (i.e., really +important) to a module. Clearly, the concept of "key flag" is a +subjective one. When trying to determine whether a flag is key to a +module or not, assume that you are trying to explain your module to a +potential user: which flags would you really like to mention first? + +We'll describe shortly how to declare which flags are key to a module. +For the moment, assume we know the set of key flags for each module. +Then, if you use the app.py module, you can use the --helpshort flag to +print only the help for the flags that are key to the main module, in a +human-readable format. + +NOTE: If you need to parse the flag help, do NOT use the output of +--help / --helpshort. That output is meant for human consumption, and +may be changed in the future. Instead, use --helpxml; flags that are +key for the main module are marked there with a <key>yes</key> element. + +The set of key flags for a module M is composed of: + +1. Flags defined by module M by calling a DEFINE_* function. + +2. Flags that module M explictly declares as key by using the function + + DECLARE_key_flag(<flag_name>) + +3. Key flags of other modules that M specifies by using the function + + ADOPT_module_key_flags(<other_module>) + + This is a "bulk" declaration of key flags: each flag that is key for + <other_module> becomes key for the current module too. + +Notice that if you do not use the functions described at points 2 and 3 +above, then --helpshort prints information only about the flags defined +by the main module of our script. In many cases, this behavior is good +enough. But if you move part of the main module code (together with the +related flags) into a different module, then it is nice to use +DECLARE_key_flag / ADOPT_module_key_flags and make sure --helpshort +lists all relevant flags (otherwise, your code refactoring may confuse +your users). + +Note: each of DECLARE_key_flag / ADOPT_module_key_flags has its own +pluses and minuses: DECLARE_key_flag is more targeted and may lead a +more focused --helpshort documentation. ADOPT_module_key_flags is good +for cases when an entire module is considered key to the current script. +Also, it does not require updates to client scripts when a new flag is +added to the module. + + +EXAMPLE USAGE 2 (WITH KEY FLAGS): + +Consider an application that contains the following three files (two +auxiliary modules and a main module) + +File libfoo.py: + + import gflags + + gflags.DEFINE_integer('num_replicas', 3, 'Number of replicas to start') + gflags.DEFINE_boolean('rpc2', True, 'Turn on the usage of RPC2.') + + ... some code ... + +File libbar.py: + + import gflags + + gflags.DEFINE_string('bar_gfs_path', '/gfs/path', + 'Path to the GFS files for libbar.') + gflags.DEFINE_string('email_for_bar_errors', 'bar-team@google.com', + 'Email address for bug reports about module libbar.') + gflags.DEFINE_boolean('bar_risky_hack', False, + 'Turn on an experimental and buggy optimization.') + + ... some code ... + +File myscript.py: + + import gflags + import libfoo + import libbar + + gflags.DEFINE_integer('num_iterations', 0, 'Number of iterations.') + + # Declare that all flags that are key for libfoo are + # key for this module too. + gflags.ADOPT_module_key_flags(libfoo) + + # Declare that the flag --bar_gfs_path (defined in libbar) is key + # for this module. + gflags.DECLARE_key_flag('bar_gfs_path') + + ... some code ... + +When myscript is invoked with the flag --helpshort, the resulted help +message lists information about all the key flags for myscript: +--num_iterations, --num_replicas, --rpc2, and --bar_gfs_path. + +Of course, myscript uses all the flags declared by it (in this case, +just --num_replicas) or by any of the modules it transitively imports +(e.g., the modules libfoo, libbar). E.g., it can access the value of +FLAGS.bar_risky_hack, even if --bar_risky_hack is not declared as a key +flag for myscript. + + +OUTPUT FOR --helpxml: + +The --helpxml flag generates output with the following structure: + +<?xml version="1.0"?> +<AllFlags> + <program>PROGRAM_BASENAME</program> + <usage>MAIN_MODULE_DOCSTRING</usage> + (<flag> + [<key>yes</key>] + <file>DECLARING_MODULE</file> + <name>FLAG_NAME</name> + <meaning>FLAG_HELP_MESSAGE</meaning> + <default>DEFAULT_FLAG_VALUE</default> + <current>CURRENT_FLAG_VALUE</current> + <type>FLAG_TYPE</type> + [OPTIONAL_ELEMENTS] + </flag>)* +</AllFlags> + +Notes: + +1. The output is intentionally similar to the output generated by the +C++ command-line flag library. The few differences are due to the +Python flags that do not have a C++ equivalent (at least not yet), +e.g., DEFINE_list. + +2. New XML elements may be added in the future. + +3. DEFAULT_FLAG_VALUE is in serialized form, i.e., the string you can +pass for this flag on the command-line. E.g., for a flag defined +using DEFINE_list, this field may be foo,bar, not ['foo', 'bar']. + +4. CURRENT_FLAG_VALUE is produced using str(). This means that the +string 'false' will be represented in the same way as the boolean +False. Using repr() would have removed this ambiguity and simplified +parsing, but would have broken the compatibility with the C++ +command-line flags. + +5. OPTIONAL_ELEMENTS describe elements relevant for certain kinds of +flags: lower_bound, upper_bound (for flags that specify bounds), +enum_value (for enum flags), list_separator (for flags that consist of +a list of values, separated by a special token). + +6. We do not provide any example here: please use --helpxml instead. + +This module requires at least python 2.2.1 to run. +""" + +import cgi +import getopt +import os +import re +import string +import struct +import sys +# pylint: disable-msg=C6204 +try: + import fcntl +except ImportError: + fcntl = None +try: + # Importing termios will fail on non-unix platforms. + import termios +except ImportError: + termios = None + +import gflags_validators +# pylint: enable-msg=C6204 + + +# Are we running under pychecker? +_RUNNING_PYCHECKER = 'pychecker.python' in sys.modules + + +def _GetCallingModuleObjectAndName(): + """Returns the module that's calling into this module. + + We generally use this function to get the name of the module calling a + DEFINE_foo... function. + """ + # Walk down the stack to find the first globals dict that's not ours. + for depth in range(1, sys.getrecursionlimit()): + if not sys._getframe(depth).f_globals is globals(): + globals_for_frame = sys._getframe(depth).f_globals + module, module_name = _GetModuleObjectAndName(globals_for_frame) + if module_name is not None: + return module, module_name + raise AssertionError("No module was found") + + +def _GetCallingModule(): + """Returns the name of the module that's calling into this module.""" + return _GetCallingModuleObjectAndName()[1] + + +def _GetThisModuleObjectAndName(): + """Returns: (module object, module name) for this module.""" + return _GetModuleObjectAndName(globals()) + + +# module exceptions: +class FlagsError(Exception): + """The base class for all flags errors.""" + pass + + +class DuplicateFlag(FlagsError): + """Raised if there is a flag naming conflict.""" + pass + +class CantOpenFlagFileError(FlagsError): + """Raised if flagfile fails to open: doesn't exist, wrong permissions, etc.""" + pass + + +class DuplicateFlagCannotPropagateNoneToSwig(DuplicateFlag): + """Special case of DuplicateFlag -- SWIG flag value can't be set to None. + + This can be raised when a duplicate flag is created. Even if allow_override is + True, we still abort if the new value is None, because it's currently + impossible to pass None default value back to SWIG. See FlagValues.SetDefault + for details. + """ + pass + + +class DuplicateFlagError(DuplicateFlag): + """A DuplicateFlag whose message cites the conflicting definitions. + + A DuplicateFlagError conveys more information than a DuplicateFlag, + namely the modules where the conflicting definitions occur. This + class was created to avoid breaking external modules which depend on + the existing DuplicateFlags interface. + """ + + def __init__(self, flagname, flag_values, other_flag_values=None): + """Create a DuplicateFlagError. + + Args: + flagname: Name of the flag being redefined. + flag_values: FlagValues object containing the first definition of + flagname. + other_flag_values: If this argument is not None, it should be the + FlagValues object where the second definition of flagname occurs. + If it is None, we assume that we're being called when attempting + to create the flag a second time, and we use the module calling + this one as the source of the second definition. + """ + self.flagname = flagname + first_module = flag_values.FindModuleDefiningFlag( + flagname, default='<unknown>') + if other_flag_values is None: + second_module = _GetCallingModule() + else: + second_module = other_flag_values.FindModuleDefiningFlag( + flagname, default='<unknown>') + msg = "The flag '%s' is defined twice. First from %s, Second from %s" % ( + self.flagname, first_module, second_module) + DuplicateFlag.__init__(self, msg) + + +class IllegalFlagValue(FlagsError): + """The flag command line argument is illegal.""" + pass + + +class UnrecognizedFlag(FlagsError): + """Raised if a flag is unrecognized.""" + pass + + +# An UnrecognizedFlagError conveys more information than an UnrecognizedFlag. +# Since there are external modules that create DuplicateFlags, the interface to +# DuplicateFlag shouldn't change. The flagvalue will be assigned the full value +# of the flag and its argument, if any, allowing handling of unrecognized flags +# in an exception handler. +# If flagvalue is the empty string, then this exception is an due to a +# reference to a flag that was not already defined. +class UnrecognizedFlagError(UnrecognizedFlag): + def __init__(self, flagname, flagvalue=''): + self.flagname = flagname + self.flagvalue = flagvalue + UnrecognizedFlag.__init__( + self, "Unknown command line flag '%s'" % flagname) + +# Global variable used by expvar +_exported_flags = {} +_help_width = 80 # width of help output + + +def GetHelpWidth(): + """Returns: an integer, the width of help lines that is used in TextWrap.""" + if (not sys.stdout.isatty()) or (termios is None) or (fcntl is None): + return _help_width + try: + data = fcntl.ioctl(sys.stdout, termios.TIOCGWINSZ, '1234') + columns = struct.unpack('hh', data)[1] + # Emacs mode returns 0. + # Here we assume that any value below 40 is unreasonable + if columns >= 40: + return columns + # Returning an int as default is fine, int(int) just return the int. + return int(os.getenv('COLUMNS', _help_width)) + + except (TypeError, IOError, struct.error): + return _help_width + + +def CutCommonSpacePrefix(text): + """Removes a common space prefix from the lines of a multiline text. + + If the first line does not start with a space, it is left as it is and + only in the remaining lines a common space prefix is being searched + for. That means the first line will stay untouched. This is especially + useful to turn doc strings into help texts. This is because some + people prefer to have the doc comment start already after the + apostrophe and then align the following lines while others have the + apostrophes on a separate line. + + The function also drops trailing empty lines and ignores empty lines + following the initial content line while calculating the initial + common whitespace. + + Args: + text: text to work on + + Returns: + the resulting text + """ + text_lines = text.splitlines() + # Drop trailing empty lines + while text_lines and not text_lines[-1]: + text_lines = text_lines[:-1] + if text_lines: + # We got some content, is the first line starting with a space? + if text_lines[0] and text_lines[0][0].isspace(): + text_first_line = [] + else: + text_first_line = [text_lines.pop(0)] + # Calculate length of common leading whitespace (only over content lines) + common_prefix = os.path.commonprefix([line for line in text_lines if line]) + space_prefix_len = len(common_prefix) - len(common_prefix.lstrip()) + # If we have a common space prefix, drop it from all lines + if space_prefix_len: + for index in xrange(len(text_lines)): + if text_lines[index]: + text_lines[index] = text_lines[index][space_prefix_len:] + return '\n'.join(text_first_line + text_lines) + return '' + + +def TextWrap(text, length=None, indent='', firstline_indent=None, tabs=' '): + """Wraps a given text to a maximum line length and returns it. + + We turn lines that only contain whitespace into empty lines. We keep + new lines and tabs (e.g., we do not treat tabs as spaces). + + Args: + text: text to wrap + length: maximum length of a line, includes indentation + if this is None then use GetHelpWidth() + indent: indent for all but first line + firstline_indent: indent for first line; if None, fall back to indent + tabs: replacement for tabs + + Returns: + wrapped text + + Raises: + FlagsError: if indent not shorter than length + FlagsError: if firstline_indent not shorter than length + """ + # Get defaults where callee used None + if length is None: + length = GetHelpWidth() + if indent is None: + indent = '' + if len(indent) >= length: + raise FlagsError('Indent must be shorter than length') + # In line we will be holding the current line which is to be started + # with indent (or firstline_indent if available) and then appended + # with words. + if firstline_indent is None: + firstline_indent = '' + line = indent + else: + line = firstline_indent + if len(firstline_indent) >= length: + raise FlagsError('First line indent must be shorter than length') + + # If the callee does not care about tabs we simply convert them to + # spaces If callee wanted tabs to be single space then we do that + # already here. + if not tabs or tabs == ' ': + text = text.replace('\t', ' ') + else: + tabs_are_whitespace = not tabs.strip() + + line_regex = re.compile('([ ]*)(\t*)([^ \t]+)', re.MULTILINE) + + # Split the text into lines and the lines with the regex above. The + # resulting lines are collected in result[]. For each split we get the + # spaces, the tabs and the next non white space (e.g. next word). + result = [] + for text_line in text.splitlines(): + # Store result length so we can find out whether processing the next + # line gave any new content + old_result_len = len(result) + # Process next line with line_regex. For optimization we do an rstrip(). + # - process tabs (changes either line or word, see below) + # - process word (first try to squeeze on line, then wrap or force wrap) + # Spaces found on the line are ignored, they get added while wrapping as + # needed. + for spaces, current_tabs, word in line_regex.findall(text_line.rstrip()): + # If tabs weren't converted to spaces, handle them now + if current_tabs: + # If the last thing we added was a space anyway then drop + # it. But let's not get rid of the indentation. + if (((result and line != indent) or + (not result and line != firstline_indent)) and line[-1] == ' '): + line = line[:-1] + # Add the tabs, if that means adding whitespace, just add it at + # the line, the rstrip() code while shorten the line down if + # necessary + if tabs_are_whitespace: + line += tabs * len(current_tabs) + else: + # if not all tab replacement is whitespace we prepend it to the word + word = tabs * len(current_tabs) + word + # Handle the case where word cannot be squeezed onto current last line + if len(line) + len(word) > length and len(indent) + len(word) <= length: + result.append(line.rstrip()) + line = indent + word + word = '' + # No space left on line or can we append a space? + if len(line) + 1 >= length: + result.append(line.rstrip()) + line = indent + else: + line += ' ' + # Add word and shorten it up to allowed line length. Restart next + # line with indent and repeat, or add a space if we're done (word + # finished) This deals with words that cannot fit on one line + # (e.g. indent + word longer than allowed line length). + while len(line) + len(word) >= length: + line += word + result.append(line[:length]) + word = line[length:] + line = indent + # Default case, simply append the word and a space + if word: + line += word + ' ' + # End of input line. If we have content we finish the line. If the + # current line is just the indent but we had content in during this + # original line then we need to add an empty line. + if (result and line != indent) or (not result and line != firstline_indent): + result.append(line.rstrip()) + elif len(result) == old_result_len: + result.append('') + line = indent + + return '\n'.join(result) + + +def DocToHelp(doc): + """Takes a __doc__ string and reformats it as help.""" + + # Get rid of starting and ending white space. Using lstrip() or even + # strip() could drop more than maximum of first line and right space + # of last line. + doc = doc.strip() + + # Get rid of all empty lines + whitespace_only_line = re.compile('^[ \t]+$', re.M) + doc = whitespace_only_line.sub('', doc) + + # Cut out common space at line beginnings + doc = CutCommonSpacePrefix(doc) + + # Just like this module's comment, comments tend to be aligned somehow. + # In other words they all start with the same amount of white space + # 1) keep double new lines + # 2) keep ws after new lines if not empty line + # 3) all other new lines shall be changed to a space + # Solution: Match new lines between non white space and replace with space. + doc = re.sub('(?<=\S)\n(?=\S)', ' ', doc, re.M) + + return doc + + +def _GetModuleObjectAndName(globals_dict): + """Returns the module that defines a global environment, and its name. + + Args: + globals_dict: A dictionary that should correspond to an environment + providing the values of the globals. + + Returns: + A pair consisting of (1) module object and (2) module name (a + string). Returns (None, None) if the module could not be + identified. + """ + # The use of .items() (instead of .iteritems()) is NOT a mistake: if + # a parallel thread imports a module while we iterate over + # .iteritems() (not nice, but possible), we get a RuntimeError ... + # Hence, we use the slightly slower but safer .items(). + for name, module in sys.modules.items(): + if getattr(module, '__dict__', None) is globals_dict: + if name == '__main__': + # Pick a more informative name for the main module. + name = sys.argv[0] + return (module, name) + return (None, None) + + +def _GetMainModule(): + """Returns: string, name of the module from which execution started.""" + # First, try to use the same logic used by _GetCallingModuleObjectAndName(), + # i.e., call _GetModuleObjectAndName(). For that we first need to + # find the dictionary that the main module uses to store the + # globals. + # + # That's (normally) the same dictionary object that the deepest + # (oldest) stack frame is using for globals. + deepest_frame = sys._getframe(0) + while deepest_frame.f_back is not None: + deepest_frame = deepest_frame.f_back + globals_for_main_module = deepest_frame.f_globals + main_module_name = _GetModuleObjectAndName(globals_for_main_module)[1] + # The above strategy fails in some cases (e.g., tools that compute + # code coverage by redefining, among other things, the main module). + # If so, just use sys.argv[0]. We can probably always do this, but + # it's safest to try to use the same logic as _GetCallingModuleObjectAndName() + if main_module_name is None: + main_module_name = sys.argv[0] + return main_module_name + + +class FlagValues: + """Registry of 'Flag' objects. + + A 'FlagValues' can then scan command line arguments, passing flag + arguments through to the 'Flag' objects that it owns. It also + provides easy access to the flag values. Typically only one + 'FlagValues' object is needed by an application: gflags.FLAGS + + This class is heavily overloaded: + + 'Flag' objects are registered via __setitem__: + FLAGS['longname'] = x # register a new flag + + The .value attribute of the registered 'Flag' objects can be accessed + as attributes of this 'FlagValues' object, through __getattr__. Both + the long and short name of the original 'Flag' objects can be used to + access its value: + FLAGS.longname # parsed flag value + FLAGS.x # parsed flag value (short name) + + Command line arguments are scanned and passed to the registered 'Flag' + objects through the __call__ method. Unparsed arguments, including + argv[0] (e.g. the program name) are returned. + argv = FLAGS(sys.argv) # scan command line arguments + + The original registered Flag objects can be retrieved through the use + of the dictionary-like operator, __getitem__: + x = FLAGS['longname'] # access the registered Flag object + + The str() operator of a 'FlagValues' object provides help for all of + the registered 'Flag' objects. + """ + + def __init__(self): + # Since everything in this class is so heavily overloaded, the only + # way of defining and using fields is to access __dict__ directly. + + # Dictionary: flag name (string) -> Flag object. + self.__dict__['__flags'] = {} + # Dictionary: module name (string) -> list of Flag objects that are defined + # by that module. + self.__dict__['__flags_by_module'] = {} + # Dictionary: module id (int) -> list of Flag objects that are defined by + # that module. + self.__dict__['__flags_by_module_id'] = {} + # Dictionary: module name (string) -> list of Flag objects that are + # key for that module. + self.__dict__['__key_flags_by_module'] = {} + + # Set if we should use new style gnu_getopt rather than getopt when parsing + # the args. Only possible with Python 2.3+ + self.UseGnuGetOpt(False) + + def UseGnuGetOpt(self, use_gnu_getopt=True): + """Use GNU-style scanning. Allows mixing of flag and non-flag arguments. + + See http://docs.python.org/library/getopt.html#getopt.gnu_getopt + + Args: + use_gnu_getopt: wether or not to use GNU style scanning. + """ + self.__dict__['__use_gnu_getopt'] = use_gnu_getopt + + def IsGnuGetOpt(self): + return self.__dict__['__use_gnu_getopt'] + + def FlagDict(self): + return self.__dict__['__flags'] + + def FlagsByModuleDict(self): + """Returns the dictionary of module_name -> list of defined flags. + + Returns: + A dictionary. Its keys are module names (strings). Its values + are lists of Flag objects. + """ + return self.__dict__['__flags_by_module'] + + def FlagsByModuleIdDict(self): + """Returns the dictionary of module_id -> list of defined flags. + + Returns: + A dictionary. Its keys are module IDs (ints). Its values + are lists of Flag objects. + """ + return self.__dict__['__flags_by_module_id'] + + def KeyFlagsByModuleDict(self): + """Returns the dictionary of module_name -> list of key flags. + + Returns: + A dictionary. Its keys are module names (strings). Its values + are lists of Flag objects. + """ + return self.__dict__['__key_flags_by_module'] + + def _RegisterFlagByModule(self, module_name, flag): + """Records the module that defines a specific flag. + + We keep track of which flag is defined by which module so that we + can later sort the flags by module. + + Args: + module_name: A string, the name of a Python module. + flag: A Flag object, a flag that is key to the module. + """ + flags_by_module = self.FlagsByModuleDict() + flags_by_module.setdefault(module_name, []).append(flag) + + def _RegisterFlagByModuleId(self, module_id, flag): + """Records the module that defines a specific flag. + + Args: + module_id: An int, the ID of the Python module. + flag: A Flag object, a flag that is key to the module. + """ + flags_by_module_id = self.FlagsByModuleIdDict() + flags_by_module_id.setdefault(module_id, []).append(flag) + + def _RegisterKeyFlagForModule(self, module_name, flag): + """Specifies that a flag is a key flag for a module. + + Args: + module_name: A string, the name of a Python module. + flag: A Flag object, a flag that is key to the module. + """ + key_flags_by_module = self.KeyFlagsByModuleDict() + # The list of key flags for the module named module_name. + key_flags = key_flags_by_module.setdefault(module_name, []) + # Add flag, but avoid duplicates. + if flag not in key_flags: + key_flags.append(flag) + + def _GetFlagsDefinedByModule(self, module): + """Returns the list of flags defined by a module. + + Args: + module: A module object or a module name (a string). + + Returns: + A new list of Flag objects. Caller may update this list as he + wishes: none of those changes will affect the internals of this + FlagValue object. + """ + if not isinstance(module, str): + module = module.__name__ + + return list(self.FlagsByModuleDict().get(module, [])) + + def _GetKeyFlagsForModule(self, module): + """Returns the list of key flags for a module. + + Args: + module: A module object or a module name (a string) + + Returns: + A new list of Flag objects. Caller may update this list as he + wishes: none of those changes will affect the internals of this + FlagValue object. + """ + if not isinstance(module, str): + module = module.__name__ + + # Any flag is a key flag for the module that defined it. NOTE: + # key_flags is a fresh list: we can update it without affecting the + # internals of this FlagValues object. + key_flags = self._GetFlagsDefinedByModule(module) + + # Take into account flags explicitly declared as key for a module. + for flag in self.KeyFlagsByModuleDict().get(module, []): + if flag not in key_flags: + key_flags.append(flag) + return key_flags + + def FindModuleDefiningFlag(self, flagname, default=None): + """Return the name of the module defining this flag, or default. + + Args: + flagname: Name of the flag to lookup. + default: Value to return if flagname is not defined. Defaults + to None. + + Returns: + The name of the module which registered the flag with this name. + If no such module exists (i.e. no flag with this name exists), + we return default. + """ + for module, flags in self.FlagsByModuleDict().iteritems(): + for flag in flags: + if flag.name == flagname or flag.short_name == flagname: + return module + return default + + def FindModuleIdDefiningFlag(self, flagname, default=None): + """Return the ID of the module defining this flag, or default. + + Args: + flagname: Name of the flag to lookup. + default: Value to return if flagname is not defined. Defaults + to None. + + Returns: + The ID of the module which registered the flag with this name. + If no such module exists (i.e. no flag with this name exists), + we return default. + """ + for module_id, flags in self.FlagsByModuleIdDict().iteritems(): + for flag in flags: + if flag.name == flagname or flag.short_name == flagname: + return module_id + return default + + def AppendFlagValues(self, flag_values): + """Appends flags registered in another FlagValues instance. + + Args: + flag_values: registry to copy from + """ + for flag_name, flag in flag_values.FlagDict().iteritems(): + # Each flags with shortname appears here twice (once under its + # normal name, and again with its short name). To prevent + # problems (DuplicateFlagError) with double flag registration, we + # perform a check to make sure that the entry we're looking at is + # for its normal name. + if flag_name == flag.name: + try: + self[flag_name] = flag + except DuplicateFlagError: + raise DuplicateFlagError(flag_name, self, + other_flag_values=flag_values) + + def RemoveFlagValues(self, flag_values): + """Remove flags that were previously appended from another FlagValues. + + Args: + flag_values: registry containing flags to remove. + """ + for flag_name in flag_values.FlagDict(): + self.__delattr__(flag_name) + + def __setitem__(self, name, flag): + """Registers a new flag variable.""" + fl = self.FlagDict() + if not isinstance(flag, Flag): + raise IllegalFlagValue(flag) + if not isinstance(name, type("")): + raise FlagsError("Flag name must be a string") + if len(name) == 0: + raise FlagsError("Flag name cannot be empty") + # If running under pychecker, duplicate keys are likely to be + # defined. Disable check for duplicate keys when pycheck'ing. + if (name in fl and not flag.allow_override and + not fl[name].allow_override and not _RUNNING_PYCHECKER): + module, module_name = _GetCallingModuleObjectAndName() + if (self.FindModuleDefiningFlag(name) == module_name and + id(module) != self.FindModuleIdDefiningFlag(name)): + # If the flag has already been defined by a module with the same name, + # but a different ID, we can stop here because it indicates that the + # module is simply being imported a subsequent time. + return + raise DuplicateFlagError(name, self) + short_name = flag.short_name + if short_name is not None: + if (short_name in fl and not flag.allow_override and + not fl[short_name].allow_override and not _RUNNING_PYCHECKER): + raise DuplicateFlagError(short_name, self) + fl[short_name] = flag + fl[name] = flag + global _exported_flags + _exported_flags[name] = flag + + def __getitem__(self, name): + """Retrieves the Flag object for the flag --name.""" + return self.FlagDict()[name] + + def __getattr__(self, name): + """Retrieves the 'value' attribute of the flag --name.""" + fl = self.FlagDict() + if name not in fl: + raise AttributeError(name) + return fl[name].value + + def __setattr__(self, name, value): + """Sets the 'value' attribute of the flag --name.""" + fl = self.FlagDict() + fl[name].value = value + self._AssertValidators(fl[name].validators) + return value + + def _AssertAllValidators(self): + all_validators = set() + for flag in self.FlagDict().itervalues(): + for validator in flag.validators: + all_validators.add(validator) + self._AssertValidators(all_validators) + + def _AssertValidators(self, validators): + """Assert if all validators in the list are satisfied. + + Asserts validators in the order they were created. + Args: + validators: Iterable(gflags_validators.Validator), validators to be + verified + Raises: + AttributeError: if validators work with a non-existing flag. + IllegalFlagValue: if validation fails for at least one validator + """ + for validator in sorted( + validators, key=lambda validator: validator.insertion_index): + try: + validator.Verify(self) + except gflags_validators.Error, e: + message = validator.PrintFlagsWithValues(self) + raise IllegalFlagValue('%s: %s' % (message, str(e))) + + def _FlagIsRegistered(self, flag_obj): + """Checks whether a Flag object is registered under some name. + + Note: this is non trivial: in addition to its normal name, a flag + may have a short name too. In self.FlagDict(), both the normal and + the short name are mapped to the same flag object. E.g., calling + only "del FLAGS.short_name" is not unregistering the corresponding + Flag object (it is still registered under the longer name). + + Args: + flag_obj: A Flag object. + + Returns: + A boolean: True iff flag_obj is registered under some name. + """ + flag_dict = self.FlagDict() + # Check whether flag_obj is registered under its long name. + name = flag_obj.name + if flag_dict.get(name, None) == flag_obj: + return True + # Check whether flag_obj is registered under its short name. + short_name = flag_obj.short_name + if (short_name is not None and + flag_dict.get(short_name, None) == flag_obj): + return True + # The flag cannot be registered under any other name, so we do not + # need to do a full search through the values of self.FlagDict(). + return False + + def __delattr__(self, flag_name): + """Deletes a previously-defined flag from a flag object. + + This method makes sure we can delete a flag by using + + del flag_values_object.<flag_name> + + E.g., + + gflags.DEFINE_integer('foo', 1, 'Integer flag.') + del gflags.FLAGS.foo + + Args: + flag_name: A string, the name of the flag to be deleted. + + Raises: + AttributeError: When there is no registered flag named flag_name. + """ + fl = self.FlagDict() + if flag_name not in fl: + raise AttributeError(flag_name) + + flag_obj = fl[flag_name] + del fl[flag_name] + + if not self._FlagIsRegistered(flag_obj): + # If the Flag object indicated by flag_name is no longer + # registered (please see the docstring of _FlagIsRegistered), then + # we delete the occurrences of the flag object in all our internal + # dictionaries. + self.__RemoveFlagFromDictByModule(self.FlagsByModuleDict(), flag_obj) + self.__RemoveFlagFromDictByModule(self.FlagsByModuleIdDict(), flag_obj) + self.__RemoveFlagFromDictByModule(self.KeyFlagsByModuleDict(), flag_obj) + + def __RemoveFlagFromDictByModule(self, flags_by_module_dict, flag_obj): + """Removes a flag object from a module -> list of flags dictionary. + + Args: + flags_by_module_dict: A dictionary that maps module names to lists of + flags. + flag_obj: A flag object. + """ + for unused_module, flags_in_module in flags_by_module_dict.iteritems(): + # while (as opposed to if) takes care of multiple occurrences of a + # flag in the list for the same module. + while flag_obj in flags_in_module: + flags_in_module.remove(flag_obj) + + def SetDefault(self, name, value): + """Changes the default value of the named flag object.""" + fl = self.FlagDict() + if name not in fl: + raise AttributeError(name) + fl[name].SetDefault(value) + self._AssertValidators(fl[name].validators) + + def __contains__(self, name): + """Returns True if name is a value (flag) in the dict.""" + return name in self.FlagDict() + + has_key = __contains__ # a synonym for __contains__() + + def __iter__(self): + return iter(self.FlagDict()) + + def __call__(self, argv): + """Parses flags from argv; stores parsed flags into this FlagValues object. + + All unparsed arguments are returned. Flags are parsed using the GNU + Program Argument Syntax Conventions, using getopt: + + http://www.gnu.org/software/libc/manual/html_mono/libc.html#Getopt + + Args: + argv: argument list. Can be of any type that may be converted to a list. + + Returns: + The list of arguments not parsed as options, including argv[0] + + Raises: + FlagsError: on any parsing error + """ + # Support any sequence type that can be converted to a list + argv = list(argv) + + shortopts = "" + longopts = [] + + fl = self.FlagDict() + + # This pre parses the argv list for --flagfile=<> options. + argv = argv[:1] + self.ReadFlagsFromFiles(argv[1:], force_gnu=False) + + # Correct the argv to support the google style of passing boolean + # parameters. Boolean parameters may be passed by using --mybool, + # --nomybool, --mybool=(true|false|1|0). getopt does not support + # having options that may or may not have a parameter. We replace + # instances of the short form --mybool and --nomybool with their + # full forms: --mybool=(true|false). + original_argv = list(argv) # list() makes a copy + shortest_matches = None + for name, flag in fl.items(): + if not flag.boolean: + continue + if shortest_matches is None: + # Determine the smallest allowable prefix for all flag names + shortest_matches = self.ShortestUniquePrefixes(fl) + no_name = 'no' + name + prefix = shortest_matches[name] + no_prefix = shortest_matches[no_name] + + # Replace all occurrences of this boolean with extended forms + for arg_idx in range(1, len(argv)): + arg = argv[arg_idx] + if arg.find('=') >= 0: continue + if arg.startswith('--'+prefix) and ('--'+name).startswith(arg): + argv[arg_idx] = ('--%s=true' % name) + elif arg.startswith('--'+no_prefix) and ('--'+no_name).startswith(arg): + argv[arg_idx] = ('--%s=false' % name) + + # Loop over all of the flags, building up the lists of short options + # and long options that will be passed to getopt. Short options are + # specified as a string of letters, each letter followed by a colon + # if it takes an argument. Long options are stored in an array of + # strings. Each string ends with an '=' if it takes an argument. + for name, flag in fl.items(): + longopts.append(name + "=") + if len(name) == 1: # one-letter option: allow short flag type also + shortopts += name + if not flag.boolean: + shortopts += ":" + + longopts.append('undefok=') + undefok_flags = [] + + # In case --undefok is specified, loop to pick up unrecognized + # options one by one. + unrecognized_opts = [] + args = argv[1:] + while True: + try: + if self.__dict__['__use_gnu_getopt']: + optlist, unparsed_args = getopt.gnu_getopt(args, shortopts, longopts) + else: + optlist, unparsed_args = getopt.getopt(args, shortopts, longopts) + break + except getopt.GetoptError, e: + if not e.opt or e.opt in fl: + # Not an unrecognized option, re-raise the exception as a FlagsError + raise FlagsError(e) + # Remove offender from args and try again + for arg_index in range(len(args)): + if ((args[arg_index] == '--' + e.opt) or + (args[arg_index] == '-' + e.opt) or + (args[arg_index].startswith('--' + e.opt + '='))): + unrecognized_opts.append((e.opt, args[arg_index])) + args = args[0:arg_index] + args[arg_index+1:] + break + else: + # We should have found the option, so we don't expect to get + # here. We could assert, but raising the original exception + # might work better. + raise FlagsError(e) + + for name, arg in optlist: + if name == '--undefok': + flag_names = arg.split(',') + undefok_flags.extend(flag_names) + # For boolean flags, if --undefok=boolflag is specified, then we should + # also accept --noboolflag, in addition to --boolflag. + # Since we don't know the type of the undefok'd flag, this will affect + # non-boolean flags as well. + # NOTE: You shouldn't use --undefok=noboolflag, because then we will + # accept --nonoboolflag here. We are choosing not to do the conversion + # from noboolflag -> boolflag because of the ambiguity that flag names + # can start with 'no'. + undefok_flags.extend('no' + name for name in flag_names) + continue + if name.startswith('--'): + # long option + name = name[2:] + short_option = 0 + else: + # short option + name = name[1:] + short_option = 1 + if name in fl: + flag = fl[name] + if flag.boolean and short_option: arg = 1 + flag.Parse(arg) + + # If there were unrecognized options, raise an exception unless + # the options were named via --undefok. + for opt, value in unrecognized_opts: + if opt not in undefok_flags: + raise UnrecognizedFlagError(opt, value) + + if unparsed_args: + if self.__dict__['__use_gnu_getopt']: + # if using gnu_getopt just return the program name + remainder of argv. + ret_val = argv[:1] + unparsed_args + else: + # unparsed_args becomes the first non-flag detected by getopt to + # the end of argv. Because argv may have been modified above, + # return original_argv for this region. + ret_val = argv[:1] + original_argv[-len(unparsed_args):] + else: + ret_val = argv[:1] + + self._AssertAllValidators() + return ret_val + + def Reset(self): + """Resets the values to the point before FLAGS(argv) was called.""" + for f in self.FlagDict().values(): + f.Unparse() + + def RegisteredFlags(self): + """Returns: a list of the names and short names of all registered flags.""" + return list(self.FlagDict()) + + def FlagValuesDict(self): + """Returns: a dictionary that maps flag names to flag values.""" + flag_values = {} + + for flag_name in self.RegisteredFlags(): + flag = self.FlagDict()[flag_name] + flag_values[flag_name] = flag.value + + return flag_values + + def __str__(self): + """Generates a help string for all known flags.""" + return self.GetHelp() + + def GetHelp(self, prefix=''): + """Generates a help string for all known flags.""" + helplist = [] + + flags_by_module = self.FlagsByModuleDict() + if flags_by_module: + + modules = sorted(flags_by_module) + + # Print the help for the main module first, if possible. + main_module = _GetMainModule() + if main_module in modules: + modules.remove(main_module) + modules = [main_module] + modules + + for module in modules: + self.__RenderOurModuleFlags(module, helplist) + + self.__RenderModuleFlags('gflags', + _SPECIAL_FLAGS.FlagDict().values(), + helplist) + + else: + # Just print one long list of flags. + self.__RenderFlagList( + self.FlagDict().values() + _SPECIAL_FLAGS.FlagDict().values(), + helplist, prefix) + + return '\n'.join(helplist) + + def __RenderModuleFlags(self, module, flags, output_lines, prefix=""): + """Generates a help string for a given module.""" + if not isinstance(module, str): + module = module.__name__ + output_lines.append('\n%s%s:' % (prefix, module)) + self.__RenderFlagList(flags, output_lines, prefix + " ") + + def __RenderOurModuleFlags(self, module, output_lines, prefix=""): + """Generates a help string for a given module.""" + flags = self._GetFlagsDefinedByModule(module) + if flags: + self.__RenderModuleFlags(module, flags, output_lines, prefix) + + def __RenderOurModuleKeyFlags(self, module, output_lines, prefix=""): + """Generates a help string for the key flags of a given module. + + Args: + module: A module object or a module name (a string). + output_lines: A list of strings. The generated help message + lines will be appended to this list. + prefix: A string that is prepended to each generated help line. + """ + key_flags = self._GetKeyFlagsForModule(module) + if key_flags: + self.__RenderModuleFlags(module, key_flags, output_lines, prefix) + + def ModuleHelp(self, module): + """Describe the key flags of a module. + + Args: + module: A module object or a module name (a string). + + Returns: + string describing the key flags of a module. + """ + helplist = [] + self.__RenderOurModuleKeyFlags(module, helplist) + return '\n'.join(helplist) + + def MainModuleHelp(self): + """Describe the key flags of the main module. + + Returns: + string describing the key flags of a module. + """ + return self.ModuleHelp(_GetMainModule()) + + def __RenderFlagList(self, flaglist, output_lines, prefix=" "): + fl = self.FlagDict() + special_fl = _SPECIAL_FLAGS.FlagDict() + flaglist = [(flag.name, flag) for flag in flaglist] + flaglist.sort() + flagset = {} + for (name, flag) in flaglist: + # It's possible this flag got deleted or overridden since being + # registered in the per-module flaglist. Check now against the + # canonical source of current flag information, the FlagDict. + if fl.get(name, None) != flag and special_fl.get(name, None) != flag: + # a different flag is using this name now + continue + # only print help once + if flag in flagset: continue + flagset[flag] = 1 + flaghelp = "" + if flag.short_name: flaghelp += "-%s," % flag.short_name + if flag.boolean: + flaghelp += "--[no]%s" % flag.name + ":" + else: + flaghelp += "--%s" % flag.name + ":" + flaghelp += " " + if flag.help: + flaghelp += flag.help + flaghelp = TextWrap(flaghelp, indent=prefix+" ", + firstline_indent=prefix) + if flag.default_as_str: + flaghelp += "\n" + flaghelp += TextWrap("(default: %s)" % flag.default_as_str, + indent=prefix+" ") + if flag.parser.syntactic_help: + flaghelp += "\n" + flaghelp += TextWrap("(%s)" % flag.parser.syntactic_help, + indent=prefix+" ") + output_lines.append(flaghelp) + + def get(self, name, default): + """Returns the value of a flag (if not None) or a default value. + + Args: + name: A string, the name of a flag. + default: Default value to use if the flag value is None. + """ + + value = self.__getattr__(name) + if value is not None: # Can't do if not value, b/c value might be '0' or "" + return value + else: + return default + + def ShortestUniquePrefixes(self, fl): + """Returns: dictionary; maps flag names to their shortest unique prefix.""" + # Sort the list of flag names + sorted_flags = [] + for name, flag in fl.items(): + sorted_flags.append(name) + if flag.boolean: + sorted_flags.append('no%s' % name) + sorted_flags.sort() + + # For each name in the sorted list, determine the shortest unique + # prefix by comparing itself to the next name and to the previous + # name (the latter check uses cached info from the previous loop). + shortest_matches = {} + prev_idx = 0 + for flag_idx in range(len(sorted_flags)): + curr = sorted_flags[flag_idx] + if flag_idx == (len(sorted_flags) - 1): + next = None + else: + next = sorted_flags[flag_idx+1] + next_len = len(next) + for curr_idx in range(len(curr)): + if (next is None + or curr_idx >= next_len + or curr[curr_idx] != next[curr_idx]): + # curr longer than next or no more chars in common + shortest_matches[curr] = curr[:max(prev_idx, curr_idx) + 1] + prev_idx = curr_idx + break + else: + # curr shorter than (or equal to) next + shortest_matches[curr] = curr + prev_idx = curr_idx + 1 # next will need at least one more char + return shortest_matches + + def __IsFlagFileDirective(self, flag_string): + """Checks whether flag_string contain a --flagfile=<foo> directive.""" + if isinstance(flag_string, type("")): + if flag_string.startswith('--flagfile='): + return 1 + elif flag_string == '--flagfile': + return 1 + elif flag_string.startswith('-flagfile='): + return 1 + elif flag_string == '-flagfile': + return 1 + else: + return 0 + return 0 + + def ExtractFilename(self, flagfile_str): + """Returns filename from a flagfile_str of form -[-]flagfile=filename. + + The cases of --flagfile foo and -flagfile foo shouldn't be hitting + this function, as they are dealt with in the level above this + function. + """ + if flagfile_str.startswith('--flagfile='): + return os.path.expanduser((flagfile_str[(len('--flagfile=')):]).strip()) + elif flagfile_str.startswith('-flagfile='): + return os.path.expanduser((flagfile_str[(len('-flagfile=')):]).strip()) + else: + raise FlagsError('Hit illegal --flagfile type: %s' % flagfile_str) + + def __GetFlagFileLines(self, filename, parsed_file_list): + """Returns the useful (!=comments, etc) lines from a file with flags. + + Args: + filename: A string, the name of the flag file. + parsed_file_list: A list of the names of the files we have + already read. MUTATED BY THIS FUNCTION. + + Returns: + List of strings. See the note below. + + NOTE(springer): This function checks for a nested --flagfile=<foo> + tag and handles the lower file recursively. It returns a list of + all the lines that _could_ contain command flags. This is + EVERYTHING except whitespace lines and comments (lines starting + with '#' or '//'). + """ + line_list = [] # All line from flagfile. + flag_line_list = [] # Subset of lines w/o comments, blanks, flagfile= tags. + try: + file_obj = open(filename, 'r') + except IOError, e_msg: + raise CantOpenFlagFileError('ERROR:: Unable to open flagfile: %s' % e_msg) + + line_list = file_obj.readlines() + file_obj.close() + parsed_file_list.append(filename) + + # This is where we check each line in the file we just read. + for line in line_list: + if line.isspace(): + pass + # Checks for comment (a line that starts with '#'). + elif line.startswith('#') or line.startswith('//'): + pass + # Checks for a nested "--flagfile=<bar>" flag in the current file. + # If we find one, recursively parse down into that file. + elif self.__IsFlagFileDirective(line): + sub_filename = self.ExtractFilename(line) + # We do a little safety check for reparsing a file we've already done. + if not sub_filename in parsed_file_list: + included_flags = self.__GetFlagFileLines(sub_filename, + parsed_file_list) + flag_line_list.extend(included_flags) + else: # Case of hitting a circularly included file. + sys.stderr.write('Warning: Hit circular flagfile dependency: %s\n' % + (sub_filename,)) + else: + # Any line that's not a comment or a nested flagfile should get + # copied into 2nd position. This leaves earlier arguments + # further back in the list, thus giving them higher priority. + flag_line_list.append(line.strip()) + return flag_line_list + + def ReadFlagsFromFiles(self, argv, force_gnu=True): + """Processes command line args, but also allow args to be read from file. + + Args: + argv: A list of strings, usually sys.argv[1:], which may contain one or + more flagfile directives of the form --flagfile="./filename". + Note that the name of the program (sys.argv[0]) should be omitted. + force_gnu: If False, --flagfile parsing obeys normal flag semantics. + If True, --flagfile parsing instead follows gnu_getopt semantics. + *** WARNING *** force_gnu=False may become the future default! + + Returns: + + A new list which has the original list combined with what we read + from any flagfile(s). + + References: Global gflags.FLAG class instance. + + This function should be called before the normal FLAGS(argv) call. + This function scans the input list for a flag that looks like: + --flagfile=<somefile>. Then it opens <somefile>, reads all valid key + and value pairs and inserts them into the input list between the + first item of the list and any subsequent items in the list. + + Note that your application's flags are still defined the usual way + using gflags DEFINE_flag() type functions. + + Notes (assuming we're getting a commandline of some sort as our input): + --> Flags from the command line argv _should_ always take precedence! + --> A further "--flagfile=<otherfile.cfg>" CAN be nested in a flagfile. + It will be processed after the parent flag file is done. + --> For duplicate flags, first one we hit should "win". + --> In a flagfile, a line beginning with # or // is a comment. + --> Entirely blank lines _should_ be ignored. + """ + parsed_file_list = [] + rest_of_args = argv + new_argv = [] + while rest_of_args: + current_arg = rest_of_args[0] + rest_of_args = rest_of_args[1:] + if self.__IsFlagFileDirective(current_arg): + # This handles the case of -(-)flagfile foo. In this case the + # next arg really is part of this one. + if current_arg == '--flagfile' or current_arg == '-flagfile': + if not rest_of_args: + raise IllegalFlagValue('--flagfile with no argument') + flag_filename = os.path.expanduser(rest_of_args[0]) + rest_of_args = rest_of_args[1:] + else: + # This handles the case of (-)-flagfile=foo. + flag_filename = self.ExtractFilename(current_arg) + new_argv.extend( + self.__GetFlagFileLines(flag_filename, parsed_file_list)) + else: + new_argv.append(current_arg) + # Stop parsing after '--', like getopt and gnu_getopt. + if current_arg == '--': + break + # Stop parsing after a non-flag, like getopt. + if not current_arg.startswith('-'): + if not force_gnu and not self.__dict__['__use_gnu_getopt']: + break + + if rest_of_args: + new_argv.extend(rest_of_args) + + return new_argv + + def FlagsIntoString(self): + """Returns a string with the flags assignments from this FlagValues object. + + This function ignores flags whose value is None. Each flag + assignment is separated by a newline. + + NOTE: MUST mirror the behavior of the C++ CommandlineFlagsIntoString + from http://code.google.com/p/google-gflags + """ + s = '' + for flag in self.FlagDict().values(): + if flag.value is not None: + s += flag.Serialize() + '\n' + return s + + def AppendFlagsIntoFile(self, filename): + """Appends all flags assignments from this FlagInfo object to a file. + + Output will be in the format of a flagfile. + + NOTE: MUST mirror the behavior of the C++ AppendFlagsIntoFile + from http://code.google.com/p/google-gflags + """ + out_file = open(filename, 'a') + out_file.write(self.FlagsIntoString()) + out_file.close() + + def WriteHelpInXMLFormat(self, outfile=None): + """Outputs flag documentation in XML format. + + NOTE: We use element names that are consistent with those used by + the C++ command-line flag library, from + http://code.google.com/p/google-gflags + We also use a few new elements (e.g., <key>), but we do not + interfere / overlap with existing XML elements used by the C++ + library. Please maintain this consistency. + + Args: + outfile: File object we write to. Default None means sys.stdout. + """ + outfile = outfile or sys.stdout + + outfile.write('<?xml version=\"1.0\"?>\n') + outfile.write('<AllFlags>\n') + indent = ' ' + _WriteSimpleXMLElement(outfile, 'program', os.path.basename(sys.argv[0]), + indent) + + usage_doc = sys.modules['__main__'].__doc__ + if not usage_doc: + usage_doc = '\nUSAGE: %s [flags]\n' % sys.argv[0] + else: + usage_doc = usage_doc.replace('%s', sys.argv[0]) + _WriteSimpleXMLElement(outfile, 'usage', usage_doc, indent) + + # Get list of key flags for the main module. + key_flags = self._GetKeyFlagsForModule(_GetMainModule()) + + # Sort flags by declaring module name and next by flag name. + flags_by_module = self.FlagsByModuleDict() + all_module_names = list(flags_by_module.keys()) + all_module_names.sort() + for module_name in all_module_names: + flag_list = [(f.name, f) for f in flags_by_module[module_name]] + flag_list.sort() + for unused_flag_name, flag in flag_list: + is_key = flag in key_flags + flag.WriteInfoInXMLFormat(outfile, module_name, + is_key=is_key, indent=indent) + + outfile.write('</AllFlags>\n') + outfile.flush() + + def AddValidator(self, validator): + """Register new flags validator to be checked. + + Args: + validator: gflags_validators.Validator + Raises: + AttributeError: if validators work with a non-existing flag. + """ + for flag_name in validator.GetFlagsNames(): + flag = self.FlagDict()[flag_name] + flag.validators.append(validator) + +# end of FlagValues definition + + +# The global FlagValues instance +FLAGS = FlagValues() + + +def _StrOrUnicode(value): + """Converts value to a python string or, if necessary, unicode-string.""" + try: + return str(value) + except UnicodeEncodeError: + return unicode(value) + + +def _MakeXMLSafe(s): + """Escapes <, >, and & from s, and removes XML 1.0-illegal chars.""" + s = cgi.escape(s) # Escape <, >, and & + # Remove characters that cannot appear in an XML 1.0 document + # (http://www.w3.org/TR/REC-xml/#charsets). + # + # NOTE: if there are problems with current solution, one may move to + # XML 1.1, which allows such chars, if they're entity-escaped (&#xHH;). + s = re.sub(r'[\x00-\x08\x0b\x0c\x0e-\x1f]', '', s) + # Convert non-ascii characters to entities. Note: requires python >=2.3 + s = s.encode('ascii', 'xmlcharrefreplace') # u'\xce\x88' -> 'uΈ' + return s + + +def _WriteSimpleXMLElement(outfile, name, value, indent): + """Writes a simple XML element. + + Args: + outfile: File object we write the XML element to. + name: A string, the name of XML element. + value: A Python object, whose string representation will be used + as the value of the XML element. + indent: A string, prepended to each line of generated output. + """ + value_str = _StrOrUnicode(value) + if isinstance(value, bool): + # Display boolean values as the C++ flag library does: no caps. + value_str = value_str.lower() + safe_value_str = _MakeXMLSafe(value_str) + outfile.write('%s<%s>%s</%s>\n' % (indent, name, safe_value_str, name)) + + +class Flag: + """Information about a command-line flag. + + 'Flag' objects define the following fields: + .name - the name for this flag + .default - the default value for this flag + .default_as_str - default value as repr'd string, e.g., "'true'" (or None) + .value - the most recent parsed value of this flag; set by Parse() + .help - a help string or None if no help is available + .short_name - the single letter alias for this flag (or None) + .boolean - if 'true', this flag does not accept arguments + .present - true if this flag was parsed from command line flags. + .parser - an ArgumentParser object + .serializer - an ArgumentSerializer object + .allow_override - the flag may be redefined without raising an error + + The only public method of a 'Flag' object is Parse(), but it is + typically only called by a 'FlagValues' object. The Parse() method is + a thin wrapper around the 'ArgumentParser' Parse() method. The parsed + value is saved in .value, and the .present attribute is updated. If + this flag was already present, a FlagsError is raised. + + Parse() is also called during __init__ to parse the default value and + initialize the .value attribute. This enables other python modules to + safely use flags even if the __main__ module neglects to parse the + command line arguments. The .present attribute is cleared after + __init__ parsing. If the default value is set to None, then the + __init__ parsing step is skipped and the .value attribute is + initialized to None. + + Note: The default value is also presented to the user in the help + string, so it is important that it be a legal value for this flag. + """ + + def __init__(self, parser, serializer, name, default, help_string, + short_name=None, boolean=0, allow_override=0): + self.name = name + + if not help_string: + help_string = '(no help available)' + + self.help = help_string + self.short_name = short_name + self.boolean = boolean + self.present = 0 + self.parser = parser + self.serializer = serializer + self.allow_override = allow_override + self.value = None + self.validators = [] + + self.SetDefault(default) + + def __hash__(self): + return hash(id(self)) + + def __eq__(self, other): + return self is other + + def __lt__(self, other): + if isinstance(other, Flag): + return id(self) < id(other) + return NotImplemented + + def __GetParsedValueAsString(self, value): + if value is None: + return None + if self.serializer: + return repr(self.serializer.Serialize(value)) + if self.boolean: + if value: + return repr('true') + else: + return repr('false') + return repr(_StrOrUnicode(value)) + + def Parse(self, argument): + try: + self.value = self.parser.Parse(argument) + except ValueError, e: # recast ValueError as IllegalFlagValue + raise IllegalFlagValue("flag --%s=%s: %s" % (self.name, argument, e)) + self.present += 1 + + def Unparse(self): + if self.default is None: + self.value = None + else: + self.Parse(self.default) + self.present = 0 + + def Serialize(self): + if self.value is None: + return '' + if self.boolean: + if self.value: + return "--%s" % self.name + else: + return "--no%s" % self.name + else: + if not self.serializer: + raise FlagsError("Serializer not present for flag %s" % self.name) + return "--%s=%s" % (self.name, self.serializer.Serialize(self.value)) + + def SetDefault(self, value): + """Changes the default value (and current value too) for this Flag.""" + # We can't allow a None override because it may end up not being + # passed to C++ code when we're overriding C++ flags. So we + # cowardly bail out until someone fixes the semantics of trying to + # pass None to a C++ flag. See swig_flags.Init() for details on + # this behavior. + # TODO(olexiy): Users can directly call this method, bypassing all flags + # validators (we don't have FlagValues here, so we can not check + # validators). + # The simplest solution I see is to make this method private. + # Another approach would be to store reference to the corresponding + # FlagValues with each flag, but this seems to be an overkill. + if value is None and self.allow_override: + raise DuplicateFlagCannotPropagateNoneToSwig(self.name) + + self.default = value + self.Unparse() + self.default_as_str = self.__GetParsedValueAsString(self.value) + + def Type(self): + """Returns: a string that describes the type of this Flag.""" + # NOTE: we use strings, and not the types.*Type constants because + # our flags can have more exotic types, e.g., 'comma separated list + # of strings', 'whitespace separated list of strings', etc. + return self.parser.Type() + + def WriteInfoInXMLFormat(self, outfile, module_name, is_key=False, indent=''): + """Writes common info about this flag, in XML format. + + This is information that is relevant to all flags (e.g., name, + meaning, etc.). If you defined a flag that has some other pieces of + info, then please override _WriteCustomInfoInXMLFormat. + + Please do NOT override this method. + + Args: + outfile: File object we write to. + module_name: A string, the name of the module that defines this flag. + is_key: A boolean, True iff this flag is key for main module. + indent: A string that is prepended to each generated line. + """ + outfile.write(indent + '<flag>\n') + inner_indent = indent + ' ' + if is_key: + _WriteSimpleXMLElement(outfile, 'key', 'yes', inner_indent) + _WriteSimpleXMLElement(outfile, 'file', module_name, inner_indent) + # Print flag features that are relevant for all flags. + _WriteSimpleXMLElement(outfile, 'name', self.name, inner_indent) + if self.short_name: + _WriteSimpleXMLElement(outfile, 'short_name', self.short_name, + inner_indent) + if self.help: + _WriteSimpleXMLElement(outfile, 'meaning', self.help, inner_indent) + # The default flag value can either be represented as a string like on the + # command line, or as a Python object. We serialize this value in the + # latter case in order to remain consistent. + if self.serializer and not isinstance(self.default, str): + default_serialized = self.serializer.Serialize(self.default) + else: + default_serialized = self.default + _WriteSimpleXMLElement(outfile, 'default', default_serialized, inner_indent) + _WriteSimpleXMLElement(outfile, 'current', self.value, inner_indent) + _WriteSimpleXMLElement(outfile, 'type', self.Type(), inner_indent) + # Print extra flag features this flag may have. + self._WriteCustomInfoInXMLFormat(outfile, inner_indent) + outfile.write(indent + '</flag>\n') + + def _WriteCustomInfoInXMLFormat(self, outfile, indent): + """Writes extra info about this flag, in XML format. + + "Extra" means "not already printed by WriteInfoInXMLFormat above." + + Args: + outfile: File object we write to. + indent: A string that is prepended to each generated line. + """ + # Usually, the parser knows the extra details about the flag, so + # we just forward the call to it. + self.parser.WriteCustomInfoInXMLFormat(outfile, indent) +# End of Flag definition + + +class _ArgumentParserCache(type): + """Metaclass used to cache and share argument parsers among flags.""" + + _instances = {} + + def __call__(mcs, *args, **kwargs): + """Returns an instance of the argument parser cls. + + This method overrides behavior of the __new__ methods in + all subclasses of ArgumentParser (inclusive). If an instance + for mcs with the same set of arguments exists, this instance is + returned, otherwise a new instance is created. + + If any keyword arguments are defined, or the values in args + are not hashable, this method always returns a new instance of + cls. + + Args: + args: Positional initializer arguments. + kwargs: Initializer keyword arguments. + + Returns: + An instance of cls, shared or new. + """ + if kwargs: + return type.__call__(mcs, *args, **kwargs) + else: + instances = mcs._instances + key = (mcs,) + tuple(args) + try: + return instances[key] + except KeyError: + # No cache entry for key exists, create a new one. + return instances.setdefault(key, type.__call__(mcs, *args)) + except TypeError: + # An object in args cannot be hashed, always return + # a new instance. + return type.__call__(mcs, *args) + + +class ArgumentParser(object): + """Base class used to parse and convert arguments. + + The Parse() method checks to make sure that the string argument is a + legal value and convert it to a native type. If the value cannot be + converted, it should throw a 'ValueError' exception with a human + readable explanation of why the value is illegal. + + Subclasses should also define a syntactic_help string which may be + presented to the user to describe the form of the legal values. + + Argument parser classes must be stateless, since instances are cached + and shared between flags. Initializer arguments are allowed, but all + member variables must be derived from initializer arguments only. + """ + __metaclass__ = _ArgumentParserCache + + syntactic_help = "" + + def Parse(self, argument): + """Default implementation: always returns its argument unmodified.""" + return argument + + def Type(self): + return 'string' + + def WriteCustomInfoInXMLFormat(self, outfile, indent): + pass + + +class ArgumentSerializer: + """Base class for generating string representations of a flag value.""" + + def Serialize(self, value): + return _StrOrUnicode(value) + + +class ListSerializer(ArgumentSerializer): + + def __init__(self, list_sep): + self.list_sep = list_sep + + def Serialize(self, value): + return self.list_sep.join([_StrOrUnicode(x) for x in value]) + + +# Flags validators + + +def RegisterValidator(flag_name, + checker, + message='Flag validation failed', + flag_values=FLAGS): + """Adds a constraint, which will be enforced during program execution. + + The constraint is validated when flags are initially parsed, and after each + change of the corresponding flag's value. + Args: + flag_name: string, name of the flag to be checked. + checker: method to validate the flag. + input - value of the corresponding flag (string, boolean, etc. + This value will be passed to checker by the library). See file's + docstring for examples. + output - Boolean. + Must return True if validator constraint is satisfied. + If constraint is not satisfied, it should either return False or + raise gflags_validators.Error(desired_error_message). + message: error text to be shown to the user if checker returns False. + If checker raises gflags_validators.Error, message from the raised + Error will be shown. + flag_values: FlagValues + Raises: + AttributeError: if flag_name is not registered as a valid flag name. + """ + flag_values.AddValidator(gflags_validators.SimpleValidator(flag_name, + checker, + message)) + + +def MarkFlagAsRequired(flag_name, flag_values=FLAGS): + """Ensure that flag is not None during program execution. + + Registers a flag validator, which will follow usual validator + rules. + Args: + flag_name: string, name of the flag + flag_values: FlagValues + Raises: + AttributeError: if flag_name is not registered as a valid flag name. + """ + RegisterValidator(flag_name, + lambda value: value is not None, + message='Flag --%s must be specified.' % flag_name, + flag_values=flag_values) + + +def _RegisterBoundsValidatorIfNeeded(parser, name, flag_values): + """Enforce lower and upper bounds for numeric flags. + + Args: + parser: NumericParser (either FloatParser or IntegerParser). Provides lower + and upper bounds, and help text to display. + name: string, name of the flag + flag_values: FlagValues + """ + if parser.lower_bound is not None or parser.upper_bound is not None: + + def Checker(value): + if value is not None and parser.IsOutsideBounds(value): + message = '%s is not %s' % (value, parser.syntactic_help) + raise gflags_validators.Error(message) + return True + + RegisterValidator(name, + Checker, + flag_values=flag_values) + + +# The DEFINE functions are explained in mode details in the module doc string. + + +def DEFINE(parser, name, default, help, flag_values=FLAGS, serializer=None, + **args): + """Registers a generic Flag object. + + NOTE: in the docstrings of all DEFINE* functions, "registers" is short + for "creates a new flag and registers it". + + Auxiliary function: clients should use the specialized DEFINE_<type> + function instead. + + Args: + parser: ArgumentParser that is used to parse the flag arguments. + name: A string, the flag name. + default: The default value of the flag. + help: A help string. + flag_values: FlagValues object the flag will be registered with. + serializer: ArgumentSerializer that serializes the flag value. + args: Dictionary with extra keyword args that are passes to the + Flag __init__. + """ + DEFINE_flag(Flag(parser, serializer, name, default, help, **args), + flag_values) + + +def DEFINE_flag(flag, flag_values=FLAGS): + """Registers a 'Flag' object with a 'FlagValues' object. + + By default, the global FLAGS 'FlagValue' object is used. + + Typical users will use one of the more specialized DEFINE_xxx + functions, such as DEFINE_string or DEFINE_integer. But developers + who need to create Flag objects themselves should use this function + to register their flags. + """ + # copying the reference to flag_values prevents pychecker warnings + fv = flag_values + fv[flag.name] = flag + # Tell flag_values who's defining the flag. + if isinstance(flag_values, FlagValues): + # Regarding the above isinstance test: some users pass funny + # values of flag_values (e.g., {}) in order to avoid the flag + # registration (in the past, there used to be a flag_values == + # FLAGS test here) and redefine flags with the same name (e.g., + # debug). To avoid breaking their code, we perform the + # registration only if flag_values is a real FlagValues object. + module, module_name = _GetCallingModuleObjectAndName() + flag_values._RegisterFlagByModule(module_name, flag) + flag_values._RegisterFlagByModuleId(id(module), flag) + + +def _InternalDeclareKeyFlags(flag_names, + flag_values=FLAGS, key_flag_values=None): + """Declares a flag as key for the calling module. + + Internal function. User code should call DECLARE_key_flag or + ADOPT_module_key_flags instead. + + Args: + flag_names: A list of strings that are names of already-registered + Flag objects. + flag_values: A FlagValues object that the flags listed in + flag_names have registered with (the value of the flag_values + argument from the DEFINE_* calls that defined those flags). + This should almost never need to be overridden. + key_flag_values: A FlagValues object that (among possibly many + other things) keeps track of the key flags for each module. + Default None means "same as flag_values". This should almost + never need to be overridden. + + Raises: + UnrecognizedFlagError: when we refer to a flag that was not + defined yet. + """ + key_flag_values = key_flag_values or flag_values + + module = _GetCallingModule() + + for flag_name in flag_names: + if flag_name not in flag_values: + raise UnrecognizedFlagError(flag_name) + flag = flag_values.FlagDict()[flag_name] + key_flag_values._RegisterKeyFlagForModule(module, flag) + + +def DECLARE_key_flag(flag_name, flag_values=FLAGS): + """Declares one flag as key to the current module. + + Key flags are flags that are deemed really important for a module. + They are important when listing help messages; e.g., if the + --helpshort command-line flag is used, then only the key flags of the + main module are listed (instead of all flags, as in the case of + --help). + + Sample usage: + + gflags.DECLARED_key_flag('flag_1') + + Args: + flag_name: A string, the name of an already declared flag. + (Redeclaring flags as key, including flags implicitly key + because they were declared in this module, is a no-op.) + flag_values: A FlagValues object. This should almost never + need to be overridden. + """ + if flag_name in _SPECIAL_FLAGS: + # Take care of the special flags, e.g., --flagfile, --undefok. + # These flags are defined in _SPECIAL_FLAGS, and are treated + # specially during flag parsing, taking precedence over the + # user-defined flags. + _InternalDeclareKeyFlags([flag_name], + flag_values=_SPECIAL_FLAGS, + key_flag_values=flag_values) + return + _InternalDeclareKeyFlags([flag_name], flag_values=flag_values) + + +def ADOPT_module_key_flags(module, flag_values=FLAGS): + """Declares that all flags key to a module are key to the current module. + + Args: + module: A module object. + flag_values: A FlagValues object. This should almost never need + to be overridden. + + Raises: + FlagsError: When given an argument that is a module name (a + string), instead of a module object. + """ + # NOTE(salcianu): an even better test would be if not + # isinstance(module, types.ModuleType) but I didn't want to import + # types for such a tiny use. + if isinstance(module, str): + raise FlagsError('Received module name %s; expected a module object.' + % module) + _InternalDeclareKeyFlags( + [f.name for f in flag_values._GetKeyFlagsForModule(module.__name__)], + flag_values=flag_values) + # If module is this flag module, take _SPECIAL_FLAGS into account. + if module == _GetThisModuleObjectAndName()[0]: + _InternalDeclareKeyFlags( + # As we associate flags with _GetCallingModuleObjectAndName(), the + # special flags defined in this module are incorrectly registered with + # a different module. So, we can't use _GetKeyFlagsForModule. + # Instead, we take all flags from _SPECIAL_FLAGS (a private + # FlagValues, where no other module should register flags). + [f.name for f in _SPECIAL_FLAGS.FlagDict().values()], + flag_values=_SPECIAL_FLAGS, + key_flag_values=flag_values) + + +# +# STRING FLAGS +# + + +def DEFINE_string(name, default, help, flag_values=FLAGS, **args): + """Registers a flag whose value can be any string.""" + parser = ArgumentParser() + serializer = ArgumentSerializer() + DEFINE(parser, name, default, help, flag_values, serializer, **args) + + +# +# BOOLEAN FLAGS +# + + +class BooleanParser(ArgumentParser): + """Parser of boolean values.""" + + def Convert(self, argument): + """Converts the argument to a boolean; raise ValueError on errors.""" + if type(argument) == str: + if argument.lower() in ['true', 't', '1']: + return True + elif argument.lower() in ['false', 'f', '0']: + return False + + bool_argument = bool(argument) + if argument == bool_argument: + # The argument is a valid boolean (True, False, 0, or 1), and not just + # something that always converts to bool (list, string, int, etc.). + return bool_argument + + raise ValueError('Non-boolean argument to boolean flag', argument) + + def Parse(self, argument): + val = self.Convert(argument) + return val + + def Type(self): + return 'bool' + + +class BooleanFlag(Flag): + """Basic boolean flag. + + Boolean flags do not take any arguments, and their value is either + True (1) or False (0). The false value is specified on the command + line by prepending the word 'no' to either the long or the short flag + name. + + For example, if a Boolean flag was created whose long name was + 'update' and whose short name was 'x', then this flag could be + explicitly unset through either --noupdate or --nox. + """ + + def __init__(self, name, default, help, short_name=None, **args): + p = BooleanParser() + Flag.__init__(self, p, None, name, default, help, short_name, 1, **args) + if not self.help: self.help = "a boolean value" + + +def DEFINE_boolean(name, default, help, flag_values=FLAGS, **args): + """Registers a boolean flag. + + Such a boolean flag does not take an argument. If a user wants to + specify a false value explicitly, the long option beginning with 'no' + must be used: i.e. --noflag + + This flag will have a value of None, True or False. None is possible + if default=None and the user does not specify the flag on the command + line. + """ + DEFINE_flag(BooleanFlag(name, default, help, **args), flag_values) + + +# Match C++ API to unconfuse C++ people. +DEFINE_bool = DEFINE_boolean + + +class HelpFlag(BooleanFlag): + """ + HelpFlag is a special boolean flag that prints usage information and + raises a SystemExit exception if it is ever found in the command + line arguments. Note this is called with allow_override=1, so other + apps can define their own --help flag, replacing this one, if they want. + """ + def __init__(self): + BooleanFlag.__init__(self, "help", 0, "show this help", + short_name="?", allow_override=1) + def Parse(self, arg): + if arg: + doc = sys.modules["__main__"].__doc__ + flags = str(FLAGS) + print doc or ("\nUSAGE: %s [flags]\n" % sys.argv[0]) + if flags: + print "flags:" + print flags + sys.exit(1) +class HelpXMLFlag(BooleanFlag): + """Similar to HelpFlag, but generates output in XML format.""" + def __init__(self): + BooleanFlag.__init__(self, 'helpxml', False, + 'like --help, but generates XML output', + allow_override=1) + def Parse(self, arg): + if arg: + FLAGS.WriteHelpInXMLFormat(sys.stdout) + sys.exit(1) +class HelpshortFlag(BooleanFlag): + """ + HelpshortFlag is a special boolean flag that prints usage + information for the "main" module, and rasies a SystemExit exception + if it is ever found in the command line arguments. Note this is + called with allow_override=1, so other apps can define their own + --helpshort flag, replacing this one, if they want. + """ + def __init__(self): + BooleanFlag.__init__(self, "helpshort", 0, + "show usage only for this module", allow_override=1) + def Parse(self, arg): + if arg: + doc = sys.modules["__main__"].__doc__ + flags = FLAGS.MainModuleHelp() + print doc or ("\nUSAGE: %s [flags]\n" % sys.argv[0]) + if flags: + print "flags:" + print flags + sys.exit(1) + +# +# Numeric parser - base class for Integer and Float parsers +# + + +class NumericParser(ArgumentParser): + """Parser of numeric values. + + Parsed value may be bounded to a given upper and lower bound. + """ + + def IsOutsideBounds(self, val): + return ((self.lower_bound is not None and val < self.lower_bound) or + (self.upper_bound is not None and val > self.upper_bound)) + + def Parse(self, argument): + val = self.Convert(argument) + if self.IsOutsideBounds(val): + raise ValueError("%s is not %s" % (val, self.syntactic_help)) + return val + + def WriteCustomInfoInXMLFormat(self, outfile, indent): + if self.lower_bound is not None: + _WriteSimpleXMLElement(outfile, 'lower_bound', self.lower_bound, indent) + if self.upper_bound is not None: + _WriteSimpleXMLElement(outfile, 'upper_bound', self.upper_bound, indent) + + def Convert(self, argument): + """Default implementation: always returns its argument unmodified.""" + return argument + +# End of Numeric Parser + +# +# FLOAT FLAGS +# + + +class FloatParser(NumericParser): + """Parser of floating point values. + + Parsed value may be bounded to a given upper and lower bound. + """ + number_article = "a" + number_name = "number" + syntactic_help = " ".join((number_article, number_name)) + + def __init__(self, lower_bound=None, upper_bound=None): + super(FloatParser, self).__init__() + self.lower_bound = lower_bound + self.upper_bound = upper_bound + sh = self.syntactic_help + if lower_bound is not None and upper_bound is not None: + sh = ("%s in the range [%s, %s]" % (sh, lower_bound, upper_bound)) + elif lower_bound == 0: + sh = "a non-negative %s" % self.number_name + elif upper_bound == 0: + sh = "a non-positive %s" % self.number_name + elif upper_bound is not None: + sh = "%s <= %s" % (self.number_name, upper_bound) + elif lower_bound is not None: + sh = "%s >= %s" % (self.number_name, lower_bound) + self.syntactic_help = sh + + def Convert(self, argument): + """Converts argument to a float; raises ValueError on errors.""" + return float(argument) + + def Type(self): + return 'float' +# End of FloatParser + + +def DEFINE_float(name, default, help, lower_bound=None, upper_bound=None, + flag_values=FLAGS, **args): + """Registers a flag whose value must be a float. + + If lower_bound or upper_bound are set, then this flag must be + within the given range. + """ + parser = FloatParser(lower_bound, upper_bound) + serializer = ArgumentSerializer() + DEFINE(parser, name, default, help, flag_values, serializer, **args) + _RegisterBoundsValidatorIfNeeded(parser, name, flag_values=flag_values) + +# +# INTEGER FLAGS +# + + +class IntegerParser(NumericParser): + """Parser of an integer value. + + Parsed value may be bounded to a given upper and lower bound. + """ + number_article = "an" + number_name = "integer" + syntactic_help = " ".join((number_article, number_name)) + + def __init__(self, lower_bound=None, upper_bound=None): + super(IntegerParser, self).__init__() + self.lower_bound = lower_bound + self.upper_bound = upper_bound + sh = self.syntactic_help + if lower_bound is not None and upper_bound is not None: + sh = ("%s in the range [%s, %s]" % (sh, lower_bound, upper_bound)) + elif lower_bound == 1: + sh = "a positive %s" % self.number_name + elif upper_bound == -1: + sh = "a negative %s" % self.number_name + elif lower_bound == 0: + sh = "a non-negative %s" % self.number_name + elif upper_bound == 0: + sh = "a non-positive %s" % self.number_name + elif upper_bound is not None: + sh = "%s <= %s" % (self.number_name, upper_bound) + elif lower_bound is not None: + sh = "%s >= %s" % (self.number_name, lower_bound) + self.syntactic_help = sh + + def Convert(self, argument): + __pychecker__ = 'no-returnvalues' + if type(argument) == str: + base = 10 + if len(argument) > 2 and argument[0] == "0" and argument[1] == "x": + base = 16 + return int(argument, base) + else: + return int(argument) + + def Type(self): + return 'int' + + +def DEFINE_integer(name, default, help, lower_bound=None, upper_bound=None, + flag_values=FLAGS, **args): + """Registers a flag whose value must be an integer. + + If lower_bound, or upper_bound are set, then this flag must be + within the given range. + """ + parser = IntegerParser(lower_bound, upper_bound) + serializer = ArgumentSerializer() + DEFINE(parser, name, default, help, flag_values, serializer, **args) + _RegisterBoundsValidatorIfNeeded(parser, name, flag_values=flag_values) + + +# +# ENUM FLAGS +# + + +class EnumParser(ArgumentParser): + """Parser of a string enum value (a string value from a given set). + + If enum_values (see below) is not specified, any string is allowed. + """ + + def __init__(self, enum_values=None): + super(EnumParser, self).__init__() + self.enum_values = enum_values + + def Parse(self, argument): + if self.enum_values and argument not in self.enum_values: + raise ValueError("value should be one of <%s>" % + "|".join(self.enum_values)) + return argument + + def Type(self): + return 'string enum' + + +class EnumFlag(Flag): + """Basic enum flag; its value can be any string from list of enum_values.""" + + def __init__(self, name, default, help, enum_values=None, + short_name=None, **args): + enum_values = enum_values or [] + p = EnumParser(enum_values) + g = ArgumentSerializer() + Flag.__init__(self, p, g, name, default, help, short_name, **args) + if not self.help: self.help = "an enum string" + self.help = "<%s>: %s" % ("|".join(enum_values), self.help) + + def _WriteCustomInfoInXMLFormat(self, outfile, indent): + for enum_value in self.parser.enum_values: + _WriteSimpleXMLElement(outfile, 'enum_value', enum_value, indent) + + +def DEFINE_enum(name, default, enum_values, help, flag_values=FLAGS, + **args): + """Registers a flag whose value can be any string from enum_values.""" + DEFINE_flag(EnumFlag(name, default, help, enum_values, ** args), + flag_values) + + +# +# LIST FLAGS +# + + +class BaseListParser(ArgumentParser): + """Base class for a parser of lists of strings. + + To extend, inherit from this class; from the subclass __init__, call + + BaseListParser.__init__(self, token, name) + + where token is a character used to tokenize, and name is a description + of the separator. + """ + + def __init__(self, token=None, name=None): + assert name + super(BaseListParser, self).__init__() + self._token = token + self._name = name + self.syntactic_help = "a %s separated list" % self._name + + def Parse(self, argument): + if isinstance(argument, list): + return argument + elif argument == '': + return [] + else: + return [s.strip() for s in argument.split(self._token)] + + def Type(self): + return '%s separated list of strings' % self._name + + +class ListParser(BaseListParser): + """Parser for a comma-separated list of strings.""" + + def __init__(self): + BaseListParser.__init__(self, ',', 'comma') + + def WriteCustomInfoInXMLFormat(self, outfile, indent): + BaseListParser.WriteCustomInfoInXMLFormat(self, outfile, indent) + _WriteSimpleXMLElement(outfile, 'list_separator', repr(','), indent) + + +class WhitespaceSeparatedListParser(BaseListParser): + """Parser for a whitespace-separated list of strings.""" + + def __init__(self): + BaseListParser.__init__(self, None, 'whitespace') + + def WriteCustomInfoInXMLFormat(self, outfile, indent): + BaseListParser.WriteCustomInfoInXMLFormat(self, outfile, indent) + separators = list(string.whitespace) + separators.sort() + for ws_char in string.whitespace: + _WriteSimpleXMLElement(outfile, 'list_separator', repr(ws_char), indent) + + +def DEFINE_list(name, default, help, flag_values=FLAGS, **args): + """Registers a flag whose value is a comma-separated list of strings.""" + parser = ListParser() + serializer = ListSerializer(',') + DEFINE(parser, name, default, help, flag_values, serializer, **args) + + +def DEFINE_spaceseplist(name, default, help, flag_values=FLAGS, **args): + """Registers a flag whose value is a whitespace-separated list of strings. + + Any whitespace can be used as a separator. + """ + parser = WhitespaceSeparatedListParser() + serializer = ListSerializer(' ') + DEFINE(parser, name, default, help, flag_values, serializer, **args) + + +# +# MULTI FLAGS +# + + +class MultiFlag(Flag): + """A flag that can appear multiple time on the command-line. + + The value of such a flag is a list that contains the individual values + from all the appearances of that flag on the command-line. + + See the __doc__ for Flag for most behavior of this class. Only + differences in behavior are described here: + + * The default value may be either a single value or a list of values. + A single value is interpreted as the [value] singleton list. + + * The value of the flag is always a list, even if the option was + only supplied once, and even if the default value is a single + value + """ + + def __init__(self, *args, **kwargs): + Flag.__init__(self, *args, **kwargs) + self.help += ';\n repeat this option to specify a list of values' + + def Parse(self, arguments): + """Parses one or more arguments with the installed parser. + + Args: + arguments: a single argument or a list of arguments (typically a + list of default values); a single argument is converted + internally into a list containing one item. + """ + if not isinstance(arguments, list): + # Default value may be a list of values. Most other arguments + # will not be, so convert them into a single-item list to make + # processing simpler below. + arguments = [arguments] + + if self.present: + # keep a backup reference to list of previously supplied option values + values = self.value + else: + # "erase" the defaults with an empty list + values = [] + + for item in arguments: + # have Flag superclass parse argument, overwriting self.value reference + Flag.Parse(self, item) # also increments self.present + values.append(self.value) + + # put list of option values back in the 'value' attribute + self.value = values + + def Serialize(self): + if not self.serializer: + raise FlagsError("Serializer not present for flag %s" % self.name) + if self.value is None: + return '' + + s = '' + + multi_value = self.value + + for self.value in multi_value: + if s: s += ' ' + s += Flag.Serialize(self) + + self.value = multi_value + + return s + + def Type(self): + return 'multi ' + self.parser.Type() + + +def DEFINE_multi(parser, serializer, name, default, help, flag_values=FLAGS, + **args): + """Registers a generic MultiFlag that parses its args with a given parser. + + Auxiliary function. Normal users should NOT use it directly. + + Developers who need to create their own 'Parser' classes for options + which can appear multiple times can call this module function to + register their flags. + """ + DEFINE_flag(MultiFlag(parser, serializer, name, default, help, **args), + flag_values) + + +def DEFINE_multistring(name, default, help, flag_values=FLAGS, **args): + """Registers a flag whose value can be a list of any strings. + + Use the flag on the command line multiple times to place multiple + string values into the list. The 'default' may be a single string + (which will be converted into a single-element list) or a list of + strings. + """ + parser = ArgumentParser() + serializer = ArgumentSerializer() + DEFINE_multi(parser, serializer, name, default, help, flag_values, **args) + + +def DEFINE_multi_int(name, default, help, lower_bound=None, upper_bound=None, + flag_values=FLAGS, **args): + """Registers a flag whose value can be a list of arbitrary integers. + + Use the flag on the command line multiple times to place multiple + integer values into the list. The 'default' may be a single integer + (which will be converted into a single-element list) or a list of + integers. + """ + parser = IntegerParser(lower_bound, upper_bound) + serializer = ArgumentSerializer() + DEFINE_multi(parser, serializer, name, default, help, flag_values, **args) + + +def DEFINE_multi_float(name, default, help, lower_bound=None, upper_bound=None, + flag_values=FLAGS, **args): + """Registers a flag whose value can be a list of arbitrary floats. + + Use the flag on the command line multiple times to place multiple + float values into the list. The 'default' may be a single float + (which will be converted into a single-element list) or a list of + floats. + """ + parser = FloatParser(lower_bound, upper_bound) + serializer = ArgumentSerializer() + DEFINE_multi(parser, serializer, name, default, help, flag_values, **args) + + +# Now register the flags that we want to exist in all applications. +# These are all defined with allow_override=1, so user-apps can use +# these flagnames for their own purposes, if they want. +DEFINE_flag(HelpFlag()) +DEFINE_flag(HelpshortFlag()) +DEFINE_flag(HelpXMLFlag()) + +# Define special flags here so that help may be generated for them. +# NOTE: Please do NOT use _SPECIAL_FLAGS from outside this module. +_SPECIAL_FLAGS = FlagValues() + + +DEFINE_string( + 'flagfile', "", + "Insert flag definitions from the given file into the command line.", + _SPECIAL_FLAGS) + +DEFINE_string( + 'undefok', "", + "comma-separated list of flag names that it is okay to specify " + "on the command line even if the program does not define a flag " + "with that name. IMPORTANT: flags in this list that have " + "arguments MUST use the --flag=value format.", _SPECIAL_FLAGS) diff --git a/third_party/python_gflags/gflags2man.py b/third_party/python_gflags/gflags2man.py new file mode 100755 index 0000000..3a50f9e --- /dev/null +++ b/third_party/python_gflags/gflags2man.py @@ -0,0 +1,544 @@ +#!/usr/bin/env python + +# Copyright (c) 2006, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +"""gflags2man runs a Google flags base program and generates a man page. + +Run the program, parse the output, and then format that into a man +page. + +Usage: + gflags2man <program> [program] ... +""" + +# TODO(csilvers): work with windows paths (\) as well as unix (/) + +# This may seem a bit of an end run, but it: doesn't bloat flags, can +# support python/java/C++, supports older executables, and can be +# extended to other document formats. +# Inspired by help2man. + + + +import os +import re +import sys +import stat +import time + +import gflags + +_VERSION = '0.1' + + +def _GetDefaultDestDir(): + home = os.environ.get('HOME', '') + homeman = os.path.join(home, 'man', 'man1') + if home and os.path.exists(homeman): + return homeman + else: + return os.environ.get('TMPDIR', '/tmp') + +FLAGS = gflags.FLAGS +gflags.DEFINE_string('dest_dir', _GetDefaultDestDir(), + 'Directory to write resulting manpage to.' + ' Specify \'-\' for stdout') +gflags.DEFINE_string('help_flag', '--help', + 'Option to pass to target program in to get help') +gflags.DEFINE_integer('v', 0, 'verbosity level to use for output') + + +_MIN_VALID_USAGE_MSG = 9 # if fewer lines than this, help is suspect + + +class Logging: + """A super-simple logging class""" + def error(self, msg): print >>sys.stderr, "ERROR: ", msg + def warn(self, msg): print >>sys.stderr, "WARNING: ", msg + def info(self, msg): print msg + def debug(self, msg): self.vlog(1, msg) + def vlog(self, level, msg): + if FLAGS.v >= level: print msg +logging = Logging() +class App: + def usage(self, shorthelp=0): + print >>sys.stderr, __doc__ + print >>sys.stderr, "flags:" + print >>sys.stderr, str(FLAGS) + def run(self): + main(sys.argv) +app = App() + + +def GetRealPath(filename): + """Given an executable filename, find in the PATH or find absolute path. + Args: + filename An executable filename (string) + Returns: + Absolute version of filename. + None if filename could not be found locally, absolutely, or in PATH + """ + if os.path.isabs(filename): # already absolute + return filename + + if filename.startswith('./') or filename.startswith('../'): # relative + return os.path.abspath(filename) + + path = os.getenv('PATH', '') + for directory in path.split(':'): + tryname = os.path.join(directory, filename) + if os.path.exists(tryname): + if not os.path.isabs(directory): # relative directory + return os.path.abspath(tryname) + return tryname + if os.path.exists(filename): + return os.path.abspath(filename) + return None # could not determine + +class Flag(object): + """The information about a single flag.""" + + def __init__(self, flag_desc, help): + """Create the flag object. + Args: + flag_desc The command line forms this could take. (string) + help The help text (string) + """ + self.desc = flag_desc # the command line forms + self.help = help # the help text + self.default = '' # default value + self.tips = '' # parsing/syntax tips + + +class ProgramInfo(object): + """All the information gleaned from running a program with --help.""" + + # Match a module block start, for python scripts --help + # "goopy.logging:" + module_py_re = re.compile(r'(\S.+):$') + # match the start of a flag listing + # " -v,--verbosity: Logging verbosity" + flag_py_re = re.compile(r'\s+(-\S+):\s+(.*)$') + # " (default: '0')" + flag_default_py_re = re.compile(r'\s+\(default:\s+\'(.*)\'\)$') + # " (an integer)" + flag_tips_py_re = re.compile(r'\s+\((.*)\)$') + + # Match a module block start, for c++ programs --help + # "google/base/commandlineflags": + module_c_re = re.compile(r'\s+Flags from (\S.+):$') + # match the start of a flag listing + # " -v,--verbosity: Logging verbosity" + flag_c_re = re.compile(r'\s+(-\S+)\s+(.*)$') + + # Match a module block start, for java programs --help + # "com.google.common.flags" + module_java_re = re.compile(r'\s+Flags for (\S.+):$') + # match the start of a flag listing + # " -v,--verbosity: Logging verbosity" + flag_java_re = re.compile(r'\s+(-\S+)\s+(.*)$') + + def __init__(self, executable): + """Create object with executable. + Args: + executable Program to execute (string) + """ + self.long_name = executable + self.name = os.path.basename(executable) # name + # Get name without extension (PAR files) + (self.short_name, self.ext) = os.path.splitext(self.name) + self.executable = GetRealPath(executable) # name of the program + self.output = [] # output from the program. List of lines. + self.desc = [] # top level description. List of lines + self.modules = {} # { section_name(string), [ flags ] } + self.module_list = [] # list of module names in their original order + self.date = time.localtime(time.time()) # default date info + + def Run(self): + """Run it and collect output. + + Returns: + 1 (true) If everything went well. + 0 (false) If there were problems. + """ + if not self.executable: + logging.error('Could not locate "%s"' % self.long_name) + return 0 + + finfo = os.stat(self.executable) + self.date = time.localtime(finfo[stat.ST_MTIME]) + + logging.info('Running: %s %s </dev/null 2>&1' + % (self.executable, FLAGS.help_flag)) + # --help output is often routed to stderr, so we combine with stdout. + # Re-direct stdin to /dev/null to encourage programs that + # don't understand --help to exit. + (child_stdin, child_stdout_and_stderr) = os.popen4( + [self.executable, FLAGS.help_flag]) + child_stdin.close() # '</dev/null' + self.output = child_stdout_and_stderr.readlines() + child_stdout_and_stderr.close() + if len(self.output) < _MIN_VALID_USAGE_MSG: + logging.error('Error: "%s %s" returned only %d lines: %s' + % (self.name, FLAGS.help_flag, + len(self.output), self.output)) + return 0 + return 1 + + def Parse(self): + """Parse program output.""" + (start_line, lang) = self.ParseDesc() + if start_line < 0: + return + if 'python' == lang: + self.ParsePythonFlags(start_line) + elif 'c' == lang: + self.ParseCFlags(start_line) + elif 'java' == lang: + self.ParseJavaFlags(start_line) + + def ParseDesc(self, start_line=0): + """Parse the initial description. + + This could be Python or C++. + + Returns: + (start_line, lang_type) + start_line Line to start parsing flags on (int) + lang_type Either 'python' or 'c' + (-1, '') if the flags start could not be found + """ + exec_mod_start = self.executable + ':' + + after_blank = 0 + start_line = 0 # ignore the passed-in arg for now (?) + for start_line in range(start_line, len(self.output)): # collect top description + line = self.output[start_line].rstrip() + # Python flags start with 'flags:\n' + if ('flags:' == line + and len(self.output) > start_line+1 + and '' == self.output[start_line+1].rstrip()): + start_line += 2 + logging.debug('Flags start (python): %s' % line) + return (start_line, 'python') + # SWIG flags just have the module name followed by colon. + if exec_mod_start == line: + logging.debug('Flags start (swig): %s' % line) + return (start_line, 'python') + # C++ flags begin after a blank line and with a constant string + if after_blank and line.startswith(' Flags from '): + logging.debug('Flags start (c): %s' % line) + return (start_line, 'c') + # java flags begin with a constant string + if line == 'where flags are': + logging.debug('Flags start (java): %s' % line) + start_line += 2 # skip "Standard flags:" + return (start_line, 'java') + + logging.debug('Desc: %s' % line) + self.desc.append(line) + after_blank = (line == '') + else: + logging.warn('Never found the start of the flags section for "%s"!' + % self.long_name) + return (-1, '') + + def ParsePythonFlags(self, start_line=0): + """Parse python/swig style flags.""" + modname = None # name of current module + modlist = [] + flag = None + for line_num in range(start_line, len(self.output)): # collect flags + line = self.output[line_num].rstrip() + if not line: # blank + continue + + mobj = self.module_py_re.match(line) + if mobj: # start of a new module + modname = mobj.group(1) + logging.debug('Module: %s' % line) + if flag: + modlist.append(flag) + self.module_list.append(modname) + self.modules.setdefault(modname, []) + modlist = self.modules[modname] + flag = None + continue + + mobj = self.flag_py_re.match(line) + if mobj: # start of a new flag + if flag: + modlist.append(flag) + logging.debug('Flag: %s' % line) + flag = Flag(mobj.group(1), mobj.group(2)) + continue + + if not flag: # continuation of a flag + logging.error('Flag info, but no current flag "%s"' % line) + mobj = self.flag_default_py_re.match(line) + if mobj: # (default: '...') + flag.default = mobj.group(1) + logging.debug('Fdef: %s' % line) + continue + mobj = self.flag_tips_py_re.match(line) + if mobj: # (tips) + flag.tips = mobj.group(1) + logging.debug('Ftip: %s' % line) + continue + if flag and flag.help: + flag.help += line # multiflags tack on an extra line + else: + logging.info('Extra: %s' % line) + if flag: + modlist.append(flag) + + def ParseCFlags(self, start_line=0): + """Parse C style flags.""" + modname = None # name of current module + modlist = [] + flag = None + for line_num in range(start_line, len(self.output)): # collect flags + line = self.output[line_num].rstrip() + if not line: # blank lines terminate flags + if flag: # save last flag + modlist.append(flag) + flag = None + continue + + mobj = self.module_c_re.match(line) + if mobj: # start of a new module + modname = mobj.group(1) + logging.debug('Module: %s' % line) + if flag: + modlist.append(flag) + self.module_list.append(modname) + self.modules.setdefault(modname, []) + modlist = self.modules[modname] + flag = None + continue + + mobj = self.flag_c_re.match(line) + if mobj: # start of a new flag + if flag: # save last flag + modlist.append(flag) + logging.debug('Flag: %s' % line) + flag = Flag(mobj.group(1), mobj.group(2)) + continue + + # append to flag help. type and default are part of the main text + if flag: + flag.help += ' ' + line.strip() + else: + logging.info('Extra: %s' % line) + if flag: + modlist.append(flag) + + def ParseJavaFlags(self, start_line=0): + """Parse Java style flags (com.google.common.flags).""" + # The java flags prints starts with a "Standard flags" "module" + # that doesn't follow the standard module syntax. + modname = 'Standard flags' # name of current module + self.module_list.append(modname) + self.modules.setdefault(modname, []) + modlist = self.modules[modname] + flag = None + + for line_num in range(start_line, len(self.output)): # collect flags + line = self.output[line_num].rstrip() + logging.vlog(2, 'Line: "%s"' % line) + if not line: # blank lines terminate module + if flag: # save last flag + modlist.append(flag) + flag = None + continue + + mobj = self.module_java_re.match(line) + if mobj: # start of a new module + modname = mobj.group(1) + logging.debug('Module: %s' % line) + if flag: + modlist.append(flag) + self.module_list.append(modname) + self.modules.setdefault(modname, []) + modlist = self.modules[modname] + flag = None + continue + + mobj = self.flag_java_re.match(line) + if mobj: # start of a new flag + if flag: # save last flag + modlist.append(flag) + logging.debug('Flag: %s' % line) + flag = Flag(mobj.group(1), mobj.group(2)) + continue + + # append to flag help. type and default are part of the main text + if flag: + flag.help += ' ' + line.strip() + else: + logging.info('Extra: %s' % line) + if flag: + modlist.append(flag) + + def Filter(self): + """Filter parsed data to create derived fields.""" + if not self.desc: + self.short_desc = '' + return + + for i in range(len(self.desc)): # replace full path with name + if self.desc[i].find(self.executable) >= 0: + self.desc[i] = self.desc[i].replace(self.executable, self.name) + + self.short_desc = self.desc[0] + word_list = self.short_desc.split(' ') + all_names = [ self.name, self.short_name, ] + # Since the short_desc is always listed right after the name, + # trim it from the short_desc + while word_list and (word_list[0] in all_names + or word_list[0].lower() in all_names): + del word_list[0] + self.short_desc = '' # signal need to reconstruct + if not self.short_desc and word_list: + self.short_desc = ' '.join(word_list) + + +class GenerateDoc(object): + """Base class to output flags information.""" + + def __init__(self, proginfo, directory='.'): + """Create base object. + Args: + proginfo A ProgramInfo object + directory Directory to write output into + """ + self.info = proginfo + self.dirname = directory + + def Output(self): + """Output all sections of the page.""" + self.Open() + self.Header() + self.Body() + self.Footer() + + def Open(self): raise NotImplementedError # define in subclass + def Header(self): raise NotImplementedError # define in subclass + def Body(self): raise NotImplementedError # define in subclass + def Footer(self): raise NotImplementedError # define in subclass + + +class GenerateMan(GenerateDoc): + """Output a man page.""" + + def __init__(self, proginfo, directory='.'): + """Create base object. + Args: + proginfo A ProgramInfo object + directory Directory to write output into + """ + GenerateDoc.__init__(self, proginfo, directory) + + def Open(self): + if self.dirname == '-': + logging.info('Writing to stdout') + self.fp = sys.stdout + else: + self.file_path = '%s.1' % os.path.join(self.dirname, self.info.name) + logging.info('Writing: %s' % self.file_path) + self.fp = open(self.file_path, 'w') + + def Header(self): + self.fp.write( + '.\\" DO NOT MODIFY THIS FILE! It was generated by gflags2man %s\n' + % _VERSION) + self.fp.write( + '.TH %s "1" "%s" "%s" "User Commands"\n' + % (self.info.name, time.strftime('%x', self.info.date), self.info.name)) + self.fp.write( + '.SH NAME\n%s \\- %s\n' % (self.info.name, self.info.short_desc)) + self.fp.write( + '.SH SYNOPSIS\n.B %s\n[\\fIFLAGS\\fR]...\n' % self.info.name) + + def Body(self): + self.fp.write( + '.SH DESCRIPTION\n.\\" Add any additional description here\n.PP\n') + for ln in self.info.desc: + self.fp.write('%s\n' % ln) + self.fp.write( + '.SH OPTIONS\n') + # This shows flags in the original order + for modname in self.info.module_list: + if modname.find(self.info.executable) >= 0: + mod = modname.replace(self.info.executable, self.info.name) + else: + mod = modname + self.fp.write('\n.P\n.I %s\n' % mod) + for flag in self.info.modules[modname]: + help_string = flag.help + if flag.default or flag.tips: + help_string += '\n.br\n' + if flag.default: + help_string += ' (default: \'%s\')' % flag.default + if flag.tips: + help_string += ' (%s)' % flag.tips + self.fp.write( + '.TP\n%s\n%s\n' % (flag.desc, help_string)) + + def Footer(self): + self.fp.write( + '.SH COPYRIGHT\nCopyright \(co %s Google.\n' + % time.strftime('%Y', self.info.date)) + self.fp.write('Gflags2man created this page from "%s %s" output.\n' + % (self.info.name, FLAGS.help_flag)) + self.fp.write('\nGflags2man was written by Dan Christian. ' + ' Note that the date on this' + ' page is the modification date of %s.\n' % self.info.name) + + +def main(argv): + argv = FLAGS(argv) # handles help as well + if len(argv) <= 1: + app.usage(shorthelp=1) + return 1 + + for arg in argv[1:]: + prog = ProgramInfo(arg) + if not prog.Run(): + continue + prog.Parse() + prog.Filter() + doc = GenerateMan(prog, FLAGS.dest_dir) + doc.Output() + return 0 + +if __name__ == '__main__': + app.run() diff --git a/third_party/python_gflags/gflags_validators.py b/third_party/python_gflags/gflags_validators.py new file mode 100755 index 0000000..d83058d5 --- /dev/null +++ b/third_party/python_gflags/gflags_validators.py @@ -0,0 +1,187 @@ +#!/usr/bin/env python + +# Copyright (c) 2010, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +"""Module to enforce different constraints on flags. + +A validator represents an invariant, enforced over a one or more flags. +See 'FLAGS VALIDATORS' in gflags.py's docstring for a usage manual. +""" + +__author__ = 'olexiy@google.com (Olexiy Oryeshko)' + + +class Error(Exception): + """Thrown If validator constraint is not satisfied.""" + + +class Validator(object): + """Base class for flags validators. + + Users should NOT overload these classes, and use gflags.Register... + methods instead. + """ + + # Used to assign each validator an unique insertion_index + validators_count = 0 + + def __init__(self, checker, message): + """Constructor to create all validators. + + Args: + checker: function to verify the constraint. + Input of this method varies, see SimpleValidator and + DictionaryValidator for a detailed description. + message: string, error message to be shown to the user + """ + self.checker = checker + self.message = message + Validator.validators_count += 1 + # Used to assert validators in the order they were registered (CL/18694236) + self.insertion_index = Validator.validators_count + + def Verify(self, flag_values): + """Verify that constraint is satisfied. + + flags library calls this method to verify Validator's constraint. + Args: + flag_values: gflags.FlagValues, containing all flags + Raises: + Error: if constraint is not satisfied. + """ + param = self._GetInputToCheckerFunction(flag_values) + if not self.checker(param): + raise Error(self.message) + + def GetFlagsNames(self): + """Return the names of the flags checked by this validator. + + Returns: + [string], names of the flags + """ + raise NotImplementedError('This method should be overloaded') + + def PrintFlagsWithValues(self, flag_values): + raise NotImplementedError('This method should be overloaded') + + def _GetInputToCheckerFunction(self, flag_values): + """Given flag values, construct the input to be given to checker. + + Args: + flag_values: gflags.FlagValues, containing all flags. + Returns: + Return type depends on the specific validator. + """ + raise NotImplementedError('This method should be overloaded') + + +class SimpleValidator(Validator): + """Validator behind RegisterValidator() method. + + Validates that a single flag passes its checker function. The checker function + takes the flag value and returns True (if value looks fine) or, if flag value + is not valid, either returns False or raises an Exception.""" + def __init__(self, flag_name, checker, message): + """Constructor. + + Args: + flag_name: string, name of the flag. + checker: function to verify the validator. + input - value of the corresponding flag (string, boolean, etc). + output - Boolean. Must return True if validator constraint is satisfied. + If constraint is not satisfied, it should either return False or + raise Error. + message: string, error message to be shown to the user if validator's + condition is not satisfied + """ + super(SimpleValidator, self).__init__(checker, message) + self.flag_name = flag_name + + def GetFlagsNames(self): + return [self.flag_name] + + def PrintFlagsWithValues(self, flag_values): + return 'flag --%s=%s' % (self.flag_name, flag_values[self.flag_name].value) + + def _GetInputToCheckerFunction(self, flag_values): + """Given flag values, construct the input to be given to checker. + + Args: + flag_values: gflags.FlagValues + Returns: + value of the corresponding flag. + """ + return flag_values[self.flag_name].value + + +class DictionaryValidator(Validator): + """Validator behind RegisterDictionaryValidator method. + + Validates that flag values pass their common checker function. The checker + function takes flag values and returns True (if values look fine) or, + if values are not valid, either returns False or raises an Exception. + """ + def __init__(self, flag_names, checker, message): + """Constructor. + + Args: + flag_names: [string], containing names of the flags used by checker. + checker: function to verify the validator. + input - dictionary, with keys() being flag_names, and value for each + key being the value of the corresponding flag (string, boolean, etc). + output - Boolean. Must return True if validator constraint is satisfied. + If constraint is not satisfied, it should either return False or + raise Error. + message: string, error message to be shown to the user if validator's + condition is not satisfied + """ + super(DictionaryValidator, self).__init__(checker, message) + self.flag_names = flag_names + + def _GetInputToCheckerFunction(self, flag_values): + """Given flag values, construct the input to be given to checker. + + Args: + flag_values: gflags.FlagValues + Returns: + dictionary, with keys() being self.lag_names, and value for each key + being the value of the corresponding flag (string, boolean, etc). + """ + return dict([key, flag_values[key].value] for key in self.flag_names) + + def PrintFlagsWithValues(self, flag_values): + prefix = 'flags ' + flags_with_values = [] + for key in self.flag_names: + flags_with_values.append('%s=%s' % (key, flag_values[key].value)) + return prefix + ', '.join(flags_with_values) + + def GetFlagsNames(self): + return self.flag_names diff --git a/third_party/python_gflags/setup.py b/third_party/python_gflags/setup.py new file mode 100755 index 0000000..573db2d --- /dev/null +++ b/third_party/python_gflags/setup.py @@ -0,0 +1,44 @@ +#!/usr/bin/env python + +# Copyright (c) 2007, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +from setuptools import setup + +setup(name='python-gflags', + version='2.0', + description='Google Commandline Flags Module', + license='BSD', + author='Google Inc. and others', + author_email='google-gflags@googlegroups.com', + url='http://code.google.com/p/python-gflags', + py_modules=["gflags", "gflags_validators"], + data_files=[("bin", ["gflags2man.py"])], + include_package_data=True, + ) |