#!/usr/bin/python
# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Checks third-party licenses for the purposes of the Android WebView build.

The Android tree includes a snapshot of Chromium in order to power the system
WebView.  This tool checks that all code uses open-source licenses compatible
with Android, and that we meet the requirements of those licenses. It can also
be used to generate an Android NOTICE file for the third-party code.

It makes use of src/tools/licenses.py and the README.chromium files on which
it depends. It also makes use of a data file, third_party_files_whitelist.txt,
which whitelists indicidual files which contain third-party code but which
aren't in a third-party directory with a README.chromium file.
"""

import glob
import imp
import multiprocessing
import optparse
import os
import re
import sys
import textwrap


REPOSITORY_ROOT = os.path.abspath(os.path.join(
    os.path.dirname(__file__), '..', '..'))

# Import third_party/PRESUBMIT.py via imp to avoid importing a random
# PRESUBMIT.py from $PATH, also make sure we don't generate a .pyc file.
sys.dont_write_bytecode = True
third_party = \
  imp.load_source('PRESUBMIT', \
                  os.path.join(REPOSITORY_ROOT, 'third_party', 'PRESUBMIT.py'))

sys.path.append(os.path.join(REPOSITORY_ROOT, 'third_party'))
import jinja2
sys.path.append(os.path.join(REPOSITORY_ROOT, 'tools'))
import licenses

import copyright_scanner
import known_issues

class InputApi(object):
  def __init__(self):
    self.os_path = os.path
    self.os_walk = os.walk
    self.re = re
    self.ReadFile = _ReadFile
    self.change = InputApiChange()

class InputApiChange(object):
  def __init__(self):
    self.RepositoryRoot = lambda: REPOSITORY_ROOT


def GetIncompatibleDirectories():
  """Gets a list of third-party directories which use licenses incompatible
  with Android. This is used by the snapshot tool.
  Returns:
    A list of directories.
  """

  result = []
  for directory in _FindThirdPartyDirs():
    if directory in known_issues.KNOWN_ISSUES:
      result.append(directory)
      continue
    try:
      metadata = licenses.ParseDir(directory, REPOSITORY_ROOT,
                                   require_license_file=False,
                                   optional_keys=['License Android Compatible'])
    except licenses.LicenseError as e:
      print 'Got LicenseError while scanning ' + directory
      raise
    if metadata.get('License Android Compatible', 'no').upper() == 'YES':
      continue
    license = re.split(' [Ll]icenses?$', metadata['License'])[0]
    if not third_party.LicenseIsCompatibleWithAndroid(InputApi(), license):
      result.append(directory)
  return result

def GetUnknownIncompatibleDirectories():
  """Gets a list of third-party directories which use licenses incompatible
  with Android which are not present in the known_issues.py file.
  This is used by the AOSP bot.
  Returns:
    A list of directories.
  """
  incompatible_directories = frozenset(GetIncompatibleDirectories())
  known_incompatible = []
  input_api = InputApi()
  for path, exclude_list in known_issues.KNOWN_INCOMPATIBLE.iteritems():
    path = copyright_scanner.ForwardSlashesToOsPathSeps(input_api, path)
    for exclude in exclude_list:
      exclude = copyright_scanner.ForwardSlashesToOsPathSeps(input_api, exclude)
      if glob.has_magic(exclude):
        exclude_dirname = os.path.dirname(exclude)
        if glob.has_magic(exclude_dirname):
          print ('Exclude path %s contains an unexpected glob expression,' \
                 ' skipping.' % exclude)
        exclude = exclude_dirname
      known_incompatible.append(os.path.normpath(os.path.join(path, exclude)))
  known_incompatible = frozenset(known_incompatible)
  return incompatible_directories.difference(known_incompatible)


class ScanResult(object):
  Ok, Warnings, Errors = range(3)

# Needs to be a top-level function for multiprocessing
def _FindCopyrightViolations(files_to_scan_as_string):
  return copyright_scanner.FindCopyrightViolations(
    InputApi(), REPOSITORY_ROOT, files_to_scan_as_string)

def _ShardList(l, shard_len):
  return [l[i:i + shard_len] for i in range(0, len(l), shard_len)]

def _CheckLicenseHeaders(excluded_dirs_list, whitelisted_files):
  """Checks that all files which are not in a listed third-party directory,
  and which do not use the standard Chromium license, are whitelisted.
  Args:
    excluded_dirs_list: The list of directories to exclude from scanning.
    whitelisted_files: The whitelist of files.
  Returns:
    ScanResult.Ok if all files with non-standard license headers are whitelisted
    and the whitelist contains no stale entries;
    ScanResult.Warnings if there are stale entries;
    ScanResult.Errors if new non-whitelisted entries found.
  """
  input_api = InputApi()
  files_to_scan = copyright_scanner.FindFiles(
    input_api, REPOSITORY_ROOT, ['.'], excluded_dirs_list)
  sharded_files_to_scan = _ShardList(files_to_scan, 2000)
  pool = multiprocessing.Pool()
  offending_files_chunks = pool.map_async(
      _FindCopyrightViolations, sharded_files_to_scan).get(999999)
  pool.close()
  pool.join()
  # Flatten out the result
  offending_files = \
    [item for sublist in offending_files_chunks for item in sublist]

  (unknown, missing, stale) = copyright_scanner.AnalyzeScanResults(
    input_api, whitelisted_files, offending_files)

  if unknown:
    print 'The following files contain a third-party license but are not in ' \
          'a listed third-party directory and are not whitelisted. You must ' \
          'add the following files to the whitelist.\n%s' % \
          '\n'.join(sorted(unknown))
  if missing:
    print 'The following files are whitelisted, but do not exist.\n%s' % \
        '\n'.join(sorted(missing))
  if stale:
    print 'The following files are whitelisted unnecessarily. You must ' \
          'remove the following files from the whitelist.\n%s' % \
          '\n'.join(sorted(stale))

  if unknown:
    return ScanResult.Errors
  elif stale or missing:
    return ScanResult.Warnings
  else:
    return ScanResult.Ok


def _ReadFile(full_path, mode='rU'):
  """Reads a file from disk. This emulates presubmit InputApi.ReadFile func.
  Args:
    full_path: The path of the file to read.
  Returns:
    The contents of the file as a string.
  """

  with open(full_path, mode) as f:
    return f.read()


def _ReadLocalFile(path, mode='rb'):
  """Reads a file from disk.
  Args:
    path: The path of the file to read, relative to the root of the repository.
  Returns:
    The contents of the file as a string.
  """

  return _ReadFile(os.path.join(REPOSITORY_ROOT, path), mode)


def _FindThirdPartyDirs():
  """Gets the list of third-party directories.
  Returns:
    The list of third-party directories.
  """

  # Please don't add here paths that have problems with license files,
  # as they will end up included in Android WebView snapshot.
  # Instead, add them into known_issues.py.
  prune_paths = [
    # Temporary until we figure out how not to check out quickoffice on the
    # Android license check bot. Tracked in crbug.com/350472.
    os.path.join('chrome', 'browser', 'resources', 'chromeos', 'quickoffice'),
    # Placeholder directory, no third-party code.
    os.path.join('third_party', 'adobe'),
    # Apache 2.0 license. See
    # https://code.google.com/p/chromium/issues/detail?id=140478.
    os.path.join('third_party', 'bidichecker'),
    # Isn't checked out on clients
    os.path.join('third_party', 'gles2_conform'),
    # The llvm-build doesn't exist for non-clang builder
    os.path.join('third_party', 'llvm-build'),
    # Binaries doesn't apply to android
    os.path.join('third_party', 'widevine'),
    # third_party directories in this tree aren't actually third party, but
    # provide a way to shadow experimental buildfiles into those directories.
    os.path.join('build', 'secondary'),
    # Not shipped, Chromium code
    os.path.join('tools', 'swarming_client'),
    # Not shipped, only relates to Chrome for Android, but not to WebView
    os.path.join('clank'),
  ]
  third_party_dirs = licenses.FindThirdPartyDirs(prune_paths, REPOSITORY_ROOT)
  return licenses.FilterDirsWithFiles(third_party_dirs, REPOSITORY_ROOT)


def _Scan():
  """Checks that license meta-data is present for all third-party code and
     that all non third-party code doesn't contain external copyrighted code.
  Returns:
    ScanResult.Ok if everything is in order;
    ScanResult.Warnings if there are non-fatal problems (e.g. stale whitelist
      entries)
    ScanResult.Errors otherwise.
  """

  third_party_dirs = _FindThirdPartyDirs()

  # First, check designated third-party directories using src/tools/licenses.py.
  all_licenses_valid = True
  for path in sorted(third_party_dirs):
    try:
      licenses.ParseDir(path, REPOSITORY_ROOT)
    except licenses.LicenseError, e:
      if not (path in known_issues.KNOWN_ISSUES):
        print 'Got LicenseError "%s" while scanning %s' % (e, path)
        all_licenses_valid = False

  # Second, check for non-standard license text.
  whitelisted_files = copyright_scanner.LoadWhitelistedFilesList(InputApi())
  licenses_check = _CheckLicenseHeaders(third_party_dirs, whitelisted_files)

  return licenses_check if all_licenses_valid else ScanResult.Errors


class TemplateEntryGenerator(object):
  def __init__(self):
    self._generate_licenses_file_list_only = False
    self._toc_index = 0

  def SetGenerateLicensesFileListOnly(self, generate_licenses_file_list_only):
    self._generate_licenses_file_list_only = generate_licenses_file_list_only

  def _ReadFileGuessEncoding(self, name):
    if self._generate_licenses_file_list_only:
      return ''
    contents = ''
    with open(name, 'rb') as input_file:
      contents = input_file.read()
    try:
      return contents.decode('utf8')
    except UnicodeDecodeError:
      pass
    # If it's not UTF-8, it must be CP-1252. Fail otherwise.
    return contents.decode('cp1252')

  def MetadataToTemplateEntry(self, metadata):
    self._toc_index += 1
    return {
      'name': metadata['Name'],
      'url': metadata['URL'],
      'license_file': metadata['License File'],
      'license': self._ReadFileGuessEncoding(metadata['License File']),
      'toc_href': 'entry' + str(self._toc_index),
    }


def GenerateNoticeFile(generate_licenses_file_list_only=False):
  """Generates the contents of an Android NOTICE file for the third-party code.
  This is used by the snapshot tool.
  Returns:
    The contents of the NOTICE file.
  """

  generator = TemplateEntryGenerator()
  generator.SetGenerateLicensesFileListOnly(generate_licenses_file_list_only)
  # Start from Chromium's LICENSE file
  entries = [generator.MetadataToTemplateEntry({
    'Name': 'The Chromium Project',
    'URL': 'http://www.chromium.org',
    'License File': os.path.join(REPOSITORY_ROOT, 'LICENSE') })
  ]

  third_party_dirs = _FindThirdPartyDirs()
  # We provide attribution for all third-party directories.
  # TODO(mnaganov): Limit this to only code used by the WebView binary.
  for directory in sorted(third_party_dirs):
    try:
      metadata = licenses.ParseDir(directory, REPOSITORY_ROOT,
                                   require_license_file=False)
    except licenses.LicenseError:
      # Since this code is called during project files generation,
      # we don't want to break the it. But we assume that release
      # WebView apks are built using checkouts that pass
      # 'webview_licenses.py scan' check, thus they don't contain
      # projects with non-compatible licenses.
      continue
    license_file = metadata['License File']
    if license_file and license_file != licenses.NOT_SHIPPED:
      entries.append(generator.MetadataToTemplateEntry(metadata))

  if generate_licenses_file_list_only:
    return [entry['license_file'] for entry in entries]
  else:
    env = jinja2.Environment(
      loader=jinja2.FileSystemLoader(os.path.dirname(__file__)),
      extensions=['jinja2.ext.autoescape'])
    template = env.get_template('licenses_notice.tmpl')
    return template.render({ 'entries': entries }).encode('utf8')


def _ProcessIncompatibleResult(incompatible_directories):
  if incompatible_directories:
    print ("Incompatibly licensed directories found:\n" +
           "\n".join(sorted(incompatible_directories)))
    return ScanResult.Errors
  return ScanResult.Ok

def main():
  class FormatterWithNewLines(optparse.IndentedHelpFormatter):
    def format_description(self, description):
      paras = description.split('\n')
      formatted_paras = [textwrap.fill(para, self.width) for para in paras]
      return '\n'.join(formatted_paras) + '\n'

  parser = optparse.OptionParser(formatter=FormatterWithNewLines(),
                                 usage='%prog [options]')
  parser.description = (__doc__ +
                        '\nCommands:\n'
                        '  scan Check licenses.\n'
                        '  notice_deps Generate the list of dependencies for '
                        'Android NOTICE file.\n'
                        '  notice [file] Generate Android NOTICE file on '
                        'stdout or into |file|.\n'
                        '  incompatible_directories Scan for incompatibly'
                        ' licensed directories.\n'
                        '  all_incompatible_directories Scan for incompatibly'
                        ' licensed directories (even those in'
                        ' known_issues.py).\n'
                        '  display_copyrights Display autorship on the files'
                        ' using names provided via stdin.\n')
  (_, args) = parser.parse_args()
  if len(args) < 1:
    parser.print_help()
    return ScanResult.Errors

  if args[0] == 'scan':
    scan_result = _Scan()
    if scan_result == ScanResult.Ok:
      print 'OK!'
    return scan_result
  elif args[0] == 'notice_deps':
    # 'set' is used to eliminate duplicate references to the same license file.
    print ' '.join(
      sorted(set(GenerateNoticeFile(generate_licenses_file_list_only=True))))
    return ScanResult.Ok
  elif args[0] == 'notice':
    notice_file_contents = GenerateNoticeFile()
    if len(args) == 1:
      print notice_file_contents
    else:
      with open(args[1], 'w') as output_file:
        output_file.write(notice_file_contents)
    return ScanResult.Ok
  elif args[0] == 'incompatible_directories':
    return _ProcessIncompatibleResult(GetUnknownIncompatibleDirectories())
  elif args[0] == 'all_incompatible_directories':
    return _ProcessIncompatibleResult(GetIncompatibleDirectories())
  elif args[0] == 'display_copyrights':
    files = sys.stdin.read().splitlines()
    for f, c in \
        zip(files, copyright_scanner.FindCopyrights(InputApi(), '.', files)):
      print f, '\t', ' / '.join(sorted(c))
    return ScanResult.Ok
  parser.print_help()
  return ScanResult.Errors

if __name__ == '__main__':
  sys.exit(main())