Make autofill regular expressions unicode again.

Instead of compiling the source file with UTF8 directly, run it through a python script that rewrites the UTF8 into C literals.

BUG=95858
Review URL: http://codereview.chromium.org/7891020

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@101236 0039d316-1c4b-4281-b951-d872f2087c98
author: tony@chromium.org <tony@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2011-09-15 03:35:53 +0000
committer: tony@chromium.org <tony@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2011-09-15 03:35:53 +0000
commit: b91484c7f2215a9356e1f2ce075a79880dd4e8ad (patch)
tree: 854ebe43d4db3de2e96761212b548c1bcd822a2d /build/escape_unicode.py
parent: acac7e071088b69e1db406e1c2f133390dc90669 (diff)
download: chromium_src-b91484c7f2215a9356e1f2ce075a79880dd4e8ad.zip
chromium_src-b91484c7f2215a9356e1f2ce075a79880dd4e8ad.tar.gz
chromium_src-b91484c7f2215a9356e1f2ce075a79880dd4e8ad.tar.bz2
1 files changed, 55 insertions, 0 deletions
diff --git a/build/escape_unicode.py b/build/escape_unicode.py
new file mode 100755
index 0000000..5d4410e
--- /dev/null
+++ b/build/escape_unicode.py
@@ -0,0 +1,55 @@
+#!/usr/bin/python
+
+# Copyright (c) 2011 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Convert any unicode characters found in the input file to C literals."""
+
+import codecs
+import optparse
+import os
+import sys
+
+def main(argv):  # CLI entry: validate arguments, then escape one .utf8 file
+  parser = optparse.OptionParser()
+  usage = 'Usage: %prog -o <output_dir> <input_file>'
+  parser.set_usage(usage)
+  parser.add_option('-o', dest='output_dir')
+
+  options, arglist = parser.parse_args(argv)  # arglist holds the positionals
+
+  if not options.output_dir:
+    print "output_dir required"
+    return 1  # non-zero return value signals a usage error to the caller
+
+  if len(arglist) != 2:  # arglist[0] (presumably the script path) + one input
+    print "input_file required"
+    return 1
+
+  in_filename = arglist[1]
+
+  if not in_filename.endswith('.utf8'):
+    print "input_file should end in .utf8"
+    return 1
+
+  out_filename = os.path.join(options.output_dir, os.path.basename(
+      os.path.splitext(in_filename)[0]))  # output_dir/<input name minus .utf8>
+
+  WriteEscapedFile(in_filename, out_filename)  # no return after: yields None
+
+
+def WriteEscapedFile(in_filename, out_filename):  # UTF-8 in, ASCII-only out
+  input_data = codecs.open(in_filename, 'r', 'utf8').read()  # NOTE(review): handle never explicitly closed
+  with codecs.open(out_filename, 'w', 'ascii') as out_file:
+    for i, char in enumerate(input_data):
+      if ord(char) > 127:  # non-ASCII code point
+        out_file.write(repr(char.encode('utf8'))[1:-1])  # repr minus its quotes; relies on Python 2 str repr giving \xNN
+        if input_data[i + 1:i + 2] in '0123456789abcdefABCDEF':  # NOTE(review): slice is '' at end of input and '' in str is True
+          out_file.write('""')  # break the literal so a following hex digit is not read as part of the \x escape
+      else:
+        out_file.write(char.encode('ascii'))  # ASCII passes through unchanged
+
+
+if __name__ == '__main__':
+  exit(main(sys.argv))  # main's return value (1 on usage errors, else None) is handed to exit()
author	tony@chromium.org <tony@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2011-09-15 03:35:53 +0000
committer	tony@chromium.org <tony@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2011-09-15 03:35:53 +0000
commit	b91484c7f2215a9356e1f2ce075a79880dd4e8ad (patch)
tree	854ebe43d4db3de2e96761212b548c1bcd822a2d /build/escape_unicode.py
parent	acac7e071088b69e1db406e1c2f133390dc90669 (diff)
download	chromium_src-b91484c7f2215a9356e1f2ce075a79880dd4e8ad.zip chromium_src-b91484c7f2215a9356e1f2ce075a79880dd4e8ad.tar.gz chromium_src-b91484c7f2215a9356e1f2ce075a79880dd4e8ad.tar.bz2