summary refs log tree commit diff stats
path: root/third_party/harfbuzz/contrib/tables/scripts-parse.py
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/harfbuzz/contrib/tables/scripts-parse.py')
-rw-r--r-- third_party/harfbuzz/contrib/tables/scripts-parse.py 75
1 files changed, 75 insertions, 0 deletions
diff --git a/third_party/harfbuzz/contrib/tables/scripts-parse.py b/third_party/harfbuzz/contrib/tables/scripts-parse.py
new file mode 100644
index 0000000..23bac10
--- /dev/null
+++ b/third_party/harfbuzz/contrib/tables/scripts-parse.py
@@ -0,0 +1,75 @@
+import sys
+from unicode_parse_common import *
+
+# http://www.unicode.org/Public/5.1.0/ucd/Scripts.txt
+
+# Maps a script name to the name of the matching HB_Script_* constant.
+# Every constant is the script name prefixed with 'HB_Script_', so the table
+# is derived from the list of names instead of being spelled out twice.
+script_to_harfbuzz = dict(
+    (name, 'HB_Script_' + name)
+    for name in (
+        # This is the list of HB_Script_* at the time of writing
+        'Common',
+        'Greek',
+        'Cyrillic',
+        'Armenian',
+        'Hebrew',
+        'Arabic',
+        'Syriac',
+        'Thaana',
+        'Devanagari',
+        'Bengali',
+        'Gurmukhi',
+        'Gujarati',
+        'Oriya',
+        'Tamil',
+        'Telugu',
+        'Kannada',
+        'Malayalam',
+        'Sinhala',
+        'Thai',
+        'Lao',
+        'Tibetan',
+        'Myanmar',
+        'Georgian',
+        'Hangul',
+        'Ogham',
+        'Runic',
+        'Khmer',
+        'Inherited',
+    )
+)
+
+class ScriptDict(object):
+    """Lookup wrapper that never fails: unknown keys yield a default value.
+
+    Only item access (obj[key]) is provided; the wrapped mapping is read
+    through its get() method and is never modified.
+    """
+
+    def __init__(self, base, default='HB_Script_Common'):
+        """Wrap the mapping `base`.
+
+        base: any object with a dict-style get(key, default) method.
+        default: value returned for keys that `base` does not resolve.  It
+          defaults to 'HB_Script_Common', the value that used to be
+          hard-coded, so existing one-argument callers are unaffected.
+        """
+        self.base = base
+        self.default = default
+
+    def __getitem__(self, key):
+        """Return base's value for `key`, or the default if there is none."""
+        value = self.base.get(key, None)
+        # A key stored with the value None is treated like a missing key,
+        # exactly as before.
+        if value is None:
+            return self.default
+        return value
+
+def _format_script_table(ranges):
+    """Return the complete text of the generated C header.
+
+    ranges: sized sequence of (start, end, value) triples.  start and end
+      are formatted as hexadecimal integers and value is emitted verbatim
+      as the third initialiser of each script_properties entry.
+    """
+    lines = [
+        '// Generated from Unicode script tables\n',
+        '#ifndef SCRIPT_PROPERTIES_H_',
+        '#define SCRIPT_PROPERTIES_H_\n',
+        '#include <stdint.h>',
+        '#include "harfbuzz-shaper.h"\n',
+        'struct script_property {',
+        ' uint32_t range_start;',
+        ' uint32_t range_end;',
+        ' HB_Script script;',
+        '};\n',
+        'static const struct script_property script_properties[] = {',
+    ]
+    for (start, end, value) in ranges:
+        lines.append(' {0x%x, 0x%x, %s},' % (start, end, value))
+    lines.append('};\n')
+    lines.append('static const unsigned script_properties_count = %d;\n'
+                 % len(ranges))
+    lines.append('#endif // SCRIPT_PROPERTIES_H_')
+    # Each entry is one output line; an entry ending in '\n' is followed by
+    # a blank line in the generated header.
+    return '\n'.join(lines) + '\n'
+
+
+def main(infile, outfile):
+    """Convert a script table into a C header of script_property ranges.
+
+    infile: input handed unchanged to unicode_file_parse (the command-line
+      entry point passes an open text file).
+    outfile: object with a write() method; it receives the header text.
+    """
+    # Script names without an entry in script_to_harfbuzz fall back to
+    # 'HB_Script_Common' through ScriptDict.
+    ranges = unicode_file_parse(infile,
+                                ScriptDict(script_to_harfbuzz),
+                                'HB_Script_Common')
+    ranges = sort_and_merge(ranges)
+
+    # A single write() replaces the Python 2-only "print >>outfile"
+    # statements, so the output is the same under Python 2 and Python 3.
+    outfile.write(_format_script_table(ranges))
+
+if __name__ == '__main__':
+    if len(sys.argv) != 3:
+        # Report misuse on stderr and exit non-zero so that a calling build
+        # step notices the failure instead of continuing without a header.
+        sys.stderr.write('Usage: %s <input .txt> <output .h>\n' % sys.argv[0])
+        sys.exit(1)
+    # open() replaces the Python 2-only file() builtin.  The with blocks
+    # close (and therefore flush) both files even if main() raises.
+    with open(sys.argv[1], 'r') as infile:
+        with open(sys.argv[2], 'w+') as outfile:
+            main(infile, outfile)