1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
|
# 2002 May 24
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
# This file implements regression tests for SQLite library. The focus of
# this file is testing the SQLite routines used for converting between the
# various supported Unicode encodings (UTF-8, UTF-16, UTF-16le and
# UTF-16be).
#
# $Id: enc.test,v 1.7 2007/05/23 16:23:09 danielk1977 Exp $
# Work out the directory this script was launched from and load the
# shared harness file tester.tcl from that same directory.
# NOTE(review): tester.tcl is presumably where do_test, execsql, ifcapable
# and finish_test come from -- none of them is defined in this file.
set testdir [file dirname $argv0]
source $testdir/tester.tcl
# Skip this test if the build does not support multiple encodings.
# When the utf16 capability is missing, finish the test run and stop
# evaluating the remainder of this file.
ifcapable {!utf16} {
finish_test
return
}
# Compare two binary strings through the do_test harness.
#
# Arguments:
#   testname - name handed to do_test for this comparison
#   got      - binary string actually produced
#   expect   - binary string that was expected
#
# Both strings are unpacked into lists of signed 8-bit integers, and the
# comparison is made on those lists. Returns whatever do_test returns.
proc do_bincmp_test {testname got expect} {
  binary scan $got    c* actualBytes
  binary scan $expect c* wantedBytes

  # The script given to do_test simply evaluates to the list of bytes
  # that was obtained; do_test compares it with the expected list.
  do_test $testname [list set dummy $actualBytes] $wantedBytes
}
# $utf16 is a UTF-16 encoded string. Swap each pair of bytes around
# to change the byte-order of the string.
#
# Arguments:
#   utf16 - binary string of UTF-16 code units; it is expected to hold an
#           even number of bytes.
#
# Returns the binary string with the two bytes of every pair exchanged.
# An empty input yields an empty result.
proc swap_byte_order {utf16} {
  binary scan $utf16 c* bytes

  # Start from an empty list so that an empty input produces an empty
  # string rather than failing on a variable that was never set.
  set swapped {}
  foreach {first second} $bytes {
    lappend swapped $second $first
  }
  return [binary format c* $swapped]
}
#
# Test that the SQLite routines for converting between UTF encodings
# produce the same results as their TCL counterparts.
#
# $testname is the prefix to be used for the test names.
# $str is a string to use for testing (encoded in UTF-8, as normal for TCL).
#
# The test procedure is:
# 1. Convert the string from UTF-8 to UTF-16le and check that the TCL and
# SQLite routines produce the same results.
#
# 2. Convert the string from UTF-8 to UTF-16be and check that the TCL and
# SQLite routines produce the same results.
#
# 3. Use the SQLite routines to convert the native machine order UTF-16
# representation back to the original UTF-8. Check that the result
# matches the original representation.
#
# 4. Add a byte-order mark to each of the UTF-16 representations and
# check that the SQLite routines can convert them back to UTF-8. For
# byte-order mark info, refer to section 3.10 of the Unicode standard.
#
# 5. Take the byte-order marked UTF-16 strings from step 4 and ensure
# that SQLite can convert them both to native byte order UTF-16
# strings, sans BOM.
#
# Coverage:
#
# sqlite_utf8to16be (step 2)
# sqlite_utf8to16le (step 1)
# sqlite_utf16to8 (steps 3, 4)
# sqlite_utf16to16le (step 5)
# sqlite_utf16to16be (step 5)
#
proc test_conversion {testname str} {
  # Cross-check SQLite's text-encoding conversions of $str against the
  # results computed by Tcl.
  #
  #   testname - prefix for the names of the individual comparisons
  #   str      - the text to convert
  #
  # Nine comparisons are issued, in this order: $testname.1, .2, .3,
  # .4.le, .4.be, .5.le.le, .5.be.be, .5.be.le and .5.le.be. The value
  # returned is that of the last comparison.

  set hostIsLittleEndian [expr {$::tcl_platform(byteOrder)=="littleEndian"}]

  # Tcl-side reference: the "unicode" encoding of $str followed by a
  # two-byte NUL terminator. It is treated here as being in the host's
  # byte order; the opposite order is derived by swapping byte pairs.
  set hostOrder [encoding convertto unicode $str]
  append hostOrder "\x00\x00"
  set otherOrder [swap_byte_order $hostOrder]
  if {$hostIsLittleEndian} {
    set utf16le $hostOrder
    set utf16be $otherOrder
  } else {
    set utf16le $otherOrder
    set utf16be $hostOrder
  }

  # Step 1: UTF-8 to UTF-16LE.
  do_bincmp_test $testname.1 [test_translate $str UTF8 UTF16LE] $utf16le

  # Step 2: UTF-8 to UTF-16BE.
  do_bincmp_test $testname.2 [test_translate $str UTF8 UTF16BE] $utf16be

  # Step 3: host-order UTF-16 back to UTF-8.
  do_bincmp_test $testname.3 \
      [test_translate $hostOrder UTF16 UTF8] [binarize $str]

  # Step 4: UTF-16 carrying a byte-order mark back to UTF-8. The
  # little-endian string is prefixed with FF FE, the big-endian one
  # with FE FF.
  set leWithBom "\xFF\xFE"
  append leWithBom $utf16le
  set beWithBom "\xFE\xFF"
  append beWithBom $utf16be

  # NOTE(review): only the little-endian call passes the extra trailing
  # argument 1; its meaning is defined by test_translate, which lives
  # outside this file -- confirm before changing.
  do_bincmp_test $testname.4.le \
      [test_translate $leWithBom UTF16 UTF8 1] [binarize $str]
  do_bincmp_test $testname.4.be \
      [test_translate $beWithBom UTF16 UTF8] [binarize $str]

  # Step 5: BOM-marked UTF-16 to an explicit byte order. Each row is:
  # name suffix, input, source encoding, target encoding, expected bytes.
  set outcome {}
  foreach {suffix input from to wanted} [list \
      le.le $leWithBom UTF16LE UTF16LE $utf16le \
      be.be $beWithBom UTF16   UTF16BE $utf16be \
      be.le $beWithBom UTF16   UTF16LE $utf16le \
      le.be $leWithBom UTF16   UTF16BE $utf16be] {
    set outcome [do_bincmp_test $testname.5.$suffix \
        [test_translate $input $from $to] $wanted]
  }

  # Hand back the result of the final comparison, as straight-line code
  # ending in that call would.
  return $outcome
}
# translate_selftest is defined outside this file.
# NOTE(review): presumably a self-check of the test_translate hook that
# the conversion tests below rely on -- confirm against the harness.
translate_selftest
# Plain ASCII strings and the empty string.
test_conversion enc-1 "hello world"
test_conversion enc-2 "sqlite"
test_conversion enc-3 ""
# Single and mixed non-ASCII code points.
test_conversion enc-X "\u0100"
test_conversion enc-4 "\u1234"
test_conversion enc-5 "\u4321abc"
test_conversion enc-6 "\u4321\u1234"
# Longer inputs: each pattern is repeated 100 times.
# NOTE(review): enc-8 and enc-9 look chosen to straddle the points where
# the UTF-8 encoded length changes (U+007F/U+0080 and U+07FF/U+0800) --
# confirm intent.
test_conversion enc-7 [string repeat "abcde\u00EF\u00EE\uFFFCabc" 100]
test_conversion enc-8 [string repeat "\u007E\u007F\u0080\u0081" 100]
test_conversion enc-9 [string repeat "\u07FE\u07FF\u0800\u0801\uFFF0" 100]
test_conversion enc-10 [string repeat "\uE000" 100]
# Comparison callback: orders two strings with [string compare].
#
# Arguments:
#   enc    - accepted but not consulted by this implementation
#   zLeft  - left-hand string
#   zRight - right-hand string
#
# Returns -1, 0 or 1 as produced by [string compare $zLeft $zRight].
proc test_collate {enc zLeft zRight} {
  set ordering [string compare $zLeft $zRight]
  return $ordering
}
# Register a collation on the database handle $::DB. The three numeric
# flags are interpreted by add_test_collate, which is defined outside
# this file.
# NOTE(review): presumably one flag per text encoding, with only the last
# one enabled here -- confirm against the harness.
add_test_collate $::DB 0 0 1
# enc-11.1: create a table whose first column uses the test_collate
# collation, insert two rows and index them. X'C388' is the UTF-8
# encoding of U+00C8. The second row's text is a 0xC0 byte followed by a
# long run of 0x80 bytes and ending in 0x83 0x88.
# The statements are expected to produce no result rows.
do_test enc-11.1 {
execsql {
CREATE TABLE ab(a COLLATE test_collate, b);
INSERT INTO ab VALUES(CAST (X'C388' AS TEXT), X'888800');
INSERT INTO ab VALUES(CAST (X'C0808080808080808080808080808080808080808080808080808080808080808080808080808080808080808080808080808080808388' AS TEXT), X'888800');
CREATE INDEX ab_i ON ab(a, b);
}
} {}
# enc-11.2: look up U+00C8 in the collated column; both rows inserted
# above are expected to match (count of 2).
do_test enc-11.2 {
set cp200 "\u00C8"
execsql {
SELECT count(*) FROM ab WHERE a = $::cp200;
}
} {2}
# Close out the test run.
finish_test
|