You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
121 lines
4.2 KiB
121 lines
4.2 KiB
3 years ago
|
#!/usr/bin/env python3
|
||
|
|
||
|
import os
|
||
|
import sys
|
||
|
import textwrap
|
||
|
|
||
|
|
||
|
self_path = os.path.dirname(os.path.realpath(__file__));
|
||
|
f = open(self_path + "/unicode/CaseFolding.txt", "r")
|
||
|
|
||
|
status_list = [ "C", "F" ]
|
||
|
|
||
|
folding_list = [ dict(), dict(), dict() ]
|
||
|
|
||
|
# Filter the foldings for "full" folding.
|
||
|
for line in f:
|
||
|
comment_off = line.find("#")
|
||
|
if comment_off >= 0:
|
||
|
line = line[:comment_off]
|
||
|
line = line.strip()
|
||
|
if not line:
|
||
|
continue
|
||
|
|
||
|
raw_codepoint, status, raw_mapping, ignored_tail = line.split(";", 3)
|
||
|
if not status.strip() in status_list:
|
||
|
continue
|
||
|
codepoint = int(raw_codepoint.strip(), 16)
|
||
|
mapping = [int(it, 16) for it in raw_mapping.strip().split(" ")]
|
||
|
mapping_len = len(mapping)
|
||
|
|
||
|
if mapping_len in range(1, 4):
|
||
|
folding_list[mapping_len-1][codepoint] = mapping
|
||
|
else:
|
||
|
assert(False)
|
||
|
f.close()
|
||
|
|
||
|
|
||
|
# If we assume that (index0 ... index-1) makes a range (as defined below),
|
||
|
# check that the newly provided index is compatible with the range too; i.e.
|
||
|
# verify that the range can be extended without breaking its properties.
|
||
|
#
|
||
|
# Currently, we can handle ranges which:
|
||
|
#
|
||
|
# (1) either form consecutive sequence of codepoints and which map that range
|
||
|
# to other consecutive range of codepoints (of the same length);
|
||
|
#
|
||
|
# (2) or a consecutive sequence of codepoints with step 2 where each codepoint
|
||
|
# CP is mapped to the codepoint CP+1
|
||
|
# (e.g. 0x1234 -> 0x1235; 0x1236 -> 0x1237; 0x1238 -> 0x1239; ...).
|
||
|
#
|
||
|
# Note: When the codepoints in the range are mapped to multiple codepoints,
|
||
|
# only the 1st mapped codepoint is considered. All the other ones have to be
|
||
|
# shared by all the mappings covered by the range.
|
||
|
def is_range_compatible(folding, codepoint_list, index0, index):
|
||
|
N = index - index0
|
||
|
codepoint0 = codepoint_list[index0]
|
||
|
codepoint1 = codepoint_list[index0+1]
|
||
|
codepointN = codepoint_list[index]
|
||
|
mapping0 = folding[codepoint0]
|
||
|
mapping1 = folding[codepoint1]
|
||
|
mappingN = folding[codepointN]
|
||
|
|
||
|
# Check the range type (1):
|
||
|
if codepoint1 - codepoint0 == 1 and codepointN - codepoint0 == N \
|
||
|
and mapping1[0] - mapping0[0] == 1 and mapping1[1:] == mapping0[1:] \
|
||
|
and mappingN[0] - mapping0[0] == N and mappingN[1:] == mapping0[1:]:
|
||
|
return True
|
||
|
|
||
|
# Check the range type (2):
|
||
|
if codepoint1 - codepoint0 == 2 and codepointN - codepoint0 == 2 * N \
|
||
|
and mapping0[0] - codepoint0 == 1 \
|
||
|
and mapping1[0] - codepoint1 == 1 and mapping1[1:] == mapping0[1:] \
|
||
|
and mappingN[0] - codepointN == 1 and mappingN[1:] == mapping0[1:]:
|
||
|
return True
|
||
|
|
||
|
return False
|
||
|
|
||
|
|
||
|
def mapping_str(list, mapping):
|
||
|
return ",".join("0x{:04x}".format(x) for x in mapping)
|
||
|
|
||
|
for mapping_len in range(1, 4):
|
||
|
folding = folding_list[mapping_len-1]
|
||
|
codepoint_list = list(folding)
|
||
|
|
||
|
index0 = 0
|
||
|
count = len(folding)
|
||
|
|
||
|
records = list()
|
||
|
data_records = list()
|
||
|
|
||
|
while index0 < count:
|
||
|
index1 = index0 + 1
|
||
|
while index1 < count and is_range_compatible(folding, codepoint_list, index0, index1):
|
||
|
index1 += 1
|
||
|
|
||
|
if index1 - index0 > 2:
|
||
|
# Range of codepoints
|
||
|
records.append("R(0x{:04x},0x{:04x})".format(codepoint_list[index0], codepoint_list[index1-1]))
|
||
|
data_records.append(mapping_str(data_records, folding[codepoint_list[index0]]))
|
||
|
data_records.append(mapping_str(data_records, folding[codepoint_list[index1-1]]))
|
||
|
index0 = index1
|
||
|
else:
|
||
|
# Single codepoint
|
||
|
records.append("S(0x{:04x})".format(codepoint_list[index0]))
|
||
|
data_records.append(mapping_str(data_records, folding[codepoint_list[index0]]))
|
||
|
index0 += 1
|
||
|
|
||
|
sys.stdout.write("static const unsigned FOLD_MAP_{}[] = {{\n".format(mapping_len))
|
||
|
sys.stdout.write("\n".join(textwrap.wrap(", ".join(records), 110,
|
||
|
initial_indent = " ", subsequent_indent=" ")))
|
||
|
sys.stdout.write("\n};\n")
|
||
|
|
||
|
sys.stdout.write("static const unsigned FOLD_MAP_{}_DATA[] = {{\n".format(mapping_len))
|
||
|
sys.stdout.write("\n".join(textwrap.wrap(", ".join(data_records), 110,
|
||
|
initial_indent = " ", subsequent_indent=" ")))
|
||
|
sys.stdout.write("\n};\n")
|
||
|
|
||
|
|
||
|
|