2003-10-06 22:12:11 +02:00
|
|
|
# uni_parse2.tcl --
|
|
|
|
#
|
|
|
|
# This program parses the UnicodeData file and generates the
|
|
|
|
# corresponding uni_norm.c file with compressed character
|
2005-05-07 03:21:39 +02:00
|
|
|
# data tables. The input to this program should be
|
|
|
|
# UnicodeData-3.2.0.txt and CompositionExclusions-3.2.0.txt files from:
|
2003-10-06 22:12:11 +02:00
|
|
|
# ftp://ftp.unicode.org/Public/UNIDATA/
|
|
|
|
#
|
|
|
|
# Copyright (c) 1998-1999 by Scriptics Corporation.
|
|
|
|
# All rights reserved.
|
|
|
|
#
|
|
|
|
# Modified for ejabberd by Alexey Shchepin
|
|
|
|
#
|
|
|
|
# RCS: @(#) $Id$
|
|
|
|
|
|
|
|
|
|
|
|
namespace eval uni {
|
2005-05-07 03:21:39 +02:00
|
|
|
set cclass_shift 8
|
|
|
|
set decomp_shift 8
|
|
|
|
set comp_shift 8
|
2003-10-06 22:12:11 +02:00
|
|
|
set shift 5; # number of bits of data within a page
|
|
|
|
# This value can be adjusted to find the
|
|
|
|
# best split to minimize table size
|
|
|
|
|
|
|
|
variable pMap; # map from page to page index, each entry is
|
|
|
|
# an index into the pages table, indexed by
|
|
|
|
# page number
|
|
|
|
variable pages; # map from page index to page info, each
|
|
|
|
# entry is a list of indices into the groups
|
|
|
|
# table, the list is indexed by the offset
|
|
|
|
variable groups; # list of character info values, indexed by
|
|
|
|
# group number, initialized with the
|
|
|
|
# unassigned character group
|
|
|
|
|
|
|
|
variable categories {
|
|
|
|
Cn Lu Ll Lt Lm Lo Mn Me Mc Nd Nl No Zs Zl Zp
|
|
|
|
Cc Cf Co Cs Pc Pd Ps Pe Pi Pf Po Sm Sc Sk So
|
|
|
|
}; # Ordered list of character categories, must
|
|
|
|
# match the enumeration in the header file.
|
|
|
|
|
|
|
|
variable titleCount 0; # Count of the number of title case
|
|
|
|
# characters. This value is used in the
|
|
|
|
# regular expression code to allocate enough
|
|
|
|
# space for the title case variants.
|
|
|
|
}
|
|
|
|
|
|
|
|
proc uni::getValue {items index} {
|
|
|
|
variable categories
|
|
|
|
variable titleCount
|
|
|
|
|
|
|
|
# Extract character info
|
|
|
|
|
|
|
|
set category [lindex $items 2]
|
|
|
|
if {[scan [lindex $items 12] %4x toupper] == 1} {
|
|
|
|
set toupper [expr {$index - $toupper}]
|
|
|
|
} else {
|
|
|
|
set toupper {}
|
|
|
|
}
|
|
|
|
if {[scan [lindex $items 13] %4x tolower] == 1} {
|
|
|
|
set tolower [expr {$tolower - $index}]
|
|
|
|
} else {
|
|
|
|
set tolower {}
|
|
|
|
}
|
|
|
|
if {[scan [lindex $items 14] %4x totitle] == 1} {
|
|
|
|
set totitle [expr {$index - $totitle}]
|
|
|
|
} else {
|
|
|
|
set totitle {}
|
|
|
|
}
|
|
|
|
|
|
|
|
set categoryIndex [lsearch -exact $categories $category]
|
|
|
|
if {$categoryIndex < 0} {
|
|
|
|
puts "Unexpected character category: $index($category)"
|
|
|
|
set categoryIndex 0
|
|
|
|
} elseif {$category == "Lt"} {
|
|
|
|
incr titleCount
|
|
|
|
}
|
|
|
|
|
|
|
|
return "$categoryIndex,$toupper,$tolower,$totitle"
|
|
|
|
}
|
|
|
|
|
|
|
|
proc uni::getGroup {value} {
|
|
|
|
variable groups
|
|
|
|
|
|
|
|
set gIndex [lsearch -exact $groups $value]
|
|
|
|
if {$gIndex == -1} {
|
|
|
|
set gIndex [llength $groups]
|
|
|
|
lappend groups $value
|
|
|
|
}
|
|
|
|
return $gIndex
|
|
|
|
}
|
|
|
|
|
|
|
|
proc uni::addPage {info} {
|
|
|
|
variable pMap
|
|
|
|
variable pages
|
|
|
|
|
|
|
|
set pIndex [lsearch -exact $pages $info]
|
|
|
|
if {$pIndex == -1} {
|
|
|
|
set pIndex [llength $pages]
|
|
|
|
lappend pages $info
|
|
|
|
}
|
|
|
|
lappend pMap $pIndex
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
proc uni::addPage {map_var pages_var info} {
|
|
|
|
variable $map_var
|
|
|
|
variable $pages_var
|
|
|
|
|
|
|
|
set pIndex [lsearch -exact [set $pages_var] $info]
|
|
|
|
if {$pIndex == -1} {
|
|
|
|
set pIndex [llength [set $pages_var]]
|
|
|
|
lappend $pages_var $info
|
|
|
|
}
|
|
|
|
lappend $map_var $pIndex
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
proc uni::load_exclusions {data} {
|
|
|
|
variable exclusions
|
|
|
|
|
|
|
|
foreach line [split $data \n] {
|
|
|
|
if {$line == ""} continue
|
|
|
|
|
|
|
|
set items [split $line " "]
|
|
|
|
|
|
|
|
if {[lindex $items 0] == "#"} continue
|
|
|
|
|
|
|
|
scan [lindex $items 0] %x index
|
|
|
|
|
|
|
|
set exclusions($index) ""
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
proc uni::load_tables {data} {
|
|
|
|
variable cclass_map
|
|
|
|
variable decomp_map
|
|
|
|
variable comp_map
|
|
|
|
variable comp_first
|
|
|
|
variable comp_second
|
|
|
|
variable exclusions
|
|
|
|
|
|
|
|
foreach line [split $data \n] {
|
|
|
|
if {$line == ""} continue
|
|
|
|
|
|
|
|
set items [split $line \;]
|
|
|
|
|
|
|
|
scan [lindex $items 0] %x index
|
|
|
|
set cclass [lindex $items 3]
|
|
|
|
set decomp [lindex $items 5]
|
|
|
|
|
|
|
|
set cclass_map($index) $cclass
|
|
|
|
#set decomp_map($index) $cclass
|
|
|
|
|
|
|
|
if {$decomp != ""} {
|
|
|
|
if {[string index [lindex $decomp 0] 0] == "<"} {
|
|
|
|
set decomp1 [lreplace $decomp 0 0]
|
|
|
|
set decomp {}
|
|
|
|
foreach ch $decomp1 {
|
|
|
|
scan $ch %x ch
|
|
|
|
lappend decomp $ch
|
|
|
|
}
|
|
|
|
set decomp_map($index) $decomp
|
|
|
|
} else {
|
|
|
|
switch -- [llength $decomp] {
|
|
|
|
1 {
|
|
|
|
scan $decomp %x ch
|
|
|
|
set decomp_map($index) $ch
|
|
|
|
}
|
|
|
|
2 {
|
|
|
|
scan $decomp "%x %x" ch1 ch2
|
|
|
|
set decomp [list $ch1 $ch2]
|
|
|
|
set decomp_map($index) $decomp
|
|
|
|
# hackish
|
|
|
|
if {(![info exists cclass_map($ch1)] || \
|
|
|
|
$cclass_map($ch1) == 0) && \
|
|
|
|
![info exists exclusions($index)]} {
|
|
|
|
if {[info exists comp_first($ch1)]} {
|
|
|
|
incr comp_first($ch1)
|
|
|
|
} else {
|
|
|
|
set comp_first($ch1) 1
|
|
|
|
}
|
|
|
|
if {[info exists comp_second($ch2)]} {
|
|
|
|
incr comp_second($ch2)
|
|
|
|
} else {
|
|
|
|
set comp_second($ch2) 1
|
|
|
|
}
|
|
|
|
set comp_map($decomp) $index
|
|
|
|
} else {
|
|
|
|
puts "Excluded $index"
|
|
|
|
}
|
|
|
|
}
|
|
|
|
default {
|
|
|
|
puts "Bad canonical decomposition: $line"
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#puts "[format 0x%0.4x $index]\t$cclass\t$decomp_map($index)"
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#puts [array get comp_first]
|
|
|
|
#puts [array get comp_second]
|
|
|
|
}
|
|
|
|
|
|
|
|
proc uni::buildTables {} {
|
|
|
|
variable cclass_shift
|
|
|
|
variable decomp_shift
|
|
|
|
variable comp_shift
|
|
|
|
|
|
|
|
variable cclass_map
|
|
|
|
variable cclass_pmap {}
|
|
|
|
variable cclass_pages {}
|
|
|
|
variable decomp_map
|
|
|
|
variable decomp_pmap {}
|
|
|
|
variable decomp_pages {}
|
|
|
|
variable decomp_list {}
|
|
|
|
variable comp_map
|
|
|
|
variable comp_pmap {}
|
|
|
|
variable comp_pages {}
|
|
|
|
variable comp_first
|
|
|
|
variable comp_second
|
|
|
|
variable comp_first_list {}
|
|
|
|
variable comp_second_list {}
|
|
|
|
variable comp_x_list {}
|
|
|
|
variable comp_y_list {}
|
|
|
|
variable comp_both_map {}
|
|
|
|
|
|
|
|
set cclass_info {}
|
|
|
|
set decomp_info {}
|
|
|
|
set comp_info {}
|
|
|
|
|
|
|
|
set cclass_mask [expr {(1 << $cclass_shift) - 1}]
|
|
|
|
set decomp_mask [expr {(1 << $decomp_shift) - 1}]
|
|
|
|
set comp_mask [expr {(1 << $comp_shift) - 1}]
|
|
|
|
|
2005-05-06 03:38:05 +02:00
|
|
|
foreach comp [array names comp_map] {
|
|
|
|
set ch1 [lindex $comp 0]
|
|
|
|
if {[info exists comp_first($ch1)] && $comp_first($ch1) > 0 && \
|
|
|
|
[info exists comp_second($ch1)] && $comp_second($ch1) > 0} {
|
|
|
|
if {[lsearch -exact $comp_x_list $ch1] < 0} {
|
|
|
|
set i [llength $comp_x_list]
|
|
|
|
lappend comp_x_list $ch1
|
|
|
|
set comp_info_map($ch1) $i
|
|
|
|
lappend comp_y_list $ch1
|
|
|
|
set comp_info_map($ch1) $i
|
|
|
|
puts "There should be no symbols which appears on"
|
|
|
|
puts "both first and second place in composition"
|
|
|
|
exit
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2003-10-06 22:12:11 +02:00
|
|
|
foreach comp [array names comp_map] {
|
|
|
|
set ch1 [lindex $comp 0]
|
|
|
|
set ch2 [lindex $comp 1]
|
|
|
|
|
2005-05-06 03:38:05 +02:00
|
|
|
if {$comp_first($ch1) == 1 && ![info exists comp_second($ch1)]} {
|
2003-10-06 22:12:11 +02:00
|
|
|
set i [llength $comp_first_list]
|
|
|
|
lappend comp_first_list [list $ch2 $comp_map($comp)]
|
|
|
|
set comp_info_map($ch1) [expr {$i | (1 << 16)}]
|
2005-05-06 03:38:05 +02:00
|
|
|
} elseif {$comp_second($ch2) == 1 && ![info exists comp_first($ch2)]} {
|
2003-10-06 22:12:11 +02:00
|
|
|
set i [llength $comp_second_list]
|
|
|
|
lappend comp_second_list [list $ch1 $comp_map($comp)]
|
2005-05-19 01:47:33 +02:00
|
|
|
set comp_info_map($ch2) [expr {$i | (1 << 16) | (1 << 17)}]
|
2003-10-06 22:12:11 +02:00
|
|
|
} else {
|
|
|
|
if {[lsearch -exact $comp_x_list $ch1] < 0} {
|
|
|
|
set i [llength $comp_x_list]
|
|
|
|
lappend comp_x_list $ch1
|
|
|
|
set comp_info_map($ch1) $i
|
|
|
|
}
|
|
|
|
if {[lsearch -exact $comp_y_list $ch2] < 0} {
|
|
|
|
set i [llength $comp_y_list]
|
|
|
|
lappend comp_y_list $ch2
|
2005-05-06 03:38:05 +02:00
|
|
|
set comp_info_map($ch2) [expr {$i | (1 << 17)}]
|
2003-10-06 22:12:11 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
set next 0
|
|
|
|
|
2005-05-07 03:21:39 +02:00
|
|
|
for {set i 0} {$i <= 0x10ffff} {incr i} {
|
2003-10-06 22:12:11 +02:00
|
|
|
#set gIndex [getGroup [getValue $i]]
|
|
|
|
|
|
|
|
set cclass_offset [expr {$i & $cclass_mask}]
|
|
|
|
|
|
|
|
if {[info exists cclass_map($i)]} {
|
|
|
|
set cclass $cclass_map($i)
|
|
|
|
} else {
|
|
|
|
set cclass 0
|
|
|
|
}
|
|
|
|
lappend cclass_info $cclass
|
|
|
|
|
|
|
|
if {$cclass_offset == $cclass_mask} {
|
|
|
|
addPage cclass_pmap cclass_pages $cclass_info
|
|
|
|
set cclass_info {}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
set decomp_offset [expr {$i & $decomp_mask}]
|
|
|
|
|
|
|
|
if {[info exists decomp_map($i)]} {
|
|
|
|
set decomp $decomp_map($i)
|
2005-05-06 03:38:05 +02:00
|
|
|
set b 1
|
|
|
|
while {$b} {
|
|
|
|
set b 0
|
|
|
|
for {set j 0} {$j < [llength $decomp]} {incr j} {
|
|
|
|
if {[info exists \
|
|
|
|
decomp_map([set ch1 [lindex $decomp $j]])]} {
|
|
|
|
#puts -$decomp
|
|
|
|
set decomp [eval [list lreplace $decomp $j $j] \
|
|
|
|
$decomp_map($ch1)]
|
|
|
|
#puts +$decomp
|
|
|
|
set b 1
|
|
|
|
}
|
|
|
|
}
|
2003-10-07 22:31:44 +02:00
|
|
|
}
|
|
|
|
|
2003-10-06 22:12:11 +02:00
|
|
|
if {[info exists decomp_used($decomp)]} {
|
|
|
|
lappend decomp_info $decomp_used($decomp)
|
|
|
|
} else {
|
|
|
|
set val [expr {([llength $decomp] << 16) + \
|
|
|
|
[llength $decomp_list]}]
|
|
|
|
#set val [expr {[llength $decomp_list]}]
|
|
|
|
lappend decomp_info $val
|
|
|
|
set decomp_used($decomp) $val
|
|
|
|
#puts "$val $decomp"
|
|
|
|
foreach d $decomp {
|
|
|
|
lappend decomp_list $d
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
lappend decomp_info -1
|
|
|
|
}
|
|
|
|
|
|
|
|
if {$decomp_offset == $decomp_mask} {
|
|
|
|
addPage decomp_pmap decomp_pages $decomp_info
|
|
|
|
set decomp_info {}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
set comp_offset [expr {$i & $comp_mask}]
|
|
|
|
|
|
|
|
if {[info exists comp_info_map($i)]} {
|
|
|
|
set comp $comp_info_map($i)
|
|
|
|
} else {
|
|
|
|
set comp -1
|
|
|
|
}
|
|
|
|
lappend comp_info $comp
|
|
|
|
|
|
|
|
if {$comp_offset == $comp_mask} {
|
|
|
|
addPage comp_pmap comp_pages $comp_info
|
|
|
|
set comp_info {}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#puts [array get decomp_map]
|
|
|
|
#puts $decomp_list
|
|
|
|
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
proc uni::main {} {
|
|
|
|
global argc argv0 argv
|
|
|
|
variable cclass_shift
|
|
|
|
variable cclass_pmap
|
|
|
|
variable cclass_pages
|
|
|
|
variable decomp_shift
|
|
|
|
variable decomp_pmap
|
|
|
|
variable decomp_pages
|
|
|
|
variable decomp_list
|
|
|
|
variable comp_shift
|
|
|
|
variable comp_map
|
|
|
|
variable comp_pmap
|
|
|
|
variable comp_pages
|
|
|
|
variable comp_first_list
|
|
|
|
variable comp_second_list
|
|
|
|
variable comp_x_list
|
|
|
|
variable comp_y_list
|
|
|
|
variable pages
|
|
|
|
variable groups {}
|
|
|
|
variable titleCount
|
|
|
|
|
|
|
|
if {$argc != 3} {
|
|
|
|
puts stderr "\nusage: $argv0 <datafile> <exclusionsfile> <outdir>\n"
|
|
|
|
exit 1
|
|
|
|
}
|
|
|
|
set f [open [lindex $argv 1] r]
|
|
|
|
set data [read $f]
|
|
|
|
close $f
|
|
|
|
|
|
|
|
load_exclusions $data
|
|
|
|
|
|
|
|
set f [open [lindex $argv 0] r]
|
|
|
|
set data [read $f]
|
|
|
|
close $f
|
|
|
|
|
|
|
|
load_tables $data
|
|
|
|
buildTables
|
|
|
|
#puts "X = [llength $pMap] Y= [llength $pages] A= [llength $groups]"
|
|
|
|
#set size [expr {[llength $pMap] + [llength $pages]*(1<<$shift)}]
|
|
|
|
#puts "shift = 6, space = $size"
|
|
|
|
#puts "title case count = $titleCount"
|
|
|
|
|
|
|
|
set f [open [file join [lindex $argv 2] uni_norm.c] w]
|
|
|
|
fconfigure $f -translation lf
|
|
|
|
puts $f "/*
|
|
|
|
* uni_norm.c --
|
|
|
|
*
|
|
|
|
* Declarations of Unicode character information tables. This file is
|
|
|
|
* automatically generated by the uni_parse2.tcl script. Do not
|
|
|
|
* modify this file by hand.
|
|
|
|
*
|
|
|
|
* Copyright (c) 1998 by Scriptics Corporation.
|
|
|
|
* All rights reserved.
|
|
|
|
*
|
|
|
|
* Modified for ejabberd by Alexey Shchepin
|
|
|
|
*
|
|
|
|
* RCS: @(#) \$Id\$
|
|
|
|
*/
|
|
|
|
|
|
|
|
/*
|
|
|
|
* A 16-bit Unicode character is split into two parts in order to index
|
|
|
|
* into the following tables. The lower CCLASS_OFFSET_BITS comprise an offset
|
|
|
|
* into a page of characters. The upper bits comprise the page number.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#define CCLASS_OFFSET_BITS $cclass_shift
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The pageMap is indexed by page number and returns an alternate page number
|
|
|
|
* that identifies a unique page of characters. Many Unicode characters map
|
|
|
|
* to the same alternate page number.
|
|
|
|
*/
|
|
|
|
|
|
|
|
static unsigned char cclassPageMap\[\] = {"
|
|
|
|
set line " "
|
|
|
|
set last [expr {[llength $cclass_pmap] - 1}]
|
|
|
|
for {set i 0} {$i <= $last} {incr i} {
|
|
|
|
append line [lindex $cclass_pmap $i]
|
|
|
|
if {$i != $last} {
|
|
|
|
append line ", "
|
|
|
|
}
|
|
|
|
if {[string length $line] > 70} {
|
|
|
|
puts $f $line
|
|
|
|
set line " "
|
|
|
|
}
|
|
|
|
}
|
|
|
|
puts $f $line
|
|
|
|
puts $f "};
|
|
|
|
|
|
|
|
/*
|
2003-10-09 20:09:05 +02:00
|
|
|
* The cclassGroupMap is indexed by combining the alternate page number with
|
|
|
|
* the page offset and returns a combining class number.
|
2003-10-06 22:12:11 +02:00
|
|
|
*/
|
|
|
|
|
|
|
|
static unsigned char cclassGroupMap\[\] = {"
|
|
|
|
set line " "
|
|
|
|
set lasti [expr {[llength $cclass_pages] - 1}]
|
|
|
|
for {set i 0} {$i <= $lasti} {incr i} {
|
|
|
|
set page [lindex $cclass_pages $i]
|
|
|
|
set lastj [expr {[llength $page] - 1}]
|
|
|
|
for {set j 0} {$j <= $lastj} {incr j} {
|
|
|
|
append line [lindex $page $j]
|
|
|
|
if {$j != $lastj || $i != $lasti} {
|
|
|
|
append line ", "
|
|
|
|
}
|
|
|
|
if {[string length $line] > 70} {
|
|
|
|
puts $f $line
|
|
|
|
set line " "
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
puts $f $line
|
|
|
|
puts $f "};
|
|
|
|
|
2005-05-07 03:21:39 +02:00
|
|
|
#define GetUniCharCClass(ch) (cclassGroupMap\[(cclassPageMap\[(((int)(ch)) & 0x1fffff) >> CCLASS_OFFSET_BITS\] << CCLASS_OFFSET_BITS) | ((ch) & ((1 << CCLASS_OFFSET_BITS)-1))\])
|
2003-10-06 22:12:11 +02:00
|
|
|
|
|
|
|
|
|
|
|
#define DECOMP_OFFSET_BITS $decomp_shift
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The pageMap is indexed by page number and returns an alternate page number
|
|
|
|
* that identifies a unique page of characters. Many Unicode characters map
|
|
|
|
* to the same alternate page number.
|
|
|
|
*/
|
|
|
|
|
|
|
|
static unsigned char decompPageMap\[\] = {"
|
|
|
|
set line " "
|
|
|
|
set last [expr {[llength $decomp_pmap] - 1}]
|
|
|
|
for {set i 0} {$i <= $last} {incr i} {
|
|
|
|
append line [lindex $decomp_pmap $i]
|
|
|
|
if {$i != $last} {
|
|
|
|
append line ", "
|
|
|
|
}
|
|
|
|
if {[string length $line] > 70} {
|
|
|
|
puts $f $line
|
|
|
|
set line " "
|
|
|
|
}
|
|
|
|
}
|
|
|
|
puts $f $line
|
|
|
|
puts $f "};
|
|
|
|
|
|
|
|
/*
|
2003-10-09 20:09:05 +02:00
|
|
|
* The decompGroupMap is indexed by combining the alternate page number with
|
|
|
|
* the page offset and returns a group number that identifies a length and
|
|
|
|
* shift of decomposition sequence in decompList
|
2003-10-06 22:12:11 +02:00
|
|
|
*/
|
|
|
|
|
|
|
|
static int decompGroupMap\[\] = {"
|
|
|
|
set line " "
|
|
|
|
set lasti [expr {[llength $decomp_pages] - 1}]
|
|
|
|
for {set i 0} {$i <= $lasti} {incr i} {
|
|
|
|
set page [lindex $decomp_pages $i]
|
|
|
|
set lastj [expr {[llength $page] - 1}]
|
|
|
|
for {set j 0} {$j <= $lastj} {incr j} {
|
|
|
|
append line [lindex $page $j]
|
|
|
|
if {$j != $lastj || $i != $lasti} {
|
|
|
|
append line ", "
|
|
|
|
}
|
|
|
|
if {[string length $line] > 70} {
|
|
|
|
puts $f $line
|
|
|
|
set line " "
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
puts $f $line
|
|
|
|
puts $f "};
|
|
|
|
|
|
|
|
/*
|
2003-10-09 20:09:05 +02:00
|
|
|
* List of decomposition sequences
|
2003-10-06 22:12:11 +02:00
|
|
|
*/
|
|
|
|
|
|
|
|
static int decompList\[\] = {"
|
|
|
|
set line " "
|
|
|
|
set last [expr {[llength $decomp_list] - 1}]
|
|
|
|
for {set i 0} {$i <= $last} {incr i} {
|
|
|
|
set val [lindex $decomp_list $i]
|
|
|
|
|
|
|
|
append line [format "%d" $val]
|
|
|
|
if {$i != $last} {
|
|
|
|
append line ", "
|
|
|
|
}
|
|
|
|
if {[string length $line] > 70} {
|
|
|
|
puts $f $line
|
|
|
|
set line " "
|
|
|
|
}
|
|
|
|
}
|
|
|
|
puts $f $line
|
|
|
|
puts $f "};
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* This macro extracts the information about a character from the
|
|
|
|
* Unicode character tables.
|
|
|
|
*/
|
|
|
|
|
2005-05-07 03:21:39 +02:00
|
|
|
#define GetUniCharDecompInfo(ch) (decompGroupMap\[(decompPageMap\[(((int)(ch)) & 0x1fffff) >> DECOMP_OFFSET_BITS\] << DECOMP_OFFSET_BITS) | ((ch) & ((1 << DECOMP_OFFSET_BITS)-1))\])
|
2003-10-06 22:12:11 +02:00
|
|
|
|
|
|
|
#define GetDecompShift(info) ((info) & 0xffff)
|
|
|
|
#define GetDecompLen(info) ((info) >> 16)
|
|
|
|
|
|
|
|
|
|
|
|
#define COMP_OFFSET_BITS $comp_shift
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The pageMap is indexed by page number and returns an alternate page number
|
|
|
|
* that identifies a unique page of characters. Many Unicode characters map
|
|
|
|
* to the same alternate page number.
|
|
|
|
*/
|
|
|
|
|
|
|
|
static unsigned char compPageMap\[\] = {"
|
|
|
|
set line " "
|
|
|
|
set last [expr {[llength $comp_pmap] - 1}]
|
|
|
|
for {set i 0} {$i <= $last} {incr i} {
|
|
|
|
append line [lindex $comp_pmap $i]
|
|
|
|
if {$i != $last} {
|
|
|
|
append line ", "
|
|
|
|
}
|
|
|
|
if {[string length $line] > 70} {
|
|
|
|
puts $f $line
|
|
|
|
set line " "
|
|
|
|
}
|
|
|
|
}
|
|
|
|
puts $f $line
|
|
|
|
puts $f "};
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The groupMap is indexed by combining the alternate page number with
|
|
|
|
* the page offset and returns a group number that identifies a unique
|
|
|
|
* set of character attributes.
|
|
|
|
*/
|
|
|
|
|
|
|
|
static int compGroupMap\[\] = {"
|
|
|
|
set line " "
|
|
|
|
set lasti [expr {[llength $comp_pages] - 1}]
|
|
|
|
for {set i 0} {$i <= $lasti} {incr i} {
|
|
|
|
set page [lindex $comp_pages $i]
|
|
|
|
set lastj [expr {[llength $page] - 1}]
|
|
|
|
for {set j 0} {$j <= $lastj} {incr j} {
|
|
|
|
append line [lindex $page $j]
|
|
|
|
if {$j != $lastj || $i != $lasti} {
|
|
|
|
append line ", "
|
|
|
|
}
|
|
|
|
if {[string length $line] > 70} {
|
|
|
|
puts $f $line
|
|
|
|
set line " "
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
puts $f $line
|
|
|
|
puts $f "};
|
|
|
|
|
|
|
|
/*
|
2003-10-09 20:09:05 +02:00
|
|
|
* Lists of compositions for characters that appears only in one composition
|
2003-10-06 22:12:11 +02:00
|
|
|
*/
|
|
|
|
|
|
|
|
static int compFirstList\[\]\[2\] = {"
|
|
|
|
set line " "
|
|
|
|
set last [expr {[llength $comp_first_list] - 1}]
|
|
|
|
for {set i 0} {$i <= $last} {incr i} {
|
|
|
|
set val [lindex $comp_first_list $i]
|
|
|
|
|
|
|
|
append line [format "{%d, %d}" [lindex $val 0] [lindex $val 1]]
|
|
|
|
if {$i != $last} {
|
|
|
|
append line ", "
|
|
|
|
}
|
|
|
|
if {[string length $line] > 60} {
|
|
|
|
puts $f $line
|
|
|
|
set line " "
|
|
|
|
}
|
|
|
|
}
|
|
|
|
puts $f $line
|
|
|
|
puts $f "};
|
|
|
|
|
|
|
|
static int compSecondList\[\]\[2\] = {"
|
|
|
|
set line " "
|
|
|
|
set last [expr {[llength $comp_second_list] - 1}]
|
|
|
|
for {set i 0} {$i <= $last} {incr i} {
|
|
|
|
set val [lindex $comp_second_list $i]
|
|
|
|
|
|
|
|
append line [format "{%d, %d}" [lindex $val 0] [lindex $val 1]]
|
|
|
|
if {$i != $last} {
|
|
|
|
append line ", "
|
|
|
|
}
|
|
|
|
if {[string length $line] > 60} {
|
|
|
|
puts $f $line
|
|
|
|
set line " "
|
|
|
|
}
|
|
|
|
}
|
|
|
|
puts $f $line
|
|
|
|
puts $f "};
|
|
|
|
|
2003-10-09 20:09:05 +02:00
|
|
|
/*
|
|
|
|
* Compositions matrix
|
|
|
|
*/
|
|
|
|
|
2003-10-06 22:12:11 +02:00
|
|
|
static int compBothList\[[llength $comp_x_list]\]\[[llength $comp_y_list]\] = {"
|
|
|
|
set lastx [expr {[llength $comp_x_list] - 1}]
|
|
|
|
set lasty [expr {[llength $comp_y_list] - 1}]
|
|
|
|
for {set i 0} {$i <= $lastx} {incr i} {
|
|
|
|
puts $f " \{"
|
|
|
|
set line " "
|
|
|
|
for {set j 0} {$j <= $lasty} {incr j} {
|
|
|
|
set comp [list [lindex $comp_x_list $i] [lindex $comp_y_list $j]]
|
|
|
|
if {[info exists comp_map($comp)]} {
|
|
|
|
set val $comp_map($comp)
|
|
|
|
} else {
|
|
|
|
set val 0
|
|
|
|
}
|
|
|
|
|
|
|
|
append line [format "%d" $val]
|
|
|
|
if {$j != $lasty} {
|
|
|
|
append line ", "
|
|
|
|
}
|
|
|
|
if {[string length $line] > 70} {
|
|
|
|
puts $f $line
|
|
|
|
set line " "
|
|
|
|
}
|
|
|
|
}
|
|
|
|
puts $f $line
|
|
|
|
if {$j != $lasty} {
|
|
|
|
puts $f " \},"
|
|
|
|
} else {
|
|
|
|
puts $f " \}"
|
|
|
|
}
|
|
|
|
}
|
|
|
|
puts $f "};
|
|
|
|
|
|
|
|
|
2005-05-07 03:21:39 +02:00
|
|
|
#define GetUniCharCompInfo(ch) (compGroupMap\[(compPageMap\[(((int)(ch)) & 0x1fffff) >> COMP_OFFSET_BITS\] << COMP_OFFSET_BITS) | ((ch) & ((1 << COMP_OFFSET_BITS)-1))\])
|
2003-10-06 22:12:11 +02:00
|
|
|
|
|
|
|
#define CompSingleMask (1 << 16)
|
|
|
|
#define CompMask ((1 << 16) - 1)
|
2005-05-06 03:38:05 +02:00
|
|
|
#define CompSecondMask (1 << 17)
|
2003-10-06 22:12:11 +02:00
|
|
|
"
|
|
|
|
|
|
|
|
close $f
|
|
|
|
}
|
|
|
|
|
|
|
|
uni::main
|
|
|
|
|
|
|
|
return
|