Index: head/tools/tools/locale/Makefile
===================================================================
--- head/tools/tools/locale/Makefile	(revision 340203)
+++ head/tools/tools/locale/Makefile	(revision 340204)
@@ -1,191 +1,185 @@
 # $FreeBSD$
 
 # See https://wiki.freebsd.org/LocaleNewApproach
 # Taken from FreeBSD svn [base]/user/edwin/locale/cldr
 #
 # needs:
 # devel/p5-Tie-IxHash
 #
 # Modified by John Marino to suit DragonFly needs
 #
 
 .OBJDIR: .
 
-.if !defined(CLDRDIR)
-CLDRDIR!= grep ^cldr etc/unicode.conf | cut -f 2 -d " "
+.if !defined(UNIDIR)
+.error UNIDIR is not set
 .endif
-.if !defined(UNIDATADIR)
-UNIDATADIR!= grep ^unidata etc/unicode.conf | cut -f 2 -d " "
-.endif
-PASSON= CLDRDIR="${CLDRDIR}" UNIDATADIR="${UNIDATADIR}"
+PASSON= UNIDIR="${UNIDIR}"
 
 ETCDIR= ${.CURDIR}/etc
 
 KNOWN= monetdef numericdef msgdef colldef ctypedef # timedef
 TYPES?= ${KNOWN}
 LOCALE_DESTDIR?= /tmp/generated-locales/
 
 COLLATION_SPECIAL?= \
 	cs_CZ ISO8859-2 \
 	da_DK ISO8859-1 \
 	da_DK ISO8859-15 \
 	hr_HR ISO8859-2 \
 	hu_HU ISO8859-2 \
 	nb_NO ISO8859-1 \
 	nb_NO ISO8859-15 \
 	sk_SK ISO8859-2 \
 	sr_Latn_RS ISO8859-2 \
 	sr_Cyrl_RS ISO8859-5 \
 	zh_Hans_CN GB2312 \
 	zh_Hans_CN eucCN \
 	zh_Hant_TW Big5 \
 	zh_Hans_CN GB18030 \
 	zh_Hans_CN GBK \
 	ja_JP eucJP \
 	nn_NO ISO8859-15 \
 	nn_NO ISO8859-1
 
 .for area enc in ${COLLATION_SPECIAL}
 COLLATIONS_SPECIAL_ENV+= ${area}.${enc}
 .endfor
 PASSON+= COLLATIONS_SPECIAL="${COLLATIONS_SPECIAL_ENV}"
 
-.if defined(LC)
-LC:= --lc=${LC}
-.endif
-
 all:
-	cp ${ETCDIR}/common.UTF-8.src ${CLDRDIR}/posix/xx_Comm_C.UTF-8.src
+	cp ${ETCDIR}/common.UTF-8.src ${UNIDIR}/posix/xx_Comm_C.UTF-8.src
 .for t in ${TYPES}
 . if ${KNOWN:M${t}}
 	test -d ${t} || mkdir ${t}
 	make build-${t}
 . endif
 .endfor
 	@echo ""
 	@find . -name *failed
 
 .for t in ${TYPES}
 install: install-${t}
 install-${t}:
 . if ${KNOWN:M${t}}
 	rm -rf ${.CURDIR}/${t}.draft
-	rm -rf ${.CURDIR}/../../../share/${t}
-	mv ${.CURDIR}/${t} ${.CURDIR}/../../../share/${t}
+	rm -f ${.CURDIR}/../../../share/${t}/Makefile
+	rm -f ${.CURDIR}/../../../share/${t}/*.src
+	mv ${.CURDIR}/${t}/* ${.CURDIR}/../../../share/${t}/
 . endif
 .endfor
 
 post-install:
 .for t in ${TYPES}
 . if ${KNOWN:M${t}}
 	(cd ${.CURDIR}/../../../share/${t} && \
 		make && make install && make clean)
 . endif
 .endfor
 
 .for t in ${TYPES}
 gen-${t}:
 	mkdir -p ${t} ${t}.draft
 	perl -I tools tools/cldr2def.pl \
-		--cldr=$$(realpath ${CLDRDIR}) \
-		--unidata=$$(realpath ${UNIDATADIR}) \
+		--unidir=$$(realpath ${UNIDIR}) \
 		--etc=$$(realpath ${ETCDIR}) \
-		--type=${t} ${LC}
+		--type=${t}
 
 build-${t}: gen-${t}
 	env ${PASSON} tools/finalize ${t}
 .endfor
 
 gen-ctypedef: transfer-rollup
 static-colldef: gen-colldef
 build-colldef: static-colldef
 
 static-colldef:
 .for area enc in ${COLLATION_SPECIAL}
-	awk -f tools/extract-colldef.awk ${CLDRDIR}/posix/${area}.${enc}.src > colldef.draft/${area}.${enc}.src
+	awk -f tools/extract-colldef.awk ${UNIDIR}/posix/${area}.${enc}.src > \
+		colldef.draft/${area}.${enc}.src
 .endfor
 
 transfer-rollup:
-	cp ${ETCDIR}/common.UTF-8.src ${CLDRDIR}/posix/xx_Comm_C.UTF-8.src
+	cp ${ETCDIR}/common.UTF-8.src ${UNIDIR}/posix/xx_Comm_C.UTF-8.src
 
 rollup:
 	perl -I tools tools/utf8-rollup.pl \
-		--cldr=$$(realpath ${CLDRDIR}) \
+		--unidir=$$(realpath ${UNIDIR}) \
 		--etc=$$(realpath ${ETCDIR})
 
 clean:
 .for t in ${TYPES}
 	rm -rf ${t} ${t}.draft
 .endfor
 
 BASE_LOCALES_OF_INTEREST?= \
 	af_ZA am_ET ar_AE ar_EG ar_JO ar_MA ar_QA ar_SA \
 	be_BY bg_BG ca_AD ca_ES ca_FR ca_IT \
 	cs_CZ da_DK de_AT de_CH de_DE el_GR en_AU en_CA \
 	en_GB en_HK en_IE en_NZ en_PH en_SG en_US en_ZA \
 	es_AR es_CR es_ES es_MX et_EE eu_ES fi_FI fr_BE \
 	fr_CA fr_CH fr_FR he_IL hi_IN hr_HR hu_HU hy_AM \
 	is_IS it_CH it_IT ja_JP ko_KR lt_LT lv_LV \
 	nb_NO nl_BE nl_NL nn_NO pl_PL pt_BR pt_PT ro_RO \
 	ru_RU se_FI se_NO sk_SK sl_SI sv_FI sv_SE tr_TR \
 	uk_UA \
 	kk_KZ mn_MN sr_Cyrl_RS sr_Latn_RS \
 	zh_Hans_CN zh_Hant_HK zh_Hant_TW \
 	bn_IN gu_IN or_IN ta_IN te_IN kn_IN ml_IN si_LK \
 	th_TH lo_LA bo_IN my_MM pa_Guru_IN ka_GE chr_US \
 	km_KH shi_Tfng_MA ii_CN vai_Vaii_LR vi_VN
 
 ENCODINGS= Big5 \
 	CP1251 \
 	CP866 \
 	CP949 \
 	eucCN \
 	eucJP \
 	eucKR \
 	GB18030 \
 	GB2312 \
 	GBK \
 	ISO8859-1 \
 	ISO8859-13 \
 	ISO8859-15 \
 	ISO8859-2 \
 	ISO8859-5 \
 	ISO8859-7 \
 	ISO8859-9 \
 	KOI8-R \
 	KOI8-U \
 	SJIS \
 	US-ASCII \
 	UTF-8 \
 
 POSIX:
-.if exists (${CLDRDIR}/tools/java/cldr.jar)
-	mkdir -p ${CLDRDIR}/posix
+.if exists (${UNIDIR}/tools/java/cldr.jar)
+	mkdir -p ${UNIDIR}/posix
 . for area in ${BASE_LOCALES_OF_INTEREST}
-. if !exists(${CLDRDIR}/posix/${area}.UTF-8.src)
-	java -DCLDR_DIR=${CLDRDIR:Q} -jar ${CLDRDIR}/tools/java/cldr.jar \
+. if !exists(${UNIDIR}/posix/${area}.UTF-8.src)
+	java -DCLDR_DIR=${UNIDIR:Q} -jar ${UNIDIR}/tools/java/cldr.jar \
 		org.unicode.cldr.posix.GeneratePOSIX \
-		-d ${CLDRDIR}/posix -m ${area} -c UTF-8
+		-d ${UNIDIR}/posix -m ${area} -c UTF-8
 . endif
 . endfor
 . for area encoding in ${COLLATION_SPECIAL}
-. if !exists(${CLDRDIR}/posix/${area}.${encoding}.src)
-	java -DCLDR_DIR=${CLDRDIR:Q} -jar ${CLDRDIR}/tools/java/cldr.jar \
+. if !exists(${UNIDIR}/posix/${area}.${encoding}.src)
+	java -DCLDR_DIR=${UNIDIR:Q} -jar ${UNIDIR}/tools/java/cldr.jar \
 		org.unicode.cldr.posix.GeneratePOSIX \
-		-d ${CLDRDIR}/posix -m ${area} -c ${encoding}
+		-d ${UNIDIR}/posix -m ${area} -c ${encoding}
 . endif
 . endfor
 . for enc in ${ENCODINGS}
-. if !exists(${CLDRDIR}/posix/${enc}.cm)
-	java -DCLDR_DIR=${CLDRDIR:Q} -jar ${CLDRDIR}/tools/java/cldr.jar \
+. if !exists(${UNIDIR}/posix/${enc}.cm)
+	java -DCLDR_DIR=${UNIDIR:Q} -jar ${UNIDIR}/tools/java/cldr.jar \
 		org.unicode.cldr.posix.GenerateCharmap \
-		-d ${CLDRDIR}/posix -c ${enc}
+		-d ${UNIDIR}/posix -c ${enc}
 . endif
 . endfor
 .else
 	@echo "Please install CLDR toolset for the desired release"
-	@echo "It should go at ${CLDRDIR}/tools"
+	@echo "It should go at ${UNIDIR}/tools"
 .endif
 
 clean-POSIX:
-	rm -f ${CLDRDIR}/posix/*
+	rm -f ${UNIDIR}/posix/*
Index: head/tools/tools/locale/README
===================================================================
--- head/tools/tools/locale/README	(revision 340203)
+++ head/tools/tools/locale/README	(revision 340204)
@@ -1,38 +1,31 @@
 # $FreeBSD$
 
 To generate the locales:
 
 Tools needed:
 java (openjdk >= 8)
 perl
 converters/p5-Text-Iconv
 devel/p5-Tie-IxHash
 textproc/p5-XML-Parser
 
-Fetch CLDR data from: http://unicode.org/Public/cldr/. You need all of the
+1. Fetch CLDR data from: http://unicode.org/Public/cldr/. You need all of the
 core.zip, keyboards.zip, and tools.zip.
-
-Extract:
-	mkdir -p ~/unicode/cldr/v33.0
-	cd ~/unicode/cldr/v33.0
-	unzip ~/core.zip ~/keyboards.zip ~/tools.zip
-
-Fetch unidata (UCD.zip) from http://www.unicode.org/Public/zipped/latest.
-
-Extract:
-	mkdir -p ~/unicode/UNIDATA/11.0.0
-	cd ~/unicode/UNIDATA/11.0.0
+2. Fetch unidata (UCD.zip) from http://www.unicode.org/Public/zipped/latest.
+3. Extract:
+	mkdir -p ~/unicode
+	cd ~/unicode
+	unzip ~/core.zip
+	unzip ~/keyboards.zip
+	unzip ~/tools.zip
 	unzip ~/UCD.zip
-
-Either modify tools/tools/locales/etc/unicode.conf or export variables:
-	CLDRDIR=~/unicode/cldr/v33.0; export CLDRDIR
-	UNIDATADIR=~/unicode/UNIDATA/9.0.0; export UNIDATADIR
-
-Build the CLDR tools:
-	cd $CLDRDIR/tools/java
+4. Export variable:
+	UNIDIR=~/unicode; export UNIDIR
+5. Build the CLDR tools:
+	cd $UNIDIR/tools/java
 	ant jar
-
-Run:
+6. Build POSIX data files from CLDR data:
 	make POSIX
+7. 
Build and install new locale data: make make install Index: head/tools/tools/locale/etc/unicode.conf =================================================================== --- head/tools/tools/locale/etc/unicode.conf (revision 340203) +++ head/tools/tools/locale/etc/unicode.conf (nonexistent) @@ -1,4 +0,0 @@ -# $FreeBSD$ - -cldr ~/unicode/cldr/30.0.3 -unidata ~/unicode/UNIDATA/9.0.0 Property changes on: head/tools/tools/locale/etc/unicode.conf ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: head/tools/tools/locale/tools/UTF82encoding.pl =================================================================== --- head/tools/tools/locale/tools/UTF82encoding.pl (revision 340203) +++ head/tools/tools/locale/tools/UTF82encoding.pl (nonexistent) @@ -1,78 +0,0 @@ -#!/usr/bin/perl -w - -use strict; -use Data::Dumper; - -if ($#ARGV != 1) { - print "Usage: $0 \n"; - exit; -} - -open(FIN, "$ARGV[0]/posix/UTF-8.cm"); -my @lines = ; -chomp(@lines); -close(FIN); - -my %cm = (); -foreach my $line (@lines) { - next if ($line =~ /^#/); - next if ($line eq ""); - next if ($line !~ /^; -chomp(@lines); -close(FIN); - -foreach my $line (@lines) { - if ($line =~ /^#/) { - print "$line\n"; - next; - } - - my @l = split(//, $line); - for (my $i = 0; $i <= $#l; $i++) { - my $hex = sprintf("%X", ord($l[$i])); - - if (( $l[$i] gt "\x20") - && ($l[$i] lt "a" || $l[$i] gt "z") - && ($l[$i] lt "A" || $l[$i] gt "Z") - && ($l[$i] lt "0" || $l[$i] gt "9") - && ($l[$i] lt "\x80")) { - print $l[$i]; - next; - } - - if (defined $cm{$hex}) { - print $cm{$hex}; - next; - } - - $hex = sprintf("%X%X", ord($l[$i]), ord($l[$i + 1])); - if (defined $cm{$hex}) { - $i += 1; - print $cm{$hex}; - next; - } - - $hex = sprintf("%X%X%X", - ord($l[$i]), ord($l[$i 
+ 1]), ord($l[$i + 2 ])); - if (defined $cm{$hex}) { - $i += 2; - print $cm{$hex}; - next; - } - - print "\n--$hex--\n"; - } - print "\n"; - -} Property changes on: head/tools/tools/locale/tools/UTF82encoding.pl ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:executable ## -1 +0,0 ## -* \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: head/tools/tools/locale/tools/changeoffset.pl =================================================================== --- head/tools/tools/locale/tools/changeoffset.pl (revision 340203) +++ head/tools/tools/locale/tools/changeoffset.pl (nonexistent) @@ -1,30 +0,0 @@ -#!/usr/bin/perl -w - -if ($#ARGV != 2) { - print STDERR "Usage: $0 \n"; - print STDERR "offset should be in hex and can be prefixed with a -.\n"; - exit; -} - -$fin = $ARGV[0]; -$fout = $ARGV[1]; -$offset = hex($ARGV[2]); - -open(FIN, "$fin.TXT") or die "Cannot open $fin.TXT for reading"; -open(FOUT, ">$fout.TXT"); - -foreach my $l () { - my @a = split(" ", $l); - - if ($a[0] =~ /^0x[0-9a-fA-F]+$/) { - my $c = length($a[0]); - my $h = hex($a[0]) + $offset; - - $l = sprintf("0x%*X%s", $c - 2, $h, substr($l, $c)); - } - - print FOUT $l; -} - -close(FOUT); -close(FIN); Property changes on: head/tools/tools/locale/tools/changeoffset.pl ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:executable ## -1 +0,0 ## -* \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: head/tools/tools/locale/tools/whatis.pl =================================================================== --- 
head/tools/tools/locale/tools/whatis.pl (revision 340203) +++ head/tools/tools/locale/tools/whatis.pl (nonexistent) @@ -1,29 +0,0 @@ -#!/bin/sh - -UNIDATA=$(grep ^unidata etc/unicode.conf | cut -f 2 -d " ") -UTF8=$(grep ^cldr etc/unicode.conf | cut -f 2 -d " ")/UTF-8.cm -CHARMAPS=etc/charmaps - -if [ -z "$1" ]; then - echo "Usage: $0 " - exit -fi - -UCS=$* -UCS_=$(echo $* | sed -e 's/ /./g') -echo UCS: ${UCS} - -echo UTF-8.cm: -grep "${UCS_}" ${UTF8} | sed -e 's/ */ /g' - -echo UNIDATA: -grep "${UCS_}" ${UNIDATA} -L=$(grep "${UCS_}" ${UNIDATA}) - -echo UCC: -grep "${UCS_}" ${UNIDATA} | awk -F\; '{ print $1 }' - - -echo CHARMAPS: -grep ${UCS_} ${CHARMAPS}/* | sed -e "s|${CHARMAPS}/||g" -grep ${UCC} ${CHARMAPS}/* | sed -e "s|${CHARMAPS}/||g" Property changes on: head/tools/tools/locale/tools/whatis.pl ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:executable ## -1 +0,0 ## -* \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: head/tools/tools/locale/tools/unicode2src.pl =================================================================== --- head/tools/tools/locale/tools/unicode2src.pl (revision 340203) +++ head/tools/tools/locale/tools/unicode2src.pl (nonexistent) @@ -1,144 +0,0 @@ -#!/usr/bin/perl -wC - -# -# $FreeBSD$ -# - -use strict; -use XML::Parser; -use Tie::IxHash; -use Data::Dumper; -use Getopt::Long; -use Digest::SHA qw(sha1_hex); - - -if ($#ARGV < 2) { - print "Usage: $0 --cldr= --unidata= --etc= --input= --output=\n"; - exit(1); -} - -my @filter = (); - -my $CLDRDIR = undef; -my $UNIDATADIR = undef; -my $ETCDIR = undef; -my $TYPE = undef; -my $INPUT = undef; -my $OUTPUT = undef; - -my $result = GetOptions ( - "cldr=s" => \$CLDRDIR, - "unidata=s" => \$UNIDATADIR, - "etc=s" => \$ETCDIR, - "type=s" => 
\$TYPE, - "input=s" => \$INPUT, - "output=s" => \$OUTPUT, - ); - -my %ucd = (); -my %utf8map = (); -my %utf8aliases = (); -get_unidata($UNIDATADIR); -get_utf8map("$CLDRDIR/posix/UTF-8.cm"); -convert($INPUT, $OUTPUT); - -############################ - -sub get_unidata { - my $directory = shift; - - open(FIN, "$directory/UnicodeData.txt") - or die("Cannot open $directory/UnicodeData.txt");; - my @lines = ; - chomp(@lines); - close(FIN); - - foreach my $l (@lines) { - my @a = split(/;/, $l); - - $ucd{code2name}{"$a[0]"} = $a[1]; # Unicode name - $ucd{name2code}{"$a[1]"} = $a[0]; # Unicode code - } -} - -sub get_utf8map { - my $file = shift; - - open(FIN, $file); - my @lines = ; - close(FIN); - chomp(@lines); - - my $prev_k = undef; - my $prev_v = ""; - my $incharmap = 0; - foreach my $l (@lines) { - $l =~ s/\r//; - next if ($l =~ /^\#/); - next if ($l eq ""); - - if ($l eq "CHARMAP") { - $incharmap = 1; - next; - } - - next if (!$incharmap); - last if ($l eq "END CHARMAP"); - - $l =~ /^<([^\s]+)>\s+(.*)/; - my $k = $1; - my $v = $2; - $k =~ s/_/ /g; # unicode char string - $v =~ s/\\x//g; # UTF-8 char code - $utf8map{$k} = $v; - - $utf8aliases{$k} = $prev_k if ($prev_v eq $v); - - $prev_v = $v; - $prev_k = $k; - } -} - -sub decode_cldr { - my $s = shift; - - my $v = $utf8map{$s}; - $v = $utf8aliases{$s} if (!defined $v); - die "Cannot convert $s" if (!defined $v); - - return pack("C", hex($v)) if (length($v) == 2); - return pack("CC", hex(substr($v, 0, 2)), hex(substr($v, 2, 2))) - if (length($v) == 4); - return pack("CCC", hex(substr($v, 0, 2)), hex(substr($v, 2, 2)), - hex(substr($v, 4, 2))) if (length($v) == 6); - print STDERR "Cannot convert $s\n"; - return "length = " . 
length($v); -} - -sub convert { - my $IN = shift; - my $OUT = shift; - - open(FIN, "$IN"); - open(FOUT, ">$OUT"); - -# print Dumper(%utf8map); - - my $l; - while (defined ($l = )) { - chomp($l); - - if ($l =~ /^#/) { - print FOUT $l, "\n"; - next; - } - - while ($l =~ /^(.*?)<(.*?)>(.*)$/) { - $l = $1 . decode_cldr($2) . $3; - } - print FOUT $l, "\n"; - } - - close(FOUT); - close(FIN); -} Property changes on: head/tools/tools/locale/tools/unicode2src.pl ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:executable ## -1 +0,0 ## -* \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: head/tools/tools/locale/tools/cldr2def.pl =================================================================== --- head/tools/tools/locale/tools/cldr2def.pl (revision 340203) +++ head/tools/tools/locale/tools/cldr2def.pl (revision 340204) @@ -1,1041 +1,1017 @@ #!/usr/local/bin/perl -wC # $FreeBSD$ use strict; use File::Copy; use XML::Parser; use Tie::IxHash; use Text::Iconv; -use Data::Dumper; +#use Data::Dumper; use Getopt::Long; use Digest::SHA qw(sha1_hex); require "charmaps.pm"; if ($#ARGV < 2) { - print "Usage: $0 --cldr= --unidata= --etc= --type= [--lc=]\n"; + print "Usage: $0 --unidir= --etc= --type=\n"; exit(1); } my $DEFENCODING = "UTF-8"; -my @filter = (); -my $CLDRDIR = undef; -my $UNIDATADIR = undef; +my $UNIDIR = undef; my $ETCDIR = undef; my $TYPE = undef; -my $doonly = undef; my $result = GetOptions ( - "cldr=s" => \$CLDRDIR, - "unidata=s" => \$UNIDATADIR, + "unidir=s" => \$UNIDIR, "etc=s" => \$ETCDIR, "type=s" => \$TYPE, - "lc=s" => \$doonly ); my %convertors = (); my %ucd = (); my %values = (); my %hashtable = (); my %languages = (); my %translations = (); my %encodings = (); my %alternativemonths = (); get_languages(); 
my %utf8map = (); my %utf8aliases = (); -get_unidata($UNIDATADIR); -get_utf8map("$CLDRDIR/posix/$DEFENCODING.cm"); +get_unidata($UNIDIR); +get_utf8map("$UNIDIR/posix/$DEFENCODING.cm"); get_encodings("$ETCDIR/charmaps"); my %keys = (); tie(%keys, "Tie::IxHash"); tie(%hashtable, "Tie::IxHash"); my %FILESNAMES = ( "monetdef" => "LC_MONETARY", "timedef" => "LC_TIME", "msgdef" => "LC_MESSAGES", "numericdef" => "LC_NUMERIC", "colldef" => "LC_COLLATE", "ctypedef" => "LC_CTYPE" ); my %callback = ( mdorder => \&callback_mdorder, altmon => \&callback_altmon, cformat => \&callback_cformat, dformat => \&callback_dformat, dtformat => \&callback_dtformat, cbabmon => \&callback_abmon, cbampm => \&callback_ampm, data => undef, ); my %DESC = ( # numericdef "decimal_point" => "decimal_point", "thousands_sep" => "thousands_sep", "grouping" => "grouping", # monetdef "int_curr_symbol" => "int_curr_symbol (last character always " . "SPACE)", "currency_symbol" => "currency_symbol", "mon_decimal_point" => "mon_decimal_point", "mon_thousands_sep" => "mon_thousands_sep", "mon_grouping" => "mon_grouping", "positive_sign" => "positive_sign", "negative_sign" => "negative_sign", "int_frac_digits" => "int_frac_digits", "frac_digits" => "frac_digits", "p_cs_precedes" => "p_cs_precedes", "p_sep_by_space" => "p_sep_by_space", "n_cs_precedes" => "n_cs_precedes", "n_sep_by_space" => "n_sep_by_space", "p_sign_posn" => "p_sign_posn", "n_sign_posn" => "n_sign_posn", # msgdef "yesexpr" => "yesexpr", "noexpr" => "noexpr", "yesstr" => "yesstr", "nostr" => "nostr", # timedef "abmon" => "Short month names", "mon" => "Long month names (as in a date)", "abday" => "Short weekday names", "day" => "Long weekday names", "t_fmt" => "X_fmt", "d_fmt" => "x_fmt", "c_fmt" => "c_fmt", "am_pm" => "AM/PM", "d_t_fmt" => "date_fmt", "altmon" => "Long month names (without case ending)", "md_order" => "md_order", "t_fmt_ampm" => "ampm_fmt", ); if ($TYPE eq "colldef") { transform_collation(); make_makefile(); } if ($TYPE eq 
"ctypedef") { transform_ctypes(); make_makefile(); } if ($TYPE eq "numericdef") { %keys = ( "decimal_point" => "s", "thousands_sep" => "s", "grouping" => "ai", ); get_fields(); print_fields(); make_makefile(); } if ($TYPE eq "monetdef") { %keys = ( "int_curr_symbol" => "s", "currency_symbol" => "s", "mon_decimal_point" => "s", "mon_thousands_sep" => "s", "mon_grouping" => "ai", "positive_sign" => "s", "negative_sign" => "s", "int_frac_digits" => "i", "frac_digits" => "i", "p_cs_precedes" => "i", "p_sep_by_space" => "i", "n_cs_precedes" => "i", "n_sep_by_space" => "i", "p_sign_posn" => "i", "n_sign_posn" => "i" ); get_fields(); print_fields(); make_makefile(); } if ($TYPE eq "msgdef") { %keys = ( "yesexpr" => "s", "noexpr" => "s", "yesstr" => "s", "nostr" => "s" ); get_fields(); print_fields(); make_makefile(); } if ($TYPE eq "timedef") { %keys = ( "abmon" => " "as", "abday" => "as", "day" => "as", "t_fmt" => "s", "d_fmt" => " " " " " " "s", ); get_fields(); print_fields(); make_makefile(); } sub callback_ampm { my $s = shift; my $nl = $callback{data}{l} . "_" . $callback{data}{c}; my $enc = $callback{data}{e}; if ($nl eq 'ru_RU') { if ($enc eq 'UTF-8') { $s = 'дп;пп'; } else { my $converter = Text::Iconv->new("utf-8", "$enc"); $s = $converter->convert("дп;пп"); } } return $s; } sub callback_cformat { my $s = shift; my $nl = $callback{data}{l} . "_" . $callback{data}{c}; if ($nl eq 'ko_KR') { $s =~ s/(> )(%p)/$1%A $2/; } $s =~ s/\.,/\./; $s =~ s/ %Z//; $s =~ s/ %z//; $s =~ s/^"%e\./%A %e/; $s =~ s/^"(%B %e, )/"%A, $1/; $s =~ s/^"(%e %B )/"%A $1/; return $s; }; sub callback_dformat { my $s = shift; $s =~ s/(%m(|[-.]))%e/$1%d/; $s =~ s/%e((|[-.])%m)/%d$1/; return $s; }; sub callback_dtformat { my $s = shift; my $nl = $callback{data}{l} . "_" . 
$callback{data}{c}; if ($nl eq 'ja_JP') { $s =~ s/(> )(%H)/$1%A $2/; } elsif ($nl eq 'ko_KR' || $nl eq 'zh_CN' || $nl eq 'zh_TW') { if ($nl ne 'ko_KR') { $s =~ s/%m/%_m/; } $s =~ s/(> )(%p)/$1%A $2/; } $s =~ s/\.,/\./; $s =~ s/^"%e\./%A %e/; $s =~ s/^"(%B %e, )/"%A, $1/; $s =~ s/^"(%e %B )/"%A $1/; return $s; }; sub callback_mdorder { my $s = shift; return undef if (!defined $s); $s =~ s/[^dem]//g; $s =~ s/e/d/g; return $s; }; sub callback_altmon { # if the language/country is known in %alternative months then # return that, otherwise repeat mon my $s = shift; if (defined $alternativemonths{$callback{data}{l}}{$callback{data}{c}}) { my @altnames = split(";",$alternativemonths{$callback{data}{l}}{$callback{data}{c}}); my @cleaned; foreach (@altnames) { $_ =~ s/^\s+//; $_ =~ s/\s+$//; push @cleaned, $_; } return join(";",@cleaned); } return $s; } sub callback_abmon { # for specified CJK locales, pad result with a space to enable # columns to line up (style established in FreeBSD in 2001) my $s = shift; my $nl = $callback{data}{l} . "_" . 
$callback{data}{c}; if ($nl eq 'ja_JP' || $nl eq 'ko_KR' || $nl eq 'zh_CN' || $nl eq 'zh_HK' || $nl eq 'zh_TW') { my @monthnames = split(";", $s); my @cleaned; foreach (@monthnames) { if ($_ =~ /^"<(two|three|four|five|six|seven|eight|nine)>/ || ($_ =~ /^"/ && $_ !~ /^"(||)/)) { $_ =~ s/^"/"/; } push @cleaned, $_; } return join(";",@cleaned); } return $s; } ############################ sub get_unidata { my $directory = shift; open(FIN, "$directory/UnicodeData.txt") or die("Cannot open $directory/UnicodeData.txt");; my @lines = ; chomp(@lines); close(FIN); foreach my $l (@lines) { my @a = split(/;/, $l); $ucd{code2name}{"$a[0]"} = $a[1]; # Unicode name $ucd{name2code}{"$a[1]"} = $a[0]; # Unicode code } } sub get_utf8map { my $file = shift; open(FIN, $file); my @lines = ; close(FIN); chomp(@lines); my $prev_k = undef; my $prev_v = ""; my $incharmap = 0; foreach my $l (@lines) { $l =~ s/\r//; next if ($l =~ /^\#/); next if ($l eq ""); if ($l eq "CHARMAP") { $incharmap = 1; next; } next if (!$incharmap); last if ($l eq "END CHARMAP"); $l =~ /^<([^\s]+)>\s+(.*)/; my $k = $1; my $v = $2; $k =~ s/_/ /g; # unicode char string $v =~ s/\\x//g; # UTF-8 char code $utf8map{$k} = $v; $utf8aliases{$k} = $prev_k if ($prev_v eq $v); $prev_v = $v; $prev_k = $k; } } sub get_encodings { my $dir = shift; foreach my $e (sort(keys(%encodings))) { if (!open(FIN, "$dir/$e.TXT")) { print "Cannot open charmap for $e\n"; next; } $encodings{$e} = 1; my @lines = ; close(FIN); chomp(@lines); foreach my $l (@lines) { $l =~ s/\r//; next if ($l =~ /^\#/); next if ($l eq ""); my @a = split(" ", $l); next if ($#a < 1); $a[0] =~ s/^0[xX]//; # local char code $a[1] =~ s/^0[xX]//; # unicode char code $convertors{$e}{uc($a[1])} = uc($a[0]); } } } sub get_languages { my %data = get_xmldata($ETCDIR); %languages = %{$data{L}}; %translations = %{$data{T}}; %alternativemonths = %{$data{AM}}; %encodings = %{$data{E}}; - - return if (!defined $doonly); - - my @a = split(/_/, $doonly); - if ($#a == 1) { - 
$filter[0] = $a[0]; - $filter[1] = "x"; - $filter[2] = $a[1]; - } elsif ($#a == 2) { - $filter[0] = $a[0]; - $filter[1] = $a[1]; - $filter[2] = $a[2]; - } - - print Dumper(@filter); - return; } sub transform_ctypes { # Add the C.UTF-8 $languages{"C"}{"x"}{data}{"x"}{$DEFENCODING} = undef; foreach my $l (sort keys(%languages)) { foreach my $f (sort keys(%{$languages{$l}})) { foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) { - next if ($#filter == 2 && ($filter[0] ne $l - || $filter[1] ne $f || $filter[2] ne $c)); next if (defined $languages{$l}{$f}{definitions} && $languages{$l}{$f}{definitions} !~ /$TYPE/); $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0; # unread my $file = $l; $file .= "_" . $f if ($f ne "x"); $file .= "_" . $c if ($c ne "x"); my $actfile = $file; - my $filename = "$CLDRDIR/posix/xx_Comm_C.UTF-8.src"; + my $filename = "$UNIDIR/posix/xx_Comm_C.UTF-8.src"; if (! -f $filename) { print STDERR "Cannot open $filename\n"; next; } open(FIN, "$filename"); print "Reading from $filename for ${l}_${f}_${c}\n"; $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1; # read my @lines; my $shex; my $uhex; while () { push @lines, $_; } close(FIN); $shex = sha1_hex(join("\n", @lines)); $languages{$l}{$f}{data}{$c}{$DEFENCODING} = $shex; $hashtable{$shex}{"${l}_${f}_${c}.$DEFENCODING"} = 1; open(FOUT, ">$TYPE.draft/$actfile.$DEFENCODING.src"); print FOUT @lines; close(FOUT); foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) { next if ($enc eq $DEFENCODING); - $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src"; + $filename = "$UNIDIR/posix/$file.$DEFENCODING.src"; if (! -f $filename) { print STDERR "Cannot open $filename\n"; next; } @lines = (); open(FIN, "$filename"); while () { if ((/^comment_char\s/) || (/^escape_char\s/)){ push @lines, $_; } if (/^LC_CTYPE/../^END LC_CTYPE/) { push @lines, $_; } } close(FIN); $uhex = sha1_hex(join("\n", @lines) . 
$enc); $languages{$l}{$f}{data}{$c}{$enc} = $uhex; $hashtable{$uhex}{"${l}_${f}_${c}.$enc"} = 1; open(FOUT, ">$TYPE.draft/$actfile.$enc.src"); print FOUT <) { if ((/^comment_char\s/) || (/^escape_char\s/)){ push @lines, $_; } if (/^LC_COLLATE/../^END LC_COLLATE/) { $_ =~ s/[ ]+/ /g; push @lines, $_; } } close(FIN); $shex = sha1_hex(join("\n", @lines)); $languages{$l}{$f}{data}{$c}{$DEFENCODING} = $shex; $hashtable{$shex}{"${l}_${f}_${c}.$DEFENCODING"} = 1; open(FOUT, ">$TYPE.draft/$actfile.$DEFENCODING.src"); print FOUT <; chomp(@lines); close(FIN); my $continue = 0; foreach my $k (keys(%keys)) { foreach my $line (@lines) { $line =~ s/\r//; next if (!$continue && $line !~ /^$k\s/); if ($continue) { $line =~ s/^\s+//; } else { $line =~ s/^$k\s+//; } $values{$l}{$f}{$c}{$k} = "" if (!defined $values{$l}{$f}{$c}{$k}); $continue = ($line =~ /\/$/); $line =~ s/\/$// if ($continue); while ($line =~ /_/) { $line =~ s/\<([^>_]+)_([^>]+)\>/<$1 $2>/; } die "_ in data - $line" if ($line =~ /_/); $values{$l}{$f}{$c}{$k} .= $line; last if (!$continue); } } } } } } sub decodecldr { my $e = shift; my $s = shift; my $v = undef; if ($e eq "UTF-8") { # # Conversion to UTF-8 can be done from the Unicode name to # the UTF-8 character code. # $v = $utf8map{$s}; die "Cannot convert $s in $e (charmap)" if (!defined $v); } else { # # Conversion to these encodings can be done from the Unicode # name to Unicode code to the encodings code. 
# my $ucc = undef; $ucc = $ucd{name2code}{$s} if (defined $ucd{name2code}{$s}); $ucc = $ucd{name2code}{$utf8aliases{$s}} if (!defined $ucc && $utf8aliases{$s} && defined $ucd{name2code}{$utf8aliases{$s}}); if (!defined $ucc) { if (defined $translations{$e}{$s}{hex}) { $v = $translations{$e}{$s}{hex}; $ucc = 0; } elsif (defined $translations{$e}{$s}{ucc}) { $ucc = $translations{$e}{$s}{ucc}; } } die "Cannot convert $s in $e (ucd string)" if (!defined $ucc); $v = $convertors{$e}{$ucc} if (!defined $v); $v = $translations{$e}{$s}{hex} if (!defined $v && defined $translations{$e}{$s}{hex}); if (!defined $v && defined $translations{$e}{$s}{unicode}) { my $ucn = $translations{$e}{$s}{unicode}; $ucc = $ucd{name2code}{$ucn} if (defined $ucd{name2code}{$ucn}); $ucc = $ucd{name2code}{$utf8aliases{$ucn}} if (!defined $ucc && defined $ucd{name2code}{$utf8aliases{$ucn}}); $v = $convertors{$e}{$ucc}; } die "Cannot convert $s in $e (charmap)" if (!defined $v); } return pack("C", hex($v)) if (length($v) == 2); return pack("CC", hex(substr($v, 0, 2)), hex(substr($v, 2, 2))) if (length($v) == 4); return pack("CCC", hex(substr($v, 0, 2)), hex(substr($v, 2, 2)), hex(substr($v, 4, 2))) if (length($v) == 6); print STDERR "Cannot convert $e $s\n"; return "length = " . length($v); } sub translate { my $enc = shift; my $v = shift; return $translations{$enc}{$v} if (defined $translations{$enc}{$v}); return undef; } sub print_fields { foreach my $l (sort keys(%languages)) { foreach my $f (sort keys(%{$languages{$l}})) { foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) { - next if ($#filter == 2 && ($filter[0] ne $l - || $filter[1] ne $f || $filter[2] ne $c)); next if (defined $languages{$l}{$f}{definitions} && $languages{$l}{$f}{definitions} !~ /$TYPE/); foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) { if ($languages{$l}{$f}{data}{$c}{$DEFENCODING} eq "0") { print "Skipping ${l}_" . ($f eq "x" ? "" : "${f}_") . "${c} - not read\n"; next; } my $file = $l; $file .= "_" . 
$f if ($f ne "x"); $file .= "_" . $c; print "Writing to $file in $enc\n"; if ($enc ne $DEFENCODING && !defined $convertors{$enc}) { print "Failed! Cannot convert to $enc.\n"; next; }; open(FOUT, ">$TYPE.draft/$file.$enc.new"); my $okay = 1; my $output = ""; print FOUT </) { $k = substr($g, 1); $g = $keys{$k}; } # Callback function if ($g =~ /^\(.*)/) { my $p1 = $1; $cm = $2; my $p3 = $3; my $rv = decodecldr($enc, $cm); # $rv = translate($enc, $cm) # if (!defined $rv); if (!defined $rv) { print STDERR "Could not convert $k ($cm) from $DEFENCODING to $enc\n"; $okay = 0; next; } $v = $p1 . $rv . $p3; } $output .= "$v\n"; next; } if ($g eq "as") { foreach my $v (split(/;/, $v)) { $v =~ s/^"//; $v =~ s/"$//; my $cm = ""; while ($v =~ /^(.*?)<(.*?)>(.*)/) { my $p1 = $1; $cm = $2; my $p3 = $3; my $rv = decodecldr($enc, $cm); # $rv = translate($enc, # $cm) # if (!defined $rv); if (!defined $rv) { print STDERR "Could not convert $k ($cm) from $DEFENCODING to $enc\n"; $okay = 0; next; } $v = $1 . $rv . $3; } $output .= "$v\n"; } next; } die("$k is '$g'"); } $languages{$l}{$f}{data}{$c}{$enc} = sha1_hex($output); $hashtable{sha1_hex($output)}{"${l}_${f}_${c}.$enc"} = 1; print FOUT "$output# EOF\n"; close(FOUT); if ($okay) { rename("$TYPE.draft/$file.$enc.new", "$TYPE.draft/$file.$enc.src"); } else { rename("$TYPE.draft/$file.$enc.new", "$TYPE.draft/$file.$enc.failed"); } } } } } } sub make_makefile { - return if ($#filter > -1); print "Creating Makefile for $TYPE\n"; my $SRCOUT; my $SRCOUT2; my $SRCOUT3 = ""; my $SRCOUT4 = ""; my $MAPLOC; if ($TYPE eq "colldef") { $SRCOUT = "localedef \${LOCALEDEF_ENDIAN} -D -U " . "-i \${.IMPSRC} \\\n" . "\t-f \${MAPLOC}/map.\${.TARGET:T:R:E:C/@.*//} " . "\${.OBJDIR}/\${.IMPSRC:T:R}"; $MAPLOC = "MAPLOC=\t\t\${.CURDIR}/../../tools/tools/" . "locale/etc/final-maps\n"; $SRCOUT2 = "LC_COLLATE"; $SRCOUT3 = "" . ".for f t in \${LOCALES_MAPPED}\n" . "FILES+=\t\$t.LC_COLLATE\n" . "FILESDIR_\$t.LC_COLLATE=\t\${LOCALEDIR}/\$t\n" . 
"\$t.LC_COLLATE: \${.CURDIR}/\$f.src\n" . "\tlocaledef \${LOCALEDEF_ENDIAN} -D -U " . "-i \${.ALLSRC} \\\n" . "\t\t-f \${MAPLOC}/map.\${.TARGET:T:R:E:C/@.*//} \\\n" . "\t\t\${.OBJDIR}/\${.TARGET:T:R}\n" . ".endfor\n\n"; $SRCOUT4 = "## LOCALES_MAPPED\n"; } elsif ($TYPE eq "ctypedef") { $SRCOUT = "localedef \${LOCALEDEF_ENDIAN} -D -U -c " . "-w \${MAPLOC}/widths.txt \\\n" . "\t-f \${MAPLOC}/map.\${.IMPSRC:T:R:E} " . "\\\n\t-i \${.IMPSRC} \${.OBJDIR}/\${.IMPSRC:T:R} " . " || true"; $SRCOUT2 = "LC_CTYPE"; $MAPLOC = "MAPLOC=\t\t\${.CURDIR}/../../tools/tools/" . "locale/etc/final-maps\n"; $SRCOUT3 = "## SYMPAIRS\n\n" . ".for s t in \${SYMPAIRS}\n" . "\${t:S/src\$/LC_CTYPE/}: " . "\$s\n" . "\tlocaledef \${LOCALEDEF_ENDIAN} -D -U -c " . "-w \${MAPLOC}/widths.txt \\\n" . "\t-f \${MAPLOC}/map.\${.TARGET:T:R:C/^.*\\.//} " . "\\\n\t-i \${.ALLSRC} \${.OBJDIR}/\${.TARGET:T:R} " . " || true\n" . ".endfor\n\n"; } else { $SRCOUT = "grep -v -E '^(\#\$\$|\#[ ])' < \${.IMPSRC} > \${.TARGET}"; $SRCOUT2 = "out"; $MAPLOC = ""; } open(FOUT, ">$TYPE.draft/Makefile"); print FOUT < +EOF + } + + print FOUT < 0) { my $link = shift(@files); $link =~ s/_x_x//; # special case for C $link =~ s/_x_/_/; # strip family if none there foreach my $file (@files) { my @a = split(/_/, $file); my @b = split(/\./, $a[-1]); $file =~ s/_x_/_/; print FOUT "SAME+=\t\t$link $file\n"; undef($languages{$a[0]}{$a[1]}{data}{$b[0]}{$b[1]}); } } } foreach my $l (sort keys(%languages)) { foreach my $f (sort keys(%{$languages{$l}})) { foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) { - next if ($#filter == 2 && ($filter[0] ne $l - || $filter[1] ne $f || $filter[2] ne $c)); next if (defined $languages{$l}{$f}{definitions} && $languages{$l}{$f}{definitions} !~ /$TYPE/); if (defined $languages{$l}{$f}{data}{$c}{$DEFENCODING} && $languages{$l}{$f}{data}{$c}{$DEFENCODING} eq "0") { print "Skipping ${l}_" . ($f eq "x" ? "" : "${f}_") . 
"${c} - not read\n"; next; } foreach my $e (sort keys(%{$languages{$l}{$f}{data}{$c}})) { my $file = $l; $file .= "_" . $f if ($f ne "x"); $file .= "_" . $c if ($c ne "x"); next if (!defined $languages{$l}{$f}{data}{$c}{$e}); print FOUT "LOCALES+=\t$file.$e\n"; } if (defined $languages{$l}{$f}{nc_link}) { foreach my $e (sort keys(%{$languages{$l}{$f}{data}{$c}})) { my $file = $l . "_"; $file .= $f . "_" if ($f ne "x"); $file .= $c; print FOUT "SAME+=\t\t$file.$e $languages{$l}{$f}{nc_link}.$e\t# legacy (lang/country change)\n"; } } if (defined $languages{$l}{$f}{e_link}) { foreach my $el (split(" ", $languages{$l}{$f}{e_link})) { my @a = split(/:/, $el); my $file = $l . "_"; $file .= $f . "_" if ($f ne "x"); $file .= $c; print FOUT "SAME+=\t\t$file.$a[0] $file.$a[1]\t# legacy (same charset)\n"; } } } } } print FOUT < EOF close(FOUT); } Index: head/tools/tools/locale/tools/finalize =================================================================== --- head/tools/tools/locale/tools/finalize (revision 340203) +++ head/tools/tools/locale/tools/finalize (revision 340204) @@ -1,182 +1,182 @@ #!/bin/sh # # $FreeBSD$ # # This is a helper script for the Makefile in the parent directory. # When the localization definitions are generated in the draft area, # this script will copy base ones that others symlink to, and rearrange # the generate makefile to pull the LOCALES first. 
# set -e usage () { echo "finalize ' to package standard localization" echo "type must be one of { monetdef, msgdef, numericdef, timedef, colldef, ctypedef }" exit 1 } [ $# -ne 1 ] && usage [ $1 = "monetdef" -o $1 = "msgdef" -o $1 = "colldef" -o \ $1 = "numericdef" -o $1 = "timedef" -o $1 = "ctypedef" ] || usage self=$(realpath $0) base=$(dirname ${self}) old=${base}/../${1}.draft new=${base}/../${1} TEMP=/tmp/${1}.locales TEMP2=/tmp/${1}.hashes TEMP3=/tmp/${1}.symlinks TEMP4=/tmp/${1}.mapped FULLMAP=/tmp/utf8-map FULLEXTRACT=/tmp/extracted-names AWKCMD="/## PLACEHOLDER/ { \ while ( getline line < \"${TEMP}\" ) {print line} } \ /## SYMPAIRS/ { \ while ( getline line < \"${TEMP3}\" ) {print line} } \ /## LOCALES_MAPPED/ { \ while ( getline line < \"${TEMP4}\" ) {print line} } \ !/## / { print \$0 }" # Rename the sources with 3 components name into the POSIX version of the name using @modifier cd $old pwd for i in *_*_*.*.src; do if [ "$i" = "*_*_*.*.src" ]; then break fi oldname=${i%.*} nname=`echo $oldname | awk '{ split($0, a, "_"); print a[1]"_"a[3]"@"a[2];} '` mv -f ${oldname}.src ${nname}.src sed -i '' -e "s/${oldname}/${nname}/g" Makefile COLLATIONS_SPECIAL=$(echo ${COLLATIONS_SPECIAL} | sed -e "s/${oldname}/${nname}/g") done # For variable without @modifier ambiguity do not keep the @modifier for i in *@*.src; do if [ "$i" = "*@*.src" ]; then break fi oldname=${i%.*} shortname=${oldname%@*} if [ $(ls ${shortname}@* | wc -l) -eq 1 ] ; then mv -f $i ${shortname}.src sed -i '' -e "s/${oldname}/${shortname}/g" Makefile COLLATIONS_SPECIAL=$(echo ${COLLATIONS_SPECIAL} | sed -e "s/${oldname}/${shortname}/g") fi done # Rename the modifiers into non abbreviated version for i in *@Latn.src; do if [ "$i" = "*@Latn.src" ]; then break fi mv -f ${i} ${i%@*}@latin.src sed -i '' -e "s/${i%.*}/${i%@*}@latin/g" Makefile COLLATIONS_SPECIAL=$(echo ${COLLATIONS_SPECIAL} | sed -e "s/${i%.*}/${i%@*}@latin/g") done for i in *@Cyrl.src; do if [ "$i" = "*@Cyrl.src" ]; then break fi mv 
-f ${i} ${i%@*}@cyrillic.src sed -i '' -e "s/${i%.*}/${i%@*}@cyrillic/g" Makefile COLLATIONS_SPECIAL=$(echo ${COLLATIONS_SPECIAL} | sed -e "s/${i%.*}/${i%@*}@cyrillic/g") done # On locales with multiple modifiers rename the "default" version without the @modifier default_locales="sr_RS@cyrillic" for i in ${default_locales}; do localename=${i%@*} mod=${i#*@} for l in ${localename}.*@${mod}.src; do if [ "$l" = "${localename}.*@${mod}.src" ]; then break fi mv -f ${l} ${l%@*}.src sed -i '' -e "s/${l%.*}/${l%@*}/g" Makefile done done cd - grep '^LOCALES+' ${old}/Makefile > ${TEMP} if [ $1 = "ctypedef" ] then keep=$(cat ${TEMP} | awk '{ print $2 ".src" }') (cd ${old} && md5 -r ${keep} | sort) > ${TEMP2} keep=$(awk '{ if ($1 != last1) print $2; last1 = $1; }' ${TEMP2}) for original in ${keep} do cp ${old}/${original} ${new}/ done awk '{ if ($1 == last1) { print "SYMPAIRS+=\t" last2 " " $2 } \ else {last1 = $1; last2 = $2}}' ${TEMP2} > ${TEMP3} rm -f ${TEMP2} /usr/bin/sed -E -e 's/[ ]+/ /g' \ - ${CLDRDIR}/posix/UTF-8.cm \ + ${UNIDIR}/posix/UTF-8.cm \ > ${base}/../etc/final-maps/map.UTF-8 /usr/bin/sed -E -e 's/[ ]+/ /g' \ - ${CLDRDIR}/posix/eucCN.cm \ + ${UNIDIR}/posix/eucCN.cm \ > ${base}/../etc/final-maps/map.eucCN /usr/bin/sed -E -e 's/[ ]+/ /g' \ - ${CLDRDIR}/posix/eucCN.cm \ + ${UNIDIR}/posix/eucCN.cm \ > ${base}/../etc/final-maps/map.GB2312 CHARMAPS="ARMSCII-8 Big5 CP1131 CP1251 \ CP866 GBK ISCII-DEV ISO8859-1 \ ISO8859-13 ISO8859-15 ISO8859-2 ISO8859-4 \ ISO8859-5 ISO8859-7 ISO8859-9 KOI8-R KOI8-U \ PT154 SJIS US-ASCII eucJP eucKR" # GB18030 blows up, use pre-generate Illumos version for map in ${CHARMAPS} do encoding=${map} /usr/local/bin/perl ${base}/convert_map.pl \ ${base}/../etc/charmaps/${map}.TXT ${encoding} \ | /usr/bin/sed -E -e 's/ +/ /g' \ > ${base}/../etc/final-maps/map.${map} echo map ${map} converted. 
done elif [ $1 = "colldef" ] then awk -v tmp4=${TEMP4} '$1 == "SAME+=" && $0 !~ /legacy/ { orig=$2 dest=$3 gsub(/.*\./, "", orig) gsub(/.*\./, "", dest) if (orig != dest ) print "LOCALES_MAPPED+=\t"$2 " "$3 > tmp4 }' ${old}/Makefile for line in $(awk '{ print $3 }' ${TEMP4}); do sed -i '' "/^SAME.*$line$/d" ${old}/Makefile done echo "" >> ${TEMP4} for enc in ${COLLATIONS_SPECIAL}; do sed -i '' "/^.*${enc}$/d" ${TEMP4} echo "LOCALES+= ${enc}" >> ${TEMP4} done keep=$(cat ${TEMP} | awk '{ print $2 }') for original in ${keep} ${COLLATIONS_SPECIAL} do cp ${old}/${original}.src ${new}/ done else # below is everything but ctypedef keep=$(cat ${TEMP} | awk '{ print $2 }') for original in ${keep} do cp ${old}/${original}.src ${new}/ done fi grep -v '^LOCALES+' ${old}/Makefile | awk "${AWKCMD}" > ${new}/Makefile rm -f ${TEMP} ${TEMP3} ${TEMP4} Index: head/tools/tools/locale/tools/utf8-rollup.pl =================================================================== --- head/tools/tools/locale/tools/utf8-rollup.pl (revision 340203) +++ head/tools/tools/locale/tools/utf8-rollup.pl (revision 340204) @@ -1,373 +1,374 @@ #!/usr/local/bin/perl -wC +# $FreeBSD$ use strict; #use File::Copy; #use XML::Parser; use Tie::IxHash; #use Data::Dumper; use Getopt::Long; #use Digest::SHA qw(sha1_hex); #require "charmaps.pm"; if ($#ARGV != 1) { - print "Usage: $0 --cldr= --etc=\n"; + print "Usage: $0 --unidir= --etc=\n"; exit(1); } -my $CLDRDIR = undef; +my $UNIDIR = undef; my $ETCDIR = undef; my $result = GetOptions ( - "cldr=s" => \$CLDRDIR, + "unidir=s" => \$UNIDIR, "etc=s" => \$ETCDIR, ); my @SECTIONS = ( ["en_US", "* 0x0000 - 0x007F Basic Latin\n" . "* 0x0080 - 0x00FF Latin-1 Supplement\n" . "* 0x0100 - 0x017F Latin Extended-A\n" . "* 0x0180 - 0x024F Latin Extended-B\n" . "* 0x0250 - 0x02AF IPA Extensions\n" . "* 0x1D00 - 0x1D7F Phonetic Extensions\n" . "* 0x1D80 - 0x1DBF Phonetic Extensions Supplement\n" . "* 0x1E00 - 0x1EFF Latin Extended Additional\n" . 
"* 0x2150 - 0x218F Number Forms (partial - Roman Numerals)\n". "* 0x2C60 - 0x2C7F Latin Extended-C\n" . "* 0xA720 - 0xA7FF Latin Extended-D\n" . "* 0xAB30 - 0xAB6F Latin Extended-E\n" . "* 0xFB00 - 0xFF4F Alphabetic Presentation Forms (partial)\n". "* 0xFF00 - 0xFFEF Halfwidth and Fullwidth Forms (partial)\n"], ["el_GR", "* 0x0370 - 0x03FF Greek (No Coptic!)\n" . "* 0x1F00 - 0x1FFF Greek Extended\n"], ["ru_RU", "* 0x0400 - 0x04FF Cyrillic\n" . "* 0x0500 - 0x052F Cyrillic Supplementary\n" . "* 0x2DE0 - 0x2DFF Cyrillic Extended-A\n" . "* 0xA640 - 0xA69F Cyrillic Extended-B\n"], ["hy_AM", "* 0x0530 - 0x058F Armenian\n" . "* 0xFB00 - 0xFF4F Alphabetic Presentation Forms (partial)\n"], ["he_IL", "* 0x0590 - 0x05FF Hebrew\n" . "* 0xFB00 - 0xFF4F Alphabetic Presentation Forms (partial)\n"], ["ar_SA", "* 0x0600 - 0x06FF Arabic\n" . "* 0x0750 - 0x074F Arabic Supplement\n" . "* 0x08A0 - 0x08FF Arabic Extended-A\n" . "* 0xFB50 - 0xFDFF Arabic Presentation Forms (partial)\n" . "* 0xFE70 - 0xFEFF Arabic Presentation Forms-B (partial)\n"], ["hi_IN", "* 0x0900 - 0x097F Devanagari\n" . "* 0xA8E0 - 0xA8FF Devanagari Extended\n"], ["bn_IN", "* 0x0900 - 0x097F Bengali\n"], ["pa_Guru_IN", "* 0x0A00 - 0x0A7F Gurmukhi\n"], ["gu_IN", "* 0x0A80 - 0x0AFF Gujarati\n"], ["or_IN", "* 0x0B00 - 0x0B7F Oriya\n"], ["ta_IN", "* 0x0B80 - 0x0BFF Tamil\n"], ["te_IN", "* 0x0C00 - 0x0C7F Telugu\n"], ["kn_IN", "* 0x0C80 - 0x0CFF Kannada\n"], ["ml_IN", "* 0x0D00 - 0x0D7F Malayalam\n"], ["si_LK", "* 0x0D80 - 0x0DFF Sinhala\n"], ["th_TH", "* 0x0E00 - 0x0E7F Thai\n"], ["lo_LA", "* 0x0E80 - 0x0EFF Lao\n"], ["bo_IN", "* 0x0F00 - 0x0FFF Tibetan\n"], ["my_MM", "* 0x1000 - 0x109F Myanmar\n" . "* 0xA9E0 - 0xA9FF Myanmar Extended-B\n" . "* 0xAA60 - 0xAA7F Myanmar Extended-A\n"], ["ka_GE", "* 0x10A0 - 0x10FF Georgia\n" . "* 0x2D00 - 0x2D2F Georgian Supplement\n"], ["ja_JP", "* 0x1100 - 0x11FF Hangul Jamo\n" . "* 0x3000 - 0x30FF CJK Symbols and Punctuation (partial)\n" . "* 0x3040 - 0x309F Hiragana\n" . 
"* 0x30A0 - 0x30FF Katakana\n" . "* 0x31F0 - 0x31FF Katakana Phonetic Extensions\n" . "* 0x3130 - 0x318F Hangul Compatibility Jamo (partial)\n" . "* 0x3200 - 0x32FF Enclosed CJK Letters and Months (partial)\n" . "* 0x3300 - 0x33FF CJK Compatibility\n" . "* 0x3400 - 0x4DB5 CJK Unified Ideographs Extension-A (added)\n" . "* 0x4E00 - 0x9FCC CJK Unified Ideographs (overridden)\n" . "* 0xAC00 - 0xA7A3 Hangul Syllables (partial)\n" . "* 0xD7B0 - 0xD7FF Hangul Jamo Extended-B\n" . "* 0xF900 - 0xFAFF CJK Compatibility Ideographs (partial)\n" . "* 0xFF00 - 0xFFEF Halfwidth and Fullwidth Forms (partial)\n"], ["am_ET", "* 0x1200 - 0x137F Ethiopic\n" . "* 0x1380 - 0x139F Ethiopic Supplement\n" . "* 0x2D80 - 0x2DDF Ethiopic Extended\n" . "* 0xAB00 - 0xAB2F Ethiopic Extended-A\n"], ["chr_US", "* 0x13A0 - 0x13FF Cherokee\n"], ["km_KH", "* 0x1780 - 0x17FF Khmer\n" . "* 0x19E0 - 0x19FF Khmer Symbols\n"], ["shi_Tfng_MA", "* 0x2D30 - 0x2D2F Tifinagh\n"], ["ii_CN", "* 0xA000 - 0xA48F Yi Syllables\n" . "* 0xA490 - 0xA4CF Yi Radicals\n"], ["vai_Vaii_LR", "* 0xA500 - 0xA63F Vai\n"], ["ko_KR", "* 0x3130 - 0x318F Hangul Compatibility Jamo (partial)\n" . "* 0xA960 - 0xA97F Hangul Jamo Extended-A\n" . "* 0xAC00 - 0xA7A3 Hangul Syllables (partial)\n" . "* 0xFF00 - 0xFFEF Halfwidth and Fullwidth Forms (partial)\n"], ); # ["zh_Hans_CN", "* 0x2E80 - 0x2EFF CJK Radicals Supplement\n" . # "* 0x2F00 - 0x2FDF Rangxi Radicales\n" . # "* 0x3000 - 0x30FF CJK Symbols and Punctuation (partial)\n" . # "* 0x3200 - 0x32FF Enclosed CJK Letters and Months (partial)\n" . # "* 0x3400 - 0x4DB5 CJK Unified Ideographs Extension A\n" . 
# "* 0xF900 - 0xFAFF CJK Compatibility Ideographs (partial)\n"], my %seen = (); my %pending_seen = (); my %utf8map = (); my %utf8aliases = (); my $outfilename = "$ETCDIR/common.UTF-8.src"; my $manual_file = "$ETCDIR/manual-input.UTF-8"; my $stars = "**********************************************************************\n"; -get_utf8map("$CLDRDIR/posix/UTF-8.cm"); +get_utf8map("$UNIDIR/posix/UTF-8.cm"); generate_header (); generate_sections (); generate_footer (); ############################ sub get_utf8map { my $file = shift; open(FIN, $file); my @lines = ; close(FIN); chomp(@lines); my $prev_k = undef; my $prev_v = ""; my $incharmap = 0; foreach my $l (@lines) { $l =~ s/\r//; next if ($l =~ /^\#/); next if ($l eq ""); if ($l eq "CHARMAP") { $incharmap = 1; next; } next if (!$incharmap); last if ($l eq "END CHARMAP"); $l =~ /^<([^\s]+)>\s+(.*)/; my $k = $1; my $v = $2; $k =~ s/_/ /g; # unicode char string $v =~ s/\\x//g; # UTF-8 char code $utf8map{$k} = $v; $utf8aliases{$k} = $prev_k if ($prev_v eq $v); $prev_v = $v; $prev_k = $k; } } sub generate_header { open(FOUT, ">", "$outfilename") or die ("can't write to $outfilename\n"); print FOUT <;/\n"; for ($n = hex("3401"); $n <= hex("4DB4"); $n++) { $back2hex=sprintf("%X", $n); push @result, "\t;/\n"; } push @result, "\t\n"; push @result, "$T\t;/\n"; for ($n = hex("4E01"); $n <= hex("9FCB"); $n++) { $back2hex=sprintf("%X", $n); push @result, "\t;/\n"; } push @result, "\t\n"; } push @result, "merge\tnow\n"; } return @result; } sub compress_ctype { my $territory = shift; my $term; my $active = 0; my $cat_loaded = 0; my $lock_ID; my $prev_ID; my $curr_ID; my $lock_name; my $prev_name; my $curr_name; my $key_name; my $category = ''; my @lines = initialize_lines ($territory); - my $filename = "$CLDRDIR/posix/$territory.UTF-8.src"; + my $filename = "$UNIDIR/posix/$territory.UTF-8.src"; if (! 
-f $filename) { print STDERR "Cannot open $filename\n"; return; } open(FIN, "$filename"); print "Reading from $filename\n"; while () { if (/^LC_CTYPE/../^END LC_CTYPE/) { if ($_ ne "LC_CTYPE\n" && $_ ne "END LC_CTYPE\n" && $_ ne "*************\n" && $_ ne "\n") { push @lines, $_; } } } close(FIN); foreach my $line (@lines) { if ($line =~ m/^([a-z]{3,})\t/) { $category = $1; if ($category eq 'merge') { merge_seen; next; } if ($category ne 'print') { $cat_loaded = 1; } } next if ($category eq 'print'); if ($category eq 'toupper' || $category eq 'tolower') { if ($line =~ m/<([-_A-Za-z0-9]+)>,/) { $key_name = $1; $key_name =~ s/_/ /g; if (already_seen_RO (hex($utf8map{$key_name}))) { next; } if ($cat_loaded) { print FOUT $category; } $cat_loaded = 0; $line =~ s/^[a-z]{3,}\t/\t/; print FOUT $line; } next; } if ($line =~ m/<([-_A-Za-z0-9]+)>(;.|)$/) { $term = ($2 eq '') ? 1 : 0; $curr_name = $1; $key_name = $1; $key_name =~ s/_/ /g; $curr_ID = hex($utf8map{$key_name}); if (already_seen ($curr_ID)) { next; } if ($active) { if ($curr_ID == $prev_ID + 1) { $prev_ID = $curr_ID; $prev_name = $curr_name; } else { if ($cat_loaded) { print FOUT $category; } $cat_loaded = 0; if ($prev_ID == $lock_ID) { print FOUT "\t<" . $prev_name . ">;/\n"; } elsif ($prev_ID - 1 == $lock_ID) { print FOUT "\t<" . $lock_name . ">;/\n"; print FOUT "\t<" . $prev_name . ">;/\n"; } else { print FOUT "\t<" . $lock_name . ">;...;<" . $prev_name . ">;/\n"; } $lock_ID = $curr_ID; $prev_ID = $curr_ID; $lock_name = $curr_name; $prev_name = $curr_name; } } else { $active = 1; $lock_ID = $curr_ID; $prev_ID = $curr_ID; $lock_name = $curr_name; $prev_name = $curr_name; } if ($term) { if ($cat_loaded) { print FOUT $category; } $cat_loaded = 0; if ($curr_ID == $lock_ID) { print FOUT "\t<" . $curr_name . ">\n"; } elsif ($curr_ID == $lock_ID + 1) { print FOUT "\t<" . $lock_name . ">;/\n"; print FOUT "\t<" . $curr_name . ">\n"; } else { print FOUT "\t<" . $lock_name . ">;...;<" . $curr_name . 
">\n"; } $active = 0; } } else { print FOUT $line; } } } sub generate_sections { foreach my $section (@SECTIONS ) { print FOUT "\n"; print FOUT $stars; print FOUT @$section[1]; print FOUT $stars; compress_ctype (@$section[0]); merge_seen; } my @lines = (); open(FIN, "$manual_file"); print "Reading from $manual_file\n"; while () { push @lines, $_; } close(FIN); foreach my $line (@lines) { print FOUT $line; } }