[ Index ]

PHP Cross Reference of Unnamed Project

title

Body

[close]

/se3-unattended/var/se3/unattended/install/linuxaux/opt/perl/lib/5.10.0/ -> utf8_heavy.pl (source)

   1  package utf8;
   2  use strict;
   3  use warnings;
   4  
   5  sub DEBUG () { 0 }
   6  
   7  sub DESTROY {}
   8  
   9  my %Cache;
  10  
  11  our (%PropertyAlias, %PA_reverse, %PropValueAlias, %PVA_reverse, %PVA_abbr_map);
  12  
  13  sub croak { require Carp; Carp::croak(@_) }
  14  
  15  ##
  16  ## "SWASH" == "SWATCH HASH". A "swatch" is a swatch of the Unicode landscape.
  17  ## It's a data structure that encodes a set of Unicode characters.
  18  ##
  19  
  20  sub SWASHNEW {
  21      my ($class, $type, $list, $minbits, $none) = @_;
  22      local $^D = 0 if $^D;
  23  
  24      print STDERR "SWASHNEW @_\n" if DEBUG;
  25  
  26      ##
  27      ## Get the list of codepoints for the type.
  28      ## Called from swash_init (see utf8.c) or SWASHNEW itself.
  29      ##
  30      ## Callers of swash_init:
  31      ##     op.c:pmtrans             -- for tr/// and y///
  32      ##     regexec.c:regclass_swash -- for /[]/, \p, and \P
  33      ##     utf8.c:is_utf8_common    -- for common Unicode properties
  34      ##     utf8.c:to_utf8_case      -- for lc, uc, ucfirst, etc. and //i
  35      ##
  36      ## Given a $type, our goal is to fill $list with the set of codepoint
  37      ## ranges. If $type is false, $list passed is used.
  38      ##
  39      ## $minbits:
  40      ##     For binary properties, $minbits must be 1.
  41      ##     For character mappings (case and transliteration), $minbits must
  42      ##     be a number except 1.
  43      ##
  44      ## $list (or that filled according to $type):
  45      ##     Refer to perlunicode.pod, "User-Defined Character Properties."
  46      ##     
  47      ##     For binary properties, only characters with the property value
  48      ##     of True should be listed. The 3rd column, if any, will be ignored.
  49      ##
  50      ## To make the parsing of $type clear, this code takes the a rather
  51      ## unorthodox approach of last'ing out of the block once we have the
  52      ## info we need. Were this to be a subroutine, the 'last' would just
  53      ## be a 'return'.
  54      ##
  55      my $file; ## file to load data from, and also part of the %Cache key.
  56      my $ListSorted = 0;
  57  
  58      if ($type)
  59      {
  60          $type =~ s/^\s+//;
  61          $type =~ s/\s+$//;
  62  
  63          print STDERR "type = $type\n" if DEBUG;
  64  
  65        GETFILE:
  66          {
  67          ##
  68          ## It could be a user-defined property.
  69          ##
  70  
  71          my $caller1 = $type =~ s/(.+)::// ? $1 : caller(1);
  72  
  73          if (defined $caller1 && $type =~ /^(?:\w+)$/) {
  74          my $prop = "$caller1}::$type";
  75          if (exists &{$prop}) {
  76              no strict 'refs';
  77              
  78              $list = &{$prop};
  79              last GETFILE;
  80          }
  81          }
  82  
  83              my $wasIs;
  84  
  85              ($wasIs = $type =~ s/^Is(?:\s+|[-_])?//i)
  86                or
  87              $type =~ s/^(?:(?:General(?:\s+|_)?)?Category|gc)\s*[:=]\s*//i
  88                or
  89              $type =~ s/^(?:Script|sc)\s*[:=]\s*//i
  90                or
  91              $type =~ s/^Block\s*[:=]\s*/In/i;
  92  
  93  
  94          ##
  95          ## See if it's in some enumeration.
  96          ##
  97          require  "unicore/PVA.pl";
  98          if ($type =~ /^([\w\s]+)[:=]\s*(.*)/) {
  99          my ($enum, $val) = (lc $1, lc $2);
 100          $enum =~ tr/ _-//d;
 101          $val =~ tr/ _-//d;
 102  
 103          my $pa = $PropertyAlias{$enum} ? $enum : $PA_reverse{$enum};
 104          my $f = $PropValueAlias{$pa}{$val} ? $val : $PVA_reverse{$pa}{lc $val};
 105  
 106          if ($pa and $f) {
 107              $pa = "gc_sc" if $pa eq "gc" or $pa eq "sc";
 108              $file = "unicore/lib/$pa/$PVA_abbr_map{$pa}{lc $f}.pl";
 109              last GETFILE;
 110          }
 111          }
 112          else {
 113          my $t = lc $type;
 114          $t =~ tr/ _-//d;
 115  
 116          if ($PropValueAlias{gc}{$t} or $PropValueAlias{sc}{$t}) {
 117              $file = "unicore/lib/gc_sc/$PVA_abbr_map{gc_sc}{$t}.pl";
 118              last GETFILE;
 119          }
 120          }
 121  
 122              ##
 123              ## See if it's in the direct mapping table.
 124              ##
 125              require  "unicore/Exact.pl";
 126              if (my $base = $utf8::Exact{$type}) {
 127                  $file = "unicore/lib/gc_sc/$base.pl";
 128                  last GETFILE;
 129              }
 130  
 131              ##
 132              ## If not there exactly, try the canonical form. The canonical
 133              ## form is lowercased, with any separators (\s+|[-_]) removed.
 134              ##
 135              my $canonical = lc $type;
 136              $canonical =~ s/(?<=[a-z\d])(?:\s+|[-_])(?=[a-z\d])//g;
 137              print STDERR "canonical = $canonical\n" if DEBUG;
 138  
 139              require  "unicore/Canonical.pl";
 140              if (my $base = ($utf8::Canonical{$canonical} || $utf8::Canonical{ lc $utf8::PropertyAlias{$canonical} })) {
 141                  $file = "unicore/lib/gc_sc/$base.pl";
 142                  last GETFILE;
 143              }
 144  
 145          ##
 146          ## See if it's a user-level "To".
 147          ##
 148  
 149          my $caller0 = caller(0);
 150  
 151          if (defined $caller0 && $type =~ /^To(?:\w+)$/) {
 152          my $map = $caller0 . "::" . $type;
 153  
 154          if (exists &{$map}) {
 155              no strict 'refs';
 156              
 157              $list = &{$map};
 158              last GETFILE;
 159          }
 160          }
 161  
 162              ##
 163              ## Last attempt -- see if it's a standard "To" name
 164          ## (e.g. "ToLower")  ToTitle is used by ucfirst().
 165          ## The user-level way to access ToDigit() and ToFold()
 166          ## is to use Unicode::UCD.
 167              ##
 168              if ($type =~ /^To(Digit|Fold|Lower|Title|Upper)$/) {
 169                  $file = "unicore/To/$1.pl";
 170                  ## would like to test to see if $file actually exists....
 171                  last GETFILE;
 172              }
 173  
 174              ##
 175              ## If we reach this line, it's because we couldn't figure
 176              ## out what to do with $type. Ouch.
 177              ##
 178  
 179              return $type;
 180          }
 181  
 182      if (defined $file) {
 183          print STDERR "found it (file='$file')\n" if DEBUG;
 184  
 185          ##
 186          ## If we reach here, it was due to a 'last GETFILE' above
 187          ## (exception: user-defined properties and mappings), so we
 188          ## have a filename, so now we load it if we haven't already.
 189          ## If we have, return the cached results. The cache key is the
 190          ## class and file to load.
 191          ##
 192          my $found = $Cache{$class, $file};
 193          if ($found and ref($found) eq $class) {
 194          print STDERR "Returning cached '$file' for \\p{$type}\n" if DEBUG;
 195          return $found;
 196          }
 197  
 198          $list = do $file; die $@ if $@;
 199      }
 200  
 201          $ListSorted = 1; ## we know that these lists are sorted
 202      }
 203  
 204      my $extras;
 205      my $bits = $minbits;
 206  
 207      my $ORIG = $list;
 208      if ($list) {
 209      my @tmp = split(/^/m, $list);
 210      my %seen;
 211      no warnings;
 212      $extras = join '', grep /^[^0-9a-fA-F]/, @tmp;
 213      $list = join '',
 214          map  { $_->[1] }
 215          sort { $a->[0] <=> $b->[0] }
 216          map  { /^([0-9a-fA-F]+)/; [ CORE::hex($1), $_ ] }
 217          grep { /^([0-9a-fA-F]+)/ and not $seen{$1}++ } @tmp; # XXX doesn't do ranges right
 218      }
 219  
 220      if ($none) {
 221      my $hextra = sprintf "%04x", $none + 1;
 222      $list =~ s/\tXXXX$/\t$hextra/mg;
 223      }
 224  
 225      if ($minbits != 1 && $minbits < 32) { # not binary property
 226      my $top = 0;
 227      while ($list =~ /^([0-9a-fA-F]+)(?:[\t]([0-9a-fA-F]+)?)(?:[ \t]([0-9a-fA-F]+))?/mg) {
 228          my $min = CORE::hex $1;
 229          my $max = defined $2 ? CORE::hex $2 : $min;
 230          my $val = defined $3 ? CORE::hex $3 : 0;
 231          $val += $max - $min if defined $3;
 232          $top = $val if $val > $top;
 233      }
 234      my $topbits =
 235          $top > 0xffff ? 32 :
 236          $top > 0xff ? 16 : 8;
 237      $bits = $topbits if $bits < $topbits;
 238      }
 239  
 240      my @extras;
 241      for my $x ($extras) {
 242      pos $x = 0;
 243      while ($x =~ /^([^0-9a-fA-F\n])(.*)/mg) {
 244          my $char = $1;
 245          my $name = $2;
 246          print STDERR "$1 => $2\n" if DEBUG;
 247          if ($char =~ /[-+!&]/) {
 248          my ($c,$t) = split(/::/, $name, 2);    # bogus use of ::, really
 249          my $subobj;
 250          if ($c eq 'utf8') {
 251              $subobj = utf8->SWASHNEW($t, "", $minbits, 0);
 252          }
 253          elsif (exists &$name) {
 254              $subobj = utf8->SWASHNEW($name, "", $minbits, 0);
 255          }
 256          elsif ($c =~ /^([0-9a-fA-F]+)/) {
 257              $subobj = utf8->SWASHNEW("", $c, $minbits, 0);
 258          }
 259          return $subobj unless ref $subobj;
 260          push @extras, $name => $subobj;
 261          $bits = $subobj->{BITS} if $bits < $subobj->{BITS};
 262          }
 263      }
 264      }
 265  
 266      print STDERR "CLASS = $class, TYPE => $type, BITS => $bits, NONE => $none\nEXTRAS =>\n$extras\nLIST =>\n$list\n" if DEBUG;
 267  
 268      my $SWASH = bless {
 269      TYPE => $type,
 270      BITS => $bits,
 271      EXTRAS => $extras,
 272      LIST => $list,
 273      NONE => $none,
 274      @extras,
 275      } => $class;
 276  
 277      if ($file) {
 278          $Cache{$class, $file} = $SWASH;
 279      }
 280  
 281      return $SWASH;
 282  }
 283  
 284  # Now SWASHGET is recasted into a C function S_swash_get (see utf8.c).
 285  
 286  1;


Generated: Tue Mar 17 22:47:18 2015 Cross-referenced by PHPXref 0.7.1