[ Index ] |
PHP Cross Reference of Unnamed Project |
[Summary view] [Print] [Text view]
1 package Text::ParseWords; 2 3 use vars qw($VERSION @ISA @EXPORT $PERL_SINGLE_QUOTE); 4 $VERSION = "3.26"; 5 6 require 5.000; 7 8 use Exporter; 9 @ISA = qw(Exporter); 10 @EXPORT = qw(shellwords quotewords nested_quotewords parse_line); 11 @EXPORT_OK = qw(old_shellwords); 12 13 14 sub shellwords { 15 my (@lines) = @_; 16 my @allwords; 17 18 foreach my $line (@lines) { 19 $line =~ s/^\s+//; 20 my @words = parse_line('\s+', 0, $line); 21 pop @words if (@words and !defined $words[-1]); 22 return() unless (@words || !length($line)); 23 push(@allwords, @words); 24 } 25 return(@allwords); 26 } 27 28 29 30 sub quotewords { 31 my($delim, $keep, @lines) = @_; 32 my($line, @words, @allwords); 33 34 foreach $line (@lines) { 35 @words = parse_line($delim, $keep, $line); 36 return() unless (@words || !length($line)); 37 push(@allwords, @words); 38 } 39 return(@allwords); 40 } 41 42 43 44 sub nested_quotewords { 45 my($delim, $keep, @lines) = @_; 46 my($i, @allwords); 47 48 for ($i = 0; $i < @lines; $i++) { 49 @{$allwords[$i]} = parse_line($delim, $keep, $lines[$i]); 50 return() unless (@{$allwords[$i]} || !length($lines[$i])); 51 } 52 return(@allwords); 53 } 54 55 56 57 sub parse_line { 58 my($delimiter, $keep, $line) = @_; 59 my($word, @pieces); 60 61 no warnings 'uninitialized'; # we will be testing undef strings 62 63 while (length($line)) { 64 # This pattern is optimised to be stack conservative on older perls. 65 # Do not refactor without being careful and testing it on very long strings. 66 # See Perl bug #42980 for an example of a stack busting input. 67 $line =~ s/^ 68 (?: 69 # double quoted string 70 (") # $quote 71 ((?>[^\\"]*(?:\\.[^\\"]*)*))" # $quoted 72 | # --OR-- 73 # singe quoted string 74 (') # $quote 75 ((?>[^\\']*(?:\\.[^\\']*)*))' # $quoted 76 | # --OR-- 77 # unquoted string 78 ( # $unquoted 79 (?:\\.|[^\\"'])*? 80 ) 81 # followed by 82 ( # $delim 83 \Z(?!\n) # EOL 84 | # --OR-- 85 (?-x:$delimiter) # delimiter 86 | # --OR-- 87 (?!^)(?=["']) # a quote 88 ) 89 )//xs or return; # extended layout 90 my ($quote, $quoted, $unquoted, $delim) = (($1 ? ($1,$2) : ($3,$4)), $5, $6); 91 92 93 return() unless( defined($quote) || length($unquoted) || length($delim)); 94 95 if ($keep) { 96 $quoted = "$quote$quoted$quote"; 97 } 98 else { 99 $unquoted =~ s/\\(.)/$1/sg; 100 if (defined $quote) { 101 $quoted =~ s/\\(.)/$1/sg if ($quote eq '"'); 102 $quoted =~ s/\\([\\'])/$1/g if ( $PERL_SINGLE_QUOTE && $quote eq "'"); 103 } 104 } 105 $word .= substr($line, 0, 0); # leave results tainted 106 $word .= defined $quote ? $quoted : $unquoted; 107 108 if (length($delim)) { 109 push(@pieces, $word); 110 push(@pieces, $delim) if ($keep eq 'delimiters'); 111 undef $word; 112 } 113 if (!length($line)) { 114 push(@pieces, $word); 115 } 116 } 117 return(@pieces); 118 } 119 120 121 122 sub old_shellwords { 123 124 # Usage: 125 # use ParseWords; 126 # @words = old_shellwords($line); 127 # or 128 # @words = old_shellwords(@lines); 129 # or 130 # @words = old_shellwords(); # defaults to $_ (and clobbers it) 131 132 no warnings 'uninitialized'; # we will be testing undef strings 133 local *_ = \join('', @_) if @_; 134 my (@words, $snippet); 135 136 s/\A\s+//; 137 while ($_ ne '') { 138 my $field = substr($_, 0, 0); # leave results tainted 139 for (;;) { 140 if (s/\A"(([^"\\]|\\.)*)"//s) { 141 ($snippet = $1) =~ s#\\(.)#$1#sg; 142 } 143 elsif (/\A"/) { 144 require Carp; 145 Carp::carp("Unmatched double quote: $_"); 146 return(); 147 } 148 elsif (s/\A'(([^'\\]|\\.)*)'//s) { 149 ($snippet = $1) =~ s#\\(.)#$1#sg; 150 } 151 elsif (/\A'/) { 152 require Carp; 153 Carp::carp("Unmatched single quote: $_"); 154 return(); 155 } 156 elsif (s/\A\\(.?)//s) { 157 $snippet = $1; 158 } 159 elsif (s/\A([^\s\\'"]+)//) { 160 $snippet = $1; 161 } 162 else { 163 s/\A\s+//; 164 last; 165 } 166 $field .= $snippet; 167 } 168 push(@words, $field); 169 } 170 return @words; 171 } 172 173 1; 174 175 __END__ 176 177 =head1 NAME 178 179 Text::ParseWords - parse text into an array of tokens or array of arrays 180 181 =head1 SYNOPSIS 182 183 use Text::ParseWords; 184 @lists = &nested_quotewords($delim, $keep, @lines); 185 @words = "ewords($delim, $keep, @lines); 186 @words = &shellwords(@lines); 187 @words = &parse_line($delim, $keep, $line); 188 @words = &old_shellwords(@lines); # DEPRECATED! 189 190 =head1 DESCRIPTION 191 192 The &nested_quotewords() and "ewords() functions accept a delimiter 193 (which can be a regular expression) 194 and a list of lines and then breaks those lines up into a list of 195 words ignoring delimiters that appear inside quotes. "ewords() 196 returns all of the tokens in a single long list, while &nested_quotewords() 197 returns a list of token lists corresponding to the elements of @lines. 198 &parse_line() does tokenizing on a single string. The &*quotewords() 199 functions simply call &parse_line(), so if you're only splitting 200 one line you can call &parse_line() directly and save a function 201 call. 202 203 The $keep argument is a boolean flag. If true, then the tokens are 204 split on the specified delimiter, but all other characters (quotes, 205 backslashes, etc.) are kept in the tokens. If $keep is false then the 206 &*quotewords() functions remove all quotes and backslashes that are 207 not themselves backslash-escaped or inside of single quotes (i.e., 208 "ewords() tries to interpret these characters just like the Bourne 209 shell). NB: these semantics are significantly different from the 210 original version of this module shipped with Perl 5.000 through 5.004. 211 As an additional feature, $keep may be the keyword "delimiters" which 212 causes the functions to preserve the delimiters in each string as 213 tokens in the token lists, in addition to preserving quote and 214 backslash characters. 215 216 &shellwords() is written as a special case of "ewords(), and it 217 does token parsing with whitespace as a delimiter-- similar to most 218 Unix shells. 219 220 =head1 EXAMPLES 221 222 The sample program: 223 224 use Text::ParseWords; 225 @words = "ewords('\s+', 0, q{this is "a test" of\ quotewords \"for you}); 226 $i = 0; 227 foreach (@words) { 228 print "$i: <$_>\n"; 229 $i++; 230 } 231 232 produces: 233 234 0: <this> 235 1: <is> 236 2: <a test> 237 3: <of quotewords> 238 4: <"for> 239 5: <you> 240 241 demonstrating: 242 243 =over 4 244 245 =item 0 246 247 a simple word 248 249 =item 1 250 251 multiple spaces are skipped because of our $delim 252 253 =item 2 254 255 use of quotes to include a space in a word 256 257 =item 3 258 259 use of a backslash to include a space in a word 260 261 =item 4 262 263 use of a backslash to remove the special meaning of a double-quote 264 265 =item 5 266 267 another simple word (note the lack of effect of the 268 backslashed double-quote) 269 270 =back 271 272 Replacing C<"ewords('\s+', 0, q{this is...})> 273 with C<&shellwords(q{this is...})> 274 is a simpler way to accomplish the same thing. 275 276 =head1 AUTHORS 277 278 Maintainer is Hal Pomeranz <pomeranz@netcom.com>, 1994-1997 (Original 279 author unknown). Much of the code for &parse_line() (including the 280 primary regexp) from Joerk Behrends <jbehrends@multimediaproduzenten.de>. 281 282 Examples section another documentation provided by John Heidemann 283 <johnh@ISI.EDU> 284 285 Bug reports, patches, and nagging provided by lots of folks-- thanks 286 everybody! Special thanks to Michael Schwern <schwern@envirolink.org> 287 for assuring me that a &nested_quotewords() would be useful, and to 288 Jeff Friedl <jfriedl@yahoo-inc.com> for telling me not to worry about 289 error-checking (sort of-- you had to be there). 290 291 =cut
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Tue Mar 17 22:47:18 2015 | Cross-referenced by PHPXref 0.7.1 |