mirror of
https://github.com/kingToolbox/WindTerm.git
synced 2024-12-26 04:10:08 +08:00
Add onigmo 5.13.5
This commit is contained in:
parent
cf11cf128d
commit
432b7676da
28
src/Onigmo/.editorconfig
Normal file
28
src/Onigmo/.editorconfig
Normal file
@ -0,0 +1,28 @@
|
||||
; see: http://editorconfig.org/
|
||||
|
||||
root = true
|
||||
|
||||
[*]
|
||||
end_of_line = lf
|
||||
insert_final_newline = true
|
||||
trim_trailing_whitespace = true
|
||||
|
||||
[**.[ch]]
|
||||
indent_style = tab
|
||||
indent_size = 2
|
||||
tab_width = 8
|
||||
|
||||
[**.py]
|
||||
indent_style = space
|
||||
indent_size = 4
|
||||
|
||||
[**.rb]
|
||||
indent_style = space
|
||||
indent_size = 2
|
||||
tab_width = 8
|
||||
|
||||
[win32/*]
|
||||
end_of_line = crlf
|
||||
|
||||
[win32/*.py]
|
||||
end_of_line = lf
|
66
src/Onigmo/.gitignore
vendored
Normal file
66
src/Onigmo/.gitignore
vendored
Normal file
@ -0,0 +1,66 @@
|
||||
# ignore dot-files, binary files and backup files
|
||||
.*
|
||||
*.o
|
||||
*.lo
|
||||
*.so
|
||||
*.a
|
||||
*.la
|
||||
*.obj
|
||||
*.def
|
||||
*.dll
|
||||
*.exe
|
||||
*.exp
|
||||
*.lib
|
||||
*.pyc
|
||||
*.bak
|
||||
*.BAK
|
||||
*~
|
||||
*.swp
|
||||
*.orig
|
||||
*.rej
|
||||
*.RES
|
||||
*.res
|
||||
|
||||
# don't want to ignore
|
||||
!.gitignore
|
||||
!.editorconfig
|
||||
|
||||
# working dirs
|
||||
.deps
|
||||
.libs
|
||||
|
||||
# autotools generated files
|
||||
/autom4te.cache
|
||||
/config.h
|
||||
/config.log
|
||||
/config.status
|
||||
/libtool
|
||||
/onig-config
|
||||
/oniguruma.pc
|
||||
/Makefile
|
||||
/sample/Makefile
|
||||
/stamp-h1
|
||||
|
||||
# generated executable files
|
||||
/enc/mktable
|
||||
/sample/crnl
|
||||
/sample/encode
|
||||
/sample/listcap
|
||||
/sample/names
|
||||
/sample/posix
|
||||
/sample/simple
|
||||
/sample/sql
|
||||
/sample/syntax
|
||||
/testc
|
||||
/testcu
|
||||
/testp
|
||||
|
||||
# tag files
|
||||
tags
|
||||
TAGS
|
||||
|
||||
# GNU global files
|
||||
GPATH
|
||||
GRTAGS
|
||||
GSYMS
|
||||
GTAGS
|
2
src/Onigmo/AUTHORS
Normal file
2
src/Onigmo/AUTHORS
Normal file
@ -0,0 +1,2 @@
|
||||
kentkt AT csc DOT jp (K.Takata)
|
||||
sndgk393 AT ybb DOT ne DOT jp (K.Kosako)
|
87
src/Onigmo/COPYING
Normal file
87
src/Onigmo/COPYING
Normal file
@ -0,0 +1,87 @@
|
||||
Onigmo (Oniguruma-mod) LICENSE
|
||||
------------------------------
|
||||
|
||||
/*-
|
||||
* Copyright (c) 2002-2009 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2011-2013 K.Takata <kentkt AT csc DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
|
||||
|
||||
Oniguruma LICENSE
|
||||
-----------------
|
||||
|
||||
/*-
|
||||
* Copyright (c) 2002-2009 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
|
||||
|
||||
Ruby BSDL
|
||||
---------
|
||||
Copyright (C) 1993-2013 Yukihiro Matsumoto. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
SUCH DAMAGE.
|
2369
src/Onigmo/HISTORY
Normal file
2369
src/Onigmo/HISTORY
Normal file
File diff suppressed because it is too large
Load Diff
365
src/Onigmo/INSTALL
Normal file
365
src/Onigmo/INSTALL
Normal file
@ -0,0 +1,365 @@
|
||||
Installation Instructions
|
||||
*************************
|
||||
|
||||
Copyright (C) 1994, 1995, 1996, 1999, 2000, 2001, 2002, 2004, 2005,
|
||||
2006, 2007, 2008, 2009 Free Software Foundation, Inc.
|
||||
|
||||
Copying and distribution of this file, with or without modification,
|
||||
are permitted in any medium without royalty provided the copyright
|
||||
notice and this notice are preserved. This file is offered as-is,
|
||||
without warranty of any kind.
|
||||
|
||||
Basic Installation
|
||||
==================
|
||||
|
||||
Briefly, the shell commands `./configure; make; make install' should
|
||||
configure, build, and install this package. The following
|
||||
more-detailed instructions are generic; see the `README' file for
|
||||
instructions specific to this package. Some packages provide this
|
||||
`INSTALL' file but do not implement all of the features documented
|
||||
below. The lack of an optional feature in a given package is not
|
||||
necessarily a bug. More recommendations for GNU packages can be found
|
||||
in *note Makefile Conventions: (standards)Makefile Conventions.
|
||||
|
||||
The `configure' shell script attempts to guess correct values for
|
||||
various system-dependent variables used during compilation. It uses
|
||||
those values to create a `Makefile' in each directory of the package.
|
||||
It may also create one or more `.h' files containing system-dependent
|
||||
definitions. Finally, it creates a shell script `config.status' that
|
||||
you can run in the future to recreate the current configuration, and a
|
||||
file `config.log' containing compiler output (useful mainly for
|
||||
debugging `configure').
|
||||
|
||||
It can also use an optional file (typically called `config.cache'
|
||||
and enabled with `--cache-file=config.cache' or simply `-C') that saves
|
||||
the results of its tests to speed up reconfiguring. Caching is
|
||||
disabled by default to prevent problems with accidental use of stale
|
||||
cache files.
|
||||
|
||||
If you need to do unusual things to compile the package, please try
|
||||
to figure out how `configure' could check whether to do them, and mail
|
||||
diffs or instructions to the address given in the `README' so they can
|
||||
be considered for the next release. If you are using the cache, and at
|
||||
some point `config.cache' contains results you don't want to keep, you
|
||||
may remove or edit it.
|
||||
|
||||
The file `configure.ac' (or `configure.in') is used to create
|
||||
`configure' by a program called `autoconf'. You need `configure.ac' if
|
||||
you want to change it or regenerate `configure' using a newer version
|
||||
of `autoconf'.
|
||||
|
||||
The simplest way to compile this package is:
|
||||
|
||||
1. `cd' to the directory containing the package's source code and type
|
||||
`./configure' to configure the package for your system.
|
||||
|
||||
Running `configure' might take a while. While running, it prints
|
||||
some messages telling which features it is checking for.
|
||||
|
||||
2. Type `make' to compile the package.
|
||||
|
||||
3. Optionally, type `make check' to run any self-tests that come with
|
||||
the package, generally using the just-built uninstalled binaries.
|
||||
|
||||
4. Type `make install' to install the programs and any data files and
|
||||
documentation. When installing into a prefix owned by root, it is
|
||||
recommended that the package be configured and built as a regular
|
||||
user, and only the `make install' phase executed with root
|
||||
privileges.
|
||||
|
||||
5. Optionally, type `make installcheck' to repeat any self-tests, but
|
||||
this time using the binaries in their final installed location.
|
||||
This target does not install anything. Running this target as a
|
||||
regular user, particularly if the prior `make install' required
|
||||
root privileges, verifies that the installation completed
|
||||
correctly.
|
||||
|
||||
6. You can remove the program binaries and object files from the
|
||||
source code directory by typing `make clean'. To also remove the
|
||||
files that `configure' created (so you can compile the package for
|
||||
a different kind of computer), type `make distclean'. There is
|
||||
also a `make maintainer-clean' target, but that is intended mainly
|
||||
for the package's developers. If you use it, you may have to get
|
||||
all sorts of other programs in order to regenerate files that came
|
||||
with the distribution.
|
||||
|
||||
7. Often, you can also type `make uninstall' to remove the installed
|
||||
files again. In practice, not all packages have tested that
|
||||
uninstallation works correctly, even though it is required by the
|
||||
GNU Coding Standards.
|
||||
|
||||
8. Some packages, particularly those that use Automake, provide `make
|
||||
distcheck', which can be used by developers to test that all other
|
||||
targets like `make install' and `make uninstall' work correctly.
|
||||
This target is generally not run by end users.
|
||||
|
||||
Compilers and Options
|
||||
=====================
|
||||
|
||||
Some systems require unusual options for compilation or linking that
|
||||
the `configure' script does not know about. Run `./configure --help'
|
||||
for details on some of the pertinent environment variables.
|
||||
|
||||
You can give `configure' initial values for configuration parameters
|
||||
by setting variables in the command line or in the environment. Here
|
||||
is an example:
|
||||
|
||||
./configure CC=c99 CFLAGS=-g LIBS=-lposix
|
||||
|
||||
*Note Defining Variables::, for more details.
|
||||
|
||||
Compiling For Multiple Architectures
|
||||
====================================
|
||||
|
||||
You can compile the package for more than one kind of computer at the
|
||||
same time, by placing the object files for each architecture in their
|
||||
own directory. To do this, you can use GNU `make'. `cd' to the
|
||||
directory where you want the object files and executables to go and run
|
||||
the `configure' script. `configure' automatically checks for the
|
||||
source code in the directory that `configure' is in and in `..'. This
|
||||
is known as a "VPATH" build.
|
||||
|
||||
With a non-GNU `make', it is safer to compile the package for one
|
||||
architecture at a time in the source code directory. After you have
|
||||
installed the package for one architecture, use `make distclean' before
|
||||
reconfiguring for another architecture.
|
||||
|
||||
On MacOS X 10.5 and later systems, you can create libraries and
|
||||
executables that work on multiple system types--known as "fat" or
|
||||
"universal" binaries--by specifying multiple `-arch' options to the
|
||||
compiler but only a single `-arch' option to the preprocessor. Like
|
||||
this:
|
||||
|
||||
./configure CC="gcc -arch i386 -arch x86_64 -arch ppc -arch ppc64" \
|
||||
CXX="g++ -arch i386 -arch x86_64 -arch ppc -arch ppc64" \
|
||||
CPP="gcc -E" CXXCPP="g++ -E"
|
||||
|
||||
This is not guaranteed to produce working output in all cases; you
|
||||
may have to build one architecture at a time and combine the results
|
||||
using the `lipo' tool if you have problems.
|
||||
|
||||
Installation Names
|
||||
==================
|
||||
|
||||
By default, `make install' installs the package's commands under
|
||||
`/usr/local/bin', include files under `/usr/local/include', etc. You
|
||||
can specify an installation prefix other than `/usr/local' by giving
|
||||
`configure' the option `--prefix=PREFIX', where PREFIX must be an
|
||||
absolute file name.
|
||||
|
||||
You can specify separate installation prefixes for
|
||||
architecture-specific files and architecture-independent files. If you
|
||||
pass the option `--exec-prefix=PREFIX' to `configure', the package uses
|
||||
PREFIX as the prefix for installing programs and libraries.
|
||||
Documentation and other data files still use the regular prefix.
|
||||
|
||||
In addition, if you use an unusual directory layout you can give
|
||||
options like `--bindir=DIR' to specify different values for particular
|
||||
kinds of files. Run `configure --help' for a list of the directories
|
||||
you can set and what kinds of files go in them. In general, the
|
||||
default for these options is expressed in terms of `${prefix}', so that
|
||||
specifying just `--prefix' will affect all of the other directory
|
||||
specifications that were not explicitly provided.
|
||||
|
||||
The most portable way to affect installation locations is to pass the
|
||||
correct locations to `configure'; however, many packages provide one or
|
||||
both of the following shortcuts of passing variable assignments to the
|
||||
`make install' command line to change installation locations without
|
||||
having to reconfigure or recompile.
|
||||
|
||||
The first method involves providing an override variable for each
|
||||
affected directory. For example, `make install
|
||||
prefix=/alternate/directory' will choose an alternate location for all
|
||||
directory configuration variables that were expressed in terms of
|
||||
`${prefix}'. Any directories that were specified during `configure',
|
||||
but not in terms of `${prefix}', must each be overridden at install
|
||||
time for the entire installation to be relocated. The approach of
|
||||
makefile variable overrides for each directory variable is required by
|
||||
the GNU Coding Standards, and ideally causes no recompilation.
|
||||
However, some platforms have known limitations with the semantics of
|
||||
shared libraries that end up requiring recompilation when using this
|
||||
method, particularly noticeable in packages that use GNU Libtool.
|
||||
|
||||
The second method involves providing the `DESTDIR' variable. For
|
||||
example, `make install DESTDIR=/alternate/directory' will prepend
|
||||
`/alternate/directory' before all installation names. The approach of
|
||||
`DESTDIR' overrides is not required by the GNU Coding Standards, and
|
||||
does not work on platforms that have drive letters. On the other hand,
|
||||
it does better at avoiding recompilation issues, and works well even
|
||||
when some directory options were not specified in terms of `${prefix}'
|
||||
at `configure' time.
|
||||
|
||||
Optional Features
|
||||
=================
|
||||
|
||||
If the package supports it, you can cause programs to be installed
|
||||
with an extra prefix or suffix on their names by giving `configure' the
|
||||
option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'.
|
||||
|
||||
Some packages pay attention to `--enable-FEATURE' options to
|
||||
`configure', where FEATURE indicates an optional part of the package.
|
||||
They may also pay attention to `--with-PACKAGE' options, where PACKAGE
|
||||
is something like `gnu-as' or `x' (for the X Window System). The
|
||||
`README' should mention any `--enable-' and `--with-' options that the
|
||||
package recognizes.
|
||||
|
||||
For packages that use the X Window System, `configure' can usually
|
||||
find the X include and library files automatically, but if it doesn't,
|
||||
you can use the `configure' options `--x-includes=DIR' and
|
||||
`--x-libraries=DIR' to specify their locations.
|
||||
|
||||
Some packages offer the ability to configure how verbose the
|
||||
execution of `make' will be. For these packages, running `./configure
|
||||
--enable-silent-rules' sets the default to minimal output, which can be
|
||||
overridden with `make V=1'; while running `./configure
|
||||
--disable-silent-rules' sets the default to verbose, which can be
|
||||
overridden with `make V=0'.
|
||||
|
||||
Particular systems
|
||||
==================
|
||||
|
||||
On HP-UX, the default C compiler is not ANSI C compatible. If GNU
|
||||
CC is not installed, it is recommended to use the following options in
|
||||
order to use an ANSI C compiler:
|
||||
|
||||
./configure CC="cc -Ae -D_XOPEN_SOURCE=500"
|
||||
|
||||
and if that doesn't work, install pre-built binaries of GCC for HP-UX.
|
||||
|
||||
On OSF/1 a.k.a. Tru64, some versions of the default C compiler cannot
|
||||
parse its `<wchar.h>' header file. The option `-nodtk' can be used as
|
||||
a workaround. If GNU CC is not installed, it is therefore recommended
|
||||
to try
|
||||
|
||||
./configure CC="cc"
|
||||
|
||||
and if that doesn't work, try
|
||||
|
||||
./configure CC="cc -nodtk"
|
||||
|
||||
On Solaris, don't put `/usr/ucb' early in your `PATH'. This
|
||||
directory contains several dysfunctional programs; working variants of
|
||||
these programs are available in `/usr/bin'. So, if you need `/usr/ucb'
|
||||
in your `PATH', put it _after_ `/usr/bin'.
|
||||
|
||||
On Haiku, software installed for all users goes in `/boot/common',
|
||||
not `/usr/local'. It is recommended to use the following options:
|
||||
|
||||
./configure --prefix=/boot/common
|
||||
|
||||
Specifying the System Type
|
||||
==========================
|
||||
|
||||
There may be some features `configure' cannot figure out
|
||||
automatically, but needs to determine by the type of machine the package
|
||||
will run on. Usually, assuming the package is built to be run on the
|
||||
_same_ architectures, `configure' can figure that out, but if it prints
|
||||
a message saying it cannot guess the machine type, give it the
|
||||
`--build=TYPE' option. TYPE can either be a short name for the system
|
||||
type, such as `sun4', or a canonical name which has the form:
|
||||
|
||||
CPU-COMPANY-SYSTEM
|
||||
|
||||
where SYSTEM can have one of these forms:
|
||||
|
||||
OS
|
||||
KERNEL-OS
|
||||
|
||||
See the file `config.sub' for the possible values of each field. If
|
||||
`config.sub' isn't included in this package, then this package doesn't
|
||||
need to know the machine type.
|
||||
|
||||
If you are _building_ compiler tools for cross-compiling, you should
|
||||
use the option `--target=TYPE' to select the type of system they will
|
||||
produce code for.
|
||||
|
||||
If you want to _use_ a cross compiler, that generates code for a
|
||||
platform different from the build platform, you should specify the
|
||||
"host" platform (i.e., that on which the generated programs will
|
||||
eventually be run) with `--host=TYPE'.
|
||||
|
||||
Sharing Defaults
|
||||
================
|
||||
|
||||
If you want to set default values for `configure' scripts to share,
|
||||
you can create a site shell script called `config.site' that gives
|
||||
default values for variables like `CC', `cache_file', and `prefix'.
|
||||
`configure' looks for `PREFIX/share/config.site' if it exists, then
|
||||
`PREFIX/etc/config.site' if it exists. Or, you can set the
|
||||
`CONFIG_SITE' environment variable to the location of the site script.
|
||||
A warning: not all `configure' scripts look for a site script.
|
||||
|
||||
Defining Variables
|
||||
==================
|
||||
|
||||
Variables not defined in a site shell script can be set in the
|
||||
environment passed to `configure'. However, some packages may run
|
||||
configure again during the build, and the customized values of these
|
||||
variables may be lost. In order to avoid this problem, you should set
|
||||
them in the `configure' command line, using `VAR=value'. For example:
|
||||
|
||||
./configure CC=/usr/local2/bin/gcc
|
||||
|
||||
causes the specified `gcc' to be used as the C compiler (unless it is
|
||||
overridden in the site shell script).
|
||||
|
||||
Unfortunately, this technique does not work for `CONFIG_SHELL' due to
|
||||
an Autoconf bug. Until the bug is fixed you can use this workaround:
|
||||
|
||||
CONFIG_SHELL=/bin/bash /bin/bash ./configure CONFIG_SHELL=/bin/bash
|
||||
|
||||
`configure' Invocation
|
||||
======================
|
||||
|
||||
`configure' recognizes the following options to control how it
|
||||
operates.
|
||||
|
||||
`--help'
|
||||
`-h'
|
||||
Print a summary of all of the options to `configure', and exit.
|
||||
|
||||
`--help=short'
|
||||
`--help=recursive'
|
||||
Print a summary of the options unique to this package's
|
||||
`configure', and exit. The `short' variant lists options used
|
||||
only in the top level, while the `recursive' variant lists options
|
||||
also present in any nested packages.
|
||||
|
||||
`--version'
|
||||
`-V'
|
||||
Print the version of Autoconf used to generate the `configure'
|
||||
script, and exit.
|
||||
|
||||
`--cache-file=FILE'
|
||||
Enable the cache: use and save the results of the tests in FILE,
|
||||
traditionally `config.cache'. FILE defaults to `/dev/null' to
|
||||
disable caching.
|
||||
|
||||
`--config-cache'
|
||||
`-C'
|
||||
Alias for `--cache-file=config.cache'.
|
||||
|
||||
`--quiet'
|
||||
`--silent'
|
||||
`-q'
|
||||
Do not print messages saying which checks are being made. To
|
||||
suppress all normal output, redirect it to `/dev/null' (any error
|
||||
messages will still be shown).
|
||||
|
||||
`--srcdir=DIR'
|
||||
Look for the package's source code in directory DIR. Usually
|
||||
`configure' can determine that directory automatically.
|
||||
|
||||
`--prefix=DIR'
|
||||
Use DIR as the installation prefix. *note Installation Names::
|
||||
for more details, including other options available for fine-tuning
|
||||
the installation locations.
|
||||
|
||||
`--no-create'
|
||||
`-n'
|
||||
Run the configure checks, but stop before creating any output
|
||||
files.
|
||||
|
||||
`configure' also accepts some other, not widely useful, options. Run
|
||||
`configure --help' for more details.
|
||||
|
118
src/Onigmo/Makefile.am
Normal file
118
src/Onigmo/Makefile.am
Normal file
@ -0,0 +1,118 @@
|
||||
## Makefile.am for Oniguruma
|
||||
encdir = $(top_srcdir)/enc
|
||||
sampledir = $(top_srcdir)/sample
|
||||
libname = libonig.la
|
||||
|
||||
ACLOCAL_AMFLAGS = -I m4
|
||||
#AM_CFLAGS = -DNOT_RUBY
|
||||
AM_CFLAGS =
|
||||
INCLUDES = -I$(top_srcdir) -I$(includedir)
|
||||
|
||||
SUBDIRS = . sample
|
||||
|
||||
include_HEADERS = oniguruma.h oniggnu.h onigposix.h
|
||||
lib_LTLIBRARIES = $(libname)
|
||||
|
||||
libonig_la_SOURCES = regint.h regparse.h regenc.h st.h \
|
||||
regerror.c regparse.c regext.c regcomp.c regexec.c reggnu.c \
|
||||
regenc.c regsyntax.c regtrav.c regversion.c st.c \
|
||||
regposix.c regposerr.c \
|
||||
$(encdir)/unicode.c $(encdir)/ascii.c $(encdir)/utf8.c \
|
||||
$(encdir)/utf16_be.c $(encdir)/utf16_le.c \
|
||||
$(encdir)/utf32_be.c $(encdir)/utf32_le.c \
|
||||
$(encdir)/unicode/casefold.h $(encdir)/unicode/name2ctype.h \
|
||||
$(encdir)/euc_jp.c $(encdir)/sjis.c $(encdir)/cp932.c \
|
||||
$(encdir)/iso8859_1.c \
|
||||
$(encdir)/iso8859_2.c $(encdir)/iso8859_3.c \
|
||||
$(encdir)/iso8859_4.c $(encdir)/iso8859_5.c \
|
||||
$(encdir)/iso8859_6.c $(encdir)/iso8859_7.c \
|
||||
$(encdir)/iso8859_8.c $(encdir)/iso8859_9.c \
|
||||
$(encdir)/iso8859_10.c $(encdir)/iso8859_11.c \
|
||||
$(encdir)/iso8859_13.c $(encdir)/iso8859_14.c \
|
||||
$(encdir)/iso8859_15.c $(encdir)/iso8859_16.c \
|
||||
$(encdir)/euc_tw.c $(encdir)/euc_kr.c $(encdir)/big5.c \
|
||||
$(encdir)/gb18030.c $(encdir)/koi8_r.c $(encdir)/cp1251.c
|
||||
|
||||
libonig_la_LDFLAGS = -version-info $(LTVERSION)
|
||||
|
||||
EXTRA_DIST = .gitignore oniguruma.pc.in HISTORY README.ja index.html \
|
||||
index_ja.html doc/API doc/API.ja doc/RE doc/RE.ja doc/FAQ doc/FAQ.ja \
|
||||
doc/UnicodeProps.txt \
|
||||
tool/.gitignore tool/CaseFolding.py tool/convert-name2ctype.sh \
|
||||
tool/enc-unicode.rb \
|
||||
win32/Makefile win32/config.h win32/testc.c \
|
||||
win32/makedef.py win32/onig.rc \
|
||||
$(encdir)/koi8.c $(encdir)/mktable.c \
|
||||
$(sampledir)/encode.c $(sampledir)/listcap.c $(sampledir)/names.c \
|
||||
$(sampledir)/posix.c $(sampledir)/simple.c $(sampledir)/sql.c \
|
||||
$(sampledir)/syntax.c $(sampledir)/crnl.c \
|
||||
test.rb testconv.rb testconvu.rb \
|
||||
onig.py testpy.py
|
||||
|
||||
bin_SCRIPTS = onig-config
|
||||
|
||||
onig-config: onig-config.in
|
||||
|
||||
do_subst = sed \
|
||||
-e 's,[@]datadir[@],$(datadir),g' \
|
||||
-e 's,[@]datarootdir[@],$(datarootdir),g' \
|
||||
-e 's,[@]PACKAGE_VERSION[@],$(PACKAGE_VERSION),g' \
|
||||
-e 's,[@]prefix[@],$(prefix),g' \
|
||||
-e 's,[@]exec_prefix[@],$(exec_prefix),g' \
|
||||
-e 's,[@]libdir[@],$(libdir),g' \
|
||||
-e 's,[@]includedir[@],$(includedir),g'
|
||||
|
||||
oniguruma.pc: $(srcdir)/oniguruma.pc.in Makefile
|
||||
$(do_subst) < $(<) > $(@)
|
||||
|
||||
pkgconfigdir = $(libdir)/pkgconfig
|
||||
pkgconfig_DATA = oniguruma.pc
|
||||
|
||||
dll:
|
||||
$(CXX) -shared -Wl,--output-def,libonig.def -o libonig.dll *.o \
|
||||
$(LIBS)
|
||||
strip libonig.dll
|
||||
|
||||
# Ruby TEST
|
||||
rtest:
|
||||
$(RUBYDIR)/ruby -w -Ke $(srcdir)/test.rb
|
||||
|
||||
# character-types-table source generator
|
||||
mktable: $(encdir)/mktable.c $(srcdir)/regenc.h
|
||||
$(CC) -I$(top_srcdir) -o mktable $(encdir)/mktable.c
|
||||
|
||||
|
||||
# TEST
|
||||
TESTS = testc testp testcu
|
||||
|
||||
check_PROGRAMS = testc testp testcu
|
||||
|
||||
atest: testc testp testcu
|
||||
@echo "[Oniguruma API, ASCII/EUC-JP check]"
|
||||
@$(top_builddir)/testc | grep RESULT
|
||||
@echo "[POSIX API, ASCII/EUC-JP check]"
|
||||
@$(top_builddir)/testp | grep RESULT
|
||||
@echo "[Oniguruma API, UTF-16 check]"
|
||||
@$(top_builddir)/testcu | grep RESULT
|
||||
|
||||
testc_SOURCES = testc.c
|
||||
testc_LDADD = libonig.la
|
||||
|
||||
testp_SOURCES = testc.c
|
||||
testp_LDADD = libonig.la
|
||||
testp_CFLAGS = -DPOSIX_TEST
|
||||
|
||||
testcu_SOURCES = testu.c
|
||||
testcu_LDADD = libonig.la
|
||||
|
||||
|
||||
testc.c: $(srcdir)/test.rb $(srcdir)/testconv.rb
|
||||
ruby -Ke $(srcdir)/testconv.rb < $(srcdir)/test.rb > $@
|
||||
|
||||
testu.c: $(srcdir)/test.rb $(srcdir)/testconvu.rb
|
||||
ruby -Ke $(srcdir)/testconvu.rb $(srcdir)/test.rb > $@
|
||||
|
||||
win32/testc.c: $(srcdir)/test.rb $(srcdir)/testconv.rb
|
||||
ruby -Ke $(srcdir)/testconv.rb -win < $(srcdir)/test.rb | nkf -cs > $@
|
||||
|
||||
## END OF FILE
|
1437
src/Onigmo/Makefile.in
Normal file
1437
src/Onigmo/Makefile.in
Normal file
File diff suppressed because it is too large
Load Diff
241
src/Onigmo/README
Normal file
241
src/Onigmo/README
Normal file
@ -0,0 +1,241 @@
|
||||
README 2013/03/21
|
||||
|
||||
Onigmo (Oniguruma-mod) -- (C) K.Takata <kentkt AT csc DOT jp>
|
||||
|
||||
https://github.com/k-takata/Onigmo
|
||||
|
||||
Onigmo is a regular expressions library forked from Oniguruma.
|
||||
Some of new features introduced in Perl 5.10+ can be used.
|
||||
|
||||
Some patches are merged from Ruby 2.0.0.
|
||||
|
||||
|
||||
Main New features:
|
||||
Regular Expressions (depends on the syntax):
|
||||
\K, \R, \X, (?(cond)yes|no)
|
||||
(?adlu), \g{name}, \g{n}, (?&name), (?n), (?R), (?0)
|
||||
(?P<name>...), (?P=name), (?P>name)
|
||||
|
||||
API:
|
||||
onig_search_gpos (for Perl-compatible \G)
|
||||
|
||||
Encoding:
|
||||
CP932
|
||||
|
||||
Syntax:
|
||||
Python
|
||||
|
||||
|
||||
New Source Files:
|
||||
enc/cp932.c CP932 encoding.
|
||||
enc/unicode/casefold.h Unicode case folding data.
|
||||
enc/unicode/name2ctype.h Unicode properties data.
|
||||
|
||||
onig.py onig.dll/libonig.so loader.
|
||||
testpy.py test program.
|
||||
|
||||
tool/CaseFolding.py generates casefold.h.
|
||||
tool/convert-name2ctype.sh converts name2ctype.kwd to name2ctype.h.
|
||||
tool/enc-unicode.rb generates name2ctype.kwd.
|
||||
|
||||
win32/makedef.py creates onig.def.
|
||||
win32/onig.rc resource file for onig.dll.
|
||||
|
||||
|
||||
ToDo:
|
||||
* Reduce the size of Unicode Character Data.
|
||||
* (?|...)
|
||||
* Improve (?(cond)yes|no). (support look-ahead/behind assertions.)
|
||||
|
||||
|
||||
Oniguruma's README follows:
|
||||
======================================================================
|
||||
README 2007/05/31
|
||||
|
||||
Oniguruma ---- (C) K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
|
||||
http://www.geocities.jp/kosako3/oniguruma/
|
||||
|
||||
Oniguruma is a regular expressions library.
|
||||
The characteristic of this library is that a different character encoding
|
||||
can be specified for every regular expression object.
|
||||
|
||||
Supported character encodings:
|
||||
|
||||
ASCII, UTF-8, UTF-16BE, UTF-16LE, UTF-32BE, UTF-32LE,
|
||||
EUC-JP, EUC-TW, EUC-KR, EUC-CN,
|
||||
Shift_JIS, Big5, GB18030, KOI8-R, CP1251,
|
||||
ISO-8859-1, ISO-8859-2, ISO-8859-3, ISO-8859-4, ISO-8859-5,
|
||||
ISO-8859-6, ISO-8859-7, ISO-8859-8, ISO-8859-9, ISO-8859-10,
|
||||
ISO-8859-11, ISO-8859-13, ISO-8859-14, ISO-8859-15, ISO-8859-16
|
||||
|
||||
* GB18030: contributed by KUBO Takehiro
|
||||
* CP1251: contributed by Byte
|
||||
------------------------------------------------------------
|
||||
|
||||
License
|
||||
|
||||
BSD license.
|
||||
|
||||
|
||||
Install
|
||||
|
||||
Case 1: Unix and Cygwin platform
|
||||
|
||||
1. ./configure
|
||||
2. make
|
||||
3. make install
|
||||
|
||||
* uninstall
|
||||
|
||||
make uninstall
|
||||
|
||||
* test (ASCII/EUC-JP)
|
||||
|
||||
make atest
|
||||
|
||||
* configuration check
|
||||
|
||||
onig-config --cflags
|
||||
onig-config --libs
|
||||
onig-config --prefix
|
||||
onig-config --exec-prefix
|
||||
|
||||
|
||||
|
||||
Case 2: Win32 platform (VC++)
|
||||
|
||||
1. copy win32\Makefile Makefile
|
||||
2. copy win32\config.h config.h
|
||||
3. nmake
|
||||
|
||||
onig_s.lib: static link library
|
||||
onig.dll: dynamic link library
|
||||
|
||||
* test (ASCII/Shift_JIS)
|
||||
4. copy win32\testc.c testc.c
|
||||
5. nmake ctest
|
||||
|
||||
|
||||
|
||||
Regular Expressions
|
||||
|
||||
See doc/RE (or doc/RE.ja for Japanese).
|
||||
|
||||
|
||||
Usage
|
||||
|
||||
Include oniguruma.h in your program. (Oniguruma API)
|
||||
See doc/API for Oniguruma API.
|
||||
|
||||
If you want to disable UChar type (== unsigned char) definition
|
||||
in oniguruma.h, define ONIG_ESCAPE_UCHAR_COLLISION and then
|
||||
include oniguruma.h.
|
||||
|
||||
If you want to disable regex_t type definition in oniguruma.h,
|
||||
define ONIG_ESCAPE_REGEX_T_COLLISION and then include oniguruma.h.
|
||||
|
||||
Example of the compiling/linking command line in Unix or Cygwin,
|
||||
(prefix == /usr/local case)
|
||||
|
||||
cc sample.c -L/usr/local/lib -lonig
|
||||
|
||||
|
||||
If you want to use the static link library (onig_s.lib) on Win32,
|
||||
add option -DONIG_EXTERN=extern to C compiler.
|
||||
|
||||
|
||||
|
||||
Sample Programs
|
||||
|
||||
sample/simple.c example of the minimum (Oniguruma API)
|
||||
sample/names.c example of the named group callback.
|
||||
sample/encode.c example of some encodings.
|
||||
sample/listcap.c example of the capture history.
|
||||
sample/posix.c POSIX API sample.
|
||||
sample/sql.c example of the variable meta characters.
|
||||
(SQL-like pattern matching)
|
||||
|
||||
Test Programs
|
||||
sample/syntax.c Perl, Java and ASIS syntax test.
|
||||
sample/crnl.c --enable-crnl-as-line-terminator test
|
||||
|
||||
|
||||
Source Files
|
||||
|
||||
oniguruma.h Oniguruma API header file. (public)
|
||||
onig-config.in configuration check program template.
|
||||
|
||||
regenc.h character encodings framework header file.
|
||||
regint.h internal definitions
|
||||
regparse.h internal definitions for regparse.c and regcomp.c
|
||||
regcomp.c compiling and optimization functions
|
||||
regenc.c character encodings framework.
|
||||
regerror.c error message function
|
||||
regext.c extended API functions. (deluxe version API)
|
||||
regexec.c search and match functions
|
||||
regparse.c parsing functions.
|
||||
regsyntax.c pattern syntax functions and built-in syntax definitions.
|
||||
regtrav.c capture history tree data traverse functions.
|
||||
regversion.c version info function.
|
||||
st.h hash table functions header file
|
||||
st.c hash table functions
|
||||
|
||||
oniggnu.h GNU regex API header file. (public)
|
||||
reggnu.c GNU regex API functions
|
||||
|
||||
onigposix.h POSIX API header file. (public)
|
||||
regposerr.c POSIX error message function.
|
||||
regposix.c POSIX API functions.
|
||||
|
||||
enc/mktable.c character type table generator.
|
||||
enc/ascii.c ASCII encoding.
|
||||
enc/euc_jp.c EUC-JP encoding.
|
||||
enc/euc_tw.c EUC-TW encoding.
|
||||
enc/euc_kr.c EUC-KR, EUC-CN encoding.
|
||||
enc/sjis.c Shift_JIS encoding.
|
||||
enc/big5.c Big5 encoding.
|
||||
enc/gb18030.c GB18030 encoding.
|
||||
enc/koi8.c KOI8 encoding.
|
||||
enc/koi8_r.c KOI8-R encoding.
|
||||
enc/cp1251.c CP1251 encoding.
|
||||
enc/iso8859_1.c ISO-8859-1 encoding. (Latin-1)
|
||||
enc/iso8859_2.c ISO-8859-2 encoding. (Latin-2)
|
||||
enc/iso8859_3.c ISO-8859-3 encoding. (Latin-3)
|
||||
enc/iso8859_4.c ISO-8859-4 encoding. (Latin-4)
|
||||
enc/iso8859_5.c ISO-8859-5 encoding. (Cyrillic)
|
||||
enc/iso8859_6.c ISO-8859-6 encoding. (Arabic)
|
||||
enc/iso8859_7.c ISO-8859-7 encoding. (Greek)
|
||||
enc/iso8859_8.c ISO-8859-8 encoding. (Hebrew)
|
||||
enc/iso8859_9.c ISO-8859-9 encoding. (Latin-5 or Turkish)
|
||||
enc/iso8859_10.c ISO-8859-10 encoding. (Latin-6 or Nordic)
|
||||
enc/iso8859_11.c ISO-8859-11 encoding. (Thai)
|
||||
enc/iso8859_13.c ISO-8859-13 encoding. (Latin-7 or Baltic Rim)
|
||||
enc/iso8859_14.c ISO-8859-14 encoding. (Latin-8 or Celtic)
|
||||
enc/iso8859_15.c ISO-8859-15 encoding. (Latin-9 or West European with Euro)
|
||||
enc/iso8859_16.c ISO-8859-16 encoding.
|
||||
(Latin-10 or South-Eastern European with Euro)
|
||||
enc/utf8.c UTF-8 encoding.
|
||||
enc/utf16_be.c UTF-16BE encoding.
|
||||
enc/utf16_le.c UTF-16LE encoding.
|
||||
enc/utf32_be.c UTF-32BE encoding.
|
||||
enc/utf32_le.c UTF-32LE encoding.
|
||||
enc/unicode.c Unicode information data.
|
||||
|
||||
win32/Makefile Makefile for Win32 (VC++)
|
||||
win32/config.h config.h for Win32
|
||||
|
||||
|
||||
|
||||
ToDo
|
||||
|
||||
? case fold flag: Katakana <-> Hiragana.
|
||||
? add ONIG_OPTION_NOTBOS/NOTEOS. (\A, \z, \Z)
|
||||
?? \X (== \PM\pM*)
|
||||
?? implement syntax behavior ONIG_SYN_CONTEXT_INDEP_ANCHORS.
|
||||
?? transmission stopper. (return ONIG_STOP from match_at())
|
||||
|
||||
and I'm thankful to Akinori MUSHA.
|
||||
|
||||
|
||||
Mail Address: K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
247
src/Onigmo/README.ja
Normal file
247
src/Onigmo/README.ja
Normal file
@ -0,0 +1,247 @@
|
||||
README.ja 2013/03/21
|
||||
|
||||
鬼雲 (鬼車改) -- (C) K.Takata <kentkt AT csc DOT jp>
|
||||
|
||||
https://github.com/k-takata/Onigmo
|
||||
|
||||
鬼雲は、鬼車から派生した正規表現ライブラリである。
|
||||
Perl 5.10以降で新たに導入された正規表現の一部が使用可能になっている。
|
||||
|
||||
一部のパッチは Ruby 2.0.0 からマージしている。
|
||||
|
||||
|
||||
主な新機能:
|
||||
正規表現 (文法依存):
|
||||
\K, \R, \X, (?(cond)yes|no)
|
||||
(?adlu), \g{name}, \g{n}, (?&name), (?n), (?R), (?0)
|
||||
(?P<name>...), (?P=name), (?P>name)
|
||||
|
||||
API:
|
||||
onig_search_gpos (Perl互換の \G 用)
|
||||
|
||||
エンコーディング:
|
||||
CP932
|
||||
|
||||
文法:
|
||||
Python
|
||||
|
||||
|
||||
新規ソースファイル:
|
||||
enc/cp932.c CP932 エンコーディング
|
||||
enc/unicode/casefold.h Unicodeケースフォールドデータ
|
||||
enc/unicode/name2ctype.h Unicodeプロパティデータ
|
||||
|
||||
onig.py onig.dll/libonig.so ローダ
|
||||
testpy.py テストプログラム
|
||||
|
||||
tool/CaseFolding.py casefold.hを生成
|
||||
tool/convert-name2ctype.sh name2ctype.kwdをname2ctype.hに変換
|
||||
tool/enc-unicode.rb name2ctype.kwdを生成
|
||||
|
||||
win32/makedef.py onig.defを作成
|
||||
win32/onig.rc onig.dll用リソースファイル
|
||||
|
||||
|
||||
ToDo:
|
||||
* Unicode Character Data のサイズ削減。
|
||||
* (?|...)
|
||||
* (?(cond)yes|no) の改善。(先読み・戻り読みの対応)
|
||||
|
||||
|
||||
以下、鬼車の README.ja:
|
||||
======================================================================
|
||||
README.ja 2007/05/31
|
||||
|
||||
鬼車 ---- (C) K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
|
||||
http://www.geocities.jp/kosako3/oniguruma/
|
||||
|
||||
鬼車は正規表現ライブラリである。
|
||||
このライブラリの特長は、それぞれの正規表現オブジェクトごとに
|
||||
文字エンコーディングを指定できることである。
|
||||
|
||||
サポートしている文字エンコーディング:
|
||||
|
||||
ASCII, UTF-8, UTF-16BE, UTF-16LE, UTF-32BE, UTF-32LE,
|
||||
EUC-JP, EUC-TW, EUC-KR, EUC-CN,
|
||||
Shift_JIS, Big5, GB18030, KOI8-R, CP1251,
|
||||
ISO-8859-1, ISO-8859-2, ISO-8859-3, ISO-8859-4, ISO-8859-5,
|
||||
ISO-8859-6, ISO-8859-7, ISO-8859-8, ISO-8859-9, ISO-8859-10,
|
||||
ISO-8859-11, ISO-8859-13, ISO-8859-14, ISO-8859-15, ISO-8859-16
|
||||
|
||||
* GB18030: 久保健洋氏提供
|
||||
* CP1251: Byte氏提供
|
||||
------------------------------------------------------------
|
||||
|
||||
ライセンス
|
||||
|
||||
BSDライセンスに従う。
|
||||
|
||||
|
||||
インストール
|
||||
|
||||
ケース1: UnixとCygwin環境
|
||||
|
||||
1. ./configure
|
||||
2. make
|
||||
3. make install
|
||||
|
||||
アンインストール
|
||||
|
||||
make uninstall
|
||||
|
||||
動作テスト (ASCII/EUC-JP)
|
||||
|
||||
make atest
|
||||
|
||||
|
||||
構成確認
|
||||
|
||||
onig-config --cflags
|
||||
onig-config --libs
|
||||
onig-config --prefix
|
||||
onig-config --exec-prefix
|
||||
|
||||
|
||||
|
||||
ケース2: Win32(VC++)環境
|
||||
|
||||
1. copy win32\Makefile Makefile
|
||||
2. copy win32\config.h config.h
|
||||
3. nmake
|
||||
|
||||
onig_s.lib: static link library
|
||||
onig.dll: dynamic link library
|
||||
|
||||
* 動作テスト (ASCII/Shift_JIS)
|
||||
4. copy win32\testc.c testc.c
|
||||
5. nmake ctest
|
||||
|
||||
|
||||
|
||||
正規表現
|
||||
|
||||
doc/RE.jaを参照
|
||||
|
||||
|
||||
使用方法
|
||||
|
||||
使用するプログラムで、oniguruma.hをインクルードする(Oniguruma APIの場合)。
|
||||
Oniguruma APIについては、doc/API.jaを参照。
|
||||
|
||||
oniguruma.hで定義されている型名UChar(== unsigned char)を無効にしたい場合
|
||||
には、ONIG_ESCAPE_UCHAR_COLLISIONをdefineしてからoniguruma.hをインクルード
|
||||
すること。このときにはUCharは定義されず、OnigUCharという名前の定義のみが
|
||||
有効になる。
|
||||
|
||||
oniguruma.hで定義されている型名regex_tを無効にしたい場合には、
|
||||
ONIG_ESCAPE_REGEX_T_COLLISIONをdefineしてからoniguruma.hをインクルード
|
||||
すること。このときにはregex_tは定義されず、OnigRegexType, OnigRegexという
|
||||
名前の定義のみが有効になる。
|
||||
|
||||
Unix/Cygwin上でコンパイル、リンクする場合の例:
|
||||
(prefixが/usr/localのとき)
|
||||
cc sample.c -L/usr/local/lib -lonig
|
||||
|
||||
GNU libtoolを使用しているので、プラットフォームが共有ライブラリをサポートして
|
||||
いれば、使用できるようになっている。
|
||||
静的ライブラリと共有ライブラリのどちらを使用するかを指定する方法、実行時点での
|
||||
環境設定方法については、自分で調べて下さい。
|
||||
|
||||
|
||||
Win32でスタティックリンクライブラリ(onig_s.lib)をリンクする場合には、
|
||||
コンパイルするときに -DONIG_EXTERN=extern をコンパイル引数に追加すること。
|
||||
|
||||
|
||||
使用例プログラム
|
||||
|
||||
sample/simple.c 最小例 (Oniguruma API)
|
||||
sample/names.c 名前付きグループコールバック使用例
|
||||
sample/encode.c 幾つかの文字エンコーディング使用例
|
||||
sample/listcap.c 捕獲履歴機能の使用例
|
||||
sample/posix.c POSIX API使用例
|
||||
sample/sql.c 可変メタ文字機能使用例 (SQL-like パターン)
|
||||
|
||||
テストプログラム
|
||||
sample/syntax.c Perl、Java、ASIS文法のテスト
|
||||
sample/crnl.c --enable-crnl-as-line-terminator テスト
|
||||
|
||||
|
||||
ソースファイル
|
||||
|
||||
oniguruma.h 鬼車APIヘッダ (公開)
|
||||
onig-config.in onig-configプログラム テンプレート
|
||||
|
||||
regenc.h 文字エンコーディング枠組みヘッダ
|
||||
regint.h 内部宣言
|
||||
regparse.h regparse.cとregcomp.cのための内部宣言
|
||||
regcomp.c コンパイル、最適化関数
|
||||
regenc.c 文字エンコーディング枠組み
|
||||
regerror.c エラーメッセージ関数
|
||||
regext.c 拡張API関数
|
||||
regexec.c 検索、照合関数
|
||||
regparse.c 正規表現パターン解析関数
|
||||
regsyntax.c 正規表現パターン文法関数、組込み文法定義
|
||||
regtrav.c 捕獲履歴木巡回関数
|
||||
regversion.c 版情報関数
|
||||
st.h ハッシュテーブル関数宣言
|
||||
st.c ハッシュテーブル関数
|
||||
|
||||
oniggnu.h GNU regex APIヘッダ (公開)
|
||||
reggnu.c GNU regex API関数
|
||||
|
||||
onigposix.h POSIX APIヘッダ (公開)
|
||||
regposerr.c POSIX APIエラーメッセージ関数
|
||||
regposix.c POSIX API関数
|
||||
|
||||
enc/mktable.c 文字タイプテーブル生成プログラム
|
||||
enc/ascii.c ASCII エンコーディング
|
||||
enc/euc_jp.c EUC-JP エンコーディング
|
||||
enc/euc_tw.c EUC-TW エンコーディング
|
||||
enc/euc_kr.c EUC-KR, EUC-CN エンコーディング
|
||||
enc/sjis.c Shift_JIS エンコーディング
|
||||
enc/big5.c Big5 エンコーディング
|
||||
enc/gb18030.c GB18030 エンコーディング
|
||||
enc/koi8.c KOI8 エンコーディング
|
||||
enc/koi8_r.c KOI8-R エンコーディング
|
||||
enc/cp1251.c CP1251 エンコーディング
|
||||
enc/iso8859_1.c ISO-8859-1 (Latin-1)
|
||||
enc/iso8859_2.c ISO-8859-2 (Latin-2)
|
||||
enc/iso8859_3.c ISO-8859-3 (Latin-3)
|
||||
enc/iso8859_4.c ISO-8859-4 (Latin-4)
|
||||
enc/iso8859_5.c ISO-8859-5 (Cyrillic)
|
||||
enc/iso8859_6.c ISO-8859-6 (Arabic)
|
||||
enc/iso8859_7.c ISO-8859-7 (Greek)
|
||||
enc/iso8859_8.c ISO-8859-8 (Hebrew)
|
||||
enc/iso8859_9.c ISO-8859-9 (Latin-5 または Turkish)
|
||||
enc/iso8859_10.c ISO-8859-10 (Latin-6 または Nordic)
|
||||
enc/iso8859_11.c ISO-8859-11 (Thai)
|
||||
enc/iso8859_13.c ISO-8859-13 (Latin-7 または Baltic Rim)
|
||||
enc/iso8859_14.c ISO-8859-14 (Latin-8 または Celtic)
|
||||
enc/iso8859_15.c ISO-8859-15 (Latin-9 または West European with Euro)
|
||||
enc/iso8859_16.c ISO-8859-16
|
||||
(Latin-10 または South-Eastern European with Euro)
|
||||
enc/utf8.c UTF-8 エンコーディング
|
||||
enc/utf16_be.c UTF-16BE エンコーディング
|
||||
enc/utf16_le.c UTF-16LE エンコーディング
|
||||
enc/utf32_be.c UTF-32BE エンコーディング
|
||||
enc/utf32_le.c UTF-32LE エンコーディング
|
||||
enc/unicode.c Unicode情報
|
||||
|
||||
win32/Makefile Win32用 Makefile (for VC++)
|
||||
win32/config.h Win32用 config.h
|
||||
|
||||
|
||||
|
||||
残件
|
||||
|
||||
? case fold flag: Katakana <-> Hiragana
|
||||
? ONIG_OPTION_NOTBOS/NOTEOS追加 (\A, \z, \Z)
|
||||
?? \X (== \PM\pM*)
|
||||
?? 文法要素 ONIG_SYN_CONTEXT_INDEP_ANCHORSの実装
|
||||
?? 検索位置移動停止演算子 (match_at()からONIG_STOPを返す)
|
||||
|
||||
and I'm thankful to Akinori MUSHA.
|
||||
|
||||
|
||||
アドレス: K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
991
src/Onigmo/aclocal.m4
vendored
Normal file
991
src/Onigmo/aclocal.m4
vendored
Normal file
@ -0,0 +1,991 @@
|
||||
# generated automatically by aclocal 1.11.1 -*- Autoconf -*-
|
||||
|
||||
# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
|
||||
# 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
# with or without modifications, as long as this notice is preserved.
|
||||
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
|
||||
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
|
||||
# PARTICULAR PURPOSE.
|
||||
|
||||
m4_ifndef([AC_AUTOCONF_VERSION],
|
||||
[m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl
|
||||
m4_if(m4_defn([AC_AUTOCONF_VERSION]), [2.65],,
|
||||
[m4_warning([this file was generated for autoconf 2.65.
|
||||
You have another version of autoconf. It may work, but is not guaranteed to.
|
||||
If you have problems, you may need to regenerate the build system entirely.
|
||||
To do so, use the procedure documented by the package, typically `autoreconf'.])])
|
||||
|
||||
# Copyright (C) 2002, 2003, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
# with or without modifications, as long as this notice is preserved.
|
||||
|
||||
# AM_AUTOMAKE_VERSION(VERSION)
|
||||
# ----------------------------
|
||||
# Automake X.Y traces this macro to ensure aclocal.m4 has been
|
||||
# generated from the m4 files accompanying Automake X.Y.
|
||||
# (This private macro should not be called outside this file.)
|
||||
AC_DEFUN([AM_AUTOMAKE_VERSION],
|
||||
[am__api_version='1.11'
|
||||
dnl Some users find AM_AUTOMAKE_VERSION and mistake it for a way to
|
||||
dnl require some minimum version. Point them to the right macro.
|
||||
m4_if([$1], [1.11.1], [],
|
||||
[AC_FATAL([Do not call $0, use AM_INIT_AUTOMAKE([$1]).])])dnl
|
||||
])
|
||||
|
||||
# _AM_AUTOCONF_VERSION(VERSION)
|
||||
# -----------------------------
|
||||
# aclocal traces this macro to find the Autoconf version.
|
||||
# This is a private macro too. Using m4_define simplifies
|
||||
# the logic in aclocal, which can simply ignore this definition.
|
||||
m4_define([_AM_AUTOCONF_VERSION], [])
|
||||
|
||||
# AM_SET_CURRENT_AUTOMAKE_VERSION
|
||||
# -------------------------------
|
||||
# Call AM_AUTOMAKE_VERSION and _AM_AUTOCONF_VERSION so they can be traced.
|
||||
# This function is AC_REQUIREd by AM_INIT_AUTOMAKE.
|
||||
AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION],
|
||||
[AM_AUTOMAKE_VERSION([1.11.1])dnl
|
||||
m4_ifndef([AC_AUTOCONF_VERSION],
|
||||
[m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl
|
||||
_AM_AUTOCONF_VERSION(m4_defn([AC_AUTOCONF_VERSION]))])
|
||||
|
||||
# AM_AUX_DIR_EXPAND -*- Autoconf -*-
|
||||
|
||||
# Copyright (C) 2001, 2003, 2005 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
# with or without modifications, as long as this notice is preserved.
|
||||
|
||||
# For projects using AC_CONFIG_AUX_DIR([foo]), Autoconf sets
|
||||
# $ac_aux_dir to `$srcdir/foo'. In other projects, it is set to
|
||||
# `$srcdir', `$srcdir/..', or `$srcdir/../..'.
|
||||
#
|
||||
# Of course, Automake must honor this variable whenever it calls a
|
||||
# tool from the auxiliary directory. The problem is that $srcdir (and
|
||||
# therefore $ac_aux_dir as well) can be either absolute or relative,
|
||||
# depending on how configure is run. This is pretty annoying, since
|
||||
# it makes $ac_aux_dir quite unusable in subdirectories: in the top
|
||||
# source directory, any form will work fine, but in subdirectories a
|
||||
# relative path needs to be adjusted first.
|
||||
#
|
||||
# $ac_aux_dir/missing
|
||||
# fails when called from a subdirectory if $ac_aux_dir is relative
|
||||
# $top_srcdir/$ac_aux_dir/missing
|
||||
# fails if $ac_aux_dir is absolute,
|
||||
# fails when called from a subdirectory in a VPATH build with
|
||||
# a relative $ac_aux_dir
|
||||
#
|
||||
# The reason of the latter failure is that $top_srcdir and $ac_aux_dir
|
||||
# are both prefixed by $srcdir. In an in-source build this is usually
|
||||
# harmless because $srcdir is `.', but things will break when you
|
||||
# start a VPATH build or use an absolute $srcdir.
|
||||
#
|
||||
# So we could use something similar to $top_srcdir/$ac_aux_dir/missing,
|
||||
# iff we strip the leading $srcdir from $ac_aux_dir. That would be:
|
||||
# am_aux_dir='\$(top_srcdir)/'`expr "$ac_aux_dir" : "$srcdir//*\(.*\)"`
|
||||
# and then we would define $MISSING as
|
||||
# MISSING="\${SHELL} $am_aux_dir/missing"
|
||||
# This will work as long as MISSING is not called from configure, because
|
||||
# unfortunately $(top_srcdir) has no meaning in configure.
|
||||
# However there are other variables, like CC, which are often used in
|
||||
# configure, and could therefore not use this "fixed" $ac_aux_dir.
|
||||
#
|
||||
# Another solution, used here, is to always expand $ac_aux_dir to an
|
||||
# absolute PATH. The drawback is that using absolute paths prevents a
|
||||
# configured tree from being moved without reconfiguration.
|
||||
|
||||
AC_DEFUN([AM_AUX_DIR_EXPAND],
|
||||
[dnl Rely on autoconf to set up CDPATH properly.
|
||||
AC_PREREQ([2.50])dnl
|
||||
# expand $ac_aux_dir to an absolute path
|
||||
am_aux_dir=`cd $ac_aux_dir && pwd`
|
||||
])
|
||||
|
||||
# AM_CONDITIONAL -*- Autoconf -*-
|
||||
|
||||
# Copyright (C) 1997, 2000, 2001, 2003, 2004, 2005, 2006, 2008
|
||||
# Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
# with or without modifications, as long as this notice is preserved.
|
||||
|
||||
# serial 9
|
||||
|
||||
# AM_CONDITIONAL(NAME, SHELL-CONDITION)
|
||||
# -------------------------------------
|
||||
# Define a conditional.
|
||||
AC_DEFUN([AM_CONDITIONAL],
|
||||
[AC_PREREQ(2.52)dnl
|
||||
ifelse([$1], [TRUE], [AC_FATAL([$0: invalid condition: $1])],
|
||||
[$1], [FALSE], [AC_FATAL([$0: invalid condition: $1])])dnl
|
||||
AC_SUBST([$1_TRUE])dnl
|
||||
AC_SUBST([$1_FALSE])dnl
|
||||
_AM_SUBST_NOTMAKE([$1_TRUE])dnl
|
||||
_AM_SUBST_NOTMAKE([$1_FALSE])dnl
|
||||
m4_define([_AM_COND_VALUE_$1], [$2])dnl
|
||||
if $2; then
|
||||
$1_TRUE=
|
||||
$1_FALSE='#'
|
||||
else
|
||||
$1_TRUE='#'
|
||||
$1_FALSE=
|
||||
fi
|
||||
AC_CONFIG_COMMANDS_PRE(
|
||||
[if test -z "${$1_TRUE}" && test -z "${$1_FALSE}"; then
|
||||
AC_MSG_ERROR([[conditional "$1" was never defined.
|
||||
Usually this means the macro was only invoked conditionally.]])
|
||||
fi])])
|
||||
|
||||
# Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2009
|
||||
# Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
# with or without modifications, as long as this notice is preserved.
|
||||
|
||||
# serial 10
|
||||
|
||||
# There are a few dirty hacks below to avoid letting `AC_PROG_CC' be
|
||||
# written in clear, in which case automake, when reading aclocal.m4,
|
||||
# will think it sees a *use*, and therefore will trigger all its
|
||||
# C support machinery. Also note that it means that autoscan, seeing
|
||||
# CC etc. in the Makefile, will ask for an AC_PROG_CC use...
|
||||
|
||||
|
||||
# _AM_DEPENDENCIES(NAME)
|
||||
# ----------------------
|
||||
# See how the compiler implements dependency checking.
|
||||
# NAME is "CC", "CXX", "GCJ", or "OBJC".
|
||||
# We try a few techniques and use that to set a single cache variable.
|
||||
#
|
||||
# We don't AC_REQUIRE the corresponding AC_PROG_CC since the latter was
|
||||
# modified to invoke _AM_DEPENDENCIES(CC); we would have a circular
|
||||
# dependency, and given that the user is not expected to run this macro,
|
||||
# just rely on AC_PROG_CC.
|
||||
AC_DEFUN([_AM_DEPENDENCIES],
|
||||
[AC_REQUIRE([AM_SET_DEPDIR])dnl
|
||||
AC_REQUIRE([AM_OUTPUT_DEPENDENCY_COMMANDS])dnl
|
||||
AC_REQUIRE([AM_MAKE_INCLUDE])dnl
|
||||
AC_REQUIRE([AM_DEP_TRACK])dnl
|
||||
|
||||
ifelse([$1], CC, [depcc="$CC" am_compiler_list=],
|
||||
[$1], CXX, [depcc="$CXX" am_compiler_list=],
|
||||
[$1], OBJC, [depcc="$OBJC" am_compiler_list='gcc3 gcc'],
|
||||
[$1], UPC, [depcc="$UPC" am_compiler_list=],
|
||||
[$1], GCJ, [depcc="$GCJ" am_compiler_list='gcc3 gcc'],
|
||||
[depcc="$$1" am_compiler_list=])
|
||||
|
||||
AC_CACHE_CHECK([dependency style of $depcc],
|
||||
[am_cv_$1_dependencies_compiler_type],
|
||||
[if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then
|
||||
# We make a subdir and do the tests there. Otherwise we can end up
|
||||
# making bogus files that we don't know about and never remove. For
|
||||
# instance it was reported that on HP-UX the gcc test will end up
|
||||
# making a dummy file named `D' -- because `-MD' means `put the output
|
||||
# in D'.
|
||||
mkdir conftest.dir
|
||||
# Copy depcomp to subdir because otherwise we won't find it if we're
|
||||
# using a relative directory.
|
||||
cp "$am_depcomp" conftest.dir
|
||||
cd conftest.dir
|
||||
# We will build objects and dependencies in a subdirectory because
|
||||
# it helps to detect inapplicable dependency modes. For instance
|
||||
# both Tru64's cc and ICC support -MD to output dependencies as a
|
||||
# side effect of compilation, but ICC will put the dependencies in
|
||||
# the current directory while Tru64 will put them in the object
|
||||
# directory.
|
||||
mkdir sub
|
||||
|
||||
am_cv_$1_dependencies_compiler_type=none
|
||||
if test "$am_compiler_list" = ""; then
|
||||
am_compiler_list=`sed -n ['s/^#*\([a-zA-Z0-9]*\))$/\1/p'] < ./depcomp`
|
||||
fi
|
||||
am__universal=false
|
||||
m4_case([$1], [CC],
|
||||
[case " $depcc " in #(
|
||||
*\ -arch\ *\ -arch\ *) am__universal=true ;;
|
||||
esac],
|
||||
[CXX],
|
||||
[case " $depcc " in #(
|
||||
*\ -arch\ *\ -arch\ *) am__universal=true ;;
|
||||
esac])
|
||||
|
||||
for depmode in $am_compiler_list; do
|
||||
# Setup a source with many dependencies, because some compilers
|
||||
# like to wrap large dependency lists on column 80 (with \), and
|
||||
# we should not choose a depcomp mode which is confused by this.
|
||||
#
|
||||
# We need to recreate these files for each test, as the compiler may
|
||||
# overwrite some of them when testing with obscure command lines.
|
||||
# This happens at least with the AIX C compiler.
|
||||
: > sub/conftest.c
|
||||
for i in 1 2 3 4 5 6; do
|
||||
echo '#include "conftst'$i'.h"' >> sub/conftest.c
|
||||
# Using `: > sub/conftst$i.h' creates only sub/conftst1.h with
|
||||
# Solaris 8's {/usr,}/bin/sh.
|
||||
touch sub/conftst$i.h
|
||||
done
|
||||
echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf
|
||||
|
||||
# We check with `-c' and `-o' for the sake of the "dashmstdout"
|
||||
# mode. It turns out that the SunPro C++ compiler does not properly
|
||||
# handle `-M -o', and we need to detect this. Also, some Intel
|
||||
# versions had trouble with output in subdirs
|
||||
am__obj=sub/conftest.${OBJEXT-o}
|
||||
am__minus_obj="-o $am__obj"
|
||||
case $depmode in
|
||||
gcc)
|
||||
# This depmode causes a compiler race in universal mode.
|
||||
test "$am__universal" = false || continue
|
||||
;;
|
||||
nosideeffect)
|
||||
# after this tag, mechanisms are not by side-effect, so they'll
|
||||
# only be used when explicitly requested
|
||||
if test "x$enable_dependency_tracking" = xyes; then
|
||||
continue
|
||||
else
|
||||
break
|
||||
fi
|
||||
;;
|
||||
msvisualcpp | msvcmsys)
|
||||
# This compiler won't grok `-c -o', but also, the minuso test has
|
||||
# not run yet. These depmodes are late enough in the game, and
|
||||
# so weak that their functioning should not be impacted.
|
||||
am__obj=conftest.${OBJEXT-o}
|
||||
am__minus_obj=
|
||||
;;
|
||||
none) break ;;
|
||||
esac
|
||||
if depmode=$depmode \
|
||||
source=sub/conftest.c object=$am__obj \
|
||||
depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \
|
||||
$SHELL ./depcomp $depcc -c $am__minus_obj sub/conftest.c \
|
||||
>/dev/null 2>conftest.err &&
|
||||
grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 &&
|
||||
grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 &&
|
||||
grep $am__obj sub/conftest.Po > /dev/null 2>&1 &&
|
||||
${MAKE-make} -s -f confmf > /dev/null 2>&1; then
|
||||
# icc doesn't choke on unknown options, it will just issue warnings
|
||||
# or remarks (even with -Werror). So we grep stderr for any message
|
||||
# that says an option was ignored or not supported.
|
||||
# When given -MP, icc 7.0 and 7.1 complain thusly:
|
||||
# icc: Command line warning: ignoring option '-M'; no argument required
|
||||
# The diagnosis changed in icc 8.0:
|
||||
# icc: Command line remark: option '-MP' not supported
|
||||
if (grep 'ignoring option' conftest.err ||
|
||||
grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else
|
||||
am_cv_$1_dependencies_compiler_type=$depmode
|
||||
break
|
||||
fi
|
||||
fi
|
||||
done
|
||||
|
||||
cd ..
|
||||
rm -rf conftest.dir
|
||||
else
|
||||
am_cv_$1_dependencies_compiler_type=none
|
||||
fi
|
||||
])
|
||||
AC_SUBST([$1DEPMODE], [depmode=$am_cv_$1_dependencies_compiler_type])
|
||||
AM_CONDITIONAL([am__fastdep$1], [
|
||||
test "x$enable_dependency_tracking" != xno \
|
||||
&& test "$am_cv_$1_dependencies_compiler_type" = gcc3])
|
||||
])
|
||||
|
||||
|
||||
# AM_SET_DEPDIR
|
||||
# -------------
|
||||
# Choose a directory name for dependency files.
|
||||
# This macro is AC_REQUIREd in _AM_DEPENDENCIES
|
||||
AC_DEFUN([AM_SET_DEPDIR],
|
||||
[AC_REQUIRE([AM_SET_LEADING_DOT])dnl
|
||||
AC_SUBST([DEPDIR], ["${am__leading_dot}deps"])dnl
|
||||
])
|
||||
|
||||
|
||||
# AM_DEP_TRACK
|
||||
# ------------
|
||||
AC_DEFUN([AM_DEP_TRACK],
|
||||
[AC_ARG_ENABLE(dependency-tracking,
|
||||
[ --disable-dependency-tracking speeds up one-time build
|
||||
--enable-dependency-tracking do not reject slow dependency extractors])
|
||||
if test "x$enable_dependency_tracking" != xno; then
|
||||
am_depcomp="$ac_aux_dir/depcomp"
|
||||
AMDEPBACKSLASH='\'
|
||||
fi
|
||||
AM_CONDITIONAL([AMDEP], [test "x$enable_dependency_tracking" != xno])
|
||||
AC_SUBST([AMDEPBACKSLASH])dnl
|
||||
_AM_SUBST_NOTMAKE([AMDEPBACKSLASH])dnl
|
||||
])
|
||||
|
||||
# Generate code to set up dependency tracking. -*- Autoconf -*-
|
||||
|
||||
# Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2008
|
||||
# Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
# with or without modifications, as long as this notice is preserved.
|
||||
|
||||
#serial 5
|
||||
|
||||
# _AM_OUTPUT_DEPENDENCY_COMMANDS
# ------------------------------
# Shell code run from config.status: for every file listed in
# $CONFIG_FILES that looks like an Automake-generated Makefile, create a
# dummy file for each dependency file the Makefile includes from its
# $(DEPDIR), so that the include directives do not fail on the first
# `make' run.
AC_DEFUN([_AM_OUTPUT_DEPENDENCY_COMMANDS],
[{
  # Autoconf 2.62 quotes --file arguments for eval, but not when files
  # are listed without --file.  Let's play safe and only enable the eval
  # if we detect the quoting.
  case $CONFIG_FILES in
  *\'*) eval set x "$CONFIG_FILES" ;;
  *)   set x $CONFIG_FILES ;;
  esac
  shift
  for mf
  do
    # Strip MF so we end up with the name of the file.
    mf=`echo "$mf" | sed -e 's/:.*$//'`
    # Check whether this is an Automake generated Makefile or not.
    # We used to match only the files named `Makefile.in', but
    # some people rename them; so instead we look at the file content.
    # Grep'ing the first line is not enough: some people post-process
    # each Makefile.in and add a new line on top of each file to say so.
    # Grep'ing the whole file is not good either: AIX grep has a line
    # limit of 2048, but all sed's we know understand at least 4000.
    if sed -n 's,^#.*generated by automake.*,X,p' "$mf" | grep X >/dev/null 2>&1; then
      dirpart=`AS_DIRNAME("$mf")`
    else
      continue
    fi
    # Extract the definition of DEPDIR, am__include, and am__quote
    # from the Makefile without running `make'.
    DEPDIR=`sed -n 's/^DEPDIR = //p' < "$mf"`
    test -z "$DEPDIR" && continue
    am__include=`sed -n 's/^am__include = //p' < "$mf"`
    # Skip the file when it does not define am__include at all.
    test -z "$am__include" && continue
    am__quote=`sed -n 's/^am__quote = //p' < "$mf"`
    # When using ansi2knr, U may be empty or an underscore; expand it
    U=`sed -n 's/^U = //p' < "$mf"`
    # Find all dependency output files, they are included files with
    # $(DEPDIR) in their names.  We invoke sed twice because it is the
    # simplest approach to changing $(DEPDIR) to its actual value in the
    # expansion.
    for file in `sed -n "
      s/^$am__include $am__quote\(.*(DEPDIR).*\)$am__quote"'$/\1/p' <"$mf" | \
         sed -e 's/\$(DEPDIR)/'"$DEPDIR"'/g' -e 's/\$U/'"$U"'/g'`; do
      # Make sure the directory exists.
      test -f "$dirpart/$file" && continue
      fdir=`AS_DIRNAME(["$file"])`
      AS_MKDIR_P([$dirpart/$fdir])
      # echo "creating $dirpart/$file"
      echo '# dummy' > "$dirpart/$file"
    done
  done
}
])# _AM_OUTPUT_DEPENDENCY_COMMANDS
|
||||
|
||||
|
||||
# AM_OUTPUT_DEPENDENCY_COMMANDS
|
||||
# -----------------------------
|
||||
# This macro should only be invoked once -- use via AC_REQUIRE.
|
||||
#
|
||||
# This code is only required when automatic dependency tracking
|
||||
# is enabled. FIXME. This creates each `.P' file that we will
|
||||
# need in order to bootstrap the dependency handling code.
|
||||
AC_DEFUN([AM_OUTPUT_DEPENDENCY_COMMANDS],
|
||||
[AC_CONFIG_COMMANDS([depfiles],
|
||||
[test x"$AMDEP_TRUE" != x"" || _AM_OUTPUT_DEPENDENCY_COMMANDS],
|
||||
[AMDEP_TRUE="$AMDEP_TRUE" ac_aux_dir="$ac_aux_dir"])
|
||||
])
|
||||
|
||||
# Do all the work for Automake. -*- Autoconf -*-
|
||||
|
||||
# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
|
||||
# 2005, 2006, 2008, 2009 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
# with or without modifications, as long as this notice is preserved.
|
||||
|
||||
# serial 16
|
||||
|
||||
# This macro actually does too much. Some checks are only needed if
|
||||
# your package does certain things. But this isn't really a big deal.
|
||||
|
||||
# AM_INIT_AUTOMAKE(PACKAGE, VERSION, [NO-DEFINE])
|
||||
# AM_INIT_AUTOMAKE([OPTIONS])
|
||||
# -----------------------------------------------
|
||||
# The call with PACKAGE and VERSION arguments is the old style
|
||||
# call (pre autoconf-2.50), which is being phased out. PACKAGE
|
||||
# and VERSION should now be passed to AC_INIT and removed from
|
||||
# the call to AM_INIT_AUTOMAKE.
|
||||
# We support both call styles for the transition. After
|
||||
# the next Automake release, Autoconf can make the AC_INIT
|
||||
# arguments mandatory, and then we can depend on a new Autoconf
|
||||
# release and drop the old call support.
|
||||
AC_DEFUN([AM_INIT_AUTOMAKE],
|
||||
[AC_PREREQ([2.62])dnl
|
||||
dnl Autoconf wants to disallow AM_ names. We explicitly allow
|
||||
dnl the ones we care about.
|
||||
m4_pattern_allow([^AM_[A-Z]+FLAGS$])dnl
|
||||
AC_REQUIRE([AM_SET_CURRENT_AUTOMAKE_VERSION])dnl
|
||||
AC_REQUIRE([AC_PROG_INSTALL])dnl
|
||||
if test "`cd $srcdir && pwd`" != "`pwd`"; then
|
||||
# Use -I$(srcdir) only when $(srcdir) != ., so that make's output
|
||||
# is not polluted with repeated "-I."
|
||||
AC_SUBST([am__isrc], [' -I$(srcdir)'])_AM_SUBST_NOTMAKE([am__isrc])dnl
|
||||
# test to see if srcdir already configured
|
||||
if test -f $srcdir/config.status; then
|
||||
AC_MSG_ERROR([source directory already configured; run "make distclean" there first])
|
||||
fi
|
||||
fi
|
||||
|
||||
# test whether we have cygpath
|
||||
if test -z "$CYGPATH_W"; then
|
||||
if (cygpath --version) >/dev/null 2>/dev/null; then
|
||||
CYGPATH_W='cygpath -w'
|
||||
else
|
||||
CYGPATH_W=echo
|
||||
fi
|
||||
fi
|
||||
AC_SUBST([CYGPATH_W])
|
||||
|
||||
# Define the identity of the package.
|
||||
dnl Distinguish between old-style and new-style calls.
|
||||
m4_ifval([$2],
|
||||
[m4_ifval([$3], [_AM_SET_OPTION([no-define])])dnl
|
||||
AC_SUBST([PACKAGE], [$1])dnl
|
||||
AC_SUBST([VERSION], [$2])],
|
||||
[_AM_SET_OPTIONS([$1])dnl
|
||||
dnl Diagnose old-style AC_INIT with new-style AM_AUTOMAKE_INIT.
|
||||
m4_if(m4_ifdef([AC_PACKAGE_NAME], 1)m4_ifdef([AC_PACKAGE_VERSION], 1), 11,,
|
||||
[m4_fatal([AC_INIT should be called with package and version arguments])])dnl
|
||||
AC_SUBST([PACKAGE], ['AC_PACKAGE_TARNAME'])dnl
|
||||
AC_SUBST([VERSION], ['AC_PACKAGE_VERSION'])])dnl
|
||||
|
||||
_AM_IF_OPTION([no-define],,
|
||||
[AC_DEFINE_UNQUOTED(PACKAGE, "$PACKAGE", [Name of package])
|
||||
AC_DEFINE_UNQUOTED(VERSION, "$VERSION", [Version number of package])])dnl
|
||||
|
||||
# Some tools Automake needs.
|
||||
AC_REQUIRE([AM_SANITY_CHECK])dnl
|
||||
AC_REQUIRE([AC_ARG_PROGRAM])dnl
|
||||
AM_MISSING_PROG(ACLOCAL, aclocal-${am__api_version})
|
||||
AM_MISSING_PROG(AUTOCONF, autoconf)
|
||||
AM_MISSING_PROG(AUTOMAKE, automake-${am__api_version})
|
||||
AM_MISSING_PROG(AUTOHEADER, autoheader)
|
||||
AM_MISSING_PROG(MAKEINFO, makeinfo)
|
||||
AC_REQUIRE([AM_PROG_INSTALL_SH])dnl
|
||||
AC_REQUIRE([AM_PROG_INSTALL_STRIP])dnl
|
||||
AC_REQUIRE([AM_PROG_MKDIR_P])dnl
|
||||
# We need awk for the "check" target. The system "awk" is bad on
|
||||
# some platforms.
|
||||
AC_REQUIRE([AC_PROG_AWK])dnl
|
||||
AC_REQUIRE([AC_PROG_MAKE_SET])dnl
|
||||
AC_REQUIRE([AM_SET_LEADING_DOT])dnl
|
||||
_AM_IF_OPTION([tar-ustar], [_AM_PROG_TAR([ustar])],
|
||||
[_AM_IF_OPTION([tar-pax], [_AM_PROG_TAR([pax])],
|
||||
[_AM_PROG_TAR([v7])])])
|
||||
_AM_IF_OPTION([no-dependencies],,
|
||||
[AC_PROVIDE_IFELSE([AC_PROG_CC],
|
||||
[_AM_DEPENDENCIES(CC)],
|
||||
[define([AC_PROG_CC],
|
||||
defn([AC_PROG_CC])[_AM_DEPENDENCIES(CC)])])dnl
|
||||
AC_PROVIDE_IFELSE([AC_PROG_CXX],
|
||||
[_AM_DEPENDENCIES(CXX)],
|
||||
[define([AC_PROG_CXX],
|
||||
defn([AC_PROG_CXX])[_AM_DEPENDENCIES(CXX)])])dnl
|
||||
AC_PROVIDE_IFELSE([AC_PROG_OBJC],
|
||||
[_AM_DEPENDENCIES(OBJC)],
|
||||
[define([AC_PROG_OBJC],
|
||||
defn([AC_PROG_OBJC])[_AM_DEPENDENCIES(OBJC)])])dnl
|
||||
])
|
||||
_AM_IF_OPTION([silent-rules], [AC_REQUIRE([AM_SILENT_RULES])])dnl
|
||||
dnl The `parallel-tests' driver may need to know about EXEEXT, so add the
|
||||
dnl `am__EXEEXT' conditional if _AM_COMPILER_EXEEXT was seen. This macro
|
||||
dnl is hooked onto _AC_COMPILER_EXEEXT early, see below.
|
||||
AC_CONFIG_COMMANDS_PRE(dnl
|
||||
[m4_provide_if([_AM_COMPILER_EXEEXT],
|
||||
[AM_CONDITIONAL([am__EXEEXT], [test -n "$EXEEXT"])])])dnl
|
||||
])
|
||||
|
||||
dnl Hook into `_AC_COMPILER_EXEEXT' early to learn its expansion. Do not
|
||||
dnl add the conditional right here, as _AC_COMPILER_EXEEXT may be further
|
||||
dnl mangled by Autoconf and run in a shell conditional statement.
|
||||
m4_define([_AC_COMPILER_EXEEXT],
|
||||
m4_defn([_AC_COMPILER_EXEEXT])[m4_provide([_AM_COMPILER_EXEEXT])])
|
||||
|
||||
|
||||
# When config.status generates a header, we must update the stamp-h file.
|
||||
# This file resides in the same directory as the config header
|
||||
# that is generated. The stamp files are numbered to have different names.
|
||||
|
||||
# Autoconf calls _AC_AM_CONFIG_HEADER_HOOK (when defined) in the
|
||||
# loop where config.status creates the headers, so we can generate
|
||||
# our stamp files there.
|
||||
AC_DEFUN([_AC_AM_CONFIG_HEADER_HOOK],
|
||||
[# Compute $1's index in $config_headers.
|
||||
_am_arg=$1
|
||||
_am_stamp_count=1
|
||||
for _am_header in $config_headers :; do
|
||||
case $_am_header in
|
||||
$_am_arg | $_am_arg:* )
|
||||
break ;;
|
||||
* )
|
||||
_am_stamp_count=`expr $_am_stamp_count + 1` ;;
|
||||
esac
|
||||
done
|
||||
echo "timestamp for $_am_arg" >`AS_DIRNAME(["$_am_arg"])`/stamp-h[]$_am_stamp_count])
|
||||
|
||||
# Copyright (C) 2001, 2003, 2005, 2008 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
# with or without modifications, as long as this notice is preserved.
|
||||
|
||||
# AM_PROG_INSTALL_SH
# ------------------
# Define $install_sh, pointing at the install-sh script in $am_aux_dir,
# unless install_sh is already set when this code runs.  The result is
# exported with AC_SUBST.
AC_DEFUN([AM_PROG_INSTALL_SH],
[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl
# Only compute a default when install_sh is unset, so that a value
# supplied by the caller is honored.
if test x"${install_sh+set}" != xset; then
  case $am_aux_dir in
  *\ * | *\ *)
    # Quote the script path when the directory name contains a blank.
    install_sh="\${SHELL} '$am_aux_dir/install-sh'" ;;
  *)
    install_sh="\${SHELL} $am_aux_dir/install-sh"
  esac
fi
AC_SUBST(install_sh)])
|
||||
|
||||
# Copyright (C) 2003, 2005 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
# with or without modifications, as long as this notice is preserved.
|
||||
|
||||
# serial 2
|
||||
|
||||
# Check whether the underlying file-system supports filenames
|
||||
# with a leading dot. For instance MS-DOS doesn't.
|
||||
AC_DEFUN([AM_SET_LEADING_DOT],
|
||||
[rm -rf .tst 2>/dev/null
|
||||
mkdir .tst 2>/dev/null
|
||||
if test -d .tst; then
|
||||
am__leading_dot=.
|
||||
else
|
||||
am__leading_dot=_
|
||||
fi
|
||||
rmdir .tst 2>/dev/null
|
||||
AC_SUBST([am__leading_dot])])
|
||||
|
||||
# Check to see how 'make' treats includes. -*- Autoconf -*-
|
||||
|
||||
# Copyright (C) 2001, 2002, 2003, 2005, 2009 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
# with or without modifications, as long as this notice is preserved.
|
||||
|
||||
# serial 4
|
||||
|
||||
# AM_MAKE_INCLUDE()
# -----------------
# Check to see how make treats includes.
# Sets and substitutes am__include (the include directive, or "#" when
# none works) and am__quote (the quoting character the directive needs).
AC_DEFUN([AM_MAKE_INCLUDE],
[am_make=${MAKE-make}
# Write a tiny makefile fragment to be included by the probes below.
# The recipe line inside the here-document must begin with a tab.
cat > confinc << 'END'
am__doit:
	@echo this is the am__doit target
.PHONY: am__doit
END
# If we don't find an include directive, just comment out the code.
AC_MSG_CHECKING([for style of include used by $am_make])
am__include="#"
am__quote=
_am_result=none
# First try GNU make style include.
echo "include confinc" > confmf
# Ignore all kinds of additional output from `make'.
case `$am_make -s -f confmf 2> /dev/null` in #(
*the\ am__doit\ target*)
  am__include=include
  am__quote=
  _am_result=GNU
  ;;
esac
# Now try BSD make style include.
if test "$am__include" = "#"; then
   echo '.include "confinc"' > confmf
   case `$am_make -s -f confmf 2> /dev/null` in #(
   *the\ am__doit\ target*)
     am__include=.include
     am__quote="\""
     _am_result=BSD
     ;;
   esac
fi
AC_SUBST([am__include])
AC_SUBST([am__quote])
AC_MSG_RESULT([$_am_result])
rm -f confinc confmf
])
|
||||
|
||||
# Copyright (C) 1999, 2000, 2001, 2003, 2004, 2005, 2008
|
||||
# Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
# with or without modifications, as long as this notice is preserved.
|
||||
|
||||
# serial 6
|
||||
|
||||
# AM_PROG_CC_C_O
|
||||
# --------------
|
||||
# Like AC_PROG_CC_C_O, but changed for automake.
|
||||
AC_DEFUN([AM_PROG_CC_C_O],
|
||||
[AC_REQUIRE([AC_PROG_CC_C_O])dnl
|
||||
AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl
|
||||
AC_REQUIRE_AUX_FILE([compile])dnl
|
||||
# FIXME: we rely on the cache variable name because
|
||||
# there is no other way.
|
||||
set dummy $CC
|
||||
am_cc=`echo $[2] | sed ['s/[^a-zA-Z0-9_]/_/g;s/^[0-9]/_/']`
|
||||
eval am_t=\$ac_cv_prog_cc_${am_cc}_c_o
|
||||
if test "$am_t" != yes; then
|
||||
# Losing compiler, so override with the script.
|
||||
# FIXME: It is wrong to rewrite CC.
|
||||
# But if we don't then we get into trouble of one sort or another.
|
||||
# A longer-term fix would be to have automake use am__CC in this case,
|
||||
# and then we could set am__CC="\$(top_srcdir)/compile \$(CC)"
|
||||
CC="$am_aux_dir/compile $CC"
|
||||
fi
|
||||
dnl Make sure AC_PROG_CC is never called again, or it will override our
|
||||
dnl setting of CC.
|
||||
m4_define([AC_PROG_CC],
|
||||
[m4_fatal([AC_PROG_CC cannot be called after AM_PROG_CC_C_O])])
|
||||
])
|
||||
|
||||
# Fake the existence of programs that GNU maintainers use. -*- Autoconf -*-
|
||||
|
||||
# Copyright (C) 1997, 1999, 2000, 2001, 2003, 2004, 2005, 2008
|
||||
# Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
# with or without modifications, as long as this notice is preserved.
|
||||
|
||||
# serial 6
|
||||
|
||||
# AM_MISSING_PROG(NAME, PROGRAM)
|
||||
# ------------------------------
|
||||
AC_DEFUN([AM_MISSING_PROG],
|
||||
[AC_REQUIRE([AM_MISSING_HAS_RUN])
|
||||
$1=${$1-"${am_missing_run}$2"}
|
||||
AC_SUBST($1)])
|
||||
|
||||
|
||||
# AM_MISSING_HAS_RUN
|
||||
# ------------------
|
||||
# Define MISSING if not defined so far and test if it supports --run.
|
||||
# If it does, set am_missing_run to use it, otherwise, to nothing.
|
||||
AC_DEFUN([AM_MISSING_HAS_RUN],
|
||||
[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl
|
||||
AC_REQUIRE_AUX_FILE([missing])dnl
|
||||
if test x"${MISSING+set}" != xset; then
|
||||
case $am_aux_dir in
|
||||
*\ * | *\ *)
|
||||
MISSING="\${SHELL} \"$am_aux_dir/missing\"" ;;
|
||||
*)
|
||||
MISSING="\${SHELL} $am_aux_dir/missing" ;;
|
||||
esac
|
||||
fi
|
||||
# Use eval to expand $SHELL
|
||||
if eval "$MISSING --run true"; then
|
||||
am_missing_run="$MISSING --run "
|
||||
else
|
||||
am_missing_run=
|
||||
AC_MSG_WARN([`missing' script is too old or missing])
|
||||
fi
|
||||
])
|
||||
|
||||
# Copyright (C) 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
# with or without modifications, as long as this notice is preserved.
|
||||
|
||||
# AM_PROG_MKDIR_P
|
||||
# ---------------
|
||||
# Check for `mkdir -p'.
|
||||
AC_DEFUN([AM_PROG_MKDIR_P],
|
||||
[AC_PREREQ([2.60])dnl
|
||||
AC_REQUIRE([AC_PROG_MKDIR_P])dnl
|
||||
dnl Automake 1.8 to 1.9.6 used to define mkdir_p. We now use MKDIR_P,
|
||||
dnl while keeping a definition of mkdir_p for backward compatibility.
|
||||
dnl @MKDIR_P@ is magic: AC_OUTPUT adjusts its value for each Makefile.
|
||||
dnl However we cannot define mkdir_p as $(MKDIR_P) for the sake of
|
||||
dnl Makefile.ins that do not define MKDIR_P, so we do our own
|
||||
dnl adjustment using top_builddir (which is defined more often than
|
||||
dnl MKDIR_P).
|
||||
AC_SUBST([mkdir_p], ["$MKDIR_P"])dnl
|
||||
case $mkdir_p in
|
||||
[[\\/$]]* | ?:[[\\/]]*) ;;
|
||||
*/*) mkdir_p="\$(top_builddir)/$mkdir_p" ;;
|
||||
esac
|
||||
])
|
||||
|
||||
# Helper functions for option handling. -*- Autoconf -*-
|
||||
|
||||
# Copyright (C) 2001, 2002, 2003, 2005, 2008 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
# with or without modifications, as long as this notice is preserved.
|
||||
|
||||
# serial 4
|
||||
|
||||
# _AM_MANGLE_OPTION(NAME)
|
||||
# -----------------------
|
||||
AC_DEFUN([_AM_MANGLE_OPTION],
|
||||
[[_AM_OPTION_]m4_bpatsubst($1, [[^a-zA-Z0-9_]], [_])])
|
||||
|
||||
# _AM_SET_OPTION(NAME)
|
||||
# ------------------------------
|
||||
# Set option NAME. Presently that only means defining a flag for this option.
|
||||
AC_DEFUN([_AM_SET_OPTION],
|
||||
[m4_define(_AM_MANGLE_OPTION([$1]), 1)])
|
||||
|
||||
# _AM_SET_OPTIONS(OPTIONS)
|
||||
# ----------------------------------
|
||||
# OPTIONS is a space-separated list of Automake options.
|
||||
AC_DEFUN([_AM_SET_OPTIONS],
|
||||
[m4_foreach_w([_AM_Option], [$1], [_AM_SET_OPTION(_AM_Option)])])
|
||||
|
||||
# _AM_IF_OPTION(OPTION, IF-SET, [IF-NOT-SET])
|
||||
# -------------------------------------------
|
||||
# Execute IF-SET if OPTION is set, IF-NOT-SET otherwise.
|
||||
AC_DEFUN([_AM_IF_OPTION],
|
||||
[m4_ifset(_AM_MANGLE_OPTION([$1]), [$2], [$3])])
|
||||
|
||||
# Check to make sure that the build environment is sane. -*- Autoconf -*-
|
||||
|
||||
# Copyright (C) 1996, 1997, 2000, 2001, 2003, 2005, 2008
|
||||
# Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
# with or without modifications, as long as this notice is preserved.
|
||||
|
||||
# serial 5
|
||||
|
||||
# AM_SANITY_CHECK
|
||||
# ---------------
|
||||
AC_DEFUN([AM_SANITY_CHECK],
|
||||
[AC_MSG_CHECKING([whether build environment is sane])
|
||||
# Just in case
|
||||
sleep 1
|
||||
echo timestamp > conftest.file
|
||||
# Reject unsafe characters in $srcdir or the absolute working directory
|
||||
# name. Accept space and tab only in the latter.
|
||||
am_lf='
|
||||
'
|
||||
case `pwd` in
|
||||
*[[\\\"\#\$\&\'\`$am_lf]]*)
|
||||
AC_MSG_ERROR([unsafe absolute working directory name]);;
|
||||
esac
|
||||
case $srcdir in
|
||||
*[[\\\"\#\$\&\'\`$am_lf\ \ ]]*)
|
||||
AC_MSG_ERROR([unsafe srcdir value: `$srcdir']);;
|
||||
esac
|
||||
|
||||
# Do `set' in a subshell so we don't clobber the current shell's
|
||||
# arguments. Must try -L first in case configure is actually a
|
||||
# symlink; some systems play weird games with the mod time of symlinks
|
||||
# (eg FreeBSD returns the mod time of the symlink's containing
|
||||
# directory).
|
||||
if (
|
||||
set X `ls -Lt "$srcdir/configure" conftest.file 2> /dev/null`
|
||||
if test "$[*]" = "X"; then
|
||||
# -L didn't work.
|
||||
set X `ls -t "$srcdir/configure" conftest.file`
|
||||
fi
|
||||
rm -f conftest.file
|
||||
if test "$[*]" != "X $srcdir/configure conftest.file" \
|
||||
&& test "$[*]" != "X conftest.file $srcdir/configure"; then
|
||||
|
||||
# If neither matched, then we have a broken ls. This can happen
|
||||
# if, for instance, CONFIG_SHELL is bash and it inherits a
|
||||
# broken ls alias from the environment. This has actually
|
||||
# happened. Such a system could not be considered "sane".
|
||||
AC_MSG_ERROR([ls -t appears to fail. Make sure there is not a broken
|
||||
alias in your environment])
|
||||
fi
|
||||
|
||||
test "$[2]" = conftest.file
|
||||
)
|
||||
then
|
||||
# Ok.
|
||||
:
|
||||
else
|
||||
AC_MSG_ERROR([newly created file is older than distributed files!
|
||||
Check your system clock])
|
||||
fi
|
||||
AC_MSG_RESULT(yes)])
|
||||
|
||||
# Copyright (C) 2001, 2003, 2005 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
# with or without modifications, as long as this notice is preserved.
|
||||
|
||||
# AM_PROG_INSTALL_STRIP
|
||||
# ---------------------
|
||||
# One issue with vendor `install' (even GNU) is that you can't
|
||||
# specify the program used to strip binaries. This is especially
|
||||
# annoying in cross-compiling environments, where the build's strip
|
||||
# is unlikely to handle the host's binaries.
|
||||
# Fortunately install-sh will honor a STRIPPROG variable, so we
|
||||
# always use install-sh in `make install-strip', and initialize
|
||||
# STRIPPROG with the value of the STRIP variable (set by the user).
|
||||
AC_DEFUN([AM_PROG_INSTALL_STRIP],
|
||||
[AC_REQUIRE([AM_PROG_INSTALL_SH])dnl
|
||||
# Installed binaries are usually stripped using `strip' when the user
|
||||
# runs `make install-strip'. However `strip' might not be the right
|
||||
# tool to use in cross-compilation environments, therefore Automake
|
||||
# will honor the `STRIP' environment variable to overrule this program.
|
||||
dnl Don't test for $cross_compiling = yes, because it might be `maybe'.
|
||||
if test "$cross_compiling" != no; then
|
||||
AC_CHECK_TOOL([STRIP], [strip], :)
|
||||
fi
|
||||
INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s"
|
||||
AC_SUBST([INSTALL_STRIP_PROGRAM])])
|
||||
|
||||
# Copyright (C) 2006, 2008 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
# with or without modifications, as long as this notice is preserved.
|
||||
|
||||
# serial 2
|
||||
|
||||
# _AM_SUBST_NOTMAKE(VARIABLE)
|
||||
# ---------------------------
|
||||
# Prevent Automake from outputting VARIABLE = @VARIABLE@ in Makefile.in.
|
||||
# This macro is traced by Automake.
|
||||
AC_DEFUN([_AM_SUBST_NOTMAKE])
|
||||
|
||||
# AM_SUBST_NOTMAKE(VARIABLE)
|
||||
# ---------------------------
|
||||
# Public sister of _AM_SUBST_NOTMAKE.
|
||||
AC_DEFUN([AM_SUBST_NOTMAKE], [_AM_SUBST_NOTMAKE($@)])
|
||||
|
||||
# Check how to create a tarball. -*- Autoconf -*-
|
||||
|
||||
# Copyright (C) 2004, 2005 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
# with or without modifications, as long as this notice is preserved.
|
||||
|
||||
# serial 2
|
||||
|
||||
# _AM_PROG_TAR(FORMAT)
|
||||
# --------------------
|
||||
# Check how to create a tarball in format FORMAT.
|
||||
# FORMAT should be one of `v7', `ustar', or `pax'.
|
||||
#
|
||||
# Substitute a variable $(am__tar) that is a command
|
||||
# writing to stdout a FORMAT-tarball containing the directory
|
||||
# $tardir.
|
||||
# tardir=directory && $(am__tar) > result.tar
|
||||
#
|
||||
# Substitute a variable $(am__untar) that extracts such
|
||||
# a tarball read from stdin.
|
||||
# $(am__untar) < result.tar
|
||||
AC_DEFUN([_AM_PROG_TAR],
|
||||
[# Always define AMTAR for backward compatibility.
|
||||
AM_MISSING_PROG([AMTAR], [tar])
|
||||
m4_if([$1], [v7],
|
||||
[am__tar='${AMTAR} chof - "$$tardir"'; am__untar='${AMTAR} xf -'],
|
||||
[m4_case([$1], [ustar],, [pax],,
|
||||
[m4_fatal([Unknown tar format])])
|
||||
AC_MSG_CHECKING([how to create a $1 tar archive])
|
||||
# Loop over all known methods to create a tar archive until one works.
|
||||
_am_tools='gnutar m4_if([$1], [ustar], [plaintar]) pax cpio none'
|
||||
_am_tools=${am_cv_prog_tar_$1-$_am_tools}
|
||||
# Do not fold the above two lines into one, because Tru64 sh and
|
||||
# Solaris sh will not grok spaces in the rhs of `-'.
|
||||
for _am_tool in $_am_tools
|
||||
do
|
||||
case $_am_tool in
|
||||
gnutar)
|
||||
for _am_tar in tar gnutar gtar;
|
||||
do
|
||||
AM_RUN_LOG([$_am_tar --version]) && break
|
||||
done
|
||||
am__tar="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$$tardir"'
|
||||
am__tar_="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$tardir"'
|
||||
am__untar="$_am_tar -xf -"
|
||||
;;
|
||||
plaintar)
|
||||
# Must skip GNU tar: if it does not support --format= it doesn't create
|
||||
# ustar tarball either.
|
||||
(tar --version) >/dev/null 2>&1 && continue
|
||||
am__tar='tar chf - "$$tardir"'
|
||||
am__tar_='tar chf - "$tardir"'
|
||||
am__untar='tar xf -'
|
||||
;;
|
||||
pax)
|
||||
am__tar='pax -L -x $1 -w "$$tardir"'
|
||||
am__tar_='pax -L -x $1 -w "$tardir"'
|
||||
am__untar='pax -r'
|
||||
;;
|
||||
cpio)
|
||||
am__tar='find "$$tardir" -print | cpio -o -H $1 -L'
|
||||
am__tar_='find "$tardir" -print | cpio -o -H $1 -L'
|
||||
am__untar='cpio -i -H $1 -d'
|
||||
;;
|
||||
none)
|
||||
am__tar=false
|
||||
am__tar_=false
|
||||
am__untar=false
|
||||
;;
|
||||
esac
|
||||
|
||||
# If the value was cached, stop now. We just wanted to have am__tar
|
||||
# and am__untar set.
|
||||
test -n "${am_cv_prog_tar_$1}" && break
|
||||
|
||||
# tar/untar a dummy directory, and stop if the command works
|
||||
rm -rf conftest.dir
|
||||
mkdir conftest.dir
|
||||
echo GrepMe > conftest.dir/file
|
||||
AM_RUN_LOG([tardir=conftest.dir && eval $am__tar_ >conftest.tar])
|
||||
rm -rf conftest.dir
|
||||
if test -s conftest.tar; then
|
||||
AM_RUN_LOG([$am__untar <conftest.tar])
|
||||
grep GrepMe conftest.dir/file >/dev/null 2>&1 && break
|
||||
fi
|
||||
done
|
||||
rm -rf conftest.dir
|
||||
|
||||
AC_CACHE_VAL([am_cv_prog_tar_$1], [am_cv_prog_tar_$1=$_am_tool])
|
||||
AC_MSG_RESULT([$am_cv_prog_tar_$1])])
|
||||
AC_SUBST([am__tar])
|
||||
AC_SUBST([am__untar])
|
||||
]) # _AM_PROG_TAR
|
||||
|
||||
m4_include([m4/libtool.m4])
|
||||
m4_include([m4/ltoptions.m4])
|
||||
m4_include([m4/ltsugar.m4])
|
||||
m4_include([m4/ltversion.m4])
|
||||
m4_include([m4/lt~obsolete.m4])
|
143
src/Onigmo/compile
Normal file
143
src/Onigmo/compile
Normal file
@ -0,0 +1,143 @@
|
||||
#! /bin/sh
|
||||
# Wrapper for compilers which do not understand `-c -o'.
|
||||
|
||||
scriptversion=2009-10-06.20; # UTC
|
||||
|
||||
# Copyright (C) 1999, 2000, 2003, 2004, 2005, 2009 Free Software
|
||||
# Foundation, Inc.
|
||||
# Written by Tom Tromey <tromey@cygnus.com>.
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2, or (at your option)
|
||||
# any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
# As a special exception to the GNU General Public License, if you
|
||||
# distribute this file as part of a program that contains a
|
||||
# configuration script generated by Autoconf, you may include it under
|
||||
# the same distribution terms that you use for the rest of that program.
|
||||
|
||||
# This file is maintained in Automake, please report
|
||||
# bugs to <bug-automake@gnu.org> or send patches to
|
||||
# <automake-patches@gnu.org>.
|
||||
|
||||
case $1 in
|
||||
'')
|
||||
echo "$0: No command. Try \`$0 --help' for more information." 1>&2
|
||||
exit 1;
|
||||
;;
|
||||
-h | --h*)
|
||||
cat <<\EOF
|
||||
Usage: compile [--help] [--version] PROGRAM [ARGS]
|
||||
|
||||
Wrapper for compilers which do not understand `-c -o'.
|
||||
Remove `-o dest.o' from ARGS, run PROGRAM with the remaining
|
||||
arguments, and rename the output as expected.
|
||||
|
||||
If you are trying to build a whole package this is not the
|
||||
right script to run: please start by reading the file `INSTALL'.
|
||||
|
||||
Report bugs to <bug-automake@gnu.org>.
|
||||
EOF
|
||||
exit $?
|
||||
;;
|
||||
-v | --v*)
|
||||
echo "compile $scriptversion"
|
||||
exit $?
|
||||
;;
|
||||
esac
|
||||
|
||||
ofile=
|
||||
cfile=
|
||||
eat=
|
||||
|
||||
for arg
|
||||
do
|
||||
if test -n "$eat"; then
|
||||
eat=
|
||||
else
|
||||
case $1 in
|
||||
-o)
|
||||
# configure might choose to run compile as `compile cc -o foo foo.c'.
|
||||
# So we strip `-o arg' only if arg is an object.
|
||||
eat=1
|
||||
case $2 in
|
||||
*.o | *.obj)
|
||||
ofile=$2
|
||||
;;
|
||||
*)
|
||||
set x "$@" -o "$2"
|
||||
shift
|
||||
;;
|
||||
esac
|
||||
;;
|
||||
*.c)
|
||||
cfile=$1
|
||||
set x "$@" "$1"
|
||||
shift
|
||||
;;
|
||||
*)
|
||||
set x "$@" "$1"
|
||||
shift
|
||||
;;
|
||||
esac
|
||||
fi
|
||||
shift
|
||||
done
|
||||
|
||||
if test -z "$ofile" || test -z "$cfile"; then
|
||||
# If no `-o' option was seen then we might have been invoked from a
|
||||
# pattern rule where we don't need one. That is ok -- this is a
|
||||
# normal compilation that the losing compiler can handle. If no
|
||||
# `.c' file was seen then we are probably linking. That is also
|
||||
# ok.
|
||||
exec "$@"
|
||||
fi
|
||||
|
||||
# Name of file we expect compiler to create.
|
||||
cofile=`echo "$cfile" | sed 's|^.*[\\/]||; s|^[a-zA-Z]:||; s/\.c$/.o/'`
|
||||
|
||||
# Create the lock directory.
|
||||
# Note: use `[/\\:.-]' here to ensure that we don't use the same name
|
||||
# that we are using for the .o file. Also, base the name on the expected
|
||||
# object file name, since that is what matters with a parallel build.
|
||||
lockdir=`echo "$cofile" | sed -e 's|[/\\:.-]|_|g'`.d
|
||||
while true; do
|
||||
if mkdir "$lockdir" >/dev/null 2>&1; then
|
||||
break
|
||||
fi
|
||||
sleep 1
|
||||
done
|
||||
# FIXME: race condition here if user kills between mkdir and trap.
|
||||
trap "rmdir '$lockdir'; exit 1" 1 2 15
|
||||
|
||||
# Run the compile.
|
||||
"$@"
|
||||
ret=$?
|
||||
|
||||
if test -f "$cofile"; then
|
||||
test "$cofile" = "$ofile" || mv "$cofile" "$ofile"
|
||||
elif test -f "${cofile}bj"; then
|
||||
test "${cofile}bj" = "$ofile" || mv "${cofile}bj" "$ofile"
|
||||
fi
|
||||
|
||||
rmdir "$lockdir"
|
||||
exit $ret
|
||||
|
||||
# Local Variables:
|
||||
# mode: shell-script
|
||||
# sh-indentation: 2
|
||||
# eval: (add-hook 'write-file-hooks 'time-stamp)
|
||||
# time-stamp-start: "scriptversion="
|
||||
# time-stamp-format: "%:y-%02m-%02d.%02H"
|
||||
# time-stamp-time-zone: "UTC"
|
||||
# time-stamp-end: "; # UTC"
|
||||
# End:
|
1502
src/Onigmo/config.guess
vendored
Normal file
1502
src/Onigmo/config.guess
vendored
Normal file
File diff suppressed because it is too large
Load Diff
121
src/Onigmo/config.h.in
Normal file
121
src/Onigmo/config.h.in
Normal file
@ -0,0 +1,121 @@
|
||||
/* config.h.in. Generated from configure.in by autoheader. */
|
||||
|
||||
/* Define to one of `_getb67', `GETB67', `getb67' for Cray-2 and Cray-YMP
|
||||
systems. This function is required for `alloca.c' support on those systems.
|
||||
*/
|
||||
#undef CRAY_STACKSEG_END
|
||||
|
||||
/* Define to 1 if using `alloca.c'. */
|
||||
#undef C_ALLOCA
|
||||
|
||||
/* Define to 1 if you have `alloca', as a function or macro. */
|
||||
#undef HAVE_ALLOCA
|
||||
|
||||
/* Define to 1 if you have <alloca.h> and it should be used (not on Ultrix).
|
||||
*/
|
||||
#undef HAVE_ALLOCA_H
|
||||
|
||||
/* Define to 1 if you have the <dlfcn.h> header file. */
|
||||
#undef HAVE_DLFCN_H
|
||||
|
||||
/* Define to 1 if you have the <inttypes.h> header file. */
|
||||
#undef HAVE_INTTYPES_H
|
||||
|
||||
/* Define to 1 if you have the <memory.h> header file. */
|
||||
#undef HAVE_MEMORY_H
|
||||
|
||||
/* Define if compiler supports prototypes */
|
||||
#undef HAVE_PROTOTYPES
|
||||
|
||||
/* Define if compiler supports stdarg prototypes */
|
||||
#undef HAVE_STDARG_PROTOTYPES
|
||||
|
||||
/* Define to 1 if you have the <stdint.h> header file. */
|
||||
#undef HAVE_STDINT_H
|
||||
|
||||
/* Define to 1 if you have the <stdlib.h> header file. */
|
||||
#undef HAVE_STDLIB_H
|
||||
|
||||
/* Define to 1 if you have the <strings.h> header file. */
|
||||
#undef HAVE_STRINGS_H
|
||||
|
||||
/* Define to 1 if you have the <string.h> header file. */
|
||||
#undef HAVE_STRING_H
|
||||
|
||||
/* Define to 1 if you have the <sys/stat.h> header file. */
|
||||
#undef HAVE_SYS_STAT_H
|
||||
|
||||
/* Define to 1 if you have the <sys/times.h> header file. */
|
||||
#undef HAVE_SYS_TIMES_H
|
||||
|
||||
/* Define to 1 if you have the <sys/time.h> header file. */
|
||||
#undef HAVE_SYS_TIME_H
|
||||
|
||||
/* Define to 1 if you have the <sys/types.h> header file. */
|
||||
#undef HAVE_SYS_TYPES_H
|
||||
|
||||
/* Define to 1 if you have the <unistd.h> header file. */
|
||||
#undef HAVE_UNISTD_H
|
||||
|
||||
/* Define to the sub-directory in which libtool stores uninstalled libraries.
|
||||
*/
|
||||
#undef LT_OBJDIR
|
||||
|
||||
/* Define to 1 if your C compiler doesn't accept -c and -o together. */
|
||||
#undef NO_MINUS_C_MINUS_O
|
||||
|
||||
/* Name of package */
|
||||
#undef PACKAGE
|
||||
|
||||
/* Define to the address where bug reports for this package should be sent. */
|
||||
#undef PACKAGE_BUGREPORT
|
||||
|
||||
/* Define to the full name of this package. */
|
||||
#undef PACKAGE_NAME
|
||||
|
||||
/* Define to the full name and version of this package. */
|
||||
#undef PACKAGE_STRING
|
||||
|
||||
/* Define to the one symbol short name of this package. */
|
||||
#undef PACKAGE_TARNAME
|
||||
|
||||
/* Define to the home page for this package. */
|
||||
#undef PACKAGE_URL
|
||||
|
||||
/* Define to the version of this package. */
|
||||
#undef PACKAGE_VERSION
|
||||
|
||||
/* The size of `int', as computed by sizeof. */
|
||||
#undef SIZEOF_INT
|
||||
|
||||
/* The size of `long', as computed by sizeof. */
|
||||
#undef SIZEOF_LONG
|
||||
|
||||
/* The size of `short', as computed by sizeof. */
|
||||
#undef SIZEOF_SHORT
|
||||
|
||||
/* If using the C implementation of alloca, define if you know the
|
||||
direction of stack growth for your system; otherwise it will be
|
||||
automatically deduced at runtime.
|
||||
STACK_DIRECTION > 0 => grows toward higher addresses
|
||||
STACK_DIRECTION < 0 => grows toward lower addresses
|
||||
STACK_DIRECTION = 0 => direction of growth unknown */
|
||||
#undef STACK_DIRECTION
|
||||
|
||||
/* Define to 1 if you have the ANSI C header files. */
|
||||
#undef STDC_HEADERS
|
||||
|
||||
/* Define to 1 if you can safely include both <sys/time.h> and <time.h>. */
|
||||
#undef TIME_WITH_SYS_TIME
|
||||
|
||||
/* Define if combination explosion check */
|
||||
#undef USE_COMBINATION_EXPLOSION_CHECK
|
||||
|
||||
/* Define if enable CR+NL as line terminator */
|
||||
#undef USE_CRNL_AS_LINE_TERMINATOR
|
||||
|
||||
/* Version number of package */
|
||||
#undef VERSION
|
||||
|
||||
/* Define to empty if `const' does not conform to ANSI C. */
|
||||
#undef const
|
1714
src/Onigmo/config.sub
vendored
Normal file
1714
src/Onigmo/config.sub
vendored
Normal file
File diff suppressed because it is too large
Load Diff
13803
src/Onigmo/configure
vendored
Normal file
13803
src/Onigmo/configure
vendored
Normal file
File diff suppressed because it is too large
Load Diff
96
src/Onigmo/configure.in
Normal file
96
src/Onigmo/configure.in
Normal file
@ -0,0 +1,96 @@
|
||||
dnl Process this file with autoconf to produce a configure script.
|
||||
AC_INIT(onig, 5.13.5)
|
||||
|
||||
AC_CONFIG_MACRO_DIR([m4])
|
||||
|
||||
AM_INIT_AUTOMAKE(foreign)
|
||||
AC_CONFIG_HEADER(config.h)
|
||||
|
||||
|
||||
dnl default value for RUBYDIR
|
||||
RUBYDIR=".."
|
||||
AC_ARG_WITH(rubydir,
|
||||
[ --with-rubydir=RUBYDIR specify value for RUBYDIR (default ..)],
|
||||
[ RUBYDIR=$withval ])
|
||||
AC_SUBST(RUBYDIR)
|
||||
|
||||
dnl default value for STATISTICS
|
||||
STATISTICS=""
|
||||
AC_ARG_WITH(statistics,
|
||||
[ --with-statistics take matching time statistical data],
|
||||
[ STATISTICS=-DONIG_DEBUG_STATISTICS ])
|
||||
AC_SUBST(STATISTICS)
|
||||
|
||||
dnl check for COMBINATION_EXPLOSION
|
||||
AC_ARG_ENABLE(combination-explosion-check,
|
||||
[ --enable-combination-explosion-check enable combination explosion check],
|
||||
[comb_expl_check=$enableval])
|
||||
if test "${comb_expl_check}" = yes; then
|
||||
AC_DEFINE(USE_COMBINATION_EXPLOSION_CHECK,1,[Define if combination explosion check])
|
||||
fi
|
||||
|
||||
dnl check for CRNL_AS_LINE_TERMINATOR
|
||||
AC_ARG_ENABLE(crnl-as-line-terminator,
|
||||
[ --enable-crnl-as-line-terminator enable CR+NL as line terminator],
|
||||
[crnl_as_line_terminator=$enableval])
|
||||
if test "${crnl_as_line_terminator}" = yes; then
|
||||
AC_DEFINE(USE_CRNL_AS_LINE_TERMINATOR,1,[Define if enable CR+NL as line terminator])
|
||||
fi
|
||||
|
||||
|
||||
dnl Checks for programs.
|
||||
AC_PROG_CC
|
||||
AC_PROG_LIBTOOL
|
||||
AM_PROG_CC_C_O
|
||||
LTVERSION="5:0:5"
|
||||
AC_SUBST(LTVERSION)
|
||||
|
||||
AC_PROG_INSTALL
|
||||
AC_PROG_MAKE_SET
|
||||
|
||||
dnl Checks for libraries.
|
||||
|
||||
dnl Checks for header files.
|
||||
AC_HEADER_STDC
|
||||
AC_CHECK_HEADERS(stdlib.h string.h strings.h sys/time.h unistd.h sys/times.h stdint.h)
|
||||
|
||||
dnl Checks for typedefs, structures, and compiler characteristics.
|
||||
AC_CHECK_SIZEOF(int, 4)
|
||||
AC_CHECK_SIZEOF(short, 2)
|
||||
AC_CHECK_SIZEOF(long, 4)
|
||||
AC_C_CONST
|
||||
AC_HEADER_TIME
|
||||
|
||||
dnl Checks for library functions.
|
||||
AC_FUNC_ALLOCA
|
||||
AC_FUNC_MEMCMP
|
||||
|
||||
AC_CACHE_CHECK(for prototypes, _cv_have_prototypes,
|
||||
[AC_TRY_COMPILE([int foo(int x) { return 0; }], [return foo(10);],
|
||||
_cv_have_prototypes=yes,
|
||||
_cv_have_prototypes=no)])
|
||||
if test "$_cv_have_prototypes" = yes; then
|
||||
AC_DEFINE(HAVE_PROTOTYPES,1,[Define if compilerr supports prototypes])
|
||||
fi
|
||||
|
||||
AC_CACHE_CHECK(for variable length prototypes and stdarg.h, _cv_stdarg,
|
||||
[AC_TRY_COMPILE([
|
||||
#include <stdarg.h>
|
||||
int foo(int x, ...) {
|
||||
va_list va;
|
||||
va_start(va, x);
|
||||
va_arg(va, int);
|
||||
va_arg(va, char *);
|
||||
va_arg(va, double);
|
||||
va_end(va);
|
||||
return 0;
|
||||
}
|
||||
], [return foo(10, "", 3.14);],
|
||||
_cv_stdarg=yes,
|
||||
_cv_stdarg=no)])
|
||||
if test "$_cv_stdarg" = yes; then
|
||||
AC_DEFINE(HAVE_STDARG_PROTOTYPES,1,[Define if compiler supports stdarg prototypes])
|
||||
fi
|
||||
|
||||
|
||||
AC_OUTPUT([Makefile onig-config sample/Makefile], [chmod +x onig-config])
|
630
src/Onigmo/depcomp
Normal file
630
src/Onigmo/depcomp
Normal file
@ -0,0 +1,630 @@
|
||||
#! /bin/sh
|
||||
# depcomp - compile a program generating dependencies as side-effects
|
||||
|
||||
scriptversion=2009-04-28.21; # UTC
|
||||
|
||||
# Copyright (C) 1999, 2000, 2003, 2004, 2005, 2006, 2007, 2009 Free
|
||||
# Software Foundation, Inc.
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2, or (at your option)
|
||||
# any later version.
|
||||
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
# As a special exception to the GNU General Public License, if you
|
||||
# distribute this file as part of a program that contains a
|
||||
# configuration script generated by Autoconf, you may include it under
|
||||
# the same distribution terms that you use for the rest of that program.
|
||||
|
||||
# Originally written by Alexandre Oliva <oliva@dcc.unicamp.br>.
|
||||
|
||||
case $1 in
|
||||
'')
|
||||
echo "$0: No command. Try \`$0 --help' for more information." 1>&2
|
||||
exit 1;
|
||||
;;
|
||||
-h | --h*)
|
||||
cat <<\EOF
|
||||
Usage: depcomp [--help] [--version] PROGRAM [ARGS]
|
||||
|
||||
Run PROGRAMS ARGS to compile a file, generating dependencies
|
||||
as side-effects.
|
||||
|
||||
Environment variables:
|
||||
depmode Dependency tracking mode.
|
||||
source Source file read by `PROGRAMS ARGS'.
|
||||
object Object file output by `PROGRAMS ARGS'.
|
||||
DEPDIR directory where to store dependencies.
|
||||
depfile Dependency file to output.
|
||||
tmpdepfile Temporary file to use when outputing dependencies.
|
||||
libtool Whether libtool is used (yes/no).
|
||||
|
||||
Report bugs to <bug-automake@gnu.org>.
|
||||
EOF
|
||||
exit $?
|
||||
;;
|
||||
-v | --v*)
|
||||
echo "depcomp $scriptversion"
|
||||
exit $?
|
||||
;;
|
||||
esac
|
||||
|
||||
if test -z "$depmode" || test -z "$source" || test -z "$object"; then
|
||||
echo "depcomp: Variables source, object and depmode must be set" 1>&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Dependencies for sub/bar.o or sub/bar.obj go into sub/.deps/bar.Po.
|
||||
depfile=${depfile-`echo "$object" |
|
||||
sed 's|[^\\/]*$|'${DEPDIR-.deps}'/&|;s|\.\([^.]*\)$|.P\1|;s|Pobj$|Po|'`}
|
||||
tmpdepfile=${tmpdepfile-`echo "$depfile" | sed 's/\.\([^.]*\)$/.T\1/'`}
|
||||
|
||||
rm -f "$tmpdepfile"
|
||||
|
||||
# Some modes work just like other modes, but use different flags. We
|
||||
# parameterize here, but still list the modes in the big case below,
|
||||
# to make depend.m4 easier to write. Note that we *cannot* use a case
|
||||
# here, because this file can only contain one case statement.
|
||||
if test "$depmode" = hp; then
|
||||
# HP compiler uses -M and no extra arg.
|
||||
gccflag=-M
|
||||
depmode=gcc
|
||||
fi
|
||||
|
||||
if test "$depmode" = dashXmstdout; then
|
||||
# This is just like dashmstdout with a different argument.
|
||||
dashmflag=-xM
|
||||
depmode=dashmstdout
|
||||
fi
|
||||
|
||||
cygpath_u="cygpath -u -f -"
|
||||
if test "$depmode" = msvcmsys; then
|
||||
# This is just like msvisualcpp but w/o cygpath translation.
|
||||
# Just convert the backslash-escaped backslashes to single forward
|
||||
# slashes to satisfy depend.m4
|
||||
cygpath_u="sed s,\\\\\\\\,/,g"
|
||||
depmode=msvisualcpp
|
||||
fi
|
||||
|
||||
case "$depmode" in
|
||||
gcc3)
|
||||
## gcc 3 implements dependency tracking that does exactly what
|
||||
## we want. Yay! Note: for some reason libtool 1.4 doesn't like
|
||||
## it if -MD -MP comes after the -MF stuff. Hmm.
|
||||
## Unfortunately, FreeBSD c89 acceptance of flags depends upon
|
||||
## the command line argument order; so add the flags where they
|
||||
## appear in depend2.am. Note that the slowdown incurred here
|
||||
## affects only configure: in makefiles, %FASTDEP% shortcuts this.
|
||||
for arg
|
||||
do
|
||||
case $arg in
|
||||
-c) set fnord "$@" -MT "$object" -MD -MP -MF "$tmpdepfile" "$arg" ;;
|
||||
*) set fnord "$@" "$arg" ;;
|
||||
esac
|
||||
shift # fnord
|
||||
shift # $arg
|
||||
done
|
||||
"$@"
|
||||
stat=$?
|
||||
if test $stat -eq 0; then :
|
||||
else
|
||||
rm -f "$tmpdepfile"
|
||||
exit $stat
|
||||
fi
|
||||
mv "$tmpdepfile" "$depfile"
|
||||
;;
|
||||
|
||||
gcc)
|
||||
## There are various ways to get dependency output from gcc. Here's
|
||||
## why we pick this rather obscure method:
|
||||
## - Don't want to use -MD because we'd like the dependencies to end
|
||||
## up in a subdir. Having to rename by hand is ugly.
|
||||
## (We might end up doing this anyway to support other compilers.)
|
||||
## - The DEPENDENCIES_OUTPUT environment variable makes gcc act like
|
||||
## -MM, not -M (despite what the docs say).
|
||||
## - Using -M directly means running the compiler twice (even worse
|
||||
## than renaming).
|
||||
if test -z "$gccflag"; then
|
||||
gccflag=-MD,
|
||||
fi
|
||||
"$@" -Wp,"$gccflag$tmpdepfile"
|
||||
stat=$?
|
||||
if test $stat -eq 0; then :
|
||||
else
|
||||
rm -f "$tmpdepfile"
|
||||
exit $stat
|
||||
fi
|
||||
rm -f "$depfile"
|
||||
echo "$object : \\" > "$depfile"
|
||||
alpha=ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz
|
||||
## The second -e expression handles DOS-style file names with drive letters.
|
||||
sed -e 's/^[^:]*: / /' \
|
||||
-e 's/^['$alpha']:\/[^:]*: / /' < "$tmpdepfile" >> "$depfile"
|
||||
## This next piece of magic avoids the `deleted header file' problem.
|
||||
## The problem is that when a header file which appears in a .P file
|
||||
## is deleted, the dependency causes make to die (because there is
|
||||
## typically no way to rebuild the header). We avoid this by adding
|
||||
## dummy dependencies for each header file. Too bad gcc doesn't do
|
||||
## this for us directly.
|
||||
tr ' ' '
|
||||
' < "$tmpdepfile" |
|
||||
## Some versions of gcc put a space before the `:'. On the theory
|
||||
## that the space means something, we add a space to the output as
|
||||
## well.
|
||||
## Some versions of the HPUX 10.20 sed can't process this invocation
|
||||
## correctly. Breaking it into two sed invocations is a workaround.
|
||||
sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile"
|
||||
rm -f "$tmpdepfile"
|
||||
;;
|
||||
|
||||
hp)
|
||||
# This case exists only to let depend.m4 do its work. It works by
|
||||
# looking at the text of this script. This case will never be run,
|
||||
# since it is checked for above.
|
||||
exit 1
|
||||
;;
|
||||
|
||||
sgi)
|
||||
if test "$libtool" = yes; then
|
||||
"$@" "-Wp,-MDupdate,$tmpdepfile"
|
||||
else
|
||||
"$@" -MDupdate "$tmpdepfile"
|
||||
fi
|
||||
stat=$?
|
||||
if test $stat -eq 0; then :
|
||||
else
|
||||
rm -f "$tmpdepfile"
|
||||
exit $stat
|
||||
fi
|
||||
rm -f "$depfile"
|
||||
|
||||
if test -f "$tmpdepfile"; then # yes, the sourcefile depend on other files
|
||||
echo "$object : \\" > "$depfile"
|
||||
|
||||
# Clip off the initial element (the dependent). Don't try to be
|
||||
# clever and replace this with sed code, as IRIX sed won't handle
|
||||
# lines with more than a fixed number of characters (4096 in
|
||||
# IRIX 6.2 sed, 8192 in IRIX 6.5). We also remove comment lines;
|
||||
# the IRIX cc adds comments like `#:fec' to the end of the
|
||||
# dependency line.
|
||||
tr ' ' '
|
||||
' < "$tmpdepfile" \
|
||||
| sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' | \
|
||||
tr '
|
||||
' ' ' >> "$depfile"
|
||||
echo >> "$depfile"
|
||||
|
||||
# The second pass generates a dummy entry for each header file.
|
||||
tr ' ' '
|
||||
' < "$tmpdepfile" \
|
||||
| sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' -e 's/$/:/' \
|
||||
>> "$depfile"
|
||||
else
|
||||
# The sourcefile does not contain any dependencies, so just
|
||||
# store a dummy comment line, to avoid errors with the Makefile
|
||||
# "include basename.Plo" scheme.
|
||||
echo "#dummy" > "$depfile"
|
||||
fi
|
||||
rm -f "$tmpdepfile"
|
||||
;;
|
||||
|
||||
aix)
|
||||
# The C for AIX Compiler uses -M and outputs the dependencies
|
||||
# in a .u file. In older versions, this file always lives in the
|
||||
# current directory. Also, the AIX compiler puts `$object:' at the
|
||||
# start of each line; $object doesn't have directory information.
|
||||
# Version 6 uses the directory in both cases.
|
||||
dir=`echo "$object" | sed -e 's|/[^/]*$|/|'`
|
||||
test "x$dir" = "x$object" && dir=
|
||||
base=`echo "$object" | sed -e 's|^.*/||' -e 's/\.o$//' -e 's/\.lo$//'`
|
||||
if test "$libtool" = yes; then
|
||||
tmpdepfile1=$dir$base.u
|
||||
tmpdepfile2=$base.u
|
||||
tmpdepfile3=$dir.libs/$base.u
|
||||
"$@" -Wc,-M
|
||||
else
|
||||
tmpdepfile1=$dir$base.u
|
||||
tmpdepfile2=$dir$base.u
|
||||
tmpdepfile3=$dir$base.u
|
||||
"$@" -M
|
||||
fi
|
||||
stat=$?
|
||||
|
||||
if test $stat -eq 0; then :
|
||||
else
|
||||
rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
|
||||
exit $stat
|
||||
fi
|
||||
|
||||
for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
|
||||
do
|
||||
test -f "$tmpdepfile" && break
|
||||
done
|
||||
if test -f "$tmpdepfile"; then
|
||||
# Each line is of the form `foo.o: dependent.h'.
|
||||
# Do two passes, one to just change these to
|
||||
# `$object: dependent.h' and one to simply `dependent.h:'.
|
||||
sed -e "s,^.*\.[a-z]*:,$object:," < "$tmpdepfile" > "$depfile"
|
||||
# That's a tab and a space in the [].
|
||||
sed -e 's,^.*\.[a-z]*:[ ]*,,' -e 's,$,:,' < "$tmpdepfile" >> "$depfile"
|
||||
else
|
||||
# The sourcefile does not contain any dependencies, so just
|
||||
# store a dummy comment line, to avoid errors with the Makefile
|
||||
# "include basename.Plo" scheme.
|
||||
echo "#dummy" > "$depfile"
|
||||
fi
|
||||
rm -f "$tmpdepfile"
|
||||
;;
|
||||
|
||||
icc)
|
||||
# Intel's C compiler understands `-MD -MF file'. However on
|
||||
# icc -MD -MF foo.d -c -o sub/foo.o sub/foo.c
|
||||
# ICC 7.0 will fill foo.d with something like
|
||||
# foo.o: sub/foo.c
|
||||
# foo.o: sub/foo.h
|
||||
# which is wrong. We want:
|
||||
# sub/foo.o: sub/foo.c
|
||||
# sub/foo.o: sub/foo.h
|
||||
# sub/foo.c:
|
||||
# sub/foo.h:
|
||||
# ICC 7.1 will output
|
||||
# foo.o: sub/foo.c sub/foo.h
|
||||
# and will wrap long lines using \ :
|
||||
# foo.o: sub/foo.c ... \
|
||||
# sub/foo.h ... \
|
||||
# ...
|
||||
|
||||
"$@" -MD -MF "$tmpdepfile"
|
||||
stat=$?
|
||||
if test $stat -eq 0; then :
|
||||
else
|
||||
rm -f "$tmpdepfile"
|
||||
exit $stat
|
||||
fi
|
||||
rm -f "$depfile"
|
||||
# Each line is of the form `foo.o: dependent.h',
|
||||
# or `foo.o: dep1.h dep2.h \', or ` dep3.h dep4.h \'.
|
||||
# Do two passes, one to just change these to
|
||||
# `$object: dependent.h' and one to simply `dependent.h:'.
|
||||
sed "s,^[^:]*:,$object :," < "$tmpdepfile" > "$depfile"
|
||||
# Some versions of the HPUX 10.20 sed can't process this invocation
|
||||
# correctly. Breaking it into two sed invocations is a workaround.
|
||||
sed 's,^[^:]*: \(.*\)$,\1,;s/^\\$//;/^$/d;/:$/d' < "$tmpdepfile" |
|
||||
sed -e 's/$/ :/' >> "$depfile"
|
||||
rm -f "$tmpdepfile"
|
||||
;;
|
||||
|
||||
hp2)
|
||||
# The "hp" stanza above does not work with aCC (C++) and HP's ia64
|
||||
# compilers, which have integrated preprocessors. The correct option
|
||||
# to use with these is +Maked; it writes dependencies to a file named
|
||||
# 'foo.d', which lands next to the object file, wherever that
|
||||
# happens to be.
|
||||
# Much of this is similar to the tru64 case; see comments there.
|
||||
dir=`echo "$object" | sed -e 's|/[^/]*$|/|'`
|
||||
test "x$dir" = "x$object" && dir=
|
||||
base=`echo "$object" | sed -e 's|^.*/||' -e 's/\.o$//' -e 's/\.lo$//'`
|
||||
if test "$libtool" = yes; then
|
||||
tmpdepfile1=$dir$base.d
|
||||
tmpdepfile2=$dir.libs/$base.d
|
||||
"$@" -Wc,+Maked
|
||||
else
|
||||
tmpdepfile1=$dir$base.d
|
||||
tmpdepfile2=$dir$base.d
|
||||
"$@" +Maked
|
||||
fi
|
||||
stat=$?
|
||||
if test $stat -eq 0; then :
|
||||
else
|
||||
rm -f "$tmpdepfile1" "$tmpdepfile2"
|
||||
exit $stat
|
||||
fi
|
||||
|
||||
for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2"
|
||||
do
|
||||
test -f "$tmpdepfile" && break
|
||||
done
|
||||
if test -f "$tmpdepfile"; then
|
||||
sed -e "s,^.*\.[a-z]*:,$object:," "$tmpdepfile" > "$depfile"
|
||||
# Add `dependent.h:' lines.
|
||||
sed -ne '2,${
|
||||
s/^ *//
|
||||
s/ \\*$//
|
||||
s/$/:/
|
||||
p
|
||||
}' "$tmpdepfile" >> "$depfile"
|
||||
else
|
||||
echo "#dummy" > "$depfile"
|
||||
fi
|
||||
rm -f "$tmpdepfile" "$tmpdepfile2"
|
||||
;;
|
||||
|
||||
tru64)
|
||||
# The Tru64 compiler uses -MD to generate dependencies as a side
|
||||
# effect. `cc -MD -o foo.o ...' puts the dependencies into `foo.o.d'.
|
||||
# At least on Alpha/Redhat 6.1, Compaq CCC V6.2-504 seems to put
|
||||
# dependencies in `foo.d' instead, so we check for that too.
|
||||
# Subdirectories are respected.
|
||||
dir=`echo "$object" | sed -e 's|/[^/]*$|/|'`
|
||||
test "x$dir" = "x$object" && dir=
|
||||
base=`echo "$object" | sed -e 's|^.*/||' -e 's/\.o$//' -e 's/\.lo$//'`
|
||||
|
||||
if test "$libtool" = yes; then
|
||||
# With Tru64 cc, shared objects can also be used to make a
|
||||
# static library. This mechanism is used in libtool 1.4 series to
|
||||
# handle both shared and static libraries in a single compilation.
|
||||
# With libtool 1.4, dependencies were output in $dir.libs/$base.lo.d.
|
||||
#
|
||||
# With libtool 1.5 this exception was removed, and libtool now
|
||||
# generates 2 separate objects for the 2 libraries. These two
|
||||
# compilations output dependencies in $dir.libs/$base.o.d and
|
||||
# in $dir$base.o.d. We have to check for both files, because
|
||||
# one of the two compilations can be disabled. We should prefer
|
||||
# $dir$base.o.d over $dir.libs/$base.o.d because the latter is
|
||||
# automatically cleaned when .libs/ is deleted, while ignoring
|
||||
# the former would cause a distcleancheck panic.
|
||||
tmpdepfile1=$dir.libs/$base.lo.d # libtool 1.4
|
||||
tmpdepfile2=$dir$base.o.d # libtool 1.5
|
||||
tmpdepfile3=$dir.libs/$base.o.d # libtool 1.5
|
||||
tmpdepfile4=$dir.libs/$base.d # Compaq CCC V6.2-504
|
||||
"$@" -Wc,-MD
|
||||
else
|
||||
tmpdepfile1=$dir$base.o.d
|
||||
tmpdepfile2=$dir$base.d
|
||||
tmpdepfile3=$dir$base.d
|
||||
tmpdepfile4=$dir$base.d
|
||||
"$@" -MD
|
||||
fi
|
||||
|
||||
stat=$?
|
||||
if test $stat -eq 0; then :
|
||||
else
|
||||
rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" "$tmpdepfile4"
|
||||
exit $stat
|
||||
fi
|
||||
|
||||
for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" "$tmpdepfile4"
|
||||
do
|
||||
test -f "$tmpdepfile" && break
|
||||
done
|
||||
if test -f "$tmpdepfile"; then
|
||||
sed -e "s,^.*\.[a-z]*:,$object:," < "$tmpdepfile" > "$depfile"
|
||||
# That's a tab and a space in the [].
|
||||
sed -e 's,^.*\.[a-z]*:[ ]*,,' -e 's,$,:,' < "$tmpdepfile" >> "$depfile"
|
||||
else
|
||||
echo "#dummy" > "$depfile"
|
||||
fi
|
||||
rm -f "$tmpdepfile"
|
||||
;;
|
||||
|
||||
#nosideeffect)
|
||||
# This comment above is used by automake to tell side-effect
|
||||
# dependency tracking mechanisms from slower ones.
|
||||
|
||||
dashmstdout)
|
||||
# Important note: in order to support this mode, a compiler *must*
|
||||
# always write the preprocessed file to stdout, regardless of -o.
|
||||
"$@" || exit $?
|
||||
|
||||
# Remove the call to Libtool.
|
||||
if test "$libtool" = yes; then
|
||||
while test "X$1" != 'X--mode=compile'; do
|
||||
shift
|
||||
done
|
||||
shift
|
||||
fi
|
||||
|
||||
# Remove `-o $object'.
|
||||
IFS=" "
|
||||
for arg
|
||||
do
|
||||
case $arg in
|
||||
-o)
|
||||
shift
|
||||
;;
|
||||
$object)
|
||||
shift
|
||||
;;
|
||||
*)
|
||||
set fnord "$@" "$arg"
|
||||
shift # fnord
|
||||
shift # $arg
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
test -z "$dashmflag" && dashmflag=-M
|
||||
# Require at least two characters before searching for `:'
|
||||
# in the target name. This is to cope with DOS-style filenames:
|
||||
# a dependency such as `c:/foo/bar' could be seen as target `c' otherwise.
|
||||
"$@" $dashmflag |
|
||||
sed 's:^[ ]*[^: ][^:][^:]*\:[ ]*:'"$object"'\: :' > "$tmpdepfile"
|
||||
rm -f "$depfile"
|
||||
cat < "$tmpdepfile" > "$depfile"
|
||||
tr ' ' '
|
||||
' < "$tmpdepfile" | \
|
||||
## Some versions of the HPUX 10.20 sed can't process this invocation
|
||||
## correctly. Breaking it into two sed invocations is a workaround.
|
||||
sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile"
|
||||
rm -f "$tmpdepfile"
|
||||
;;
|
||||
|
||||
dashXmstdout)
|
||||
# This case only exists to satisfy depend.m4. It is never actually
|
||||
# run, as this mode is specially recognized in the preamble.
|
||||
exit 1
|
||||
;;
|
||||
|
||||
makedepend)
|
||||
"$@" || exit $?
|
||||
# Remove any Libtool call
|
||||
if test "$libtool" = yes; then
|
||||
while test "X$1" != 'X--mode=compile'; do
|
||||
shift
|
||||
done
|
||||
shift
|
||||
fi
|
||||
# X makedepend
|
||||
shift
|
||||
cleared=no eat=no
|
||||
for arg
|
||||
do
|
||||
case $cleared in
|
||||
no)
|
||||
set ""; shift
|
||||
cleared=yes ;;
|
||||
esac
|
||||
if test $eat = yes; then
|
||||
eat=no
|
||||
continue
|
||||
fi
|
||||
case "$arg" in
|
||||
-D*|-I*)
|
||||
set fnord "$@" "$arg"; shift ;;
|
||||
# Strip any option that makedepend may not understand. Remove
|
||||
# the object too, otherwise makedepend will parse it as a source file.
|
||||
-arch)
|
||||
eat=yes ;;
|
||||
-*|$object)
|
||||
;;
|
||||
*)
|
||||
set fnord "$@" "$arg"; shift ;;
|
||||
esac
|
||||
done
|
||||
obj_suffix=`echo "$object" | sed 's/^.*\././'`
|
||||
touch "$tmpdepfile"
|
||||
${MAKEDEPEND-makedepend} -o"$obj_suffix" -f"$tmpdepfile" "$@"
|
||||
rm -f "$depfile"
|
||||
cat < "$tmpdepfile" > "$depfile"
|
||||
sed '1,2d' "$tmpdepfile" | tr ' ' '
|
||||
' | \
|
||||
## Some versions of the HPUX 10.20 sed can't process this invocation
|
||||
## correctly. Breaking it into two sed invocations is a workaround.
|
||||
sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile"
|
||||
rm -f "$tmpdepfile" "$tmpdepfile".bak
|
||||
;;
|
||||
|
||||
cpp)
|
||||
# Important note: in order to support this mode, a compiler *must*
|
||||
# always write the preprocessed file to stdout.
|
||||
"$@" || exit $?
|
||||
|
||||
# Remove the call to Libtool.
|
||||
if test "$libtool" = yes; then
|
||||
while test "X$1" != 'X--mode=compile'; do
|
||||
shift
|
||||
done
|
||||
shift
|
||||
fi
|
||||
|
||||
# Remove `-o $object'.
|
||||
IFS=" "
|
||||
for arg
|
||||
do
|
||||
case $arg in
|
||||
-o)
|
||||
shift
|
||||
;;
|
||||
$object)
|
||||
shift
|
||||
;;
|
||||
*)
|
||||
set fnord "$@" "$arg"
|
||||
shift # fnord
|
||||
shift # $arg
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
"$@" -E |
|
||||
sed -n -e '/^# [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \
|
||||
-e '/^#line [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' |
|
||||
sed '$ s: \\$::' > "$tmpdepfile"
|
||||
rm -f "$depfile"
|
||||
echo "$object : \\" > "$depfile"
|
||||
cat < "$tmpdepfile" >> "$depfile"
|
||||
sed < "$tmpdepfile" '/^$/d;s/^ //;s/ \\$//;s/$/ :/' >> "$depfile"
|
||||
rm -f "$tmpdepfile"
|
||||
;;
|
||||
|
||||
msvisualcpp)
|
||||
# Important note: in order to support this mode, a compiler *must*
|
||||
# always write the preprocessed file to stdout.
|
||||
"$@" || exit $?
|
||||
|
||||
# Remove the call to Libtool.
|
||||
if test "$libtool" = yes; then
|
||||
while test "X$1" != 'X--mode=compile'; do
|
||||
shift
|
||||
done
|
||||
shift
|
||||
fi
|
||||
|
||||
IFS=" "
|
||||
for arg
|
||||
do
|
||||
case "$arg" in
|
||||
-o)
|
||||
shift
|
||||
;;
|
||||
$object)
|
||||
shift
|
||||
;;
|
||||
"-Gm"|"/Gm"|"-Gi"|"/Gi"|"-ZI"|"/ZI")
|
||||
set fnord "$@"
|
||||
shift
|
||||
shift
|
||||
;;
|
||||
*)
|
||||
set fnord "$@" "$arg"
|
||||
shift
|
||||
shift
|
||||
;;
|
||||
esac
|
||||
done
|
||||
"$@" -E 2>/dev/null |
|
||||
sed -n '/^#line [0-9][0-9]* "\([^"]*\)"/ s::\1:p' | $cygpath_u | sort -u > "$tmpdepfile"
|
||||
rm -f "$depfile"
|
||||
echo "$object : \\" > "$depfile"
|
||||
sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s:: \1 \\:p' >> "$depfile"
|
||||
echo " " >> "$depfile"
|
||||
sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s::\1\::p' >> "$depfile"
|
||||
rm -f "$tmpdepfile"
|
||||
;;
|
||||
|
||||
msvcmsys)
|
||||
# This case exists only to let depend.m4 do its work. It works by
|
||||
# looking at the text of this script. This case will never be run,
|
||||
# since it is checked for above.
|
||||
exit 1
|
||||
;;
|
||||
|
||||
none)
|
||||
exec "$@"
|
||||
;;
|
||||
|
||||
*)
|
||||
echo "Unknown depmode $depmode" 1>&2
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
exit 0
|
||||
|
||||
# Local Variables:
|
||||
# mode: shell-script
|
||||
# sh-indentation: 2
|
||||
# eval: (add-hook 'write-file-hooks 'time-stamp)
|
||||
# time-stamp-start: "scriptversion="
|
||||
# time-stamp-format: "%:y-%02m-%02d.%02H"
|
||||
# time-stamp-time-zone: "UTC"
|
||||
# time-stamp-end: "; # UTC"
|
||||
# End:
|
620
src/Onigmo/doc/API
Normal file
620
src/Onigmo/doc/API
Normal file
@ -0,0 +1,620 @@
|
||||
Onigmo (Oniguruma-mod) API Version 5.11.4 2011/10/08
|
||||
|
||||
#include <oniguruma.h>
|
||||
|
||||
|
||||
# int onig_init(void)
|
||||
|
||||
Initialize library.
|
||||
|
||||
You don't have to call it explicitly, because it is called in onig_new().
|
||||
|
||||
|
||||
# int onig_error_code_to_str(UChar* err_buf, OnigPosition err_code, ...)
|
||||
|
||||
Get error message string.
|
||||
If this function is used for onig_new(),
|
||||
don't call this after the pattern argument of onig_new() is freed.
|
||||
|
||||
normal return: error message string length
|
||||
|
||||
arguments
|
||||
1 err_buf: error message string buffer.
|
||||
(required size: ONIG_MAX_ERROR_MESSAGE_LEN)
|
||||
2 err_code: error code returned by other API functions.
|
||||
3 err_info (optional): error info returned by onig_new().
|
||||
|
||||
|
||||
# void onig_set_warn_func(OnigWarnFunc func)
|
||||
|
||||
Set warning function.
|
||||
|
||||
WARNING:
|
||||
'[', '-', ']' in character class without escape.
|
||||
']' in pattern without escape.
|
||||
|
||||
arguments
|
||||
1 func: function pointer. void (*func)(char* warning_message)
|
||||
|
||||
|
||||
# void onig_set_verb_warn_func(OnigWarnFunc func)
|
||||
|
||||
Set verbose warning function.
|
||||
|
||||
WARNING:
|
||||
redundant nested repeat operator.
|
||||
|
||||
arguments
|
||||
1 func: function pointer. void (*func)(char* warning_message)
|
||||
|
||||
|
||||
# int onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
|
||||
OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,
|
||||
OnigErrorInfo* err_info)
|
||||
|
||||
Create a regex object.
|
||||
|
||||
normal return: ONIG_NORMAL
|
||||
|
||||
arguments
|
||||
1 reg: return regex object's address.
|
||||
2 pattern: regex pattern string.
|
||||
3 pattern_end: end address of the pattern. (pattern + pattern length)
|
||||
4 option: compile time options.
|
||||
|
||||
ONIG_OPTION_NONE no option
|
||||
ONIG_OPTION_SINGLELINE '^' -> '\A', '$' -> '\Z'
|
||||
ONIG_OPTION_DOTALL '.' match with newline
|
||||
ONIG_OPTION_MULTILINE same as ONIG_OPTION_DOTALL
|
||||
ONIG_OPTION_IGNORECASE case-insensitive match on
|
||||
ONIG_OPTION_EXTEND extended pattern form
|
||||
ONIG_OPTION_FIND_LONGEST find longest match
|
||||
ONIG_OPTION_FIND_NOT_EMPTY ignore empty match
|
||||
ONIG_OPTION_NEGATE_SINGLELINE
|
||||
clear ONIG_OPTION_SINGLELINE which is enabled on
|
||||
ONIG_SYNTAX_POSIX_BASIC, ONIG_SYNTAX_POSIX_EXTENDED,
|
||||
ONIG_SYNTAX_PERL, ONIG_SYNTAX_PERL58, ONIG_SYNTAX_PERL58_NG,
|
||||
ONIG_SYNTAX_JAVA, ONIG_SYNTAX_PYTHON
|
||||
|
||||
ONIG_OPTION_DONT_CAPTURE_GROUP only named group captured.
|
||||
ONIG_OPTION_CAPTURE_GROUP named and unnamed groups captured.
|
||||
|
||||
ONIG_OPTION_NEWLINE_CRLF
|
||||
Treat CR+LF as a newline too. (default: LF only)
|
||||
To use this option, you must enable the following line in regenc.h.
|
||||
|
||||
/* #define USE_CRNL_AS_LINE_TERMINATOR */
|
||||
|
||||
5 enc: character encoding.
|
||||
|
||||
ONIG_ENCODING_ASCII ASCII
|
||||
ONIG_ENCODING_ISO_8859_1 ISO 8859-1
|
||||
ONIG_ENCODING_ISO_8859_2 ISO 8859-2
|
||||
ONIG_ENCODING_ISO_8859_3 ISO 8859-3
|
||||
ONIG_ENCODING_ISO_8859_4 ISO 8859-4
|
||||
ONIG_ENCODING_ISO_8859_5 ISO 8859-5
|
||||
ONIG_ENCODING_ISO_8859_6 ISO 8859-6
|
||||
ONIG_ENCODING_ISO_8859_7 ISO 8859-7
|
||||
ONIG_ENCODING_ISO_8859_8 ISO 8859-8
|
||||
ONIG_ENCODING_ISO_8859_9 ISO 8859-9
|
||||
ONIG_ENCODING_ISO_8859_10 ISO 8859-10
|
||||
ONIG_ENCODING_ISO_8859_11 ISO 8859-11
|
||||
ONIG_ENCODING_ISO_8859_13 ISO 8859-13
|
||||
ONIG_ENCODING_ISO_8859_14 ISO 8859-14
|
||||
ONIG_ENCODING_ISO_8859_15 ISO 8859-15
|
||||
ONIG_ENCODING_ISO_8859_16 ISO 8859-16
|
||||
ONIG_ENCODING_UTF8 UTF-8
|
||||
ONIG_ENCODING_UTF16_BE UTF-16BE
|
||||
ONIG_ENCODING_UTF16_LE UTF-16LE
|
||||
ONIG_ENCODING_UTF32_BE UTF-32BE
|
||||
ONIG_ENCODING_UTF32_LE UTF-32LE
|
||||
ONIG_ENCODING_EUC_JP EUC-JP
|
||||
ONIG_ENCODING_EUC_TW EUC-TW
|
||||
ONIG_ENCODING_EUC_KR EUC-KR
|
||||
ONIG_ENCODING_EUC_CN EUC-CN
|
||||
ONIG_ENCODING_SJIS Shift_JIS
|
||||
ONIG_ENCODING_KOI8_R KOI8-R
|
||||
ONIG_ENCODING_CP1251 CP1251
|
||||
ONIG_ENCODING_BIG5 Big5
|
||||
ONIG_ENCODING_GB18030 GB18030
|
||||
|
||||
or any OnigEncodingType data address defined by user.
|
||||
|
||||
6 syntax: address of pattern syntax definition.
|
||||
|
||||
ONIG_SYNTAX_ASIS plain text
|
||||
ONIG_SYNTAX_POSIX_BASIC POSIX Basic RE
|
||||
ONIG_SYNTAX_POSIX_EXTENDED POSIX Extended RE
|
||||
ONIG_SYNTAX_EMACS Emacs
|
||||
ONIG_SYNTAX_GREP grep
|
||||
ONIG_SYNTAX_GNU_REGEX GNU regex
|
||||
ONIG_SYNTAX_JAVA Java (Sun java.util.regex)
|
||||
ONIG_SYNTAX_PERL58 Perl 5.8
|
||||
ONIG_SYNTAX_PERL58_NG Perl 5.8 + named group
|
||||
ONIG_SYNTAX_PERL Perl 5.10+
|
||||
ONIG_SYNTAX_PYTHON Python
|
||||
ONIG_SYNTAX_RUBY Ruby
|
||||
ONIG_SYNTAX_DEFAULT default (== Ruby)
|
||||
onig_set_default_syntax()
|
||||
|
||||
or any OnigSyntaxType data address defined by user.
|
||||
|
||||
7 err_info: address for returning optional error info.
|
||||
Use this value as 3rd argument of onig_error_code_to_str().
|
||||
|
||||
|
||||
|
||||
# int onig_new_without_alloc(regex_t* reg, const UChar* pattern,
|
||||
const UChar* pattern_end,
|
||||
OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,
|
||||
OnigErrorInfo* err_info)
|
||||
|
||||
Create a regex object.
|
||||
reg object area is not allocated in this function.
|
||||
|
||||
normal return: ONIG_NORMAL
|
||||
|
||||
|
||||
|
||||
# int onig_new_deluxe(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
|
||||
OnigCompileInfo* ci, OnigErrorInfo* einfo)
|
||||
|
||||
Create a regex object.
|
||||
This function is a deluxe version of onig_new().
|
||||
|
||||
normal return: ONIG_NORMAL
|
||||
|
||||
arguments
|
||||
1 reg: return address of regex object.
|
||||
2 pattern: regex pattern string.
|
||||
3 pattern_end: terminate address of pattern. (pattern + pattern length)
|
||||
4 ci: compile time info.
|
||||
|
||||
ci->num_of_elements: number of elements in ci. (current version: 5)
|
||||
ci->pattern_enc: pattern string character encoding.
|
||||
ci->target_enc: target string character encoding.
|
||||
ci->syntax: address of pattern syntax definition.
|
||||
ci->option: compile time option.
|
||||
ci->case_fold_flag: character matching case fold bit flag for
|
||||
ONIG_OPTION_IGNORECASE mode.
|
||||
|
||||
ONIGENC_CASE_FOLD_MIN: minimum
|
||||
ONIGENC_CASE_FOLD_DEFAULT: minimum
|
||||
onig_set_default_case_fold_flag()
|
||||
|
||||
5 err_info: address for return optional error info.
|
||||
Use this value as 3rd argument of onig_error_code_to_str().
|
||||
|
||||
|
||||
Different character encoding combination is allowed for
|
||||
the following cases only.
|
||||
|
||||
pattern_enc: ASCII, ISO_8859_1
|
||||
target_enc: UTF16_BE, UTF16_LE, UTF32_BE, UTF32_LE
|
||||
|
||||
pattern_enc: UTF16_BE/LE
|
||||
target_enc: UTF16_LE/BE
|
||||
|
||||
pattern_enc: UTF32_BE/LE
|
||||
target_enc: UTF32_LE/BE
|
||||
|
||||
|
||||
# void onig_free(regex_t* reg)
|
||||
|
||||
Free memory used by regex object.
|
||||
|
||||
arguments
|
||||
1 reg: regex object.
|
||||
|
||||
|
||||
# void onig_free_body(regex_t* reg)
|
||||
|
||||
Free memory used by regex object. (Except reg itself.)
|
||||
|
||||
arguments
|
||||
1 reg: regex object.
|
||||
|
||||
|
||||
# OnigPosition onig_search(regex_t* reg, const UChar* str, const UChar* end,
|
||||
const UChar* start, const UChar* range, OnigRegion* region,
|
||||
OnigOptionType option)
|
||||
# OnigPosition onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end,
|
||||
const UChar* global_pos,
|
||||
const UChar* start, const UChar* range, OnigRegion* region,
|
||||
OnigOptionType option)
|
||||
|
||||
Search string and return search result and matching region.
|
||||
|
||||
normal return: match position offset (i.e. p - str >= 0)
|
||||
not found: ONIG_MISMATCH (< 0)
|
||||
|
||||
arguments
|
||||
1 reg: regex object
|
||||
2 str: target string
|
||||
3 end: terminate address of target string
|
||||
4 global_pos: position of \G
|
||||
if not needed, set the same value as str.
|
||||
5 start: search start address of target string
|
||||
6 range: search terminate address of target string
|
||||
in forward search (start <= searched string < range)
|
||||
in backward search (range <= searched string <= start)
|
||||
7 region: address for return group match range info (NULL is allowed)
|
||||
8 option: search time option
|
||||
|
||||
ONIG_OPTION_NOTBOL string head(str) isn't considered as begin of line
|
||||
ONIG_OPTION_NOTEOL string end (end) isn't considered as end of line
|
||||
ONIG_OPTION_POSIX_REGION region argument is regmatch_t[] of POSIX API.
|
||||
|
||||
|
||||
# OnigPosition onig_match(regex_t* reg, const UChar* str, const UChar* end,
|
||||
const UChar* at, OnigRegion* region, OnigOptionType option)
|
||||
|
||||
Match string and return result and matching region.
|
||||
|
||||
normal return: match length (>= 0)
|
||||
not match: ONIG_MISMATCH ( < 0)
|
||||
|
||||
arguments
|
||||
1 reg: regex object
|
||||
2 str: target string
|
||||
3 end: terminate address of target string
|
||||
4 at: match address of target string
|
||||
5 region: address for return group match range info (NULL is allowed)
|
||||
6 option: search time option
|
||||
|
||||
ONIG_OPTION_NOTBOL string head(str) isn't considered as begin of line
|
||||
ONIG_OPTION_NOTEOL string end (end) isn't considered as end of line
|
||||
ONIG_OPTION_POSIX_REGION region argument is regmatch_t[] type of POSIX API.
|
||||
|
||||
|
||||
# OnigRegion* onig_region_new(void)
|
||||
|
||||
Create a region.
|
||||
|
||||
|
||||
# void onig_region_free(OnigRegion* region, int free_self)
|
||||
|
||||
Free memory used by region.
|
||||
|
||||
arguments
|
||||
1 region: target region
|
||||
2 free_self: [1: free all, 0: free memory used in region but not self]
|
||||
|
||||
|
||||
# void onig_region_copy(OnigRegion* to, OnigRegion* from)
|
||||
|
||||
Copy contents of region.
|
||||
|
||||
arguments
|
||||
1 to: target region
|
||||
2 from: source region
|
||||
|
||||
|
||||
# void onig_region_clear(OnigRegion* region)
|
||||
|
||||
Clear contents of region.
|
||||
|
||||
arguments
|
||||
1 region: target region
|
||||
|
||||
|
||||
# int onig_region_resize(OnigRegion* region, int n)
|
||||
|
||||
Resize group range area of region.
|
||||
|
||||
normal return: ONIG_NORMAL
|
||||
|
||||
arguments
|
||||
1 region: target region
|
||||
2 n: new size
|
||||
|
||||
|
||||
# int onig_name_to_group_numbers(regex_t* reg, const UChar* name, const UChar* name_end,
|
||||
int** num_list)
|
||||
|
||||
Return the group number list of the name.
|
||||
Named subexp is defined by (?<name>....).
|
||||
|
||||
normal return: number of groups for the name.
|
||||
(ex. /(?<x>..)(?<x>..)/ ==> 2)
|
||||
name not found: -1
|
||||
|
||||
arguments
|
||||
1 reg: regex object.
|
||||
2 name: group name.
|
||||
3 name_end: terminate address of group name.
|
||||
4 num_list: return list of group number.
|
||||
|
||||
|
||||
# int onig_name_to_backref_number(regex_t* reg, const UChar* name, const UChar* name_end,
|
||||
OnigRegion *region)
|
||||
|
||||
Return the group number corresponding to the named backref (\k<name>).
|
||||
If two or more regions for the groups of the name are effective,
|
||||
the greatest number in it is obtained.
|
||||
|
||||
normal return: group number.
|
||||
|
||||
arguments
|
||||
1 reg: regex object.
|
||||
2 name: group name.
|
||||
3 name_end: terminate address of group name.
|
||||
4 region: search/match result region.
|
||||
|
||||
|
||||
# int onig_foreach_name(regex_t* reg,
|
||||
int (*func)(const UChar*, const UChar*, int,int*,regex_t*,void*),
|
||||
void* arg)
|
||||
|
||||
Iterate function call for all names.
|
||||
|
||||
normal return: 0
|
||||
error: func's return value.
|
||||
|
||||
arguments
|
||||
1 reg: regex object.
|
||||
2 func: callback function.
|
||||
func(name, name_end, <number of groups>, <group number's list>,
|
||||
reg, arg);
|
||||
if func does not return 0, then iteration is stopped.
|
||||
3 arg: argument for func.
|
||||
|
||||
|
||||
# int onig_number_of_names(regex_t* reg)
|
||||
|
||||
Return the number of names defined in the pattern.
|
||||
Multiple definitions of one name are counted as one.
|
||||
|
||||
arguments
|
||||
1 reg: regex object.
|
||||
|
||||
|
||||
# OnigEncoding onig_get_encoding(regex_t* reg)
|
||||
# OnigOptionType onig_get_options(regex_t* reg)
|
||||
# OnigCaseFoldType onig_get_case_fold_flag(regex_t* reg)
|
||||
# OnigSyntaxType* onig_get_syntax(regex_t* reg)
|
||||
|
||||
Return a value of the regex object.
|
||||
|
||||
arguments
|
||||
1 reg: regex object.
|
||||
|
||||
|
||||
# int onig_number_of_captures(regex_t* reg)
|
||||
|
||||
Return the number of capture groups in the pattern.
|
||||
|
||||
arguments
|
||||
1 reg: regex object.
|
||||
|
||||
|
||||
# int onig_number_of_capture_histories(regex_t* reg)
|
||||
|
||||
Return the number of capture histories defined in the pattern.
|
||||
|
||||
You can't use capture history if ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY
|
||||
is disabled in the pattern syntax. (It is disabled in the default syntax.)
|
||||
|
||||
arguments
|
||||
1 reg: regex object.
|
||||
|
||||
|
||||
|
||||
# OnigCaptureTreeNode* onig_get_capture_tree(OnigRegion* region)
|
||||
|
||||
Return the root node of capture history data tree.
|
||||
|
||||
This value is undefined if matching has failed.
|
||||
|
||||
arguments
|
||||
1 region: matching result.
|
||||
|
||||
|
||||
# int onig_capture_tree_traverse(OnigRegion* region, int at,
|
||||
int(*func)(int,OnigPosition,OnigPosition,int,int,void*),
|
||||
void* arg)
|
||||
|
||||
Traverse and callback in capture history data tree.
|
||||
|
||||
normal return: 0
|
||||
error: callback func's return value.
|
||||
|
||||
arguments
|
||||
1 region: match region data.
|
||||
2 at: callback position.
|
||||
|
||||
ONIG_TRAVERSE_CALLBACK_AT_FIRST: callback first, then traverse children.
|
||||
ONIG_TRAVERSE_CALLBACK_AT_LAST: traverse children first, then callback.
|
||||
ONIG_TRAVERSE_CALLBACK_AT_BOTH: callback first, then traverse children,
|
||||
and at last callback again.
|
||||
|
||||
3 func: callback function.
|
||||
if func does not return 0, then traverse is stopped.
|
||||
|
||||
int func(int group, OnigPosition beg, OnigPosition end,
|
||||
int level, int at, void* arg)
|
||||
|
||||
group: group number
|
||||
beg: capture start position
|
||||
end: capture end position
|
||||
level: nest level (from 0)
|
||||
at: callback position
|
||||
ONIG_TRAVERSE_CALLBACK_AT_FIRST
|
||||
ONIG_TRAVERSE_CALLBACK_AT_LAST
|
||||
arg: optional callback argument
|
||||
|
||||
4 arg: optional callback argument.
|
||||
|
||||
|
||||
# int onig_noname_group_capture_is_active(regex_t* reg)
|
||||
|
||||
Return noname group capture activity.
|
||||
|
||||
active: 1
|
||||
inactive: 0
|
||||
|
||||
arguments
|
||||
1 reg: regex object.
|
||||
|
||||
if option ONIG_OPTION_DONT_CAPTURE_GROUP == ON
|
||||
--> inactive
|
||||
|
||||
if the regex pattern has a named group
|
||||
and syntax ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP == ON
|
||||
and option ONIG_OPTION_CAPTURE_GROUP == OFF
|
||||
--> inactive
|
||||
|
||||
else --> active
|
||||
|
||||
|
||||
# UChar* onigenc_get_prev_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
|
||||
|
||||
Return previous character head address.
|
||||
|
||||
arguments
|
||||
1 enc: character encoding
|
||||
2 start: string address
|
||||
3 s: target address of string
|
||||
|
||||
|
||||
# UChar* onigenc_get_left_adjust_char_head(OnigEncoding enc,
|
||||
const UChar* start, const UChar* s)
|
||||
|
||||
Return left-adjusted head address of a character.
|
||||
|
||||
arguments
|
||||
1 enc: character encoding
|
||||
2 start: string address
|
||||
3 s: target address of string
|
||||
|
||||
|
||||
# UChar* onigenc_get_right_adjust_char_head(OnigEncoding enc,
|
||||
const UChar* start, const UChar* s)
|
||||
|
||||
Return right-adjusted head address of a character.
|
||||
|
||||
arguments
|
||||
1 enc: character encoding
|
||||
2 start: string address
|
||||
3 s: target address of string
|
||||
|
||||
|
||||
# int onigenc_strlen(OnigEncoding enc, const UChar* s, const UChar* end)
|
||||
# int onigenc_strlen_null(OnigEncoding enc, const UChar* s)
|
||||
|
||||
Return number of characters in the string.
|
||||
|
||||
|
||||
# int onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s)
|
||||
|
||||
Return number of bytes in the string.
|
||||
|
||||
|
||||
# int onig_set_default_syntax(OnigSyntaxType* syntax)
|
||||
|
||||
Set default syntax.
|
||||
|
||||
arguments
|
||||
1 syntax: address of pattern syntax definition.
|
||||
|
||||
|
||||
# void onig_copy_syntax(OnigSyntaxType* to, const OnigSyntaxType* from)
|
||||
|
||||
Copy syntax.
|
||||
|
||||
arguments
|
||||
1 to: destination address.
|
||||
2 from: source address.
|
||||
|
||||
|
||||
# unsigned int onig_get_syntax_op(OnigSyntaxType* syntax)
|
||||
# unsigned int onig_get_syntax_op2(OnigSyntaxType* syntax)
|
||||
# unsigned int onig_get_syntax_behavior(OnigSyntaxType* syntax)
|
||||
# OnigOptionType onig_get_syntax_options(OnigSyntaxType* syntax)
|
||||
|
||||
# void onig_set_syntax_op(OnigSyntaxType* syntax, unsigned int op)
|
||||
# void onig_set_syntax_op2(OnigSyntaxType* syntax, unsigned int op2)
|
||||
# void onig_set_syntax_behavior(OnigSyntaxType* syntax, unsigned int behavior)
|
||||
# void onig_set_syntax_options(OnigSyntaxType* syntax, OnigOptionType options)
|
||||
|
||||
Get/Set elements of the syntax.
|
||||
|
||||
arguments
|
||||
1 syntax: syntax
|
||||
2 op, op2, behavior, options: value of element.
|
||||
|
||||
|
||||
# void onig_copy_encoding(OnigEncoding to, OnigEncoding from)
|
||||
|
||||
Copy encoding.
|
||||
|
||||
arguments
|
||||
1 to: destination address.
|
||||
2 from: source address.
|
||||
|
||||
|
||||
# int onig_set_meta_char(OnigSyntaxType* syntax, unsigned int what,
|
||||
OnigCodePoint code)
|
||||
|
||||
Set a variable meta character to the code point value.
|
||||
Except for an escape character, this meta characters specification
|
||||
does not work if ONIG_SYN_OP_VARIABLE_META_CHARACTERS is not enabled
|
||||
by the syntax. (Built-in syntaxes do not enable it.)
|
||||
|
||||
normal return: ONIG_NORMAL
|
||||
|
||||
arguments
|
||||
1 syntax: target syntax
|
||||
2 what: specifies which meta character it is.
|
||||
|
||||
ONIG_META_CHAR_ESCAPE
|
||||
ONIG_META_CHAR_ANYCHAR
|
||||
ONIG_META_CHAR_ANYTIME
|
||||
ONIG_META_CHAR_ZERO_OR_ONE_TIME
|
||||
ONIG_META_CHAR_ONE_OR_MORE_TIME
|
||||
ONIG_META_CHAR_ANYCHAR_ANYTIME
|
||||
|
||||
3 code: meta character or ONIG_INEFFECTIVE_META_CHAR.
|
||||
|
||||
|
||||
# OnigCaseFoldType onig_get_default_case_fold_flag()
|
||||
|
||||
Get default case fold flag.
|
||||
|
||||
|
||||
# int onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)
|
||||
|
||||
Set default case fold flag.
|
||||
|
||||
1 case_fold_flag: case fold flag
|
||||
|
||||
|
||||
# unsigned int onig_get_match_stack_limit_size(void)
|
||||
|
||||
Return the maximum match stack size.
|
||||
(default: 0 == unlimited)
|
||||
|
||||
|
||||
# int onig_set_match_stack_limit_size(unsigned int size)
|
||||
|
||||
Set the maximum match stack size.
|
||||
(size = 0: unlimited)
|
||||
|
||||
normal return: ONIG_NORMAL
|
||||
|
||||
|
||||
# int onig_end(void)
|
||||
|
||||
The use of this library is finished.
|
||||
|
||||
normal return: ONIG_NORMAL
|
||||
|
||||
It is not allowed to use regex objects which were created
|
||||
before onig_end() call.
|
||||
|
||||
|
||||
# const char* onig_version(void)
|
||||
|
||||
Return version string. (ex. "5.0.3")
|
||||
|
||||
// END
|
628
src/Onigmo/doc/API.ja
Normal file
628
src/Onigmo/doc/API.ja
Normal file
@ -0,0 +1,628 @@
|
||||
Onigmo インターフェース Version 5.11.4 2011/10/08
|
||||
|
||||
#include <oniguruma.h>
|
||||
|
||||
|
||||
# int onig_init(void)
|
||||
|
||||
ライブラリの初期化
|
||||
|
||||
onig_new()の中で呼び出されるので、この関数を明示的に呼び出さなくてもよい。
|
||||
|
||||
|
||||
# int onig_error_code_to_str(UChar* err_buf, OnigPosition err_code, ...)
|
||||
|
||||
エラーメッセージを取得する。
|
||||
|
||||
この関数を、onig_new()の結果に対して呼び出す場合には、onig_new()のpattern引数を
|
||||
メモリ解放するよりも前に呼び出さなければならない。
|
||||
|
||||
正常終了戻り値: エラーメッセージ文字列のバイト長
|
||||
|
||||
引数
|
||||
1 err_buf: エラーメッセージを格納する領域
|
||||
(必要なサイズ: ONIG_MAX_ERROR_MESSAGE_LEN)
|
||||
2 err_code: エラーコード
|
||||
3 err_info (optional): onig_new()のerr_info
|
||||
|
||||
|
||||
# void onig_set_warn_func(OnigWarnFunc func)
|
||||
|
||||
警告通知関数をセットする。
|
||||
|
||||
警告:
|
||||
'[', '-', ']' in character class without escape.
|
||||
']' in pattern without escape.
|
||||
|
||||
引数
|
||||
1 func: 警告関数 void (*func)(char* warning_message)
|
||||
|
||||
|
||||
# void onig_set_verb_warn_func(OnigWarnFunc func)
|
||||
|
||||
詳細警告通知関数をセットする。
|
||||
|
||||
詳細警告:
|
||||
redundant nested repeat operator.
|
||||
|
||||
引数
|
||||
1 func: 詳細警告関数 void (*func)(char* warning_message)
|
||||
|
||||
|
||||
# int onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
|
||||
OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,
|
||||
OnigErrorInfo* err_info)
|
||||
|
||||
正規表現オブジェクト(regex)を作成する。
|
||||
|
||||
正常終了戻り値: ONIG_NORMAL
|
||||
|
||||
引数
|
||||
1 reg: 作成された正規表現オブジェクトを返すアドレス
|
||||
2 pattern: 正規表現パターン文字列
|
||||
3 pattern_end: 正規表現パターン文字列の終端アドレス(pattern + pattern length)
|
||||
4 option: 正規表現コンパイル時オプション
|
||||
|
||||
ONIG_OPTION_NONE オプションなし
|
||||
ONIG_OPTION_SINGLELINE '^' -> '\A', '$' -> '\Z'
|
||||
ONIG_OPTION_DOTALL '.'が改行にマッチする
|
||||
ONIG_OPTION_MULTILINE ONIG_OPTION_DOTALLと同じ
|
||||
ONIG_OPTION_IGNORECASE 曖昧マッチ オン
|
||||
ONIG_OPTION_EXTEND パターン拡張形式
|
||||
ONIG_OPTION_FIND_LONGEST 最長マッチ
|
||||
ONIG_OPTION_FIND_NOT_EMPTY 空マッチを無視
|
||||
ONIG_OPTION_NEGATE_SINGLELINE
|
||||
ONIG_SYNTAX_POSIX_BASIC, ONIG_SYNTAX_POSIX_EXTENDED,
|
||||
ONIG_SYNTAX_PERL, ONIG_SYNTAX_PERL58, ONIG_SYNTAX_PERL58_NG,
|
||||
ONIG_SYNTAX_JAVA, ONIG_SYNTAX_PYTHONで
|
||||
デフォルトで有効なONIG_OPTION_SINGLELINEをクリアする。
|
||||
|
||||
ONIG_OPTION_DONT_CAPTURE_GROUP 名前付き捕獲式集合のみ捕獲
|
||||
ONIG_OPTION_CAPTURE_GROUP 名前無し捕獲式集合も捕獲
|
||||
|
||||
ONIG_OPTION_NEWLINE_CRLF
|
||||
CR+LFも改行として扱う。(デフォルトではLFのみ。)
|
||||
この機能を使うには、regenc.hの以下の行を有効にする必要がある。
|
||||
|
||||
/* #define USE_CRNL_AS_LINE_TERMINATOR */
|
||||
|
||||
5 enc: 文字エンコーディング
|
||||
|
||||
ONIG_ENCODING_ASCII ASCII
|
||||
ONIG_ENCODING_ISO_8859_1 ISO 8859-1
|
||||
ONIG_ENCODING_ISO_8859_2 ISO 8859-2
|
||||
ONIG_ENCODING_ISO_8859_3 ISO 8859-3
|
||||
ONIG_ENCODING_ISO_8859_4 ISO 8859-4
|
||||
ONIG_ENCODING_ISO_8859_5 ISO 8859-5
|
||||
ONIG_ENCODING_ISO_8859_6 ISO 8859-6
|
||||
ONIG_ENCODING_ISO_8859_7 ISO 8859-7
|
||||
ONIG_ENCODING_ISO_8859_8 ISO 8859-8
|
||||
ONIG_ENCODING_ISO_8859_9 ISO 8859-9
|
||||
ONIG_ENCODING_ISO_8859_10 ISO 8859-10
|
||||
ONIG_ENCODING_ISO_8859_11 ISO 8859-11
|
||||
ONIG_ENCODING_ISO_8859_13 ISO 8859-13
|
||||
ONIG_ENCODING_ISO_8859_14 ISO 8859-14
|
||||
ONIG_ENCODING_ISO_8859_15 ISO 8859-15
|
||||
ONIG_ENCODING_ISO_8859_16 ISO 8859-16
|
||||
ONIG_ENCODING_UTF8 UTF-8
|
||||
ONIG_ENCODING_UTF16_BE UTF-16BE
|
||||
ONIG_ENCODING_UTF16_LE UTF-16LE
|
||||
ONIG_ENCODING_UTF32_BE UTF-32BE
|
||||
ONIG_ENCODING_UTF32_LE UTF-32LE
|
||||
ONIG_ENCODING_EUC_JP EUC-JP
|
||||
ONIG_ENCODING_EUC_TW EUC-TW
|
||||
ONIG_ENCODING_EUC_KR EUC-KR
|
||||
ONIG_ENCODING_EUC_CN EUC-CN
|
||||
ONIG_ENCODING_SJIS Shift_JIS
|
||||
ONIG_ENCODING_KOI8_R KOI8-R
|
||||
ONIG_ENCODING_CP1251 CP1251
|
||||
ONIG_ENCODING_BIG5 Big5
|
||||
ONIG_ENCODING_GB18030 GB18030
|
||||
|
||||
または、ユーザが定義したOnigEncodingTypeデータのアドレス
|
||||
|
||||
6 syntax: 正規表現パターン文法定義
|
||||
|
||||
ONIG_SYNTAX_ASIS plain text
|
||||
ONIG_SYNTAX_POSIX_BASIC POSIX Basic RE
|
||||
ONIG_SYNTAX_POSIX_EXTENDED POSIX Extended RE
|
||||
ONIG_SYNTAX_EMACS Emacs
|
||||
ONIG_SYNTAX_GREP grep
|
||||
ONIG_SYNTAX_GNU_REGEX GNU regex
|
||||
ONIG_SYNTAX_JAVA Java (Sun java.util.regex)
|
||||
ONIG_SYNTAX_PERL58 Perl 5.8
|
||||
ONIG_SYNTAX_PERL58_NG Perl 5.8 + 名前付き捕獲式集合
|
||||
ONIG_SYNTAX_PERL Perl 5.10以降
|
||||
ONIG_SYNTAX_PYTHON Python
|
||||
ONIG_SYNTAX_RUBY Ruby
|
||||
ONIG_SYNTAX_DEFAULT default (== Ruby)
|
||||
onig_set_default_syntax()
|
||||
|
||||
または、ユーザが定義したOnigSyntaxTypeデータのアドレス
|
||||
|
||||
7 err_info: エラー情報を返すためのアドレス
|
||||
onig_error_code_to_str()の三番目の引数として使用する
|
||||
|
||||
|
||||
|
||||
# int onig_new_without_alloc(regex_t* reg, const UChar* pattern,
|
||||
const UChar* pattern_end,
|
||||
OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,
|
||||
OnigErrorInfo* err_info)
|
||||
|
||||
正規表現オブジェクト(regex)を作成する。
|
||||
regの領域を内部で割り当てない。
|
||||
|
||||
正常終了戻り値: ONIG_NORMAL
|
||||
|
||||
|
||||
|
||||
# int onig_new_deluxe(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
|
||||
OnigCompileInfo* ci, OnigErrorInfo* einfo)
|
||||
|
||||
正規表現オブジェクト(regex)を作成する。
|
||||
この関数は、onig_new()のデラックス版。
|
||||
|
||||
正常終了戻り値: ONIG_NORMAL
|
||||
|
||||
引数
|
||||
1 reg: 作成された正規表現オブジェクトを返すアドレス
|
||||
2 pattern: 正規表現パターン文字列
|
||||
3 pattern_end: 正規表現パターン文字列の終端アドレス(pattern + pattern length)
|
||||
4 ci: コンパイル情報
|
||||
|
||||
ci->num_of_elements: ciの要素数 (現在の版では: 5)
|
||||
ci->pattern_enc: パターン文字列の文字エンコーディング
|
||||
ci->target_enc: 対象文字列の文字エンコーディング
|
||||
ci->syntax: 正規表現パターン文法定義
|
||||
ci->option: 正規表現コンパイル時オプション
|
||||
ci->case_fold_flag: ONIG_OPTION_IGNORECASEモードでの
|
||||
文字曖昧マッチ指定ビットフラグ
|
||||
|
||||
ONIGENC_CASE_FOLD_MIN: 最小
|
||||
ONIGENC_CASE_FOLD_DEFAULT: 最小
|
||||
onig_set_default_case_fold_flag()
|
||||
|
||||
5 err_info: エラー情報を返すためのアドレス
|
||||
onig_error_code_to_str()の三番目の引数として使用する
|
||||
|
||||
|
||||
異なる文字エンコーディングの組み合わせは、以下の場合にのみ許される。
|
||||
|
||||
pattern_enc: ASCII, ISO_8859_1
|
||||
target_enc: UTF16_BE, UTF16_LE, UTF32_BE, UTF32_LE
|
||||
|
||||
pattern_enc: UTF16_BE/LE
|
||||
target_enc: UTF16_LE/BE
|
||||
|
||||
pattern_enc: UTF32_BE/LE
|
||||
target_enc: UTF32_LE/BE
|
||||
|
||||
|
||||
# void onig_free(regex_t* reg)
|
||||
|
||||
正規表現オブジェクトのメモリを解放する。
|
||||
|
||||
引数
|
||||
1 reg: 正規表現オブジェクト
|
||||
|
||||
|
||||
# void onig_free_body(regex_t* reg)
|
||||
|
||||
正規表現オブジェクトのメモリを解放する。(reg自身の領域を除いて)
|
||||
|
||||
引数
|
||||
1 reg: 正規表現オブジェクト
|
||||
|
||||
|
||||
|
||||
# OnigPosition onig_search(regex_t* reg, const UChar* str, const UChar* end,
|
||||
const UChar* start, const UChar* range, OnigRegion* region,
|
||||
OnigOptionType option)
|
||||
# OnigPosition onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end,
|
||||
const UChar* global_pos,
|
||||
const UChar* start, const UChar* range, OnigRegion* region,
|
||||
OnigOptionType option)
|
||||
|
||||
正規表現で文字列を検索し、検索結果とマッチ領域を返す。
|
||||
|
||||
正常終了戻り値: マッチ位置 (p - str >= 0)
|
||||
検索失敗: ONIG_MISMATCH (< 0)
|
||||
|
||||
引数
|
||||
1 reg: 正規表現オブジェクト
|
||||
2 str: 検索対象文字列
|
||||
3 end: 検索対象文字列の終端アドレス
|
||||
4 global_pos: \Gのアドレス
|
||||
不要時はstrと同じ値を設定すること
|
||||
5 start: 検索対象文字列の検索先頭位置アドレス
|
||||
6 range: 検索対象文字列の検索終了位置アドレス
|
||||
前方探索 (start <= 探索される文字列 < range)
|
||||
後方探索 (range <= 探索される文字列 <= start)
|
||||
7 region: マッチ領域情報(region) (NULLも許される)
|
||||
8 option: 検索時オプション
|
||||
|
||||
ONIG_OPTION_NOTBOL 文字列の先頭(str)を行頭と看做さない
|
||||
ONIG_OPTION_NOTEOL 文字列の終端(end)を行末と看做さない
|
||||
ONIG_OPTION_POSIX_REGION region引数をPOSIX APIのregmatch_t[]にする
|
||||
|
||||
|
||||
# OnigPosition onig_match(regex_t* reg, const UChar* str, const UChar* end,
|
||||
const UChar* at, OnigRegion* region, OnigOptionType option)
|
||||
|
||||
文字列の指定位置でマッチングを行い、結果とマッチ領域を返す。
|
||||
|
||||
正常終了戻り値: マッチしたバイト長 (>= 0)
|
||||
not match: ONIG_MISMATCH ( < 0)
|
||||
|
||||
引数
|
||||
1 reg: 正規表現オブジェクト
|
||||
2 str: 検索対象文字列
|
||||
3 end: 検索対象文字列の終端アドレス
|
||||
4 at: 検索対象文字列の検索アドレス
|
||||
5 region: マッチ領域情報(region) (NULLも許される)
|
||||
6 option: 検索時オプション
|
||||
|
||||
ONIG_OPTION_NOTBOL 文字列の先頭(str)を行頭と看做さない
|
||||
ONIG_OPTION_NOTEOL 文字列の終端(end)を行末と看做さない
|
||||
ONIG_OPTION_POSIX_REGION region引数をPOSIX APIのregmatch_t[]にする
|
||||
|
||||
|
||||
# OnigRegion* onig_region_new(void)
|
||||
|
||||
マッチ領域情報(region)を作成する。
|
||||
|
||||
|
||||
# void onig_region_free(OnigRegion* region, int free_self)
|
||||
|
||||
マッチ領域情報(region)で使用されているメモリを解放する。
|
||||
|
||||
引数
|
||||
1 region: マッチ領域情報オブジェクト
|
||||
2 free_self: [1: region自身を含めて全て解放, 0: region自身は解放しない]
|
||||
|
||||
|
||||
# void onig_region_copy(OnigRegion* to, OnigRegion* from)
|
||||
|
||||
マッチ領域情報(region)を複製する。
|
||||
|
||||
引数
|
||||
1 to: 対象領域
|
||||
2 from: 元領域
|
||||
|
||||
|
||||
# void onig_region_clear(OnigRegion* region)
|
||||
|
||||
マッチ領域情報(region)の中味をクリアする。
|
||||
|
||||
引数
|
||||
1 region: 対象領域
|
||||
|
||||
|
||||
# int onig_region_resize(OnigRegion* region, int n)
|
||||
|
||||
マッチ領域情報(region)の捕獲式集合(グループ)数を変更する。
|
||||
|
||||
正常終了戻り値: ONIG_NORMAL
|
||||
|
||||
引数
|
||||
1 region: 対象領域
|
||||
2 n: 新しいサイズ
|
||||
|
||||
|
||||
# int onig_name_to_group_numbers(regex_t* reg, const UChar* name, const UChar* name_end,
|
||||
int** num_list)
|
||||
|
||||
指定した名前に対する名前付き捕獲式集合(グループ)の
|
||||
グループ番号リストを返す。
|
||||
名前付き捕獲式集合は、(?<name>....)によって定義できる。
|
||||
|
||||
正常終了戻り値: 指定された名前に対するグループ数
|
||||
(例 /(?<x>..)(?<x>..)/ ==> 2)
|
||||
名前に対するグループが存在しない: -1
|
||||
|
||||
引数
|
||||
1 reg: 正規表現オブジェクト
|
||||
2 name: 捕獲式集合(グループ)名
|
||||
3 name_end: 捕獲式集合(グループ)名の終端アドレス
|
||||
4 num_list: 番号リストを返すアドレス
|
||||
|
||||
|
||||
# int onig_name_to_backref_number(regex_t* reg, const UChar* name, const UChar* name_end,
|
||||
OnigRegion *region)
|
||||
|
||||
指定された名前の後方参照(\k<name>)に対する捕獲式集合(グループ)の番号を返す。
|
||||
名前に対して、複数のマッチ領域が有効であれば、その中の最大の番号を返す。
|
||||
名前に対する捕獲式集合が一個しかないときには、対応するマッチ領域が有効か
|
||||
どうかに関係なく、その番号を返す。(従って、regionにはNULLを渡してもよい。)
|
||||
|
||||
正常終了戻り値: 番号
|
||||
|
||||
引数
|
||||
1 reg: 正規表現オブジェクト
|
||||
2 name: 捕獲式集合(グループ)名
|
||||
3 name_end: 捕獲式集合(グループ)名の終端アドレス
|
||||
4 region: search/match結果のマッチ領域
|
||||
|
||||
|
||||
# int onig_foreach_name(regex_t* reg,
|
||||
int (*func)(const UChar*, const UChar*, int,int*,regex_t*,void*),
|
||||
void* arg)
|
||||
|
||||
全ての名前に対してコールバック関数呼び出しを実行する。
|
||||
|
||||
正常終了戻り値: 0
|
||||
エラー: コールバック関数の戻り値
|
||||
|
||||
引数
|
||||
1 reg: 正規表現オブジェクト
|
||||
2 func: コールバック関数
|
||||
func(name, name_end, <number of groups>, <group number's list>,
|
||||
reg, arg);
|
||||
|
||||
funcが0以外の値を返すと、それ以降のコールバックは行なわずに
|
||||
終了する。
|
||||
|
||||
3 arg: funcに対する追加引数
|
||||
|
||||
|
||||
# int onig_number_of_names(regex_t* reg)
|
||||
|
||||
パターン中で定義された名前の数を返す。
|
||||
一個の名前の多重定義は一個と看做す。
|
||||
|
||||
引数
|
||||
1 reg: 正規表現オブジェクト
|
||||
|
||||
|
||||
# OnigEncoding onig_get_encoding(regex_t* reg)
|
||||
# OnigOptionType onig_get_options(regex_t* reg)
|
||||
# OnigCaseFoldType onig_get_case_fold_flag(regex_t* reg)
|
||||
# OnigSyntaxType* onig_get_syntax(regex_t* reg)
|
||||
|
||||
正規表現オブジェクトに対して、対応する値を返す。
|
||||
|
||||
引数
|
||||
1 reg: 正規表現オブジェクト
|
||||
|
||||
|
||||
# int onig_number_of_captures(regex_t* reg)
|
||||
|
||||
パターン中で定義された捕獲グループの数を返す。
|
||||
|
||||
引数
|
||||
1 reg: 正規表現オブジェクト
|
||||
|
||||
|
||||
# int onig_number_of_capture_histories(regex_t* reg)
|
||||
|
||||
パターン中で定義された捕獲履歴(?@...)の数を返す。
|
||||
|
||||
使用する文法で捕獲履歴機能が有効(ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY)
|
||||
でなければ、捕獲履歴機能は使用できない。
|
||||
|
||||
引数
|
||||
1 reg: 正規表現オブジェクト
|
||||
|
||||
|
||||
# OnigCaptureTreeNode* onig_get_capture_tree(OnigRegion* region)
|
||||
|
||||
捕獲履歴データのルートノードを返す。
|
||||
|
||||
マッチが失敗している場合には、この値は不定である。
|
||||
|
||||
引数
|
||||
1 region: マッチ領域
|
||||
|
||||
|
||||
# int onig_capture_tree_traverse(OnigRegion* region, int at,
|
||||
int(*func)(int,OnigPosition,OnigPosition,int,int,void*),
|
||||
void* arg)
|
||||
|
||||
捕獲履歴データ木を巡回してコールバックする。
|
||||
|
||||
正常終了戻り値: 0
|
||||
エラー: コールバック関数の戻り値
|
||||
|
||||
引数
|
||||
1 region: マッチ領域
|
||||
2 at: コールバックを行なうタイミング
|
||||
|
||||
ONIG_TRAVERSE_CALLBACK_AT_FIRST:
|
||||
最初にコールバックして、子ノードを巡回
|
||||
ONIG_TRAVERSE_CALLBACK_AT_LAST:
|
||||
子ノードを巡回して、コールバック
|
||||
ONIG_TRAVERSE_CALLBACK_AT_BOTH:
|
||||
最初にコールバックして、子ノードを巡回、最後にもう一度コールバック
|
||||
|
||||
3 func: コールバック関数
|
||||
funcが0以外の値を返すと、それ以降の巡回は行なわずに
|
||||
終了する。
|
||||
|
||||
int func(int group, OnigPosition beg, OnigPosition end,
|
||||
int level, int at, void* arg)
|
||||
group: グループ番号
|
||||
beg: マッチ開始位置
|
||||
end: マッチ終了位置
|
||||
level: ネストレベル (0から)
|
||||
at: コールバックが呼び出されたタイミング
|
||||
ONIG_TRAVERSE_CALLBACK_AT_FIRST
|
||||
ONIG_TRAVERSE_CALLBACK_AT_LAST
|
||||
arg: 追加引数
|
||||
|
||||
4 arg: funcに対する追加引数
|
||||
|
||||
|
||||
# int onig_noname_group_capture_is_active(regex_t* reg)
|
||||
|
||||
名前なし式集合の捕獲機能が有効かどうかを返す。
|
||||
|
||||
有効: 1
|
||||
無効: 0
|
||||
|
||||
引数
|
||||
1 reg: 正規表現オブジェクト
|
||||
|
||||
|
||||
オプションのONIG_OPTION_DONT_CAPTURE_GROUPがON --> 無効
|
||||
|
||||
パターンが名前つき式集合を使用している
|
||||
AND 使用文法で、ONIG_SYN_CAPTURE_ONLY_NAMED_GROUPがON
|
||||
AND オプションのONIG_OPTION_CAPTURE_GROUPがOFF
|
||||
--> 無効
|
||||
|
||||
上記以外の場合 --> 有効
|
||||
|
||||
|
||||
# UChar* onigenc_get_prev_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
|
||||
|
||||
文字一個分前の文字列位置を返す。
|
||||
|
||||
引数
|
||||
1 enc: 文字エンコーディング
|
||||
2 start: 文字列の先頭アドレス
|
||||
3 s: 文字列中の位置
|
||||
|
||||
|
||||
# UChar* onigenc_get_left_adjust_char_head(OnigEncoding enc,
|
||||
const UChar* start, const UChar* s)
|
||||
|
||||
文字の先頭バイト位置になるように左側に調整したアドレスを返す。
|
||||
|
||||
引数
|
||||
1 enc: 文字エンコーディング
|
||||
2 start: 文字列の先頭アドレス
|
||||
3 s: 文字列中の位置
|
||||
|
||||
|
||||
# UChar* onigenc_get_right_adjust_char_head(OnigEncoding enc,
|
||||
const UChar* start, const UChar* s)
|
||||
|
||||
文字の先頭バイト位置になるように右側に調整したアドレスを返す。
|
||||
|
||||
引数
|
||||
1 enc: 文字エンコーディング
|
||||
2 start: 文字列の先頭アドレス
|
||||
3 s: 文字列中の位置
|
||||
|
||||
|
||||
# int onigenc_strlen(OnigEncoding enc, const UChar* s, const UChar* end)
|
||||
# int onigenc_strlen_null(OnigEncoding enc, const UChar* s)
|
||||
|
||||
文字列の文字数を返す。
|
||||
|
||||
|
||||
# int onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s)
|
||||
|
||||
文字列のバイト数を返す。
|
||||
|
||||
|
||||
# int onig_set_default_syntax(OnigSyntaxType* syntax)
|
||||
|
||||
デフォルトの正規表現パターン文法をセットする。
|
||||
|
||||
引数
|
||||
1 syntax: 正規表現パターン文法
|
||||
|
||||
|
||||
# void onig_copy_syntax(OnigSyntaxType* to, const OnigSyntaxType* from)
|
||||
|
||||
正規表現パターン文法をコピーする。
|
||||
|
||||
引数
|
||||
1 to: 対象
|
||||
2 from: 元
|
||||
|
||||
|
||||
# unsigned int onig_get_syntax_op(OnigSyntaxType* syntax)
|
||||
# unsigned int onig_get_syntax_op2(OnigSyntaxType* syntax)
|
||||
# unsigned int onig_get_syntax_behavior(OnigSyntaxType* syntax)
|
||||
# OnigOptionType onig_get_syntax_options(OnigSyntaxType* syntax)
|
||||
|
||||
# void onig_set_syntax_op(OnigSyntaxType* syntax, unsigned int op)
|
||||
# void onig_set_syntax_op2(OnigSyntaxType* syntax, unsigned int op2)
|
||||
# void onig_set_syntax_behavior(OnigSyntaxType* syntax, unsigned int behavior)
|
||||
# void onig_set_syntax_options(OnigSyntaxType* syntax, OnigOptionType options)
|
||||
|
||||
正規表現パターン文法の要素を取得/設定する。
|
||||
|
||||
引数
|
||||
1 syntax: 正規表現パターン文法
|
||||
2 op, op2, behavior, options: 要素の値
|
||||
|
||||
|
||||
# void onig_copy_encoding(OnigEncoding to, OnigEncoding from)
|
||||
|
||||
文字エンコーディングをコピーする。
|
||||
|
||||
引数
|
||||
1 to: 対象
|
||||
2 from: 元
|
||||
|
||||
|
||||
# int onig_set_meta_char(OnigSyntaxType* syntax, unsigned int what,
|
||||
OnigCodePoint code)
|
||||
|
||||
メタ文字を指定したコードポイント値にセットする。
|
||||
ONIG_SYN_OP_VARIABLE_META_CHARACTERSが正規表現パターン文法で有効に
|
||||
なっていない場合には、エスケープ文字を除いて、ここで指定したメタ文字は
|
||||
機能しない。(組込みの文法では有効にしていない。)
|
||||
|
||||
正常終了戻り値: ONIG_NORMAL
|
||||
|
||||
引数
|
||||
1 syntax: 対象文法
|
||||
2 what: メタ文字機能の指定
|
||||
|
||||
ONIG_META_CHAR_ESCAPE
|
||||
ONIG_META_CHAR_ANYCHAR
|
||||
ONIG_META_CHAR_ANYTIME
|
||||
ONIG_META_CHAR_ZERO_OR_ONE_TIME
|
||||
ONIG_META_CHAR_ONE_OR_MORE_TIME
|
||||
ONIG_META_CHAR_ANYCHAR_ANYTIME
|
||||
|
||||
3 code: メタ文字のコードポイント または ONIG_INEFFECTIVE_META_CHAR.
|
||||
|
||||
|
||||
# OnigCaseFoldType onig_get_default_case_fold_flag()
|
||||
|
||||
デフォルトのcase foldフラグを取得する。
|
||||
|
||||
|
||||
# int onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)
|
||||
|
||||
デフォルトのcase foldフラグをセットする。
|
||||
|
||||
引数
|
||||
1 case_fold_flag: case foldフラグ
|
||||
|
||||
|
||||
# unsigned int onig_get_match_stack_limit_size(void)
|
||||
|
||||
マッチスタックサイズの最大値を返す。
|
||||
(デフォルト: 0 == 無制限)
|
||||
|
||||
|
||||
# int onig_set_match_stack_limit_size(unsigned int size)
|
||||
|
||||
マッチスタックサイズの最大値を指定する。
|
||||
(size = 0: 無制限)
|
||||
|
||||
正常終了戻り値: ONIG_NORMAL
|
||||
|
||||
|
||||
# int onig_end(void)
|
||||
|
||||
ライブラリの使用を終了する。
|
||||
|
||||
正常終了戻り値: ONIG_NORMAL
|
||||
|
||||
onig_init()を再度呼び出しても、以前に作成した正規表現オブジェクト
|
||||
を使用することはできない。
|
||||
|
||||
|
||||
# const char* onig_version(void)
|
||||
|
||||
バージョン文字列を返す。(例 "5.0.3")
|
||||
|
||||
// END
|
47
src/Onigmo/doc/FAQ
Normal file
47
src/Onigmo/doc/FAQ
Normal file
@ -0,0 +1,47 @@
|
||||
FAQ 2011/09/18
|
||||
|
||||
1. Longest match
|
||||
|
||||
You can execute longest match by using ONIG_OPTION_FIND_LONGEST option
|
||||
in onig_new().
|
||||
|
||||
|
||||
2. Thread safe
|
||||
|
||||
To make the library thread-safe, either (A) or (B) must be done.
|
||||
|
||||
(A) Onigmo Layer
|
||||
|
||||
Define the macro below in oniguruma/regint.h.
|
||||
|
||||
USE_MULTI_THREAD_SYSTEM
|
||||
THREAD_ATOMIC_START
|
||||
THREAD_ATOMIC_END
|
||||
THREAD_PASS
|
||||
|
||||
THREAD_SYSTEM_INIT
|
||||
THREAD_SYSTEM_END
|
||||
|
||||
|
||||
(B) Application Layer
|
||||
|
||||
Multiple threads must not simultaneously create
|
||||
new regexp objects, re-compile objects, or free objects,
|
||||
even if the objects involved are different.
|
||||
|
||||
|
||||
3. CR + LF
|
||||
|
||||
DOS newline (CR (0x0d) + LF (0x0a) sequence)
|
||||
|
||||
Enable the following line in regenc.h, and use ONIG_OPTION_NEWLINE_CRLF
|
||||
option in onig_new().
|
||||
|
||||
/* #define USE_CRNL_AS_LINE_TERMINATOR */
|
||||
|
||||
|
||||
4. Mailing list
|
||||
|
||||
There is no mailing list about Onigmo/Oniguruma.
|
||||
|
||||
// END
|
131
src/Onigmo/doc/FAQ.ja
Normal file
131
src/Onigmo/doc/FAQ.ja
Normal file
@ -0,0 +1,131 @@
|
||||
FAQ 2011/09/18
|
||||
|
||||
1. 最長マッチ
|
||||
|
||||
onig_new()の中で、ONIG_OPTION_FIND_LONGESTオプション
|
||||
を使用すれば最長マッチになる。
|
||||
|
||||
|
||||
2. スレッドセーフ
|
||||
|
||||
スレッドセーフにするには、以下の(A)と(B)のどちらかを行なえば
|
||||
よい。
|
||||
|
||||
(A) Onigmo Layer
|
||||
|
||||
oniguruma/regint.hの中の以下のマクロを定義する。
|
||||
|
||||
USE_MULTI_THREAD_SYSTEM
|
||||
THREAD_ATOMIC_START
|
||||
THREAD_ATOMIC_END
|
||||
THREAD_PASS
|
||||
|
||||
何らかの初期化/終了処理が必要であれば、以下のマクロに定義する。
|
||||
THREAD_SYSTEM_INIT
|
||||
THREAD_SYSTEM_END
|
||||
|
||||
|
||||
(B) Application Layer
|
||||
|
||||
同時に複数のスレッドが、正規表現オブジェクトを作成する、
|
||||
または解放する、ことを行なってはならない。
|
||||
それらのオブジェクトが全く別のものであっても。
|
||||
|
||||
もう少し詳しい説明は、このドキュメントの中の
|
||||
"スレッドセーフに関する補足"に書いておいた。
|
||||
|
||||
|
||||
3. CR + LF
|
||||
|
||||
DOSの改行(CR(0x0d) + LF(0x0a)の連続)
|
||||
|
||||
regenc.hの中の、以下の部分を有効にし、onig_new()で
|
||||
ONIG_OPTION_NEWLINE_CRLFオプションを使用する。
|
||||
|
||||
/* #define USE_CRNL_AS_LINE_TERMINATOR */
|
||||
|
||||
|
||||
4. メーリングリスト
|
||||
|
||||
鬼雲/鬼車に関するメーリングリストは存在しない。
|
||||
|
||||
//END
|
||||
|
||||
|
||||
|
||||
スレッドセーフに関する補足
|
||||
|
||||
スレッドセーフにするには、個別のアプリケーションの中で行うか、
|
||||
Onigurumaライブラリの中で行うか、どちらかを選ぶことができます。
|
||||
(Onigurumaを使用する側で対処するか、Onigurumaに対処させるか
|
||||
どちらか片方で行う必要があるということです。)
|
||||
|
||||
これらの方法について、以下(A)と(B)で説明します。
|
||||
|
||||
マルチスレッドAPIは、それぞれのプラットフォームによっても
|
||||
異なりますので、以下の説明の中で具体的に何を呼ぶのかを
|
||||
書くことは無理です。実際に使用されるマルチスレッドAPIで、
|
||||
対応する機能のものを指定してください。
|
||||
|
||||
(A) Onigurumaの中で対応する場合
|
||||
|
||||
oniguruma/regint.hの中で以下のマクロを定義して再コンパイルしてください。
|
||||
|
||||
USE_MULTI_THREAD_SYSTEM
|
||||
|
||||
単に有効にすればよいです。
|
||||
|
||||
THREAD_ATOMIC_START
|
||||
THREAD_ATOMIC_END
|
||||
|
||||
THREAD_ATOMIC_STARTからTHREAD_ATOMIC_ENDで囲まれた
|
||||
プログラムのコード部分をあるスレッドが実行中に、他の
|
||||
スレッドに実行権が移動しないことを保証するものに定義
|
||||
してください。
|
||||
(名前の通り、囲まれたコード部分をスレッドアトミックに
|
||||
するという意味)
|
||||
|
||||
THREAD_PASS
|
||||
|
||||
これを実行したスレッドから、他のスレッドに実行権を委譲
|
||||
するものに定義をしてください。(再スケジュールを呼び出す
|
||||
という意味)
|
||||
対応する機能が全くなければ、空定義にしてください。
|
||||
|
||||
(参考例)
|
||||
Rubyの場合を例にすると、
|
||||
Rubyは自分自身で独自のスレッド機能を実装しています。
|
||||
その機能を使用すると、以下のように定義すればよいことに
|
||||
なります。
|
||||
|
||||
#define USE_MULTI_THREAD_SYSTEM
|
||||
#define THREAD_SYSTEM_INIT
|
||||
#define THREAD_SYSTEM_END
|
||||
#define THREAD_ATOMIC_START DEFER_INTS
|
||||
#define THREAD_ATOMIC_END ENABLE_INTS
|
||||
#define THREAD_PASS rb_thread_schedule()
|
||||
|
||||
Rubyの場合、タイマ割り込みを使用して、スレッドの切り替えを
|
||||
行っています。DEFER_INTSは割り込みハンドラの実行を一時的に
|
||||
止めるためのマクロです。ENABLE_INTSマクロで割り込みハンドラ
|
||||
の実行を許可します。
|
||||
これによって、THREAD_ATOMIC_STARTからTHREAD_ATOMIC_END
|
||||
で囲まれた部分の実行中に、他のスレッドに実行権が移動しません。
|
||||
|
||||
|
||||
(B) アプリケーションの中で対応する場合
|
||||
|
||||
以下を保証するように、スレッドの実行を制御してください。
|
||||
|
||||
同時に複数のスレッドが、正規表現オブジェクトを作成する、または解放する、ことを
|
||||
行なってはならない。それらのオブジェクトが全く別のものであっても。
|
||||
|
||||
onig_new(), onig_new_deluxe(), onig_free()のどれかの呼び出しを、
|
||||
複数のスレッドが同時に実行することを避けてください。同時でなければ別にかまいません。
|
||||
|
||||
これは何故必要なのかというと、正規表現オブジェクトを作成する
|
||||
過程で、内部で共通に参照するテーブルがあります。
|
||||
このテーブルに対してのデータ登録処理が複数のスレッドで衝突して
|
||||
異常な状態にならないために必要です。
|
||||
|
||||
// END
|
522
src/Onigmo/doc/RE
Normal file
522
src/Onigmo/doc/RE
Normal file
@ -0,0 +1,522 @@
|
||||
Onigmo (Oniguruma-mod) Regular Expressions Version 5.13.0 2012/01/19
|
||||
|
||||
syntax: ONIG_SYNTAX_RUBY (default)
|
||||
|
||||
|
||||
1. Syntax elements
|
||||
|
||||
\ escape (enable or disable meta character meaning)
|
||||
| alternation
|
||||
(...) group
|
||||
[...] character class
|
||||
|
||||
|
||||
2. Characters
|
||||
|
||||
\t horizontal tab (0x09)
|
||||
\v vertical tab (0x0B)
|
||||
\n newline (0x0A)
|
||||
\r return (0x0D)
|
||||
\b back space (0x08)
|
||||
\f form feed (0x0C)
|
||||
\a bell (0x07)
|
||||
\e escape (0x1B)
|
||||
\nnn octal char (encoded byte value)
|
||||
\xHH hexadecimal char (encoded byte value)
|
||||
\x{7HHHHHHH} wide hexadecimal char (character code point value)
|
||||
\cx control char (character code point value)
|
||||
\C-x control char (character code point value)
|
||||
\M-x meta (x|0x80) (character code point value)
|
||||
\M-\C-x meta control char (character code point value)
|
||||
|
||||
(* \b is effective in character class [...] only)
|
||||
|
||||
|
||||
3. Character types
|
||||
|
||||
. any character (except newline)
|
||||
|
||||
\w word character
|
||||
|
||||
Not Unicode:
|
||||
alphanumeric and "_".
|
||||
|
||||
Unicode:
|
||||
General_Category -- (Letter|Mark|Number|Connector_Punctuation)
|
||||
|
||||
Whether non-ASCII characters are included depends on the
|
||||
ONIG_OPTION_ASCII_RANGE option.
|
||||
|
||||
\W non word char
|
||||
|
||||
\s whitespace char
|
||||
|
||||
Not Unicode:
|
||||
\t, \n, \v, \f, \r, \x20
|
||||
|
||||
Unicode:
|
||||
0009, 000A, 000B, 000C, 000D, 0085(NEL),
|
||||
General_Category -- Line_Separator
|
||||
-- Paragraph_Separator
|
||||
-- Space_Separator
|
||||
|
||||
Whether non-ASCII characters are included depends on the
|
||||
ONIG_OPTION_ASCII_RANGE option.
|
||||
|
||||
\S non whitespace char
|
||||
|
||||
\d decimal digit char
|
||||
|
||||
Unicode: General_Category -- Decimal_Number
|
||||
|
||||
Whether non-ASCII characters are included depends on the
|
||||
ONIG_OPTION_ASCII_RANGE option.
|
||||
|
||||
\D non decimal digit char
|
||||
|
||||
\h hexadecimal digit char [0-9a-fA-F]
|
||||
|
||||
\H non hexadecimal digit char
|
||||
|
||||
|
||||
Character Property
|
||||
|
||||
* \p{property-name}
|
||||
* \p{^property-name} (negative)
|
||||
* \P{property-name} (negative)
|
||||
|
||||
property-name:
|
||||
|
||||
+ works on all encodings
|
||||
Alnum, Alpha, Blank, Cntrl, Digit, Graph, Lower,
|
||||
Print, Punct, Space, Upper, XDigit, Word, ASCII,
|
||||
|
||||
+ works on EUC_JP, Shift_JIS, CP932
|
||||
Hiragana, Katakana, Han, Latin, Greek, Cyrillic
|
||||
|
||||
+ works on UTF8, UTF16, UTF32
|
||||
see UnicodeProps.txt
|
||||
|
||||
|
||||
\R Linebreak
|
||||
|
||||
Unicode:
|
||||
(?>\x0D\x0A|[\x0A-\x0D\x{85}\x{2028}\x{2029}])
|
||||
|
||||
Not Unicode:
|
||||
(?>\x0D\x0A|[\x0A-\x0D])
|
||||
|
||||
\X eXtended grapheme cluster
|
||||
|
||||
Unicode:
|
||||
(?>\P{M}\p{M}*)
|
||||
|
||||
Not Unicode:
|
||||
(?m:.)
|
||||
|
||||
|
||||
|
||||
4. Quantifier
|
||||
|
||||
greedy
|
||||
|
||||
? 1 or 0 times
|
||||
* 0 or more times
|
||||
+ 1 or more times
|
||||
{n,m} at least n but not more than m times
|
||||
{n,} at least n times
|
||||
{,n} at least 0 but not more than n times ({0,n})
|
||||
{n} n times
|
||||
|
||||
reluctant
|
||||
|
||||
?? 1 or 0 times
|
||||
*? 0 or more times
|
||||
+? 1 or more times
|
||||
{n,m}? at least n but not more than m times
|
||||
{n,}? at least n times
|
||||
{,n}? at least 0 but not more than n times (== {0,n}?)
|
||||
|
||||
possessive (greedy and does not backtrack after repeated)
|
||||
|
||||
?+ 1 or 0 times
|
||||
*+ 0 or more times
|
||||
++ 1 or more times
|
||||
|
||||
({n,m}+, {n,}+, {n}+ are possessive op. in ONIG_SYNTAX_JAVA and
|
||||
ONIG_SYNTAX_PERL only)
|
||||
|
||||
ex. /a*+/ === /(?>a*)/
|
||||
|
||||
|
||||
5. Anchors
|
||||
|
||||
^ beginning of the line
|
||||
$ end of the line
|
||||
\b word boundary
|
||||
\B not word boundary
|
||||
\A beginning of string
|
||||
\Z end of string, or before newline at the end
|
||||
\z end of string
|
||||
\G matching start position
|
||||
|
||||
|
||||
6. Character class
|
||||
|
||||
^... negative class (lowest precedence operator)
|
||||
x-y range from x to y
|
||||
[...] set (character class in character class)
|
||||
..&&.. intersection (next-lowest precedence operator, after ^)
|
||||
|
||||
ex. [a-w&&[^c-g]z] ==> ([a-w] AND ([^c-g] OR z)) ==> [abh-w]
|
||||
|
||||
* If you want to use '[', '-', ']' as a normal character
|
||||
in a character class, you should escape these characters by '\'.
|
||||
|
||||
|
||||
POSIX bracket ([:xxxxx:], negate [:^xxxxx:])
|
||||
|
||||
Not Unicode Case:
|
||||
|
||||
alnum alphabet or digit char
|
||||
alpha alphabet
|
||||
ascii code value: [0 - 127]
|
||||
blank \t, \x20
|
||||
cntrl
|
||||
digit 0-9
|
||||
graph \x21-\x7E and all of multibyte encoded characters
|
||||
lower
|
||||
print \x20-\x7E and all of multibyte encoded characters
|
||||
punct
|
||||
space \t, \n, \v, \f, \r, \x20
|
||||
upper
|
||||
xdigit 0-9, a-f, A-F
|
||||
word alphanumeric, "_" and multibyte characters
|
||||
|
||||
|
||||
Unicode Case:
|
||||
|
||||
alnum Letter | Mark | Decimal_Number
|
||||
alpha Letter | Mark
|
||||
ascii 0000 - 007F
|
||||
blank Space_Separator | 0009
|
||||
cntrl Control | Format | Unassigned | Private_Use | Surrogate
|
||||
digit Decimal_Number
|
||||
graph [[:^space:]] && ^Control && ^Unassigned && ^Surrogate
|
||||
lower Lowercase_Letter
|
||||
print [[:graph:]] | Space_Separator
|
||||
punct Connector_Punctuation | Dash_Punctuation | Close_Punctuation |
|
||||
Final_Punctuation | Initial_Punctuation | Other_Punctuation |
|
||||
Open_Punctuation
|
||||
space Space_Separator | Line_Separator | Paragraph_Separator |
|
||||
0009 | 000A | 000B | 000C | 000D | 0085
|
||||
upper Uppercase_Letter
|
||||
xdigit 0030 - 0039 | 0041 - 0046 | 0061 - 0066
|
||||
(0-9, a-f, A-F)
|
||||
word Letter | Mark | Decimal_Number | Connector_Punctuation
|
||||
|
||||
|
||||
It depends on ONIG_OPTION_ASCII_RANGE option and
|
||||
ONIG_OPTION_POSIX_BRACKET_ALL_RANGE option that POSIX brackets
|
||||
match non-ASCII char or not.
|
||||
|
||||
|
||||
|
||||
7. Extended groups
|
||||
|
||||
(?#...) comment
|
||||
|
||||
(?imxdau-imx) option on/off
|
||||
i: ignore case
|
||||
m: multi-line (dot(.) match newline)
|
||||
x: extended form
|
||||
|
||||
character set option (character range option)
|
||||
d: Default (compatible with Ruby 1.9.3)
|
||||
\w, \d and \s don't match non-ASCII characters.
|
||||
\b, \B and POSIX brackets use each encoding's
|
||||
rules.
|
||||
a: ASCII
|
||||
ONIG_OPTION_ASCII_RANGE option is turned on.
|
||||
\w, \d, \s and POSIX brackets don't match
|
||||
non-ASCII characters.
|
||||
\b and \B use the ASCII rules.
|
||||
u: Unicode
|
||||
ONIG_OPTION_ASCII_RANGE option is turned off.
|
||||
\w (\W), \d (\D), \s (\S), \b (\B) and POSIX
|
||||
brackets use each encoding's rules.
|
||||
|
||||
(?imxdau-imx:subexp)
|
||||
option on/off for subexp
|
||||
|
||||
(?:subexp) not captured group
|
||||
(subexp) captured group
|
||||
|
||||
(?=subexp) look-ahead
|
||||
(?!subexp) negative look-ahead
|
||||
(?<=subexp) look-behind
|
||||
(?<!subexp) negative look-behind
|
||||
|
||||
Subexp of look-behind must be fixed character length.
|
||||
But different character length is allowed in top level
|
||||
alternatives only.
|
||||
ex. (?<=a|bc) is OK. (?<=aaa(?:b|cd)) is not allowed.
|
||||
|
||||
In negative-look-behind, captured group isn't allowed,
|
||||
but shy group(?:) is allowed.
|
||||
|
||||
\K keep
|
||||
Another expression of look-behind. Keep the stuff left
|
||||
of the \K, don't include it in the result.
|
||||
|
||||
(?>subexp) atomic group
|
||||
don't backtrack in subexp.
|
||||
|
||||
(?<name>subexp), (?'name'subexp)
|
||||
define named group
|
||||
(All characters of the name must be a word character.)
|
||||
|
||||
Not only a name but a number is assigned like a captured
|
||||
group.
|
||||
|
||||
Assigning the same name to two or more subexps is allowed.
|
||||
In this case, a subexp call can not be performed although
|
||||
the back reference is possible.
|
||||
(ONIG_SYNTAX_PERL: a subexp call is allowed in this case.)
|
||||
|
||||
(?(cond)yes-subexp), (?(cond)yes-subexp|no-subexp)
|
||||
conditional expression
|
||||
Matches yes-subexp if (cond) yields a true value, matches
|
||||
no-subexp otherwise.
|
||||
Following (cond) can be used:
|
||||
|
||||
(n) (n >= 1)
|
||||
Checks if the numbered capturing group has matched
|
||||
something.
|
||||
|
||||
(<name>), ('name')
|
||||
Checks if a group with the given name has matched
|
||||
something.
|
||||
|
||||
|
||||
8. Back reference
|
||||
|
||||
\n back reference by group number (n >= 1)
|
||||
\k<n> back reference by group number (n >= 1)
|
||||
\k'n' back reference by group number (n >= 1)
|
||||
\k<-n> back reference by relative group number (n >= 1)
|
||||
\k'-n' back reference by relative group number (n >= 1)
|
||||
\k<name> back reference by group name
|
||||
\k'name' back reference by group name
|
||||
|
||||
In the back reference by the multiplex definition name,
|
||||
a subexp with a large number is referred to preferentially.
|
||||
(When not matched, a group of the small number is referred to.)
|
||||
|
||||
* Back reference by group number is forbidden if named group is defined
|
||||
in the pattern and ONIG_OPTION_CAPTURE_GROUP is not set.
|
||||
|
||||
* ONIG_SYNTAX_PERL: \g{n}, \g{-n} and \g{name} can also be used.
|
||||
|
||||
|
||||
back reference with nest level
|
||||
|
||||
level: 0, 1, 2, ...
|
||||
|
||||
\k<n+level> (n >= 1)
|
||||
\k<n-level> (n >= 1)
|
||||
\k'n+level' (n >= 1)
|
||||
\k'n-level' (n >= 1)
|
||||
\k<-n+level> (n >= 1)
|
||||
\k<-n-level> (n >= 1)
|
||||
\k'-n+level' (n >= 1)
|
||||
\k'-n-level' (n >= 1)
|
||||
|
||||
\k<name+level>
|
||||
\k<name-level>
|
||||
\k'name+level'
|
||||
\k'name-level'
|
||||
|
||||
Designate the nest level relative to the back reference position.
|
||||
|
||||
ex 1.
|
||||
|
||||
/\A(?<a>|.|(?:(?<b>.)\g<a>\k<b+0>))\z/.match("reer")
|
||||
|
||||
ex 2.
|
||||
|
||||
r = Regexp.compile(<<'__REGEXP__'.strip, Regexp::EXTENDED)
|
||||
(?<element> \g<stag> \g<content>* \g<etag> ){0}
|
||||
(?<stag> < \g<name> \s* > ){0}
|
||||
(?<name> [a-zA-Z_:]+ ){0}
|
||||
(?<content> [^<&]+ (\g<element> | [^<&]+)* ){0}
|
||||
(?<etag> </ \k<name+1> >){0}
|
||||
\g<element>
|
||||
__REGEXP__
|
||||
|
||||
p r.match('<foo>f<bar>bbb</bar>f</foo>').captures
|
||||
|
||||
|
||||
|
||||
9. Subexp call ("Tanaka Akira special")
|
||||
|
||||
\g<name> call by group name
|
||||
\g'name' call by group name
|
||||
\g<n> call by group number (n >= 1)
|
||||
\g'n' call by group number (n >= 1)
|
||||
\g<0> call the whole pattern recursively
|
||||
\g'0' call the whole pattern recursively
|
||||
\g<-n> call by relative group number (n >= 1)
|
||||
\g'-n' call by relative group number (n >= 1)
|
||||
\g<+n> call by relative group number (n >= 1)
|
||||
\g'+n' call by relative group number (n >= 1)
|
||||
|
||||
* left-most recursive call is not allowed.
|
||||
ex. (?<name>a|\g<name>b) => error
|
||||
(?<name>a|b\g<name>c) => OK
|
||||
|
||||
* Call by group number is forbidden if named group is defined in the pattern
|
||||
and ONIG_OPTION_CAPTURE_GROUP is not set.
|
||||
|
||||
* If the option status of called group is different from calling position
|
||||
then the group's option is effective.
|
||||
|
||||
ex. (?-i:\g<name>)(?i:(?<name>a)){0} matches "A"
|
||||
|
||||
* ONIG_SYNTAX_PERL: use (?&name), (?n), (?-n), (?+n), (?R) or (?0) instead.
|
||||
|
||||
|
||||
10. Captured group
|
||||
|
||||
Behavior of the no-named group (...) changes with the following conditions.
|
||||
(But named group is not changed.)
|
||||
|
||||
case 1. /.../ (named group is not used, no option)
|
||||
|
||||
(...) is treated as a captured group.
|
||||
|
||||
case 2. /.../g (named group is not used, 'g' option)
|
||||
|
||||
(...) is treated as a no-captured group (?:...).
|
||||
|
||||
case 3. /..(?<name>..)../ (named group is used, no option)
|
||||
|
||||
(...) is treated as a no-captured group (?:...).
|
||||
numbered-backref/call is not allowed.
|
||||
|
||||
case 4. /..(?<name>..)../G (named group is used, 'G' option)
|
||||
|
||||
(...) is treated as a captured group.
|
||||
numbered-backref/call is allowed.
|
||||
|
||||
where
|
||||
g: ONIG_OPTION_DONT_CAPTURE_GROUP
|
||||
G: ONIG_OPTION_CAPTURE_GROUP
|
||||
|
||||
('g' and 'G' options were discussed on the ruby-dev ML)
|
||||
|
||||
|
||||
|
||||
-----------------------------
|
||||
A-1. Syntax depend options
|
||||
|
||||
+ ONIG_SYNTAX_RUBY
|
||||
(?m): dot(.) match newline
|
||||
|
||||
+ ONIG_SYNTAX_PERL, ONIG_SYNTAX_JAVA and ONIG_SYNTAX_PYTHON
|
||||
(?s): dot(.) match newline
|
||||
(?m): ^ match after newline, $ match before newline
|
||||
|
||||
+ ONIG_SYNTAX_PERL
|
||||
(?d), (?l): same as (?u)
|
||||
|
||||
|
||||
A-2. Original extensions
|
||||
|
||||
+ hexadecimal digit char type \h, \H
|
||||
+ named group (?<name>...), (?'name'...)
|
||||
+ named backref \k<name>
|
||||
+ subexp call \g<name>, \g<group-num>
|
||||
|
||||
|
||||
A-3. Features lacking compared with Perl 5.14.0
|
||||
|
||||
+ \N{name}, \N{U+xxxx}, \N
|
||||
+ \l,\u,\L,\U, \C
|
||||
+ \v, \V, \h, \H, \o{xxx}
|
||||
+ (?{code})
|
||||
+ (??{code})
|
||||
+ (?|...)
|
||||
+ (*VERB:ARG)
|
||||
|
||||
* \Q...\E
|
||||
This is effective on ONIG_SYNTAX_PERL and ONIG_SYNTAX_JAVA.
|
||||
|
||||
|
||||
A-4. Differences with Japanized GNU regex(version 0.12) of Ruby 1.8
|
||||
|
||||
+ add character property (\p{property}, \P{property})
|
||||
+ add hexadecimal digit char type (\h, \H)
|
||||
+ add look-behind
|
||||
(?<=fixed-char-length-pattern), (?<!fixed-char-length-pattern)
|
||||
+ add possessive quantifier. ?+, *+, ++
|
||||
+ add operations in character class. [], &&
|
||||
('[' must be escaped to be used as a usual char in a character class.)
|
||||
+ add named group and subexp call.
|
||||
+ octal or hexadecimal number sequence can be treated as
|
||||
a multibyte code char in character class if multibyte encoding
|
||||
is specified.
|
||||
(ex. [\xa1\xa2], [\xa1\xa7-\xa4\xa1])
|
||||
+ allow the range of single byte char and multibyte char in character
|
||||
class.
|
||||
ex. /[a-<<any EUC-JP character>>]/ in EUC-JP encoding.
|
||||
+ effect range of isolated option is to next ')'.
|
||||
ex. (?:(?i)a|b) is interpreted as (?:(?i:a|b)), not (?:(?i:a)|b).
|
||||
+ isolated option is not transparent to previous pattern.
|
||||
ex. a(?i)* is a syntax error pattern.
|
||||
+ allow an incomplete left brace as a usual string.
|
||||
ex. /{/, /({)/, /a{2,3/ etc...
|
||||
+ negative POSIX bracket [:^xxxx:] is supported.
|
||||
+ POSIX bracket [:ascii:] is added.
|
||||
+ repeat of look-ahead is not allowed.
|
||||
ex. /(?=a)*/, /(?!b){5}/
|
||||
+ Ignore case option is effective to numbered character.
|
||||
ex. /\x61/i =~ "A"
|
||||
+ In the range quantifier, the number of the minimum is omissible.
|
||||
/a{,n}/ == /a{0,n}/
|
||||
The simultaneous abbreviation of the number of times of the minimum
|
||||
and the maximum is not allowed. (/a{,}/)
|
||||
+ /a{n}?/ is not a non-greedy operator.
|
||||
/a{n}?/ == /(?:a{n})?/
|
||||
+ invalid back reference is checked and cause error.
|
||||
/\1/, /(a)\2/
|
||||
+ Zero-length match in infinite repeat stops the repeat,
|
||||
then changes of the capture group status are checked as stop condition.
|
||||
/(?:()|())*\1\2/ =~ ""
|
||||
/(?:\1a|())*/ =~ "a"
|
||||
|
||||
|
||||
A-5. Disabled functions by default syntax
|
||||
|
||||
+ capture history
|
||||
|
||||
(?@...) and (?@<name>...)
|
||||
|
||||
ex. /(?@a)*/.match("aaa") ==> [<0-1>, <1-2>, <2-3>]
|
||||
|
||||
see sample/listcap.c file.
|
||||
|
||||
|
||||
A-6. Problems
|
||||
|
||||
+ Invalid encoding byte sequence is not checked.
|
||||
|
||||
ex. UTF-8
|
||||
|
||||
* Invalid first byte is treated as a character.
|
||||
/./u =~ "\xa3"
|
||||
|
||||
* Incomplete byte sequence is not checked.
|
||||
/\w+/u =~ "a\xf3\x8ec"
|
||||
|
||||
// END
|
532
src/Onigmo/doc/RE.ja
Normal file
532
src/Onigmo/doc/RE.ja
Normal file
@ -0,0 +1,532 @@
|
||||
鬼雲 (鬼車改) 正規表現 Version 5.13.0 2012/01/19
|
||||
|
||||
使用文法: ONIG_SYNTAX_RUBY (既定値)
|
||||
|
||||
|
||||
1. 基本要素
|
||||
|
||||
\ 退避修飾 (エスケープ) 正規表現記号の有効/無効の制御
|
||||
| 選択子
|
||||
(...) 式集合 (グループ)
|
||||
[...] 文字集合 (文字クラス)
|
||||
|
||||
|
||||
2. 文字
|
||||
|
||||
\t 水平タブ (0x09)
|
||||
\v 垂直タブ (0x0B)
|
||||
\n 改行 (0x0A)
|
||||
\r 復帰 (0x0D)
|
||||
\b 後退空白 (0x08)
|
||||
\f 改頁 (0x0C)
|
||||
\a 鐘 (0x07)
|
||||
\e 退避修飾 (0x1B)
|
||||
\nnn 八進数表現 符号化バイト値(の一部)
|
||||
\xHH 十六進数表現 符号化バイト値(の一部)
|
||||
\x{7HHHHHHH} 拡張十六進数表現 コードポイント値
|
||||
\cx 制御文字表現 コードポイント値
|
||||
\C-x 制御文字表現 コードポイント値
|
||||
\M-x 超 (x|0x80) コードポイント値
|
||||
\M-\C-x 超 + 制御文字表現 コードポイント値
|
||||
|
||||
※ \bは、文字集合内でのみ有効
|
||||
|
||||
|
||||
3. 文字種
|
||||
|
||||
. 任意文字 (改行を除く)
|
||||
|
||||
\w 単語構成文字
|
||||
|
||||
Unicode以外の場合:
|
||||
英数字 および "_"。
|
||||
|
||||
Unicodeの場合:
|
||||
General_Category -- (Letter|Mark|Number|Connector_Punctuation)
|
||||
|
||||
ASCII外の文字を含むかどうかは ONIG_OPTION_ASCII_RANGE オプションに
|
||||
依存する。
|
||||
|
||||
\W 非単語構成文字
|
||||
|
||||
\s 空白文字
|
||||
|
||||
Unicode以外の場合:
|
||||
\t, \n, \v, \f, \r, \x20
|
||||
|
||||
Unicodeの場合:
|
||||
0009, 000A, 000B, 000C, 000D, 0085(NEL),
|
||||
General_Category -- Line_Separator
|
||||
-- Paragraph_Separator
|
||||
-- Space_Separator
|
||||
|
||||
ASCII外の文字を含むかどうかは ONIG_OPTION_ASCII_RANGE オプションに
|
||||
依存する。
|
||||
|
||||
\S 非空白文字
|
||||
|
||||
\d 10進数字
|
||||
|
||||
Unicodeの場合: General_Category -- Decimal_Number
|
||||
|
||||
ASCII外の文字を含むかどうかは ONIG_OPTION_ASCII_RANGE オプションに
|
||||
依存する。
|
||||
|
||||
\D 非10進数字
|
||||
|
||||
\h 16進数字 [0-9a-fA-F]
|
||||
|
||||
\H 非16進数字
|
||||
|
||||
|
||||
Character Property
|
||||
|
||||
* \p{property-name}
|
||||
* \p{^property-name} (negative)
|
||||
* \P{property-name} (negative)
|
||||
|
||||
property-name:
|
||||
|
||||
+ 全てのエンコーディングで有効
|
||||
Alnum, Alpha, Blank, Cntrl, Digit, Graph, Lower,
|
||||
Print, Punct, Space, Upper, XDigit, Word, ASCII,
|
||||
|
||||
+ EUC-JP, Shift_JIS, CP932で有効
|
||||
Hiragana, Katakana, Han, Latin, Greek, Cyrillic
|
||||
|
||||
+ UTF8, UTF16, UTF32で有効
|
||||
UnicodeProps.txt 参照
|
||||
|
||||
|
||||
\R 改行文字 (Linebreak)
|
||||
|
||||
Unicodeの場合:
|
||||
(?>\x0D\x0A|[\x0A-\x0D\x{85}\x{2028}\x{2029}])
|
||||
|
||||
Unicode以外の場合:
|
||||
(?>\x0D\x0A|[\x0A-\x0D])
|
||||
|
||||
\X eXtended grapheme cluster
|
||||
|
||||
Unicodeの場合:
|
||||
(?>\P{M}\p{M}*)
|
||||
|
||||
Unicode以外の場合:
|
||||
(?m:.)
|
||||
|
||||
|
||||
|
||||
4. 量指定子
|
||||
|
||||
欲張り
|
||||
|
||||
? 一回または零回
|
||||
* 零回以上
|
||||
+ 一回以上
|
||||
{n,m} n回以上m回以下
|
||||
{n,} n回以上
|
||||
{,n} 零回以上n回以下 ({0,n})
|
||||
{n} n回
|
||||
|
||||
無欲
|
||||
|
||||
?? 一回または零回
|
||||
*? 零回以上
|
||||
+? 一回以上
|
||||
{n,m}? n回以上m回以下
|
||||
{n,}? n回以上
|
||||
{,n}? 零回以上n回以下 (== {0,n}?)
|
||||
|
||||
強欲 (欲張りで、繰り返しに成功した後は回数を減らすような後退再試行をしない)
|
||||
|
||||
?+ 一回または零回
|
||||
*+ 零回以上
|
||||
++ 一回以上
|
||||
|
||||
({n,m}+, {n,}+, {n}+ は、ONIG_SYNTAX_JAVAとONIG_SYNTAX_PERLでのみ強欲な
|
||||
指定子)
|
||||
|
||||
例. /a*+/ === /(?>a*)/
|
||||
|
||||
|
||||
5. 錨
|
||||
|
||||
^ 行頭
|
||||
$ 行末
|
||||
\b 単語境界
|
||||
\B 非単語境界
|
||||
\A 文字列先頭
|
||||
\Z 文字列末尾、または文字列末尾の改行の直前
|
||||
\z 文字列末尾
|
||||
\G 照合開始位置
|
||||
|
||||
|
||||
6. 文字集合
|
||||
|
||||
^... 否定 (最低優先度演算子)
|
||||
x-y 範囲 (xからyまで)
|
||||
[...] 集合 (文字集合内文字集合)
|
||||
..&&.. 積演算 (^の次に優先度が低い演算子)
|
||||
|
||||
例. [a-w&&[^c-g]z] ==> ([a-w] and ([^c-g] or z)) ==> [abh-w]
|
||||
|
||||
※ '[', '-', ']'を、文字集合内で通常文字の意味で使用したい場合には、
|
||||
これらの文字を'\'で退避修飾しなければならない。
|
||||
|
||||
|
||||
POSIXブラケット ([:xxxxx:], 否定 [:^xxxxx:])
|
||||
|
||||
Unicode以外の場合:
|
||||
|
||||
alnum 英数字
|
||||
alpha 英字
|
||||
ascii 0 - 127
|
||||
blank \t, \x20
|
||||
cntrl
|
||||
digit 0-9
|
||||
graph \x21-\x7E および 多バイト文字全部を含む
|
||||
lower
|
||||
print \x20-\x7E および 多バイト文字全部を含む
|
||||
punct
|
||||
space \t, \n, \v, \f, \r, \x20
|
||||
upper
|
||||
xdigit 0-9, a-f, A-F
|
||||
word 英数字, "_" および 多バイト文字
|
||||
|
||||
Unicodeの場合:
|
||||
|
||||
alnum Letter | Mark | Decimal_Number
|
||||
alpha Letter | Mark
|
||||
ascii 0000 - 007F
|
||||
blank Space_Separator | 0009
|
||||
cntrl Control | Format | Unassigned | Private_Use | Surrogate
|
||||
digit Decimal_Number
|
||||
graph [[:^space:]] && ^Control && ^Unassigned && ^Surrogate
|
||||
lower Lowercase_Letter
|
||||
print [[:graph:]] | Space_Separator
|
||||
punct Connector_Punctuation | Dash_Punctuation | Close_Punctuation |
|
||||
Final_Punctuation | Initial_Punctuation | Other_Punctuation |
|
||||
Open_Punctuation
|
||||
space Space_Separator | Line_Separator | Paragraph_Separator |
|
||||
0009 | 000A | 000B | 000C | 000D | 0085
|
||||
upper Uppercase_Letter
|
||||
xdigit 0030 - 0039 | 0041 - 0046 | 0061 - 0066
|
||||
(0-9, a-f, A-F)
|
||||
word Letter | Mark | Decimal_Number | Connector_Punctuation
|
||||
|
||||
|
||||
POSIXブラケットがASCII外の文字にマッチするかどうかは
|
||||
ONIG_OPTION_ASCII_RANGEオプションとONIG_OPTION_POSIX_BRACKET_ALL_RANGE
|
||||
オプションに依存する。
|
||||
|
||||
|
||||
|
||||
7. 拡張式集合
|
||||
|
||||
(?#...) 注釈
|
||||
(?imxdau-imx) 孤立オプション
|
||||
i: 大文字小文字照合
|
||||
m: 複数行
|
||||
x: 拡張形式
|
||||
|
||||
文字集合オプション (文字範囲オプション)
|
||||
d: デフォルト (Ruby 1.9.3 互換)
|
||||
\w, \d, \s は、非ASCII文字にマッチしない。
|
||||
\b, \B, POSIXブラケットは、各エンコーディングの
|
||||
ルールに従う。
|
||||
a: ASCII
|
||||
ONIG_OPTION_ASCII_RANGEオプションがオンになる。
|
||||
\w, \d, \s, POSIXブラケットは、非ASCII文字に
|
||||
マッチしない。
|
||||
\b, \B は、ASCIIのルールに従う。
|
||||
u: Unicode
|
||||
ONIG_OPTION_ASCII_RANGEオプションがオフになる。
|
||||
\w (\W), \d (\D), \s (\S), \b (\B), POSIXブラケット
|
||||
は、各エンコーディングのルールに従う。
|
||||
|
||||
(?imxdau-imx:式) 式オプション
|
||||
|
||||
(式) 捕獲式集合
|
||||
(?:式) 非捕獲式集合
|
||||
|
||||
(?=式) 先読み
|
||||
(?!式) 否定先読み
|
||||
(?<=式) 戻り読み
|
||||
(?<!式) 否定戻り読み
|
||||
|
||||
戻り読みの式は固定文字長でなければならない。
|
||||
しかし、最上位の選択子だけは異なった文字長が許される。
|
||||
例. (?<=a|bc) は許可. (?<=aaa(?:b|cd)) は不許可
|
||||
|
||||
否定戻り読みでは、捕獲式集合は許されないが、
|
||||
非捕獲式集合は許される。
|
||||
|
||||
\K 保持
|
||||
戻り読みの別表記。\K の左側を保持し、検索結果に含まない。
|
||||
|
||||
(?>式) 原子的式集合
|
||||
式全体を通過したとき、式の中での後退再試行を行なわない
|
||||
|
||||
(?<name>式), (?'name'式)
|
||||
名前付き捕獲式集合
|
||||
式集合に名前を割り当てる(定義する)。
|
||||
(名前は単語構成文字でなければならない。)
|
||||
|
||||
名前だけでなく、捕獲式集合と同様に番号も割り当てられる。
|
||||
番号指定が禁止されていない状態 (10. 捕獲式集合 を参照)
|
||||
のときは、名前を使わないで番号でも参照できる。
|
||||
|
||||
複数の式集合に同じ名前を与えることは許されている。
|
||||
この場合には、この名前を使用した後方参照は可能であるが、
|
||||
部分式呼出しはできない。
|
||||
(ONIG_SYNTAX_PERLでは部分式呼出しも可能。)
|
||||
|
||||
(?(条件)真の式), (?(条件)真の式|偽の式)
|
||||
条件式
|
||||
(条件)が真であれば真の式がマッチし、偽であれば偽の式が
|
||||
マッチする。
|
||||
(条件)には以下のものが使用できる。
|
||||
|
||||
(n) (n >= 1)
|
||||
番号指定の後方参照が何かにマッチしていれば真、
|
||||
マッチしていなければ偽
|
||||
|
||||
(<name>), ('name')
|
||||
名前指定の後方参照が何かにマッチしていれば真、
|
||||
マッチしていなければ偽
|
||||
|
||||
|
||||
8. 後方参照
|
||||
|
||||
\n 番号指定参照 (n >= 1)
|
||||
\k<n> 番号指定参照 (n >= 1)
|
||||
\k'n' 番号指定参照 (n >= 1)
|
||||
\k<-n> 相対番号指定参照 (n >= 1)
|
||||
\k'-n' 相対番号指定参照 (n >= 1)
|
||||
\k<name> 名前指定参照
|
||||
\k'name' 名前指定参照
|
||||
|
||||
名前指定参照で、その名前が複数の式集合で多重定義されている場合には、
|
||||
番号の大きい式集合から優先的に参照される。
|
||||
(マッチしないときには番号の小さい式集合が参照される)
|
||||
|
||||
※ 番号指定参照は、名前付き捕獲式集合が定義され、
|
||||
かつ ONIG_OPTION_CAPTURE_GROUPが指定されていない場合には、
|
||||
禁止される。(10. 捕獲式集合 を参照)
|
||||
|
||||
※ ONIG_SYNTAX_PERLでは、\g{n}, \g{-n}, \g{name} も使用可能。
|
||||
|
||||
|
||||
ネストレベル付き後方参照
|
||||
|
||||
level: 0, 1, 2, ...
|
||||
|
||||
\k<n+level> (n >= 1)
|
||||
\k<n-level> (n >= 1)
|
||||
\k'n+level' (n >= 1)
|
||||
\k'n-level' (n >= 1)
|
||||
\k<-n+level> (n >= 1)
|
||||
\k<-n-level> (n >= 1)
|
||||
\k'-n+level' (n >= 1)
|
||||
\k'-n-level' (n >= 1)
|
||||
|
||||
\k<name+level>
|
||||
\k<name-level>
|
||||
\k'name+level'
|
||||
\k'name-level'
|
||||
|
||||
後方参照の位置から相対的な部分式呼出しネストレベルを指定して、そのレベルでの
|
||||
捕獲値を参照する。
|
||||
|
||||
例-1.
|
||||
|
||||
/\A(?<a>|.|(?:(?<b>.)\g<a>\k<b+0>))\z/.match("reer")
|
||||
|
||||
例-2.
|
||||
|
||||
r = Regexp.compile(<<'__REGEXP__'.strip, Regexp::EXTENDED)
|
||||
(?<element> \g<stag> \g<content>* \g<etag> ){0}
|
||||
(?<stag> < \g<name> \s* > ){0}
|
||||
(?<name> [a-zA-Z_:]+ ){0}
|
||||
(?<content> [^<&]+ (\g<element> | [^<&]+)* ){0}
|
||||
(?<etag> </ \k<name+1> >){0}
|
||||
\g<element>
|
||||
__REGEXP__
|
||||
|
||||
p r.match('<foo>f<bar>bbb</bar>f</foo>').captures
|
||||
|
||||
|
||||
|
||||
9. 部分式呼出し ("田中哲スペシャル")
|
||||
|
||||
\g<name> 名前指定呼出し
|
||||
\g'name' 名前指定呼出し
|
||||
\g<n> 番号指定呼出し (n >= 1)
|
||||
\g'n' 番号指定呼出し (n >= 1)
|
||||
\g<0> パターン全体の再帰呼び出し
|
||||
\g'0' パターン全体の再帰呼び出し
|
||||
\g<-n> 相対番号指定呼出し (n >= 1)
|
||||
\g'-n' 相対番号指定呼出し (n >= 1)
|
||||
\g<+n> 相対番号指定呼出し (n >= 1)
|
||||
\g'+n' 相対番号指定呼出し (n >= 1)
|
||||
|
||||
※ 最左位置での再帰呼出しは禁止される。
|
||||
例. (?<name>a|\g<name>b) => error
|
||||
(?<name>a|b\g<name>c) => OK
|
||||
|
||||
※ 番号指定呼出しは、名前付き捕獲式集合が定義され、
|
||||
かつ ONIG_OPTION_CAPTURE_GROUPが指定されていない場合には、
|
||||
禁止される。 (10. 捕獲式集合 を参照)
|
||||
|
||||
※ 呼び出された式集合のオプション状態が呼出し側のオプション状態と異なっている
|
||||
とき、呼び出された側のオプション状態が有効である。
|
||||
|
||||
例. (?-i:\g<name>)(?i:(?<name>a)){0} は "A" に照合成功する。
|
||||
|
||||
※ ONIG_SYNTAX_PERLでは代わりに (?&name), (?n), (?-n), (?+n), (?R), (?0) を
|
||||
使用する。
|
||||
|
||||
|
||||
10. 捕獲式集合
|
||||
|
||||
捕獲式集合(...)は、以下の条件に応じて振舞が変化する。
|
||||
(名前付き捕獲式集合は変化しない)
|
||||
|
||||
case 1. /.../ (名前付き捕獲式集合は不使用、オプションなし)
|
||||
|
||||
(...) は、捕獲式集合として扱われる。
|
||||
|
||||
case 2. /.../g (名前付き捕獲式集合は不使用、オプション 'g'を指定)
|
||||
|
||||
(...) は、非捕獲式集合として扱われる。
|
||||
|
||||
case 3. /..(?<name>..)../ (名前付き捕獲式集合は使用、オプションなし)
|
||||
|
||||
(...) は、非捕獲式集合として扱われる。
|
||||
番号指定参照/呼び出しは不許可。
|
||||
|
||||
case 4. /..(?<name>..)../G (名前付き捕獲式集合は使用、オプション 'G'を指定)
|
||||
|
||||
(...) は、捕獲式集合として扱われる。
|
||||
番号指定参照/呼び出しは許可。
|
||||
|
||||
但し
|
||||
g: ONIG_OPTION_DONT_CAPTURE_GROUP
|
||||
G: ONIG_OPTION_CAPTURE_GROUP
|
||||
('g'と'G'オプションは、ruby-dev MLで議論された。)
|
||||
|
||||
これらの振舞の意味は、
|
||||
名前付き捕獲と名前無し捕獲を同時に使用する必然性のある場面は少ないであろう
|
||||
という理由から考えられたものである。
|
||||
|
||||
|
||||
-----------------------------
|
||||
補記 1. 文法依存オプション
|
||||
|
||||
+ ONIG_SYNTAX_RUBY
|
||||
(?m): 終止符記号(.)は改行と照合成功
|
||||
|
||||
+ ONIG_SYNTAX_PERL、ONIG_SYNTAX_JAVA、ONIG_SYNTAX_PYTHON
|
||||
(?s): 終止符記号(.)は改行と照合成功
|
||||
(?m): ^ は改行の直後に照合する、$ は改行の直前に照合する
|
||||
|
||||
+ ONIG_SYNTAX_PERL
|
||||
(?d), (?l): (?u)と同じ
|
||||
|
||||
|
||||
補記 2. 独自拡張機能
|
||||
|
||||
+ 16進数字、非16進数字 \h, \H
|
||||
+ 名前付き捕獲式集合 (?<name>...), (?'name'...)
|
||||
+ 名前指定後方参照 \k<name>
|
||||
+ 部分式呼出し \g<name>, \g<group-num>
|
||||
|
||||
|
||||
補記 3. Perl 5.14.0と比較して存在しない機能
|
||||
|
||||
+ \N{name}, \N{U+xxxx}, \N
|
||||
+ \l,\u,\L,\U, \C
|
||||
+ \v, \V, \h, \H, \o{xxx}
|
||||
+ (?{code})
|
||||
+ (??{code})
|
||||
+ (?|...)
|
||||
+ (*VERB:ARG)
|
||||
|
||||
* \Q...\E
|
||||
但しONIG_SYNTAX_PERLとONIG_SYNTAX_JAVAでは有効
|
||||
|
||||
|
||||
補記 4. Ruby 1.8 の日本語化 GNU regex(version 0.12)との違い
|
||||
|
||||
+ 文字Property機能追加 (\p{property}, \P{Property})
|
||||
+ 16進数字タイプ追加 (\h, \H)
|
||||
+ 戻り読み機能を追加
|
||||
+ 強欲な繰り返し指定子を追加 (?+, *+, ++)
|
||||
+ 文字集合の中の演算子を追加 ([...], &&)
|
||||
('[' は、文字集合の中で通常の文字として使用するときには
|
||||
退避修飾しなければならない)
|
||||
+ 名前付き捕獲式集合と、部分式呼出し機能追加
|
||||
+ 多バイト文字コードが指定されているとき、
|
||||
文字集合の中で八進数または十六進数表現の連続は、多バイト符号で表現された
|
||||
一個の文字と解釈される
|
||||
(例. [\xa1\xa2], [\xa1\xa7-\xa4\xa1])
|
||||
+ 文字集合の中で、一バイト文字と多バイト文字の範囲指定は許される。
|
||||
ex. /[a-あ]/
|
||||
+ 孤立オプションの有効範囲は、その孤立オプションを含んでいる式集合の
|
||||
終わりまでである
|
||||
例. (?:(?i)a|b) は (?:(?i:a|b)) と解釈される、(?:(?i:a)|b)ではない
|
||||
+ 孤立オプションはその前の式に対して透過的ではない
|
||||
例. /a(?i)*/ は文法エラーとなる
|
||||
+ 不完全な繰り返し範囲指定子は通常の文字列として許可される
|
||||
例. /{/, /({)/, /a{2,3/
|
||||
+ 否定的POSIXブラケット [:^xxxx:] を追加
|
||||
+ POSIXブラケット [:ascii:] を追加
|
||||
+ 先読みの繰り返しは不許可
|
||||
例. /(?=a)*/, /(?!b){5}/
|
||||
+ 数値で指定された文字に対しても、大文字小文字照合オプションは有効
|
||||
例. /\x61/i =~ "A"
|
||||
+ 繰り返し回数指定で、最低回数の省略(0回)ができる
|
||||
/a{,n}/ == /a{0,n}/
|
||||
最低回数と最大回数の同時省略は許されない。(/a{,}/)
|
||||
+ /a{n}?/は無欲な演算子ではない。
|
||||
/a{n}?/ == /(?:a{n})?/
|
||||
+ 無効な後方参照をチェックしてエラーにする。
|
||||
/\1/, /(a)\2/
|
||||
+ 無限繰り返しの中で、長さ零での照合成功は繰り返しを中断させるが、
|
||||
このとき、中断すべきかどうかの判定として、捕獲式集合の捕獲状態の
|
||||
変化まで考慮している
|
||||
/(?:()|())*\1\2/ =~ ""
|
||||
/(?:\1a|())*/ =~ "a"
|
||||
|
||||
|
||||
|
||||
補記 5. 実装されているが、既定値では有効にしていない機能
|
||||
|
||||
+ 捕獲履歴参照
|
||||
|
||||
(?@...) と (?@<name>...)
|
||||
|
||||
例. /(?@a)*/.match("aaa") ==> [<0-1>, <1-2>, <2-3>]
|
||||
|
||||
使用方法は、sample/listcap.cを参照
|
||||
|
||||
有効にしていない理由は、どの程度役に立つかはっきりしないため。
|
||||
|
||||
|
||||
補記 6. 問題点
|
||||
|
||||
+ エンコーディングバイト値が適正な値かどうかのチェックは行なっていない。
|
||||
|
||||
例: UTF-8
|
||||
|
||||
* 先頭バイトとして不正なバイトを一文字とみなす
|
||||
/./u =~ "\xa3"
|
||||
|
||||
* 不完全なバイトシーケンスのチェックをしない
|
||||
/\w+/u =~ "a\xf3\x8ec"
|
||||
|
||||
これを調べることは可能ではあるが、遅くなるので行なわない。
|
||||
|
||||
文字列として、そのようなバイト列を指定した場合の動作は保証しない。
|
||||
|
||||
終り
|
640
src/Onigmo/doc/UnicodeProps.txt
Normal file
640
src/Onigmo/doc/UnicodeProps.txt
Normal file
@ -0,0 +1,640 @@
|
||||
Onigmo (Oniguruma-mod) Unicode Properties Version 5.13.1 2012/02/01
|
||||
|
||||
* POSIX brackets
|
||||
Alpha
|
||||
Blank
|
||||
Cntrl
|
||||
Digit
|
||||
Graph
|
||||
Lower
|
||||
Print
|
||||
Punct
|
||||
Space
|
||||
Upper
|
||||
XDigit
|
||||
Word
|
||||
Alnum
|
||||
ASCII
|
||||
|
||||
* Special
|
||||
Any
|
||||
Assigned
|
||||
|
||||
* Major and General Categories
|
||||
C
|
||||
Cc
|
||||
Cf
|
||||
Cn
|
||||
Co
|
||||
Cs
|
||||
L
|
||||
LC
|
||||
Ll
|
||||
Lm
|
||||
Lo
|
||||
Lt
|
||||
Lu
|
||||
M
|
||||
Mc
|
||||
Me
|
||||
Mn
|
||||
N
|
||||
Nd
|
||||
Nl
|
||||
No
|
||||
P
|
||||
Pc
|
||||
Pd
|
||||
Pe
|
||||
Pf
|
||||
Pi
|
||||
Po
|
||||
Ps
|
||||
S
|
||||
Sc
|
||||
Sk
|
||||
Sm
|
||||
So
|
||||
Z
|
||||
Zl
|
||||
Zp
|
||||
Zs
|
||||
|
||||
* Scripts
|
||||
Arabic
|
||||
Armenian
|
||||
Avestan
|
||||
Balinese
|
||||
Bamum
|
||||
Batak
|
||||
Bengali
|
||||
Bopomofo
|
||||
Brahmi
|
||||
Braille
|
||||
Buginese
|
||||
Buhid
|
||||
Canadian_Aboriginal
|
||||
Carian
|
||||
Chakma
|
||||
Cham
|
||||
Cherokee
|
||||
Common
|
||||
Coptic
|
||||
Cuneiform
|
||||
Cypriot
|
||||
Cyrillic
|
||||
Deseret
|
||||
Devanagari
|
||||
Egyptian_Hieroglyphs
|
||||
Ethiopic
|
||||
Georgian
|
||||
Glagolitic
|
||||
Gothic
|
||||
Greek
|
||||
Gujarati
|
||||
Gurmukhi
|
||||
Han
|
||||
Hangul
|
||||
Hanunoo
|
||||
Hebrew
|
||||
Hiragana
|
||||
Imperial_Aramaic
|
||||
Inherited
|
||||
Inscriptional_Pahlavi
|
||||
Inscriptional_Parthian
|
||||
Javanese
|
||||
Kaithi
|
||||
Kannada
|
||||
Katakana
|
||||
Kayah_Li
|
||||
Kharoshthi
|
||||
Khmer
|
||||
Lao
|
||||
Latin
|
||||
Lepcha
|
||||
Limbu
|
||||
Linear_B
|
||||
Lisu
|
||||
Lycian
|
||||
Lydian
|
||||
Malayalam
|
||||
Mandaic
|
||||
Meetei_Mayek
|
||||
Meroitic_Cursive
|
||||
Meroitic_Hieroglyphs
|
||||
Miao
|
||||
Mongolian
|
||||
Myanmar
|
||||
New_Tai_Lue
|
||||
Nko
|
||||
Ogham
|
||||
Ol_Chiki
|
||||
Old_Italic
|
||||
Old_Persian
|
||||
Old_South_Arabian
|
||||
Old_Turkic
|
||||
Oriya
|
||||
Osmanya
|
||||
Phags_Pa
|
||||
Phoenician
|
||||
Rejang
|
||||
Runic
|
||||
Samaritan
|
||||
Saurashtra
|
||||
Sharada
|
||||
Shavian
|
||||
Sinhala
|
||||
Sora_Sompeng
|
||||
Sundanese
|
||||
Syloti_Nagri
|
||||
Syriac
|
||||
Tagalog
|
||||
Tagbanwa
|
||||
Tai_Le
|
||||
Tai_Tham
|
||||
Tai_Viet
|
||||
Takri
|
||||
Tamil
|
||||
Telugu
|
||||
Thaana
|
||||
Thai
|
||||
Tibetan
|
||||
Tifinagh
|
||||
Ugaritic
|
||||
Unknown
|
||||
Vai
|
||||
Yi
|
||||
|
||||
* DerivedCoreProperties
|
||||
Alphabetic
|
||||
Case_Ignorable
|
||||
Cased
|
||||
Changes_When_Casefolded
|
||||
Changes_When_Casemapped
|
||||
Changes_When_Lowercased
|
||||
Changes_When_Titlecased
|
||||
Changes_When_Uppercased
|
||||
Default_Ignorable_Code_Point
|
||||
Grapheme_Base
|
||||
Grapheme_Extend
|
||||
Grapheme_Link
|
||||
ID_Continue
|
||||
ID_Start
|
||||
Lowercase
|
||||
Math
|
||||
Uppercase
|
||||
XID_Continue
|
||||
XID_Start
|
||||
|
||||
* PropList
|
||||
ASCII_Hex_Digit
|
||||
Bidi_Control
|
||||
Dash
|
||||
Deprecated
|
||||
Diacritic
|
||||
Extender
|
||||
Hex_Digit
|
||||
Hyphen
|
||||
IDS_Binary_Operator
|
||||
IDS_Trinary_Operator
|
||||
Ideographic
|
||||
Join_Control
|
||||
Logical_Order_Exception
|
||||
Noncharacter_Code_Point
|
||||
Other_Alphabetic
|
||||
Other_Default_Ignorable_Code_Point
|
||||
Other_Grapheme_Extend
|
||||
Other_ID_Continue
|
||||
Other_ID_Start
|
||||
Other_Lowercase
|
||||
Other_Math
|
||||
Other_Uppercase
|
||||
Pattern_Syntax
|
||||
Pattern_White_Space
|
||||
Quotation_Mark
|
||||
Radical
|
||||
STerm
|
||||
Soft_Dotted
|
||||
Terminal_Punctuation
|
||||
Unified_Ideograph
|
||||
Variation_Selector
|
||||
White_Space
|
||||
|
||||
* PropertyAliases
|
||||
AHex
|
||||
Bidi_C
|
||||
CI
|
||||
CWCF
|
||||
CWCM
|
||||
CWL
|
||||
CWT
|
||||
CWU
|
||||
Dep
|
||||
DI
|
||||
Dia
|
||||
Ext
|
||||
Gr_Base
|
||||
Gr_Ext
|
||||
Gr_Link
|
||||
Hex
|
||||
IDC
|
||||
Ideo
|
||||
IDS
|
||||
IDSB
|
||||
IDST
|
||||
Join_C
|
||||
LOE
|
||||
NChar
|
||||
OAlpha
|
||||
ODI
|
||||
OGr_Ext
|
||||
OIDC
|
||||
OIDS
|
||||
OLower
|
||||
OMath
|
||||
OUpper
|
||||
Pat_Syn
|
||||
Pat_WS
|
||||
QMark
|
||||
SD
|
||||
Term
|
||||
UIdeo
|
||||
VS
|
||||
WSpace
|
||||
XIDC
|
||||
XIDS
|
||||
|
||||
* PropertyValueAliases (General_Category)
|
||||
Other
|
||||
Control
|
||||
Format
|
||||
Unassigned
|
||||
Private_Use
|
||||
Surrogate
|
||||
Letter
|
||||
Cased_Letter
|
||||
Lowercase_Letter
|
||||
Modifier_Letter
|
||||
Other_Letter
|
||||
Titlecase_Letter
|
||||
Uppercase_Letter
|
||||
Mark
|
||||
Spacing_Mark
|
||||
Enclosing_Mark
|
||||
Nonspacing_Mark
|
||||
Number
|
||||
Decimal_Number
|
||||
Letter_Number
|
||||
Other_Number
|
||||
Punctuation
|
||||
Connector_Punctation
|
||||
Dash_Punctation
|
||||
Close_Punctation
|
||||
Final_Punctation
|
||||
Initial_Punctation
|
||||
Other_Punctation
|
||||
Open_Punctation
|
||||
Symbol
|
||||
Currency_Symbol
|
||||
Modifier_Symbol
|
||||
Math_Symbol
|
||||
Other_Symbol
|
||||
Separator
|
||||
Line_Separator
|
||||
Paragraph_Separator
|
||||
Space_Separator
|
||||
|
||||
* PropertyValueAliases (Script)
|
||||
Arab
|
||||
Armi
|
||||
Armn
|
||||
Avst
|
||||
Bali
|
||||
Bamu
|
||||
Batk
|
||||
Beng
|
||||
Bopo
|
||||
Brah
|
||||
Brai
|
||||
Bugi
|
||||
Buhd
|
||||
Cans
|
||||
Cari
|
||||
Cher
|
||||
Copt
|
||||
Qaac
|
||||
Cprt
|
||||
Cyrl
|
||||
Deva
|
||||
Dsrt
|
||||
Egyp
|
||||
Ethi
|
||||
Geor
|
||||
Glag
|
||||
Goth
|
||||
Grek
|
||||
Gujr
|
||||
Guru
|
||||
Hang
|
||||
Hani
|
||||
Hano
|
||||
Hebr
|
||||
Hira
|
||||
Ital
|
||||
Java
|
||||
Kali
|
||||
Kana
|
||||
Khar
|
||||
Khmr
|
||||
Knda
|
||||
Kthi
|
||||
Lana
|
||||
Laoo
|
||||
Latn
|
||||
Lepc
|
||||
Limb
|
||||
Linb
|
||||
Lyci
|
||||
Lydi
|
||||
Mand
|
||||
Mlym
|
||||
Mong
|
||||
Mtei
|
||||
Mymr
|
||||
Nkoo
|
||||
Ogam
|
||||
Olck
|
||||
Orkh
|
||||
Orya
|
||||
Osma
|
||||
Phag
|
||||
Phli
|
||||
Phnx
|
||||
Prti
|
||||
Rjng
|
||||
Runr
|
||||
Samr
|
||||
Sarb
|
||||
Saur
|
||||
Shaw
|
||||
Sinh
|
||||
Sund
|
||||
Sylo
|
||||
Syrc
|
||||
Tagb
|
||||
Tale
|
||||
Talu
|
||||
Taml
|
||||
Tavt
|
||||
Telu
|
||||
Tfng
|
||||
Tglg
|
||||
Thaa
|
||||
Tibt
|
||||
Ugar
|
||||
Vaii
|
||||
Xpeo
|
||||
Xsux
|
||||
Yiii
|
||||
Zinh
|
||||
Qaai
|
||||
Zyyy
|
||||
Zzzz
|
||||
|
||||
* DerivedAges
|
||||
Age=1.1
|
||||
Age=2.0
|
||||
Age=2.1
|
||||
Age=3.0
|
||||
Age=3.1
|
||||
Age=3.2
|
||||
Age=4.0
|
||||
Age=4.1
|
||||
Age=5.0
|
||||
Age=5.1
|
||||
Age=5.2
|
||||
Age=6.0
|
||||
Age=6.1
|
||||
|
||||
* Blocks
|
||||
In_Basic_Latin
|
||||
In_Latin_1_Supplement
|
||||
In_Latin_Extended_A
|
||||
In_Latin_Extended_B
|
||||
In_IPA_Extensions
|
||||
In_Spacing_Modifier_Letters
|
||||
In_Combining_Diacritical_Marks
|
||||
In_Greek_and_Coptic
|
||||
In_Cyrillic
|
||||
In_Cyrillic_Supplement
|
||||
In_Armenian
|
||||
In_Hebrew
|
||||
In_Arabic
|
||||
In_Syriac
|
||||
In_Arabic_Supplement
|
||||
In_Thaana
|
||||
In_NKo
|
||||
In_Samaritan
|
||||
In_Mandaic
|
||||
In_Arabic_Extended_A
|
||||
In_Devanagari
|
||||
In_Bengali
|
||||
In_Gurmukhi
|
||||
In_Gujarati
|
||||
In_Oriya
|
||||
In_Tamil
|
||||
In_Telugu
|
||||
In_Kannada
|
||||
In_Malayalam
|
||||
In_Sinhala
|
||||
In_Thai
|
||||
In_Lao
|
||||
In_Tibetan
|
||||
In_Myanmar
|
||||
In_Georgian
|
||||
In_Hangul_Jamo
|
||||
In_Ethiopic
|
||||
In_Ethiopic_Supplement
|
||||
In_Cherokee
|
||||
In_Unified_Canadian_Aboriginal_Syllabics
|
||||
In_Ogham
|
||||
In_Runic
|
||||
In_Tagalog
|
||||
In_Hanunoo
|
||||
In_Buhid
|
||||
In_Tagbanwa
|
||||
In_Khmer
|
||||
In_Mongolian
|
||||
In_Unified_Canadian_Aboriginal_Syllabics_Extended
|
||||
In_Limbu
|
||||
In_Tai_Le
|
||||
In_New_Tai_Lue
|
||||
In_Khmer_Symbols
|
||||
In_Buginese
|
||||
In_Tai_Tham
|
||||
In_Balinese
|
||||
In_Sundanese
|
||||
In_Batak
|
||||
In_Lepcha
|
||||
In_Ol_Chiki
|
||||
In_Sundanese_Supplement
|
||||
In_Vedic_Extensions
|
||||
In_Phonetic_Extensions
|
||||
In_Phonetic_Extensions_Supplement
|
||||
In_Combining_Diacritical_Marks_Supplement
|
||||
In_Latin_Extended_Additional
|
||||
In_Greek_Extended
|
||||
In_General_Punctuation
|
||||
In_Superscripts_and_Subscripts
|
||||
In_Currency_Symbols
|
||||
In_Combining_Diacritical_Marks_for_Symbols
|
||||
In_Letterlike_Symbols
|
||||
In_Number_Forms
|
||||
In_Arrows
|
||||
In_Mathematical_Operators
|
||||
In_Miscellaneous_Technical
|
||||
In_Control_Pictures
|
||||
In_Optical_Character_Recognition
|
||||
In_Enclosed_Alphanumerics
|
||||
In_Box_Drawing
|
||||
In_Block_Elements
|
||||
In_Geometric_Shapes
|
||||
In_Miscellaneous_Symbols
|
||||
In_Dingbats
|
||||
In_Miscellaneous_Mathematical_Symbols_A
|
||||
In_Supplemental_Arrows_A
|
||||
In_Braille_Patterns
|
||||
In_Supplemental_Arrows_B
|
||||
In_Miscellaneous_Mathematical_Symbols_B
|
||||
In_Supplemental_Mathematical_Operators
|
||||
In_Miscellaneous_Symbols_and_Arrows
|
||||
In_Glagolitic
|
||||
In_Latin_Extended_C
|
||||
In_Coptic
|
||||
In_Georgian_Supplement
|
||||
In_Tifinagh
|
||||
In_Ethiopic_Extended
|
||||
In_Cyrillic_Extended_A
|
||||
In_Supplemental_Punctuation
|
||||
In_CJK_Radicals_Supplement
|
||||
In_Kangxi_Radicals
|
||||
In_Ideographic_Description_Characters
|
||||
In_CJK_Symbols_and_Punctuation
|
||||
In_Hiragana
|
||||
In_Katakana
|
||||
In_Bopomofo
|
||||
In_Hangul_Compatibility_Jamo
|
||||
In_Kanbun
|
||||
In_Bopomofo_Extended
|
||||
In_CJK_Strokes
|
||||
In_Katakana_Phonetic_Extensions
|
||||
In_Enclosed_CJK_Letters_and_Months
|
||||
In_CJK_Compatibility
|
||||
In_CJK_Unified_Ideographs_Extension_A
|
||||
In_Yijing_Hexagram_Symbols
|
||||
In_CJK_Unified_Ideographs
|
||||
In_Yi_Syllables
|
||||
In_Yi_Radicals
|
||||
In_Lisu
|
||||
In_Vai
|
||||
In_Cyrillic_Extended_B
|
||||
In_Bamum
|
||||
In_Modifier_Tone_Letters
|
||||
In_Latin_Extended_D
|
||||
In_Syloti_Nagri
|
||||
In_Common_Indic_Number_Forms
|
||||
In_Phags_pa
|
||||
In_Saurashtra
|
||||
In_Devanagari_Extended
|
||||
In_Kayah_Li
|
||||
In_Rejang
|
||||
In_Hangul_Jamo_Extended_A
|
||||
In_Javanese
|
||||
In_Cham
|
||||
In_Myanmar_Extended_A
|
||||
In_Tai_Viet
|
||||
In_Meetei_Mayek_Extensions
|
||||
In_Ethiopic_Extended_A
|
||||
In_Meetei_Mayek
|
||||
In_Hangul_Syllables
|
||||
In_Hangul_Jamo_Extended_B
|
||||
In_High_Surrogates
|
||||
In_High_Private_Use_Surrogates
|
||||
In_Low_Surrogates
|
||||
In_Private_Use_Area
|
||||
In_CJK_Compatibility_Ideographs
|
||||
In_Alphabetic_Presentation_Forms
|
||||
In_Arabic_Presentation_Forms_A
|
||||
In_Variation_Selectors
|
||||
In_Vertical_Forms
|
||||
In_Combining_Half_Marks
|
||||
In_CJK_Compatibility_Forms
|
||||
In_Small_Form_Variants
|
||||
In_Arabic_Presentation_Forms_B
|
||||
In_Halfwidth_and_Fullwidth_Forms
|
||||
In_Specials
|
||||
In_Linear_B_Syllabary
|
||||
In_Linear_B_Ideograms
|
||||
In_Aegean_Numbers
|
||||
In_Ancient_Greek_Numbers
|
||||
In_Ancient_Symbols
|
||||
In_Phaistos_Disc
|
||||
In_Lycian
|
||||
In_Carian
|
||||
In_Old_Italic
|
||||
In_Gothic
|
||||
In_Ugaritic
|
||||
In_Old_Persian
|
||||
In_Deseret
|
||||
In_Shavian
|
||||
In_Osmanya
|
||||
In_Cypriot_Syllabary
|
||||
In_Imperial_Aramaic
|
||||
In_Phoenician
|
||||
In_Lydian
|
||||
In_Meroitic_Hieroglyphs
|
||||
In_Meroitic_Cursive
|
||||
In_Kharoshthi
|
||||
In_Old_South_Arabian
|
||||
In_Avestan
|
||||
In_Inscriptional_Parthian
|
||||
In_Inscriptional_Pahlavi
|
||||
In_Old_Turkic
|
||||
In_Rumi_Numeral_Symbols
|
||||
In_Brahmi
|
||||
In_Kaithi
|
||||
In_Sora_Sompeng
|
||||
In_Chakma
|
||||
In_Sharada
|
||||
In_Takri
|
||||
In_Cuneiform
|
||||
In_Cuneiform_Numbers_and_Punctuation
|
||||
In_Egyptian_Hieroglyphs
|
||||
In_Bamum_Supplement
|
||||
In_Miao
|
||||
In_Kana_Supplement
|
||||
In_Byzantine_Musical_Symbols
|
||||
In_Musical_Symbols
|
||||
In_Ancient_Greek_Musical_Notation
|
||||
In_Tai_Xuan_Jing_Symbols
|
||||
In_Counting_Rod_Numerals
|
||||
In_Mathematical_Alphanumeric_Symbols
|
||||
In_Arabic_Mathematical_Alphabetic_Symbols
|
||||
In_Mahjong_Tiles
|
||||
In_Domino_Tiles
|
||||
In_Playing_Cards
|
||||
In_Enclosed_Alphanumeric_Supplement
|
||||
In_Enclosed_Ideographic_Supplement
|
||||
In_Miscellaneous_Symbols_And_Pictographs
|
||||
In_Emoticons
|
||||
In_Transport_And_Map_Symbols
|
||||
In_Alchemical_Symbols
|
||||
In_CJK_Unified_Ideographs_Extension_B
|
||||
In_CJK_Unified_Ideographs_Extension_C
|
||||
In_CJK_Unified_Ideographs_Extension_D
|
||||
In_CJK_Compatibility_Ideographs_Supplement
|
||||
In_Tags
|
||||
In_Variation_Selectors_Supplement
|
||||
In_Supplementary_Private_Use_Area_A
|
||||
In_Supplementary_Private_Use_Area_B
|
||||
In_No_Block
|
51
src/Onigmo/enc/ascii.c
Normal file
51
src/Onigmo/enc/ascii.c
Normal file
@ -0,0 +1,51 @@
|
||||
/**********************************************************************
|
||||
ascii.c - Onigmo (Oniguruma-mod) (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2011 K.Takata <kentkt AT csc DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "regenc.h"
|
||||
|
||||
OnigEncodingType OnigEncodingASCII = {
|
||||
onigenc_single_byte_mbc_enc_len,
|
||||
"US-ASCII", /* name */
|
||||
1, /* max byte length */
|
||||
1, /* min byte length */
|
||||
onigenc_is_mbc_newline_0x0a,
|
||||
onigenc_single_byte_mbc_to_code,
|
||||
onigenc_single_byte_code_to_mbclen,
|
||||
onigenc_single_byte_code_to_mbc,
|
||||
onigenc_ascii_mbc_case_fold,
|
||||
onigenc_ascii_apply_all_case_fold,
|
||||
onigenc_ascii_get_case_fold_codes_by_str,
|
||||
onigenc_minimum_property_name_to_ctype,
|
||||
onigenc_ascii_is_code_ctype,
|
||||
onigenc_not_support_get_ctype_code_range,
|
||||
onigenc_single_byte_left_adjust_char_head,
|
||||
onigenc_always_true_is_allowed_reverse_match,
|
||||
ONIGENC_FLAG_NONE,
|
||||
};
|
163
src/Onigmo/enc/big5.c
Normal file
163
src/Onigmo/enc/big5.c
Normal file
@ -0,0 +1,163 @@
|
||||
/**********************************************************************
|
||||
big5.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "regenc.h"
|
||||
|
||||
/*
 * Character length in bytes, indexed by the first byte of the character.
 * Entries 0xA1..0xFE are 2; every other entry (0x00..0xA0 and 0xFF) is 1.
 */
static const int EncLen_BIG5[] = {
  /* 0x00 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x10 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x20 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x60 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x80 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x90 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xA0 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xB0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xC0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xD0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xE0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xF0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
};
|
||||
|
||||
static int
|
||||
big5_mbc_enc_len(const UChar* p)
|
||||
{
|
||||
return EncLen_BIG5[*p];
|
||||
}
|
||||
|
||||
static OnigCodePoint
|
||||
big5_mbc_to_code(const UChar* p, const UChar* end)
|
||||
{
|
||||
return onigenc_mbn_mbc_to_code(ONIG_ENCODING_BIG5, p, end);
|
||||
}
|
||||
|
||||
static int
|
||||
big5_code_to_mbc(OnigCodePoint code, UChar *buf)
|
||||
{
|
||||
return onigenc_mb2_code_to_mbc(ONIG_ENCODING_BIG5, code, buf);
|
||||
}
|
||||
|
||||
static int
|
||||
big5_mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, const UChar* end,
|
||||
UChar* lower)
|
||||
{
|
||||
return onigenc_mbn_mbc_case_fold(ONIG_ENCODING_BIG5, flag,
|
||||
pp, end, lower);
|
||||
}
|
||||
|
||||
#if 0
/*
 * Compiled out.  Kept for reference: forwards to
 * onigenc_mbn_is_mbc_ambiguous() with the Big5 encoding.
 */
static int
big5_is_mbc_ambiguous(OnigCaseFoldType flag,
                      const UChar** pp, const UChar* end)
{
  int result;

  result = onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_BIG5, flag, pp, end);
  return result;
}
#endif
|
||||
|
||||
static int
|
||||
big5_is_code_ctype(OnigCodePoint code, unsigned int ctype)
|
||||
{
|
||||
return onigenc_mb2_is_code_ctype(ONIG_ENCODING_BIG5, code, ctype);
|
||||
}
|
||||
|
||||
/*
 * Non-zero for every byte value that may appear as the second byte of a
 * two-byte character: 0x40..0x7E and 0x80..0xFE.
 */
static const char BIG5_CAN_BE_TRAIL_TABLE[256] = {
  /* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x20 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x30 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x60 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
  /* 0x80 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x90 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xA0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xB0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xC0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xD0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xE0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xF0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0
};
|
||||
|
||||
/* True when `byte` starts a multi-byte character (its EncLen_BIG5 entry exceeds 1). */
#define BIG5_ISMB_FIRST(byte)  (EncLen_BIG5[(byte)] > 1)

/* Non-zero when `byte` may be the second byte of a two-byte character. */
#define BIG5_ISMB_TRAIL(byte)  (BIG5_CAN_BE_TRAIL_TABLE[(byte)])
|
||||
|
||||
static UChar*
|
||||
big5_left_adjust_char_head(const UChar* start, const UChar* s)
|
||||
{
|
||||
const UChar *p;
|
||||
int len;
|
||||
|
||||
if (s <= start) return (UChar* )s;
|
||||
p = s;
|
||||
|
||||
if (BIG5_ISMB_TRAIL(*p)) {
|
||||
while (p > start) {
|
||||
if (! BIG5_ISMB_FIRST(*--p)) {
|
||||
p++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
len = enclen(ONIG_ENCODING_BIG5, p);
|
||||
if (p + len > s) return (UChar* )p;
|
||||
p += len;
|
||||
return (UChar* )(p + ((s - p) & ~1));
|
||||
}
|
||||
|
||||
static int
|
||||
big5_is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED)
|
||||
{
|
||||
const UChar c = *s;
|
||||
|
||||
return (BIG5_ISMB_TRAIL(c) ? FALSE : TRUE);
|
||||
}
|
||||
|
||||
OnigEncodingType OnigEncodingBIG5 = {
|
||||
big5_mbc_enc_len,
|
||||
"Big5", /* name */
|
||||
2, /* max enc length */
|
||||
1, /* min enc length */
|
||||
onigenc_is_mbc_newline_0x0a,
|
||||
big5_mbc_to_code,
|
||||
onigenc_mb2_code_to_mbclen,
|
||||
big5_code_to_mbc,
|
||||
big5_mbc_case_fold,
|
||||
onigenc_ascii_apply_all_case_fold,
|
||||
onigenc_ascii_get_case_fold_codes_by_str,
|
||||
onigenc_minimum_property_name_to_ctype,
|
||||
big5_is_code_ctype,
|
||||
onigenc_not_support_get_ctype_code_range,
|
||||
big5_left_adjust_char_head,
|
||||
big5_is_allowed_reverse_match,
|
||||
ONIGENC_FLAG_NONE,
|
||||
};
|
201
src/Onigmo/enc/cp1251.c
Normal file
201
src/Onigmo/enc/cp1251.c
Normal file
@ -0,0 +1,201 @@
|
||||
/**********************************************************************
|
||||
cp1251.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2006-2007 Byte <byte AT mail DOT kna DOT ru>
|
||||
* K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "regenc.h"
|
||||
|
||||
/* Lower-case form of byte `c`, looked up in EncCP1251_ToLowerCaseTable. */
#define ENC_CP1251_TO_LOWER_CASE(c)  EncCP1251_ToLowerCaseTable[c]

/*
 * Non-zero when the EncCP1251_CtypeTable entry for `code` has the bit
 * selected by CTYPE_TO_BIT(ctype) set.  `code` must be a valid index
 * into the 256-entry table.
 */
#define ENC_IS_CP1251_CTYPE(code,ctype) \
  ((EncCP1251_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
|
||||
|
||||
/*
 * Byte-to-byte lower-case mapping, indexed by the input byte.
 * Bytes that have no lower-case partner map to themselves.
 */
static const UChar EncCP1251_ToLowerCaseTable[256] = {
  /* 0x00 */ '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
  /* 0x08 */ '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
  /* 0x10 */ '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
  /* 0x18 */ '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
  /* 0x20 */ '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
  /* 0x28 */ '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
  /* 0x30 */ '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
  /* 0x38 */ '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
  /* 0x40 */ '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
  /* 0x48 */ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
  /* 0x50 */ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
  /* 0x58 */ '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
  /* 0x60 */ '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
  /* 0x68 */ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
  /* 0x70 */ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
  /* 0x78 */ '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
  /* 0x80 */ '\220', '\203', '\202', '\203', '\204', '\205', '\206', '\207',
  /* 0x88 */ '\210', '\211', '\232', '\213', '\234', '\235', '\236', '\237',
  /* 0x90 */ '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
  /* 0x98 */ '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
  /* 0xA0 */ '\240', '\242', '\242', '\274', '\244', '\264', '\246', '\247',
  /* 0xA8 */ '\270', '\251', '\272', '\253', '\254', '\255', '\256', '\277',
  /* 0xB0 */ '\260', '\261', '\263', '\263', '\264', '\265', '\266', '\267',
  /* 0xB8 */ '\270', '\271', '\272', '\273', '\274', '\276', '\276', '\277',
  /* 0xC0 */ '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
  /* 0xC8 */ '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
  /* 0xD0 */ '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
  /* 0xD8 */ '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
  /* 0xE0 */ '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
  /* 0xE8 */ '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
  /* 0xF0 */ '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
  /* 0xF8 */ '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
};
|
||||
|
||||
/*
 * Character-type bit masks, indexed by byte value.  Each entry is tested
 * through ENC_IS_CP1251_CTYPE(), which ANDs it with CTYPE_TO_BIT(ctype);
 * the meaning of the individual bits is defined by that macro, outside
 * this file.
 */
static const unsigned short EncCP1251_CtypeTable[256] = {
  /* 0x00 */ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  /* 0x08 */ 0x4008, 0x428c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
  /* 0x10 */ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  /* 0x18 */ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  /* 0x20 */ 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  /* 0x28 */ 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  /* 0x30 */ 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
  /* 0x38 */ 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  /* 0x40 */ 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
  /* 0x48 */ 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
  /* 0x50 */ 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
  /* 0x58 */ 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
  /* 0x60 */ 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
  /* 0x68 */ 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
  /* 0x70 */ 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
  /* 0x78 */ 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
  /* 0x80 */ 0x34a2, 0x34a2, 0x01a0, 0x30e2, 0x01a0, 0x01a0, 0x01a0, 0x01a0,
  /* 0x88 */ 0x0000, 0x01a0, 0x34a2, 0x01a0, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
  /* 0x90 */ 0x30e2, 0x01a0, 0x01a0, 0x01a0, 0x01a0, 0x01a0, 0x01a0, 0x01a0,
  /* 0x98 */ 0x0008, 0x0000, 0x30e2, 0x01a0, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
  /* 0xA0 */ 0x0280, 0x34a2, 0x30e2, 0x34a2, 0x01a0, 0x34a2, 0x01a0, 0x01a0,
  /* 0xA8 */ 0x34a2, 0x01a0, 0x34a2, 0x01a0, 0x01a0, 0x01a0, 0x01a0, 0x34a2,
  /* 0xB0 */ 0x01a0, 0x01a0, 0x34a2, 0x30e2, 0x30e2, 0x31e2, 0x01a0, 0x01a0,
  /* 0xB8 */ 0x30e2, 0x0000, 0x30e2, 0x01a0, 0x30e2, 0x34a2, 0x30e2, 0x30e2,
  /* 0xC0 */ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
  /* 0xC8 */ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
  /* 0xD0 */ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
  /* 0xD8 */ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
  /* 0xE0 */ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
  /* 0xE8 */ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
  /* 0xF0 */ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
  /* 0xF8 */ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2
};
|
||||
|
||||
static int
|
||||
cp1251_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED,
|
||||
const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower)
|
||||
{
|
||||
const UChar* p = *pp;
|
||||
|
||||
*lower = ENC_CP1251_TO_LOWER_CASE(*p);
|
||||
(*pp)++;
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
cp1251_is_code_ctype(OnigCodePoint code, unsigned int ctype)
|
||||
{
|
||||
if (code < 256)
|
||||
return ENC_IS_CP1251_CTYPE(code, ctype);
|
||||
else
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/*
 * Lower-case / upper-case byte pairs handed to the map-driven case-fold
 * helpers (onigenc_apply_all_case_fold_with_map and
 * onigenc_get_case_fold_codes_by_str_with_map).
 *
 * The list must cover every byte that EncCP1251_ToLowerCaseTable maps to
 * a different byte outside ASCII; otherwise the map-driven paths and
 * cp1251_mbc_case_fold() disagree about which bytes are case variants
 * of each other.  The first group holds the letters outside the
 * contiguous 0xc0-0xff block; previously only 0xb8/0xa8 was listed.
 */
static const OnigPairCaseFoldCodes CaseFoldMap[] = {
  /* letters scattered through 0x80-0xbf */
  { 0x83, 0x81 },  /* GJE */
  { 0x90, 0x80 },  /* DJE */
  { 0x9a, 0x8a },  /* LJE */
  { 0x9c, 0x8c },  /* NJE */
  { 0x9d, 0x8d },  /* KJE */
  { 0x9e, 0x8e },  /* TSHE */
  { 0x9f, 0x8f },  /* DZHE */
  { 0xa2, 0xa1 },  /* SHORT U */
  { 0xb3, 0xb2 },  /* BYELORUSSIAN-UKRAINIAN I */
  { 0xb4, 0xa5 },  /* GHE WITH UPTURN */
  { 0xb8, 0xa8 },  /* IO */
  { 0xba, 0xaa },  /* UKRAINIAN IE */
  { 0xbc, 0xa3 },  /* JE */
  { 0xbe, 0xbd },  /* DZE */
  { 0xbf, 0xaf },  /* YI */

  /* basic alphabet: 0xe0-0xff lower, 0xc0-0xdf upper */
  { 0xe0, 0xc0 },
  { 0xe1, 0xc1 },
  { 0xe2, 0xc2 },
  { 0xe3, 0xc3 },
  { 0xe4, 0xc4 },
  { 0xe5, 0xc5 },
  { 0xe6, 0xc6 },
  { 0xe7, 0xc7 },
  { 0xe8, 0xc8 },
  { 0xe9, 0xc9 },
  { 0xea, 0xca },
  { 0xeb, 0xcb },
  { 0xec, 0xcc },
  { 0xed, 0xcd },
  { 0xee, 0xce },
  { 0xef, 0xcf },

  { 0xf0, 0xd0 },
  { 0xf1, 0xd1 },
  { 0xf2, 0xd2 },
  { 0xf3, 0xd3 },
  { 0xf4, 0xd4 },
  { 0xf5, 0xd5 },
  { 0xf6, 0xd6 },
  { 0xf7, 0xd7 },
  { 0xf8, 0xd8 },
  { 0xf9, 0xd9 },
  { 0xfa, 0xda },
  { 0xfb, 0xdb },
  { 0xfc, 0xdc },
  { 0xfd, 0xdd },
  { 0xfe, 0xde },
  { 0xff, 0xdf }
};
|
||||
|
||||
static int
|
||||
cp1251_apply_all_case_fold(OnigCaseFoldType flag,
|
||||
OnigApplyAllCaseFoldFunc f, void* arg)
|
||||
{
|
||||
return onigenc_apply_all_case_fold_with_map(
|
||||
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0,
|
||||
flag, f, arg);
|
||||
}
|
||||
|
||||
static int
|
||||
cp1251_get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
||||
const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
|
||||
{
|
||||
return onigenc_get_case_fold_codes_by_str_with_map(
|
||||
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0,
|
||||
flag, p, end, items);
|
||||
}
|
||||
|
||||
OnigEncodingType OnigEncodingCP1251 = {
|
||||
onigenc_single_byte_mbc_enc_len,
|
||||
"CP1251", /* name */
|
||||
1, /* max enc length */
|
||||
1, /* min enc length */
|
||||
onigenc_is_mbc_newline_0x0a,
|
||||
onigenc_single_byte_mbc_to_code,
|
||||
onigenc_single_byte_code_to_mbclen,
|
||||
onigenc_single_byte_code_to_mbc,
|
||||
cp1251_mbc_case_fold,
|
||||
cp1251_apply_all_case_fold,
|
||||
cp1251_get_case_fold_codes_by_str,
|
||||
onigenc_minimum_property_name_to_ctype,
|
||||
cp1251_is_code_ctype,
|
||||
onigenc_not_support_get_ctype_code_range,
|
||||
onigenc_single_byte_left_adjust_char_head,
|
||||
onigenc_always_true_is_allowed_reverse_match,
|
||||
ONIGENC_FLAG_NONE,
|
||||
};
|
32
src/Onigmo/enc/cp932.c
Normal file
32
src/Onigmo/enc/cp932.c
Normal file
@ -0,0 +1,32 @@
|
||||
/**********************************************************************
|
||||
cp932.c - Onigmo (Oniguruma-mod) (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2009 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2011 K.Takata <kentkt AT csc DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#define ENC_CP932
|
||||
#include "sjis.c"
|
531
src/Onigmo/enc/euc_jp.c
Normal file
531
src/Onigmo/enc/euc_jp.c
Normal file
@ -0,0 +1,531 @@
|
||||
/**********************************************************************
|
||||
euc_jp.c - Onigmo (Oniguruma-mod) (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2011 K.Takata <kentkt AT csc DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "regint.h"
|
||||
|
||||
/* Non-zero when byte c lies outside 0xa1..0xfe: the difference is narrowed
 * to UChar before the comparison, so values below 0xa1 presumably wrap to a
 * large number (assumes UChar is an unsigned 8-bit type -- TODO confirm).
 * left_adjust_char_head() stops its backward scan on such a byte. */
#define eucjp_islead(c) ((UChar )((c) - 0xa1) > 0xfe - 0xa1)
|
||||
|
||||
/* Byte length of an EUC-JP character, indexed by the value of its first
 * byte (see mbc_enc_len).  One row per high nibble. */
static const int EncLen_EUCJP[] = {
  /* 0x00 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x10 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x20 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x60 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x80 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, /* 0x8e: 2, 0x8f: 3 */
  /* 0x90 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xa0 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xb0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xc0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xd0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xe0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xf0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
};
|
||||
|
||||
/* Case pairs for the two-byte EUC-JP letters.  Each entry holds the two
 * codes that get_upper_case()/get_lower_case() map onto each other; the
 * table is handed to onigenc_apply_all_case_fold_with_map(). */
static const OnigPairCaseFoldCodes CaseFoldMap[] = {
  /* Fullwidth Alphabet: 0xa3c1..0xa3da <-> 0xa3e1..0xa3fa */
  { 0xa3c1, 0xa3e1 }, { 0xa3c2, 0xa3e2 }, { 0xa3c3, 0xa3e3 },
  { 0xa3c4, 0xa3e4 }, { 0xa3c5, 0xa3e5 }, { 0xa3c6, 0xa3e6 },
  { 0xa3c7, 0xa3e7 }, { 0xa3c8, 0xa3e8 }, { 0xa3c9, 0xa3e9 },
  { 0xa3ca, 0xa3ea }, { 0xa3cb, 0xa3eb }, { 0xa3cc, 0xa3ec },
  { 0xa3cd, 0xa3ed }, { 0xa3ce, 0xa3ee }, { 0xa3cf, 0xa3ef },
  { 0xa3d0, 0xa3f0 }, { 0xa3d1, 0xa3f1 }, { 0xa3d2, 0xa3f2 },
  { 0xa3d3, 0xa3f3 }, { 0xa3d4, 0xa3f4 }, { 0xa3d5, 0xa3f5 },
  { 0xa3d6, 0xa3f6 }, { 0xa3d7, 0xa3f7 }, { 0xa3d8, 0xa3f8 },
  { 0xa3d9, 0xa3f9 }, { 0xa3da, 0xa3fa },

  /* Greek: 0xa6a1..0xa6b8 <-> 0xa6c1..0xa6d8 */
  { 0xa6a1, 0xa6c1 }, { 0xa6a2, 0xa6c2 }, { 0xa6a3, 0xa6c3 },
  { 0xa6a4, 0xa6c4 }, { 0xa6a5, 0xa6c5 }, { 0xa6a6, 0xa6c6 },
  { 0xa6a7, 0xa6c7 }, { 0xa6a8, 0xa6c8 }, { 0xa6a9, 0xa6c9 },
  { 0xa6aa, 0xa6ca }, { 0xa6ab, 0xa6cb }, { 0xa6ac, 0xa6cc },
  { 0xa6ad, 0xa6cd }, { 0xa6ae, 0xa6ce }, { 0xa6af, 0xa6cf },
  { 0xa6b0, 0xa6d0 }, { 0xa6b1, 0xa6d1 }, { 0xa6b2, 0xa6d2 },
  { 0xa6b3, 0xa6d3 }, { 0xa6b4, 0xa6d4 }, { 0xa6b5, 0xa6d5 },
  { 0xa6b6, 0xa6d6 }, { 0xa6b7, 0xa6d7 }, { 0xa6b8, 0xa6d8 },

  /* Cyrillic: 0xa7a1..0xa7c1 <-> 0xa7d1..0xa7f1 */
  { 0xa7a1, 0xa7d1 }, { 0xa7a2, 0xa7d2 }, { 0xa7a3, 0xa7d3 },
  { 0xa7a4, 0xa7d4 }, { 0xa7a5, 0xa7d5 }, { 0xa7a6, 0xa7d6 },
  { 0xa7a7, 0xa7d7 }, { 0xa7a8, 0xa7d8 }, { 0xa7a9, 0xa7d9 },
  { 0xa7aa, 0xa7da }, { 0xa7ab, 0xa7db }, { 0xa7ac, 0xa7dc },
  { 0xa7ad, 0xa7dd }, { 0xa7ae, 0xa7de }, { 0xa7af, 0xa7df },
  { 0xa7b0, 0xa7e0 }, { 0xa7b1, 0xa7e1 }, { 0xa7b2, 0xa7e2 },
  { 0xa7b3, 0xa7e3 }, { 0xa7b4, 0xa7e4 }, { 0xa7b5, 0xa7e5 },
  { 0xa7b6, 0xa7e6 }, { 0xa7b7, 0xa7e7 }, { 0xa7b8, 0xa7e8 },
  { 0xa7b9, 0xa7e9 }, { 0xa7ba, 0xa7ea }, { 0xa7bb, 0xa7eb },
  { 0xa7bc, 0xa7ec }, { 0xa7bd, 0xa7ed }, { 0xa7be, 0xa7ee },
  { 0xa7bf, 0xa7ef }, { 0xa7c0, 0xa7f0 }, { 0xa7c1, 0xa7f1 },
};
|
||||
|
||||
static int
|
||||
mbc_enc_len(const UChar* p)
|
||||
{
|
||||
return EncLen_EUCJP[*p];
|
||||
}
|
||||
|
||||
static OnigCodePoint
|
||||
mbc_to_code(const UChar* p, const UChar* end)
|
||||
{
|
||||
int c, i, len;
|
||||
OnigCodePoint n;
|
||||
|
||||
len = mbc_enc_len(p);
|
||||
n = (OnigCodePoint )*p++;
|
||||
if (len == 1) return n;
|
||||
|
||||
for (i = 1; i < len; i++) {
|
||||
if (p >= end) break;
|
||||
c = *p++;
|
||||
n <<= 8; n += c;
|
||||
}
|
||||
return n;
|
||||
}
|
||||
|
||||
static int
|
||||
code_to_mbclen(OnigCodePoint code)
|
||||
{
|
||||
if (ONIGENC_IS_CODE_ASCII(code)) return 1;
|
||||
else if ((code & 0xff808080) == 0x00808080) return 3;
|
||||
else if ((code & 0xffff8080) == 0x00008080) return 2;
|
||||
else
|
||||
return ONIGERR_INVALID_CODE_POINT_VALUE;
|
||||
}
|
||||
|
||||
#if 0
/* Compiled out.  Returns the most significant non-zero byte among the low
 * three bytes of code, or code itself when it fits in one byte. */
static int
code_to_mbc_first(OnigCodePoint code)
{
  if ((code & 0xff0000) != 0)
    return (int )((code >> 16) & 0xff);

  if ((code & 0xff00) != 0)
    return (int )((code >> 8) & 0xff);

  return (int )code;
}
#endif
|
||||
|
||||
static int
|
||||
code_to_mbc(OnigCodePoint code, UChar *buf)
|
||||
{
|
||||
UChar *p = buf;
|
||||
|
||||
if ((code & 0xff0000) != 0) *p++ = (UChar )(((code >> 16) & 0xff));
|
||||
if ((code & 0xff00) != 0) *p++ = (UChar )(((code >> 8) & 0xff));
|
||||
*p++ = (UChar )(code & 0xff);
|
||||
|
||||
#if 1
|
||||
if (mbc_enc_len(buf) != (p - buf))
|
||||
return ONIGERR_INVALID_CODE_POINT_VALUE;
|
||||
#endif
|
||||
return (int )(p - buf);
|
||||
}
|
||||
|
||||
static int
|
||||
apply_all_case_fold(OnigCaseFoldType flag,
|
||||
OnigApplyAllCaseFoldFunc f, void* arg)
|
||||
{
|
||||
return onigenc_apply_all_case_fold_with_map(
|
||||
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0,
|
||||
flag, f, arg);
|
||||
}
|
||||
|
||||
static OnigCodePoint
|
||||
get_lower_case(OnigCodePoint code)
|
||||
{
|
||||
if (ONIGENC_IS_IN_RANGE(code, 0xa3c1, 0xa3da)) {
|
||||
/* Fullwidth Alphabet */
|
||||
return (OnigCodePoint )(code + 0x0020);
|
||||
}
|
||||
else if (ONIGENC_IS_IN_RANGE(code, 0xa6a1, 0xa6b8)) {
|
||||
/* Greek */
|
||||
return (OnigCodePoint )(code + 0x0020);
|
||||
}
|
||||
else if (ONIGENC_IS_IN_RANGE(code, 0xa7a1, 0xa7c1)) {
|
||||
/* Cyrillic */
|
||||
return (OnigCodePoint )(code + 0x0030);
|
||||
}
|
||||
return code;
|
||||
}
|
||||
|
||||
static OnigCodePoint
|
||||
get_upper_case(OnigCodePoint code)
|
||||
{
|
||||
if (ONIGENC_IS_IN_RANGE(code, 0xa3e1, 0xa3fa)) {
|
||||
/* Fullwidth Alphabet */
|
||||
return (OnigCodePoint )(code - 0x0020);
|
||||
}
|
||||
else if (ONIGENC_IS_IN_RANGE(code, 0xa6c1, 0xa6d8)) {
|
||||
/* Greek */
|
||||
return (OnigCodePoint )(code - 0x0020);
|
||||
}
|
||||
else if (ONIGENC_IS_IN_RANGE(code, 0xa7d1, 0xa7f1)) {
|
||||
/* Cyrillic */
|
||||
return (OnigCodePoint )(code - 0x0030);
|
||||
}
|
||||
return code;
|
||||
}
|
||||
|
||||
static int
|
||||
get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
||||
const OnigUChar* p, const OnigUChar* end,
|
||||
OnigCaseFoldCodeItem items[])
|
||||
{
|
||||
int len;
|
||||
OnigCodePoint code, code_lo, code_up;
|
||||
|
||||
code = mbc_to_code(p, end);
|
||||
if (ONIGENC_IS_ASCII_CODE(code))
|
||||
return onigenc_ascii_get_case_fold_codes_by_str(flag, p, end, items);
|
||||
|
||||
len = mbc_enc_len(p);
|
||||
code_lo = get_lower_case(code);
|
||||
code_up = get_upper_case(code);
|
||||
|
||||
if (code != code_lo) {
|
||||
items[0].byte_len = len;
|
||||
items[0].code_len = 1;
|
||||
items[0].code[0] = code_lo;
|
||||
return 1;
|
||||
}
|
||||
else if (code != code_up) {
|
||||
items[0].byte_len = len;
|
||||
items[0].code_len = 1;
|
||||
items[0].code[0] = code_up;
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED,
|
||||
const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower)
|
||||
{
|
||||
const UChar* p = *pp;
|
||||
|
||||
if (ONIGENC_IS_MBC_ASCII(p)) {
|
||||
*lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
|
||||
(*pp)++;
|
||||
return 1;
|
||||
}
|
||||
else {
|
||||
OnigCodePoint code;
|
||||
int len;
|
||||
|
||||
code = get_lower_case(mbc_to_code(p, end));
|
||||
len = code_to_mbc(code, lower);
|
||||
(*pp) += len;
|
||||
return len; /* return byte length of converted char to lower */
|
||||
}
|
||||
}
|
||||
|
||||
static UChar*
|
||||
left_adjust_char_head(const UChar* start, const UChar* s)
|
||||
{
|
||||
/* In this encoding
|
||||
mb-trail bytes doesn't mix with single bytes.
|
||||
*/
|
||||
const UChar *p;
|
||||
int len;
|
||||
|
||||
if (s <= start) return (UChar* )s;
|
||||
p = s;
|
||||
|
||||
while (!eucjp_islead(*p) && p > start) p--;
|
||||
len = mbc_enc_len(p);
|
||||
if (p + len > s) return (UChar* )p;
|
||||
p += len;
|
||||
return (UChar* )(p + ((s - p) & ~1));
|
||||
}
|
||||
|
||||
static int
|
||||
is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED)
|
||||
{
|
||||
const UChar c = *s;
|
||||
if (c <= 0x7e || c == 0x8e || c == 0x8f)
|
||||
return TRUE;
|
||||
else
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
|
||||
static int PropertyInited = 0;
|
||||
static const OnigCodePoint** PropertyList;
|
||||
static int PropertyListNum;
|
||||
static int PropertyListSize;
|
||||
static hash_table_type* PropertyNameTable;
|
||||
|
||||
/* Code ranges registered under the "hiragana" property: a range count
 * followed by inclusive (first, last) pairs. */
static const OnigCodePoint CR_Hiragana[] = {
  1,                /* number of ranges */
#ifdef ENC_EUC_JIS_2004
  0xa4a1, 0xa4fb
#else
  0xa4a1, 0xa4f3
#endif
}; /* CR_Hiragana */
|
||||
|
||||
#ifdef ENC_EUC_JIS_2004
|
||||
static const OnigCodePoint CR_Katakana[] = {
|
||||
5,
|
||||
0x8ea6, 0x8eaf, /* JIS X 0201 Katakana */
|
||||
0x8eb1, 0x8edd, /* JIS X 0201 Katakana */
|
||||
0xa5a1, 0xa5fe,
|
||||
0xa6ee, 0xa6fe,
|
||||
0xa7f2, 0xa7f5,
|
||||
}; /* CR_Katakana */
|
||||
#else
|
||||
static const OnigCodePoint CR_Katakana[] = {
|
||||
3,
|
||||
0x8ea6, 0x8eaf, /* JIS X 0201 Katakana */
|
||||
0x8eb1, 0x8edd, /* JIS X 0201 Katakana */
|
||||
0xa5a1, 0xa5f6,
|
||||
}; /* CR_Katakana */
|
||||
#endif
|
||||
|
||||
#ifdef ENC_EUC_JIS_2004
|
||||
static const OnigCodePoint CR_Han[] = {
|
||||
/* EUC-JIS-2004 (JIS X 0213:2004) */
|
||||
7,
|
||||
/* plane 1 */
|
||||
0xa1b8, 0xa1b8,
|
||||
0xaea1, 0xfefe, /* Kanji level 1, 2 and 3 */
|
||||
/* plane 2 */
|
||||
0x8fa1a1, 0x8fa1fe, /* row 1 */
|
||||
0x8fa3a1, 0x8fa5fe, /* row 3 .. 5 */
|
||||
0x8fa8a1, 0x8fa8fe, /* row 8 */
|
||||
0x8faca1, 0x8faffe, /* row 12 .. 15 */
|
||||
0x8feea1, 0x8ffef6, /* row 78 .. 94 */
|
||||
}; /* CR_Han */
|
||||
#else
|
||||
static const OnigCodePoint CR_Han[] = {
|
||||
/* EUC-JP (JIS X 0208 based) */
|
||||
4,
|
||||
0xa1b8, 0xa1b8,
|
||||
0xb0a1, 0xcfd3, /* Kanji level 1 */
|
||||
0xd0a1, 0xf4a6, /* Kanji level 2 */
|
||||
0x8fb0a1, 0x8fedf3 /* JIS X 0212 Supplemental Kanji (row 16 .. 77) */
|
||||
}; /* CR_Han */
|
||||
#endif
|
||||
|
||||
/* Code ranges registered under the "latin" property: a range count
 * followed by inclusive (first, last) pairs (ASCII letters, then the
 * fullwidth letters). */
static const OnigCodePoint CR_Latin[] = {
  4,                /* number of ranges */
  0x0041, 0x005a,
  0x0061, 0x007a,
  0xa3c1, 0xa3da,
  0xa3e1, 0xa3fa,
  /* TODO: add row 8 .. 11 to support EUC-JIS-2004 */
  /* TODO: add JIS X 0212 row 9 .. 11 */
}; /* CR_Latin */
|
||||
|
||||
/* Code ranges registered under the "greek" property: a range count
 * followed by inclusive (first, last) pairs. */
static const OnigCodePoint CR_Greek[] = {
  2,                /* number of ranges */
  0xa6a1, 0xa6b8,
#ifdef ENC_EUC_JIS_2004
  0xa6c1, 0xa6d9,
#else
  0xa6c1, 0xa6d8,
  /* TODO: add JIS X 0212 row 6 */
#endif
}; /* CR_Greek */
|
||||
|
||||
/* Code ranges registered under the "cyrillic" property: a range count
 * followed by inclusive (first, last) pairs. */
static const OnigCodePoint CR_Cyrillic[] = {
  2,                /* number of ranges */
  0xa7a1, 0xa7c1,
  0xa7d1, 0xa7f1,
  /* TODO: add JIS X 0212 row 7 */
}; /* CR_Cyrillic */
|
||||
|
||||
static int
|
||||
init_property_list(void)
|
||||
{
|
||||
int r;
|
||||
|
||||
PROPERTY_LIST_ADD_PROP("hiragana", CR_Hiragana);
|
||||
PROPERTY_LIST_ADD_PROP("katakana", CR_Katakana);
|
||||
PROPERTY_LIST_ADD_PROP("han", CR_Han);
|
||||
PROPERTY_LIST_ADD_PROP("latin", CR_Latin);
|
||||
PROPERTY_LIST_ADD_PROP("greek", CR_Greek);
|
||||
PROPERTY_LIST_ADD_PROP("cyrillic", CR_Cyrillic);
|
||||
PropertyInited = 1;
|
||||
|
||||
end:
|
||||
return r;
|
||||
}
|
||||
|
||||
static int
|
||||
property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end)
|
||||
{
|
||||
hash_data_type ctype;
|
||||
UChar *s, *e;
|
||||
|
||||
PROPERTY_LIST_INIT_CHECK;
|
||||
|
||||
s = e = xalloca(end - p + 1);
|
||||
for (; p < end; p++) {
|
||||
*e++ = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
|
||||
}
|
||||
|
||||
if (onig_st_lookup_strend(PropertyNameTable, s, e, &ctype) == 0) {
|
||||
return onigenc_minimum_property_name_to_ctype(enc, s, e);
|
||||
}
|
||||
|
||||
return (int )ctype;
|
||||
}
|
||||
|
||||
static int
|
||||
is_code_ctype(OnigCodePoint code, unsigned int ctype)
|
||||
{
|
||||
if (ctype <= ONIGENC_MAX_STD_CTYPE) {
|
||||
if (code < 128)
|
||||
return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
|
||||
else {
|
||||
if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) {
|
||||
return (code_to_mbclen(code) > 1 ? TRUE : FALSE);
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
PROPERTY_LIST_INIT_CHECK;
|
||||
|
||||
ctype -= (ONIGENC_MAX_STD_CTYPE + 1);
|
||||
if (ctype >= (unsigned int )PropertyListNum)
|
||||
return ONIGERR_TYPE_BUG;
|
||||
|
||||
return onig_is_in_code_range((UChar* )PropertyList[ctype], code);
|
||||
}
|
||||
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
static int
|
||||
get_ctype_code_range(OnigCtype ctype, OnigCodePoint* sb_out,
|
||||
const OnigCodePoint* ranges[])
|
||||
{
|
||||
if (ctype <= ONIGENC_MAX_STD_CTYPE) {
|
||||
return ONIG_NO_SUPPORT_CONFIG;
|
||||
}
|
||||
else {
|
||||
*sb_out = 0x80;
|
||||
|
||||
PROPERTY_LIST_INIT_CHECK;
|
||||
|
||||
ctype -= (ONIGENC_MAX_STD_CTYPE + 1);
|
||||
if (ctype >= (OnigCtype )PropertyListNum)
|
||||
return ONIGERR_TYPE_BUG;
|
||||
|
||||
*ranges = PropertyList[ctype];
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
OnigEncodingType OnigEncodingEUC_JP = {
|
||||
mbc_enc_len,
|
||||
"EUC-JP", /* name */
|
||||
3, /* max enc length */
|
||||
1, /* min enc length */
|
||||
onigenc_is_mbc_newline_0x0a,
|
||||
mbc_to_code,
|
||||
code_to_mbclen,
|
||||
code_to_mbc,
|
||||
mbc_case_fold,
|
||||
apply_all_case_fold,
|
||||
get_case_fold_codes_by_str,
|
||||
property_name_to_ctype,
|
||||
is_code_ctype,
|
||||
get_ctype_code_range,
|
||||
left_adjust_char_head,
|
||||
is_allowed_reverse_match,
|
||||
ONIGENC_FLAG_NONE,
|
||||
};
|
160
src/Onigmo/enc/euc_kr.c
Normal file
160
src/Onigmo/enc/euc_kr.c
Normal file
@ -0,0 +1,160 @@
|
||||
/**********************************************************************
|
||||
euc_kr.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "regenc.h"
|
||||
|
||||
/* Byte length of an EUC-KR character, indexed by the value of its first
 * byte (see euckr_mbc_enc_len).  One row per high nibble. */
static const int EncLen_EUCKR[] = {
  /* 0x00 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x10 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x20 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x60 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x80 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x90 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xa0 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xb0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xc0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xd0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xe0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xf0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
};
|
||||
|
||||
static int
|
||||
euckr_mbc_enc_len(const UChar* p)
|
||||
{
|
||||
return EncLen_EUCKR[*p];
|
||||
}
|
||||
|
||||
static OnigCodePoint
|
||||
euckr_mbc_to_code(const UChar* p, const UChar* end)
|
||||
{
|
||||
return onigenc_mbn_mbc_to_code(ONIG_ENCODING_EUC_KR, p, end);
|
||||
}
|
||||
|
||||
static int
|
||||
euckr_code_to_mbc(OnigCodePoint code, UChar *buf)
|
||||
{
|
||||
return onigenc_mb2_code_to_mbc(ONIG_ENCODING_EUC_KR, code, buf);
|
||||
}
|
||||
|
||||
static int
|
||||
euckr_mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, const UChar* end,
|
||||
UChar* lower)
|
||||
{
|
||||
return onigenc_mbn_mbc_case_fold(ONIG_ENCODING_EUC_KR, flag,
|
||||
pp, end, lower);
|
||||
}
|
||||
|
||||
#if 0
/* Compiled out.  Thin wrapper around onigenc_mbn_is_mbc_ambiguous() for
 * the EUC-KR encoding. */
static int
euckr_is_mbc_ambiguous(OnigCaseFoldType flag,
                       const UChar** pp, const UChar* end)
{
  const int result =
    onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_EUC_KR, flag, pp, end);

  return result;
}
#endif
|
||||
|
||||
static int
|
||||
euckr_is_code_ctype(OnigCodePoint code, unsigned int ctype)
|
||||
{
|
||||
return onigenc_mb2_is_code_ctype(ONIG_ENCODING_EUC_KR, code, ctype);
|
||||
}
|
||||
|
||||
/* Non-zero when byte c is below 0xa1 or equal to 0xff, i.e. outside
 * 0xa1..0xfe.  euckr_left_adjust_char_head() stops its backward scan on
 * such a byte.  Note: c is evaluated twice. */
#define euckr_islead(c) ((c) < 0xa1 || (c) == 0xff)
|
||||
|
||||
static UChar*
|
||||
euckr_left_adjust_char_head(const UChar* start, const UChar* s)
|
||||
{
|
||||
/* Assumed in this encoding,
|
||||
mb-trail bytes don't mix with single bytes.
|
||||
*/
|
||||
const UChar *p;
|
||||
int len;
|
||||
|
||||
if (s <= start) return (UChar* )s;
|
||||
p = s;
|
||||
|
||||
while (!euckr_islead(*p) && p > start) p--;
|
||||
len = enclen(ONIG_ENCODING_EUC_KR, p);
|
||||
if (p + len > s) return (UChar* )p;
|
||||
p += len;
|
||||
return (UChar* )(p + ((s - p) & ~1));
|
||||
}
|
||||
|
||||
static int
|
||||
euckr_is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED)
|
||||
{
|
||||
const UChar c = *s;
|
||||
if (c <= 0x7e) return TRUE;
|
||||
else return FALSE;
|
||||
}
|
||||
|
||||
OnigEncodingType OnigEncodingEUC_KR = {
|
||||
euckr_mbc_enc_len,
|
||||
"EUC-KR", /* name */
|
||||
2, /* max enc length */
|
||||
1, /* min enc length */
|
||||
onigenc_is_mbc_newline_0x0a,
|
||||
euckr_mbc_to_code,
|
||||
onigenc_mb2_code_to_mbclen,
|
||||
euckr_code_to_mbc,
|
||||
euckr_mbc_case_fold,
|
||||
onigenc_ascii_apply_all_case_fold,
|
||||
onigenc_ascii_get_case_fold_codes_by_str,
|
||||
onigenc_minimum_property_name_to_ctype,
|
||||
euckr_is_code_ctype,
|
||||
onigenc_not_support_get_ctype_code_range,
|
||||
euckr_left_adjust_char_head,
|
||||
euckr_is_allowed_reverse_match,
|
||||
ONIGENC_FLAG_NONE,
|
||||
};
|
||||
|
||||
/* Same as OnigEncodingEUC_KR except for the name */
|
||||
OnigEncodingType OnigEncodingEUC_CN = {
|
||||
euckr_mbc_enc_len,
|
||||
"EUC-CN", /* name */
|
||||
2, /* max enc length */
|
||||
1, /* min enc length */
|
||||
onigenc_is_mbc_newline_0x0a,
|
||||
euckr_mbc_to_code,
|
||||
onigenc_mb2_code_to_mbclen,
|
||||
euckr_code_to_mbc,
|
||||
euckr_mbc_case_fold,
|
||||
onigenc_ascii_apply_all_case_fold,
|
||||
onigenc_ascii_get_case_fold_codes_by_str,
|
||||
onigenc_minimum_property_name_to_ctype,
|
||||
euckr_is_code_ctype,
|
||||
onigenc_not_support_get_ctype_code_range,
|
||||
euckr_left_adjust_char_head,
|
||||
euckr_is_allowed_reverse_match,
|
||||
ONIGENC_FLAG_NONE,
|
||||
};
|
130
src/Onigmo/enc/euc_tw.c
Normal file
130
src/Onigmo/enc/euc_tw.c
Normal file
@ -0,0 +1,130 @@
|
||||
/**********************************************************************
|
||||
euc_tw.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "regenc.h"
|
||||
|
||||
/* Byte length of an EUC-TW character, indexed by the value of its first
 * byte (see euctw_mbc_enc_len).  One row per high nibble. */
static const int EncLen_EUCTW[] = {
  /* 0x00 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x10 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x20 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x60 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x80 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 1, /* 0x8e: 4 */
  /* 0x90 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xa0 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xb0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xc0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xd0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xe0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xf0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
};
|
||||
|
||||
static int
|
||||
euctw_mbc_enc_len(const UChar* p)
|
||||
{
|
||||
return EncLen_EUCTW[*p];
|
||||
}
|
||||
|
||||
static OnigCodePoint
|
||||
euctw_mbc_to_code(const UChar* p, const UChar* end)
|
||||
{
|
||||
return onigenc_mbn_mbc_to_code(ONIG_ENCODING_EUC_TW, p, end);
|
||||
}
|
||||
|
||||
static int
|
||||
euctw_code_to_mbc(OnigCodePoint code, UChar *buf)
|
||||
{
|
||||
return onigenc_mb4_code_to_mbc(ONIG_ENCODING_EUC_TW, code, buf);
|
||||
}
|
||||
|
||||
static int
|
||||
euctw_mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, const UChar* end,
|
||||
UChar* lower)
|
||||
{
|
||||
return onigenc_mbn_mbc_case_fold(ONIG_ENCODING_EUC_TW, flag,
|
||||
pp, end, lower);
|
||||
}
|
||||
|
||||
static int
|
||||
euctw_is_code_ctype(OnigCodePoint code, unsigned int ctype)
|
||||
{
|
||||
return onigenc_mb4_is_code_ctype(ONIG_ENCODING_EUC_TW, code, ctype);
|
||||
}
|
||||
|
||||
/* Non-zero when byte c lies outside 0xa1..0xfe: the difference is narrowed
 * to UChar before the comparison, so values below 0xa1 presumably wrap to a
 * large number (assumes UChar is an unsigned 8-bit type -- TODO confirm).
 * euctw_left_adjust_char_head() stops its backward scan on such a byte. */
#define euctw_islead(c) ((UChar )((c) - 0xa1) > 0xfe - 0xa1)
|
||||
|
||||
static UChar*
|
||||
euctw_left_adjust_char_head(const UChar* start, const UChar* s)
|
||||
{
|
||||
/* Assumed in this encoding,
|
||||
mb-trail bytes don't mix with single bytes.
|
||||
*/
|
||||
const UChar *p;
|
||||
int len;
|
||||
|
||||
if (s <= start) return (UChar* )s;
|
||||
p = s;
|
||||
|
||||
while (!euctw_islead(*p) && p > start) p--;
|
||||
len = enclen(ONIG_ENCODING_EUC_TW, p);
|
||||
if (p + len > s) return (UChar* )p;
|
||||
p += len;
|
||||
return (UChar* )(p + ((s - p) & ~1));
|
||||
}
|
||||
|
||||
static int
|
||||
euctw_is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED)
|
||||
{
|
||||
const UChar c = *s;
|
||||
if (c <= 0x7e) return TRUE;
|
||||
else return FALSE;
|
||||
}
|
||||
|
||||
OnigEncodingType OnigEncodingEUC_TW = {
|
||||
euctw_mbc_enc_len,
|
||||
"EUC-TW", /* name */
|
||||
4, /* max enc length */
|
||||
1, /* min enc length */
|
||||
onigenc_is_mbc_newline_0x0a,
|
||||
euctw_mbc_to_code,
|
||||
onigenc_mb4_code_to_mbclen,
|
||||
euctw_code_to_mbc,
|
||||
euctw_mbc_case_fold,
|
||||
onigenc_ascii_apply_all_case_fold,
|
||||
onigenc_ascii_get_case_fold_codes_by_str,
|
||||
onigenc_minimum_property_name_to_ctype,
|
||||
euctw_is_code_ctype,
|
||||
onigenc_not_support_get_ctype_code_range,
|
||||
euctw_left_adjust_char_head,
|
||||
euctw_is_allowed_reverse_match,
|
||||
ONIGENC_FLAG_NONE,
|
||||
};
|
496
src/Onigmo/enc/gb18030.c
Normal file
496
src/Onigmo/enc/gb18030.c
Normal file
@ -0,0 +1,496 @@
|
||||
/**********************************************************************
|
||||
gb18030.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2005-2007 KUBO Takehiro <kubo AT jiubao DOT org>
|
||||
* K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "regenc.h"
|
||||
|
||||
/* Trace hook for the GB18030 state machine.  Call sites pass a fully
 * parenthesised printf argument list, e.g. DEBUG_GB18030(("x=%d\n", x)).
 * Tracing is disabled: the macro expands to nothing and its argument is
 * never evaluated.  Flip the condition to 1 to enable printf output. */
#if 0
#define DEBUG_GB18030(arg) printf arg
#else
#define DEBUG_GB18030(arg)
#endif
|
||||
|
||||
/* Byte classes used to recognise GB18030 sequences. */
enum {
  C1, /* one-byte char */
  C2, /* one-byte or second of two-byte char */
  C4, /* one-byte or second or fourth of four-byte char */
  CM  /* first of two- or four-byte char or second of two-byte char */
};

/* Class of every byte value, one row per high nibble. */
static const char GB18030_MAP[] = {
  /* 0x00 */ C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1,
  /* 0x10 */ C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1,
  /* 0x20 */ C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1,
  /* 0x30 */ C4, C4, C4, C4, C4, C4, C4, C4, C4, C4, C1, C1, C1, C1, C1, C1,
  /* 0x40 */ C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2,
  /* 0x50 */ C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2,
  /* 0x60 */ C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2,
  /* 0x70 */ C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C1,
  /* 0x80 */ C2, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM,
  /* 0x90 */ CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM,
  /* 0xa0 */ CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM,
  /* 0xb0 */ CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM,
  /* 0xc0 */ CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM,
  /* 0xd0 */ CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM,
  /* 0xe0 */ CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM,
  /* 0xf0 */ CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, C1
};
|
||||
|
||||
static int
|
||||
gb18030_mbc_enc_len(const UChar* p)
|
||||
{
|
||||
if (GB18030_MAP[*p] != CM)
|
||||
return 1;
|
||||
p++;
|
||||
if (GB18030_MAP[*p] == C4)
|
||||
return 4;
|
||||
if (GB18030_MAP[*p] == C1)
|
||||
return 1; /* illegal sequence */
|
||||
return 2;
|
||||
}
|
||||
|
||||
static OnigCodePoint
|
||||
gb18030_mbc_to_code(const UChar* p, const UChar* end)
|
||||
{
|
||||
return onigenc_mbn_mbc_to_code(ONIG_ENCODING_GB18030, p, end);
|
||||
}
|
||||
|
||||
static int
|
||||
gb18030_code_to_mbc(OnigCodePoint code, UChar *buf)
|
||||
{
|
||||
return onigenc_mb4_code_to_mbc(ONIG_ENCODING_GB18030, code, buf);
|
||||
}
|
||||
|
||||
static int
|
||||
gb18030_mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, const UChar* end,
|
||||
UChar* lower)
|
||||
{
|
||||
return onigenc_mbn_mbc_case_fold(ONIG_ENCODING_GB18030, flag,
|
||||
pp, end, lower);
|
||||
}
|
||||
|
||||
#if 0
/* Compiled out.  Thin wrapper around onigenc_mbn_is_mbc_ambiguous() for
 * the GB18030 encoding. */
static int
gb18030_is_mbc_ambiguous(OnigCaseFoldType flag,
                         const UChar** pp, const UChar* end)
{
  const int result =
    onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_GB18030, flag, pp, end);

  return result;
}
#endif
|
||||
|
||||
static int
|
||||
gb18030_is_code_ctype(OnigCodePoint code, unsigned int ctype)
|
||||
{
|
||||
return onigenc_mb4_is_code_ctype(ONIG_ENCODING_GB18030, code, ctype);
|
||||
}
|
||||
|
||||
/*
 * States of the backward scan done by gb18030_left_adjust_char_head().
 * Each name spells out the run of byte classes (C2, C4, CM, as classified by
 * GB18030_MAP) consumed so far, reading from the scan position back towards
 * the byte the scan started on; "odd"/"even" is the parity of the repeated
 * unit named after it.  The numeric values appear in DEBUG_GB18030 traces,
 * so the declaration order should be kept stable.
 */
enum state {
|
||||
S_START,
|
||||
S_one_C2,
|
||||
S_one_C4,
|
||||
S_one_CM,
|
||||
|
||||
S_odd_CM_one_CX,
|
||||
S_even_CM_one_CX,
|
||||
|
||||
/* CMC4 : pair of "CM C4" */
|
||||
S_one_CMC4,
|
||||
S_odd_CMC4,
|
||||
S_one_C4_odd_CMC4,
|
||||
S_even_CMC4,
|
||||
S_one_C4_even_CMC4,
|
||||
|
||||
S_odd_CM_odd_CMC4,
|
||||
S_even_CM_odd_CMC4,
|
||||
|
||||
S_odd_CM_even_CMC4,
|
||||
S_even_CM_even_CMC4,
|
||||
|
||||
/* C4CM : pair of "C4 CM" */
|
||||
S_odd_C4CM,
|
||||
S_one_CM_odd_C4CM,
|
||||
S_even_C4CM,
|
||||
S_one_CM_even_C4CM,
|
||||
|
||||
S_even_CM_odd_C4CM,
|
||||
S_odd_CM_odd_C4CM,
|
||||
S_even_CM_even_C4CM,
|
||||
S_odd_CM_even_C4CM,
|
||||
};
|
||||
|
||||
static UChar*
|
||||
gb18030_left_adjust_char_head(const UChar* start, const UChar* s)
|
||||
{
|
||||
const UChar *p;
|
||||
enum state state = S_START;
|
||||
|
||||
DEBUG_GB18030(("----------------\n"));
|
||||
for (p = s; p >= start; p--) {
|
||||
DEBUG_GB18030(("state %d --(%02x)-->\n", state, *p));
|
||||
switch (state) {
|
||||
case S_START:
|
||||
switch (GB18030_MAP[*p]) {
|
||||
case C1:
|
||||
return (UChar *)s;
|
||||
case C2:
|
||||
state = S_one_C2; /* C2 */
|
||||
break;
|
||||
case C4:
|
||||
state = S_one_C4; /* C4 */
|
||||
break;
|
||||
case CM:
|
||||
state = S_one_CM; /* CM */
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case S_one_C2: /* C2 */
|
||||
switch (GB18030_MAP[*p]) {
|
||||
case C1:
|
||||
case C2:
|
||||
case C4:
|
||||
return (UChar *)s;
|
||||
case CM:
|
||||
state = S_odd_CM_one_CX; /* CM C2 */
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case S_one_C4: /* C4 */
|
||||
switch (GB18030_MAP[*p]) {
|
||||
case C1:
|
||||
case C2:
|
||||
case C4:
|
||||
return (UChar *)s;
|
||||
case CM:
|
||||
state = S_one_CMC4;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case S_one_CM: /* CM */
|
||||
switch (GB18030_MAP[*p]) {
|
||||
case C1:
|
||||
case C2:
|
||||
return (UChar *)s;
|
||||
case C4:
|
||||
state = S_odd_C4CM;
|
||||
break;
|
||||
case CM:
|
||||
state = S_odd_CM_one_CX; /* CM CM */
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case S_odd_CM_one_CX: /* CM C2 */ /* CM CM */ /* CM CM CM C4 */
|
||||
switch (GB18030_MAP[*p]) {
|
||||
case C1:
|
||||
case C2:
|
||||
case C4:
|
||||
return (UChar *)(s - 1);
|
||||
case CM:
|
||||
state = S_even_CM_one_CX;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case S_even_CM_one_CX: /* CM CM C2 */ /* CM CM CM */ /* CM CM C4 */
|
||||
switch (GB18030_MAP[*p]) {
|
||||
case C1:
|
||||
case C2:
|
||||
case C4:
|
||||
return (UChar *)s;
|
||||
case CM:
|
||||
state = S_odd_CM_one_CX;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case S_one_CMC4: /* CM C4 */
|
||||
switch (GB18030_MAP[*p]) {
|
||||
case C1:
|
||||
case C2:
|
||||
return (UChar *)(s - 1);
|
||||
case C4:
|
||||
state = S_one_C4_odd_CMC4; /* C4 CM C4 */
|
||||
break;
|
||||
case CM:
|
||||
state = S_even_CM_one_CX; /* CM CM C4 */
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case S_odd_CMC4: /* CM C4 CM C4 CM C4 */
|
||||
switch (GB18030_MAP[*p]) {
|
||||
case C1:
|
||||
case C2:
|
||||
return (UChar *)(s - 1);
|
||||
case C4:
|
||||
state = S_one_C4_odd_CMC4;
|
||||
break;
|
||||
case CM:
|
||||
state = S_odd_CM_odd_CMC4;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case S_one_C4_odd_CMC4: /* C4 CM C4 */
|
||||
switch (GB18030_MAP[*p]) {
|
||||
case C1:
|
||||
case C2:
|
||||
case C4:
|
||||
return (UChar *)(s - 1);
|
||||
case CM:
|
||||
state = S_even_CMC4; /* CM C4 CM C4 */
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case S_even_CMC4: /* CM C4 CM C4 */
|
||||
switch (GB18030_MAP[*p]) {
|
||||
case C1:
|
||||
case C2:
|
||||
return (UChar *)(s - 3);
|
||||
case C4:
|
||||
state = S_one_C4_even_CMC4;
|
||||
break;
|
||||
case CM:
|
||||
state = S_odd_CM_even_CMC4;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case S_one_C4_even_CMC4: /* C4 CM C4 CM C4 */
|
||||
switch (GB18030_MAP[*p]) {
|
||||
case C1:
|
||||
case C2:
|
||||
case C4:
|
||||
return (UChar *)(s - 3);
|
||||
case CM:
|
||||
state = S_odd_CMC4;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case S_odd_CM_odd_CMC4: /* CM CM C4 CM C4 CM C4 */
|
||||
switch (GB18030_MAP[*p]) {
|
||||
case C1:
|
||||
case C2:
|
||||
case C4:
|
||||
return (UChar *)(s - 3);
|
||||
case CM:
|
||||
state = S_even_CM_odd_CMC4;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case S_even_CM_odd_CMC4: /* CM CM CM C4 CM C4 CM C4 */
|
||||
switch (GB18030_MAP[*p]) {
|
||||
case C1:
|
||||
case C2:
|
||||
case C4:
|
||||
return (UChar *)(s - 1);
|
||||
case CM:
|
||||
state = S_odd_CM_odd_CMC4;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case S_odd_CM_even_CMC4: /* CM CM C4 CM C4 */
|
||||
switch (GB18030_MAP[*p]) {
|
||||
case C1:
|
||||
case C2:
|
||||
case C4:
|
||||
return (UChar *)(s - 1);
|
||||
case CM:
|
||||
state = S_even_CM_even_CMC4;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case S_even_CM_even_CMC4: /* CM CM CM C4 CM C4 */
|
||||
switch (GB18030_MAP[*p]) {
|
||||
case C1:
|
||||
case C2:
|
||||
case C4:
|
||||
return (UChar *)(s - 3);
|
||||
case CM:
|
||||
state = S_odd_CM_even_CMC4;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case S_odd_C4CM: /* C4 CM */ /* C4 CM C4 CM C4 CM*/
|
||||
switch (GB18030_MAP[*p]) {
|
||||
case C1:
|
||||
case C2:
|
||||
case C4:
|
||||
return (UChar *)s;
|
||||
case CM:
|
||||
state = S_one_CM_odd_C4CM; /* CM C4 CM */
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case S_one_CM_odd_C4CM: /* CM C4 CM */ /* CM C4 CM C4 CM C4 CM */
|
||||
switch (GB18030_MAP[*p]) {
|
||||
case C1:
|
||||
case C2:
|
||||
return (UChar *)(s - 2); /* |CM C4 CM */
|
||||
case C4:
|
||||
state = S_even_C4CM;
|
||||
break;
|
||||
case CM:
|
||||
state = S_even_CM_odd_C4CM;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case S_even_C4CM: /* C4 CM C4 CM */
|
||||
switch (GB18030_MAP[*p]) {
|
||||
case C1:
|
||||
case C2:
|
||||
case C4:
|
||||
return (UChar *)(s - 2); /* C4|CM C4 CM */
|
||||
case CM:
|
||||
state = S_one_CM_even_C4CM;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case S_one_CM_even_C4CM: /* CM C4 CM C4 CM */
|
||||
switch (GB18030_MAP[*p]) {
|
||||
case C1:
|
||||
case C2:
|
||||
return (UChar *)(s - 0); /*|CM C4 CM C4|CM */
|
||||
case C4:
|
||||
state = S_odd_C4CM;
|
||||
break;
|
||||
case CM:
|
||||
state = S_even_CM_even_C4CM;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case S_even_CM_odd_C4CM: /* CM CM C4 CM */
|
||||
switch (GB18030_MAP[*p]) {
|
||||
case C1:
|
||||
case C2:
|
||||
case C4:
|
||||
return (UChar *)(s - 0); /* |CM CM|C4|CM */
|
||||
case CM:
|
||||
state = S_odd_CM_odd_C4CM;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case S_odd_CM_odd_C4CM: /* CM CM CM C4 CM */
|
||||
switch (GB18030_MAP[*p]) {
|
||||
case C1:
|
||||
case C2:
|
||||
case C4:
|
||||
return (UChar *)(s - 2); /* |CM CM|CM C4 CM */
|
||||
case CM:
|
||||
state = S_even_CM_odd_C4CM;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case S_even_CM_even_C4CM: /* CM CM C4 CM C4 CM */
|
||||
switch (GB18030_MAP[*p]) {
|
||||
case C1:
|
||||
case C2:
|
||||
case C4:
|
||||
return (UChar *)(s - 2); /* |CM CM|C4|CM C4 CM */
|
||||
case CM:
|
||||
state = S_odd_CM_even_C4CM;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case S_odd_CM_even_C4CM: /* CM CM CM C4 CM C4 CM */
|
||||
switch (GB18030_MAP[*p]) {
|
||||
case C1:
|
||||
case C2:
|
||||
case C4:
|
||||
return (UChar *)(s - 0); /* |CM CM|CM C4 CM C4|CM */
|
||||
case CM:
|
||||
state = S_even_CM_even_C4CM;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
DEBUG_GB18030(("state %d\n", state));
|
||||
switch (state) {
|
||||
case S_START: return (UChar *)(s - 0);
|
||||
case S_one_C2: return (UChar *)(s - 0);
|
||||
case S_one_C4: return (UChar *)(s - 0);
|
||||
case S_one_CM: return (UChar *)(s - 0);
|
||||
|
||||
case S_odd_CM_one_CX: return (UChar *)(s - 1);
|
||||
case S_even_CM_one_CX: return (UChar *)(s - 0);
|
||||
|
||||
case S_one_CMC4: return (UChar *)(s - 1);
|
||||
case S_odd_CMC4: return (UChar *)(s - 1);
|
||||
case S_one_C4_odd_CMC4: return (UChar *)(s - 1);
|
||||
case S_even_CMC4: return (UChar *)(s - 3);
|
||||
case S_one_C4_even_CMC4: return (UChar *)(s - 3);
|
||||
|
||||
case S_odd_CM_odd_CMC4: return (UChar *)(s - 3);
|
||||
case S_even_CM_odd_CMC4: return (UChar *)(s - 1);
|
||||
|
||||
case S_odd_CM_even_CMC4: return (UChar *)(s - 1);
|
||||
case S_even_CM_even_CMC4: return (UChar *)(s - 3);
|
||||
|
||||
case S_odd_C4CM: return (UChar *)(s - 0);
|
||||
case S_one_CM_odd_C4CM: return (UChar *)(s - 2);
|
||||
case S_even_C4CM: return (UChar *)(s - 2);
|
||||
case S_one_CM_even_C4CM: return (UChar *)(s - 0);
|
||||
|
||||
case S_even_CM_odd_C4CM: return (UChar *)(s - 0);
|
||||
case S_odd_CM_odd_C4CM: return (UChar *)(s - 2);
|
||||
case S_even_CM_even_C4CM: return (UChar *)(s - 2);
|
||||
case S_odd_CM_even_C4CM: return (UChar *)(s - 0);
|
||||
}
|
||||
|
||||
return (UChar* )s; /* never come here. (escape warning) */
|
||||
}
|
||||
|
||||
static int
|
||||
gb18030_is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED)
|
||||
{
|
||||
return GB18030_MAP[*s] == C1 ? TRUE : FALSE;
|
||||
}
|
||||
|
||||
/*
 * Encoding descriptor for GB18030.  The initializer is positional: it plugs
 * the gb18030_* functions defined in this file, plus shared onigenc_* helpers,
 * into the slots of OnigEncodingType.  The slot order is fixed by the
 * OnigEncodingType declaration, which is not part of this file -- check it
 * before adding or reordering entries.
 */
OnigEncodingType OnigEncodingGB18030 = {
|
||||
gb18030_mbc_enc_len,
|
||||
"GB18030", /* name */
|
||||
4, /* max enc length */
|
||||
1, /* min enc length */
|
||||
onigenc_is_mbc_newline_0x0a,
|
||||
gb18030_mbc_to_code,
|
||||
onigenc_mb4_code_to_mbclen,
|
||||
gb18030_code_to_mbc,
|
||||
gb18030_mbc_case_fold,
|
||||
onigenc_ascii_apply_all_case_fold,
|
||||
onigenc_ascii_get_case_fold_codes_by_str,
|
||||
onigenc_minimum_property_name_to_ctype,
|
||||
gb18030_is_code_ctype,
|
||||
onigenc_not_support_get_ctype_code_range,
|
||||
gb18030_left_adjust_char_head,
|
||||
gb18030_is_allowed_reverse_match,
|
||||
ONIGENC_FLAG_NONE,
|
||||
};
|
273
src/Onigmo/enc/iso8859_1.c
Normal file
273
src/Onigmo/enc/iso8859_1.c
Normal file
@ -0,0 +1,273 @@
|
||||
/**********************************************************************
|
||||
iso8859_1.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "regenc.h"
|
||||
|
||||
/*
 * ENC_IS_ISO_8859_1_CTYPE(code, ctype) is non-zero when the table entry for
 * `code` has the bit CTYPE_TO_BIT(ctype) set.  The macro does no range check:
 * `code` must be below 256 (is_code_ctype() checks this before using it).
 */
#define ENC_IS_ISO_8859_1_CTYPE(code,ctype) \
|
||||
((EncISO_8859_1_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
|
||||
|
||||
/* Character-type bit set for every ISO-8859-1 byte value, indexed by the
   byte (0x00-0xff); eight entries per row. */
static const unsigned short EncISO_8859_1_CtypeTable[256] = {
|
||||
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
|
||||
0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
|
||||
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
|
||||
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
|
||||
0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
|
||||
0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
|
||||
0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
|
||||
0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
|
||||
0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
|
||||
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
|
||||
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
|
||||
0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
|
||||
0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
|
||||
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
|
||||
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
|
||||
0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
|
||||
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
|
||||
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
|
||||
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
|
||||
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
|
||||
0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
|
||||
0x00a0, 0x00a0, 0x30e2, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0,
|
||||
0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x30e2, 0x00a0, 0x01a0,
|
||||
0x00a0, 0x10a0, 0x30e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x01a0,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2
|
||||
};
|
||||
|
||||
/*
 * Single-byte case pairs for ISO-8859-1 above the ASCII range: 0xc0-0xde
 * paired with the byte 0x20 higher.  0xd7/0xf7 are deliberately absent, and
 * 0xdf has no entry here (it is handled separately by the functions that use
 * this map).
 */
static const OnigPairCaseFoldCodes CaseFoldMap[] = {
|
||||
{ 0xc0, 0xe0 },
|
||||
{ 0xc1, 0xe1 },
|
||||
{ 0xc2, 0xe2 },
|
||||
{ 0xc3, 0xe3 },
|
||||
{ 0xc4, 0xe4 },
|
||||
{ 0xc5, 0xe5 },
|
||||
{ 0xc6, 0xe6 },
|
||||
{ 0xc7, 0xe7 },
|
||||
{ 0xc8, 0xe8 },
|
||||
{ 0xc9, 0xe9 },
|
||||
{ 0xca, 0xea },
|
||||
{ 0xcb, 0xeb },
|
||||
{ 0xcc, 0xec },
|
||||
{ 0xcd, 0xed },
|
||||
{ 0xce, 0xee },
|
||||
{ 0xcf, 0xef },
|
||||
|
||||
{ 0xd0, 0xf0 },
|
||||
{ 0xd1, 0xf1 },
|
||||
{ 0xd2, 0xf2 },
|
||||
{ 0xd3, 0xf3 },
|
||||
{ 0xd4, 0xf4 },
|
||||
{ 0xd5, 0xf5 },
|
||||
{ 0xd6, 0xf6 },
|
||||
{ 0xd8, 0xf8 },
|
||||
{ 0xd9, 0xf9 },
|
||||
{ 0xda, 0xfa },
|
||||
{ 0xdb, 0xfb },
|
||||
{ 0xdc, 0xfc },
|
||||
{ 0xdd, 0xfd },
|
||||
{ 0xde, 0xfe }
|
||||
};
|
||||
|
||||
static int
|
||||
apply_all_case_fold(OnigCaseFoldType flag,
|
||||
OnigApplyAllCaseFoldFunc f, void* arg)
|
||||
{
|
||||
return onigenc_apply_all_case_fold_with_map(
|
||||
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
|
||||
flag, f, arg);
|
||||
}
|
||||
|
||||
static int
|
||||
get_case_fold_codes_by_str(OnigCaseFoldType flag ARG_UNUSED,
|
||||
const OnigUChar* p, const OnigUChar* end,
|
||||
OnigCaseFoldCodeItem items[])
|
||||
{
|
||||
if (0x41 <= *p && *p <= 0x5a) {
|
||||
items[0].byte_len = 1;
|
||||
items[0].code_len = 1;
|
||||
items[0].code[0] = (OnigCodePoint )(*p + 0x20);
|
||||
if (*p == 0x53 && end > p + 1
|
||||
&& (*(p+1) == 0x53 || *(p+1) == 0x73)) { /* SS */
|
||||
items[1].byte_len = 2;
|
||||
items[1].code_len = 1;
|
||||
items[1].code[0] = (OnigCodePoint )0xdf;
|
||||
return 2;
|
||||
}
|
||||
else
|
||||
return 1;
|
||||
}
|
||||
else if (0x61 <= *p && *p <= 0x7a) {
|
||||
items[0].byte_len = 1;
|
||||
items[0].code_len = 1;
|
||||
items[0].code[0] = (OnigCodePoint )(*p - 0x20);
|
||||
if (*p == 0x73 && end > p + 1
|
||||
&& (*(p+1) == 0x73 || *(p+1) == 0x53)) { /* ss */
|
||||
items[1].byte_len = 2;
|
||||
items[1].code_len = 1;
|
||||
items[1].code[0] = (OnigCodePoint )0xdf;
|
||||
return 2;
|
||||
}
|
||||
else
|
||||
return 1;
|
||||
}
|
||||
else if (0xc0 <= *p && *p <= 0xcf) {
|
||||
items[0].byte_len = 1;
|
||||
items[0].code_len = 1;
|
||||
items[0].code[0] = (OnigCodePoint )(*p + 0x20);
|
||||
return 1;
|
||||
}
|
||||
else if (0xd0 <= *p && *p <= 0xdf) {
|
||||
if (*p == 0xdf) {
|
||||
items[0].byte_len = 1;
|
||||
items[0].code_len = 2;
|
||||
items[0].code[0] = (OnigCodePoint )'s';
|
||||
items[0].code[1] = (OnigCodePoint )'s';
|
||||
|
||||
items[1].byte_len = 1;
|
||||
items[1].code_len = 2;
|
||||
items[1].code[0] = (OnigCodePoint )'S';
|
||||
items[1].code[1] = (OnigCodePoint )'S';
|
||||
|
||||
items[2].byte_len = 1;
|
||||
items[2].code_len = 2;
|
||||
items[2].code[0] = (OnigCodePoint )'s';
|
||||
items[2].code[1] = (OnigCodePoint )'S';
|
||||
|
||||
items[3].byte_len = 1;
|
||||
items[3].code_len = 2;
|
||||
items[3].code[0] = (OnigCodePoint )'S';
|
||||
items[3].code[1] = (OnigCodePoint )'s';
|
||||
|
||||
return 4;
|
||||
}
|
||||
else if (*p != 0xd7) {
|
||||
items[0].byte_len = 1;
|
||||
items[0].code_len = 1;
|
||||
items[0].code[0] = (OnigCodePoint )(*p + 0x20);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
else if (0xe0 <= *p && *p <= 0xef) {
|
||||
items[0].byte_len = 1;
|
||||
items[0].code_len = 1;
|
||||
items[0].code[0] = (OnigCodePoint )(*p - 0x20);
|
||||
return 1;
|
||||
}
|
||||
else if (0xf0 <= *p && *p <= 0xfe) {
|
||||
if (*p != 0xf7) {
|
||||
items[0].byte_len = 1;
|
||||
items[0].code_len = 1;
|
||||
items[0].code[0] = (OnigCodePoint )(*p - 0x20);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
mbc_case_fold(OnigCaseFoldType flag, const UChar** pp,
|
||||
const UChar* end ARG_UNUSED, UChar* lower)
|
||||
{
|
||||
const UChar* p = *pp;
|
||||
|
||||
if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
|
||||
*lower++ = 's';
|
||||
*lower = 's';
|
||||
(*pp)++;
|
||||
return 2;
|
||||
}
|
||||
|
||||
*lower = ONIGENC_ISO_8859_1_TO_LOWER_CASE(*p);
|
||||
(*pp)++;
|
||||
return 1;
|
||||
}
|
||||
|
||||
#if 0
/*
 * Disabled code: tell whether the byte at *pp has a case counterpart, and
 * advance *pp by one.
 *
 * Fix: the lower-case test used `|` instead of `&`, which made the condition
 * always true and the final `return` unreachable.
 *
 * NOTE(review): 0xdf (without the multi-char flag) and 0xff are lower-case
 * letters outside the 0xaa-0xba exclusion below and are still reported as
 * ambiguous -- revisit before re-enabling this function.
 */
static int
is_mbc_ambiguous(OnigCaseFoldType flag,
		 const UChar** pp, const UChar* end)
{
  int v;
  const UChar* p = *pp;

  if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
    (*pp)++;
    return TRUE;
  }

  (*pp)++;
  v = (EncISO_8859_1_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
  if ((v & BIT_CTYPE_LOWER) != 0) {
    /* 0xdf, 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
    if (*p >= 0xaa && *p <= 0xba)
      return FALSE;
    else
      return TRUE;
  }

  /* Not lower case: ambiguous only if it is an upper-case letter. */
  return (v != 0 ? TRUE : FALSE);
}
#endif
|
||||
|
||||
static int
|
||||
is_code_ctype(OnigCodePoint code, unsigned int ctype)
|
||||
{
|
||||
if (code < 256)
|
||||
return ENC_IS_ISO_8859_1_CTYPE(code, ctype);
|
||||
else
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/*
 * Encoding descriptor for ISO-8859-1.  The initializer is positional: shared
 * single-byte onigenc_* helpers plus this file's mbc_case_fold,
 * apply_all_case_fold, get_case_fold_codes_by_str and is_code_ctype.  The
 * slot order is fixed by the OnigEncodingType declaration, which is not part
 * of this file.
 */
OnigEncodingType OnigEncodingISO_8859_1 = {
|
||||
onigenc_single_byte_mbc_enc_len,
|
||||
"ISO-8859-1", /* name */
|
||||
1, /* max enc length */
|
||||
1, /* min enc length */
|
||||
onigenc_is_mbc_newline_0x0a,
|
||||
onigenc_single_byte_mbc_to_code,
|
||||
onigenc_single_byte_code_to_mbclen,
|
||||
onigenc_single_byte_code_to_mbc,
|
||||
mbc_case_fold,
|
||||
apply_all_case_fold,
|
||||
get_case_fold_codes_by_str,
|
||||
onigenc_minimum_property_name_to_ctype,
|
||||
is_code_ctype,
|
||||
onigenc_not_support_get_ctype_code_range,
|
||||
onigenc_single_byte_left_adjust_char_head,
|
||||
onigenc_always_true_is_allowed_reverse_match,
|
||||
ONIGENC_FLAG_NONE,
|
||||
};
|
240
src/Onigmo/enc/iso8859_10.c
Normal file
240
src/Onigmo/enc/iso8859_10.c
Normal file
@ -0,0 +1,240 @@
|
||||
/**********************************************************************
|
||||
iso8859_10.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "regenc.h"
|
||||
|
||||
/*
 * Table-lookup helpers for ISO-8859-10.  Neither macro range-checks its
 * argument: `c` / `code` must be below 256 (both tables have 256 entries).
 */
#define ENC_ISO_8859_10_TO_LOWER_CASE(c) EncISO_8859_10_ToLowerCaseTable[c]
|
||||
#define ENC_IS_ISO_8859_10_CTYPE(code,ctype) \
|
||||
((EncISO_8859_10_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
|
||||
|
||||
/* Lower-case mapping for every ISO-8859-10 byte, indexed by the byte and
   written in octal, eight entries per row.  Bytes without a distinct
   lower-case form map to themselves. */
static const UChar EncISO_8859_10_ToLowerCaseTable[256] = {
|
||||
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
|
||||
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
|
||||
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
|
||||
'\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
|
||||
'\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
|
||||
'\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
|
||||
'\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
|
||||
'\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
|
||||
'\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
|
||||
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
|
||||
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
|
||||
'\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
|
||||
'\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
|
||||
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
|
||||
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
|
||||
'\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
|
||||
'\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
|
||||
'\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
|
||||
'\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
|
||||
'\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
|
||||
'\240', '\261', '\262', '\263', '\264', '\265', '\266', '\247',
|
||||
'\270', '\271', '\272', '\273', '\274', '\255', '\276', '\277',
|
||||
'\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
|
||||
'\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
|
||||
'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
|
||||
'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
|
||||
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
|
||||
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337',
|
||||
'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
|
||||
'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
|
||||
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
|
||||
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
|
||||
};
|
||||
|
||||
/* Character-type bit set for every ISO-8859-10 byte value, indexed by the
   byte (0x00-0xff); eight entries per row.  Entries are tested against
   CTYPE_TO_BIT(ctype) by ENC_IS_ISO_8859_10_CTYPE(). */
static const unsigned short EncISO_8859_10_CtypeTable[256] = {
|
||||
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
|
||||
0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
|
||||
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
|
||||
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
|
||||
0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
|
||||
0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
|
||||
0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
|
||||
0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
|
||||
0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
|
||||
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
|
||||
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
|
||||
0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
|
||||
0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
|
||||
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
|
||||
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
|
||||
0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
|
||||
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
|
||||
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
|
||||
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
|
||||
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
|
||||
0x0284, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x01a0, 0x34a2, 0x34a2,
|
||||
0x00a0, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x01a0,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x01a0, 0x30e2, 0x30e2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2
|
||||
};
|
||||
|
||||
static int
|
||||
mbc_case_fold(OnigCaseFoldType flag,
|
||||
const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower)
|
||||
{
|
||||
const UChar* p = *pp;
|
||||
|
||||
if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
|
||||
*lower++ = 's';
|
||||
*lower = 's';
|
||||
(*pp)++;
|
||||
return 2;
|
||||
}
|
||||
|
||||
*lower = ENC_ISO_8859_10_TO_LOWER_CASE(*p);
|
||||
(*pp)++;
|
||||
return 1;
|
||||
}
|
||||
|
||||
#if 0
/*
 * Disabled code: tell whether the byte at *pp has a case counterpart, and
 * advance *pp by one.
 *
 * Fix: the lower-case test used `|` instead of `&`, which made the condition
 * always true, so every byte was reported as ambiguous and the final
 * `return` was unreachable.
 */
static int
is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
{
  int v;
  const UChar* p = *pp;

  if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
    (*pp)++;
    return TRUE;
  }

  (*pp)++;
  v = (EncISO_8859_10_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
  if ((v & BIT_CTYPE_LOWER) != 0) {
    return TRUE;
  }

  /* Not lower case: ambiguous only if it is an upper-case letter. */
  return (v != 0 ? TRUE : FALSE);
}
#endif
|
||||
|
||||
static int
|
||||
is_code_ctype(OnigCodePoint code, unsigned int ctype)
|
||||
{
|
||||
if (code < 256)
|
||||
return ENC_IS_ISO_8859_10_CTYPE(code, ctype);
|
||||
else
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/*
 * Single-byte case pairs for ISO-8859-10 above the ASCII range.  The first
 * group pairs 0xa1-0xaf with the byte 0x10 higher (0xa7 and 0xad have no
 * entry); the second pairs 0xc0-0xde with the byte 0x20 higher.  0xdf has no
 * entry here.
 */
static const OnigPairCaseFoldCodes CaseFoldMap[] = {
|
||||
{ 0xa1, 0xb1 },
|
||||
{ 0xa2, 0xb2 },
|
||||
{ 0xa3, 0xb3 },
|
||||
{ 0xa4, 0xb4 },
|
||||
{ 0xa5, 0xb5 },
|
||||
{ 0xa6, 0xb6 },
|
||||
{ 0xa8, 0xb8 },
|
||||
{ 0xa9, 0xb9 },
|
||||
{ 0xaa, 0xba },
|
||||
{ 0xab, 0xbb },
|
||||
{ 0xac, 0xbc },
|
||||
{ 0xae, 0xbe },
|
||||
{ 0xaf, 0xbf },
|
||||
|
||||
{ 0xc0, 0xe0 },
|
||||
{ 0xc1, 0xe1 },
|
||||
{ 0xc2, 0xe2 },
|
||||
{ 0xc3, 0xe3 },
|
||||
{ 0xc4, 0xe4 },
|
||||
{ 0xc5, 0xe5 },
|
||||
{ 0xc6, 0xe6 },
|
||||
{ 0xc7, 0xe7 },
|
||||
{ 0xc8, 0xe8 },
|
||||
{ 0xc9, 0xe9 },
|
||||
{ 0xca, 0xea },
|
||||
{ 0xcb, 0xeb },
|
||||
{ 0xcc, 0xec },
|
||||
{ 0xcd, 0xed },
|
||||
{ 0xce, 0xee },
|
||||
{ 0xcf, 0xef },
|
||||
|
||||
{ 0xd0, 0xf0 },
|
||||
{ 0xd1, 0xf1 },
|
||||
{ 0xd2, 0xf2 },
|
||||
{ 0xd3, 0xf3 },
|
||||
{ 0xd4, 0xf4 },
|
||||
{ 0xd5, 0xf5 },
|
||||
{ 0xd6, 0xf6 },
|
||||
{ 0xd7, 0xf7 },
|
||||
{ 0xd8, 0xf8 },
|
||||
{ 0xd9, 0xf9 },
|
||||
{ 0xda, 0xfa },
|
||||
{ 0xdb, 0xfb },
|
||||
{ 0xdc, 0xfc },
|
||||
{ 0xdd, 0xfd },
|
||||
{ 0xde, 0xfe }
|
||||
};
|
||||
|
||||
static int
|
||||
apply_all_case_fold(OnigCaseFoldType flag,
|
||||
OnigApplyAllCaseFoldFunc f, void* arg)
|
||||
{
|
||||
return onigenc_apply_all_case_fold_with_map(
|
||||
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
|
||||
flag, f, arg);
|
||||
}
|
||||
|
||||
static int
|
||||
get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
||||
const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
|
||||
{
|
||||
return onigenc_get_case_fold_codes_by_str_with_map(
|
||||
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
|
||||
flag, p, end, items);
|
||||
}
|
||||
|
||||
/*
 * Encoding descriptor for ISO-8859-10.  The initializer is positional: shared
 * single-byte onigenc_* helpers plus this file's mbc_case_fold,
 * apply_all_case_fold, get_case_fold_codes_by_str and is_code_ctype.  The
 * slot order is fixed by the OnigEncodingType declaration, which is not part
 * of this file.
 */
OnigEncodingType OnigEncodingISO_8859_10 = {
|
||||
onigenc_single_byte_mbc_enc_len,
|
||||
"ISO-8859-10", /* name */
|
||||
1, /* max enc length */
|
||||
1, /* min enc length */
|
||||
onigenc_is_mbc_newline_0x0a,
|
||||
onigenc_single_byte_mbc_to_code,
|
||||
onigenc_single_byte_code_to_mbclen,
|
||||
onigenc_single_byte_code_to_mbc,
|
||||
mbc_case_fold,
|
||||
apply_all_case_fold,
|
||||
get_case_fold_codes_by_str,
|
||||
onigenc_minimum_property_name_to_ctype,
|
||||
is_code_ctype,
|
||||
onigenc_not_support_get_ctype_code_range,
|
||||
onigenc_single_byte_left_adjust_char_head,
|
||||
onigenc_always_true_is_allowed_reverse_match,
|
||||
ONIGENC_FLAG_NONE,
|
||||
};
|
97
src/Onigmo/enc/iso8859_11.c
Normal file
97
src/Onigmo/enc/iso8859_11.c
Normal file
@ -0,0 +1,97 @@
|
||||
/**********************************************************************
|
||||
iso8859_11.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "regenc.h"
|
||||
|
||||
/* Nonzero when the EncISO_8859_11_CtypeTable entry for `code` has the bit
 * CTYPE_TO_BIT(ctype) set.  `code` indexes the 256-entry table directly,
 * so callers must range-check it first. */
#define ENC_IS_ISO_8859_11_CTYPE(code,ctype) \
  (0 != (CTYPE_TO_BIT(ctype) & EncISO_8859_11_CtypeTable[code]))
|
||||
|
||||
/* Character-class bitmask for each ISO-8859-11 byte value, indexed by the
 * byte itself.  Individual bits are tested with CTYPE_TO_BIT() through
 * ENC_IS_ISO_8859_11_CTYPE.  The leading comment on each row gives the
 * index of its first entry. */
static const unsigned short EncISO_8859_11_CtypeTable[256] = {
  /* 0x00 */ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  /* 0x08 */ 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
  /* 0x10 */ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  /* 0x18 */ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  /* 0x20 */ 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  /* 0x28 */ 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  /* 0x30 */ 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
  /* 0x38 */ 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  /* 0x40 */ 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
  /* 0x48 */ 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
  /* 0x50 */ 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
  /* 0x58 */ 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
  /* 0x60 */ 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
  /* 0x68 */ 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
  /* 0x70 */ 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
  /* 0x78 */ 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
  /* 0x80 */ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
  /* 0x88 */ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
  /* 0x90 */ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
  /* 0x98 */ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
  /* 0xa0 */ 0x0284, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
  /* 0xa8 */ 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
  /* 0xb0 */ 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
  /* 0xb8 */ 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
  /* 0xc0 */ 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
  /* 0xc8 */ 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
  /* 0xd0 */ 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
  /* 0xd8 */ 0x30a2, 0x30a2, 0x30a2, 0x0000, 0x0000, 0x0000, 0x0000, 0x30a2,
  /* 0xe0 */ 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
  /* 0xe8 */ 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
  /* 0xf0 */ 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
  /* 0xf8 */ 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x0000, 0x0000, 0x0000, 0x0000
};
|
||||
|
||||
static int
|
||||
is_code_ctype(OnigCodePoint code, unsigned int ctype)
|
||||
{
|
||||
if (code < 256)
|
||||
return ENC_IS_ISO_8859_11_CTYPE(code, ctype);
|
||||
else
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
OnigEncodingType OnigEncodingISO_8859_11 = {
|
||||
onigenc_single_byte_mbc_enc_len,
|
||||
"ISO-8859-11", /* name */
|
||||
1, /* max enc length */
|
||||
1, /* min enc length */
|
||||
onigenc_is_mbc_newline_0x0a,
|
||||
onigenc_single_byte_mbc_to_code,
|
||||
onigenc_single_byte_code_to_mbclen,
|
||||
onigenc_single_byte_code_to_mbc,
|
||||
onigenc_ascii_mbc_case_fold,
|
||||
onigenc_ascii_apply_all_case_fold,
|
||||
onigenc_ascii_get_case_fold_codes_by_str,
|
||||
onigenc_minimum_property_name_to_ctype,
|
||||
is_code_ctype,
|
||||
onigenc_not_support_get_ctype_code_range,
|
||||
onigenc_single_byte_left_adjust_char_head,
|
||||
onigenc_always_true_is_allowed_reverse_match,
|
||||
ONIGENC_FLAG_NONE,
|
||||
};
|
229
src/Onigmo/enc/iso8859_13.c
Normal file
229
src/Onigmo/enc/iso8859_13.c
Normal file
@ -0,0 +1,229 @@
|
||||
/**********************************************************************
|
||||
iso8859_13.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "regenc.h"
|
||||
|
||||
/* Looks up the folded form of byte `c` in EncISO_8859_13_ToLowerCaseTable. */
#define ENC_ISO_8859_13_TO_LOWER_CASE(c) EncISO_8859_13_ToLowerCaseTable[c]

/* Nonzero when the EncISO_8859_13_CtypeTable entry for `code` has the bit
 * CTYPE_TO_BIT(ctype) set.  `code` indexes the 256-entry table directly,
 * so callers must range-check it first. */
#define ENC_IS_ISO_8859_13_CTYPE(code,ctype) \
  (0 != (CTYPE_TO_BIT(ctype) & EncISO_8859_13_CtypeTable[code]))
|
||||
|
||||
/* Byte-to-byte fold table used by ENC_ISO_8859_13_TO_LOWER_CASE: entry i
 * is the value stored for input byte i.  Bytes that do not fold map to
 * themselves.  The leading comment on each row gives the index of its
 * first entry (octal / hex). */
static const UChar EncISO_8859_13_ToLowerCaseTable[256] = {
  /* 000 0x00 */ '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
  /* 010 0x08 */ '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
  /* 020 0x10 */ '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
  /* 030 0x18 */ '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
  /* 040 0x20 */ '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
  /* 050 0x28 */ '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
  /* 060 0x30 */ '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
  /* 070 0x38 */ '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
  /* 100 0x40 */ '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
  /* 110 0x48 */ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
  /* 120 0x50 */ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
  /* 130 0x58 */ '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
  /* 140 0x60 */ '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
  /* 150 0x68 */ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
  /* 160 0x70 */ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
  /* 170 0x78 */ '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
  /* 200 0x80 */ '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
  /* 210 0x88 */ '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
  /* 220 0x90 */ '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
  /* 230 0x98 */ '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
  /* 240 0xa0 */ '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
  /* 250 0xa8 */ '\270', '\251', '\272', '\253', '\254', '\255', '\256', '\277',
  /* 260 0xb0 */ '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
  /* 270 0xb8 */ '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
  /* 300 0xc0 */ '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
  /* 310 0xc8 */ '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
  /* 320 0xd0 */ '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327',
  /* 330 0xd8 */ '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337',
  /* 340 0xe0 */ '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
  /* 350 0xe8 */ '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
  /* 360 0xf0 */ '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
  /* 370 0xf8 */ '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
};
|
||||
|
||||
/* Character-class bitmask for each ISO-8859-13 byte value, indexed by the
 * byte itself.  Individual bits are tested with CTYPE_TO_BIT() through
 * ENC_IS_ISO_8859_13_CTYPE.  The leading comment on each row gives the
 * index of its first entry. */
static const unsigned short EncISO_8859_13_CtypeTable[256] = {
  /* 0x00 */ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  /* 0x08 */ 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
  /* 0x10 */ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  /* 0x18 */ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  /* 0x20 */ 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  /* 0x28 */ 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  /* 0x30 */ 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
  /* 0x38 */ 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  /* 0x40 */ 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
  /* 0x48 */ 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
  /* 0x50 */ 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
  /* 0x58 */ 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
  /* 0x60 */ 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
  /* 0x68 */ 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
  /* 0x70 */ 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
  /* 0x78 */ 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
  /* 0x80 */ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
  /* 0x88 */ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
  /* 0x90 */ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
  /* 0x98 */ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
  /* 0xa0 */ 0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0,
  /* 0xa8 */ 0x34a2, 0x00a0, 0x34a2, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x34a2,
  /* 0xb0 */ 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x01a0, 0x30e2, 0x00a0, 0x01a0,
  /* 0xb8 */ 0x30e2, 0x10a0, 0x30e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x30e2,
  /* 0xc0 */ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
  /* 0xc8 */ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
  /* 0xd0 */ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0,
  /* 0xd8 */ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2,
  /* 0xe0 */ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
  /* 0xe8 */ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
  /* 0xf0 */ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0,
  /* 0xf8 */ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x01a0
};
|
||||
|
||||
static int
|
||||
mbc_case_fold(OnigCaseFoldType flag,
|
||||
const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower)
|
||||
{
|
||||
const UChar* p = *pp;
|
||||
|
||||
if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
|
||||
*lower++ = 's';
|
||||
*lower = 's';
|
||||
(*pp)++;
|
||||
return 2;
|
||||
}
|
||||
|
||||
*lower = ENC_ISO_8859_13_TO_LOWER_CASE(*p);
|
||||
(*pp)++;
|
||||
return 1;
|
||||
}
|
||||
|
||||
#if 0
/* Disabled code: excluded from compilation by the surrounding #if 0.
 *
 * Consumes one byte from *pp and reports whether it has a case
 * counterpart.  0xdf is ambiguous when multi-char folding is requested.
 * Otherwise a lower-case byte is ambiguous unless it is 0xb5, and any
 * other byte is ambiguous only if its upper-case bit is set.
 *
 * Fix: the lower-case test used `|` instead of `&`, which is always true
 * and made the final return unreachable. */
static int
is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp,
                 const UChar* end ARG_UNUSED)
{
  int v;
  const UChar* p = *pp;

  if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
    (*pp)++;
    return TRUE;
  }

  (*pp)++;
  v = (EncISO_8859_13_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
  if ((v & BIT_CTYPE_LOWER) != 0) {
    /* 0xdf, 0xb5 are lower case letter, but can't convert. */
    if (*p == 0xb5)
      return FALSE;
    else
      return TRUE;
  }

  /* Not lower case: ambiguous only when the upper-case bit is set. */
  return (v != 0 ? TRUE : FALSE);
}
#endif
|
||||
|
||||
static int
|
||||
is_code_ctype(OnigCodePoint code, unsigned int ctype)
|
||||
{
|
||||
if (code < 256)
|
||||
return ENC_IS_ISO_8859_13_CTYPE(code, ctype);
|
||||
else
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/* Case pairs handed to the onigenc_*_with_map helpers by
 * apply_all_case_fold() and get_case_fold_codes_by_str().  Each entry
 * holds a byte and the value EncISO_8859_13_ToLowerCaseTable folds it to.
 *
 * The first three pairs were missing.  The lower-case table folds
 * 0xa8 -> 0xb8, 0xaa -> 0xba and 0xaf -> 0xbf, and the ctype table marks
 * those bytes as upper/lower, so without them the map-driven helpers
 * disagreed with mbc_case_fold(). */
static const OnigPairCaseFoldCodes CaseFoldMap[] = {
  { 0xa8, 0xb8 },
  { 0xaa, 0xba },
  { 0xaf, 0xbf },

  { 0xc0, 0xe0 },
  { 0xc1, 0xe1 },
  { 0xc2, 0xe2 },
  { 0xc3, 0xe3 },
  { 0xc4, 0xe4 },
  { 0xc5, 0xe5 },
  { 0xc6, 0xe6 },
  { 0xc7, 0xe7 },
  { 0xc8, 0xe8 },
  { 0xc9, 0xe9 },
  { 0xca, 0xea },
  { 0xcb, 0xeb },
  { 0xcc, 0xec },
  { 0xcd, 0xed },
  { 0xce, 0xee },
  { 0xcf, 0xef },

  { 0xd0, 0xf0 },
  { 0xd1, 0xf1 },
  { 0xd2, 0xf2 },
  { 0xd3, 0xf3 },
  { 0xd4, 0xf4 },
  { 0xd5, 0xf5 },
  { 0xd6, 0xf6 },
  /* 0xd7 folds to itself in the lower-case table, so it has no pair. */
  { 0xd8, 0xf8 },
  { 0xd9, 0xf9 },
  { 0xda, 0xfa },
  { 0xdb, 0xfb },
  { 0xdc, 0xfc },
  { 0xdd, 0xfd },
  { 0xde, 0xfe }
};
|
||||
|
||||
static int
|
||||
apply_all_case_fold(OnigCaseFoldType flag,
|
||||
OnigApplyAllCaseFoldFunc f, void* arg)
|
||||
{
|
||||
return onigenc_apply_all_case_fold_with_map(
|
||||
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
|
||||
flag, f, arg);
|
||||
}
|
||||
|
||||
static int
|
||||
get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
||||
const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
|
||||
{
|
||||
return onigenc_get_case_fold_codes_by_str_with_map(
|
||||
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
|
||||
flag, p, end, items);
|
||||
}
|
||||
|
||||
OnigEncodingType OnigEncodingISO_8859_13 = {
|
||||
onigenc_single_byte_mbc_enc_len,
|
||||
"ISO-8859-13", /* name */
|
||||
1, /* max enc length */
|
||||
1, /* min enc length */
|
||||
onigenc_is_mbc_newline_0x0a,
|
||||
onigenc_single_byte_mbc_to_code,
|
||||
onigenc_single_byte_code_to_mbclen,
|
||||
onigenc_single_byte_code_to_mbc,
|
||||
mbc_case_fold,
|
||||
apply_all_case_fold,
|
||||
get_case_fold_codes_by_str,
|
||||
onigenc_minimum_property_name_to_ctype,
|
||||
is_code_ctype,
|
||||
onigenc_not_support_get_ctype_code_range,
|
||||
onigenc_single_byte_left_adjust_char_head,
|
||||
onigenc_always_true_is_allowed_reverse_match,
|
||||
ONIGENC_FLAG_NONE,
|
||||
};
|
242
src/Onigmo/enc/iso8859_14.c
Normal file
242
src/Onigmo/enc/iso8859_14.c
Normal file
@ -0,0 +1,242 @@
|
||||
/**********************************************************************
|
||||
iso8859_14.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "regenc.h"
|
||||
|
||||
/* Looks up the folded form of byte `c` in EncISO_8859_14_ToLowerCaseTable. */
#define ENC_ISO_8859_14_TO_LOWER_CASE(c) EncISO_8859_14_ToLowerCaseTable[c]

/* Nonzero when the EncISO_8859_14_CtypeTable entry for `code` has the bit
 * CTYPE_TO_BIT(ctype) set.  `code` indexes the 256-entry table directly,
 * so callers must range-check it first. */
#define ENC_IS_ISO_8859_14_CTYPE(code,ctype) \
  (0 != (CTYPE_TO_BIT(ctype) & EncISO_8859_14_CtypeTable[code]))
|
||||
|
||||
/* Byte-to-byte fold table used by ENC_ISO_8859_14_TO_LOWER_CASE: entry i
 * is the value stored for input byte i.  Bytes that do not fold map to
 * themselves.  The leading comment on each row gives the index of its
 * first entry (octal / hex). */
static const UChar EncISO_8859_14_ToLowerCaseTable[256] = {
  /* 000 0x00 */ '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
  /* 010 0x08 */ '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
  /* 020 0x10 */ '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
  /* 030 0x18 */ '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
  /* 040 0x20 */ '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
  /* 050 0x28 */ '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
  /* 060 0x30 */ '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
  /* 070 0x38 */ '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
  /* 100 0x40 */ '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
  /* 110 0x48 */ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
  /* 120 0x50 */ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
  /* 130 0x58 */ '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
  /* 140 0x60 */ '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
  /* 150 0x68 */ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
  /* 160 0x70 */ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
  /* 170 0x78 */ '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
  /* 200 0x80 */ '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
  /* 210 0x88 */ '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
  /* 220 0x90 */ '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
  /* 230 0x98 */ '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
  /* 240 0xa0 */ '\240', '\242', '\242', '\243', '\245', '\245', '\253', '\247',
  /* 250 0xa8 */ '\270', '\251', '\272', '\253', '\274', '\255', '\256', '\377',
  /* 260 0xb0 */ '\261', '\261', '\263', '\263', '\265', '\265', '\266', '\271',
  /* 270 0xb8 */ '\270', '\271', '\272', '\277', '\274', '\276', '\276', '\277',
  /* 300 0xc0 */ '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
  /* 310 0xc8 */ '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
  /* 320 0xd0 */ '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
  /* 330 0xd8 */ '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337',
  /* 340 0xe0 */ '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
  /* 350 0xe8 */ '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
  /* 360 0xf0 */ '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
  /* 370 0xf8 */ '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
};
|
||||
|
||||
/* Character-class bitmask for each ISO-8859-14 byte value, indexed by the
 * byte itself.  Individual bits are tested with CTYPE_TO_BIT() through
 * ENC_IS_ISO_8859_14_CTYPE.  The leading comment on each row gives the
 * index of its first entry. */
static const unsigned short EncISO_8859_14_CtypeTable[256] = {
  /* 0x00 */ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  /* 0x08 */ 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
  /* 0x10 */ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  /* 0x18 */ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  /* 0x20 */ 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  /* 0x28 */ 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  /* 0x30 */ 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
  /* 0x38 */ 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  /* 0x40 */ 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
  /* 0x48 */ 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
  /* 0x50 */ 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
  /* 0x58 */ 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
  /* 0x60 */ 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
  /* 0x68 */ 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
  /* 0x70 */ 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
  /* 0x78 */ 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
  /* 0x80 */ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
  /* 0x88 */ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
  /* 0x90 */ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
  /* 0x98 */ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
  /* 0xa0 */ 0x0284, 0x34a2, 0x30e2, 0x00a0, 0x34a2, 0x30e2, 0x34a2, 0x00a0,
  /* 0xa8 */ 0x34a2, 0x00a0, 0x34a2, 0x30e2, 0x34a2, 0x01a0, 0x00a0, 0x34a2,
  /* 0xb0 */ 0x34a2, 0x30e2, 0x34a2, 0x30e2, 0x34a2, 0x30e2, 0x00a0, 0x34a2,
  /* 0xb8 */ 0x30e2, 0x30e2, 0x30e2, 0x34a2, 0x30e2, 0x34a2, 0x30e2, 0x30e2,
  /* 0xc0 */ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
  /* 0xc8 */ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
  /* 0xd0 */ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
  /* 0xd8 */ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2,
  /* 0xe0 */ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
  /* 0xe8 */ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
  /* 0xf0 */ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
  /* 0xf8 */ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2
};
|
||||
|
||||
static int
|
||||
mbc_case_fold(OnigCaseFoldType flag,
|
||||
const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower)
|
||||
{
|
||||
const UChar* p = *pp;
|
||||
|
||||
if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
|
||||
*lower++ = 's';
|
||||
*lower = 's';
|
||||
(*pp)++;
|
||||
return 2;
|
||||
}
|
||||
|
||||
*lower = ENC_ISO_8859_14_TO_LOWER_CASE(*p);
|
||||
(*pp)++;
|
||||
return 1; /* return byte length of converted char to lower */
|
||||
}
|
||||
|
||||
#if 0
/* Disabled code: excluded from compilation by the surrounding #if 0.
 *
 * Consumes one byte from *pp and reports whether it has a case
 * counterpart.  0xdf is ambiguous when multi-char folding is requested.
 * Otherwise the byte is ambiguous when its ctype entry has the lower-case
 * or upper-case bit set.
 *
 * Fix: the lower-case test used `|` instead of `&`, which is always true
 * and made the final return unreachable. */
static int
is_mbc_ambiguous(OnigCaseFoldType flag,
                 const UChar** pp, const UChar* end ARG_UNUSED)
{
  int v;
  const UChar* p = *pp;

  if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
    (*pp)++;
    return TRUE;
  }

  (*pp)++;
  v = (EncISO_8859_14_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
  if ((v & BIT_CTYPE_LOWER) != 0) {
    return TRUE;
  }

  /* Not lower case: ambiguous only when the upper-case bit is set. */
  return (v != 0 ? TRUE : FALSE);
}
#endif
|
||||
|
||||
static int
|
||||
is_code_ctype(OnigCodePoint code, unsigned int ctype)
|
||||
{
|
||||
if (code < 256)
|
||||
return ENC_IS_ISO_8859_14_CTYPE(code, ctype);
|
||||
else
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/* Case pairs handed to the onigenc_*_with_map helpers by
 * apply_all_case_fold() and get_case_fold_codes_by_str().  Each entry
 * holds a byte and the value EncISO_8859_14_ToLowerCaseTable folds it to.
 * Entry order is unchanged; pairs are grouped by the high nibble of the
 * first byte. */
static const OnigPairCaseFoldCodes CaseFoldMap[] = {
  /* 0xa_ */
  { 0xa1, 0xa2 }, { 0xa4, 0xa5 }, { 0xa6, 0xab }, { 0xa8, 0xb8 },
  { 0xaa, 0xba }, { 0xac, 0xbc }, { 0xaf, 0xff },

  /* 0xb_ */
  { 0xb0, 0xb1 }, { 0xb2, 0xb3 }, { 0xb4, 0xb5 }, { 0xb7, 0xb9 },
  { 0xbb, 0xbf }, { 0xbd, 0xbe },

  /* 0xc_ */
  { 0xc0, 0xe0 }, { 0xc1, 0xe1 }, { 0xc2, 0xe2 }, { 0xc3, 0xe3 },
  { 0xc4, 0xe4 }, { 0xc5, 0xe5 }, { 0xc6, 0xe6 }, { 0xc7, 0xe7 },
  { 0xc8, 0xe8 }, { 0xc9, 0xe9 }, { 0xca, 0xea }, { 0xcb, 0xeb },
  { 0xcc, 0xec }, { 0xcd, 0xed }, { 0xce, 0xee }, { 0xcf, 0xef },

  /* 0xd_ */
  { 0xd0, 0xf0 }, { 0xd1, 0xf1 }, { 0xd2, 0xf2 }, { 0xd3, 0xf3 },
  { 0xd4, 0xf4 }, { 0xd5, 0xf5 }, { 0xd6, 0xf6 }, { 0xd7, 0xf7 },
  { 0xd8, 0xf8 }, { 0xd9, 0xf9 }, { 0xda, 0xfa }, { 0xdb, 0xfb },
  { 0xdc, 0xfc }, { 0xdd, 0xfd }, { 0xde, 0xfe }
};
|
||||
|
||||
static int
|
||||
apply_all_case_fold(OnigCaseFoldType flag,
|
||||
OnigApplyAllCaseFoldFunc f, void* arg)
|
||||
{
|
||||
return onigenc_apply_all_case_fold_with_map(
|
||||
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
|
||||
flag, f, arg);
|
||||
}
|
||||
|
||||
static int
|
||||
get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
||||
const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
|
||||
{
|
||||
return onigenc_get_case_fold_codes_by_str_with_map(
|
||||
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
|
||||
flag, p, end, items);
|
||||
}
|
||||
|
||||
OnigEncodingType OnigEncodingISO_8859_14 = {
|
||||
onigenc_single_byte_mbc_enc_len,
|
||||
"ISO-8859-14", /* name */
|
||||
1, /* max enc length */
|
||||
1, /* min enc length */
|
||||
onigenc_is_mbc_newline_0x0a,
|
||||
onigenc_single_byte_mbc_to_code,
|
||||
onigenc_single_byte_code_to_mbclen,
|
||||
onigenc_single_byte_code_to_mbc,
|
||||
mbc_case_fold,
|
||||
apply_all_case_fold,
|
||||
get_case_fold_codes_by_str,
|
||||
onigenc_minimum_property_name_to_ctype,
|
||||
is_code_ctype,
|
||||
onigenc_not_support_get_ctype_code_range,
|
||||
onigenc_single_byte_left_adjust_char_head,
|
||||
onigenc_always_true_is_allowed_reverse_match,
|
||||
ONIGENC_FLAG_NONE,
|
||||
};
|
236
src/Onigmo/enc/iso8859_15.c
Normal file
236
src/Onigmo/enc/iso8859_15.c
Normal file
@ -0,0 +1,236 @@
|
||||
/**********************************************************************
|
||||
iso8859_15.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "regenc.h"
|
||||
|
||||
/* Looks up the folded form of byte `c` in EncISO_8859_15_ToLowerCaseTable. */
#define ENC_ISO_8859_15_TO_LOWER_CASE(c) EncISO_8859_15_ToLowerCaseTable[c]

/* Nonzero when the EncISO_8859_15_CtypeTable entry for `code` has the bit
 * CTYPE_TO_BIT(ctype) set.  `code` indexes the 256-entry table directly,
 * so callers must range-check it first. */
#define ENC_IS_ISO_8859_15_CTYPE(code,ctype) \
  (0 != (CTYPE_TO_BIT(ctype) & EncISO_8859_15_CtypeTable[code]))
|
||||
|
||||
/* Byte-to-byte fold table used by ENC_ISO_8859_15_TO_LOWER_CASE: entry i
 * is the value stored for input byte i.  Bytes that do not fold map to
 * themselves.  The leading comment on each row gives the index of its
 * first entry (octal / hex). */
static const UChar EncISO_8859_15_ToLowerCaseTable[256] = {
  /* 000 0x00 */ '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
  /* 010 0x08 */ '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
  /* 020 0x10 */ '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
  /* 030 0x18 */ '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
  /* 040 0x20 */ '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
  /* 050 0x28 */ '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
  /* 060 0x30 */ '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
  /* 070 0x38 */ '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
  /* 100 0x40 */ '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
  /* 110 0x48 */ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
  /* 120 0x50 */ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
  /* 130 0x58 */ '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
  /* 140 0x60 */ '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
  /* 150 0x68 */ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
  /* 160 0x70 */ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
  /* 170 0x78 */ '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
  /* 200 0x80 */ '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
  /* 210 0x88 */ '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
  /* 220 0x90 */ '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
  /* 230 0x98 */ '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
  /* 240 0xa0 */ '\240', '\241', '\242', '\243', '\244', '\245', '\250', '\247',
  /* 250 0xa8 */ '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
  /* 260 0xb0 */ '\260', '\261', '\262', '\263', '\270', '\265', '\266', '\267',
  /* 270 0xb8 */ '\270', '\271', '\272', '\273', '\275', '\275', '\377', '\277',
  /* 300 0xc0 */ '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
  /* 310 0xc8 */ '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
  /* 320 0xd0 */ '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327',
  /* 330 0xd8 */ '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337',
  /* 340 0xe0 */ '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
  /* 350 0xe8 */ '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
  /* 360 0xf0 */ '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
  /* 370 0xf8 */ '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
};
|
||||
|
||||
/* Character-class bitmask for each ISO-8859-15 byte value, indexed by the
 * byte itself.  Individual bits are tested with CTYPE_TO_BIT() through
 * ENC_IS_ISO_8859_15_CTYPE.  The leading comment on each row gives the
 * index of its first entry. */
static const unsigned short EncISO_8859_15_CtypeTable[256] = {
  /* 0x00 */ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  /* 0x08 */ 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
  /* 0x10 */ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  /* 0x18 */ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  /* 0x20 */ 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  /* 0x28 */ 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  /* 0x30 */ 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
  /* 0x38 */ 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  /* 0x40 */ 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
  /* 0x48 */ 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
  /* 0x50 */ 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
  /* 0x58 */ 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
  /* 0x60 */ 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
  /* 0x68 */ 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
  /* 0x70 */ 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
  /* 0x78 */ 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
  /* 0x80 */ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
  /* 0x88 */ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
  /* 0x90 */ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
  /* 0x98 */ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
  /* 0xa0 */ 0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x34a2, 0x00a0,
  /* 0xa8 */ 0x30e2, 0x00a0, 0x30e2, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0,
  /* 0xb0 */ 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x34a2, 0x30e2, 0x00a0, 0x01a0,
  /* 0xb8 */ 0x30e2, 0x10a0, 0x30e2, 0x01a0, 0x34a2, 0x30e2, 0x34a2, 0x01a0,
  /* 0xc0 */ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
  /* 0xc8 */ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
  /* 0xd0 */ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0,
  /* 0xd8 */ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2,
  /* 0xe0 */ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
  /* 0xe8 */ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
  /* 0xf0 */ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0,
  /* 0xf8 */ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2
};
|
||||
|
||||
static int
|
||||
mbc_case_fold(OnigCaseFoldType flag,
|
||||
const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower)
|
||||
{
|
||||
const UChar* p = *pp;
|
||||
|
||||
if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
|
||||
*lower++ = 's';
|
||||
*lower = 's';
|
||||
(*pp)++;
|
||||
return 2;
|
||||
}
|
||||
|
||||
*lower = ENC_ISO_8859_15_TO_LOWER_CASE(*p);
|
||||
(*pp)++;
|
||||
return 1; /* return byte length of converted char to lower */
|
||||
}
|
||||
|
||||
#if 0
/* Disabled code: excluded from compilation by the surrounding #if 0.
 *
 * Consumes one byte from *pp and reports whether it has a case
 * counterpart.  0xdf is ambiguous when multi-char folding is requested.
 * Otherwise a lower-case byte is ambiguous unless it is 0xaa, 0xb5 or
 * 0xba, and any other byte is ambiguous only if its upper-case bit is set.
 *
 * Fix: the lower-case test used `|` instead of `&`, which is always true
 * and made the final return unreachable. */
static int
is_mbc_ambiguous(OnigCaseFoldType flag,
                 const UChar** pp, const UChar* end ARG_UNUSED)
{
  int v;
  const UChar* p = *pp;

  if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
    (*pp)++;
    return TRUE;
  }

  (*pp)++;
  v = (EncISO_8859_15_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
  if ((v & BIT_CTYPE_LOWER) != 0) {
    /* 0xdf etc.. are lower case letter, but can't convert. */
    if (*p == 0xaa || *p == 0xb5 || *p == 0xba)
      return FALSE;
    else
      return TRUE;
  }

  /* Not lower case: ambiguous only when the upper-case bit is set. */
  return (v != 0 ? TRUE : FALSE);
}
#endif
|
||||
|
||||
static int
|
||||
is_code_ctype(OnigCodePoint code, unsigned int ctype)
|
||||
{
|
||||
if (code < 256)
|
||||
return ENC_IS_ISO_8859_15_CTYPE(code, ctype);
|
||||
else
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/*
 * Byte pairs handed to the onigenc_*_with_map() case-fold helpers below.
 */
static const OnigPairCaseFoldCodes CaseFoldMap[] = {
  /* irregular pairs in the 0xa0-0xbf range */
  { 0xa6, 0xa8 }, { 0xb4, 0xb8 }, { 0xbc, 0xbd }, { 0xbe, 0xff },

  /* 0xc0-0xcf paired with 0xe0-0xef */
  { 0xc0, 0xe0 }, { 0xc1, 0xe1 }, { 0xc2, 0xe2 }, { 0xc3, 0xe3 },
  { 0xc4, 0xe4 }, { 0xc5, 0xe5 }, { 0xc6, 0xe6 }, { 0xc7, 0xe7 },
  { 0xc8, 0xe8 }, { 0xc9, 0xe9 }, { 0xca, 0xea }, { 0xcb, 0xeb },
  { 0xcc, 0xec }, { 0xcd, 0xed }, { 0xce, 0xee }, { 0xcf, 0xef },

  /* 0xd0-0xde paired with 0xf0-0xfe; 0xd7 has no entry */
  { 0xd0, 0xf0 }, { 0xd1, 0xf1 }, { 0xd2, 0xf2 }, { 0xd3, 0xf3 },
  { 0xd4, 0xf4 }, { 0xd5, 0xf5 }, { 0xd6, 0xf6 },
  { 0xd8, 0xf8 }, { 0xd9, 0xf9 }, { 0xda, 0xfa }, { 0xdb, 0xfb },
  { 0xdc, 0xfc }, { 0xdd, 0xfd }, { 0xde, 0xfe }
};
|
||||
|
||||
static int
|
||||
apply_all_case_fold(OnigCaseFoldType flag,
|
||||
OnigApplyAllCaseFoldFunc f, void* arg)
|
||||
{
|
||||
return onigenc_apply_all_case_fold_with_map(
|
||||
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
|
||||
flag, f, arg);
|
||||
}
|
||||
|
||||
static int
|
||||
get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
||||
const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
|
||||
{
|
||||
return onigenc_get_case_fold_codes_by_str_with_map(
|
||||
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
|
||||
flag, p, end, items);
|
||||
}
|
||||
|
||||
OnigEncodingType OnigEncodingISO_8859_15 = {
|
||||
onigenc_single_byte_mbc_enc_len,
|
||||
"ISO-8859-15", /* name */
|
||||
1, /* max enc length */
|
||||
1, /* min enc length */
|
||||
onigenc_is_mbc_newline_0x0a,
|
||||
onigenc_single_byte_mbc_to_code,
|
||||
onigenc_single_byte_code_to_mbclen,
|
||||
onigenc_single_byte_code_to_mbc,
|
||||
mbc_case_fold,
|
||||
apply_all_case_fold,
|
||||
get_case_fold_codes_by_str,
|
||||
onigenc_minimum_property_name_to_ctype,
|
||||
is_code_ctype,
|
||||
onigenc_not_support_get_ctype_code_range,
|
||||
onigenc_single_byte_left_adjust_char_head,
|
||||
onigenc_always_true_is_allowed_reverse_match,
|
||||
ONIGENC_FLAG_NONE,
|
||||
};
|
238
src/Onigmo/enc/iso8859_16.c
Normal file
238
src/Onigmo/enc/iso8859_16.c
Normal file
@ -0,0 +1,238 @@
|
||||
/**********************************************************************
|
||||
iso8859_16.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "regenc.h"
|
||||
|
||||
/* Lower-case table entry for byte value `byte`. */
#define ENC_ISO_8859_16_TO_LOWER_CASE(byte) EncISO_8859_16_ToLowerCaseTable[byte]
/* Non-zero when the ctype-table entry for `cp` has the bit for `ct` set. */
#define ENC_IS_ISO_8859_16_CTYPE(cp,ct) \
  ((EncISO_8859_16_CtypeTable[cp] & CTYPE_TO_BIT(ct)) != 0)
|
||||
|
||||
/*
 * Lower-case mapping for ISO-8859-16, indexed by byte value.  Bytes with no
 * lower-case counterpart map to themselves.
 *
 * Fix: index 0xa4 previously mapped to 0xa5.  0xa4 is not a letter in the
 * ctype table below (0x00a0) and has no CaseFoldMap pair, so folding it to
 * 0xa5 made two unrelated bytes compare equal under case folding.  It now
 * maps to itself.
 */
static const UChar EncISO_8859_16_ToLowerCaseTable[256] = {
  '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', /* 0x00 */
  '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', /* 0x08 */
  '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', /* 0x10 */
  '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', /* 0x18 */
  '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', /* 0x20 */
  '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', /* 0x28 */
  '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', /* 0x30 */
  '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', /* 0x38 */
  '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', /* 0x40 */
  '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', /* 0x48 */
  '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', /* 0x50 */
  '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', /* 0x58 */
  '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', /* 0x60 */
  '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', /* 0x68 */
  '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', /* 0x70 */
  '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', /* 0x78 */
  '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', /* 0x80 */
  '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', /* 0x88 */
  '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', /* 0x90 */
  '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', /* 0x98 */
  '\240', '\242', '\242', '\263', '\244', '\245', '\250', '\247', /* 0xa0 */
  '\250', '\251', '\272', '\253', '\256', '\255', '\256', '\277', /* 0xa8 */
  '\260', '\261', '\271', '\263', '\270', '\265', '\266', '\267', /* 0xb0 */
  '\270', '\271', '\272', '\273', '\275', '\275', '\377', '\277', /* 0xb8 */
  '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', /* 0xc0 */
  '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', /* 0xc8 */
  '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', /* 0xd0 */
  '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337', /* 0xd8 */
  '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', /* 0xe0 */
  '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', /* 0xe8 */
  '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', /* 0xf0 */
  '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'  /* 0xf8 */
};
|
||||
|
||||
/*
 * Character-type bit masks for ISO-8859-16, indexed by byte value.
 * Entries are tested against CTYPE_TO_BIT(ctype) by
 * ENC_IS_ISO_8859_16_CTYPE().  The trailing comment on each row is the
 * index of its first entry.
 */
static const unsigned short EncISO_8859_16_CtypeTable[256] = {
  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, /* 0x00 */
  0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, /* 0x08 */
  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, /* 0x10 */
  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, /* 0x18 */
  0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, /* 0x20 */
  0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, /* 0x28 */
  0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, /* 0x30 */
  0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, /* 0x38 */
  0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, /* 0x40 */
  0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, /* 0x48 */
  0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, /* 0x50 */
  0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, /* 0x58 */
  0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, /* 0x60 */
  0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, /* 0x68 */
  0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, /* 0x70 */
  0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, /* 0x78 */
  0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, /* 0x80 */
  0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, /* 0x88 */
  0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, /* 0x90 */
  0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, /* 0x98 */
  0x0284, 0x34a2, 0x30e2, 0x34a2, 0x00a0, 0x01a0, 0x34a2, 0x00a0, /* 0xa0 */
  0x30e2, 0x00a0, 0x34a2, 0x01a0, 0x34a2, 0x01a0, 0x30e2, 0x34a2, /* 0xa8 */
  0x00a0, 0x00a0, 0x34a2, 0x30e2, 0x34a2, 0x01a0, 0x00a0, 0x01a0, /* 0xb0 */
  0x30e2, 0x30e2, 0x30e2, 0x01a0, 0x34a2, 0x30e2, 0x34a2, 0x30e2, /* 0xb8 */
  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, /* 0xc0 */
  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, /* 0xc8 */
  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, /* 0xd0 */
  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2, /* 0xd8 */
  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, /* 0xe0 */
  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, /* 0xe8 */
  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, /* 0xf0 */
  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2  /* 0xf8 */
};
|
||||
|
||||
static int
|
||||
mbc_case_fold(OnigCaseFoldType flag,
|
||||
const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower)
|
||||
{
|
||||
const UChar* p = *pp;
|
||||
|
||||
if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
|
||||
*lower++ = 's';
|
||||
*lower = 's';
|
||||
(*pp)++;
|
||||
return 2;
|
||||
}
|
||||
|
||||
*lower = ENC_ISO_8859_16_TO_LOWER_CASE(*p);
|
||||
(*pp)++;
|
||||
return 1; /* return byte length of converted char to lower */
|
||||
}
|
||||
|
||||
#if 0
/*
 * Disabled (compiled out).  Reports whether the byte at *pp is subject to
 * case folding and advances *pp by one byte.
 *
 * Returns TRUE for 0xdf when the multi-char fold flag is set and TRUE for
 * any byte flagged upper or lower case in the ctype table; FALSE otherwise.
 */
static int
is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
{
  int v;
  const UChar* p = *pp;

  if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
    (*pp)++;
    return TRUE;
  }

  (*pp)++;
  v = (EncISO_8859_16_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
  /* Test the LOWER bit with '&'; the previous '|' made this always true. */
  if ((v & BIT_CTYPE_LOWER) != 0) {
    return TRUE;
  }

  return (v != 0 ? TRUE : FALSE);
}
#endif
|
||||
|
||||
static int
|
||||
is_code_ctype(OnigCodePoint code, unsigned int ctype)
|
||||
{
|
||||
if (code < 256)
|
||||
return ENC_IS_ISO_8859_16_CTYPE(code, ctype);
|
||||
else
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/*
 * Byte pairs handed to the onigenc_*_with_map() case-fold helpers below.
 */
static const OnigPairCaseFoldCodes CaseFoldMap[] = {
  /* irregular pairs in the 0xa0-0xbf range */
  { 0xa1, 0xa2 }, { 0xa3, 0xb3 }, { 0xa6, 0xa8 },
  { 0xaa, 0xba }, { 0xac, 0xae }, { 0xaf, 0xbf },

  { 0xb2, 0xb9 }, { 0xb4, 0xb8 }, { 0xbc, 0xbd }, { 0xbe, 0xff },

  /* 0xc0-0xcf paired with 0xe0-0xef */
  { 0xc0, 0xe0 }, { 0xc1, 0xe1 }, { 0xc2, 0xe2 }, { 0xc3, 0xe3 },
  { 0xc4, 0xe4 }, { 0xc5, 0xe5 }, { 0xc6, 0xe6 }, { 0xc7, 0xe7 },
  { 0xc8, 0xe8 }, { 0xc9, 0xe9 }, { 0xca, 0xea }, { 0xcb, 0xeb },
  { 0xcc, 0xec }, { 0xcd, 0xed }, { 0xce, 0xee }, { 0xcf, 0xef },

  /* 0xd0-0xde paired with 0xf0-0xfe */
  { 0xd0, 0xf0 }, { 0xd1, 0xf1 }, { 0xd2, 0xf2 }, { 0xd3, 0xf3 },
  { 0xd4, 0xf4 }, { 0xd5, 0xf5 }, { 0xd6, 0xf6 }, { 0xd7, 0xf7 },
  { 0xd8, 0xf8 }, { 0xd9, 0xf9 }, { 0xda, 0xfa }, { 0xdb, 0xfb },
  { 0xdc, 0xfc }, { 0xdd, 0xfd }, { 0xde, 0xfe }
};
|
||||
|
||||
static int
|
||||
apply_all_case_fold(OnigCaseFoldType flag,
|
||||
OnigApplyAllCaseFoldFunc f, void* arg)
|
||||
{
|
||||
return onigenc_apply_all_case_fold_with_map(
|
||||
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
|
||||
flag, f, arg);
|
||||
}
|
||||
|
||||
static int
|
||||
get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
||||
const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
|
||||
{
|
||||
return onigenc_get_case_fold_codes_by_str_with_map(
|
||||
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
|
||||
flag, p, end, items);
|
||||
}
|
||||
|
||||
OnigEncodingType OnigEncodingISO_8859_16 = {
|
||||
onigenc_single_byte_mbc_enc_len,
|
||||
"ISO-8859-16", /* name */
|
||||
1, /* max enc length */
|
||||
1, /* min enc length */
|
||||
onigenc_is_mbc_newline_0x0a,
|
||||
onigenc_single_byte_mbc_to_code,
|
||||
onigenc_single_byte_code_to_mbclen,
|
||||
onigenc_single_byte_code_to_mbc,
|
||||
mbc_case_fold,
|
||||
apply_all_case_fold,
|
||||
get_case_fold_codes_by_str,
|
||||
onigenc_minimum_property_name_to_ctype,
|
||||
is_code_ctype,
|
||||
onigenc_not_support_get_ctype_code_range,
|
||||
onigenc_single_byte_left_adjust_char_head,
|
||||
onigenc_always_true_is_allowed_reverse_match,
|
||||
ONIGENC_FLAG_NONE,
|
||||
};
|
236
src/Onigmo/enc/iso8859_2.c
Normal file
236
src/Onigmo/enc/iso8859_2.c
Normal file
@ -0,0 +1,236 @@
|
||||
/**********************************************************************
|
||||
iso8859_2.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "regenc.h"
|
||||
|
||||
/* Lower-case table entry for byte value `byte`. */
#define ENC_ISO_8859_2_TO_LOWER_CASE(byte) EncISO_8859_2_ToLowerCaseTable[byte]
/* Non-zero when the ctype-table entry for `cp` has the bit for `ct` set. */
#define ENC_IS_ISO_8859_2_CTYPE(cp,ct) \
  ((EncISO_8859_2_CtypeTable[cp] & CTYPE_TO_BIT(ct)) != 0)
|
||||
|
||||
/*
 * Lower-case mapping for ISO-8859-2, indexed by byte value.  Bytes with no
 * lower-case counterpart map to themselves.  The trailing comment on each
 * row is the index of its first entry.
 */
static const UChar EncISO_8859_2_ToLowerCaseTable[256] = {
  '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', /* 0x00 */
  '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', /* 0x08 */
  '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', /* 0x10 */
  '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', /* 0x18 */
  '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', /* 0x20 */
  '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', /* 0x28 */
  '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', /* 0x30 */
  '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', /* 0x38 */
  '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', /* 0x40 */
  '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', /* 0x48 */
  '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', /* 0x50 */
  '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', /* 0x58 */
  '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', /* 0x60 */
  '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', /* 0x68 */
  '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', /* 0x70 */
  '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', /* 0x78 */
  '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', /* 0x80 */
  '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', /* 0x88 */
  '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', /* 0x90 */
  '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', /* 0x98 */
  '\240', '\261', '\242', '\263', '\244', '\265', '\266', '\247', /* 0xa0 */
  '\250', '\271', '\272', '\273', '\274', '\255', '\276', '\277', /* 0xa8 */
  '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', /* 0xb0 */
  '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', /* 0xb8 */
  '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', /* 0xc0 */
  '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', /* 0xc8 */
  '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327', /* 0xd0 */
  '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337', /* 0xd8 */
  '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', /* 0xe0 */
  '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', /* 0xe8 */
  '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', /* 0xf0 */
  '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'  /* 0xf8 */
};
|
||||
|
||||
/*
 * Character-type bit masks for ISO-8859-2, indexed by byte value.
 * Entries are tested against CTYPE_TO_BIT(ctype) by
 * ENC_IS_ISO_8859_2_CTYPE().  The trailing comment on each row is the
 * index of its first entry.
 */
static const unsigned short EncISO_8859_2_CtypeTable[256] = {
  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, /* 0x00 */
  0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, /* 0x08 */
  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, /* 0x10 */
  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, /* 0x18 */
  0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, /* 0x20 */
  0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, /* 0x28 */
  0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, /* 0x30 */
  0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, /* 0x38 */
  0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, /* 0x40 */
  0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, /* 0x48 */
  0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, /* 0x50 */
  0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, /* 0x58 */
  0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, /* 0x60 */
  0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, /* 0x68 */
  0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, /* 0x70 */
  0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, /* 0x78 */
  0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, /* 0x80 */
  0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, /* 0x88 */
  0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, /* 0x90 */
  0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, /* 0x98 */
  0x0284, 0x34a2, 0x00a0, 0x34a2, 0x00a0, 0x34a2, 0x34a2, 0x00a0, /* 0xa0 */
  0x00a0, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x01a0, 0x34a2, 0x34a2, /* 0xa8 */
  0x00a0, 0x30e2, 0x00a0, 0x30e2, 0x00a0, 0x30e2, 0x30e2, 0x00a0, /* 0xb0 */
  0x00a0, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0, 0x30e2, 0x30e2, /* 0xb8 */
  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, /* 0xc0 */
  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, /* 0xc8 */
  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0, /* 0xd0 */
  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2, /* 0xd8 */
  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, /* 0xe0 */
  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, /* 0xe8 */
  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0, /* 0xf0 */
  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0  /* 0xf8 */
};
|
||||
|
||||
static int
|
||||
mbc_case_fold(OnigCaseFoldType flag,
|
||||
const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower)
|
||||
{
|
||||
const UChar* p = *pp;
|
||||
|
||||
if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
|
||||
*lower++ = 's';
|
||||
*lower = 's';
|
||||
(*pp)++;
|
||||
return 2;
|
||||
}
|
||||
|
||||
*lower = ENC_ISO_8859_2_TO_LOWER_CASE(*p);
|
||||
(*pp)++;
|
||||
return 1; /* return byte length of converted char to lower */
|
||||
}
|
||||
|
||||
#if 0
/*
 * Disabled (compiled out).  Reports whether the byte at *pp is subject to
 * case folding and advances *pp by one byte.
 *
 * Returns TRUE for 0xdf when the multi-char fold flag is set and TRUE for
 * any byte flagged upper or lower case in the ctype table; FALSE otherwise.
 */
static int
is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
{
  int v;
  const UChar* p = *pp;

  if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
    (*pp)++;
    return TRUE;
  }

  (*pp)++;
  v = (EncISO_8859_2_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
  /* Test the LOWER bit with '&'; the previous '|' made this always true. */
  if ((v & BIT_CTYPE_LOWER) != 0) {
    return TRUE;
  }

  return (v != 0 ? TRUE : FALSE);
}
#endif
|
||||
|
||||
/*
 * Byte pairs handed to the onigenc_*_with_map() case-fold helpers below.
 */
static const OnigPairCaseFoldCodes CaseFoldMap[] = {
  /* pairs between the 0xa0-0xaf and 0xb0-0xbf rows */
  { 0xa1, 0xb1 }, { 0xa3, 0xb3 }, { 0xa5, 0xb5 }, { 0xa6, 0xb6 },
  { 0xa9, 0xb9 }, { 0xaa, 0xba }, { 0xab, 0xbb }, { 0xac, 0xbc },
  { 0xae, 0xbe }, { 0xaf, 0xbf },

  /* 0xc0-0xcf paired with 0xe0-0xef */
  { 0xc0, 0xe0 }, { 0xc1, 0xe1 }, { 0xc2, 0xe2 }, { 0xc3, 0xe3 },
  { 0xc4, 0xe4 }, { 0xc5, 0xe5 }, { 0xc6, 0xe6 }, { 0xc7, 0xe7 },
  { 0xc8, 0xe8 }, { 0xc9, 0xe9 }, { 0xca, 0xea }, { 0xcb, 0xeb },
  { 0xcc, 0xec }, { 0xcd, 0xed }, { 0xce, 0xee }, { 0xcf, 0xef },

  /* 0xd0-0xde paired with 0xf0-0xfe; 0xd7 has no entry */
  { 0xd0, 0xf0 }, { 0xd1, 0xf1 }, { 0xd2, 0xf2 }, { 0xd3, 0xf3 },
  { 0xd4, 0xf4 }, { 0xd5, 0xf5 }, { 0xd6, 0xf6 },
  { 0xd8, 0xf8 }, { 0xd9, 0xf9 }, { 0xda, 0xfa }, { 0xdb, 0xfb },
  { 0xdc, 0xfc }, { 0xdd, 0xfd }, { 0xde, 0xfe }
};
|
||||
|
||||
static int
|
||||
apply_all_case_fold(OnigCaseFoldType flag,
|
||||
OnigApplyAllCaseFoldFunc f, void* arg)
|
||||
{
|
||||
return onigenc_apply_all_case_fold_with_map(
|
||||
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
|
||||
flag, f, arg);
|
||||
}
|
||||
|
||||
static int
|
||||
get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
||||
const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
|
||||
{
|
||||
return onigenc_get_case_fold_codes_by_str_with_map(
|
||||
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
|
||||
flag, p, end, items);
|
||||
}
|
||||
|
||||
static int
|
||||
is_code_ctype(OnigCodePoint code, unsigned int ctype)
|
||||
{
|
||||
if (code < 256)
|
||||
return ENC_IS_ISO_8859_2_CTYPE(code, ctype);
|
||||
else
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
OnigEncodingType OnigEncodingISO_8859_2 = {
|
||||
onigenc_single_byte_mbc_enc_len,
|
||||
"ISO-8859-2", /* name */
|
||||
1, /* max enc length */
|
||||
1, /* min enc length */
|
||||
onigenc_is_mbc_newline_0x0a,
|
||||
onigenc_single_byte_mbc_to_code,
|
||||
onigenc_single_byte_code_to_mbclen,
|
||||
onigenc_single_byte_code_to_mbc,
|
||||
mbc_case_fold,
|
||||
apply_all_case_fold,
|
||||
get_case_fold_codes_by_str,
|
||||
onigenc_minimum_property_name_to_ctype,
|
||||
is_code_ctype,
|
||||
onigenc_not_support_get_ctype_code_range,
|
||||
onigenc_single_byte_left_adjust_char_head,
|
||||
onigenc_always_true_is_allowed_reverse_match,
|
||||
ONIGENC_FLAG_NONE,
|
||||
};
|
236
src/Onigmo/enc/iso8859_3.c
Normal file
236
src/Onigmo/enc/iso8859_3.c
Normal file
@ -0,0 +1,236 @@
|
||||
/**********************************************************************
|
||||
iso8859_3.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "regenc.h"
|
||||
|
||||
/* Lower-case table entry for byte value `byte`. */
#define ENC_ISO_8859_3_TO_LOWER_CASE(byte) EncISO_8859_3_ToLowerCaseTable[byte]
/* Non-zero when the ctype-table entry for `cp` has the bit for `ct` set. */
#define ENC_IS_ISO_8859_3_CTYPE(cp,ct) \
  ((EncISO_8859_3_CtypeTable[cp] & CTYPE_TO_BIT(ct)) != 0)
|
||||
|
||||
/*
 * Lower-case mapping for ISO-8859-3, indexed by byte value.  Bytes with no
 * lower-case counterpart map to themselves.  The trailing comment on each
 * row is the index of its first entry.
 */
static const UChar EncISO_8859_3_ToLowerCaseTable[256] = {
  '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', /* 0x00 */
  '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', /* 0x08 */
  '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', /* 0x10 */
  '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', /* 0x18 */
  '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', /* 0x20 */
  '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', /* 0x28 */
  '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', /* 0x30 */
  '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', /* 0x38 */
  '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', /* 0x40 */
  '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', /* 0x48 */
  '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', /* 0x50 */
  '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', /* 0x58 */
  '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', /* 0x60 */
  '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', /* 0x68 */
  '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', /* 0x70 */
  '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', /* 0x78 */
  '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', /* 0x80 */
  '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', /* 0x88 */
  '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', /* 0x90 */
  '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', /* 0x98 */
  '\240', '\261', '\242', '\243', '\244', '\245', '\266', '\247', /* 0xa0 */
  '\250', '\271', '\272', '\273', '\274', '\255', '\256', '\277', /* 0xa8 */
  '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', /* 0xb0 */
  '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', /* 0xb8 */
  '\340', '\341', '\342', '\303', '\344', '\345', '\346', '\347', /* 0xc0 */
  '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', /* 0xc8 */
  '\320', '\361', '\362', '\363', '\364', '\365', '\366', '\327', /* 0xd0 */
  '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337', /* 0xd8 */
  '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', /* 0xe0 */
  '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', /* 0xe8 */
  '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', /* 0xf0 */
  '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'  /* 0xf8 */
};
|
||||
|
||||
/*
 * Character-type bit masks for ISO-8859-3, indexed by byte value.
 * Entries are tested against CTYPE_TO_BIT(ctype) by
 * ENC_IS_ISO_8859_3_CTYPE(); several entries are 0x0000 (no bits set).
 * The trailing comment on each row is the index of its first entry.
 */
static const unsigned short EncISO_8859_3_CtypeTable[256] = {
  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, /* 0x00 */
  0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, /* 0x08 */
  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, /* 0x10 */
  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, /* 0x18 */
  0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, /* 0x20 */
  0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, /* 0x28 */
  0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, /* 0x30 */
  0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, /* 0x38 */
  0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, /* 0x40 */
  0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, /* 0x48 */
  0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, /* 0x50 */
  0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, /* 0x58 */
  0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, /* 0x60 */
  0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, /* 0x68 */
  0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, /* 0x70 */
  0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, /* 0x78 */
  0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, /* 0x80 */
  0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, /* 0x88 */
  0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, /* 0x90 */
  0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, /* 0x98 */
  0x0284, 0x34a2, 0x00a0, 0x00a0, 0x00a0, 0x0000, 0x34a2, 0x00a0, /* 0xa0 */
  0x00a0, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x01a0, 0x0000, 0x34a2, /* 0xa8 */
  0x00a0, 0x30e2, 0x10a0, 0x10a0, 0x00a0, 0x30e2, 0x30e2, 0x01a0, /* 0xb0 */
  0x00a0, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x11a0, 0x0000, 0x30e2, /* 0xb8 */
  0x34a2, 0x34a2, 0x34a2, 0x0000, 0x34a2, 0x34a2, 0x34a2, 0x34a2, /* 0xc0 */
  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, /* 0xc8 */
  0x0000, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0, /* 0xd0 */
  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2, /* 0xd8 */
  0x30e2, 0x30e2, 0x30e2, 0x0000, 0x30e2, 0x30e2, 0x30e2, 0x30e2, /* 0xe0 */
  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, /* 0xe8 */
  0x0000, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0, /* 0xf0 */
  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0  /* 0xf8 */
};
|
||||
|
||||
static int
|
||||
mbc_case_fold(OnigCaseFoldType flag, const UChar** pp,
|
||||
const UChar* end ARG_UNUSED, UChar* lower)
|
||||
{
|
||||
const UChar* p = *pp;
|
||||
|
||||
if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
|
||||
*lower++ = 's';
|
||||
*lower = 's';
|
||||
(*pp)++;
|
||||
return 2;
|
||||
}
|
||||
|
||||
*lower = ENC_ISO_8859_3_TO_LOWER_CASE(*p);
|
||||
(*pp)++;
|
||||
return 1;
|
||||
}
|
||||
|
||||
#if 0
/*
 * Disabled (compiled out).  Reports whether the byte at *pp is subject to
 * case folding and advances *pp by one byte.
 *
 * Returns TRUE for 0xdf when the multi-char fold flag is set, TRUE for
 * upper-case bytes and for lower-case bytes other than 0xb5, FALSE
 * otherwise.
 */
static int
is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
{
  int v;
  const UChar* p = *pp;

  if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
    (*pp)++;
    return TRUE;
  }

  (*pp)++;
  v = (EncISO_8859_3_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
  /* Test the LOWER bit with '&'; the previous '|' made this always true. */
  if ((v & BIT_CTYPE_LOWER) != 0) {
    /* 0xb5 is a lower case letter, but can't be converted. */
    if (*p == 0xb5)
      return FALSE;
    else
      return TRUE;
  }

  return (v != 0 ? TRUE : FALSE);
}
#endif
|
||||
|
||||
static int
|
||||
is_code_ctype(OnigCodePoint code, unsigned int ctype)
|
||||
{
|
||||
if (code < 256)
|
||||
return ENC_IS_ISO_8859_3_CTYPE(code, ctype);
|
||||
else
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/*
 * Byte pairs handed to the onigenc_*_with_map() case-fold helpers below.
 */
static const OnigPairCaseFoldCodes CaseFoldMap[] = {
  /* pairs between the 0xa0-0xaf and 0xb0-0xbf rows */
  { 0xa1, 0xb1 }, { 0xa6, 0xb6 }, { 0xa9, 0xb9 }, { 0xaa, 0xba },
  { 0xab, 0xbb }, { 0xac, 0xbc }, { 0xaf, 0xbf },

  /* 0xc0-0xcf paired with 0xe0-0xef; 0xc3 has no entry */
  { 0xc0, 0xe0 }, { 0xc1, 0xe1 }, { 0xc2, 0xe2 },
  { 0xc4, 0xe4 }, { 0xc5, 0xe5 }, { 0xc6, 0xe6 }, { 0xc7, 0xe7 },
  { 0xc8, 0xe8 }, { 0xc9, 0xe9 }, { 0xca, 0xea }, { 0xcb, 0xeb },
  { 0xcc, 0xec }, { 0xcd, 0xed }, { 0xce, 0xee }, { 0xcf, 0xef },

  /* 0xd1-0xde paired with 0xf1-0xfe; 0xd0 and 0xd7 have no entry */
  { 0xd1, 0xf1 }, { 0xd2, 0xf2 }, { 0xd3, 0xf3 }, { 0xd4, 0xf4 },
  { 0xd5, 0xf5 }, { 0xd6, 0xf6 },
  { 0xd8, 0xf8 }, { 0xd9, 0xf9 }, { 0xda, 0xfa }, { 0xdb, 0xfb },
  { 0xdc, 0xfc }, { 0xdd, 0xfd }, { 0xde, 0xfe }
};
|
||||
|
||||
|
||||
static int
|
||||
apply_all_case_fold(OnigCaseFoldType flag,
|
||||
OnigApplyAllCaseFoldFunc f, void* arg)
|
||||
{
|
||||
return onigenc_apply_all_case_fold_with_map(
|
||||
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
|
||||
flag, f, arg);
|
||||
}
|
||||
|
||||
static int
|
||||
get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
||||
const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
|
||||
{
|
||||
return onigenc_get_case_fold_codes_by_str_with_map(
|
||||
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
|
||||
flag, p, end, items);
|
||||
}
|
||||
|
||||
OnigEncodingType OnigEncodingISO_8859_3 = {
|
||||
onigenc_single_byte_mbc_enc_len,
|
||||
"ISO-8859-3", /* name */
|
||||
1, /* max enc length */
|
||||
1, /* min enc length */
|
||||
onigenc_is_mbc_newline_0x0a,
|
||||
onigenc_single_byte_mbc_to_code,
|
||||
onigenc_single_byte_code_to_mbclen,
|
||||
onigenc_single_byte_code_to_mbc,
|
||||
mbc_case_fold,
|
||||
apply_all_case_fold,
|
||||
get_case_fold_codes_by_str,
|
||||
onigenc_minimum_property_name_to_ctype,
|
||||
is_code_ctype,
|
||||
onigenc_not_support_get_ctype_code_range,
|
||||
onigenc_single_byte_left_adjust_char_head,
|
||||
onigenc_always_true_is_allowed_reverse_match,
|
||||
ONIGENC_FLAG_NONE,
|
||||
};
|
238
src/Onigmo/enc/iso8859_4.c
Normal file
238
src/Onigmo/enc/iso8859_4.c
Normal file
@ -0,0 +1,238 @@
|
||||
/**********************************************************************
|
||||
iso8859_4.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "regenc.h"
|
||||
|
||||
/* Entry of the lower-case table for byte c. */
#define ENC_ISO_8859_4_TO_LOWER_CASE(c) \
  EncISO_8859_4_ToLowerCaseTable[c]

/* 1 when the ctype-table entry for code has the bit of ctype set, else 0. */
#define ENC_IS_ISO_8859_4_CTYPE(code,ctype) \
  ((EncISO_8859_4_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
|
||||
|
||||
/*
 * Byte-indexed table read through ENC_ISO_8859_4_TO_LOWER_CASE(); the entry
 * is the value mbc_case_fold() stores for a single-byte fold.  The leading
 * comment on each row is the index of its first entry.
 */
static const UChar EncISO_8859_4_ToLowerCaseTable[256] = {
  /* 0x00 */ '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
  /* 0x08 */ '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
  /* 0x10 */ '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
  /* 0x18 */ '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
  /* 0x20 */ '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
  /* 0x28 */ '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
  /* 0x30 */ '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
  /* 0x38 */ '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
  /* 0x40 */ '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
  /* 0x48 */ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
  /* 0x50 */ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
  /* 0x58 */ '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
  /* 0x60 */ '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
  /* 0x68 */ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
  /* 0x70 */ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
  /* 0x78 */ '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
  /* 0x80 */ '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
  /* 0x88 */ '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
  /* 0x90 */ '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
  /* 0x98 */ '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
  /* 0xa0 */ '\240', '\261', '\242', '\263', '\244', '\265', '\266', '\247',
  /* 0xa8 */ '\250', '\271', '\272', '\273', '\274', '\255', '\276', '\257',
  /* 0xb0 */ '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
  /* 0xb8 */ '\270', '\271', '\272', '\273', '\274', '\277', '\276', '\277',
  /* 0xc0 */ '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
  /* 0xc8 */ '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
  /* 0xd0 */ '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327',
  /* 0xd8 */ '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337',
  /* 0xe0 */ '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
  /* 0xe8 */ '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
  /* 0xf0 */ '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
  /* 0xf8 */ '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
};
|
||||
|
||||
/*
 * Byte-indexed bit masks tested by ENC_IS_ISO_8859_4_CTYPE() against
 * CTYPE_TO_BIT(ctype).  The leading comment on each row is the index of its
 * first entry.
 */
static const unsigned short EncISO_8859_4_CtypeTable[256] = {
  /* 0x00 */ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  /* 0x08 */ 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
  /* 0x10 */ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  /* 0x18 */ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  /* 0x20 */ 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  /* 0x28 */ 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  /* 0x30 */ 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
  /* 0x38 */ 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  /* 0x40 */ 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
  /* 0x48 */ 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
  /* 0x50 */ 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
  /* 0x58 */ 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
  /* 0x60 */ 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
  /* 0x68 */ 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
  /* 0x70 */ 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
  /* 0x78 */ 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
  /* 0x80 */ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
  /* 0x88 */ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
  /* 0x90 */ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
  /* 0x98 */ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
  /* 0xa0 */ 0x0284, 0x34a2, 0x30e2, 0x34a2, 0x00a0, 0x34a2, 0x34a2, 0x00a0,
  /* 0xa8 */ 0x00a0, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x01a0, 0x34a2, 0x00a0,
  /* 0xb0 */ 0x00a0, 0x30e2, 0x00a0, 0x30e2, 0x00a0, 0x30e2, 0x30e2, 0x00a0,
  /* 0xb8 */ 0x00a0, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x34a2, 0x30e2, 0x30e2,
  /* 0xc0 */ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
  /* 0xc8 */ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
  /* 0xd0 */ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0,
  /* 0xd8 */ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2,
  /* 0xe0 */ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
  /* 0xe8 */ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
  /* 0xf0 */ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0,
  /* 0xf8 */ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0
};
|
||||
|
||||
static int
|
||||
mbc_case_fold(OnigCaseFoldType flag,
|
||||
const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower)
|
||||
{
|
||||
const UChar* p = *pp;
|
||||
|
||||
if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
|
||||
*lower++ = 's';
|
||||
*lower = 's';
|
||||
(*pp)++;
|
||||
return 2;
|
||||
}
|
||||
|
||||
*lower = ENC_ISO_8859_4_TO_LOWER_CASE(*p);
|
||||
(*pp)++;
|
||||
return 1; /* return byte length of converted char to lower */
|
||||
}
|
||||
|
||||
#if 0
/*
 * Disabled code (compiled out by the surrounding "#if 0").
 *
 * Reports whether the single-byte character at *pp has more than one case
 * form, and advances *pp by one byte in every path.
 *   - 0xdf with INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR set -> TRUE
 *   - a lower case letter -> TRUE, except 0xa2 -> FALSE
 *   - anything else -> TRUE only when it is an upper case letter
 * The `end` argument is not consulted.
 */
static int
is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
{
  int v;
  const UChar* p = *pp;

  if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
    (*pp)++;
    return TRUE;
  }

  (*pp)++;
  v = (EncISO_8859_4_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
  /* Test the LOWER bit with '&'.  The previous '|' was non-zero for every
     byte, which made the final return below unreachable. */
  if ((v & BIT_CTYPE_LOWER) != 0) {
    /* 0xa2 carries the lower-case bit but folds to itself. */
    if (*p == 0xa2)
      return FALSE;
    else
      return TRUE;
  }

  return (v != 0 ? TRUE : FALSE);
}
#endif
|
||||
|
||||
static int
|
||||
is_code_ctype(OnigCodePoint code, unsigned int ctype)
|
||||
{
|
||||
if (code < 256)
|
||||
return ENC_IS_ISO_8859_4_CTYPE(code, ctype);
|
||||
else
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/*
 * Code pairs that are case variants of each other.  The table is handed,
 * together with its element count, to the *_with_map helpers used by
 * apply_all_case_fold() and get_case_fold_codes_by_str().
 *
 * The pairs must agree with EncISO_8859_4_ToLowerCaseTable.  That table
 * folds 0xbd to 0xbf (and the ctype table marks 0xbd as upper and 0xbf as
 * lower), so the { 0xbd, 0xbf } pair is listed here as well; without it
 * mbc_case_fold() and the map-based helpers disagree about that letter.
 */
static const OnigPairCaseFoldCodes CaseFoldMap[] = {
  { 0xa1, 0xb1 },
  { 0xa3, 0xb3 },
  { 0xa5, 0xb5 },
  { 0xa6, 0xb6 },
  { 0xa9, 0xb9 },
  { 0xaa, 0xba },
  { 0xab, 0xbb },
  { 0xac, 0xbc },
  { 0xae, 0xbe },
  { 0xbd, 0xbf },

  { 0xc0, 0xe0 },
  { 0xc1, 0xe1 },
  { 0xc2, 0xe2 },
  { 0xc3, 0xe3 },
  { 0xc4, 0xe4 },
  { 0xc5, 0xe5 },
  { 0xc6, 0xe6 },
  { 0xc7, 0xe7 },
  { 0xc8, 0xe8 },
  { 0xc9, 0xe9 },
  { 0xca, 0xea },
  { 0xcb, 0xeb },
  { 0xcc, 0xec },
  { 0xcd, 0xed },
  { 0xce, 0xee },
  { 0xcf, 0xef },

  { 0xd0, 0xf0 },
  { 0xd1, 0xf1 },
  { 0xd2, 0xf2 },
  { 0xd3, 0xf3 },
  { 0xd4, 0xf4 },
  { 0xd5, 0xf5 },
  { 0xd6, 0xf6 },
  { 0xd8, 0xf8 },
  { 0xd9, 0xf9 },
  { 0xda, 0xfa },
  { 0xdb, 0xfb },
  { 0xdc, 0xfc },
  { 0xdd, 0xfd },
  { 0xde, 0xfe }
};
|
||||
|
||||
static int
|
||||
apply_all_case_fold(OnigCaseFoldType flag,
|
||||
OnigApplyAllCaseFoldFunc f, void* arg)
|
||||
{
|
||||
return onigenc_apply_all_case_fold_with_map(
|
||||
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
|
||||
flag, f, arg);
|
||||
}
|
||||
|
||||
static int
|
||||
get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
||||
const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
|
||||
{
|
||||
return onigenc_get_case_fold_codes_by_str_with_map(
|
||||
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
|
||||
flag, p, end, items);
|
||||
}
|
||||
|
||||
OnigEncodingType OnigEncodingISO_8859_4 = {
|
||||
onigenc_single_byte_mbc_enc_len,
|
||||
"ISO-8859-4", /* name */
|
||||
1, /* max enc length */
|
||||
1, /* min enc length */
|
||||
onigenc_is_mbc_newline_0x0a,
|
||||
onigenc_single_byte_mbc_to_code,
|
||||
onigenc_single_byte_code_to_mbclen,
|
||||
onigenc_single_byte_code_to_mbc,
|
||||
mbc_case_fold,
|
||||
apply_all_case_fold,
|
||||
get_case_fold_codes_by_str,
|
||||
onigenc_minimum_property_name_to_ctype,
|
||||
is_code_ctype,
|
||||
onigenc_not_support_get_ctype_code_range,
|
||||
onigenc_single_byte_left_adjust_char_head,
|
||||
onigenc_always_true_is_allowed_reverse_match,
|
||||
ONIGENC_FLAG_NONE,
|
||||
};
|
227
src/Onigmo/enc/iso8859_5.c
Normal file
227
src/Onigmo/enc/iso8859_5.c
Normal file
@ -0,0 +1,227 @@
|
||||
/**********************************************************************
|
||||
iso8859_5.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "regenc.h"
|
||||
|
||||
/* Entry of the lower-case table for byte c. */
#define ENC_ISO_8859_5_TO_LOWER_CASE(c) \
  EncISO_8859_5_ToLowerCaseTable[c]

/* 1 when the ctype-table entry for code has the bit of ctype set, else 0. */
#define ENC_IS_ISO_8859_5_CTYPE(code,ctype) \
  ((EncISO_8859_5_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
|
||||
|
||||
/*
 * Byte-indexed table read through ENC_ISO_8859_5_TO_LOWER_CASE(); the entry
 * is the value mbc_case_fold() stores for each input byte.  The leading
 * comment on each row is the index of its first entry.
 */
static const UChar EncISO_8859_5_ToLowerCaseTable[256] = {
  /* 0x00 */ '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
  /* 0x08 */ '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
  /* 0x10 */ '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
  /* 0x18 */ '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
  /* 0x20 */ '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
  /* 0x28 */ '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
  /* 0x30 */ '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
  /* 0x38 */ '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
  /* 0x40 */ '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
  /* 0x48 */ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
  /* 0x50 */ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
  /* 0x58 */ '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
  /* 0x60 */ '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
  /* 0x68 */ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
  /* 0x70 */ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
  /* 0x78 */ '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
  /* 0x80 */ '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
  /* 0x88 */ '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
  /* 0x90 */ '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
  /* 0x98 */ '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
  /* 0xa0 */ '\240', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
  /* 0xa8 */ '\370', '\371', '\372', '\373', '\374', '\255', '\376', '\377',
  /* 0xb0 */ '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
  /* 0xb8 */ '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
  /* 0xc0 */ '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
  /* 0xc8 */ '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
  /* 0xd0 */ '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
  /* 0xd8 */ '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
  /* 0xe0 */ '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
  /* 0xe8 */ '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
  /* 0xf0 */ '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
  /* 0xf8 */ '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
};
|
||||
|
||||
/*
 * Byte-indexed bit masks tested by ENC_IS_ISO_8859_5_CTYPE() against
 * CTYPE_TO_BIT(ctype).  The leading comment on each row is the index of its
 * first entry.
 */
static const unsigned short EncISO_8859_5_CtypeTable[256] = {
  /* 0x00 */ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  /* 0x08 */ 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
  /* 0x10 */ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  /* 0x18 */ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  /* 0x20 */ 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  /* 0x28 */ 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  /* 0x30 */ 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
  /* 0x38 */ 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  /* 0x40 */ 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
  /* 0x48 */ 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
  /* 0x50 */ 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
  /* 0x58 */ 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
  /* 0x60 */ 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
  /* 0x68 */ 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
  /* 0x70 */ 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
  /* 0x78 */ 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
  /* 0x80 */ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
  /* 0x88 */ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
  /* 0x90 */ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
  /* 0x98 */ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
  /* 0xa0 */ 0x0284, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
  /* 0xa8 */ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x01a0, 0x34a2, 0x34a2,
  /* 0xb0 */ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
  /* 0xb8 */ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
  /* 0xc0 */ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
  /* 0xc8 */ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
  /* 0xd0 */ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
  /* 0xd8 */ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
  /* 0xe0 */ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
  /* 0xe8 */ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
  /* 0xf0 */ 0x00a0, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
  /* 0xf8 */ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0, 0x30e2, 0x30e2
};
|
||||
|
||||
static int
|
||||
mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED,
|
||||
const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower)
|
||||
{
|
||||
const UChar* p = *pp;
|
||||
|
||||
*lower = ENC_ISO_8859_5_TO_LOWER_CASE(*p);
|
||||
(*pp)++;
|
||||
return 1;
|
||||
}
|
||||
|
||||
#if 0
/*
 * Disabled code (compiled out by the surrounding "#if 0").
 *
 * Advances *pp by one byte and returns TRUE when the byte it pointed at has
 * the upper-case or lower-case bit set in the ctype table, FALSE otherwise.
 * `flag` and `end` are not consulted.
 */
static int
is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
{
  const UChar c = **pp;

  (*pp)++;
  if ((EncISO_8859_5_CtypeTable[c] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)) != 0)
    return TRUE;

  return FALSE;
}
#endif
|
||||
|
||||
static int
|
||||
is_code_ctype(OnigCodePoint code, unsigned int ctype)
|
||||
{
|
||||
if (code < 256)
|
||||
return ENC_IS_ISO_8859_5_CTYPE(code, ctype);
|
||||
else
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/*
 * Code pairs that are case variants of each other.  The table is handed,
 * together with its element count, to the *_with_map helpers used by
 * apply_all_case_fold() and get_case_fold_codes_by_str().
 */
static const OnigPairCaseFoldCodes CaseFoldMap[] = {
  { 0xa1, 0xf1 }, { 0xa2, 0xf2 }, { 0xa3, 0xf3 }, { 0xa4, 0xf4 },
  { 0xa5, 0xf5 }, { 0xa6, 0xf6 }, { 0xa7, 0xf7 }, { 0xa8, 0xf8 },
  { 0xa9, 0xf9 }, { 0xaa, 0xfa }, { 0xab, 0xfb }, { 0xac, 0xfc },
  { 0xae, 0xfe }, { 0xaf, 0xff },

  { 0xb0, 0xd0 }, { 0xb1, 0xd1 }, { 0xb2, 0xd2 }, { 0xb3, 0xd3 },
  { 0xb4, 0xd4 }, { 0xb5, 0xd5 }, { 0xb6, 0xd6 }, { 0xb7, 0xd7 },
  { 0xb8, 0xd8 }, { 0xb9, 0xd9 }, { 0xba, 0xda }, { 0xbb, 0xdb },
  { 0xbc, 0xdc }, { 0xbd, 0xdd }, { 0xbe, 0xde }, { 0xbf, 0xdf },

  { 0xc0, 0xe0 }, { 0xc1, 0xe1 }, { 0xc2, 0xe2 }, { 0xc3, 0xe3 },
  { 0xc4, 0xe4 }, { 0xc5, 0xe5 }, { 0xc6, 0xe6 }, { 0xc7, 0xe7 },
  { 0xc8, 0xe8 }, { 0xc9, 0xe9 }, { 0xca, 0xea }, { 0xcb, 0xeb },
  { 0xcc, 0xec }, { 0xcd, 0xed }, { 0xce, 0xee }, { 0xcf, 0xef }
};
|
||||
|
||||
static int
|
||||
apply_all_case_fold(OnigCaseFoldType flag,
|
||||
OnigApplyAllCaseFoldFunc f, void* arg)
|
||||
{
|
||||
return onigenc_apply_all_case_fold_with_map(
|
||||
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0,
|
||||
flag, f, arg);
|
||||
}
|
||||
|
||||
static int
|
||||
get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
||||
const OnigUChar* p, const OnigUChar* end,
|
||||
OnigCaseFoldCodeItem items[])
|
||||
{
|
||||
return onigenc_get_case_fold_codes_by_str_with_map(
|
||||
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0,
|
||||
flag, p, end, items);
|
||||
}
|
||||
|
||||
OnigEncodingType OnigEncodingISO_8859_5 = {
|
||||
onigenc_single_byte_mbc_enc_len,
|
||||
"ISO-8859-5", /* name */
|
||||
1, /* max enc length */
|
||||
1, /* min enc length */
|
||||
onigenc_is_mbc_newline_0x0a,
|
||||
onigenc_single_byte_mbc_to_code,
|
||||
onigenc_single_byte_code_to_mbclen,
|
||||
onigenc_single_byte_code_to_mbc,
|
||||
mbc_case_fold,
|
||||
apply_all_case_fold,
|
||||
get_case_fold_codes_by_str,
|
||||
onigenc_minimum_property_name_to_ctype,
|
||||
is_code_ctype,
|
||||
onigenc_not_support_get_ctype_code_range,
|
||||
onigenc_single_byte_left_adjust_char_head,
|
||||
onigenc_always_true_is_allowed_reverse_match,
|
||||
ONIGENC_FLAG_NONE,
|
||||
};
|
97
src/Onigmo/enc/iso8859_6.c
Normal file
97
src/Onigmo/enc/iso8859_6.c
Normal file
@ -0,0 +1,97 @@
|
||||
/**********************************************************************
|
||||
iso8859_6.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "regenc.h"
|
||||
|
||||
/* 1 when the ctype-table entry for code has the bit of ctype set, else 0. */
#define ENC_IS_ISO_8859_6_CTYPE(code,ctype) \
  ((EncISO_8859_6_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
|
||||
|
||||
/*
 * Byte-indexed bit masks tested by ENC_IS_ISO_8859_6_CTYPE() against
 * CTYPE_TO_BIT(ctype).  The leading comment on each row is the index of its
 * first entry.
 */
static const unsigned short EncISO_8859_6_CtypeTable[256] = {
  /* 0x00 */ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  /* 0x08 */ 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
  /* 0x10 */ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  /* 0x18 */ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  /* 0x20 */ 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  /* 0x28 */ 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  /* 0x30 */ 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
  /* 0x38 */ 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  /* 0x40 */ 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
  /* 0x48 */ 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
  /* 0x50 */ 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
  /* 0x58 */ 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
  /* 0x60 */ 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
  /* 0x68 */ 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
  /* 0x70 */ 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
  /* 0x78 */ 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
  /* 0x80 */ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
  /* 0x88 */ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
  /* 0x90 */ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
  /* 0x98 */ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
  /* 0xa0 */ 0x0284, 0x0000, 0x0000, 0x0000, 0x00a0, 0x0000, 0x0000, 0x0000,
  /* 0xa8 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x01a0, 0x01a0, 0x0000, 0x0000,
  /* 0xb0 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  /* 0xb8 */ 0x0000, 0x0000, 0x0000, 0x01a0, 0x0000, 0x0000, 0x0000, 0x01a0,
  /* 0xc0 */ 0x0000, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
  /* 0xc8 */ 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
  /* 0xd0 */ 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
  /* 0xd8 */ 0x30a2, 0x30a2, 0x30a2, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  /* 0xe0 */ 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
  /* 0xe8 */ 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
  /* 0xf0 */ 0x30a2, 0x30a2, 0x30a2, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  /* 0xf8 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
};
|
||||
|
||||
static int
|
||||
is_code_ctype(OnigCodePoint code, unsigned int ctype)
|
||||
{
|
||||
if (code < 256)
|
||||
return ENC_IS_ISO_8859_6_CTYPE(code, ctype);
|
||||
else
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
OnigEncodingType OnigEncodingISO_8859_6 = {
|
||||
onigenc_single_byte_mbc_enc_len,
|
||||
"ISO-8859-6", /* name */
|
||||
1, /* max enc length */
|
||||
1, /* min enc length */
|
||||
onigenc_is_mbc_newline_0x0a,
|
||||
onigenc_single_byte_mbc_to_code,
|
||||
onigenc_single_byte_code_to_mbclen,
|
||||
onigenc_single_byte_code_to_mbc,
|
||||
onigenc_ascii_mbc_case_fold,
|
||||
onigenc_ascii_apply_all_case_fold,
|
||||
onigenc_ascii_get_case_fold_codes_by_str,
|
||||
onigenc_minimum_property_name_to_ctype,
|
||||
is_code_ctype,
|
||||
onigenc_not_support_get_ctype_code_range,
|
||||
onigenc_single_byte_left_adjust_char_head,
|
||||
onigenc_always_true_is_allowed_reverse_match,
|
||||
ONIGENC_FLAG_NONE,
|
||||
};
|
223
src/Onigmo/enc/iso8859_7.c
Normal file
223
src/Onigmo/enc/iso8859_7.c
Normal file
@ -0,0 +1,223 @@
|
||||
/**********************************************************************
|
||||
iso8859_7.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "regenc.h"
|
||||
|
||||
/* Entry of the lower-case table for byte c. */
#define ENC_ISO_8859_7_TO_LOWER_CASE(c) \
  EncISO_8859_7_ToLowerCaseTable[c]

/* 1 when the ctype-table entry for code has the bit of ctype set, else 0. */
#define ENC_IS_ISO_8859_7_CTYPE(code,ctype) \
  ((EncISO_8859_7_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
|
||||
|
||||
/*
 * Byte-indexed table read through ENC_ISO_8859_7_TO_LOWER_CASE().  The
 * leading comment on each row is the index of its first entry.
 */
static const UChar EncISO_8859_7_ToLowerCaseTable[256] = {
  /* 0x00 */ '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
  /* 0x08 */ '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
  /* 0x10 */ '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
  /* 0x18 */ '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
  /* 0x20 */ '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
  /* 0x28 */ '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
  /* 0x30 */ '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
  /* 0x38 */ '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
  /* 0x40 */ '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
  /* 0x48 */ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
  /* 0x50 */ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
  /* 0x58 */ '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
  /* 0x60 */ '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
  /* 0x68 */ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
  /* 0x70 */ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
  /* 0x78 */ '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
  /* 0x80 */ '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
  /* 0x88 */ '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
  /* 0x90 */ '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
  /* 0x98 */ '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
  /* 0xa0 */ '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
  /* 0xa8 */ '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
  /* 0xb0 */ '\260', '\261', '\262', '\263', '\264', '\265', '\334', '\267',
  /* 0xb8 */ '\335', '\336', '\337', '\273', '\374', '\275', '\375', '\376',
  /* 0xc0 */ '\300', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
  /* 0xc8 */ '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
  /* 0xd0 */ '\360', '\361', '\322', '\363', '\364', '\365', '\366', '\367',
  /* 0xd8 */ '\370', '\371', '\372', '\373', '\334', '\335', '\336', '\337',
  /* 0xe0 */ '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
  /* 0xe8 */ '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
  /* 0xf0 */ '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
  /* 0xf8 */ '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
};
|
||||
|
||||
/* Character-class masks for ISO-8859-7: one unsigned short per byte value
 * (256 entries), indexed directly by the byte.  Entries are tested against
 * BIT_CTYPE_* masks (e.g. BIT_CTYPE_UPPER | BIT_CTYPE_LOWER further down
 * in this file). */
static const unsigned short EncISO_8859_7_CtypeTable[256] = {
|
||||
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
|
||||
0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
|
||||
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
|
||||
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
|
||||
0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
|
||||
0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
|
||||
0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
|
||||
0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
|
||||
0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
|
||||
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
|
||||
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
|
||||
0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
|
||||
0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
|
||||
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
|
||||
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
|
||||
0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
|
||||
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
|
||||
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
|
||||
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
|
||||
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
|
||||
0x0284, 0x01a0, 0x01a0, 0x00a0, 0x0000, 0x0000, 0x00a0, 0x00a0,
|
||||
0x00a0, 0x00a0, 0x0000, 0x01a0, 0x00a0, 0x01a0, 0x0000, 0x01a0,
|
||||
0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x00a0, 0x34a2, 0x01a0,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x01a0, 0x34a2, 0x10a0, 0x34a2, 0x34a2,
|
||||
0x30e2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
|
||||
0x34a2, 0x34a2, 0x0000, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x0000
|
||||
};
|
||||
|
||||
static int
|
||||
mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED,
|
||||
const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower)
|
||||
{
|
||||
const UChar* p = *pp;
|
||||
|
||||
*lower = ENC_ISO_8859_7_TO_LOWER_CASE(*p);
|
||||
(*pp)++;
|
||||
return 1;
|
||||
}
|
||||
|
||||
#if 0
/*
 * Compiled out.  Reports whether the byte at *pp has a case counterpart and
 * advances *pp by one byte.  flag and end are not used.
 *
 * Returns TRUE for bytes flagged upper- or lower-case in
 * EncISO_8859_7_CtypeTable, except 0xc0 and 0xe0: both are flagged
 * lower-case there but have no entry in CaseFoldMap.
 */
static int
is_mbc_ambiguous(OnigCaseFoldType flag,
                 const UChar** pp, const UChar* end)
{
  int v;
  const UChar* p = *pp;

  (*pp)++;
  v = (EncISO_8859_7_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
  /* Must be '&': OR-ing in BIT_CTYPE_LOWER is never zero, which would send
     every byte (letters or not) through this branch. */
  if ((v & BIT_CTYPE_LOWER) != 0) {
    if (*p == 0xc0 || *p == 0xe0)
      return FALSE;
    else
      return TRUE;
  }

  /* Not lower-case: ambiguous only if the upper-case bit is set. */
  return (v != 0 ? TRUE : FALSE);
}
#endif
|
||||
|
||||
static int
|
||||
is_code_ctype(OnigCodePoint code, unsigned int ctype)
|
||||
{
|
||||
if (code < 256)
|
||||
return ENC_IS_ISO_8859_7_CTYPE(code, ctype);
|
||||
else
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/*
 * Byte pairs handed to the onigenc_*_with_map() helpers for ISO-8859-7.
 * In every pair the second byte is the value the lower-case table yields
 * for the first one, with one exception noted below.
 *
 * NOTE(review): 0xd2 maps to itself in the lower-case table and has a zero
 * entry in EncISO_8859_7_CtypeTable, yet it is paired with 0xf2 here --
 * confirm this pair is intended.
 */
static const OnigPairCaseFoldCodes CaseFoldMap[] = {
  /* 0xb6-0xbf */
  { 0xb6, 0xdc }, { 0xb8, 0xdd }, { 0xb9, 0xde }, { 0xba, 0xdf },
  { 0xbc, 0xfc }, { 0xbe, 0xfd }, { 0xbf, 0xfe },

  /* 0xc1-0xcf -> 0xe1-0xef */
  { 0xc1, 0xe1 }, { 0xc2, 0xe2 }, { 0xc3, 0xe3 }, { 0xc4, 0xe4 },
  { 0xc5, 0xe5 }, { 0xc6, 0xe6 }, { 0xc7, 0xe7 }, { 0xc8, 0xe8 },
  { 0xc9, 0xe9 }, { 0xca, 0xea }, { 0xcb, 0xeb }, { 0xcc, 0xec },
  { 0xcd, 0xed }, { 0xce, 0xee }, { 0xcf, 0xef },

  /* 0xd0-0xdb -> 0xf0-0xfb */
  { 0xd0, 0xf0 }, { 0xd1, 0xf1 }, { 0xd2, 0xf2 }, { 0xd3, 0xf3 },
  { 0xd4, 0xf4 }, { 0xd5, 0xf5 }, { 0xd6, 0xf6 }, { 0xd7, 0xf7 },
  { 0xd8, 0xf8 }, { 0xd9, 0xf9 }, { 0xda, 0xfa }, { 0xdb, 0xfb }
};
|
||||
|
||||
static int
|
||||
apply_all_case_fold(OnigCaseFoldType flag,
|
||||
OnigApplyAllCaseFoldFunc f, void* arg)
|
||||
{
|
||||
return onigenc_apply_all_case_fold_with_map(
|
||||
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0,
|
||||
flag, f, arg);
|
||||
}
|
||||
|
||||
static int
|
||||
get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
||||
const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
|
||||
{
|
||||
return onigenc_get_case_fold_codes_by_str_with_map(
|
||||
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0,
|
||||
flag, p, end, items);
|
||||
}
|
||||
|
||||
|
||||
OnigEncodingType OnigEncodingISO_8859_7 = {
|
||||
onigenc_single_byte_mbc_enc_len,
|
||||
"ISO-8859-7", /* name */
|
||||
1, /* max enc length */
|
||||
1, /* min enc length */
|
||||
onigenc_is_mbc_newline_0x0a,
|
||||
onigenc_single_byte_mbc_to_code,
|
||||
onigenc_single_byte_code_to_mbclen,
|
||||
onigenc_single_byte_code_to_mbc,
|
||||
mbc_case_fold,
|
||||
apply_all_case_fold,
|
||||
get_case_fold_codes_by_str,
|
||||
onigenc_minimum_property_name_to_ctype,
|
||||
is_code_ctype,
|
||||
onigenc_not_support_get_ctype_code_range,
|
||||
onigenc_single_byte_left_adjust_char_head,
|
||||
onigenc_always_true_is_allowed_reverse_match,
|
||||
ONIGENC_FLAG_NONE,
|
||||
};
|
97
src/Onigmo/enc/iso8859_8.c
Normal file
97
src/Onigmo/enc/iso8859_8.c
Normal file
@ -0,0 +1,97 @@
|
||||
/**********************************************************************
|
||||
iso8859_8.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "regenc.h"
|
||||
|
||||
/* Nonzero when the EncISO_8859_8_CtypeTable entry for byte value `code`
 * has the bit selected by CTYPE_TO_BIT(ctype) set.  `code` indexes the
 * 256-entry table directly, so callers must keep it below 256. */
#define ENC_IS_ISO_8859_8_CTYPE(code,ctype) \
  ((EncISO_8859_8_CtypeTable[(code)] & CTYPE_TO_BIT(ctype)) != 0)
|
||||
|
||||
/* Character-class masks for ISO-8859-8: one unsigned short per byte value
 * (256 entries), indexed directly by the byte and read through
 * ENC_IS_ISO_8859_8_CTYPE(). */
static const unsigned short EncISO_8859_8_CtypeTable[256] = {
|
||||
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
|
||||
0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
|
||||
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
|
||||
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
|
||||
0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
|
||||
0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
|
||||
0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
|
||||
0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
|
||||
0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
|
||||
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
|
||||
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
|
||||
0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
|
||||
0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
|
||||
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
|
||||
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
|
||||
0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
|
||||
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
|
||||
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
|
||||
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
|
||||
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
|
||||
0x0284, 0x0000, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
|
||||
0x00a0, 0x00a0, 0x00a0, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0,
|
||||
0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x30e2, 0x00a0, 0x01a0,
|
||||
0x00a0, 0x10a0, 0x00a0, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x0000,
|
||||
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x01a0,
|
||||
0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
|
||||
0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
|
||||
0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
|
||||
0x30a2, 0x30a2, 0x30a2, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
|
||||
};
|
||||
|
||||
static int
|
||||
is_code_ctype(OnigCodePoint code, unsigned int ctype)
|
||||
{
|
||||
if (code < 256)
|
||||
return ENC_IS_ISO_8859_8_CTYPE(code, ctype);
|
||||
else
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
OnigEncodingType OnigEncodingISO_8859_8 = {
|
||||
onigenc_single_byte_mbc_enc_len,
|
||||
"ISO-8859-8", /* name */
|
||||
1, /* max enc length */
|
||||
1, /* min enc length */
|
||||
onigenc_is_mbc_newline_0x0a,
|
||||
onigenc_single_byte_mbc_to_code,
|
||||
onigenc_single_byte_code_to_mbclen,
|
||||
onigenc_single_byte_code_to_mbc,
|
||||
onigenc_ascii_mbc_case_fold,
|
||||
onigenc_ascii_apply_all_case_fold,
|
||||
onigenc_ascii_get_case_fold_codes_by_str,
|
||||
onigenc_minimum_property_name_to_ctype,
|
||||
is_code_ctype,
|
||||
onigenc_not_support_get_ctype_code_range,
|
||||
onigenc_single_byte_left_adjust_char_head,
|
||||
onigenc_always_true_is_allowed_reverse_match,
|
||||
ONIGENC_FLAG_NONE,
|
||||
};
|
229
src/Onigmo/enc/iso8859_9.c
Normal file
229
src/Onigmo/enc/iso8859_9.c
Normal file
@ -0,0 +1,229 @@
|
||||
/**********************************************************************
|
||||
iso8859_9.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "regenc.h"
|
||||
|
||||
/* Lower-case table lookup for byte value `c`. */
#define ENC_ISO_8859_9_TO_LOWER_CASE(c) EncISO_8859_9_ToLowerCaseTable[(c)]

/* Nonzero when the EncISO_8859_9_CtypeTable entry for byte value `code`
 * has the bit selected by CTYPE_TO_BIT(ctype) set.  `code` indexes the
 * 256-entry table directly, so callers must keep it below 256. */
#define ENC_IS_ISO_8859_9_CTYPE(code,ctype) \
  ((EncISO_8859_9_CtypeTable[(code)] & CTYPE_TO_BIT(ctype)) != 0)
|
||||
|
||||
/* Byte-to-byte lower-casing table for ISO-8859-9, indexed by byte value.
 * 0x41-0x5a map to 0x61-0x7a and 0xc0-0xde map to 0xe0-0xfe, except 0xd7
 * and 0xdd, which map to themselves; every other byte (including 0xdf)
 * maps to itself. */
static const UChar EncISO_8859_9_ToLowerCaseTable[256] = {
|
||||
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
|
||||
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
|
||||
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
|
||||
'\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
|
||||
'\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
|
||||
'\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
|
||||
'\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
|
||||
'\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
|
||||
'\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
|
||||
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
|
||||
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
|
||||
'\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
|
||||
'\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
|
||||
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
|
||||
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
|
||||
'\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
|
||||
'\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
|
||||
'\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
|
||||
'\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
|
||||
'\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
|
||||
'\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
|
||||
'\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
|
||||
'\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
|
||||
'\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
|
||||
'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
|
||||
'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
|
||||
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327',
|
||||
'\370', '\371', '\372', '\373', '\374', '\335', '\376', '\337',
|
||||
'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
|
||||
'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
|
||||
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
|
||||
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
|
||||
};
|
||||
|
||||
/* Character-class masks for ISO-8859-9: one unsigned short per byte value
 * (256 entries), indexed directly by the byte and read through
 * ENC_IS_ISO_8859_9_CTYPE(). */
static const unsigned short EncISO_8859_9_CtypeTable[256] = {
|
||||
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
|
||||
0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
|
||||
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
|
||||
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
|
||||
0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
|
||||
0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
|
||||
0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
|
||||
0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
|
||||
0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
|
||||
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
|
||||
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
|
||||
0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
|
||||
0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
|
||||
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
|
||||
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
|
||||
0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
|
||||
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
|
||||
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
|
||||
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
|
||||
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
|
||||
0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
|
||||
0x00a0, 0x00a0, 0x30e2, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0,
|
||||
0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x30e2, 0x00a0, 0x01a0,
|
||||
0x00a0, 0x10a0, 0x30e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x01a0,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2
|
||||
};
|
||||
|
||||
static int
|
||||
mbc_case_fold(OnigCaseFoldType flag,
|
||||
const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower)
|
||||
{
|
||||
const UChar* p = *pp;
|
||||
|
||||
if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
|
||||
*lower++ = 's';
|
||||
*lower = 's';
|
||||
(*pp)++;
|
||||
return 2;
|
||||
}
|
||||
|
||||
*lower = ENC_ISO_8859_9_TO_LOWER_CASE(*p);
|
||||
(*pp)++;
|
||||
return 1;
|
||||
}
|
||||
|
||||
#if 0
/*
 * Compiled out.  Reports whether the byte at *pp has a case counterpart and
 * advances *pp by one byte.  `end` is not used.
 *
 * 0xdf is reported as ambiguous when flag contains
 * INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR.  Otherwise the answer comes from
 * the upper/lower bits of EncISO_8859_9_CtypeTable, with lower-case bytes
 * in 0xaa-0xba reported as not ambiguous.
 */
static int
is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
{
  int v;
  const UChar* p = *pp;

  if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
    (*pp)++;
    return TRUE;
  }

  (*pp)++;
  v = (EncISO_8859_9_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
  /* Must be '&': OR-ing in BIT_CTYPE_LOWER is never zero, which would send
     every byte (letters or not) through this branch. */
  if ((v & BIT_CTYPE_LOWER) != 0) {
    /* 0xdf etc. are lower-case letters but cannot be converted. */
    if (*p >= 0xaa && *p <= 0xba)
      return FALSE;
    else
      return TRUE;
  }

  /* Not lower-case: ambiguous only if the upper-case bit is set. */
  return (v != 0 ? TRUE : FALSE);
}
#endif
|
||||
|
||||
static int
|
||||
is_code_ctype(OnigCodePoint code, unsigned int ctype)
|
||||
{
|
||||
if (code < 256)
|
||||
return ENC_IS_ISO_8859_9_CTYPE(code, ctype);
|
||||
else
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/*
 * Byte pairs handed to the onigenc_*_with_map() helpers for ISO-8859-9:
 * 0xc0-0xde paired with the byte 0x20 above, with 0xd7 left out.
 *
 * NOTE(review): EncISO_8859_9_ToLowerCaseTable leaves 0xdd unchanged while
 * this map pairs it with 0xfd -- confirm the difference is intended.
 */
static const OnigPairCaseFoldCodes CaseFoldMap[] = {
  /* 0xc0-0xcf -> 0xe0-0xef */
  { 0xc0, 0xe0 }, { 0xc1, 0xe1 }, { 0xc2, 0xe2 }, { 0xc3, 0xe3 },
  { 0xc4, 0xe4 }, { 0xc5, 0xe5 }, { 0xc6, 0xe6 }, { 0xc7, 0xe7 },
  { 0xc8, 0xe8 }, { 0xc9, 0xe9 }, { 0xca, 0xea }, { 0xcb, 0xeb },
  { 0xcc, 0xec }, { 0xcd, 0xed }, { 0xce, 0xee }, { 0xcf, 0xef },

  /* 0xd0-0xde -> 0xf0-0xfe (no entry for 0xd7) */
  { 0xd0, 0xf0 }, { 0xd1, 0xf1 }, { 0xd2, 0xf2 }, { 0xd3, 0xf3 },
  { 0xd4, 0xf4 }, { 0xd5, 0xf5 }, { 0xd6, 0xf6 },
  { 0xd8, 0xf8 }, { 0xd9, 0xf9 }, { 0xda, 0xfa }, { 0xdb, 0xfb },
  { 0xdc, 0xfc }, { 0xdd, 0xfd }, { 0xde, 0xfe }
};
|
||||
|
||||
static int
|
||||
apply_all_case_fold(OnigCaseFoldType flag,
|
||||
OnigApplyAllCaseFoldFunc f, void* arg)
|
||||
{
|
||||
return onigenc_apply_all_case_fold_with_map(
|
||||
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
|
||||
flag, f, arg);
|
||||
}
|
||||
|
||||
static int
|
||||
get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
||||
const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
|
||||
{
|
||||
return onigenc_get_case_fold_codes_by_str_with_map(
|
||||
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
|
||||
flag, p, end, items);
|
||||
}
|
||||
|
||||
OnigEncodingType OnigEncodingISO_8859_9 = {
|
||||
onigenc_single_byte_mbc_enc_len,
|
||||
"ISO-8859-9", /* name */
|
||||
1, /* max enc length */
|
||||
1, /* min enc length */
|
||||
onigenc_is_mbc_newline_0x0a,
|
||||
onigenc_single_byte_mbc_to_code,
|
||||
onigenc_single_byte_code_to_mbclen,
|
||||
onigenc_single_byte_code_to_mbc,
|
||||
mbc_case_fold,
|
||||
apply_all_case_fold,
|
||||
get_case_fold_codes_by_str,
|
||||
onigenc_minimum_property_name_to_ctype,
|
||||
is_code_ctype,
|
||||
onigenc_not_support_get_ctype_code_range,
|
||||
onigenc_single_byte_left_adjust_char_head,
|
||||
onigenc_always_true_is_allowed_reverse_match,
|
||||
ONIGENC_FLAG_NONE,
|
||||
};
|
251
src/Onigmo/enc/koi8.c
Normal file
251
src/Onigmo/enc/koi8.c
Normal file
@ -0,0 +1,251 @@
|
||||
/**********************************************************************
|
||||
koi8.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "regenc.h"
|
||||
|
||||
/* Lower-case table lookup for byte value `c`. */
#define ENC_KOI8_TO_LOWER_CASE(c) EncKOI8_ToLowerCaseTable[(c)]

/* Nonzero when the EncKOI8_CtypeTable entry for byte value `code` has the
 * bit selected by CTYPE_TO_BIT(ctype) set.  `code` indexes the 256-entry
 * table directly, so callers must keep it below 256. */
#define ENC_IS_KOI8_CTYPE(code,ctype) \
  ((EncKOI8_CtypeTable[(code)] & CTYPE_TO_BIT(ctype)) != 0)
|
||||
|
||||
/* Byte-to-byte lower-casing table for KOI8, indexed by byte value.
 * 0x41-0x5a map to 0x61-0x7a and 0xe0-0xff map to 0xc0-0xdf; every other
 * byte maps to itself. */
static const UChar EncKOI8_ToLowerCaseTable[256] = {
|
||||
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
|
||||
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
|
||||
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
|
||||
'\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
|
||||
'\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
|
||||
'\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
|
||||
'\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
|
||||
'\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
|
||||
'\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
|
||||
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
|
||||
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
|
||||
'\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
|
||||
'\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
|
||||
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
|
||||
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
|
||||
'\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
|
||||
'\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
|
||||
'\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
|
||||
'\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
|
||||
'\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
|
||||
'\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
|
||||
'\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
|
||||
'\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
|
||||
'\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
|
||||
'\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
|
||||
'\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
|
||||
'\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
|
||||
'\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
|
||||
'\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
|
||||
'\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
|
||||
'\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
|
||||
'\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337'
|
||||
};
|
||||
|
||||
/* Character-class masks for KOI8: one unsigned short per byte value
 * (256 entries), indexed directly by the byte and read through
 * ENC_IS_KOI8_CTYPE(). */
static const unsigned short EncKOI8_CtypeTable[256] = {
|
||||
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
|
||||
0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
|
||||
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
|
||||
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
|
||||
0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
|
||||
0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
|
||||
0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
|
||||
0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
|
||||
0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
|
||||
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
|
||||
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
|
||||
0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
|
||||
0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
|
||||
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
|
||||
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
|
||||
0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
|
||||
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
|
||||
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
|
||||
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
|
||||
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
|
||||
0x0284, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2
|
||||
};
|
||||
|
||||
|
||||
static int
|
||||
koi8_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED,
|
||||
const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower)
|
||||
{
|
||||
const UChar* p = *pp;
|
||||
|
||||
*lower = ENC_KOI8_TO_LOWER_CASE(*p);
|
||||
(*pp)++;
|
||||
return 1;
|
||||
}
|
||||
|
||||
#if 0
/*
 * Compiled out.  Advances *pp by one byte and reports whether that byte is
 * flagged upper- or lower-case in EncKOI8_CtypeTable.  The table is only
 * consulted when flag enables the matching range:
 * ONIGENC_CASE_FOLD_ASCII_CASE for bytes accepted by ONIGENC_IS_MBC_ASCII,
 * ONIGENC_CASE_FOLD_NONASCII_CASE for the others; otherwise the result is
 * FALSE.  `end` is not used.
 */
static int
koi8_is_mbc_ambiguous(OnigAmbigType flag, const OnigUChar** pp, const OnigUChar* end)
{
  const OnigUChar* cur = *pp;
  int selected;

  (*pp)++;

  if (ONIGENC_IS_MBC_ASCII(cur))
    selected = (flag & ONIGENC_CASE_FOLD_ASCII_CASE) != 0;
  else
    selected = (flag & ONIGENC_CASE_FOLD_NONASCII_CASE) != 0;

  if (!selected)
    return FALSE;

  return ((EncKOI8_CtypeTable[*cur] &
           (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)) != 0) ? TRUE : FALSE;
}
#endif
|
||||
|
||||
static int
|
||||
koi8_is_code_ctype(OnigCodePoint code, unsigned int ctype)
|
||||
{
|
||||
if (code < 256)
|
||||
return ENC_IS_KOI8_CTYPE(code, ctype);
|
||||
else
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/*
 * Byte pairs handed to the onigenc_*_with_map() helpers for KOI8.
 *
 * The first 32 entries pair every byte in 0xc0-0xdf with the byte 0x20
 * above it (EncKOI8_ToLowerCaseTable folds 0xe0-0xff down to 0xc0-0xdf).
 * The remaining 32 entries list the same pairs with the members swapped.
 * { 0xfd, 0xdd } is part of that mirrored half so that it covers every pair
 * of the first half.
 *
 * NOTE(review): if the helpers already treat each pair symmetrically the
 * mirrored half is redundant -- confirm before trimming it.
 */
static const OnigPairCaseFoldCodes CaseFoldMap[] = {
  /* 0xc0-0xcf <-> 0xe0-0xef */
  { 0xc0, 0xe0 }, { 0xc1, 0xe1 }, { 0xc2, 0xe2 }, { 0xc3, 0xe3 },
  { 0xc4, 0xe4 }, { 0xc5, 0xe5 }, { 0xc6, 0xe6 }, { 0xc7, 0xe7 },
  { 0xc8, 0xe8 }, { 0xc9, 0xe9 }, { 0xca, 0xea }, { 0xcb, 0xeb },
  { 0xcc, 0xec }, { 0xcd, 0xed }, { 0xce, 0xee }, { 0xcf, 0xef },

  /* 0xd0-0xdf <-> 0xf0-0xff */
  { 0xd0, 0xf0 }, { 0xd1, 0xf1 }, { 0xd2, 0xf2 }, { 0xd3, 0xf3 },
  { 0xd4, 0xf4 }, { 0xd5, 0xf5 }, { 0xd6, 0xf6 }, { 0xd7, 0xf7 },
  { 0xd8, 0xf8 }, { 0xd9, 0xf9 }, { 0xda, 0xfa }, { 0xdb, 0xfb },
  { 0xdc, 0xfc }, { 0xdd, 0xfd }, { 0xde, 0xfe }, { 0xdf, 0xff },

  /* mirrored: 0xe0-0xef <-> 0xc0-0xcf */
  { 0xe0, 0xc0 }, { 0xe1, 0xc1 }, { 0xe2, 0xc2 }, { 0xe3, 0xc3 },
  { 0xe4, 0xc4 }, { 0xe5, 0xc5 }, { 0xe6, 0xc6 }, { 0xe7, 0xc7 },
  { 0xe8, 0xc8 }, { 0xe9, 0xc9 }, { 0xea, 0xca }, { 0xeb, 0xcb },
  { 0xec, 0xcc }, { 0xed, 0xcd }, { 0xee, 0xce }, { 0xef, 0xcf },

  /* mirrored: 0xf0-0xff <-> 0xd0-0xdf */
  { 0xf0, 0xd0 }, { 0xf1, 0xd1 }, { 0xf2, 0xd2 }, { 0xf3, 0xd3 },
  { 0xf4, 0xd4 }, { 0xf5, 0xd5 }, { 0xf6, 0xd6 }, { 0xf7, 0xd7 },
  { 0xf8, 0xd8 }, { 0xf9, 0xd9 }, { 0xfa, 0xda }, { 0xfb, 0xdb },
  { 0xfc, 0xdc }, { 0xfd, 0xdd }, { 0xfe, 0xde }, { 0xff, 0xdf }
};
|
||||
|
||||
static int
|
||||
koi8_apply_all_case_fold(OnigCaseFoldType flag,
|
||||
OnigApplyAllCaseFoldFunc f, void* arg)
|
||||
{
|
||||
return onigenc_apply_all_case_fold_with_map(
|
||||
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0,
|
||||
flag, f, arg);
|
||||
}
|
||||
|
||||
static int
|
||||
koi8_get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
||||
const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
|
||||
{
|
||||
return onigenc_get_case_fold_codes_by_str_with_map(
|
||||
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0,
|
||||
flag, p, end, items);
|
||||
}
|
||||
|
||||
OnigEncodingType OnigEncodingKOI8 = {
|
||||
onigenc_single_byte_mbc_enc_len,
|
||||
"KOI8", /* name */
|
||||
1, /* max enc length */
|
||||
1, /* min enc length */
|
||||
onigenc_is_mbc_newline_0x0a,
|
||||
onigenc_single_byte_mbc_to_code,
|
||||
onigenc_single_byte_code_to_mbclen,
|
||||
onigenc_single_byte_code_to_mbc,
|
||||
koi8_mbc_case_fold,
|
||||
koi8_apply_all_case_fold,
|
||||
koi8_get_case_fold_codes_by_str,
|
||||
onigenc_minimum_property_name_to_ctype,
|
||||
koi8_is_code_ctype,
|
||||
onigenc_not_support_get_ctype_code_range,
|
||||
onigenc_single_byte_left_adjust_char_head,
|
||||
onigenc_always_true_is_allowed_reverse_match,
|
||||
ONIGENC_FLAG_NONE,
|
||||
};
|
213
src/Onigmo/enc/koi8_r.c
Normal file
213
src/Onigmo/enc/koi8_r.c
Normal file
@ -0,0 +1,213 @@
|
||||
/**********************************************************************
|
||||
koi8_r.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "regenc.h"
|
||||
|
||||
/* Entry of EncKOI8_R_ToLowerCaseTable for byte value `c`. */
#define ENC_KOI8_R_TO_LOWER_CASE(c)   EncKOI8_R_ToLowerCaseTable[c]

/* Non-zero when the ctype-table entry for `code` has the bit of `ctype` set. */
#define ENC_IS_KOI8_R_CTYPE(code,ctype) \
  ((EncKOI8_R_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
|
||||
|
||||
/*
 * Byte -> lower-case byte map for KOI8-R, indexed by the byte value.
 * Entries that differ from their index: 0x41-0x5a -> 0x61-0x7a,
 * 0xb3 -> 0xa3, and 0xe0-0xff -> 0xc0-0xdf.  Every other byte maps to itself.
 */
static const UChar EncKOI8_R_ToLowerCaseTable[256] = {
  /* 0x00 */ '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
  /* 0x08 */ '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
  /* 0x10 */ '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
  /* 0x18 */ '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
  /* 0x20 */ '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
  /* 0x28 */ '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
  /* 0x30 */ '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
  /* 0x38 */ '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
  /* 0x40 */ '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
  /* 0x48 */ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
  /* 0x50 */ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
  /* 0x58 */ '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
  /* 0x60 */ '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
  /* 0x68 */ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
  /* 0x70 */ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
  /* 0x78 */ '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
  /* 0x80 */ '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
  /* 0x88 */ '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
  /* 0x90 */ '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
  /* 0x98 */ '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
  /* 0xa0 */ '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
  /* 0xa8 */ '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
  /* 0xb0 */ '\260', '\261', '\262', '\243', '\264', '\265', '\266', '\267',
  /* 0xb8 */ '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
  /* 0xc0 */ '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
  /* 0xc8 */ '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
  /* 0xd0 */ '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
  /* 0xd8 */ '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
  /* 0xe0 */ '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
  /* 0xe8 */ '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
  /* 0xf0 */ '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
  /* 0xf8 */ '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337'
};
|
||||
|
||||
/*
 * Per-byte character-class bit masks for KOI8-R, indexed by the byte value.
 * Individual bits are tested through CTYPE_TO_BIT() (see ENC_IS_KOI8_R_CTYPE).
 */
static const unsigned short EncKOI8_R_CtypeTable[256] = {
  /* 0x00 */ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  /* 0x08 */ 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
  /* 0x10 */ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  /* 0x18 */ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  /* 0x20 */ 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  /* 0x28 */ 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  /* 0x30 */ 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
  /* 0x38 */ 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  /* 0x40 */ 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
  /* 0x48 */ 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
  /* 0x50 */ 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
  /* 0x58 */ 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
  /* 0x60 */ 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
  /* 0x68 */ 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
  /* 0x70 */ 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
  /* 0x78 */ 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
  /* 0x80 */ 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
  /* 0x88 */ 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
  /* 0x90 */ 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
  /* 0x98 */ 0x00a0, 0x00a0, 0x0284, 0x00a0, 0x00a0, 0x10a0, 0x01a0, 0x00a0,
  /* 0xa0 */ 0x00a0, 0x00a0, 0x00a0, 0x30e2, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
  /* 0xa8 */ 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
  /* 0xb0 */ 0x00a0, 0x00a0, 0x00a0, 0x34a2, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
  /* 0xb8 */ 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
  /* 0xc0 */ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
  /* 0xc8 */ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
  /* 0xd0 */ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
  /* 0xd8 */ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
  /* 0xe0 */ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
  /* 0xe8 */ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
  /* 0xf0 */ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
  /* 0xf8 */ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2
};
|
||||
|
||||
static int
|
||||
koi8_r_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED,
|
||||
const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower)
|
||||
{
|
||||
const UChar* p = *pp;
|
||||
|
||||
*lower = ENC_KOI8_R_TO_LOWER_CASE(*p);
|
||||
(*pp)++;
|
||||
return 1;
|
||||
}
|
||||
|
||||
#if 0
/*
 * Compiled out.  Advances *pp by one byte and reports whether that byte is
 * flagged as upper or lower case in EncKOI8_R_CtypeTable.
 */
static int
koi8_r_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
{
  const UChar* cur = *pp;

  (*pp)++;
  if ((EncKOI8_R_CtypeTable[*cur] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)) != 0)
    return TRUE;
  return FALSE;
}
#endif
|
||||
|
||||
static int
|
||||
koi8_r_is_code_ctype(OnigCodePoint code, unsigned int ctype)
|
||||
{
|
||||
if (code < 256)
|
||||
return ENC_IS_KOI8_R_CTYPE(code, ctype);
|
||||
else
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/*
 * KOI8-R case-fold pairs: 0xa3/0xb3 plus the 32 pairs 0xc0-0xdf / 0xe0-0xff.
 * The first member of each pair is the value the lower-case table maps to.
 */
static const OnigPairCaseFoldCodes CaseFoldMap[] = {
  { 0xa3, 0xb3 },

  { 0xc0, 0xe0 }, { 0xc1, 0xe1 }, { 0xc2, 0xe2 }, { 0xc3, 0xe3 },
  { 0xc4, 0xe4 }, { 0xc5, 0xe5 }, { 0xc6, 0xe6 }, { 0xc7, 0xe7 },
  { 0xc8, 0xe8 }, { 0xc9, 0xe9 }, { 0xca, 0xea }, { 0xcb, 0xeb },
  { 0xcc, 0xec }, { 0xcd, 0xed }, { 0xce, 0xee }, { 0xcf, 0xef },

  { 0xd0, 0xf0 }, { 0xd1, 0xf1 }, { 0xd2, 0xf2 }, { 0xd3, 0xf3 },
  { 0xd4, 0xf4 }, { 0xd5, 0xf5 }, { 0xd6, 0xf6 }, { 0xd7, 0xf7 },
  { 0xd8, 0xf8 }, { 0xd9, 0xf9 }, { 0xda, 0xfa }, { 0xdb, 0xfb },
  { 0xdc, 0xfc }, { 0xdd, 0xfd }, { 0xde, 0xfe }, { 0xdf, 0xff }
};
|
||||
|
||||
static int
|
||||
koi8_r_apply_all_case_fold(OnigCaseFoldType flag,
|
||||
OnigApplyAllCaseFoldFunc f, void* arg)
|
||||
{
|
||||
return onigenc_apply_all_case_fold_with_map(
|
||||
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0,
|
||||
flag, f, arg);
|
||||
}
|
||||
|
||||
static int
|
||||
koi8_r_get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
||||
const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
|
||||
{
|
||||
return onigenc_get_case_fold_codes_by_str_with_map(
|
||||
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0,
|
||||
flag, p, end, items);
|
||||
}
|
||||
|
||||
OnigEncodingType OnigEncodingKOI8_R = {
|
||||
onigenc_single_byte_mbc_enc_len,
|
||||
"KOI8-R", /* name */
|
||||
1, /* max enc length */
|
||||
1, /* min enc length */
|
||||
onigenc_is_mbc_newline_0x0a,
|
||||
onigenc_single_byte_mbc_to_code,
|
||||
onigenc_single_byte_code_to_mbclen,
|
||||
onigenc_single_byte_code_to_mbc,
|
||||
koi8_r_mbc_case_fold,
|
||||
koi8_r_apply_all_case_fold,
|
||||
koi8_r_get_case_fold_codes_by_str,
|
||||
onigenc_minimum_property_name_to_ctype,
|
||||
koi8_r_is_code_ctype,
|
||||
onigenc_not_support_get_ctype_code_range,
|
||||
onigenc_single_byte_left_adjust_char_head,
|
||||
onigenc_always_true_is_allowed_reverse_match,
|
||||
ONIGENC_FLAG_NONE,
|
||||
};
|
1162
src/Onigmo/enc/mktable.c
Normal file
1162
src/Onigmo/enc/mktable.c
Normal file
File diff suppressed because it is too large
Load Diff
571
src/Onigmo/enc/sjis.c
Normal file
571
src/Onigmo/enc/sjis.c
Normal file
@ -0,0 +1,571 @@
|
||||
/**********************************************************************
|
||||
sjis.c - Onigmo (Oniguruma-mod) (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2011 K.Takata <kentkt AT csc DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "regint.h"
|
||||
|
||||
/* The encoding object this translation unit defines: CP932 when ENC_CP932
 * is set at build time, Shift_JIS otherwise. */
#ifdef ENC_CP932
# define ONIG_ENCODING_SELF    ONIG_ENCODING_CP932
#else
# define ONIG_ENCODING_SELF    ONIG_ENCODING_SJIS
#endif
|
||||
|
||||
/*
 * Encoded length (1 or 2 bytes) of a character, indexed by its first byte.
 * Bytes 0x81-0x9f and 0xe0-0xfc start a two-byte character; all others are
 * single-byte.
 */
static const int EncLen_SJIS[] = {
  /* 0x00 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x10 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x20 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x60 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x80 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0x90 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xa0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xb0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xc0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xd0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xe0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xf0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1
};
|
||||
|
||||
/*
 * 1 when the byte may appear as the second byte of a two-byte character,
 * 0 otherwise.  Allowed trail bytes are 0x40-0x7e and 0x80-0xfc.
 */
static const char SJIS_CAN_BE_TRAIL_TABLE[256] = {
  /* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x20 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x30 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x60 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
  /* 0x80 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x90 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xa0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xb0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xc0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xd0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xe0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xf0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0
};
|
||||
|
||||
/*
 * Two-byte case-fold pairs, listed as { upper, lower }; get_lower_case()
 * and get_upper_case() implement the same correspondence arithmetically.
 * The Cyrillic lower-case column skips 0x847f because 0x7f is not a valid
 * trail byte.
 */
static const OnigPairCaseFoldCodes CaseFoldMap[] = {
  /* Fullwidth Alphabet */
  { 0x8260, 0x8281 }, { 0x8261, 0x8282 },
  { 0x8262, 0x8283 }, { 0x8263, 0x8284 },
  { 0x8264, 0x8285 }, { 0x8265, 0x8286 },
  { 0x8266, 0x8287 }, { 0x8267, 0x8288 },
  { 0x8268, 0x8289 }, { 0x8269, 0x828a },
  { 0x826a, 0x828b }, { 0x826b, 0x828c },
  { 0x826c, 0x828d }, { 0x826d, 0x828e },
  { 0x826e, 0x828f }, { 0x826f, 0x8290 },
  { 0x8270, 0x8291 }, { 0x8271, 0x8292 },
  { 0x8272, 0x8293 }, { 0x8273, 0x8294 },
  { 0x8274, 0x8295 }, { 0x8275, 0x8296 },
  { 0x8276, 0x8297 }, { 0x8277, 0x8298 },
  { 0x8278, 0x8299 }, { 0x8279, 0x829a },

  /* Greek */
  { 0x839f, 0x83bf }, { 0x83a0, 0x83c0 },
  { 0x83a1, 0x83c1 }, { 0x83a2, 0x83c2 },
  { 0x83a3, 0x83c3 }, { 0x83a4, 0x83c4 },
  { 0x83a5, 0x83c5 }, { 0x83a6, 0x83c6 },
  { 0x83a7, 0x83c7 }, { 0x83a8, 0x83c8 },
  { 0x83a9, 0x83c9 }, { 0x83aa, 0x83ca },
  { 0x83ab, 0x83cb }, { 0x83ac, 0x83cc },
  { 0x83ad, 0x83cd }, { 0x83ae, 0x83ce },
  { 0x83af, 0x83cf }, { 0x83b0, 0x83d0 },
  { 0x83b1, 0x83d1 }, { 0x83b2, 0x83d2 },
  { 0x83b3, 0x83d3 }, { 0x83b4, 0x83d4 },
  { 0x83b5, 0x83d5 }, { 0x83b6, 0x83d6 },

  /* Cyrillic */
  { 0x8440, 0x8470 }, { 0x8441, 0x8471 },
  { 0x8442, 0x8472 }, { 0x8443, 0x8473 },
  { 0x8444, 0x8474 }, { 0x8445, 0x8475 },
  { 0x8446, 0x8476 }, { 0x8447, 0x8477 },
  { 0x8448, 0x8478 }, { 0x8449, 0x8479 },
  { 0x844a, 0x847a }, { 0x844b, 0x847b },
  { 0x844c, 0x847c }, { 0x844d, 0x847d },
  { 0x844e, 0x847e }, { 0x844f, 0x8480 },
  { 0x8450, 0x8481 }, { 0x8451, 0x8482 },
  { 0x8452, 0x8483 }, { 0x8453, 0x8484 },
  { 0x8454, 0x8485 }, { 0x8455, 0x8486 },
  { 0x8456, 0x8487 }, { 0x8457, 0x8488 },
  { 0x8458, 0x8489 }, { 0x8459, 0x848a },
  { 0x845a, 0x848b }, { 0x845b, 0x848c },
  { 0x845c, 0x848d }, { 0x845d, 0x848e },
  { 0x845e, 0x848f }, { 0x845f, 0x8490 },
  { 0x8460, 0x8491 },
};
|
||||
|
||||
/* True when `byte` starts a multi-byte (two-byte) character. */
#define SJIS_ISMB_FIRST(byte)  (EncLen_SJIS[byte] > 1)
/* Non-zero when `byte` may be the second byte of a two-byte character. */
#define SJIS_ISMB_TRAIL(byte)  SJIS_CAN_BE_TRAIL_TABLE[(byte)]
|
||||
|
||||
static int
|
||||
mbc_enc_len(const UChar* p)
|
||||
{
|
||||
return EncLen_SJIS[*p];
|
||||
}
|
||||
|
||||
static int
|
||||
code_to_mbclen(OnigCodePoint code)
|
||||
{
|
||||
if (code < 256) {
|
||||
if (EncLen_SJIS[(int )code] == 1)
|
||||
return 1;
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
else if (code <= 0xffff) {
|
||||
int low = code & 0xff;
|
||||
if (! SJIS_ISMB_TRAIL(low))
|
||||
return ONIGERR_INVALID_CODE_POINT_VALUE;
|
||||
return 2;
|
||||
}
|
||||
else
|
||||
return ONIGERR_INVALID_CODE_POINT_VALUE;
|
||||
}
|
||||
|
||||
static OnigCodePoint
|
||||
mbc_to_code(const UChar* p, const UChar* end)
|
||||
{
|
||||
int c, i, len;
|
||||
OnigCodePoint n;
|
||||
|
||||
len = mbc_enc_len(p);
|
||||
c = *p++;
|
||||
n = c;
|
||||
if (len == 1) return n;
|
||||
|
||||
for (i = 1; i < len; i++) {
|
||||
if (p >= end) break;
|
||||
c = *p++;
|
||||
n <<= 8; n += c;
|
||||
}
|
||||
return n;
|
||||
}
|
||||
|
||||
static int
|
||||
code_to_mbc(OnigCodePoint code, UChar *buf)
|
||||
{
|
||||
UChar *p = buf;
|
||||
|
||||
if ((code & 0xff00) != 0) *p++ = (UChar )(((code >> 8) & 0xff));
|
||||
*p++ = (UChar )(code & 0xff);
|
||||
|
||||
#if 0
|
||||
if (mbc_enc_len(buf) != (p - buf))
|
||||
return REGERR_INVALID_CODE_POINT_VALUE;
|
||||
#endif
|
||||
return (int )(p - buf);
|
||||
}
|
||||
|
||||
static int
|
||||
apply_all_case_fold(OnigCaseFoldType flag,
|
||||
OnigApplyAllCaseFoldFunc f, void* arg)
|
||||
{
|
||||
return onigenc_apply_all_case_fold_with_map(
|
||||
sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0,
|
||||
flag, f, arg);
|
||||
}
|
||||
|
||||
static OnigCodePoint
|
||||
get_lower_case(OnigCodePoint code)
|
||||
{
|
||||
if (ONIGENC_IS_IN_RANGE(code, 0x8260, 0x8279)) {
|
||||
/* Fullwidth Alphabet */
|
||||
return (OnigCodePoint )(code + 0x0021);
|
||||
}
|
||||
else if (ONIGENC_IS_IN_RANGE(code, 0x839f, 0x83b6)) {
|
||||
/* Greek */
|
||||
return (OnigCodePoint )(code + 0x0020);
|
||||
}
|
||||
else if (ONIGENC_IS_IN_RANGE(code, 0x8440, 0x8460)) {
|
||||
/* Cyrillic */
|
||||
int d = (code >= 0x844f) ? 1 : 0;
|
||||
return (OnigCodePoint )(code + (0x0030 + d));
|
||||
}
|
||||
return code;
|
||||
}
|
||||
|
||||
static OnigCodePoint
|
||||
get_upper_case(OnigCodePoint code)
|
||||
{
|
||||
if (ONIGENC_IS_IN_RANGE(code, 0x8281, 0x829a)) {
|
||||
/* Fullwidth Alphabet */
|
||||
return (OnigCodePoint )(code - 0x0021);
|
||||
}
|
||||
else if (ONIGENC_IS_IN_RANGE(code, 0x83bf, 0x83d6)) {
|
||||
/* Greek */
|
||||
return (OnigCodePoint )(code - 0x0020);
|
||||
}
|
||||
else if (ONIGENC_IS_IN_RANGE(code, 0x8470, 0x847e) ||
|
||||
ONIGENC_IS_IN_RANGE(code, 0x8480, 0x8491)) {
|
||||
/* Cyrillic */
|
||||
int d = (code >= 0x8480) ? 1 : 0;
|
||||
return (OnigCodePoint )(code - (0x0030 - d));
|
||||
}
|
||||
return code;
|
||||
}
|
||||
|
||||
static int
|
||||
get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
||||
const OnigUChar* p, const OnigUChar* end,
|
||||
OnigCaseFoldCodeItem items[])
|
||||
{
|
||||
int len;
|
||||
OnigCodePoint code, code_lo, code_up;
|
||||
|
||||
code = mbc_to_code(p, end);
|
||||
if (ONIGENC_IS_ASCII_CODE(code))
|
||||
return onigenc_ascii_get_case_fold_codes_by_str(flag, p, end, items);
|
||||
|
||||
len = mbc_enc_len(p);
|
||||
code_lo = get_lower_case(code);
|
||||
code_up = get_upper_case(code);
|
||||
|
||||
if (code != code_lo) {
|
||||
items[0].byte_len = len;
|
||||
items[0].code_len = 1;
|
||||
items[0].code[0] = code_lo;
|
||||
return 1;
|
||||
}
|
||||
else if (code != code_up) {
|
||||
items[0].byte_len = len;
|
||||
items[0].code_len = 1;
|
||||
items[0].code[0] = code_up;
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED,
|
||||
const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower)
|
||||
{
|
||||
const UChar* p = *pp;
|
||||
|
||||
if (ONIGENC_IS_MBC_ASCII(p)) {
|
||||
*lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
|
||||
(*pp)++;
|
||||
return 1;
|
||||
}
|
||||
else {
|
||||
OnigCodePoint code;
|
||||
int len;
|
||||
|
||||
code = get_lower_case(mbc_to_code(p, end));
|
||||
len = code_to_mbc(code, lower);
|
||||
(*pp) += len;
|
||||
return len; /* return byte length of converted char to lower */
|
||||
}
|
||||
}
|
||||
|
||||
#if 0
/* Compiled out.  Thin wrapper forwarding to the generic multi-byte helper
 * with this file's encoding object. */
static int
is_mbc_ambiguous(OnigCaseFoldType flag,
                 const UChar** pp, const UChar* end)
{
  return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_SELF, flag, pp, end);
}
#endif
|
||||
|
||||
#if 0
/* Compiled out.  Earlier variant of is_code_ctype() that handles only the
 * standard character classes (no property-list lookup). */
static int
is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
  if (code < 128)
    return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);

  if (CTYPE_IS_WORD_GRAPH_PRINT(ctype))
    return (code_to_mbclen(code) > 1 ? TRUE : FALSE);

  return FALSE;
}
#endif
|
||||
|
||||
static UChar*
|
||||
left_adjust_char_head(const UChar* start, const UChar* s)
|
||||
{
|
||||
const UChar *p;
|
||||
int len;
|
||||
|
||||
if (s <= start) return (UChar* )s;
|
||||
p = s;
|
||||
|
||||
if (SJIS_ISMB_TRAIL(*p)) {
|
||||
while (p > start) {
|
||||
if (! SJIS_ISMB_FIRST(*--p)) {
|
||||
p++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
len = mbc_enc_len(p);
|
||||
if (p + len > s) return (UChar* )p;
|
||||
p += len;
|
||||
return (UChar* )(p + ((s - p) & ~1));
|
||||
}
|
||||
|
||||
static int
|
||||
is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED)
|
||||
{
|
||||
const UChar c = *s;
|
||||
return (SJIS_ISMB_TRAIL(c) ? FALSE : TRUE);
|
||||
}
|
||||
|
||||
|
||||
static int PropertyInited = 0;
|
||||
static const OnigCodePoint** PropertyList;
|
||||
static int PropertyListNum;
|
||||
static int PropertyListSize;
|
||||
static hash_table_type* PropertyNameTable;
|
||||
|
||||
/* Code ranges for "hiragana": a range count followed by (first, last) pairs. */
static const OnigCodePoint CR_Hiragana[] = {
  1,                    /* number of ranges */
  0x829f, 0x82f1
}; /* CR_Hiragana */
|
||||
|
||||
/* Code ranges for "katakana": a range count followed by (first, last) pairs.
 * The two-byte block is split around the invalid trail byte 0x7f. */
static const OnigCodePoint CR_Katakana[] = {
  4,                    /* number of ranges */
  0x00a6, 0x00af,
  0x00b1, 0x00dd,
  0x8340, 0x837e,
  0x8380, 0x8396,
}; /* CR_Katakana */
|
||||
|
||||
#ifdef ENC_CP932
|
||||
static const OnigCodePoint CR_Han[] = {
|
||||
6,
|
||||
0x8157, 0x8157,
|
||||
0x889f, 0x9872, /* Kanji level 1 */
|
||||
0x989f, 0x9ffc, /* Kanji level 2 */
|
||||
0xe040, 0xeaa4, /* Kanji level 2 */
|
||||
0xed40, 0xeeec, /* NEC-selected IBM extended characters (without symbols) */
|
||||
0xfa5c, 0xfc4b, /* IBM extended characters (without symbols) */
|
||||
}; /* CR_Han */
|
||||
#else
|
||||
static const OnigCodePoint CR_Han[] = {
|
||||
4,
|
||||
0x8157, 0x8157,
|
||||
0x889f, 0x9872, /* Kanji level 1 */
|
||||
0x989f, 0x9ffc, /* Kanji level 2 */
|
||||
0xe040, 0xeaa4, /* Kanji level 2 */
|
||||
}; /* CR_Han */
|
||||
#endif
|
||||
|
||||
/* Code ranges for "latin": ASCII letters plus the fullwidth alphabet.
 * Layout: a range count followed by (first, last) pairs. */
static const OnigCodePoint CR_Latin[] = {
  4,                    /* number of ranges */
  0x0041, 0x005a,
  0x0061, 0x007a,
  0x8260, 0x8279,
  0x8281, 0x829a,
}; /* CR_Latin */
|
||||
|
||||
/* Code ranges for "greek": the same two ranges that get_lower_case() and
 * get_upper_case() treat as Greek.
 * Layout: a range count followed by (first, last) pairs. */
static const OnigCodePoint CR_Greek[] = {
  2,                    /* number of ranges */
  0x839f, 0x83b6,
  0x83bf, 0x83d6,
}; /* CR_Greek */
|
||||
|
||||
/* Code ranges for "cyrillic": a range count followed by (first, last) pairs.
 * The lower-case row is split around trail byte 0x7f, which
 * SJIS_CAN_BE_TRAIL_TABLE marks as invalid.  The middle range therefore ends
 * at 0x847e (as in get_upper_case() and CR_Katakana) rather than 0x847f, so
 * the malformed sequence 0x84 0x7f is not classified as Cyrillic. */
static const OnigCodePoint CR_Cyrillic[] = {
  3,                    /* number of ranges */
  0x8440, 0x8460,
  0x8470, 0x847e,
  0x8480, 0x8491,
}; /* CR_Cyrillic */
|
||||
|
||||
static int
|
||||
init_property_list(void)
|
||||
{
|
||||
int r;
|
||||
|
||||
PROPERTY_LIST_ADD_PROP("hiragana", CR_Hiragana);
|
||||
PROPERTY_LIST_ADD_PROP("katakana", CR_Katakana);
|
||||
PROPERTY_LIST_ADD_PROP("han", CR_Han);
|
||||
PROPERTY_LIST_ADD_PROP("latin", CR_Latin);
|
||||
PROPERTY_LIST_ADD_PROP("greek", CR_Greek);
|
||||
PROPERTY_LIST_ADD_PROP("cyrillic", CR_Cyrillic);
|
||||
PropertyInited = 1;
|
||||
|
||||
end:
|
||||
return r;
|
||||
}
|
||||
|
||||
static int
|
||||
property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end)
|
||||
{
|
||||
hash_data_type ctype;
|
||||
UChar *s, *e;
|
||||
|
||||
PROPERTY_LIST_INIT_CHECK;
|
||||
|
||||
s = e = xalloca(end - p + 1);
|
||||
for (; p < end; p++) {
|
||||
*e++ = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
|
||||
}
|
||||
|
||||
if (onig_st_lookup_strend(PropertyNameTable, s, e, &ctype) == 0) {
|
||||
return onigenc_minimum_property_name_to_ctype(enc, s, e);
|
||||
}
|
||||
|
||||
return (int )ctype;
|
||||
}
|
||||
|
||||
static int
|
||||
is_code_ctype(OnigCodePoint code, unsigned int ctype)
|
||||
{
|
||||
if (ctype <= ONIGENC_MAX_STD_CTYPE) {
|
||||
if (code < 128)
|
||||
return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
|
||||
else {
|
||||
if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) {
|
||||
return (code_to_mbclen(code) > 1 ? TRUE : FALSE);
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
PROPERTY_LIST_INIT_CHECK;
|
||||
|
||||
ctype -= (ONIGENC_MAX_STD_CTYPE + 1);
|
||||
if (ctype >= (unsigned int )PropertyListNum)
|
||||
return ONIGERR_TYPE_BUG;
|
||||
|
||||
return onig_is_in_code_range((UChar* )PropertyList[ctype], code);
|
||||
}
|
||||
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
static int
|
||||
get_ctype_code_range(OnigCtype ctype, OnigCodePoint* sb_out,
|
||||
const OnigCodePoint* ranges[])
|
||||
{
|
||||
if (ctype <= ONIGENC_MAX_STD_CTYPE) {
|
||||
return ONIG_NO_SUPPORT_CONFIG;
|
||||
}
|
||||
else {
|
||||
*sb_out = 0x80;
|
||||
|
||||
PROPERTY_LIST_INIT_CHECK;
|
||||
|
||||
ctype -= (ONIGENC_MAX_STD_CTYPE + 1);
|
||||
if (ctype >= (OnigCtype )PropertyListNum)
|
||||
return ONIGERR_TYPE_BUG;
|
||||
|
||||
*ranges = PropertyList[ctype];
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef ENC_CP932
|
||||
OnigEncodingType OnigEncodingCP932 = {
|
||||
mbc_enc_len,
|
||||
"CP932", /* name */
|
||||
2, /* max byte length */
|
||||
1, /* min byte length */
|
||||
onigenc_is_mbc_newline_0x0a,
|
||||
mbc_to_code,
|
||||
code_to_mbclen,
|
||||
code_to_mbc,
|
||||
mbc_case_fold,
|
||||
apply_all_case_fold,
|
||||
get_case_fold_codes_by_str,
|
||||
property_name_to_ctype,
|
||||
is_code_ctype,
|
||||
get_ctype_code_range,
|
||||
left_adjust_char_head,
|
||||
is_allowed_reverse_match,
|
||||
ONIGENC_FLAG_NONE,
|
||||
};
|
||||
#else
|
||||
OnigEncodingType OnigEncodingSJIS = {
|
||||
mbc_enc_len,
|
||||
"Shift_JIS", /* name */
|
||||
2, /* max byte length */
|
||||
1, /* min byte length */
|
||||
onigenc_is_mbc_newline_0x0a,
|
||||
mbc_to_code,
|
||||
code_to_mbclen,
|
||||
code_to_mbc,
|
||||
mbc_case_fold,
|
||||
apply_all_case_fold,
|
||||
get_case_fold_codes_by_str,
|
||||
property_name_to_ctype,
|
||||
is_code_ctype,
|
||||
get_ctype_code_range,
|
||||
left_adjust_char_head,
|
||||
is_allowed_reverse_match,
|
||||
ONIGENC_FLAG_NONE,
|
||||
};
|
||||
#endif
|
677
src/Onigmo/enc/unicode.c
Normal file
677
src/Onigmo/enc/unicode.c
Normal file
@ -0,0 +1,677 @@
|
||||
/**********************************************************************
|
||||
unicode.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "regint.h"
|
||||
|
||||
/* Non-zero when the table entry for `code` (which must be below 256) has
 * the bit of `ctype` set. */
#define ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(code,ctype) \
  ((EncUNICODE_ISO_8859_1_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
#if 0
/* Compiled out: same test with a ready-made bit mask instead of a ctype. */
#define ONIGENC_IS_UNICODE_ISO_8859_1_BIT_CTYPE(code,cbit) \
  ((EncUNICODE_ISO_8859_1_CtypeTable[code] & (cbit)) != 0)
#endif
|
||||
|
||||
/*
 * Character-class bit masks for code points 0x00-0xff, indexed by code
 * point.  Individual bits are tested through CTYPE_TO_BIT()
 * (see ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE).
 */
static const unsigned short EncUNICODE_ISO_8859_1_CtypeTable[256] = {
  /* 0x00 */ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  /* 0x08 */ 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
  /* 0x10 */ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  /* 0x18 */ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  /* 0x20 */ 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  /* 0x28 */ 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  /* 0x30 */ 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
  /* 0x38 */ 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  /* 0x40 */ 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
  /* 0x48 */ 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
  /* 0x50 */ 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
  /* 0x58 */ 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
  /* 0x60 */ 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
  /* 0x68 */ 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
  /* 0x70 */ 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
  /* 0x78 */ 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
  /* 0x80 */ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0288, 0x0008, 0x0008,
  /* 0x88 */ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
  /* 0x90 */ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
  /* 0x98 */ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
  /* 0xa0 */ 0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
  /* 0xa8 */ 0x00a0, 0x00a0, 0x30e2, 0x01a0, 0x00a0, 0x00a8, 0x00a0, 0x00a0,
  /* 0xb0 */ 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x30e2, 0x00a0, 0x01a0,
  /* 0xb8 */ 0x00a0, 0x10a0, 0x30e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x01a0,
  /* 0xc0 */ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
  /* 0xc8 */ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
  /* 0xd0 */ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0,
  /* 0xd8 */ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2,
  /* 0xe0 */ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
  /* 0xe8 */ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
  /* 0xf0 */ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0,
  /* 0xf8 */ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2
};
|
||||
|
||||
#include "enc/unicode/name2ctype.h"
|
||||
|
||||
typedef struct {
|
||||
int n;
|
||||
OnigCodePoint code[3];
|
||||
} CodePointList3;
|
||||
|
||||
typedef struct {
|
||||
OnigCodePoint from;
|
||||
CodePointList3 to;
|
||||
} CaseFold_11_Type;
|
||||
|
||||
typedef struct {
|
||||
OnigCodePoint from;
|
||||
CodePointList3 to;
|
||||
} CaseUnfold_11_Type;
|
||||
|
||||
typedef struct {
|
||||
int n;
|
||||
OnigCodePoint code[2];
|
||||
} CodePointList2;
|
||||
|
||||
typedef struct {
|
||||
OnigCodePoint from[2];
|
||||
CodePointList2 to;
|
||||
} CaseUnfold_12_Type;
|
||||
|
||||
typedef struct {
|
||||
OnigCodePoint from[3];
|
||||
CodePointList2 to;
|
||||
} CaseUnfold_13_Type;
|
||||
|
||||
#include "enc/unicode/casefold.h"
|
||||
|
||||
|
||||
/* Element count of a true array (not a pointer), as an int.  The whole
   expansion is parenthesized so the macro always acts as a single operand
   (e.g. `sizeof numberof(x)` parses as intended). */
#define numberof(array) ((int)(sizeof(array) / sizeof((array)[0])))
/* Number of entries in the CodeRanges table. */
#define CODE_RANGES_NUM numberof(CodeRanges)
|
||||
|
||||
extern int
|
||||
onigenc_unicode_is_code_ctype(OnigCodePoint code, unsigned int ctype)
|
||||
{
|
||||
if (
|
||||
#ifdef USE_UNICODE_PROPERTIES
|
||||
ctype <= ONIGENC_MAX_STD_CTYPE &&
|
||||
#endif
|
||||
code < 256) {
|
||||
return ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(code, ctype);
|
||||
}
|
||||
|
||||
if (ctype >= CODE_RANGES_NUM) {
|
||||
return ONIGERR_TYPE_BUG;
|
||||
}
|
||||
|
||||
return onig_is_in_code_range((UChar* )CodeRanges[ctype], code);
|
||||
}
|
||||
|
||||
|
||||
extern int
|
||||
onigenc_unicode_ctype_code_range(int ctype, const OnigCodePoint* ranges[])
|
||||
{
|
||||
if (ctype >= CODE_RANGES_NUM) {
|
||||
return ONIGERR_TYPE_BUG;
|
||||
}
|
||||
|
||||
*ranges = CodeRanges[ctype];
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_utf16_32_get_ctype_code_range(OnigCtype ctype, OnigCodePoint* sb_out,
|
||||
const OnigCodePoint* ranges[])
|
||||
{
|
||||
*sb_out = 0x00;
|
||||
return onigenc_unicode_ctype_code_range(ctype, ranges);
|
||||
}
|
||||
|
||||
#include "st.h"
|
||||
|
||||
#define PROPERTY_NAME_MAX_SIZE (MAX_WORD_LENGTH + 1)
|
||||
|
||||
extern int
|
||||
onigenc_unicode_property_name_to_ctype(OnigEncoding enc, UChar* name, UChar* end)
|
||||
{
|
||||
int len;
|
||||
int ctype;
|
||||
UChar buf[PROPERTY_NAME_MAX_SIZE];
|
||||
UChar *p;
|
||||
OnigCodePoint code;
|
||||
|
||||
len = 0;
|
||||
for (p = name; p < end; p += enclen(enc, p)) {
|
||||
code = ONIGENC_MBC_TO_CODE(enc, p, end);
|
||||
if (code == ' ' || code == '-' || code == '_')
|
||||
continue;
|
||||
if (code >= 0x80)
|
||||
return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
|
||||
|
||||
buf[len++] = ONIGENC_ASCII_CODE_TO_LOWER_CASE(code);
|
||||
if (len >= PROPERTY_NAME_MAX_SIZE)
|
||||
return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
|
||||
}
|
||||
|
||||
buf[len] = 0;
|
||||
|
||||
if ((ctype = uniname2ctype(buf, len)) < 0) {
|
||||
return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
|
||||
}
|
||||
|
||||
return ctype;
|
||||
}
|
||||
|
||||
|
||||
static int
|
||||
code2_cmp(OnigCodePoint* x, OnigCodePoint* y)
|
||||
{
|
||||
if (x[0] == y[0] && x[1] == y[1]) return 0;
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
code2_hash(OnigCodePoint* x)
|
||||
{
|
||||
return (int )(x[0] + x[1]);
|
||||
}
|
||||
|
||||
static struct st_hash_type type_code2_hash = {
|
||||
code2_cmp,
|
||||
code2_hash,
|
||||
};
|
||||
|
||||
static int
|
||||
code3_cmp(OnigCodePoint* x, OnigCodePoint* y)
|
||||
{
|
||||
if (x[0] == y[0] && x[1] == y[1] && x[2] == y[2]) return 0;
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
code3_hash(OnigCodePoint* x)
|
||||
{
|
||||
return (int )(x[0] + x[1] + x[2]);
|
||||
}
|
||||
|
||||
static struct st_hash_type type_code3_hash = {
|
||||
code3_cmp,
|
||||
code3_hash,
|
||||
};
|
||||
|
||||
|
||||
/* Lookup tables built by init_case_fold_table(). */
static st_table* FoldTable;      /* fold-1, fold-2, fold-3 */
static st_table* Unfold1Table;   /* keyed by a single code point */
static st_table* Unfold2Table;   /* keyed by a two-code-point sequence */
static st_table* Unfold3Table;   /* keyed by a three-code-point sequence */
/* Becomes 1 once init_case_fold_table() has filled every table above. */
static int CaseFoldInited = 0;
|
||||
|
||||
static int init_case_fold_table(void)
|
||||
{
|
||||
const CaseFold_11_Type *p;
|
||||
const CaseUnfold_11_Type *p1;
|
||||
const CaseUnfold_12_Type *p2;
|
||||
const CaseUnfold_13_Type *p3;
|
||||
int i;
|
||||
|
||||
THREAD_ATOMIC_START;
|
||||
|
||||
FoldTable = st_init_numtable_with_size(FOLD_TABLE_SIZE);
|
||||
if (ONIG_IS_NULL(FoldTable)) return ONIGERR_MEMORY;
|
||||
for (i = 0; i < numberof(CaseFold); i++) {
|
||||
p = &CaseFold[i];
|
||||
st_add_direct(FoldTable, (st_data_t )p->from, (st_data_t )&(p->to));
|
||||
}
|
||||
for (i = 0; i < numberof(CaseFold_Locale); i++) {
|
||||
p = &CaseFold_Locale[i];
|
||||
st_add_direct(FoldTable, (st_data_t )p->from, (st_data_t )&(p->to));
|
||||
}
|
||||
|
||||
Unfold1Table = st_init_numtable_with_size(UNFOLD1_TABLE_SIZE);
|
||||
if (ONIG_IS_NULL(Unfold1Table)) return ONIGERR_MEMORY;
|
||||
|
||||
for (i = 0; i < numberof(CaseUnfold_11); i++) {
|
||||
p1 = &CaseUnfold_11[i];
|
||||
st_add_direct(Unfold1Table, (st_data_t )p1->from, (st_data_t )&(p1->to));
|
||||
}
|
||||
for (i = 0; i < numberof(CaseUnfold_11_Locale); i++) {
|
||||
p1 = &CaseUnfold_11_Locale[i];
|
||||
st_add_direct(Unfold1Table, (st_data_t )p1->from, (st_data_t )&(p1->to));
|
||||
}
|
||||
|
||||
Unfold2Table = st_init_table_with_size(&type_code2_hash, UNFOLD2_TABLE_SIZE);
|
||||
if (ONIG_IS_NULL(Unfold2Table)) return ONIGERR_MEMORY;
|
||||
|
||||
for (i = 0; i < numberof(CaseUnfold_12); i++) {
|
||||
p2 = &CaseUnfold_12[i];
|
||||
st_add_direct(Unfold2Table, (st_data_t )p2->from, (st_data_t )(&p2->to));
|
||||
}
|
||||
for (i = 0; i < numberof(CaseUnfold_12_Locale); i++) {
|
||||
p2 = &CaseUnfold_12_Locale[i];
|
||||
st_add_direct(Unfold2Table, (st_data_t )p2->from, (st_data_t )(&p2->to));
|
||||
}
|
||||
|
||||
Unfold3Table = st_init_table_with_size(&type_code3_hash, UNFOLD3_TABLE_SIZE);
|
||||
if (ONIG_IS_NULL(Unfold3Table)) return ONIGERR_MEMORY;
|
||||
|
||||
for (i = 0; i < numberof(CaseUnfold_13); i++) {
|
||||
p3 = &CaseUnfold_13[i];
|
||||
st_add_direct(Unfold3Table, (st_data_t )p3->from, (st_data_t )(&p3->to));
|
||||
}
|
||||
|
||||
CaseFoldInited = 1;
|
||||
THREAD_ATOMIC_END;
|
||||
return 0;
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_unicode_mbc_case_fold(OnigEncoding enc,
|
||||
OnigCaseFoldType flag ARG_UNUSED, const UChar** pp, const UChar* end,
|
||||
UChar* fold)
|
||||
{
|
||||
CodePointList3 *to;
|
||||
OnigCodePoint code;
|
||||
int i, len, rlen;
|
||||
const UChar *p = *pp;
|
||||
|
||||
if (CaseFoldInited == 0) init_case_fold_table();
|
||||
|
||||
code = ONIGENC_MBC_TO_CODE(enc, p, end);
|
||||
len = enclen(enc, p);
|
||||
*pp += len;
|
||||
|
||||
#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
|
||||
if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) {
|
||||
if (code == 0x0049) {
|
||||
return ONIGENC_CODE_TO_MBC(enc, 0x0131, fold);
|
||||
}
|
||||
else if (code == 0x0130) {
|
||||
return ONIGENC_CODE_TO_MBC(enc, 0x0069, fold);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if (onig_st_lookup(FoldTable, (st_data_t )code, (void* )&to) != 0) {
|
||||
if (to->n == 1) {
|
||||
return ONIGENC_CODE_TO_MBC(enc, to->code[0], fold);
|
||||
}
|
||||
#if 0
|
||||
/* NO NEEDS TO CHECK */
|
||||
else if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
|
||||
#else
|
||||
else {
|
||||
#endif
|
||||
rlen = 0;
|
||||
for (i = 0; i < to->n; i++) {
|
||||
len = ONIGENC_CODE_TO_MBC(enc, to->code[i], fold);
|
||||
fold += len;
|
||||
rlen += len;
|
||||
}
|
||||
return rlen;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < len; i++) {
|
||||
*fold++ = *p++;
|
||||
}
|
||||
return len;
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_unicode_apply_all_case_fold(OnigCaseFoldType flag,
|
||||
OnigApplyAllCaseFoldFunc f, void* arg)
|
||||
{
|
||||
const CaseUnfold_11_Type* p11;
|
||||
OnigCodePoint code;
|
||||
int i, j, k, r;
|
||||
|
||||
/* if (CaseFoldInited == 0) init_case_fold_table(); */
|
||||
|
||||
for (i = 0; i < numberof(CaseUnfold_11); i++) {
|
||||
p11 = &CaseUnfold_11[i];
|
||||
for (j = 0; j < p11->to.n; j++) {
|
||||
code = p11->from;
|
||||
r = (*f)(p11->to.code[j], &code, 1, arg);
|
||||
if (r != 0) return r;
|
||||
|
||||
code = p11->to.code[j];
|
||||
r = (*f)(p11->from, &code, 1, arg);
|
||||
if (r != 0) return r;
|
||||
|
||||
for (k = 0; k < j; k++) {
|
||||
r = (*f)(p11->to.code[j], (OnigCodePoint* )(&p11->to.code[k]), 1, arg);
|
||||
if (r != 0) return r;
|
||||
|
||||
r = (*f)(p11->to.code[k], (OnigCodePoint* )(&p11->to.code[j]), 1, arg);
|
||||
if (r != 0) return r;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
|
||||
if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) {
|
||||
code = 0x0131;
|
||||
r = (*f)(0x0049, &code, 1, arg);
|
||||
if (r != 0) return r;
|
||||
code = 0x0049;
|
||||
r = (*f)(0x0131, &code, 1, arg);
|
||||
if (r != 0) return r;
|
||||
|
||||
code = 0x0130;
|
||||
r = (*f)(0x0069, &code, 1, arg);
|
||||
if (r != 0) return r;
|
||||
code = 0x0069;
|
||||
r = (*f)(0x0130, &code, 1, arg);
|
||||
if (r != 0) return r;
|
||||
}
|
||||
else {
|
||||
#endif
|
||||
for (i = 0; i < numberof(CaseUnfold_11_Locale); i++) {
|
||||
p11 = &CaseUnfold_11_Locale[i];
|
||||
for (j = 0; j < p11->to.n; j++) {
|
||||
code = p11->from;
|
||||
r = (*f)(p11->to.code[j], &code, 1, arg);
|
||||
if (r != 0) return r;
|
||||
|
||||
code = p11->to.code[j];
|
||||
r = (*f)(p11->from, &code, 1, arg);
|
||||
if (r != 0) return r;
|
||||
|
||||
for (k = 0; k < j; k++) {
|
||||
r = (*f)(p11->to.code[j], (OnigCodePoint* )(&p11->to.code[k]),
|
||||
1, arg);
|
||||
if (r != 0) return r;
|
||||
|
||||
r = (*f)(p11->to.code[k], (OnigCodePoint* )(&p11->to.code[j]),
|
||||
1, arg);
|
||||
if (r != 0) return r;
|
||||
}
|
||||
}
|
||||
}
|
||||
#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
|
||||
}
|
||||
#endif
|
||||
|
||||
if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
|
||||
for (i = 0; i < numberof(CaseUnfold_12); i++) {
|
||||
for (j = 0; j < CaseUnfold_12[i].to.n; j++) {
|
||||
r = (*f)(CaseUnfold_12[i].to.code[j],
|
||||
(OnigCodePoint* )CaseUnfold_12[i].from, 2, arg);
|
||||
if (r != 0) return r;
|
||||
|
||||
for (k = 0; k < CaseUnfold_12[i].to.n; k++) {
|
||||
if (k == j) continue;
|
||||
|
||||
r = (*f)(CaseUnfold_12[i].to.code[j],
|
||||
(OnigCodePoint* )(&CaseUnfold_12[i].to.code[k]), 1, arg);
|
||||
if (r != 0) return r;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
|
||||
if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) == 0) {
|
||||
#endif
|
||||
for (i = 0; i < numberof(CaseUnfold_12_Locale); i++) {
|
||||
for (j = 0; j < CaseUnfold_12_Locale[i].to.n; j++) {
|
||||
r = (*f)(CaseUnfold_12_Locale[i].to.code[j],
|
||||
(OnigCodePoint* )CaseUnfold_12_Locale[i].from, 2, arg);
|
||||
if (r != 0) return r;
|
||||
|
||||
for (k = 0; k < CaseUnfold_12_Locale[i].to.n; k++) {
|
||||
if (k == j) continue;
|
||||
|
||||
r = (*f)(CaseUnfold_12_Locale[i].to.code[j],
|
||||
(OnigCodePoint* )(&CaseUnfold_12_Locale[i].to.code[k]),
|
||||
1, arg);
|
||||
if (r != 0) return r;
|
||||
}
|
||||
}
|
||||
}
|
||||
#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
|
||||
}
|
||||
#endif
|
||||
|
||||
for (i = 0; i < numberof(CaseUnfold_13); i++) {
|
||||
for (j = 0; j < CaseUnfold_13[i].to.n; j++) {
|
||||
r = (*f)(CaseUnfold_13[i].to.code[j],
|
||||
(OnigCodePoint* )CaseUnfold_13[i].from, 3, arg);
|
||||
if (r != 0) return r;
|
||||
|
||||
for (k = 0; k < CaseUnfold_13[i].to.n; k++) {
|
||||
if (k == j) continue;
|
||||
|
||||
r = (*f)(CaseUnfold_13[i].to.code[j],
|
||||
(OnigCodePoint* )(&CaseUnfold_13[i].to.code[k]), 1, arg);
|
||||
if (r != 0) return r;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
|
||||
OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end,
|
||||
OnigCaseFoldCodeItem items[])
|
||||
{
|
||||
int n, i, j, k, len;
|
||||
OnigCodePoint code, codes[3];
|
||||
CodePointList3 *to, *z3;
|
||||
CodePointList2 *z2;
|
||||
|
||||
if (CaseFoldInited == 0) init_case_fold_table();
|
||||
|
||||
n = 0;
|
||||
|
||||
code = ONIGENC_MBC_TO_CODE(enc, p, end);
|
||||
len = enclen(enc, p);
|
||||
|
||||
#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
|
||||
if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) {
|
||||
if (code == 0x0049) {
|
||||
items[0].byte_len = len;
|
||||
items[0].code_len = 1;
|
||||
items[0].code[0] = 0x0131;
|
||||
return 1;
|
||||
}
|
||||
else if (code == 0x0130) {
|
||||
items[0].byte_len = len;
|
||||
items[0].code_len = 1;
|
||||
items[0].code[0] = 0x0069;
|
||||
return 1;
|
||||
}
|
||||
else if (code == 0x0131) {
|
||||
items[0].byte_len = len;
|
||||
items[0].code_len = 1;
|
||||
items[0].code[0] = 0x0049;
|
||||
return 1;
|
||||
}
|
||||
else if (code == 0x0069) {
|
||||
items[0].byte_len = len;
|
||||
items[0].code_len = 1;
|
||||
items[0].code[0] = 0x0130;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if (onig_st_lookup(FoldTable, (st_data_t )code, (void* )&to) != 0) {
|
||||
if (to->n == 1) {
|
||||
OnigCodePoint orig_code = code;
|
||||
|
||||
items[0].byte_len = len;
|
||||
items[0].code_len = 1;
|
||||
items[0].code[0] = to->code[0];
|
||||
n++;
|
||||
|
||||
code = to->code[0];
|
||||
if (onig_st_lookup(Unfold1Table, (st_data_t )code, (void* )&to) != 0) {
|
||||
for (i = 0; i < to->n; i++) {
|
||||
if (to->code[i] != orig_code) {
|
||||
items[n].byte_len = len;
|
||||
items[n].code_len = 1;
|
||||
items[n].code[0] = to->code[i];
|
||||
n++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
|
||||
OnigCodePoint cs[3][4];
|
||||
int fn, ncs[3];
|
||||
|
||||
for (fn = 0; fn < to->n; fn++) {
|
||||
cs[fn][0] = to->code[fn];
|
||||
if (onig_st_lookup(Unfold1Table, (st_data_t )cs[fn][0],
|
||||
(void* )&z3) != 0) {
|
||||
for (i = 0; i < z3->n; i++) {
|
||||
cs[fn][i+1] = z3->code[i];
|
||||
}
|
||||
ncs[fn] = z3->n + 1;
|
||||
}
|
||||
else
|
||||
ncs[fn] = 1;
|
||||
}
|
||||
|
||||
if (fn == 2) {
|
||||
for (i = 0; i < ncs[0]; i++) {
|
||||
for (j = 0; j < ncs[1]; j++) {
|
||||
items[n].byte_len = len;
|
||||
items[n].code_len = 2;
|
||||
items[n].code[0] = cs[0][i];
|
||||
items[n].code[1] = cs[1][j];
|
||||
n++;
|
||||
}
|
||||
}
|
||||
|
||||
if (onig_st_lookup(Unfold2Table, (st_data_t )to->code,
|
||||
(void* )&z2) != 0) {
|
||||
for (i = 0; i < z2->n; i++) {
|
||||
if (z2->code[i] == code) continue;
|
||||
|
||||
items[n].byte_len = len;
|
||||
items[n].code_len = 1;
|
||||
items[n].code[0] = z2->code[i];
|
||||
n++;
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
for (i = 0; i < ncs[0]; i++) {
|
||||
for (j = 0; j < ncs[1]; j++) {
|
||||
for (k = 0; k < ncs[2]; k++) {
|
||||
items[n].byte_len = len;
|
||||
items[n].code_len = 3;
|
||||
items[n].code[0] = cs[0][i];
|
||||
items[n].code[1] = cs[1][j];
|
||||
items[n].code[2] = cs[2][k];
|
||||
n++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (onig_st_lookup(Unfold3Table, (st_data_t )to->code,
|
||||
(void* )&z2) != 0) {
|
||||
for (i = 0; i < z2->n; i++) {
|
||||
if (z2->code[i] == code) continue;
|
||||
|
||||
items[n].byte_len = len;
|
||||
items[n].code_len = 1;
|
||||
items[n].code[0] = z2->code[i];
|
||||
n++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* multi char folded code is not head of another folded multi char */
|
||||
flag = 0; /* DISABLE_CASE_FOLD_MULTI_CHAR(flag); */
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (onig_st_lookup(Unfold1Table, (st_data_t )code, (void* )&to) != 0) {
|
||||
for (i = 0; i < to->n; i++) {
|
||||
items[n].byte_len = len;
|
||||
items[n].code_len = 1;
|
||||
items[n].code[0] = to->code[i];
|
||||
n++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
|
||||
p += len;
|
||||
if (p < end) {
|
||||
int clen;
|
||||
|
||||
codes[0] = code;
|
||||
code = ONIGENC_MBC_TO_CODE(enc, p, end);
|
||||
if (onig_st_lookup(FoldTable, (st_data_t )code, (void* )&to) != 0
|
||||
&& to->n == 1) {
|
||||
codes[1] = to->code[0];
|
||||
}
|
||||
else
|
||||
codes[1] = code;
|
||||
|
||||
clen = enclen(enc, p);
|
||||
len += clen;
|
||||
if (onig_st_lookup(Unfold2Table, (st_data_t )codes, (void* )&z2) != 0) {
|
||||
for (i = 0; i < z2->n; i++) {
|
||||
items[n].byte_len = len;
|
||||
items[n].code_len = 1;
|
||||
items[n].code[0] = z2->code[i];
|
||||
n++;
|
||||
}
|
||||
}
|
||||
|
||||
p += clen;
|
||||
if (p < end) {
|
||||
code = ONIGENC_MBC_TO_CODE(enc, p, end);
|
||||
if (onig_st_lookup(FoldTable, (st_data_t )code, (void* )&to) != 0
|
||||
&& to->n == 1) {
|
||||
codes[2] = to->code[0];
|
||||
}
|
||||
else
|
||||
codes[2] = code;
|
||||
|
||||
clen = enclen(enc, p);
|
||||
len += clen;
|
||||
if (onig_st_lookup(Unfold3Table, (st_data_t )codes,
|
||||
(void* )&z2) != 0) {
|
||||
for (i = 0; i < z2->n; i++) {
|
||||
items[n].byte_len = len;
|
||||
items[n].code_len = 1;
|
||||
items[n].code[0] = z2->code[i];
|
||||
n++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return n;
|
||||
}
|
2238
src/Onigmo/enc/unicode/casefold.h
Normal file
2238
src/Onigmo/enc/unicode/casefold.h
Normal file
File diff suppressed because it is too large
Load Diff
28722
src/Onigmo/enc/unicode/name2ctype.h
Normal file
28722
src/Onigmo/enc/unicode/name2ctype.h
Normal file
File diff suppressed because it is too large
Load Diff
223
src/Onigmo/enc/utf16_be.c
Normal file
223
src/Onigmo/enc/utf16_be.c
Normal file
@ -0,0 +1,223 @@
|
||||
/**********************************************************************
|
||||
utf16_be.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "regenc.h"
|
||||
|
||||
/* Character length in bytes, indexed by the high-order byte of a UTF-16 code
   unit: 4 for 0xd8-0xdb, 2 for every other value. */
static const int EncLen_UTF16[] = {
  /* 0x00 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0x10 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0x20 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0x30 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0x40 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0x50 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0x60 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0x70 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0x80 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0x90 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xa0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xb0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xc0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xd0 */ 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2,
  /* 0xe0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xf0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
};
|
||||
|
||||
static int
|
||||
utf16be_mbc_enc_len(const UChar* p)
|
||||
{
|
||||
return EncLen_UTF16[*p];
|
||||
}
|
||||
|
||||
static int
|
||||
utf16be_is_mbc_newline(const UChar* p, const UChar* end)
|
||||
{
|
||||
if (p + 1 < end) {
|
||||
if (*(p+1) == 0x0a && *p == 0x00)
|
||||
return 1;
|
||||
#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
|
||||
if ((*(p+1) == 0x0b || *(p+1) == 0x0c || *(p+1) == 0x0d || *(p+1) == 0x85)
|
||||
&& *p == 0x00)
|
||||
return 1;
|
||||
if (*p == 0x20 && (*(p+1) == 0x29 || *(p+1) == 0x28))
|
||||
return 1;
|
||||
#endif
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static OnigCodePoint
|
||||
utf16be_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED)
|
||||
{
|
||||
OnigCodePoint code;
|
||||
|
||||
if (UTF16_IS_SURROGATE_FIRST(*p)) {
|
||||
code = ((((p[0] - 0xd8) << 2) + ((p[1] & 0xc0) >> 6) + 1) << 16)
|
||||
+ ((((p[1] & 0x3f) << 2) + (p[2] - 0xdc)) << 8)
|
||||
+ p[3];
|
||||
}
|
||||
else {
|
||||
code = p[0] * 256 + p[1];
|
||||
}
|
||||
return code;
|
||||
}
|
||||
|
||||
static int
|
||||
utf16be_code_to_mbclen(OnigCodePoint code)
|
||||
{
|
||||
return (code > 0xffff ? 4 : 2);
|
||||
}
|
||||
|
||||
static int
|
||||
utf16be_code_to_mbc(OnigCodePoint code, UChar *buf)
|
||||
{
|
||||
UChar* p = buf;
|
||||
|
||||
if (code > 0xffff) {
|
||||
unsigned int plane, high;
|
||||
|
||||
plane = (code >> 16) - 1;
|
||||
*p++ = (plane >> 2) + 0xd8;
|
||||
high = (code & 0xff00) >> 8;
|
||||
*p++ = ((plane & 0x03) << 6) + (high >> 2);
|
||||
*p++ = (high & 0x03) + 0xdc;
|
||||
*p = (UChar )(code & 0xff);
|
||||
return 4;
|
||||
}
|
||||
else {
|
||||
*p++ = (UChar )((code & 0xff00) >> 8);
|
||||
*p++ = (UChar )(code & 0xff);
|
||||
return 2;
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
utf16be_mbc_case_fold(OnigCaseFoldType flag,
|
||||
const UChar** pp, const UChar* end, UChar* fold)
|
||||
{
|
||||
const UChar* p = *pp;
|
||||
|
||||
if (ONIGENC_IS_ASCII_CODE(*(p+1)) && *p == 0) {
|
||||
p++;
|
||||
#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
|
||||
if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) {
|
||||
if (*p == 0x49) {
|
||||
*fold++ = 0x01;
|
||||
*fold = 0x31;
|
||||
(*pp) += 2;
|
||||
return 2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
*fold++ = 0;
|
||||
*fold = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
|
||||
*pp += 2;
|
||||
return 2;
|
||||
}
|
||||
else
|
||||
return onigenc_unicode_mbc_case_fold(ONIG_ENCODING_UTF16_BE, flag,
|
||||
pp, end, fold);
|
||||
}
|
||||
|
||||
#if 0
/*
 * Compiled out.  Advances *pp past the character at *pp and reports TRUE or
 * FALSE for characters whose high byte is 0, based on their ISO-8859-1
 * upper/lower ctype bits; any other character reports FALSE.
 */
static int
utf16be_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
{
  const UChar* cur = *pp;
  int ch, bits;

  (*pp) += EncLen_UTF16[*cur];

  if (*cur != 0)
    return FALSE;

  cur++;
  if (*cur == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
    return TRUE;
  }

  ch = *cur;
  bits = ONIGENC_IS_UNICODE_ISO_8859_1_BIT_CTYPE(ch,
                       (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));

  /* NOTE(review): if BIT_CTYPE_LOWER is a non-zero mask this condition is
     always true and the final return is unreachable; presumably `&` was
     intended -- confirm before re-enabling this function. */
  if ((bits | BIT_CTYPE_LOWER) != 0) {
    /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
    return (ch >= 0xaa && ch <= 0xba) ? FALSE : TRUE;
  }
  return (bits != 0 ? TRUE : FALSE);
}
#endif
|
||||
|
||||
static UChar*
|
||||
utf16be_left_adjust_char_head(const UChar* start, const UChar* s)
|
||||
{
|
||||
if (s <= start) return (UChar* )s;
|
||||
|
||||
if ((s - start) % 2 == 1) {
|
||||
s--;
|
||||
}
|
||||
|
||||
if (UTF16_IS_SURROGATE_SECOND(*s) && s > start + 1)
|
||||
s -= 2;
|
||||
|
||||
return (UChar* )s;
|
||||
}
|
||||
|
||||
static int
|
||||
utf16be_get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
||||
const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
|
||||
{
|
||||
return onigenc_unicode_get_case_fold_codes_by_str(ONIG_ENCODING_UTF16_BE,
|
||||
flag, p, end, items);
|
||||
}
|
||||
|
||||
OnigEncodingType OnigEncodingUTF16_BE = {
|
||||
utf16be_mbc_enc_len,
|
||||
"UTF-16BE", /* name */
|
||||
4, /* max byte length */
|
||||
2, /* min byte length */
|
||||
utf16be_is_mbc_newline,
|
||||
utf16be_mbc_to_code,
|
||||
utf16be_code_to_mbclen,
|
||||
utf16be_code_to_mbc,
|
||||
utf16be_mbc_case_fold,
|
||||
onigenc_unicode_apply_all_case_fold,
|
||||
utf16be_get_case_fold_codes_by_str,
|
||||
onigenc_unicode_property_name_to_ctype,
|
||||
onigenc_unicode_is_code_ctype,
|
||||
onigenc_utf16_32_get_ctype_code_range,
|
||||
utf16be_left_adjust_char_head,
|
||||
onigenc_always_false_is_allowed_reverse_match,
|
||||
ONIGENC_FLAG_UNICODE,
|
||||
};
|
224
src/Onigmo/enc/utf16_le.c
Normal file
224
src/Onigmo/enc/utf16_le.c
Normal file
@ -0,0 +1,224 @@
|
||||
/**********************************************************************
|
||||
utf16_le.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "regenc.h"
|
||||
|
||||
/* Character length in bytes, indexed by the high-order byte of a UTF-16 code
   unit: 4 for 0xd8-0xdb, 2 for every other value. */
static const int EncLen_UTF16[] = {
  /* 0x00 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0x10 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0x20 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0x30 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0x40 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0x50 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0x60 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0x70 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0x80 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0x90 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xa0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xb0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xc0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xd0 */ 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2,
  /* 0xe0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xf0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
};
|
||||
|
||||
static int
|
||||
utf16le_code_to_mbclen(OnigCodePoint code)
|
||||
{
|
||||
return (code > 0xffff ? 4 : 2);
|
||||
}
|
||||
|
||||
static int
|
||||
utf16le_mbc_enc_len(const UChar* p)
|
||||
{
|
||||
return EncLen_UTF16[*(p+1)];
|
||||
}
|
||||
|
||||
static int
|
||||
utf16le_is_mbc_newline(const UChar* p, const UChar* end)
|
||||
{
|
||||
if (p + 1 < end) {
|
||||
if (*p == 0x0a && *(p+1) == 0x00)
|
||||
return 1;
|
||||
#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
|
||||
if ((*p == 0x0b || *p == 0x0c || *p == 0x0d || *p == 0x85)
|
||||
&& *(p+1) == 0x00)
|
||||
return 1;
|
||||
if (*(p+1) == 0x20 && (*p == 0x29 || *p == 0x28))
|
||||
return 1;
|
||||
#endif
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static OnigCodePoint
|
||||
utf16le_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED)
|
||||
{
|
||||
OnigCodePoint code;
|
||||
UChar c0 = *p;
|
||||
UChar c1 = *(p+1);
|
||||
|
||||
if (UTF16_IS_SURROGATE_FIRST(c1)) {
|
||||
code = ((((c1 - 0xd8) << 2) + ((c0 & 0xc0) >> 6) + 1) << 16)
|
||||
+ ((((c0 & 0x3f) << 2) + (p[3] - 0xdc)) << 8)
|
||||
+ p[2];
|
||||
}
|
||||
else {
|
||||
code = c1 * 256 + p[0];
|
||||
}
|
||||
return code;
|
||||
}
|
||||
|
||||
static int
|
||||
utf16le_code_to_mbc(OnigCodePoint code, UChar *buf)
|
||||
{
|
||||
UChar* p = buf;
|
||||
|
||||
if (code > 0xffff) {
|
||||
unsigned int plane, high;
|
||||
|
||||
plane = (code >> 16) - 1;
|
||||
high = (code & 0xff00) >> 8;
|
||||
|
||||
*p++ = ((plane & 0x03) << 6) + (high >> 2);
|
||||
*p++ = (plane >> 2) + 0xd8;
|
||||
*p++ = (UChar )(code & 0xff);
|
||||
*p = (high & 0x03) + 0xdc;
|
||||
return 4;
|
||||
}
|
||||
else {
|
||||
*p++ = (UChar )(code & 0xff);
|
||||
*p++ = (UChar )((code & 0xff00) >> 8);
|
||||
return 2;
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
utf16le_mbc_case_fold(OnigCaseFoldType flag,
|
||||
const UChar** pp, const UChar* end, UChar* fold)
|
||||
{
|
||||
const UChar* p = *pp;
|
||||
|
||||
if (ONIGENC_IS_ASCII_CODE(*p) && *(p+1) == 0) {
|
||||
#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
|
||||
if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) {
|
||||
if (*p == 0x49) {
|
||||
*fold++ = 0x31;
|
||||
*fold = 0x01;
|
||||
(*pp) += 2;
|
||||
return 2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
*fold++ = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
|
||||
*fold = 0;
|
||||
*pp += 2;
|
||||
return 2;
|
||||
}
|
||||
else
|
||||
return onigenc_unicode_mbc_case_fold(ONIG_ENCODING_UTF16_LE, flag, pp, end,
|
||||
fold);
|
||||
}
|
||||
|
||||
#if 0
/*
 * Compiled out.  Advances *pp past the character at *pp and reports TRUE or
 * FALSE for characters whose high byte is 0, based on their ISO-8859-1
 * upper/lower ctype bits; any other character reports FALSE.
 */
static int
utf16le_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp,
                         const UChar* end)
{
  const UChar* cur = *pp;
  int ch, bits;

  (*pp) += EncLen_UTF16[*(cur+1)];

  if (*(cur+1) != 0)
    return FALSE;

  if (*cur == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
    return TRUE;
  }

  ch = *cur;
  bits = ONIGENC_IS_UNICODE_ISO_8859_1_BIT_CTYPE(ch,
                       (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));

  /* NOTE(review): if BIT_CTYPE_LOWER is a non-zero mask this condition is
     always true and the final return is unreachable; presumably `&` was
     intended -- confirm before re-enabling this function. */
  if ((bits | BIT_CTYPE_LOWER) != 0) {
    /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
    return (ch >= 0xaa && ch <= 0xba) ? FALSE : TRUE;
  }
  return (bits != 0 ? TRUE : FALSE);
}
#endif
|
||||
|
||||
static UChar*
|
||||
utf16le_left_adjust_char_head(const UChar* start, const UChar* s)
|
||||
{
|
||||
if (s <= start) return (UChar* )s;
|
||||
|
||||
if ((s - start) % 2 == 1) {
|
||||
s--;
|
||||
}
|
||||
|
||||
if (UTF16_IS_SURROGATE_SECOND(*(s+1)) && s > start + 1)
|
||||
s -= 2;
|
||||
|
||||
return (UChar* )s;
|
||||
}
|
||||
|
||||
static int
|
||||
utf16le_get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
||||
const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
|
||||
{
|
||||
return onigenc_unicode_get_case_fold_codes_by_str(ONIG_ENCODING_UTF16_LE,
|
||||
flag, p, end, items);
|
||||
}
|
||||
|
||||
OnigEncodingType OnigEncodingUTF16_LE = {
|
||||
utf16le_mbc_enc_len,
|
||||
"UTF-16LE", /* name */
|
||||
4, /* max byte length */
|
||||
2, /* min byte length */
|
||||
utf16le_is_mbc_newline,
|
||||
utf16le_mbc_to_code,
|
||||
utf16le_code_to_mbclen,
|
||||
utf16le_code_to_mbc,
|
||||
utf16le_mbc_case_fold,
|
||||
onigenc_unicode_apply_all_case_fold,
|
||||
utf16le_get_case_fold_codes_by_str,
|
||||
onigenc_unicode_property_name_to_ctype,
|
||||
onigenc_unicode_is_code_ctype,
|
||||
onigenc_utf16_32_get_ctype_code_range,
|
||||
utf16le_left_adjust_char_head,
|
||||
onigenc_always_false_is_allowed_reverse_match,
|
||||
ONIGENC_FLAG_UNICODE,
|
||||
};
|
181
src/Onigmo/enc/utf32_be.c
Normal file
181
src/Onigmo/enc/utf32_be.c
Normal file
@ -0,0 +1,181 @@
|
||||
/**********************************************************************
|
||||
utf32_be.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "regenc.h"
|
||||
|
||||
/*
 * Byte length of the character at p in UTF-32BE.
 * Always returns 4; p is not examined (hence ARG_UNUSED).
 */
static int
|
||||
utf32be_mbc_enc_len(const UChar* p ARG_UNUSED)
|
||||
{
|
||||
return 4;
|
||||
}
|
||||
|
||||
static int
|
||||
utf32be_is_mbc_newline(const UChar* p, const UChar* end)
|
||||
{
|
||||
if (p + 3 < end) {
|
||||
if (*(p+3) == 0x0a && *(p+2) == 0 && *(p+1) == 0 && *p == 0)
|
||||
return 1;
|
||||
#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
|
||||
if ((*(p+3) == 0x0b || *(p+3) == 0x0c || *(p+3) == 0x0d || *(p+3) == 0x85)
|
||||
&& *(p+2) == 0 && *(p+1) == 0 && *p == 0x00)
|
||||
return 1;
|
||||
if (*(p+2) == 0x20 && (*(p+3) == 0x29 || *(p+3) == 0x28)
|
||||
&& *(p+1) == 0 && *p == 0)
|
||||
return 1;
|
||||
#endif
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static OnigCodePoint
|
||||
utf32be_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED)
|
||||
{
|
||||
return (OnigCodePoint )(((p[0] * 256 + p[1]) * 256 + p[2]) * 256 + p[3]);
|
||||
}
|
||||
|
||||
/*
 * Number of bytes needed to encode code in UTF-32BE.
 * Always returns 4; code is not examined (hence ARG_UNUSED).
 */
static int
|
||||
utf32be_code_to_mbclen(OnigCodePoint code ARG_UNUSED)
|
||||
{
|
||||
return 4;
|
||||
}
|
||||
|
||||
static int
|
||||
utf32be_code_to_mbc(OnigCodePoint code, UChar *buf)
|
||||
{
|
||||
UChar* p = buf;
|
||||
|
||||
*p++ = (UChar )((code & 0xff000000) >>24);
|
||||
*p++ = (UChar )((code & 0xff0000) >>16);
|
||||
*p++ = (UChar )((code & 0xff00) >> 8);
|
||||
*p++ = (UChar ) (code & 0xff);
|
||||
return 4;
|
||||
}
|
||||
|
||||
static int
|
||||
utf32be_mbc_case_fold(OnigCaseFoldType flag,
|
||||
const UChar** pp, const UChar* end, UChar* fold)
|
||||
{
|
||||
const UChar* p = *pp;
|
||||
|
||||
if (ONIGENC_IS_ASCII_CODE(*(p+3)) && *(p+2) == 0 && *(p+1) == 0 && *p == 0) {
|
||||
*fold++ = 0;
|
||||
*fold++ = 0;
|
||||
|
||||
#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
|
||||
if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) {
|
||||
if (*(p+3) == 0x49) {
|
||||
*fold++ = 0x01;
|
||||
*fold = 0x31;
|
||||
(*pp) += 4;
|
||||
return 4;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
*fold++ = 0;
|
||||
*fold = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*(p+3));
|
||||
*pp += 4;
|
||||
return 4;
|
||||
}
|
||||
else
|
||||
return onigenc_unicode_mbc_case_fold(ONIG_ENCODING_UTF32_BE, flag, pp, end,
|
||||
fold);
|
||||
}
|
||||
|
||||
#if 0
|
||||
static int
|
||||
utf32be_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
|
||||
{
|
||||
const UChar* p = *pp;
|
||||
|
||||
(*pp) += 4;
|
||||
|
||||
if (*(p+2) == 0 && *(p+1) == 0 && *p == 0) {
|
||||
int c, v;
|
||||
|
||||
p += 3;
|
||||
if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
c = *p;
|
||||
v = ONIGENC_IS_UNICODE_ISO_8859_1_BIT_CTYPE(c,
|
||||
(BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
|
||||
if ((v | BIT_CTYPE_LOWER) != 0) {
|
||||
/* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
|
||||
if (c >= 0xaa && c <= 0xba)
|
||||
return FALSE;
|
||||
else
|
||||
return TRUE;
|
||||
}
|
||||
return (v != 0 ? TRUE : FALSE);
|
||||
}
|
||||
|
||||
return FALSE;
|
||||
}
|
||||
#endif
|
||||
|
||||
static UChar*
|
||||
utf32be_left_adjust_char_head(const UChar* start, const UChar* s)
|
||||
{
|
||||
int rem;
|
||||
|
||||
if (s <= start) return (UChar* )s;
|
||||
|
||||
rem = (int )((s - start) % 4);
|
||||
return (UChar* )(s - rem);
|
||||
}
|
||||
|
||||
/*
 * Case-fold lookup entry for the UTF-32BE encoding table.
 * Forwards flag, p, end and items unchanged to
 * onigenc_unicode_get_case_fold_codes_by_str(), supplying
 * ONIG_ENCODING_UTF32_BE as the encoding, and returns its result.
 */
static int
|
||||
utf32be_get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
||||
const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
|
||||
{
|
||||
return onigenc_unicode_get_case_fold_codes_by_str(ONIG_ENCODING_UTF32_BE,
|
||||
flag, p, end, items);
|
||||
}
|
||||
|
||||
/*
 * Encoding descriptor for UTF-32BE.
 * The initializer is positional: entries must stay in the order of the
 * OnigEncodingType members (declared outside this file -- verify against
 * the header before reordering or inserting entries).
 * Entries prefixed utf32be_ are defined in this file; entries prefixed
 * onigenc_ are shared helpers defined elsewhere.
 */
OnigEncodingType OnigEncodingUTF32_BE = {
|
||||
utf32be_mbc_enc_len,
|
||||
"UTF-32BE", /* name */
|
||||
4, /* max byte length */
|
||||
4, /* min byte length */
|
||||
utf32be_is_mbc_newline,
|
||||
utf32be_mbc_to_code,
|
||||
utf32be_code_to_mbclen,
|
||||
utf32be_code_to_mbc,
|
||||
utf32be_mbc_case_fold,
|
||||
onigenc_unicode_apply_all_case_fold,
|
||||
utf32be_get_case_fold_codes_by_str,
|
||||
onigenc_unicode_property_name_to_ctype,
|
||||
onigenc_unicode_is_code_ctype,
|
||||
onigenc_utf16_32_get_ctype_code_range,
|
||||
utf32be_left_adjust_char_head,
|
||||
onigenc_always_false_is_allowed_reverse_match,
|
||||
ONIGENC_FLAG_UNICODE,
|
||||
};
|
181
src/Onigmo/enc/utf32_le.c
Normal file
181
src/Onigmo/enc/utf32_le.c
Normal file
@ -0,0 +1,181 @@
|
||||
/**********************************************************************
|
||||
utf32_le.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "regenc.h"
|
||||
|
||||
/*
 * Byte length of the character at p in UTF-32LE.
 * Always returns 4; p is not examined (hence ARG_UNUSED).
 */
static int
|
||||
utf32le_mbc_enc_len(const UChar* p ARG_UNUSED)
|
||||
{
|
||||
return 4;
|
||||
}
|
||||
|
||||
static int
|
||||
utf32le_is_mbc_newline(const UChar* p, const UChar* end)
|
||||
{
|
||||
if (p + 3 < end) {
|
||||
if (*p == 0x0a && *(p+1) == 0 && *(p+2) == 0 && *(p+3) == 0)
|
||||
return 1;
|
||||
#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
|
||||
if ((*p == 0x0b ||*p == 0x0c ||*p == 0x0d || *p == 0x85)
|
||||
&& *(p+1) == 0x00 && (p+2) == 0x00 && *(p+3) == 0x00)
|
||||
return 1;
|
||||
if (*(p+1) == 0x20 && (*p == 0x29 || *p == 0x28)
|
||||
&& *(p+2) == 0x00 && *(p+3) == 0x00)
|
||||
return 1;
|
||||
#endif
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static OnigCodePoint
|
||||
utf32le_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED)
|
||||
{
|
||||
return (OnigCodePoint )(((p[3] * 256 + p[2]) * 256 + p[1]) * 256 + p[0]);
|
||||
}
|
||||
|
||||
/*
 * Number of bytes needed to encode code in UTF-32LE.
 * Always returns 4; code is not examined (hence ARG_UNUSED).
 */
static int
|
||||
utf32le_code_to_mbclen(OnigCodePoint code ARG_UNUSED)
|
||||
{
|
||||
return 4;
|
||||
}
|
||||
|
||||
static int
|
||||
utf32le_code_to_mbc(OnigCodePoint code, UChar *buf)
|
||||
{
|
||||
UChar* p = buf;
|
||||
|
||||
*p++ = (UChar ) (code & 0xff);
|
||||
*p++ = (UChar )((code & 0xff00) >> 8);
|
||||
*p++ = (UChar )((code & 0xff0000) >>16);
|
||||
*p++ = (UChar )((code & 0xff000000) >>24);
|
||||
return 4;
|
||||
}
|
||||
|
||||
static int
|
||||
utf32le_mbc_case_fold(OnigCaseFoldType flag,
|
||||
const UChar** pp, const UChar* end, UChar* fold)
|
||||
{
|
||||
const UChar* p = *pp;
|
||||
|
||||
if (ONIGENC_IS_ASCII_CODE(*p) && *(p+1) == 0 && *(p+2) == 0 && *(p+3) == 0) {
|
||||
#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
|
||||
if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) {
|
||||
if (*p == 0x49) {
|
||||
*fold++ = 0x31;
|
||||
*fold++ = 0x01;
|
||||
}
|
||||
}
|
||||
else {
|
||||
#endif
|
||||
*fold++ = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
|
||||
*fold++ = 0;
|
||||
#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
|
||||
}
|
||||
#endif
|
||||
|
||||
*fold++ = 0;
|
||||
*fold = 0;
|
||||
*pp += 4;
|
||||
return 4;
|
||||
}
|
||||
else
|
||||
return onigenc_unicode_mbc_case_fold(ONIG_ENCODING_UTF32_LE, flag, pp, end,
|
||||
fold);
|
||||
}
|
||||
|
||||
#if 0
|
||||
static int
|
||||
utf32le_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
|
||||
{
|
||||
const UChar* p = *pp;
|
||||
|
||||
(*pp) += 4;
|
||||
|
||||
if (*(p+1) == 0 && *(p+2) == 0 && *(p+3) == 0) {
|
||||
int c, v;
|
||||
|
||||
if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
c = *p;
|
||||
v = ONIGENC_IS_UNICODE_ISO_8859_1_BIT_CTYPE(c,
|
||||
(BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
|
||||
if ((v | BIT_CTYPE_LOWER) != 0) {
|
||||
/* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
|
||||
if (c >= 0xaa && c <= 0xba)
|
||||
return FALSE;
|
||||
else
|
||||
return TRUE;
|
||||
}
|
||||
return (v != 0 ? TRUE : FALSE);
|
||||
}
|
||||
|
||||
return FALSE;
|
||||
}
|
||||
#endif
|
||||
|
||||
static UChar*
|
||||
utf32le_left_adjust_char_head(const UChar* start, const UChar* s)
|
||||
{
|
||||
int rem;
|
||||
|
||||
if (s <= start) return (UChar* )s;
|
||||
|
||||
rem = (int )((s - start) % 4);
|
||||
return (UChar* )(s - rem);
|
||||
}
|
||||
|
||||
/*
 * Case-fold lookup entry for the UTF-32LE encoding table.
 * Forwards flag, p, end and items unchanged to
 * onigenc_unicode_get_case_fold_codes_by_str(), supplying
 * ONIG_ENCODING_UTF32_LE as the encoding, and returns its result.
 */
static int
|
||||
utf32le_get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
||||
const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
|
||||
{
|
||||
return onigenc_unicode_get_case_fold_codes_by_str(ONIG_ENCODING_UTF32_LE,
|
||||
flag, p, end, items);
|
||||
}
|
||||
|
||||
/*
 * Encoding descriptor for UTF-32LE.
 * The initializer is positional: entries must stay in the order of the
 * OnigEncodingType members (declared outside this file -- verify against
 * the header before reordering or inserting entries).
 * Entries prefixed utf32le_ are defined in this file; entries prefixed
 * onigenc_ are shared helpers defined elsewhere.
 */
OnigEncodingType OnigEncodingUTF32_LE = {
|
||||
utf32le_mbc_enc_len,
|
||||
"UTF-32LE", /* name */
|
||||
4, /* max byte length */
|
||||
4, /* min byte length */
|
||||
utf32le_is_mbc_newline,
|
||||
utf32le_mbc_to_code,
|
||||
utf32le_code_to_mbclen,
|
||||
utf32le_code_to_mbc,
|
||||
utf32le_mbc_case_fold,
|
||||
onigenc_unicode_apply_all_case_fold,
|
||||
utf32le_get_case_fold_codes_by_str,
|
||||
onigenc_unicode_property_name_to_ctype,
|
||||
onigenc_unicode_is_code_ctype,
|
||||
onigenc_utf16_32_get_ctype_code_range,
|
||||
utf32le_left_adjust_char_head,
|
||||
onigenc_always_false_is_allowed_reverse_match,
|
||||
ONIGENC_FLAG_UNICODE,
|
||||
};
|
304
src/Onigmo/enc/utf8.c
Normal file
304
src/Onigmo/enc/utf8.c
Normal file
@ -0,0 +1,304 @@
|
||||
/**********************************************************************
|
||||
utf8.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "regenc.h"
|
||||
|
||||
#define USE_INVALID_CODE_SCHEME
|
||||
|
||||
#ifdef USE_INVALID_CODE_SCHEME
|
||||
/* virtual codepoint values for invalid encoding byte 0xfe and 0xff */
|
||||
#define INVALID_CODE_FE 0xfffffffe
|
||||
#define INVALID_CODE_FF 0xffffffff
|
||||
#define VALID_CODE_LIMIT 0x7fffffff
|
||||
#endif
|
||||
|
||||
/* True unless the top two bits of c are 10, i.e. unless c has the form of a
   UTF-8 continuation byte (0x80..0xbf). */
#define utf8_islead(c) ((UChar )((c) & 0xc0) != 0x80)
|
||||
|
||||
/*
 * Sequence length in bytes, indexed by the first byte of a sequence.
 * Bytes 0x00..0xbf and 0xfe..0xff map to 1; 0xc0..0xdf to 2; 0xe0..0xef
 * to 3; 0xf0..0xf7 to 4; 0xf8..0xfb to 5; 0xfc..0xfd to 6.
 */
static const int EncLen_UTF8[] = {
  /* 0x00 */ 1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x10 */ 1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x20 */ 1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x60 */ 1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x80 */ 1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x90 */ 1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xa0 */ 1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xb0 */ 1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xc0 */ 2, 2, 2, 2, 2, 2, 2, 2,   2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xd0 */ 2, 2, 2, 2, 2, 2, 2, 2,   2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xe0 */ 3, 3, 3, 3, 3, 3, 3, 3,   3, 3, 3, 3, 3, 3, 3, 3,
  /* 0xf0 */ 4, 4, 4, 4, 4, 4, 4, 4,   5, 5, 5, 5, 6, 6, 1, 1
};
|
||||
|
||||
/*
 * Byte length of the UTF-8 sequence starting at p, looked up in
 * EncLen_UTF8 by the value of the first byte only.
 */
static int
|
||||
mbc_enc_len(const UChar* p)
|
||||
{
|
||||
return EncLen_UTF8[*p];
|
||||
}
|
||||
|
||||
static int
|
||||
is_mbc_newline(const UChar* p, const UChar* end)
|
||||
{
|
||||
if (p < end) {
|
||||
if (*p == 0x0a) return 1;
|
||||
|
||||
#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
|
||||
if (*p == 0x0b || *p == 0x0c || *p == 0x0d) return 1;
|
||||
if (p + 1 < end) {
|
||||
if (*(p+1) == 0x85 && *p == 0xc2) /* U+0085 */
|
||||
return 1;
|
||||
if (p + 2 < end) {
|
||||
if ((*(p+2) == 0xa8 || *(p+2) == 0xa9)
|
||||
&& *(p+1) == 0x80 && *p == 0xe2) /* U+2028, U+2029 */
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static OnigCodePoint
|
||||
mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED)
|
||||
{
|
||||
int c, len;
|
||||
OnigCodePoint n;
|
||||
|
||||
len = mbc_enc_len(p);
|
||||
c = *p++;
|
||||
if (len > 1) {
|
||||
len--;
|
||||
n = c & ((1 << (6 - len)) - 1);
|
||||
while (len--) {
|
||||
c = *p++;
|
||||
n = (n << 6) | (c & ((1 << 6) - 1));
|
||||
}
|
||||
return n;
|
||||
}
|
||||
else {
|
||||
#ifdef USE_INVALID_CODE_SCHEME
|
||||
if (c > 0xfd) {
|
||||
return ((c == 0xfe) ? INVALID_CODE_FE : INVALID_CODE_FF);
|
||||
}
|
||||
#endif
|
||||
return (OnigCodePoint )c;
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
code_to_mbclen(OnigCodePoint code)
|
||||
{
|
||||
if ((code & 0xffffff80) == 0) return 1;
|
||||
else if ((code & 0xfffff800) == 0) return 2;
|
||||
else if ((code & 0xffff0000) == 0) return 3;
|
||||
else if ((code & 0xffe00000) == 0) return 4;
|
||||
else if ((code & 0xfc000000) == 0) return 5;
|
||||
else if ((code & 0x80000000) == 0) return 6;
|
||||
#ifdef USE_INVALID_CODE_SCHEME
|
||||
else if (code == INVALID_CODE_FE) return 1;
|
||||
else if (code == INVALID_CODE_FF) return 1;
|
||||
#endif
|
||||
else
|
||||
return ONIGERR_INVALID_CODE_POINT_VALUE;
|
||||
}
|
||||
|
||||
static int
|
||||
code_to_mbc(OnigCodePoint code, UChar *buf)
|
||||
{
|
||||
#define UTF8_TRAILS(code, shift) (UChar )((((code) >> (shift)) & 0x3f) | 0x80)
|
||||
#define UTF8_TRAIL0(code) (UChar )(((code) & 0x3f) | 0x80)
|
||||
|
||||
if ((code & 0xffffff80) == 0) {
|
||||
*buf = (UChar )code;
|
||||
return 1;
|
||||
}
|
||||
else {
|
||||
UChar *p = buf;
|
||||
|
||||
if ((code & 0xfffff800) == 0) {
|
||||
*p++ = (UChar )(((code>>6)& 0x1f) | 0xc0);
|
||||
}
|
||||
else if ((code & 0xffff0000) == 0) {
|
||||
*p++ = (UChar )(((code>>12) & 0x0f) | 0xe0);
|
||||
*p++ = UTF8_TRAILS(code, 6);
|
||||
}
|
||||
else if ((code & 0xffe00000) == 0) {
|
||||
*p++ = (UChar )(((code>>18) & 0x07) | 0xf0);
|
||||
*p++ = UTF8_TRAILS(code, 12);
|
||||
*p++ = UTF8_TRAILS(code, 6);
|
||||
}
|
||||
else if ((code & 0xfc000000) == 0) {
|
||||
*p++ = (UChar )(((code>>24) & 0x03) | 0xf8);
|
||||
*p++ = UTF8_TRAILS(code, 18);
|
||||
*p++ = UTF8_TRAILS(code, 12);
|
||||
*p++ = UTF8_TRAILS(code, 6);
|
||||
}
|
||||
else if ((code & 0x80000000) == 0) {
|
||||
*p++ = (UChar )(((code>>30) & 0x01) | 0xfc);
|
||||
*p++ = UTF8_TRAILS(code, 24);
|
||||
*p++ = UTF8_TRAILS(code, 18);
|
||||
*p++ = UTF8_TRAILS(code, 12);
|
||||
*p++ = UTF8_TRAILS(code, 6);
|
||||
}
|
||||
#ifdef USE_INVALID_CODE_SCHEME
|
||||
else if (code == INVALID_CODE_FE) {
|
||||
*p = 0xfe;
|
||||
return 1;
|
||||
}
|
||||
else if (code == INVALID_CODE_FF) {
|
||||
*p = 0xff;
|
||||
return 1;
|
||||
}
|
||||
#endif
|
||||
else {
|
||||
return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
|
||||
}
|
||||
|
||||
*p++ = UTF8_TRAIL0(code);
|
||||
return (int )(p - buf);
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
mbc_case_fold(OnigCaseFoldType flag, const UChar** pp,
|
||||
const UChar* end, UChar* fold)
|
||||
{
|
||||
const UChar* p = *pp;
|
||||
|
||||
if (ONIGENC_IS_MBC_ASCII(p)) {
|
||||
#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
|
||||
if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) {
|
||||
if (*p == 0x49) {
|
||||
*fold++ = 0xc4;
|
||||
*fold = 0xb1;
|
||||
(*pp)++;
|
||||
return 2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
*fold = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
|
||||
(*pp)++;
|
||||
return 1; /* return byte length of converted char to lower */
|
||||
}
|
||||
else {
|
||||
return onigenc_unicode_mbc_case_fold(ONIG_ENCODING_UTF8, flag,
|
||||
pp, end, fold);
|
||||
}
|
||||
}
|
||||
|
||||
#if 0
|
||||
static int
|
||||
is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
|
||||
{
|
||||
const UChar* p = *pp;
|
||||
|
||||
if (ONIGENC_IS_MBC_ASCII(p)) {
|
||||
(*pp)++;
|
||||
return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
|
||||
}
|
||||
else {
|
||||
(*pp) += mbc_enc_len(p);
|
||||
|
||||
if (*p == 0xc3) {
|
||||
int c = *(p + 1);
|
||||
if (c >= 0x80) {
|
||||
if (c <= (UChar )0x9e) { /* upper */
|
||||
if (c == (UChar )0x97) return FALSE;
|
||||
return TRUE;
|
||||
}
|
||||
else if (c >= (UChar )0xa0 && c <= (UChar )0xbe) { /* lower */
|
||||
if (c == (UChar )'\267') return FALSE;
|
||||
return TRUE;
|
||||
}
|
||||
else if (c == (UChar )0x9f &&
|
||||
(flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
|
||||
return TRUE;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return FALSE;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
/*
 * Code-range lookup entry for the UTF-8 encoding table.
 * Stores 0x80 in *sb_out, then returns the result of
 * onigenc_unicode_ctype_code_range(ctype, ranges).
 * NOTE(review): 0x80 is presumably the first code point that is not a
 * single byte in UTF-8 -- confirm the meaning of sb_out against the
 * encoding interface.
 */
static int
|
||||
get_ctype_code_range(OnigCtype ctype, OnigCodePoint *sb_out,
|
||||
const OnigCodePoint* ranges[])
|
||||
{
|
||||
*sb_out = 0x80;
|
||||
return onigenc_unicode_ctype_code_range(ctype, ranges);
|
||||
}
|
||||
|
||||
|
||||
static UChar*
|
||||
left_adjust_char_head(const UChar* start, const UChar* s)
|
||||
{
|
||||
const UChar *p;
|
||||
|
||||
if (s <= start) return (UChar* )s;
|
||||
p = s;
|
||||
|
||||
while (!utf8_islead(*p) && p > start) p--;
|
||||
return (UChar* )p;
|
||||
}
|
||||
|
||||
/*
 * Case-fold lookup entry for the UTF-8 encoding table.
 * Forwards flag, p, end and items unchanged to
 * onigenc_unicode_get_case_fold_codes_by_str(), supplying
 * ONIG_ENCODING_UTF8 as the encoding, and returns its result.
 */
static int
|
||||
get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
||||
const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
|
||||
{
|
||||
return onigenc_unicode_get_case_fold_codes_by_str(ONIG_ENCODING_UTF8,
|
||||
flag, p, end, items);
|
||||
}
|
||||
|
||||
/*
 * Encoding descriptor for UTF-8.
 * The initializer is positional: entries must stay in the order of the
 * OnigEncodingType members (declared outside this file -- verify against
 * the header before reordering or inserting entries).
 * Entries without an onigenc_ prefix are the static functions defined in
 * this file; entries prefixed onigenc_ are shared helpers defined elsewhere.
 */
OnigEncodingType OnigEncodingUTF8 = {
|
||||
mbc_enc_len,
|
||||
"UTF-8", /* name */
|
||||
6, /* max byte length */
|
||||
1, /* min byte length */
|
||||
is_mbc_newline,
|
||||
mbc_to_code,
|
||||
code_to_mbclen,
|
||||
code_to_mbc,
|
||||
mbc_case_fold,
|
||||
onigenc_unicode_apply_all_case_fold,
|
||||
get_case_fold_codes_by_str,
|
||||
onigenc_unicode_property_name_to_ctype,
|
||||
onigenc_unicode_is_code_ctype,
|
||||
get_ctype_code_range,
|
||||
left_adjust_char_head,
|
||||
onigenc_always_true_is_allowed_reverse_match,
|
||||
ONIGENC_FLAG_UNICODE,
|
||||
};
|
191
src/Onigmo/index.html
Normal file
191
src/Onigmo/index.html
Normal file
@ -0,0 +1,191 @@
|
||||
<html>
|
||||
<head>
|
||||
<meta HTTP-EQUIV="Content-Type" CONTENT="text/html;CHARSET=x-sjis">
|
||||
<title>Oniguruma</title>
|
||||
</head>
|
||||
<body BGCOLOR="#ffffff" VLINK="#808040" TEXT="#696969">
|
||||
|
||||
<h1>Oniguruma</h1> (<a href="index_ja.html">Japanese</a>)
|
||||
|
||||
<p>
|
||||
(c) K.Kosako, updated at: 2013/04/04
|
||||
</p>
|
||||
|
||||
<dl>
|
||||
<font color="orange">
|
||||
<dt><b>What's new</b>
|
||||
</font>
|
||||
<ul>
|
||||
<li>2013/04/04: Version 5.9.4 released.</li>
|
||||
<li>2007/08/16: Version 4.7.1 released.</li>
|
||||
<li>2007/06/20: Version 2.5.9 released.</li>
|
||||
<li>2007/06/20: Maintainer of 2.x was changed.</li>
|
||||
</ul>
|
||||
</dl>
|
||||
<hr>
|
||||
|
||||
<p>
|
||||
Oniguruma is a regular expressions library.<br>
|
||||
A distinguishing feature of this library is that a different character encoding
|
||||
<br>can be specified for each regular expression object.
|
||||
<br>(supported APIs: GNU regex, POSIX and Oniguruma native)
|
||||
</p>
|
||||
|
||||
<dl>
|
||||
<dt><b>Supported character encodings:</b><br>
|
||||
ASCII, UTF-8, UTF-16BE, UTF-16LE, UTF-32BE, UTF-32LE,<br>
|
||||
EUC-JP, EUC-TW, EUC-KR, EUC-CN,<br>
|
||||
Shift_JIS, Big5, GB18030, KOI8-R, CP1251,<br>
|
||||
ISO-8859-1, ISO-8859-2, ISO-8859-3, ISO-8859-4, ISO-8859-5,<br>
|
||||
ISO-8859-6, ISO-8859-7, ISO-8859-8, ISO-8859-9, ISO-8859-10,<br>
|
||||
ISO-8859-11, ISO-8859-13, ISO-8859-14, ISO-8859-15, ISO-8859-16<br>
|
||||
<font color="orange">
|
||||
(GB18030 encoding was contributed by KUBO Takehiro)<br>
|
||||
(CP1251 encoding was contributed by Byte)
|
||||
</font>
|
||||
</p>
|
||||
</dl>
|
||||
|
||||
<hr>
|
||||
|
||||
<dt><b>License:</b> BSD license.
|
||||
|
||||
<dl>
|
||||
<dt><b>Platform:</b>
|
||||
<ul>
|
||||
<li> Unix (including Mac OS X)
|
||||
<li> Cygwin
|
||||
<li> Win32
|
||||
</ul>
|
||||
|
||||
<br>
|
||||
|
||||
<dt><b>Download:</b>
|
||||
<ul>
|
||||
<li> <a href="archive/onig-5.9.4.tar.gz">Latest release version 5.9.4</a> (2013/04/04) <a href="HISTORY_5X.txt">Change Log</a>
|
||||
<li> <a href="archive/onig-5.9.3.tar.gz">5.9.3</a> (2012/10/26)
|
||||
<li> <a href="archive/onig-4.7.1.tar.gz">Latest release version 4.7.1</a> (2007/08/16) <a href="HISTORY_4X.txt">Change Log</a>
|
||||
<li> <a href="archive/onig-4.7.0.tar.gz">4.7.0</a> (2007/06/18)
|
||||
<li> <a href="archive/onigd2_5_9.tar.gz">Latest release version 2.5.9</a> (2007/06/20) <a href="HISTORY_2X.txt">Change Log</a>
|
||||
</ul>
|
||||
|
||||
<br>
|
||||
<font color="red">
|
||||
The maintainer of 2.x has changed to Hannes Wyss &lt;hwyss AT ywesee.com&gt;.<br>
|
||||
About 2.x, please contact him.<br>
|
||||
</font>
|
||||
* 5.x supports Unicode Property/Script.<br>
|
||||
* 2.x supports Ruby1.6/1.8.<br>
|
||||
|
||||
<br>
|
||||
<dt><b>Documents:</b> (version 5.9.4)
|
||||
<ul>
|
||||
<li> <a href="doc/RE.txt">Regular Expressions</a>
|
||||
<a href="doc/RE.ja.txt">(Japanese: EUC-JP)</a>
|
||||
<li> <a href="doc/API.txt">Oniguruma API</a>
|
||||
<a href="doc/API.ja.txt">(Japanese: EUC-JP)</a>
|
||||
</ul>
|
||||
|
||||
<br>
|
||||
<dt><b>Sample Programs:</b>
|
||||
<ul>
|
||||
<li><a href="sample/simple.c">example of the minimum</a>
|
||||
<li><a href="sample/sql.c">example of the variable syntax and meta character (SQL-like pattern match)</a>
|
||||
</ul>
|
||||
|
||||
<br>
|
||||
<dt><b>Site Links:</b>
|
||||
<ul>
|
||||
<li> <a href="http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/">FreeBSD ports</a>
|
||||
<li> <a href="http://www.softantenna.com/lib/1953/index.html">SoftAntenna > Lib > Oniguruma</a> (Japanese page)
|
||||
</ul>
|
||||
|
||||
<br>
|
||||
<dt><b>Links:</b>
|
||||
<ul>
|
||||
<li> <a href="http://www.perzl.org/aix/index.php?n=Main.Oniguruma">AIX Open Source Packages</a>
|
||||
<li> <a href="https://aur.archlinux.org/packages/oniguruma/">Arch Linux Package</a>
|
||||
<li> <a href="http://homepage3.nifty.com/k-takata/mysoft/bregonig.html">bregonig.dll (Win32)</a> (Japanese page)
|
||||
<li> <a href="http://www.halbiz.com/osaru/cnregex.html">cnRegex 4D Plugin (Mac OS X)</a> (Japanese page)
|
||||
<li> <a href="http://limechat.net/cocoaoniguruma/">CocoaOniguruma</a>
|
||||
<li> <a href="http://kmaebashi.com/">crowbar</a> (Japanese page)
|
||||
<li> <a href="http://oniguruma5.darwinports.com">Darwin Ports (Mac OS X)</a>
|
||||
<li> <a href="http://homepage2.nifty.com/Km/onig.htm">Delphi interface (Win32)</a> (Japanese page)
|
||||
<li> <a href="http://pyxis-project.net/ensemble/">Ensemble (Mac OS X)</a> (Japanese page)
|
||||
<li> <a href="http://www.srcw.net/FaEdit/">FaEdit (Win32)</a> (Japanese page)
|
||||
<li> <a href="http://www.tom.sfc.keio.ac.jp/~sakai/d/?date=20050209">GHC patch</a> Masahiro Sakai (Japanese Blog)
|
||||
<li> <a href="http://www.gyazsquare.com/gyazmail/index.php">GyazMail (Mac OS X)</a>
|
||||
<li> <a href="http://www5d.biglobe.ne.jp/~f-taste/knt3/jcref3.html">J-cref v3</a> (Japanese page)
|
||||
<li> <a href="http://www.artman21.net/">Jedit X (Mac OS X)</a>
|
||||
<li> <a href="http://www.chitora.jp/lhaz.html">Lhaz (Win32)</a> (Japanese page)
|
||||
<li> <a href="http://limechat.net/">LimeChat</a> (Japanese page)
|
||||
<li> <a href="http://medb.enhiro.com/">meDB</a> (Japanese page)
|
||||
<li> <a href="http://monaos.org/">Mona OS</a>
|
||||
<li> <a href="http://mongoose.jp/">mongoose</a> (Japanese page)
|
||||
<li> <a href="http://www.irori.org/tool/mregexp.html">mregexp</a> (Japanese page)
|
||||
<li> <a href="http://ochusha.sourceforge.jp/">Ochusha</a> (Japanese page)
|
||||
<li> <a href="http://sonoisa.github.com/ogrekit/About_%28English%29.html">OgreKit (Mac OS X)</a> Regular Expression Framework for Cocoa (Japanese page)
|
||||
<li> <a href="http://www.kanetaka.net/4dapi/wiki4d.dll/4dcgi/wiki.cgi?plugins-oniguruma">OnigRegexp</a> (Japanese page)
|
||||
<li> <a href="http://rubyforge.org/projects/oniguruma">Oniguruma for Ruby</a>
|
||||
<li> <a href="http://openspace.timedia.co.jp/~yasuyuki/wiliki/wiliki.cgi?Oniguruma-mysqld&l=jp">Oniguruma-mysqld</a>
|
||||
<li> <a href="http://www.void.in/wiki/OnigPP">OnigPP</a> (Japanese page)
|
||||
<li> <a href="http://www.kt.rim.or.jp/~kbk/sed/index.html">Onigsed (Win32)</a> (Japanese page)
|
||||
<li> <a href="http://glozer.net/code.html#oregexp">oregexp</a> Erlang binding
|
||||
<li> <a href="http://www.kt.rim.or.jp/~kbk/yagrep/index.html">yagrep (Win32)</a> (Japanese page)
|
||||
<li> <a href="http://www.php.gr.jp/">Japan PHP User Group</a> PHP 5.0 mb_ereg (Japanese page)
|
||||
<li> <a href="http://yatsu.info/wiki/Pufui/">Pufui (Mac OS X)</a> (Japanese page)
|
||||
<li> <a href="http://ultrapop.jp/?q2ch">q2ch</a> (Japanese page)
|
||||
<li> <a href="http://search.cpan.org/~andya/re-engine-Oniguruma">re-engine-Oniguruma</a>
|
||||
<li> <a href="http://harumune.s56.xrea.com/assari/index.php?RSSTyping">RSSTyping</a> (Japanese page)
|
||||
<li> <a href="http://tobysoft.net/wiki/index.php?Ruby%2Fruby-win32-oniguruma">ruby-win32-oniguruma</a> (Japanese page)
|
||||
<li> <a href="http://quux.s74.xrea.com/">SevenFour (Mac OS X)</a> (Japanese page)
|
||||
<li> <a href="http://storklab.cyber-ninja.jp/">Stork Lab. Products (Mac OS X)</a> (Japanese page)
|
||||
<li> <a href="http://sourceforge.jp/projects/ttssh2/">TeraTerm (Win32)</a>
|
||||
<li> <a href="http://www8.ocn.ne.jp/~sonoisa/TiddlyWikiPod/">TiddlyWikiPod (Mac OS X)</a>
|
||||
<li> <a href="http://www.cyanworks.net/mac.html">TunesTEXT (Mac OS X)</a>
|
||||
<li> <a href="https://code.google.com/p/oniguruma-visualworks/">oniguruma-visualworks</a>
|
||||
<li> <a href="http://sourceforge.jp/projects/frogger/">XML parser</a>
|
||||
<li> <a href="http://www.yokkasoft.net/">YokkaSoft (Win32)</a> (Japanese page)
|
||||
<li> <a href="http://www.hi-ho.ne.jp/kuze/tool.htm">Zed (Win32)</a> (Japanese page)
|
||||
</ul>
|
||||
|
||||
<br>
|
||||
<dt><b>References:</b>
|
||||
<ul>
|
||||
<li> <a href="http://www.ruby-lang.org/ja/man/index.cgi?cmd=view;name=%C0%B5%B5%AC%C9%BD%B8%BD">Ruby Reference Manual Regexp</a> (Japanese page)
|
||||
<li> <a href="http://www.perl.com/doc/manual/html/pod/perlre.html">Perl regular expressions</a>
|
||||
<li> <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/util/regex/Pattern.html">java.util.regex.Pattern (J2SE 1.4.2)</a>
|
||||
<li> <a href="http://www.opengroup.org/onlinepubs/007908799/xbd/re.html">The Open Group</a>
|
||||
<li> <a href="http://regex.info/">Mastering Regular Expressions</a>
|
||||
<li> <a href="http://www.unicode.org/">Unicode Home Page</a>
|
||||
<li> <a href="http://www.kt.rim.or.jp/~kbk/regex/regex.html">Regular expressions memo</a> (Japanese page)
|
||||
<li> <a href="http://www.din.or.jp/~ohzaki/regex.htm">Regular expressions technique</a> (Japanese page)
|
||||
</ul>
|
||||
|
||||
<br>
|
||||
</dl>
|
||||
<p>
|
||||
and I'm thankful to Akinori MUSHA.
|
||||
</p>
|
||||
|
||||
<hr>
|
||||
<dl>
|
||||
<dt><b>Other Libraries:</b>
|
||||
<ul>
|
||||
<li> <a href="http://www.boost.org/libs/regex/doc/">Boost.Regex</a>
|
||||
<li> <a href="http://arglist.com/regex/">A copy of Henry Spencer's</a>
|
||||
<li> <a href="http://directory.fsf.org/regex.html">GNU regex</a>
|
||||
<li> <a href="http://www.pcre.org/">PCRE</a>
|
||||
<li> <a href="http://re2c.org/">re2c</a>
|
||||
<li> <a href="http://tiny-rex.sourceforge.net/">T-Rex</a>
|
||||
<li> <a href="http://laurikari.net/tre/">TRE</a>
|
||||
<li> <a href="http://svn.codehaus.org/jruby/joni/">Joni (Java)</a>
|
||||
<li> <a href="http://jregex.sourceforge.net/">JRegex (Java)</a>
|
||||
<li> <a href="http://www.cacas.org/java/gnu/regexp/">gnu.regexp for Java</a>
|
||||
<li> <a href="http://jakarta.apache.org/regexp/index.html">Jakarta Project Regexp</a>
|
||||
<li> <a href="http://jakarta.apache.org/oro/">Jakarta Project ORO</a>
|
||||
<li> <a href="http://sourceforge.jp/projects/onig4j/">Oniguruma for Java</a>
|
||||
</ul>
|
||||
</dl>
|
||||
</body>
|
||||
</html>
|
194
src/Onigmo/index_ja.html
Normal file
194
src/Onigmo/index_ja.html
Normal file
@ -0,0 +1,194 @@
|
||||
<html>
|
||||
<head>
|
||||
<meta HTTP-EQUIV="Content-Type" CONTENT="text/html;CHARSET=utf-8">
|
||||
<title>鬼車</title>
|
||||
</head>
|
||||
<body BGCOLOR="#ffffff" VLINK="#808040" TEXT="#696969">
|
||||
|
||||
<h1>鬼車</h1>
|
||||
|
||||
<p>
|
||||
(c) K.Kosako, 最終更新: 2013/04/04
|
||||
</p>
|
||||
|
||||
<dl>
|
||||
<font color="orange">
|
||||
<dt><b>更新情報</b>
|
||||
</font>
|
||||
<ul>
|
||||
<li>2013/04/04: Version 5.9.4 リリース</li>
|
||||
<li>2007/08/16: Version 4.7.1 リリース</li>
|
||||
<li>2007/06/20: Version 2.5.9 リリース</li>
|
||||
<li>2007/06/20: 2.xの保守担当者を変更</li>
|
||||
</ul>
|
||||
</dl>
|
||||
<hr>
|
||||
|
||||
<p>
|
||||
鬼車は正規表現ライブラリである。<br>
|
||||
このライブラリの特徴は、それぞれの正規表現オブジェクトごとに異なる文字エンコーディングを
|
||||
指定できること。<br>
|
||||
(API: GNU regex, POSIX and Oniguruma native)
|
||||
</p>
|
||||
|
||||
<dl>
|
||||
<dt><b>対応している文字エンコーディング:</b><br>
|
||||
ASCII, UTF-8, UTF-16BE, UTF-16LE, UTF-32BE, UTF-32LE,<br>
|
||||
EUC-JP, EUC-TW, EUC-KR, EUC-CN,<br>
|
||||
Shift_JIS, Big5, GB18030, KOI8-R, CP1251,<br>
|
||||
ISO-8859-1, ISO-8859-2, ISO-8859-3, ISO-8859-4, ISO-8859-5,<br>
|
||||
ISO-8859-6, ISO-8859-7, ISO-8859-8, ISO-8859-9, ISO-8859-10,<br>
|
||||
ISO-8859-11, ISO-8859-13, ISO-8859-14, ISO-8859-15, ISO-8859-16<br>
|
||||
<font color="orange">
|
||||
(GB18030は、KUBO Takehiro氏提供)<br>
|
||||
(CP1251は、Byte氏提供)
|
||||
</font>
|
||||
</p>
|
||||
</dl>
|
||||
|
||||
<hr>
|
||||
|
||||
<dt><b>ライセンス:</b>BSDライセンス
|
||||
|
||||
<dl>
|
||||
<dt><b>プラットフォーム:</b>
|
||||
<ul>
|
||||
<li> Unix (Mac OS Xを含む)
|
||||
<li> Cygwin
|
||||
<li> Win32
|
||||
</ul>
|
||||
|
||||
<br>
|
||||
|
||||
<dt><b>ダウンロード:</b>
|
||||
<ul>
|
||||
<li> <a href="archive/onig-5.9.4.tar.gz">5.9.4 最新版</a> (2013/04/04) <a href="HISTORY_5X.txt">更新履歴</a>
|
||||
<li> <a href="archive/onig-5.9.3.tar.gz">5.9.3</a> (2012/10/26)
|
||||
<li> <a href="archive/onig-4.7.1.tar.gz">4.7.1 最新版</a> (2007/08/16) <a href="HISTORY_4X.txt">更新履歴</a>
|
||||
<li> <a href="archive/onig-4.7.0.tar.gz">4.7.0</a> (2007/06/18)
|
||||
<li> <a href="archive/onigd2_5_9.tar.gz">2.5.9 最新版</a> (2007/06/20) <a href="HISTORY_2X.txt">更新履歴</a>
|
||||
</ul>
|
||||
|
||||
<br>
|
||||
<font color="red">
|
||||
2.xの保守担当は、Hannes Wyss &lt;hwyss AT ywesee.com&gt;に交替しました。<br>
|
||||
2.xについては、彼に連絡してください。<br>
|
||||
</font>
|
||||
* 5.xはUnicode Property/Scriptを提供<br>
|
||||
* 2.xはRuby1.6/1.8組込みライブラリとして動作する。 (2006年末で保守を終了)<br>
|
||||
|
||||
<br>
|
||||
<dt><b>ドキュメント:</b> (version 5.9.4)
|
||||
<ul>
|
||||
<li> <a href="doc/RE.txt">正規表現</a>
|
||||
<a href="doc/RE.ja.txt">(日本語: EUC-JP)</a>
|
||||
<li> <a href="doc/API.txt">鬼車API</a>
|
||||
<a href="doc/API.ja.txt">(日本語: EUC-JP)</a>
|
||||
</ul>
|
||||
|
||||
<br>
|
||||
<dt><b>サンプルプログラム:</b>
|
||||
<ul>
|
||||
<li><a href="sample/simple.c">最小使用例</a>
|
||||
<li><a href="sample/sql.c">可変文法と可変メタ文字機能使用例(SQL-like pattern match)</a>
|
||||
</ul>
|
||||
|
||||
<br>
|
||||
<dt><b>サイト:</b>
|
||||
<ul>
|
||||
<li> <a href="http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/">FreeBSD ports</a>
|
||||
<li> <a href="http://www.softantenna.com/lib/1953/index.html">SoftAntenna > Lib > Oniguruma</a> (日本語)
|
||||
</ul>
|
||||
|
||||
<br>
|
||||
<dt><b>リンク:</b>
|
||||
<ul>
|
||||
<li> <a href="http://www.perzl.org/aix/index.php?n=Main.Oniguruma">AIX Open Source Packages</a>
|
||||
<li> <a href="https://aur.archlinux.org/packages/oniguruma/">Arch Linux Package</a>
|
||||
<li> <a href="http://homepage3.nifty.com/k-takata/mysoft/bregonig.html">bregonig.dll (Win32)</a> (日本語)
|
||||
<li> <a href="http://www.halbiz.com/osaru/cnregex.html">cnRegex 4D Plugin (Mac OS X)</a> (日本語)
|
||||
<li> <a href="http://limechat.net/cocoaoniguruma/index_ja.html">CocoaOniguruma</a> (日本語)
|
||||
<li> <a href="http://kmaebashi.com/">crowbar</a> (日本語)
|
||||
<li> <a href="http://oniguruma5.darwinports.com">Darwin Ports (Mac OS X)</a>
|
||||
<li> <a href="http://homepage2.nifty.com/Km/onig.htm">Delphi interface (Win32)</a> (日本語)
|
||||
<li> <a href="http://pyxis-project.net/ensemble/">Ensemble (Mac OS X)</a> (日本語)
|
||||
<li> <a href="http://www.srcw.net/FaEdit/">FaEdit (Win32)</a> (日本語)
|
||||
<li> <a href="http://www.tom.sfc.keio.ac.jp/~sakai/d/?date=20050209">GHC patch</a> Masahiro Sakai (Japanese Blog)
|
||||
<li> <a href="http://www.gyazsquare.com/gyazmail/index.php">GyazMail (Mac OS X)</a>
|
||||
<li> <a href="http://www5d.biglobe.ne.jp/~f-taste/knt3/jcref3.html">J-cref v3</a> (日本語)
|
||||
<li> <a href="http://www.artman21.net/">Jedit X (Mac OS X)</a>
|
||||
<li> <a href="http://www.chitora.jp/lhaz.html">Lhaz (Win32)</a> (日本語)
|
||||
<li> <a href="http://limechat.net/">LimeChat</a> (日本語)
|
||||
<li> <a href="http://medb.enhiro.com/">meDB</a> (日本語)
|
||||
<li> <a href="http://monaos.org/">Mona OS</a>
|
||||
<li> <a href="http://mongoose.jp/">mongoose</a> (日本語)
|
||||
<li> <a href="http://www.irori.org/tool/mregexp.html">mregexp</a> (日本語)
|
||||
<li> <a href="http://ochusha.sourceforge.jp/">おちゅ〜しゃ</a> (日本語)
|
||||
<li> <a href="http://sonoisa.github.com/ogrekit/About.html">OgreKit (Mac OS X)</a> Regular Expression Framework for Cocoa (日本語)
|
||||
<li> <a href="http://www.kanetaka.net/4dapi/wiki4d.dll/4dcgi/wiki.cgi?plugins-oniguruma">OnigRegexp</a> (日本語)
|
||||
<li> <a href="http://rubyforge.org/projects/oniguruma">Oniguruma for Ruby</a>
|
||||
<li> <a href="http://openspace.timedia.co.jp/~yasuyuki/wiliki/wiliki.cgi?Oniguruma-mysqld&amp;l=jp">Oniguruma-mysqld</a>
|
||||
<li> <a href="http://www.void.in/wiki/OnigPP">OnigPP</a> (日本語)
|
||||
<li> <a href="http://www.kt.rim.or.jp/~kbk/sed/index.html">Onigsed (Win32)</a> (日本語)
|
||||
<li> <a href="http://glozer.net/code.html#oregexp">oregexp</a> Erlang binding
|
||||
<li> <a href="http://www.kt.rim.or.jp/~kbk/yagrep/index.html">yagrep (Win32)</a> (日本語)
|
||||
<li> <a href="http://www.php.gr.jp/">日本PHPユーザ会</a> PHP 5.0 mb_ereg (日本語)
|
||||
<li> <a href="http://yatsu.info/wiki/Pufui/">Pufui (Mac OS X)</a> (日本語)
|
||||
<li> <a href="http://ultrapop.jp/?q2ch">q2ch</a> (日本語)
|
||||
<li> <a href="http://search.cpan.org/~andya/re-engine-Oniguruma">re-engine-Oniguruma</a>
|
||||
<li> <a href="http://harumune.s56.xrea.com/assari/index.php?RSSTyping">RSSTyping</a> (日本語)
|
||||
<li> <a href="http://tobysoft.net/wiki/index.php?Ruby%2Fruby-win32-oniguruma">ruby-win32-oniguruma</a> (日本語)
|
||||
<li> <a href="http://quux.s74.xrea.com/">SevenFour (Mac OS X)</a> (日本語)
|
||||
<li> <a href="http://storklab.cyber-ninja.jp/">Stork Lab. Products (Mac OS X)</a> (日本語)
|
||||
<li> <a href="http://sourceforge.jp/projects/ttssh2/">TeraTerm (Win32)</a>
|
||||
<li> <a href="http://www8.ocn.ne.jp/~sonoisa/TiddlyWikiPod/">TiddlyWikiPod (Mac OS X)</a>
|
||||
<li> <a href="http://www.cyanworks.net/mac.html">TunesTEXT (Mac OS X)</a>
|
||||
<li> <a href="https://code.google.com/p/oniguruma-visualworks/">oniguruma-visualworks</a>
|
||||
<li> <a href="http://sourceforge.jp/projects/frogger/">XML parser</a>
|
||||
<li> <a href="http://www.yokkasoft.net/">YokkaSoft (Win32)</a> (日本語)
|
||||
<li> <a href="http://www.hi-ho.ne.jp/kuze/tool.htm">Zed (Win32)</a> (日本語)
|
||||
</ul>
|
||||
|
||||
<br>
|
||||
<dt><b>参考資料:</b>
|
||||
<ul>
|
||||
<li> <a href="http://www.ruby-lang.org/ja/man/index.cgi?cmd=view;name=%C0%B5%B5%AC%C9%BD%B8%BD">Rubyリファレンスマニュアル</a> (日本語)
|
||||
<li> <a href="http://www.perl.com/doc/manual/html/pod/perlre.html">Perl regular expressions</a>
|
||||
<li> <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/util/regex/Pattern.html">java.util.regex.Pattern (J2SE 1.4.2)</a>
|
||||
<li> <a href="http://www.opengroup.org/onlinepubs/007908799/xbd/re.html">The Open Group</a>
|
||||
<li> <a href="http://regex.info/">Mastering Regular Expressions</a>
|
||||
<li> <a href="http://www.unicode.org/">Unicode Home Page</a>
|
||||
<li> <a href="http://www.kt.rim.or.jp/~kbk/regex/regex.html">正規表現メモ</a> (日本語)
|
||||
<li> <a href="http://www.din.or.jp/~ohzaki/regex.htm">Perl正規表現雑技</a> (日本語)
|
||||
</ul>
|
||||
|
||||
<br>
|
||||
</dl>
|
||||
<p>
|
||||
I am also thankful to Akinori MUSHA.
|
||||
</p>
|
||||
|
||||
<hr>
|
||||
<dl>
|
||||
<dt><b>他のライブラリ:</b>
|
||||
<ul>
|
||||
<li> <a href="http://www.boost.org/libs/regex/doc/">Boost.Regex</a>
|
||||
<li> <a href="http://arglist.com/regex/">A copy of Henry Spencer's</a>
|
||||
<li> <a href="http://directory.fsf.org/regex.html">GNU regex</a>
|
||||
<li> <a href="http://www.pcre.org/">PCRE</a>
|
||||
<li> <a href="http://re2c.org/">re2c</a>
|
||||
<li> <a href="http://tiny-rex.sourceforge.net/">T-Rex</a>
|
||||
<li> <a href="http://laurikari.net/tre/">TRE</a>
|
||||
<li> <a href="http://svn.codehaus.org/jruby/joni/">Joni (Java)</a>
|
||||
<li> <a href="http://jregex.sourceforge.net/">JRegex (Java)</a>
|
||||
<li> <a href="http://www.cacas.org/java/gnu/regexp/">gnu.regexp for Java</a>
|
||||
<li> <a href="http://jakarta.apache.org/regexp/index.html">Jakarta Project Regexp</a>
|
||||
<li> <a href="http://jakarta.apache.org/oro/">Jakarta Project ORO</a>
|
||||
<li> <a href="http://sourceforge.jp/projects/onig4j/">Oniguruma for Java</a>
|
||||
</ul>
|
||||
</dl>
|
||||
|
||||
<hr>
|
||||
<a href="../">ホームにもどる</a>
|
||||
</body>
|
||||
</html>
|
520
src/Onigmo/install-sh
Normal file
520
src/Onigmo/install-sh
Normal file
@ -0,0 +1,520 @@
|
||||
#!/bin/sh
|
||||
# install - install a program, script, or datafile
|
||||
|
||||
scriptversion=2009-04-28.21; # UTC
|
||||
|
||||
# This originates from X11R5 (mit/util/scripts/install.sh), which was
|
||||
# later released in X11R6 (xc/config/util/install.sh) with the
|
||||
# following copyright and license.
|
||||
#
|
||||
# Copyright (C) 1994 X Consortium
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to
|
||||
# deal in the Software without restriction, including without limitation the
|
||||
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
# sell copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# X CONSORTIUM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
|
||||
# AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNEC-
|
||||
# TION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
#
|
||||
# Except as contained in this notice, the name of the X Consortium shall not
|
||||
# be used in advertising or otherwise to promote the sale, use or other deal-
|
||||
# ings in this Software without prior written authorization from the X Consor-
|
||||
# tium.
|
||||
#
|
||||
#
|
||||
# FSF changes to this file are in the public domain.
|
||||
#
|
||||
# Calling this script install-sh is preferred over install.sh, to prevent
|
||||
# `make' implicit rules from creating a file called install from it
|
||||
# when there is no Makefile.
|
||||
#
|
||||
# This script is compatible with the BSD install script, but was written
|
||||
# from scratch.
|
||||
|
||||
nl='
|
||||
'
|
||||
IFS=" "" $nl"
|
||||
|
||||
# set DOITPROG to echo to test this script
|
||||
|
||||
# Don't use :- since 4.3BSD and earlier shells don't like it.
|
||||
doit=${DOITPROG-}
|
||||
if test -z "$doit"; then
|
||||
doit_exec=exec
|
||||
else
|
||||
doit_exec=$doit
|
||||
fi
|
||||
|
||||
# Put in absolute file names if you don't have them in your path;
|
||||
# or use environment vars.
|
||||
|
||||
chgrpprog=${CHGRPPROG-chgrp}
|
||||
chmodprog=${CHMODPROG-chmod}
|
||||
chownprog=${CHOWNPROG-chown}
|
||||
cmpprog=${CMPPROG-cmp}
|
||||
cpprog=${CPPROG-cp}
|
||||
mkdirprog=${MKDIRPROG-mkdir}
|
||||
mvprog=${MVPROG-mv}
|
||||
rmprog=${RMPROG-rm}
|
||||
stripprog=${STRIPPROG-strip}
|
||||
|
||||
posix_glob='?'
|
||||
initialize_posix_glob='
|
||||
test "$posix_glob" != "?" || {
|
||||
if (set -f) 2>/dev/null; then
|
||||
posix_glob=
|
||||
else
|
||||
posix_glob=:
|
||||
fi
|
||||
}
|
||||
'
|
||||
|
||||
posix_mkdir=
|
||||
|
||||
# Desired mode of installed file.
|
||||
mode=0755
|
||||
|
||||
chgrpcmd=
|
||||
chmodcmd=$chmodprog
|
||||
chowncmd=
|
||||
mvcmd=$mvprog
|
||||
rmcmd="$rmprog -f"
|
||||
stripcmd=
|
||||
|
||||
src=
|
||||
dst=
|
||||
dir_arg=
|
||||
dst_arg=
|
||||
|
||||
copy_on_change=false
|
||||
no_target_directory=
|
||||
|
||||
usage="\
|
||||
Usage: $0 [OPTION]... [-T] SRCFILE DSTFILE
|
||||
or: $0 [OPTION]... SRCFILES... DIRECTORY
|
||||
or: $0 [OPTION]... -t DIRECTORY SRCFILES...
|
||||
or: $0 [OPTION]... -d DIRECTORIES...
|
||||
|
||||
In the 1st form, copy SRCFILE to DSTFILE.
|
||||
In the 2nd and 3rd, copy all SRCFILES to DIRECTORY.
|
||||
In the 4th, create DIRECTORIES.
|
||||
|
||||
Options:
|
||||
--help display this help and exit.
|
||||
--version display version info and exit.
|
||||
|
||||
-c (ignored)
|
||||
-C install only if different (preserve the last data modification time)
|
||||
-d create directories instead of installing files.
|
||||
-g GROUP $chgrpprog installed files to GROUP.
|
||||
-m MODE $chmodprog installed files to MODE.
|
||||
-o USER $chownprog installed files to USER.
|
||||
-s $stripprog installed files.
|
||||
-t DIRECTORY install into DIRECTORY.
|
||||
-T report an error if DSTFILE is a directory.
|
||||
|
||||
Environment variables override the default commands:
|
||||
CHGRPPROG CHMODPROG CHOWNPROG CMPPROG CPPROG MKDIRPROG MVPROG
|
||||
RMPROG STRIPPROG
|
||||
"
|
||||
|
||||
while test $# -ne 0; do
|
||||
case $1 in
|
||||
-c) ;;
|
||||
|
||||
-C) copy_on_change=true;;
|
||||
|
||||
-d) dir_arg=true;;
|
||||
|
||||
-g) chgrpcmd="$chgrpprog $2"
|
||||
shift;;
|
||||
|
||||
--help) echo "$usage"; exit $?;;
|
||||
|
||||
-m) mode=$2
|
||||
case $mode in
|
||||
*' '* | *' '* | *'
|
||||
'* | *'*'* | *'?'* | *'['*)
|
||||
echo "$0: invalid mode: $mode" >&2
|
||||
exit 1;;
|
||||
esac
|
||||
shift;;
|
||||
|
||||
-o) chowncmd="$chownprog $2"
|
||||
shift;;
|
||||
|
||||
-s) stripcmd=$stripprog;;
|
||||
|
||||
-t) dst_arg=$2
|
||||
shift;;
|
||||
|
||||
-T) no_target_directory=true;;
|
||||
|
||||
--version) echo "$0 $scriptversion"; exit $?;;
|
||||
|
||||
--) shift
|
||||
break;;
|
||||
|
||||
-*) echo "$0: invalid option: $1" >&2
|
||||
exit 1;;
|
||||
|
||||
*) break;;
|
||||
esac
|
||||
shift
|
||||
done
|
||||
|
||||
if test $# -ne 0 && test -z "$dir_arg$dst_arg"; then
|
||||
# When -d is used, all remaining arguments are directories to create.
|
||||
# When -t is used, the destination is already specified.
|
||||
# Otherwise, the last argument is the destination. Remove it from $@.
|
||||
for arg
|
||||
do
|
||||
if test -n "$dst_arg"; then
|
||||
# $@ is not empty: it contains at least $arg.
|
||||
set fnord "$@" "$dst_arg"
|
||||
shift # fnord
|
||||
fi
|
||||
shift # arg
|
||||
dst_arg=$arg
|
||||
done
|
||||
fi
|
||||
|
||||
if test $# -eq 0; then
|
||||
if test -z "$dir_arg"; then
|
||||
echo "$0: no input file specified." >&2
|
||||
exit 1
|
||||
fi
|
||||
# It's OK to call `install-sh -d' without argument.
|
||||
# This can happen when creating conditional directories.
|
||||
exit 0
|
||||
fi
|
||||
|
||||
if test -z "$dir_arg"; then
|
||||
trap '(exit $?); exit' 1 2 13 15
|
||||
|
||||
# Set umask so as not to create temps with too-generous modes.
|
||||
# However, 'strip' requires both read and write access to temps.
|
||||
case $mode in
|
||||
# Optimize common cases.
|
||||
*644) cp_umask=133;;
|
||||
*755) cp_umask=22;;
|
||||
|
||||
*[0-7])
|
||||
if test -z "$stripcmd"; then
|
||||
u_plus_rw=
|
||||
else
|
||||
u_plus_rw='% 200'
|
||||
fi
|
||||
cp_umask=`expr '(' 777 - $mode % 1000 ')' $u_plus_rw`;;
|
||||
*)
|
||||
if test -z "$stripcmd"; then
|
||||
u_plus_rw=
|
||||
else
|
||||
u_plus_rw=,u+rw
|
||||
fi
|
||||
cp_umask=$mode$u_plus_rw;;
|
||||
esac
|
||||
fi
|
||||
|
||||
for src
|
||||
do
|
||||
# Protect names starting with `-'.
|
||||
case $src in
|
||||
-*) src=./$src;;
|
||||
esac
|
||||
|
||||
if test -n "$dir_arg"; then
|
||||
dst=$src
|
||||
dstdir=$dst
|
||||
test -d "$dstdir"
|
||||
dstdir_status=$?
|
||||
else
|
||||
|
||||
# Waiting for this to be detected by the "$cpprog $src $dsttmp" command
|
||||
# might cause directories to be created, which would be especially bad
|
||||
# if $src (and thus $dsttmp) contains '*'.
|
||||
if test ! -f "$src" && test ! -d "$src"; then
|
||||
echo "$0: $src does not exist." >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if test -z "$dst_arg"; then
|
||||
echo "$0: no destination specified." >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
dst=$dst_arg
|
||||
# Protect names starting with `-'.
|
||||
case $dst in
|
||||
-*) dst=./$dst;;
|
||||
esac
|
||||
|
||||
# If destination is a directory, append the input filename; won't work
|
||||
# if double slashes aren't ignored.
|
||||
if test -d "$dst"; then
|
||||
if test -n "$no_target_directory"; then
|
||||
echo "$0: $dst_arg: Is a directory" >&2
|
||||
exit 1
|
||||
fi
|
||||
dstdir=$dst
|
||||
dst=$dstdir/`basename "$src"`
|
||||
dstdir_status=0
|
||||
else
|
||||
# Prefer dirname, but fall back on a substitute if dirname fails.
|
||||
dstdir=`
|
||||
(dirname "$dst") 2>/dev/null ||
|
||||
expr X"$dst" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
|
||||
X"$dst" : 'X\(//\)[^/]' \| \
|
||||
X"$dst" : 'X\(//\)$' \| \
|
||||
X"$dst" : 'X\(/\)' \| . 2>/dev/null ||
|
||||
echo X"$dst" |
|
||||
sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
|
||||
s//\1/
|
||||
q
|
||||
}
|
||||
/^X\(\/\/\)[^/].*/{
|
||||
s//\1/
|
||||
q
|
||||
}
|
||||
/^X\(\/\/\)$/{
|
||||
s//\1/
|
||||
q
|
||||
}
|
||||
/^X\(\/\).*/{
|
||||
s//\1/
|
||||
q
|
||||
}
|
||||
s/.*/./; q'
|
||||
`
|
||||
|
||||
test -d "$dstdir"
|
||||
dstdir_status=$?
|
||||
fi
|
||||
fi
|
||||
|
||||
obsolete_mkdir_used=false
|
||||
|
||||
if test $dstdir_status != 0; then
|
||||
case $posix_mkdir in
|
||||
'')
|
||||
# Create intermediate dirs using mode 755 as modified by the umask.
|
||||
# This is like FreeBSD 'install' as of 1997-10-28.
|
||||
umask=`umask`
|
||||
case $stripcmd.$umask in
|
||||
# Optimize common cases.
|
||||
*[2367][2367]) mkdir_umask=$umask;;
|
||||
.*0[02][02] | .[02][02] | .[02]) mkdir_umask=22;;
|
||||
|
||||
*[0-7])
|
||||
mkdir_umask=`expr $umask + 22 \
|
||||
- $umask % 100 % 40 + $umask % 20 \
|
||||
- $umask % 10 % 4 + $umask % 2
|
||||
`;;
|
||||
*) mkdir_umask=$umask,go-w;;
|
||||
esac
|
||||
|
||||
# With -d, create the new directory with the user-specified mode.
|
||||
# Otherwise, rely on $mkdir_umask.
|
||||
if test -n "$dir_arg"; then
|
||||
mkdir_mode=-m$mode
|
||||
else
|
||||
mkdir_mode=
|
||||
fi
|
||||
|
||||
posix_mkdir=false
|
||||
case $umask in
|
||||
*[123567][0-7][0-7])
|
||||
# POSIX mkdir -p sets u+wx bits regardless of umask, which
|
||||
# is incompatible with FreeBSD 'install' when (umask & 300) != 0.
|
||||
;;
|
||||
*)
|
||||
tmpdir=${TMPDIR-/tmp}/ins$RANDOM-$$
|
||||
trap 'ret=$?; rmdir "$tmpdir/d" "$tmpdir" 2>/dev/null; exit $ret' 0
|
||||
|
||||
if (umask $mkdir_umask &&
|
||||
exec $mkdirprog $mkdir_mode -p -- "$tmpdir/d") >/dev/null 2>&1
|
||||
then
|
||||
if test -z "$dir_arg" || {
|
||||
# Check for POSIX incompatibilities with -m.
|
||||
# HP-UX 11.23 and IRIX 6.5 mkdir -m -p sets group- or
|
||||
# other-writeable bit of parent directory when it shouldn't.
|
||||
# FreeBSD 6.1 mkdir -m -p sets mode of existing directory.
|
||||
ls_ld_tmpdir=`ls -ld "$tmpdir"`
|
||||
case $ls_ld_tmpdir in
|
||||
d????-?r-*) different_mode=700;;
|
||||
d????-?--*) different_mode=755;;
|
||||
*) false;;
|
||||
esac &&
|
||||
$mkdirprog -m$different_mode -p -- "$tmpdir" && {
|
||||
ls_ld_tmpdir_1=`ls -ld "$tmpdir"`
|
||||
test "$ls_ld_tmpdir" = "$ls_ld_tmpdir_1"
|
||||
}
|
||||
}
|
||||
then posix_mkdir=:
|
||||
fi
|
||||
rmdir "$tmpdir/d" "$tmpdir"
|
||||
else
|
||||
# Remove any dirs left behind by ancient mkdir implementations.
|
||||
rmdir ./$mkdir_mode ./-p ./-- 2>/dev/null
|
||||
fi
|
||||
trap '' 0;;
|
||||
esac;;
|
||||
esac
|
||||
|
||||
if
|
||||
$posix_mkdir && (
|
||||
umask $mkdir_umask &&
|
||||
$doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir"
|
||||
)
|
||||
then :
|
||||
else
|
||||
|
||||
# The umask is ridiculous, or mkdir does not conform to POSIX,
|
||||
# or it failed possibly due to a race condition. Create the
|
||||
# directory the slow way, step by step, checking for races as we go.
|
||||
|
||||
case $dstdir in
|
||||
/*) prefix='/';;
|
||||
-*) prefix='./';;
|
||||
*) prefix='';;
|
||||
esac
|
||||
|
||||
eval "$initialize_posix_glob"
|
||||
|
||||
oIFS=$IFS
|
||||
IFS=/
|
||||
$posix_glob set -f
|
||||
set fnord $dstdir
|
||||
shift
|
||||
$posix_glob set +f
|
||||
IFS=$oIFS
|
||||
|
||||
prefixes=
|
||||
|
||||
for d
|
||||
do
|
||||
test -z "$d" && continue
|
||||
|
||||
prefix=$prefix$d
|
||||
if test -d "$prefix"; then
|
||||
prefixes=
|
||||
else
|
||||
if $posix_mkdir; then
|
||||
# Run mkdir -p in a subshell whose umask is $mkdir_umask, so intermediate
# directories are not created group/other-writable.  "umask VALUE" invokes
# the builtin; "umask=VALUE" would only assign an unused shell variable
# and leave the inherited umask in effect.
(umask $mkdir_umask &&
|
||||
$doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir") && break
|
||||
# Don't fail if two instances are running concurrently.
|
||||
test -d "$prefix" || exit 1
|
||||
else
|
||||
case $prefix in
|
||||
*\'*) qprefix=`echo "$prefix" | sed "s/'/'\\\\\\\\''/g"`;;
|
||||
*) qprefix=$prefix;;
|
||||
esac
|
||||
prefixes="$prefixes '$qprefix'"
|
||||
fi
|
||||
fi
|
||||
prefix=$prefix/
|
||||
done
|
||||
|
||||
if test -n "$prefixes"; then
|
||||
# Don't fail if two instances are running concurrently.
|
||||
(umask $mkdir_umask &&
|
||||
eval "\$doit_exec \$mkdirprog $prefixes") ||
|
||||
test -d "$dstdir" || exit 1
|
||||
obsolete_mkdir_used=true
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
|
||||
if test -n "$dir_arg"; then
|
||||
{ test -z "$chowncmd" || $doit $chowncmd "$dst"; } &&
|
||||
{ test -z "$chgrpcmd" || $doit $chgrpcmd "$dst"; } &&
|
||||
{ test "$obsolete_mkdir_used$chowncmd$chgrpcmd" = false ||
|
||||
test -z "$chmodcmd" || $doit $chmodcmd $mode "$dst"; } || exit 1
|
||||
else
|
||||
|
||||
# Make a couple of temp file names in the proper directory.
|
||||
dsttmp=$dstdir/_inst.$$_
|
||||
rmtmp=$dstdir/_rm.$$_
|
||||
|
||||
# Trap to clean up those temp files at exit.
|
||||
trap 'ret=$?; rm -f "$dsttmp" "$rmtmp" && exit $ret' 0
|
||||
|
||||
# Copy the file name to the temp name.
|
||||
(umask $cp_umask && $doit_exec $cpprog "$src" "$dsttmp") &&
|
||||
|
||||
# and set any options; do chmod last to preserve setuid bits.
|
||||
#
|
||||
# If any of these fail, we abort the whole thing. If we want to
|
||||
# ignore errors from any of these, just make sure not to ignore
|
||||
# errors from the above "$doit $cpprog $src $dsttmp" command.
|
||||
#
|
||||
{ test -z "$chowncmd" || $doit $chowncmd "$dsttmp"; } &&
|
||||
{ test -z "$chgrpcmd" || $doit $chgrpcmd "$dsttmp"; } &&
|
||||
{ test -z "$stripcmd" || $doit $stripcmd "$dsttmp"; } &&
|
||||
{ test -z "$chmodcmd" || $doit $chmodcmd $mode "$dsttmp"; } &&
|
||||
|
||||
# If -C, don't bother to copy if it wouldn't change the file.
|
||||
if $copy_on_change &&
|
||||
old=`LC_ALL=C ls -dlL "$dst" 2>/dev/null` &&
|
||||
new=`LC_ALL=C ls -dlL "$dsttmp" 2>/dev/null` &&
|
||||
|
||||
eval "$initialize_posix_glob" &&
|
||||
$posix_glob set -f &&
|
||||
set X $old && old=:$2:$4:$5:$6 &&
|
||||
set X $new && new=:$2:$4:$5:$6 &&
|
||||
$posix_glob set +f &&
|
||||
|
||||
test "$old" = "$new" &&
|
||||
$cmpprog "$dst" "$dsttmp" >/dev/null 2>&1
|
||||
then
|
||||
rm -f "$dsttmp"
|
||||
else
|
||||
# Rename the file to the real destination.
|
||||
$doit $mvcmd -f "$dsttmp" "$dst" 2>/dev/null ||
|
||||
|
||||
# The rename failed, perhaps because mv can't rename something else
|
||||
# to itself, or perhaps because mv is so ancient that it does not
|
||||
# support -f.
|
||||
{
|
||||
# Now remove or move aside any old file at destination location.
|
||||
# We try this two ways since rm can't unlink itself on some
|
||||
# systems and the destination file might be busy for other
|
||||
# reasons. In this case, the final cleanup might fail but the new
|
||||
# file should still install successfully.
|
||||
{
|
||||
test ! -f "$dst" ||
|
||||
$doit $rmcmd -f "$dst" 2>/dev/null ||
|
||||
{ $doit $mvcmd -f "$dst" "$rmtmp" 2>/dev/null &&
|
||||
{ $doit $rmcmd -f "$rmtmp" 2>/dev/null; :; }
|
||||
} ||
|
||||
{ echo "$0: cannot unlink or rename $dst" >&2
|
||||
(exit 1); exit 1
|
||||
}
|
||||
} &&
|
||||
|
||||
# Now rename the file to the real destination.
|
||||
$doit $mvcmd "$dsttmp" "$dst"
|
||||
}
|
||||
fi || exit 1
|
||||
|
||||
trap '' 0
|
||||
fi
|
||||
done
|
||||
|
||||
# Local variables:
|
||||
# eval: (add-hook 'write-file-hooks 'time-stamp)
|
||||
# time-stamp-start: "scriptversion="
|
||||
# time-stamp-format: "%:y-%02m-%02d.%02H"
|
||||
# time-stamp-time-zone: "UTC"
|
||||
# time-stamp-end: "; # UTC"
|
||||
# End:
|
8413
src/Onigmo/ltmain.sh
Normal file
8413
src/Onigmo/ltmain.sh
Normal file
File diff suppressed because it is too large
Load Diff
7377
src/Onigmo/m4/libtool.m4
vendored
Normal file
7377
src/Onigmo/m4/libtool.m4
vendored
Normal file
File diff suppressed because it is too large
Load Diff
368
src/Onigmo/m4/ltoptions.m4
vendored
Normal file
368
src/Onigmo/m4/ltoptions.m4
vendored
Normal file
@ -0,0 +1,368 @@
|
||||
# Helper functions for option handling. -*- Autoconf -*-
|
||||
#
|
||||
# Copyright (C) 2004, 2005, 2007, 2008 Free Software Foundation, Inc.
|
||||
# Written by Gary V. Vaughan, 2004
|
||||
#
|
||||
# This file is free software; the Free Software Foundation gives
|
||||
# unlimited permission to copy and/or distribute it, with or without
|
||||
# modifications, as long as this notice is preserved.
|
||||
|
||||
# serial 6 ltoptions.m4
|
||||
|
||||
# This is to help aclocal find these macros, as it can't see m4_define.
|
||||
AC_DEFUN([LTOPTIONS_VERSION], [m4_if([1])])
|
||||
|
||||
|
||||
# _LT_MANGLE_OPTION(MACRO-NAME, OPTION-NAME)
|
||||
# ------------------------------------------
|
||||
m4_define([_LT_MANGLE_OPTION],
|
||||
[[_LT_OPTION_]m4_bpatsubst($1__$2, [[^a-zA-Z0-9_]], [_])])
|
||||
|
||||
|
||||
# _LT_SET_OPTION(MACRO-NAME, OPTION-NAME)
|
||||
# ---------------------------------------
|
||||
# Set option OPTION-NAME for macro MACRO-NAME, and if there is a
|
||||
# matching handler defined, dispatch to it. Other OPTION-NAMEs are
|
||||
# saved as a flag.
|
||||
m4_define([_LT_SET_OPTION],
|
||||
[m4_define(_LT_MANGLE_OPTION([$1], [$2]))dnl
|
||||
m4_ifdef(_LT_MANGLE_DEFUN([$1], [$2]),
|
||||
_LT_MANGLE_DEFUN([$1], [$2]),
|
||||
[m4_warning([Unknown $1 option `$2'])])[]dnl
|
||||
])
|
||||
|
||||
|
||||
# _LT_IF_OPTION(MACRO-NAME, OPTION-NAME, IF-SET, [IF-NOT-SET])
|
||||
# ------------------------------------------------------------
|
||||
# Execute IF-SET if OPTION is set, IF-NOT-SET otherwise.
|
||||
m4_define([_LT_IF_OPTION],
|
||||
[m4_ifdef(_LT_MANGLE_OPTION([$1], [$2]), [$3], [$4])])
|
||||
|
||||
|
||||
# _LT_UNLESS_OPTIONS(MACRO-NAME, OPTION-LIST, IF-NOT-SET)
|
||||
# -------------------------------------------------------
|
||||
# Execute IF-NOT-SET unless all options in OPTION-LIST for MACRO-NAME
|
||||
# are set.
|
||||
m4_define([_LT_UNLESS_OPTIONS],
|
||||
[m4_foreach([_LT_Option], m4_split(m4_normalize([$2])),
|
||||
[m4_ifdef(_LT_MANGLE_OPTION([$1], _LT_Option),
|
||||
[m4_define([$0_found])])])[]dnl
|
||||
m4_ifdef([$0_found], [m4_undefine([$0_found])], [$3
|
||||
])[]dnl
|
||||
])
|
||||
|
||||
|
||||
# _LT_SET_OPTIONS(MACRO-NAME, OPTION-LIST)
|
||||
# ----------------------------------------
|
||||
# OPTION-LIST is a space-separated list of Libtool options associated
|
||||
# with MACRO-NAME. If any OPTION has a matching handler declared with
|
||||
# LT_OPTION_DEFINE, dispatch to that macro; otherwise complain about
|
||||
# the unknown option and exit.
|
||||
m4_defun([_LT_SET_OPTIONS],
|
||||
[# Set options
|
||||
m4_foreach([_LT_Option], m4_split(m4_normalize([$2])),
|
||||
[_LT_SET_OPTION([$1], _LT_Option)])
|
||||
|
||||
m4_if([$1],[LT_INIT],[
|
||||
dnl
|
||||
dnl Simply set some default values (i.e. off) if boolean options were not
|
||||
dnl specified:
|
||||
_LT_UNLESS_OPTIONS([LT_INIT], [dlopen], [enable_dlopen=no
|
||||
])
|
||||
_LT_UNLESS_OPTIONS([LT_INIT], [win32-dll], [enable_win32_dll=no
|
||||
])
|
||||
dnl
|
||||
dnl If no reference was made to various pairs of opposing options, then
|
||||
dnl we run the default mode handler for the pair. For example, if neither
|
||||
dnl `shared' nor `disable-shared' was passed, we enable building of shared
|
||||
dnl archives by default:
|
||||
_LT_UNLESS_OPTIONS([LT_INIT], [shared disable-shared], [_LT_ENABLE_SHARED])
|
||||
_LT_UNLESS_OPTIONS([LT_INIT], [static disable-static], [_LT_ENABLE_STATIC])
|
||||
_LT_UNLESS_OPTIONS([LT_INIT], [pic-only no-pic], [_LT_WITH_PIC])
|
||||
_LT_UNLESS_OPTIONS([LT_INIT], [fast-install disable-fast-install],
|
||||
[_LT_ENABLE_FAST_INSTALL])
|
||||
])
|
||||
])# _LT_SET_OPTIONS
|
||||
|
||||
|
||||
## --------------------------------- ##
|
||||
## Macros to handle LT_INIT options. ##
|
||||
## --------------------------------- ##
|
||||
|
||||
# _LT_MANGLE_DEFUN(MACRO-NAME, OPTION-NAME)
|
||||
# -----------------------------------------
|
||||
m4_define([_LT_MANGLE_DEFUN],
|
||||
[[_LT_OPTION_DEFUN_]m4_bpatsubst(m4_toupper([$1__$2]), [[^A-Z0-9_]], [_])])
|
||||
|
||||
|
||||
# LT_OPTION_DEFINE(MACRO-NAME, OPTION-NAME, CODE)
|
||||
# -----------------------------------------------
|
||||
m4_define([LT_OPTION_DEFINE],
|
||||
[m4_define(_LT_MANGLE_DEFUN([$1], [$2]), [$3])[]dnl
|
||||
])# LT_OPTION_DEFINE
|
||||
|
||||
|
||||
# dlopen
|
||||
# ------
|
||||
LT_OPTION_DEFINE([LT_INIT], [dlopen], [enable_dlopen=yes
|
||||
])
|
||||
|
||||
AU_DEFUN([AC_LIBTOOL_DLOPEN],
|
||||
[_LT_SET_OPTION([LT_INIT], [dlopen])
|
||||
AC_DIAGNOSE([obsolete],
|
||||
[$0: Remove this warning and the call to _LT_SET_OPTION when you
|
||||
put the `dlopen' option into LT_INIT's first parameter.])
|
||||
])
|
||||
|
||||
dnl aclocal-1.4 backwards compatibility:
|
||||
dnl AC_DEFUN([AC_LIBTOOL_DLOPEN], [])
|
||||
|
||||
|
||||
# win32-dll
|
||||
# ---------
|
||||
# Declare package support for building win32 dll's.
|
||||
LT_OPTION_DEFINE([LT_INIT], [win32-dll],
|
||||
[enable_win32_dll=yes
|
||||
|
||||
case $host in
|
||||
*-*-cygwin* | *-*-mingw* | *-*-pw32* | *-cegcc*)
|
||||
AC_CHECK_TOOL(AS, as, false)
|
||||
AC_CHECK_TOOL(DLLTOOL, dlltool, false)
|
||||
AC_CHECK_TOOL(OBJDUMP, objdump, false)
|
||||
;;
|
||||
esac
|
||||
|
||||
test -z "$AS" && AS=as
|
||||
_LT_DECL([], [AS], [0], [Assembler program])dnl
|
||||
|
||||
test -z "$DLLTOOL" && DLLTOOL=dlltool
|
||||
_LT_DECL([], [DLLTOOL], [0], [DLL creation program])dnl
|
||||
|
||||
test -z "$OBJDUMP" && OBJDUMP=objdump
|
||||
_LT_DECL([], [OBJDUMP], [0], [Object dumper program])dnl
|
||||
])# win32-dll
|
||||
|
||||
AU_DEFUN([AC_LIBTOOL_WIN32_DLL],
|
||||
[AC_REQUIRE([AC_CANONICAL_HOST])dnl
|
||||
_LT_SET_OPTION([LT_INIT], [win32-dll])
|
||||
AC_DIAGNOSE([obsolete],
|
||||
[$0: Remove this warning and the call to _LT_SET_OPTION when you
|
||||
put the `win32-dll' option into LT_INIT's first parameter.])
|
||||
])
|
||||
|
||||
dnl aclocal-1.4 backwards compatibility:
|
||||
dnl AC_DEFUN([AC_LIBTOOL_WIN32_DLL], [])
|
||||
|
||||
|
||||
# _LT_ENABLE_SHARED([DEFAULT])
|
||||
# ----------------------------
|
||||
# implement the --enable-shared flag, and support the `shared' and
|
||||
# `disable-shared' LT_INIT options.
|
||||
# DEFAULT is either `yes' or `no'. If omitted, it defaults to `yes'.
|
||||
m4_define([_LT_ENABLE_SHARED],
|
||||
[m4_define([_LT_ENABLE_SHARED_DEFAULT], [m4_if($1, no, no, yes)])dnl
|
||||
AC_ARG_ENABLE([shared],
|
||||
[AS_HELP_STRING([--enable-shared@<:@=PKGS@:>@],
|
||||
[build shared libraries @<:@default=]_LT_ENABLE_SHARED_DEFAULT[@:>@])],
|
||||
[p=${PACKAGE-default}
|
||||
case $enableval in
|
||||
yes) enable_shared=yes ;;
|
||||
no) enable_shared=no ;;
|
||||
*)
|
||||
enable_shared=no
|
||||
# Look at the argument we got. We use all the common list separators.
|
||||
lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR,"
|
||||
for pkg in $enableval; do
|
||||
IFS="$lt_save_ifs"
|
||||
if test "X$pkg" = "X$p"; then
|
||||
enable_shared=yes
|
||||
fi
|
||||
done
|
||||
IFS="$lt_save_ifs"
|
||||
;;
|
||||
esac],
|
||||
[enable_shared=]_LT_ENABLE_SHARED_DEFAULT)
|
||||
|
||||
_LT_DECL([build_libtool_libs], [enable_shared], [0],
|
||||
[Whether or not to build shared libraries])
|
||||
])# _LT_ENABLE_SHARED
|
||||
|
||||
LT_OPTION_DEFINE([LT_INIT], [shared], [_LT_ENABLE_SHARED([yes])])
|
||||
LT_OPTION_DEFINE([LT_INIT], [disable-shared], [_LT_ENABLE_SHARED([no])])
|
||||
|
||||
# Old names:
|
||||
AC_DEFUN([AC_ENABLE_SHARED],
|
||||
[_LT_SET_OPTION([LT_INIT], m4_if([$1], [no], [disable-])[shared])
|
||||
])
|
||||
|
||||
AC_DEFUN([AC_DISABLE_SHARED],
|
||||
[_LT_SET_OPTION([LT_INIT], [disable-shared])
|
||||
])
|
||||
|
||||
AU_DEFUN([AM_ENABLE_SHARED], [AC_ENABLE_SHARED($@)])
|
||||
AU_DEFUN([AM_DISABLE_SHARED], [AC_DISABLE_SHARED($@)])
|
||||
|
||||
dnl aclocal-1.4 backwards compatibility:
|
||||
dnl AC_DEFUN([AM_ENABLE_SHARED], [])
|
||||
dnl AC_DEFUN([AM_DISABLE_SHARED], [])
|
||||
|
||||
|
||||
|
||||
# _LT_ENABLE_STATIC([DEFAULT])
|
||||
# ----------------------------
|
||||
# implement the --enable-static flag, and support the `static' and
|
||||
# `disable-static' LT_INIT options.
|
||||
# DEFAULT is either `yes' or `no'. If omitted, it defaults to `yes'.
|
||||
m4_define([_LT_ENABLE_STATIC],
|
||||
[m4_define([_LT_ENABLE_STATIC_DEFAULT], [m4_if($1, no, no, yes)])dnl
|
||||
AC_ARG_ENABLE([static],
|
||||
[AS_HELP_STRING([--enable-static@<:@=PKGS@:>@],
|
||||
[build static libraries @<:@default=]_LT_ENABLE_STATIC_DEFAULT[@:>@])],
|
||||
[p=${PACKAGE-default}
|
||||
case $enableval in
|
||||
yes) enable_static=yes ;;
|
||||
no) enable_static=no ;;
|
||||
*)
|
||||
enable_static=no
|
||||
# Look at the argument we got. We use all the common list separators.
|
||||
lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR,"
|
||||
for pkg in $enableval; do
|
||||
IFS="$lt_save_ifs"
|
||||
if test "X$pkg" = "X$p"; then
|
||||
enable_static=yes
|
||||
fi
|
||||
done
|
||||
IFS="$lt_save_ifs"
|
||||
;;
|
||||
esac],
|
||||
[enable_static=]_LT_ENABLE_STATIC_DEFAULT)
|
||||
|
||||
_LT_DECL([build_old_libs], [enable_static], [0],
|
||||
[Whether or not to build static libraries])
|
||||
])# _LT_ENABLE_STATIC
|
||||
|
||||
LT_OPTION_DEFINE([LT_INIT], [static], [_LT_ENABLE_STATIC([yes])])
|
||||
LT_OPTION_DEFINE([LT_INIT], [disable-static], [_LT_ENABLE_STATIC([no])])
|
||||
|
||||
# Old names:
|
||||
AC_DEFUN([AC_ENABLE_STATIC],
|
||||
[_LT_SET_OPTION([LT_INIT], m4_if([$1], [no], [disable-])[static])
|
||||
])
|
||||
|
||||
AC_DEFUN([AC_DISABLE_STATIC],
|
||||
[_LT_SET_OPTION([LT_INIT], [disable-static])
|
||||
])
|
||||
|
||||
AU_DEFUN([AM_ENABLE_STATIC], [AC_ENABLE_STATIC($@)])
|
||||
AU_DEFUN([AM_DISABLE_STATIC], [AC_DISABLE_STATIC($@)])
|
||||
|
||||
dnl aclocal-1.4 backwards compatibility:
|
||||
dnl AC_DEFUN([AM_ENABLE_STATIC], [])
|
||||
dnl AC_DEFUN([AM_DISABLE_STATIC], [])
|
||||
|
||||
|
||||
|
||||
# _LT_ENABLE_FAST_INSTALL([DEFAULT])
|
||||
# ----------------------------------
|
||||
# implement the --enable-fast-install flag, and support the `fast-install'
|
||||
# and `disable-fast-install' LT_INIT options.
|
||||
# DEFAULT is either `yes' or `no'. If omitted, it defaults to `yes'.
|
||||
m4_define([_LT_ENABLE_FAST_INSTALL],
|
||||
[m4_define([_LT_ENABLE_FAST_INSTALL_DEFAULT], [m4_if($1, no, no, yes)])dnl
|
||||
AC_ARG_ENABLE([fast-install],
|
||||
[AS_HELP_STRING([--enable-fast-install@<:@=PKGS@:>@],
|
||||
[optimize for fast installation @<:@default=]_LT_ENABLE_FAST_INSTALL_DEFAULT[@:>@])],
|
||||
[p=${PACKAGE-default}
|
||||
case $enableval in
|
||||
yes) enable_fast_install=yes ;;
|
||||
no) enable_fast_install=no ;;
|
||||
*)
|
||||
enable_fast_install=no
|
||||
# Look at the argument we got. We use all the common list separators.
|
||||
lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR,"
|
||||
for pkg in $enableval; do
|
||||
IFS="$lt_save_ifs"
|
||||
if test "X$pkg" = "X$p"; then
|
||||
enable_fast_install=yes
|
||||
fi
|
||||
done
|
||||
IFS="$lt_save_ifs"
|
||||
;;
|
||||
esac],
|
||||
[enable_fast_install=]_LT_ENABLE_FAST_INSTALL_DEFAULT)
|
||||
|
||||
_LT_DECL([fast_install], [enable_fast_install], [0],
|
||||
[Whether or not to optimize for fast installation])dnl
|
||||
])# _LT_ENABLE_FAST_INSTALL
|
||||
|
||||
LT_OPTION_DEFINE([LT_INIT], [fast-install], [_LT_ENABLE_FAST_INSTALL([yes])])
|
||||
LT_OPTION_DEFINE([LT_INIT], [disable-fast-install], [_LT_ENABLE_FAST_INSTALL([no])])
|
||||
|
||||
# Old names:
|
||||
AU_DEFUN([AC_ENABLE_FAST_INSTALL],
|
||||
[_LT_SET_OPTION([LT_INIT], m4_if([$1], [no], [disable-])[fast-install])
|
||||
AC_DIAGNOSE([obsolete],
|
||||
[$0: Remove this warning and the call to _LT_SET_OPTION when you put
|
||||
the `fast-install' option into LT_INIT's first parameter.])
|
||||
])
|
||||
|
||||
AU_DEFUN([AC_DISABLE_FAST_INSTALL],
|
||||
[_LT_SET_OPTION([LT_INIT], [disable-fast-install])
|
||||
AC_DIAGNOSE([obsolete],
|
||||
[$0: Remove this warning and the call to _LT_SET_OPTION when you put
|
||||
the `disable-fast-install' option into LT_INIT's first parameter.])
|
||||
])
|
||||
|
||||
dnl aclocal-1.4 backwards compatibility:
|
||||
dnl AC_DEFUN([AC_ENABLE_FAST_INSTALL], [])
|
||||
dnl AC_DEFUN([AM_DISABLE_FAST_INSTALL], [])
|
||||
|
||||
|
||||
# _LT_WITH_PIC([MODE])
|
||||
# --------------------
|
||||
# implement the --with-pic flag, and support the `pic-only' and `no-pic'
|
||||
# LT_INIT options.
|
||||
# MODE is either `yes' or `no'. If omitted, it defaults to `both'.
|
||||
m4_define([_LT_WITH_PIC],
|
||||
[AC_ARG_WITH([pic],
|
||||
[AS_HELP_STRING([--with-pic],
|
||||
[try to use only PIC/non-PIC objects @<:@default=use both@:>@])],
|
||||
[pic_mode="$withval"],
|
||||
[pic_mode=default])
|
||||
|
||||
test -z "$pic_mode" && pic_mode=m4_default([$1], [default])
|
||||
|
||||
_LT_DECL([], [pic_mode], [0], [What type of objects to build])dnl
|
||||
])# _LT_WITH_PIC
|
||||
|
||||
LT_OPTION_DEFINE([LT_INIT], [pic-only], [_LT_WITH_PIC([yes])])
|
||||
LT_OPTION_DEFINE([LT_INIT], [no-pic], [_LT_WITH_PIC([no])])
|
||||
|
||||
# Old name:
|
||||
AU_DEFUN([AC_LIBTOOL_PICMODE],
|
||||
[_LT_SET_OPTION([LT_INIT], [pic-only])
|
||||
AC_DIAGNOSE([obsolete],
|
||||
[$0: Remove this warning and the call to _LT_SET_OPTION when you
|
||||
put the `pic-only' option into LT_INIT's first parameter.])
|
||||
])
|
||||
|
||||
dnl aclocal-1.4 backwards compatibility:
|
||||
dnl AC_DEFUN([AC_LIBTOOL_PICMODE], [])
|
||||
|
||||
## ----------------- ##
|
||||
## LTDL_INIT Options ##
|
||||
## ----------------- ##
|
||||
|
||||
m4_define([_LTDL_MODE], [])
|
||||
LT_OPTION_DEFINE([LTDL_INIT], [nonrecursive],
|
||||
[m4_define([_LTDL_MODE], [nonrecursive])])
|
||||
LT_OPTION_DEFINE([LTDL_INIT], [recursive],
|
||||
[m4_define([_LTDL_MODE], [recursive])])
|
||||
LT_OPTION_DEFINE([LTDL_INIT], [subproject],
|
||||
[m4_define([_LTDL_MODE], [subproject])])
|
||||
|
||||
m4_define([_LTDL_TYPE], [])
|
||||
LT_OPTION_DEFINE([LTDL_INIT], [installable],
|
||||
[m4_define([_LTDL_TYPE], [installable])])
|
||||
LT_OPTION_DEFINE([LTDL_INIT], [convenience],
|
||||
[m4_define([_LTDL_TYPE], [convenience])])
|
123
src/Onigmo/m4/ltsugar.m4
vendored
Normal file
123
src/Onigmo/m4/ltsugar.m4
vendored
Normal file
@ -0,0 +1,123 @@
|
||||
# ltsugar.m4 -- libtool m4 base layer. -*-Autoconf-*-
|
||||
#
|
||||
# Copyright (C) 2004, 2005, 2007, 2008 Free Software Foundation, Inc.
|
||||
# Written by Gary V. Vaughan, 2004
|
||||
#
|
||||
# This file is free software; the Free Software Foundation gives
|
||||
# unlimited permission to copy and/or distribute it, with or without
|
||||
# modifications, as long as this notice is preserved.
|
||||
|
||||
# serial 6 ltsugar.m4
|
||||
|
||||
# This is to help aclocal find these macros, as it can't see m4_define.
|
||||
AC_DEFUN([LTSUGAR_VERSION], [m4_if([0.1])])
|
||||
|
||||
|
||||
# lt_join(SEP, ARG1, [ARG2...])
|
||||
# -----------------------------
|
||||
# Produce ARG1SEPARG2...SEPARGn, omitting [] arguments and their
|
||||
# associated separator.
|
||||
# Needed until we can rely on m4_join from Autoconf 2.62, since all earlier
|
||||
# versions in m4sugar had bugs.
|
||||
m4_define([lt_join],
|
||||
[m4_if([$#], [1], [],
|
||||
[$#], [2], [[$2]],
|
||||
[m4_if([$2], [], [], [[$2]_])$0([$1], m4_shift(m4_shift($@)))])])
|
||||
m4_define([_lt_join],
|
||||
[m4_if([$#$2], [2], [],
|
||||
[m4_if([$2], [], [], [[$1$2]])$0([$1], m4_shift(m4_shift($@)))])])
|
||||
|
||||
|
||||
# lt_car(LIST)
|
||||
# lt_cdr(LIST)
|
||||
# ------------
|
||||
# Manipulate m4 lists.
|
||||
# These macros are necessary as long as we still need to support
|
||||
# Autoconf-2.59 which quotes differently.
|
||||
m4_define([lt_car], [[$1]])
|
||||
m4_define([lt_cdr],
|
||||
[m4_if([$#], 0, [m4_fatal([$0: cannot be called without arguments])],
|
||||
[$#], 1, [],
|
||||
[m4_dquote(m4_shift($@))])])
|
||||
m4_define([lt_unquote], $1)
|
||||
|
||||
|
||||
# lt_append(MACRO-NAME, STRING, [SEPARATOR])
|
||||
# ------------------------------------------
|
||||
# Redefine MACRO-NAME to hold its former content plus `SEPARATOR'`STRING'.
|
||||
# Note that neither SEPARATOR nor STRING are expanded; they are appended
|
||||
# to MACRO-NAME as is (leaving the expansion for when MACRO-NAME is invoked).
|
||||
# No SEPARATOR is output if MACRO-NAME was previously undefined (different
|
||||
# than defined and empty).
|
||||
#
|
||||
# This macro is needed until we can rely on Autoconf 2.62, since earlier
|
||||
# versions of m4sugar mistakenly expanded SEPARATOR but not STRING.
|
||||
m4_define([lt_append],
|
||||
[m4_define([$1],
|
||||
m4_ifdef([$1], [m4_defn([$1])[$3]])[$2])])
|
||||
|
||||
|
||||
|
||||
# lt_combine(SEP, PREFIX-LIST, INFIX, SUFFIX1, [SUFFIX2...])
|
||||
# ----------------------------------------------------------
|
||||
# Produce a SEP delimited list of all paired combinations of elements of
|
||||
# PREFIX-LIST with SUFFIX1 through SUFFIXn. Each element of the list
|
||||
# has the form PREFIXmINFIXSUFFIXn.
|
||||
# Needed until we can rely on m4_combine added in Autoconf 2.62.
|
||||
m4_define([lt_combine],
|
||||
[m4_if(m4_eval([$# > 3]), [1],
|
||||
[m4_pushdef([_Lt_sep], [m4_define([_Lt_sep], m4_defn([lt_car]))])]]dnl
|
||||
[[m4_foreach([_Lt_prefix], [$2],
|
||||
[m4_foreach([_Lt_suffix],
|
||||
]m4_dquote(m4_dquote(m4_shift(m4_shift(m4_shift($@)))))[,
|
||||
[_Lt_sep([$1])[]m4_defn([_Lt_prefix])[$3]m4_defn([_Lt_suffix])])])])])
|
||||
|
||||
|
||||
# lt_if_append_uniq(MACRO-NAME, VARNAME, [SEPARATOR], [UNIQ], [NOT-UNIQ])
|
||||
# -----------------------------------------------------------------------
|
||||
# Iff MACRO-NAME does not yet contain VARNAME, then append it (delimited
|
||||
# by SEPARATOR if supplied) and expand UNIQ, else NOT-UNIQ.
|
||||
m4_define([lt_if_append_uniq],
|
||||
[m4_ifdef([$1],
|
||||
[m4_if(m4_index([$3]m4_defn([$1])[$3], [$3$2$3]), [-1],
|
||||
[lt_append([$1], [$2], [$3])$4],
|
||||
[$5])],
|
||||
[lt_append([$1], [$2], [$3])$4])])
|
||||
|
||||
|
||||
# lt_dict_add(DICT, KEY, VALUE)
|
||||
# -----------------------------
|
||||
m4_define([lt_dict_add],
|
||||
[m4_define([$1($2)], [$3])])
|
||||
|
||||
|
||||
# lt_dict_add_subkey(DICT, KEY, SUBKEY, VALUE)
|
||||
# --------------------------------------------
|
||||
m4_define([lt_dict_add_subkey],
|
||||
[m4_define([$1($2:$3)], [$4])])
|
||||
|
||||
|
||||
# lt_dict_fetch(DICT, KEY, [SUBKEY])
|
||||
# ----------------------------------
|
||||
m4_define([lt_dict_fetch],
|
||||
[m4_ifval([$3],
|
||||
m4_ifdef([$1($2:$3)], [m4_defn([$1($2:$3)])]),
|
||||
m4_ifdef([$1($2)], [m4_defn([$1($2)])]))])
|
||||
|
||||
|
||||
# lt_if_dict_fetch(DICT, KEY, [SUBKEY], VALUE, IF-TRUE, [IF-FALSE])
|
||||
# -----------------------------------------------------------------
|
||||
m4_define([lt_if_dict_fetch],
|
||||
[m4_if(lt_dict_fetch([$1], [$2], [$3]), [$4],
|
||||
[$5],
|
||||
[$6])])
|
||||
|
||||
|
||||
# lt_dict_filter(DICT, [SUBKEY], VALUE, [SEPARATOR], KEY, [...])
|
||||
# --------------------------------------------------------------
|
||||
m4_define([lt_dict_filter],
|
||||
[m4_if([$5], [], [],
|
||||
[lt_join(m4_quote(m4_default([$4], [[, ]])),
|
||||
lt_unquote(m4_split(m4_normalize(m4_foreach(_Lt_key, lt_car([m4_shiftn(4, $@)]),
|
||||
[lt_if_dict_fetch([$1], _Lt_key, [$2], [$3], [_Lt_key ])])))))])[]dnl
|
||||
])
|
23
src/Onigmo/m4/ltversion.m4
vendored
Normal file
23
src/Onigmo/m4/ltversion.m4
vendored
Normal file
@ -0,0 +1,23 @@
|
||||
# ltversion.m4 -- version numbers -*- Autoconf -*-
|
||||
#
|
||||
# Copyright (C) 2004 Free Software Foundation, Inc.
|
||||
# Written by Scott James Remnant, 2004
|
||||
#
|
||||
# This file is free software; the Free Software Foundation gives
|
||||
# unlimited permission to copy and/or distribute it, with or without
|
||||
# modifications, as long as this notice is preserved.
|
||||
|
||||
# Generated from ltversion.in.
|
||||
|
||||
# serial 3017 ltversion.m4
|
||||
# This file is part of GNU Libtool
|
||||
|
||||
m4_define([LT_PACKAGE_VERSION], [2.2.6b])
|
||||
m4_define([LT_PACKAGE_REVISION], [1.3017])
|
||||
|
||||
AC_DEFUN([LTVERSION_VERSION],
|
||||
[macro_version='2.2.6b'
|
||||
macro_revision='1.3017'
|
||||
_LT_DECL(, macro_version, 0, [Which release of libtool.m4 was used?])
|
||||
_LT_DECL(, macro_revision, 0)
|
||||
])
|
92
src/Onigmo/m4/lt~obsolete.m4
vendored
Normal file
92
src/Onigmo/m4/lt~obsolete.m4
vendored
Normal file
@ -0,0 +1,92 @@
|
||||
# lt~obsolete.m4 -- aclocal satisfying obsolete definitions. -*-Autoconf-*-
|
||||
#
|
||||
# Copyright (C) 2004, 2005, 2007 Free Software Foundation, Inc.
|
||||
# Written by Scott James Remnant, 2004.
|
||||
#
|
||||
# This file is free software; the Free Software Foundation gives
|
||||
# unlimited permission to copy and/or distribute it, with or without
|
||||
# modifications, as long as this notice is preserved.
|
||||
|
||||
# serial 4 lt~obsolete.m4
|
||||
|
||||
# These exist entirely to fool aclocal when bootstrapping libtool.
|
||||
#
|
||||
# In the past libtool.m4 has provided macros via AC_DEFUN (or AU_DEFUN)
|
||||
# which have later been changed to m4_define as they aren't part of the
|
||||
# exported API, or moved to Autoconf or Automake where they belong.
|
||||
#
|
||||
# The trouble is, aclocal is a bit thick. It'll see the old AC_DEFUN
|
||||
# in /usr/share/aclocal/libtool.m4 and remember it, then when it sees us
|
||||
# using a macro with the same name in our local m4/libtool.m4 it'll
|
||||
# pull the old libtool.m4 in (it doesn't see our shiny new m4_define
|
||||
# and doesn't know about Autoconf macros at all.)
|
||||
#
|
||||
# So we provide this file, which has a silly filename so it's always
|
||||
# included after everything else. This provides aclocal with the
|
||||
# AC_DEFUNs it wants, but when m4 processes it, it doesn't do anything
|
||||
# because those macros already exist, or will be overwritten later.
|
||||
# We use AC_DEFUN over AU_DEFUN for compatibility with aclocal-1.6.
|
||||
#
|
||||
# Anytime we withdraw an AC_DEFUN or AU_DEFUN, remember to add it here.
|
||||
# Yes, that means every name once taken will need to remain here until
|
||||
# we give up compatibility with versions before 1.7, at which point
|
||||
# we need to keep only those names which we still refer to.
|
||||
|
||||
# This is to help aclocal find these macros, as it can't see m4_define.
|
||||
AC_DEFUN([LTOBSOLETE_VERSION], [m4_if([1])])
|
||||
|
||||
m4_ifndef([AC_LIBTOOL_LINKER_OPTION], [AC_DEFUN([AC_LIBTOOL_LINKER_OPTION])])
|
||||
m4_ifndef([AC_PROG_EGREP], [AC_DEFUN([AC_PROG_EGREP])])
|
||||
m4_ifndef([_LT_AC_PROG_ECHO_BACKSLASH], [AC_DEFUN([_LT_AC_PROG_ECHO_BACKSLASH])])
|
||||
m4_ifndef([_LT_AC_SHELL_INIT], [AC_DEFUN([_LT_AC_SHELL_INIT])])
|
||||
m4_ifndef([_LT_AC_SYS_LIBPATH_AIX], [AC_DEFUN([_LT_AC_SYS_LIBPATH_AIX])])
|
||||
m4_ifndef([_LT_PROG_LTMAIN], [AC_DEFUN([_LT_PROG_LTMAIN])])
|
||||
m4_ifndef([_LT_AC_TAGVAR], [AC_DEFUN([_LT_AC_TAGVAR])])
|
||||
m4_ifndef([AC_LTDL_ENABLE_INSTALL], [AC_DEFUN([AC_LTDL_ENABLE_INSTALL])])
|
||||
m4_ifndef([AC_LTDL_PREOPEN], [AC_DEFUN([AC_LTDL_PREOPEN])])
|
||||
m4_ifndef([_LT_AC_SYS_COMPILER], [AC_DEFUN([_LT_AC_SYS_COMPILER])])
|
||||
m4_ifndef([_LT_AC_LOCK], [AC_DEFUN([_LT_AC_LOCK])])
|
||||
m4_ifndef([AC_LIBTOOL_SYS_OLD_ARCHIVE], [AC_DEFUN([AC_LIBTOOL_SYS_OLD_ARCHIVE])])
|
||||
m4_ifndef([_LT_AC_TRY_DLOPEN_SELF], [AC_DEFUN([_LT_AC_TRY_DLOPEN_SELF])])
|
||||
m4_ifndef([AC_LIBTOOL_PROG_CC_C_O], [AC_DEFUN([AC_LIBTOOL_PROG_CC_C_O])])
|
||||
m4_ifndef([AC_LIBTOOL_SYS_HARD_LINK_LOCKS], [AC_DEFUN([AC_LIBTOOL_SYS_HARD_LINK_LOCKS])])
|
||||
m4_ifndef([AC_LIBTOOL_OBJDIR], [AC_DEFUN([AC_LIBTOOL_OBJDIR])])
|
||||
m4_ifndef([AC_LTDL_OBJDIR], [AC_DEFUN([AC_LTDL_OBJDIR])])
|
||||
m4_ifndef([AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH], [AC_DEFUN([AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH])])
|
||||
m4_ifndef([AC_LIBTOOL_SYS_LIB_STRIP], [AC_DEFUN([AC_LIBTOOL_SYS_LIB_STRIP])])
|
||||
m4_ifndef([AC_PATH_MAGIC], [AC_DEFUN([AC_PATH_MAGIC])])
|
||||
m4_ifndef([AC_PROG_LD_GNU], [AC_DEFUN([AC_PROG_LD_GNU])])
|
||||
m4_ifndef([AC_PROG_LD_RELOAD_FLAG], [AC_DEFUN([AC_PROG_LD_RELOAD_FLAG])])
|
||||
m4_ifndef([AC_DEPLIBS_CHECK_METHOD], [AC_DEFUN([AC_DEPLIBS_CHECK_METHOD])])
|
||||
m4_ifndef([AC_LIBTOOL_PROG_COMPILER_NO_RTTI], [AC_DEFUN([AC_LIBTOOL_PROG_COMPILER_NO_RTTI])])
|
||||
m4_ifndef([AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE], [AC_DEFUN([AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE])])
|
||||
m4_ifndef([AC_LIBTOOL_PROG_COMPILER_PIC], [AC_DEFUN([AC_LIBTOOL_PROG_COMPILER_PIC])])
|
||||
m4_ifndef([AC_LIBTOOL_PROG_LD_SHLIBS], [AC_DEFUN([AC_LIBTOOL_PROG_LD_SHLIBS])])
|
||||
m4_ifndef([AC_LIBTOOL_POSTDEP_PREDEP], [AC_DEFUN([AC_LIBTOOL_POSTDEP_PREDEP])])
|
||||
m4_ifndef([LT_AC_PROG_EGREP], [AC_DEFUN([LT_AC_PROG_EGREP])])
|
||||
m4_ifndef([LT_AC_PROG_SED], [AC_DEFUN([LT_AC_PROG_SED])])
|
||||
m4_ifndef([_LT_CC_BASENAME], [AC_DEFUN([_LT_CC_BASENAME])])
|
||||
m4_ifndef([_LT_COMPILER_BOILERPLATE], [AC_DEFUN([_LT_COMPILER_BOILERPLATE])])
|
||||
m4_ifndef([_LT_LINKER_BOILERPLATE], [AC_DEFUN([_LT_LINKER_BOILERPLATE])])
|
||||
m4_ifndef([_AC_PROG_LIBTOOL], [AC_DEFUN([_AC_PROG_LIBTOOL])])
|
||||
m4_ifndef([AC_LIBTOOL_SETUP], [AC_DEFUN([AC_LIBTOOL_SETUP])])
|
||||
m4_ifndef([_LT_AC_CHECK_DLFCN], [AC_DEFUN([_LT_AC_CHECK_DLFCN])])
|
||||
m4_ifndef([AC_LIBTOOL_SYS_DYNAMIC_LINKER], [AC_DEFUN([AC_LIBTOOL_SYS_DYNAMIC_LINKER])])
|
||||
m4_ifndef([_LT_AC_TAGCONFIG], [AC_DEFUN([_LT_AC_TAGCONFIG])])
|
||||
m4_ifndef([AC_DISABLE_FAST_INSTALL], [AC_DEFUN([AC_DISABLE_FAST_INSTALL])])
|
||||
m4_ifndef([_LT_AC_LANG_CXX], [AC_DEFUN([_LT_AC_LANG_CXX])])
|
||||
m4_ifndef([_LT_AC_LANG_F77], [AC_DEFUN([_LT_AC_LANG_F77])])
|
||||
m4_ifndef([_LT_AC_LANG_GCJ], [AC_DEFUN([_LT_AC_LANG_GCJ])])
|
||||
m4_ifndef([AC_LIBTOOL_RC], [AC_DEFUN([AC_LIBTOOL_RC])])
|
||||
m4_ifndef([AC_LIBTOOL_LANG_C_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_C_CONFIG])])
|
||||
m4_ifndef([_LT_AC_LANG_C_CONFIG], [AC_DEFUN([_LT_AC_LANG_C_CONFIG])])
|
||||
m4_ifndef([AC_LIBTOOL_LANG_CXX_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_CXX_CONFIG])])
|
||||
m4_ifndef([_LT_AC_LANG_CXX_CONFIG], [AC_DEFUN([_LT_AC_LANG_CXX_CONFIG])])
|
||||
m4_ifndef([AC_LIBTOOL_LANG_F77_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_F77_CONFIG])])
|
||||
m4_ifndef([_LT_AC_LANG_F77_CONFIG], [AC_DEFUN([_LT_AC_LANG_F77_CONFIG])])
|
||||
m4_ifndef([AC_LIBTOOL_LANG_GCJ_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_GCJ_CONFIG])])
|
||||
m4_ifndef([_LT_AC_LANG_GCJ_CONFIG], [AC_DEFUN([_LT_AC_LANG_GCJ_CONFIG])])
|
||||
m4_ifndef([AC_LIBTOOL_LANG_RC_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_RC_CONFIG])])
|
||||
m4_ifndef([_LT_AC_LANG_RC_CONFIG], [AC_DEFUN([_LT_AC_LANG_RC_CONFIG])])
|
||||
m4_ifndef([AC_LIBTOOL_CONFIG], [AC_DEFUN([AC_LIBTOOL_CONFIG])])
|
||||
m4_ifndef([_LT_AC_FILE_LTDLL_C], [AC_DEFUN([_LT_AC_FILE_LTDLL_C])])
|
376
src/Onigmo/missing
Normal file
376
src/Onigmo/missing
Normal file
@ -0,0 +1,376 @@
|
||||
#! /bin/sh
|
||||
# Common stub for a few missing GNU programs while installing.
|
||||
|
||||
scriptversion=2009-04-28.21; # UTC
|
||||
|
||||
# Copyright (C) 1996, 1997, 1999, 2000, 2002, 2003, 2004, 2005, 2006,
|
||||
# 2008, 2009 Free Software Foundation, Inc.
|
||||
# Originally by Fran,cois Pinard <pinard@iro.umontreal.ca>, 1996.
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2, or (at your option)
|
||||
# any later version.
|
||||
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
# As a special exception to the GNU General Public License, if you
|
||||
# distribute this file as part of a program that contains a
|
||||
# configuration script generated by Autoconf, you may include it under
|
||||
# the same distribution terms that you use for the rest of that program.
|
||||
|
||||
if test $# -eq 0; then
|
||||
echo 1>&2 "Try \`$0 --help' for more information"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
run=:
|
||||
sed_output='s/.* --output[ =]\([^ ]*\).*/\1/p'
|
||||
sed_minuso='s/.* -o \([^ ]*\).*/\1/p'
|
||||
|
||||
# In the cases where this matters, `missing' is being run in the
|
||||
# srcdir already.
|
||||
if test -f configure.ac; then
|
||||
configure_ac=configure.ac
|
||||
else
|
||||
configure_ac=configure.in
|
||||
fi
|
||||
|
||||
msg="missing on your system"
|
||||
|
||||
case $1 in
|
||||
--run)
|
||||
# Try to run requested program, and just exit if it succeeds.
|
||||
run=
|
||||
shift
|
||||
"$@" && exit 0
|
||||
# Exit code 63 means version mismatch. This often happens
|
||||
# when the user tries to use an ancient version of a tool on
|
||||
# a file that requires a minimum version. In this case we
|
||||
# should proceed as if the program had been absent, or
|
||||
# if --run hadn't been passed.
|
||||
if test $? = 63; then
|
||||
run=:
|
||||
msg="probably too old"
|
||||
fi
|
||||
;;
|
||||
|
||||
-h|--h|--he|--hel|--help)
|
||||
echo "\
|
||||
$0 [OPTION]... PROGRAM [ARGUMENT]...
|
||||
|
||||
Handle \`PROGRAM [ARGUMENT]...' for when PROGRAM is missing, or return an
|
||||
error status if there is no known handling for PROGRAM.
|
||||
|
||||
Options:
|
||||
-h, --help display this help and exit
|
||||
-v, --version output version information and exit
|
||||
--run try to run the given command, and emulate it if it fails
|
||||
|
||||
Supported PROGRAM values:
|
||||
aclocal touch file \`aclocal.m4'
|
||||
autoconf touch file \`configure'
|
||||
autoheader touch file \`config.h.in'
|
||||
autom4te touch the output file, or create a stub one
|
||||
automake touch all \`Makefile.in' files
|
||||
bison create \`y.tab.[ch]', if possible, from existing .[ch]
|
||||
flex create \`lex.yy.c', if possible, from existing .c
|
||||
help2man touch the output file
|
||||
lex create \`lex.yy.c', if possible, from existing .c
|
||||
makeinfo touch the output file
|
||||
tar try tar, gnutar, gtar, then tar without non-portable flags
|
||||
yacc create \`y.tab.[ch]', if possible, from existing .[ch]
|
||||
|
||||
Version suffixes to PROGRAM as well as the prefixes \`gnu-', \`gnu', and
|
||||
\`g' are ignored when checking the name.
|
||||
|
||||
Send bug reports to <bug-automake@gnu.org>."
|
||||
exit $?
|
||||
;;
|
||||
|
||||
-v|--v|--ve|--ver|--vers|--versi|--versio|--version)
|
||||
echo "missing $scriptversion (GNU Automake)"
|
||||
exit $?
|
||||
;;
|
||||
|
||||
-*)
|
||||
echo 1>&2 "$0: Unknown \`$1' option"
|
||||
echo 1>&2 "Try \`$0 --help' for more information"
|
||||
exit 1
|
||||
;;
|
||||
|
||||
esac
|
||||
|
||||
# normalize program name to check for.
|
||||
program=`echo "$1" | sed '
|
||||
s/^gnu-//; t
|
||||
s/^gnu//; t
|
||||
s/^g//; t'`
|
||||
|
||||
# Now exit if we have it, but it failed. Also exit now if we
|
||||
# don't have it and --version was passed (most likely to detect
|
||||
# the program). This is about non-GNU programs, so use $1 not
|
||||
# $program.
|
||||
case $1 in
|
||||
lex*|yacc*)
|
||||
# Not GNU programs, they don't have --version.
|
||||
;;
|
||||
|
||||
tar*)
|
||||
if test -n "$run"; then
|
||||
echo 1>&2 "ERROR: \`tar' requires --run"
|
||||
exit 1
|
||||
elif test "x$2" = "x--version" || test "x$2" = "x--help"; then
|
||||
exit 1
|
||||
fi
|
||||
;;
|
||||
|
||||
*)
|
||||
if test -z "$run" && ($1 --version) > /dev/null 2>&1; then
|
||||
# We have it, but it failed.
|
||||
exit 1
|
||||
elif test "x$2" = "x--version" || test "x$2" = "x--help"; then
|
||||
# Could not run --version or --help. This is probably someone
|
||||
# running `$TOOL --version' or `$TOOL --help' to check whether
|
||||
# $TOOL exists and not knowing $TOOL uses missing.
|
||||
exit 1
|
||||
fi
|
||||
;;
|
||||
esac
|
||||
|
||||
# If it does not exist, or fails to run (possibly an outdated version),
|
||||
# try to emulate it.
|
||||
case $program in
|
||||
aclocal*)
|
||||
echo 1>&2 "\
|
||||
WARNING: \`$1' is $msg. You should only need it if
|
||||
you modified \`acinclude.m4' or \`${configure_ac}'. You might want
|
||||
to install the \`Automake' and \`Perl' packages. Grab them from
|
||||
any GNU archive site."
|
||||
touch aclocal.m4
|
||||
;;
|
||||
|
||||
autoconf*)
|
||||
echo 1>&2 "\
|
||||
WARNING: \`$1' is $msg. You should only need it if
|
||||
you modified \`${configure_ac}'. You might want to install the
|
||||
\`Autoconf' and \`GNU m4' packages. Grab them from any GNU
|
||||
archive site."
|
||||
touch configure
|
||||
;;
|
||||
|
||||
autoheader*)
|
||||
echo 1>&2 "\
|
||||
WARNING: \`$1' is $msg. You should only need it if
|
||||
you modified \`acconfig.h' or \`${configure_ac}'. You might want
|
||||
to install the \`Autoconf' and \`GNU m4' packages. Grab them
|
||||
from any GNU archive site."
|
||||
files=`sed -n 's/^[ ]*A[CM]_CONFIG_HEADER(\([^)]*\)).*/\1/p' ${configure_ac}`
|
||||
test -z "$files" && files="config.h"
|
||||
touch_files=
|
||||
for f in $files; do
|
||||
case $f in
|
||||
*:*) touch_files="$touch_files "`echo "$f" |
|
||||
sed -e 's/^[^:]*://' -e 's/:.*//'`;;
|
||||
*) touch_files="$touch_files $f.in";;
|
||||
esac
|
||||
done
|
||||
touch $touch_files
|
||||
;;
|
||||
|
||||
automake*)
|
||||
echo 1>&2 "\
|
||||
WARNING: \`$1' is $msg. You should only need it if
|
||||
you modified \`Makefile.am', \`acinclude.m4' or \`${configure_ac}'.
|
||||
You might want to install the \`Automake' and \`Perl' packages.
|
||||
Grab them from any GNU archive site."
|
||||
find . -type f -name Makefile.am -print |
|
||||
sed 's/\.am$/.in/' |
|
||||
while read f; do touch "$f"; done
|
||||
;;
|
||||
|
||||
autom4te*)
|
||||
echo 1>&2 "\
|
||||
WARNING: \`$1' is needed, but is $msg.
|
||||
You might have modified some files without having the
|
||||
proper tools for further handling them.
|
||||
You can get \`$1' as part of \`Autoconf' from any GNU
|
||||
archive site."
|
||||
|
||||
file=`echo "$*" | sed -n "$sed_output"`
|
||||
test -z "$file" && file=`echo "$*" | sed -n "$sed_minuso"`
|
||||
if test -f "$file"; then
|
||||
touch $file
|
||||
else
|
||||
test -z "$file" || exec >$file
|
||||
echo "#! /bin/sh"
|
||||
echo "# Created by GNU Automake missing as a replacement of"
|
||||
echo "# $ $@"
|
||||
echo "exit 0"
|
||||
chmod +x $file
|
||||
exit 1
|
||||
fi
|
||||
;;
|
||||
|
||||
bison*|yacc*)
|
||||
echo 1>&2 "\
|
||||
WARNING: \`$1' $msg. You should only need it if
|
||||
you modified a \`.y' file. You may need the \`Bison' package
|
||||
in order for those modifications to take effect. You can get
|
||||
\`Bison' from any GNU archive site."
|
||||
rm -f y.tab.c y.tab.h
|
||||
if test $# -ne 1; then
|
||||
eval LASTARG="\${$#}"
|
||||
case $LASTARG in
|
||||
*.y)
|
||||
SRCFILE=`echo "$LASTARG" | sed 's/y$/c/'`
|
||||
if test -f "$SRCFILE"; then
|
||||
cp "$SRCFILE" y.tab.c
|
||||
fi
|
||||
SRCFILE=`echo "$LASTARG" | sed 's/y$/h/'`
|
||||
if test -f "$SRCFILE"; then
|
||||
cp "$SRCFILE" y.tab.h
|
||||
fi
|
||||
;;
|
||||
esac
|
||||
fi
|
||||
if test ! -f y.tab.h; then
|
||||
echo >y.tab.h
|
||||
fi
|
||||
if test ! -f y.tab.c; then
|
||||
echo 'main() { return 0; }' >y.tab.c
|
||||
fi
|
||||
;;
|
||||
|
||||
lex*|flex*)
|
||||
echo 1>&2 "\
|
||||
WARNING: \`$1' is $msg. You should only need it if
|
||||
you modified a \`.l' file. You may need the \`Flex' package
|
||||
in order for those modifications to take effect. You can get
|
||||
\`Flex' from any GNU archive site."
|
||||
rm -f lex.yy.c
|
||||
if test $# -ne 1; then
|
||||
eval LASTARG="\${$#}"
|
||||
case $LASTARG in
|
||||
*.l)
|
||||
SRCFILE=`echo "$LASTARG" | sed 's/l$/c/'`
|
||||
if test -f "$SRCFILE"; then
|
||||
cp "$SRCFILE" lex.yy.c
|
||||
fi
|
||||
;;
|
||||
esac
|
||||
fi
|
||||
if test ! -f lex.yy.c; then
|
||||
echo 'main() { return 0; }' >lex.yy.c
|
||||
fi
|
||||
;;
|
||||
|
||||
help2man*)
|
||||
echo 1>&2 "\
|
||||
WARNING: \`$1' is $msg. You should only need it if
|
||||
you modified a dependency of a manual page. You may need the
|
||||
\`Help2man' package in order for those modifications to take
|
||||
effect. You can get \`Help2man' from any GNU archive site."
|
||||
|
||||
file=`echo "$*" | sed -n "$sed_output"`
|
||||
test -z "$file" && file=`echo "$*" | sed -n "$sed_minuso"`
|
||||
if test -f "$file"; then
|
||||
touch $file
|
||||
else
|
||||
test -z "$file" || exec >$file
|
||||
echo ".ab help2man is required to generate this page"
|
||||
exit $?
|
||||
fi
|
||||
;;
|
||||
|
||||
makeinfo*)
|
||||
echo 1>&2 "\
|
||||
WARNING: \`$1' is $msg. You should only need it if
|
||||
you modified a \`.texi' or \`.texinfo' file, or any other file
|
||||
indirectly affecting the aspect of the manual. The spurious
|
||||
call might also be the consequence of using a buggy \`make' (AIX,
|
||||
DU, IRIX). You might want to install the \`Texinfo' package or
|
||||
the \`GNU make' package. Grab either from any GNU archive site."
|
||||
# The file to touch is that specified with -o ...
|
||||
file=`echo "$*" | sed -n "$sed_output"`
|
||||
test -z "$file" && file=`echo "$*" | sed -n "$sed_minuso"`
|
||||
if test -z "$file"; then
|
||||
# ... or it is the one specified with @setfilename ...
|
||||
infile=`echo "$*" | sed 's/.* \([^ ]*\) *$/\1/'`
|
||||
file=`sed -n '
|
||||
/^@setfilename/{
|
||||
s/.* \([^ ]*\) *$/\1/
|
||||
p
|
||||
q
|
||||
}' $infile`
|
||||
# ... or it is derived from the source name (dir/f.texi becomes f.info)
|
||||
test -z "$file" && file=`echo "$infile" | sed 's,.*/,,;s,.[^.]*$,,'`.info
|
||||
fi
|
||||
# If the file does not exist, the user really needs makeinfo;
|
||||
# let's fail without touching anything.
|
||||
test -f $file || exit 1
|
||||
touch $file
|
||||
;;
|
||||
|
||||
tar*)
|
||||
shift
|
||||
|
||||
# We have already tried tar in the generic part.
|
||||
# Look for gnutar/gtar before invocation to avoid ugly error
|
||||
# messages.
|
||||
if (gnutar --version > /dev/null 2>&1); then
|
||||
gnutar "$@" && exit 0
|
||||
fi
|
||||
if (gtar --version > /dev/null 2>&1); then
|
||||
gtar "$@" && exit 0
|
||||
fi
|
||||
firstarg="$1"
|
||||
if shift; then
|
||||
case $firstarg in
|
||||
*o*)
|
||||
firstarg=`echo "$firstarg" | sed s/o//`
|
||||
tar "$firstarg" "$@" && exit 0
|
||||
;;
|
||||
esac
|
||||
case $firstarg in
|
||||
*h*)
|
||||
firstarg=`echo "$firstarg" | sed s/h//`
|
||||
tar "$firstarg" "$@" && exit 0
|
||||
;;
|
||||
esac
|
||||
fi
|
||||
|
||||
echo 1>&2 "\
|
||||
WARNING: I can't seem to be able to run \`tar' with the given arguments.
|
||||
You may want to install GNU tar or Free paxutils, or check the
|
||||
command line arguments."
|
||||
exit 1
|
||||
;;
|
||||
|
||||
*)
|
||||
echo 1>&2 "\
|
||||
WARNING: \`$1' is needed, and is $msg.
|
||||
You might have modified some files without having the
|
||||
proper tools for further handling them. Check the \`README' file,
|
||||
it often tells you about the needed prerequisites for installing
|
||||
this package. You may also peek at any GNU archive site, in case
|
||||
some other package would contain this missing \`$1' program."
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
exit 0
|
||||
|
||||
# Local variables:
|
||||
# eval: (add-hook 'write-file-hooks 'time-stamp)
|
||||
# time-stamp-start: "scriptversion="
|
||||
# time-stamp-format: "%:y-%02m-%02d.%02H"
|
||||
# time-stamp-time-zone: "UTC"
|
||||
# time-stamp-end: "; # UTC"
|
||||
# End:
|
78
src/Onigmo/onig-config.in
Normal file
78
src/Onigmo/onig-config.in
Normal file
@ -0,0 +1,78 @@
|
||||
#!/bin/sh
# Copyright (C) 2006 K.Kosako (sndgk393 AT ybb DOT ne DOT jp)
#
# onig-config: print the install prefix, compiler flags, linker flags and
# version of the library.  The @NAME@ tokens are placeholders that are
# expected to be substituted when this template is turned into the real
# script.

ONIG_VERSION="@PACKAGE_VERSION@"

# show_usage [STATUS]
# Print the option summary on stdout and exit with STATUS.
# STATUS defaults to 1, so a bare `show_usage` still signals a usage error.
show_usage()
{
  cat <<EOF
Usage: onig-config [OPTION]

Values for OPTION are:
--prefix[=DIR] change prefix to DIR
--prefix print prefix
--exec-prefix[=DIR] change exec_prefix to DIR
--exec-prefix print exec_prefix
--cflags print C compiler flags
--libs print library information
--version print oniguruma version
--help print this help

EOF

  exit "${1:-1}"
}

# Called without any option: that is a usage error.
if test $# -eq 0; then
  show_usage
fi

prefix="@prefix@"
exec_prefix="@exec_prefix@"
is_set_exec_prefix=no

# Options are processed left to right, so `--prefix=DIR --libs` prints the
# flags for the overridden prefix.
while test $# -gt 0; do
  # Split "--option=value" and keep only the value part.
  case "$1" in
    -*=*) val=`printf '%s\n' "$1" | sed 's/[-_a-zA-Z0-9]*=//'` ;;
    *)    val= ;;
  esac

  case "$1" in
    --prefix=*)
      prefix=$val
      # exec_prefix follows prefix until it has been set explicitly.
      if test "$is_set_exec_prefix" = no; then
        exec_prefix=$val
      fi
      ;;
    --prefix)
      printf '%s\n' "$prefix"
      ;;
    --exec-prefix=*)
      exec_prefix=$val
      is_set_exec_prefix=yes
      ;;
    --exec-prefix)
      printf '%s\n' "$exec_prefix"
      ;;
    --cflags)
      # Reset first so a value inherited from the environment is never printed.
      show_includedir=
      if test "@includedir@" != /usr/include; then
        show_includedir="-I@includedir@"
      fi
      printf '%s\n' "$show_includedir"
      ;;
    --libs)
      printf '%s\n' "-L@libdir@ -lonig"
      ;;
    --version)
      printf '%s\n' "$ONIG_VERSION"
      ;;
    --help)
      # Help was asked for explicitly: not an error.
      show_usage 0
      ;;
    *)
      show_usage
      ;;
  esac
  shift
done

# END
|
462
src/Onigmo/onig.py
Normal file
462
src/Onigmo/onig.py
Normal file
@ -0,0 +1,462 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""Using Onigmo (Oniguruma-mod) regular expression library.
|
||||
|
||||
This is a low level wrapper for Onigmo regular expression DLL/shared object.
|
||||
(This module does not support static link library.)
|
||||
This provides almost same API as the original C API, so the API is not
|
||||
object oriented.
|
||||
|
||||
Onigmo DLL (onig.dll, libonig.so, etc.) must be placed in the
|
||||
default search path. The default search path depends on the system.
|
||||
"""
|
||||
|
||||
import ctypes
|
||||
import os
|
||||
import sys
|
||||
|
||||
#__all__ = ["onig_new", "onig_free",
|
||||
# "onig_search", "onig_match",
|
||||
# "onig_region_new", "onig_region_free",
|
||||
# "onig_version", "onig_copyright"]
|
||||
|
||||
|
||||
#
|
||||
# Type Definitions
|
||||
#
|
||||
|
||||
OnigCodePoint = ctypes.c_uint
|
||||
|
||||
class OnigRegexType(ctypes.Structure):
    """Opaque stand-in for the library's compiled-regex object.

    No fields are declared here; this module only ever handles the type
    through the pointer alias ``OnigRegex`` defined below.
    """

    _fields_ = []


# Aliases for the structure and for a pointer to it.
regex_t = OnigRegexType
OnigRegex = ctypes.POINTER(OnigRegexType)
|
||||
|
||||
# Pick the ctypes integer type used for match positions (`_c_ssize_t`).
try:
    # Python 2.7 and later provide it directly.
    _c_ssize_t = ctypes.c_ssize_t
except AttributeError:
    # Python 2.6: fall back to the first signed integer type that is as wide
    # as a pointer.
    for _candidate in (ctypes.c_int, ctypes.c_long, ctypes.c_longlong):
        if ctypes.sizeof(_candidate) == ctypes.sizeof(ctypes.c_void_p):
            _c_ssize_t = _candidate
            break
    else:
        # Fail here with a clear message instead of leaving the name
        # undefined and hitting a NameError further down the module.
        raise ImportError(
            "no ctypes integer type matches the pointer size; "
            "cannot define _c_ssize_t")
    del _candidate
|
||||
|
||||
class OnigRegion(ctypes.Structure):
    """ctypes declaration of the match-region structure.

    NOTE(review): the field order is expected to match the C definition of
    the region structure in the library headers -- confirm when upgrading.
    """

    _fields_ = [
        ("allocated", ctypes.c_int),
        ("num_regs", ctypes.c_int),
        # `beg` and `end` are pointers to arrays of positions.
        ("beg", ctypes.POINTER(_c_ssize_t)),
        ("end", ctypes.POINTER(_c_ssize_t)),
        # Kept as an untyped pointer; this module never dereferences it.
        ("history_root", ctypes.c_void_p),
    ]


# Alternative name for the same structure.
re_registers = OnigRegion
|
||||
|
||||
OnigOptionType = ctypes.c_int
|
||||
|
||||
class OnigEncodingType(ctypes.Structure):
    """ctypes declaration of an encoding descriptor exported by the library.

    Every callback slot is declared as a plain ``c_void_p``: this module only
    passes encodings around by pointer and never calls the slots itself.
    """

    _fields_ = [
        ("mbc_enc_len", ctypes.c_void_p),
        # Plain data members.
        ("name", ctypes.c_char_p),
        ("max_enc_len", ctypes.c_int),
        ("min_enc_len", ctypes.c_int),
        # Remaining members are all untyped pointers.
        ("is_mbc_newline", ctypes.c_void_p),
        ("mbc_to_code", ctypes.c_void_p),
        ("code_to_mbclen", ctypes.c_void_p),
        ("code_to_mbc", ctypes.c_void_p),
        ("mbc_case_fold", ctypes.c_void_p),
        ("apply_all_case_fold", ctypes.c_void_p),
        ("get_case_fold_codes_by_str", ctypes.c_void_p),
        ("property_name_to_ctype", ctypes.c_void_p),
        ("is_code_ctype", ctypes.c_void_p),
        ("get_ctype_code_range", ctypes.c_void_p),
        ("left_adjust_char_head", ctypes.c_void_p),
        ("is_allowed_reverse_match", ctypes.c_void_p),
    ]


# Pointer-to-encoding type used in the API signatures below.
OnigEncoding = ctypes.POINTER(OnigEncodingType)
|
||||
|
||||
class OnigMetaCharTableType(ctypes.Structure):
    """Table of code points used as variable meta characters.

    The fields appear in the same order as the ``ONIG_META_CHAR_*`` indices
    defined further down in this module (ESCAPE, ANYCHAR, ANYTIME,
    ZERO_OR_ONE_TIME, ONE_OR_MORE_TIME, ANYCHAR_ANYTIME).

    The fifth field used to be misspelled ``one_or_one_time``.  It is now
    ``one_or_more_time``; the old spelling remains available as a read/write
    alias so existing callers keep working.  The memory layout is unchanged.
    """

    _fields_ = [
        ("esc", OnigCodePoint),
        ("anychar", OnigCodePoint),
        ("anytime", OnigCodePoint),
        ("zero_or_one_time", OnigCodePoint),
        ("one_or_more_time", OnigCodePoint),
        ("anychar_anytime", OnigCodePoint),
    ]

    @property
    def one_or_one_time(self):
        """Deprecated alias of ``one_or_more_time`` (historical typo)."""
        return self.one_or_more_time

    @one_or_one_time.setter
    def one_or_one_time(self, value):
        self.one_or_more_time = value
|
||||
|
||||
class OnigSyntaxType(ctypes.Structure):
    """ctypes declaration of a syntax definition.

    ``op``, ``op2`` and ``behavior`` are unsigned bit masks; presumably they
    are combined from the ``ONIG_SYN_OP_*``, ``ONIG_SYN_OP2_*`` and
    ``ONIG_SYN_*`` constants of this module -- verify against the C header.
    """

    _fields_ = [
        ("op", ctypes.c_uint),
        ("op2", ctypes.c_uint),
        ("behavior", ctypes.c_uint),
        ("options", OnigOptionType),
        # Embedded by value, not by pointer.
        ("meta_char_table", OnigMetaCharTableType),
    ]
|
||||
|
||||
class OnigErrorInfo(ctypes.Structure):
    """ctypes declaration of the error-detail structure.

    A pointer to this structure is accepted by ``onig_new`` and by
    ``onig_error_code_to_str`` (see their ``argtypes`` in this module).
    """

    _fields_ = [
        ("enc", OnigEncoding),
        ("par", ctypes.c_char_p),
        ("par_end", ctypes.c_char_p),
    ]
|
||||
|
||||
|
||||
# load the DLL or the shared library
|
||||
|
||||
# Choose the file name of the shared library for the current platform.
if os.name in ("nt", "ce"):
    _libname = "onig.dll"
elif sys.platform == "cygwin":
    _libname = "libonig.dll"
elif sys.platform == "darwin":
    # macOS shared libraries normally carry the .dylib suffix.
    _libname = "libonig.dylib"
else:
    _libname = "libonig.so"

try:
    libonig = ctypes.cdll.LoadLibrary(_libname)
except OSError:
    if _libname != "libonig.dylib":
        raise
    # Earlier versions always asked for "libonig.so" on macOS; keep that
    # working for installations that provide the library under that name.
    _libname = "libonig.so"
    libonig = ctypes.cdll.LoadLibrary(_libname)
|
||||
|
||||
#
|
||||
# Encodings
|
||||
#
|
||||
def _load_encoding(enc):
    """Return a pointer to the encoding structure exported as ``enc``.

    ``enc`` is the symbol name looked up in the loaded library.  The callers
    in this module catch ``ValueError`` for encodings that may be absent.
    """
    encoding_struct = OnigEncodingType.in_dll(libonig, enc)
    return ctypes.pointer(encoding_struct)
|
||||
|
||||
ONIG_ENCODING_ASCII = _load_encoding("OnigEncodingASCII")
|
||||
ONIG_ENCODING_ISO_8859_1 = _load_encoding("OnigEncodingISO_8859_1")
|
||||
ONIG_ENCODING_ISO_8859_2 = _load_encoding("OnigEncodingISO_8859_2")
|
||||
ONIG_ENCODING_ISO_8859_3 = _load_encoding("OnigEncodingISO_8859_3")
|
||||
ONIG_ENCODING_ISO_8859_4 = _load_encoding("OnigEncodingISO_8859_4")
|
||||
ONIG_ENCODING_ISO_8859_5 = _load_encoding("OnigEncodingISO_8859_5")
|
||||
ONIG_ENCODING_ISO_8859_6 = _load_encoding("OnigEncodingISO_8859_6")
|
||||
ONIG_ENCODING_ISO_8859_7 = _load_encoding("OnigEncodingISO_8859_7")
|
||||
ONIG_ENCODING_ISO_8859_8 = _load_encoding("OnigEncodingISO_8859_8")
|
||||
ONIG_ENCODING_ISO_8859_9 = _load_encoding("OnigEncodingISO_8859_9")
|
||||
ONIG_ENCODING_ISO_8859_10 = _load_encoding("OnigEncodingISO_8859_10")
|
||||
ONIG_ENCODING_ISO_8859_11 = _load_encoding("OnigEncodingISO_8859_11")
|
||||
ONIG_ENCODING_ISO_8859_13 = _load_encoding("OnigEncodingISO_8859_13")
|
||||
ONIG_ENCODING_ISO_8859_14 = _load_encoding("OnigEncodingISO_8859_14")
|
||||
ONIG_ENCODING_ISO_8859_15 = _load_encoding("OnigEncodingISO_8859_15")
|
||||
ONIG_ENCODING_ISO_8859_16 = _load_encoding("OnigEncodingISO_8859_16")
|
||||
ONIG_ENCODING_UTF8 = _load_encoding("OnigEncodingUTF8")
|
||||
ONIG_ENCODING_UTF16_LE = _load_encoding("OnigEncodingUTF16_LE")
|
||||
ONIG_ENCODING_UTF16_BE = _load_encoding("OnigEncodingUTF16_BE")
|
||||
ONIG_ENCODING_UTF32_LE = _load_encoding("OnigEncodingUTF32_LE")
|
||||
ONIG_ENCODING_UTF32_BE = _load_encoding("OnigEncodingUTF32_BE")
|
||||
ONIG_ENCODING_EUC_JP = _load_encoding("OnigEncodingEUC_JP")
|
||||
ONIG_ENCODING_EUC_TW = _load_encoding("OnigEncodingEUC_TW")
|
||||
ONIG_ENCODING_EUC_KR = _load_encoding("OnigEncodingEUC_KR")
|
||||
ONIG_ENCODING_EUC_CN = _load_encoding("OnigEncodingEUC_CN")
|
||||
ONIG_ENCODING_SJIS = _load_encoding("OnigEncodingSJIS")
|
||||
try:
|
||||
ONIG_ENCODING_CP932 = _load_encoding("OnigEncodingCP932")
|
||||
except ValueError:
|
||||
pass
|
||||
#ONIG_ENCODING_KOI8 = _load_encoding("OnigEncodingKOI8")
|
||||
ONIG_ENCODING_KOI8_R = _load_encoding("OnigEncodingKOI8_R")
|
||||
ONIG_ENCODING_CP1251 = _load_encoding("OnigEncodingCP1251")
|
||||
ONIG_ENCODING_BIG5 = _load_encoding("OnigEncodingBIG5")
|
||||
ONIG_ENCODING_GB18030 = _load_encoding("OnigEncodingGB18030")
|
||||
|
||||
#ONIG_ENCODING_UNDEF = None
|
||||
|
||||
|
||||
#
|
||||
# Syntaxes
|
||||
#
|
||||
def _load_syntax(syn):
    """Return a pointer to the syntax structure exported as ``syn``.

    ``syn`` is the symbol name looked up in the loaded library.  The callers
    in this module catch ``ValueError`` for syntaxes that may be absent.
    """
    syntax_struct = OnigSyntaxType.in_dll(libonig, syn)
    return ctypes.pointer(syntax_struct)
|
||||
|
||||
ONIG_SYNTAX_ASIS = _load_syntax("OnigSyntaxASIS")
|
||||
ONIG_SYNTAX_POSIX_BASIC = _load_syntax("OnigSyntaxPosixBasic")
|
||||
ONIG_SYNTAX_POSIX_EXTENDED = _load_syntax("OnigSyntaxPosixExtended")
|
||||
ONIG_SYNTAX_EMACS = _load_syntax("OnigSyntaxEmacs")
|
||||
ONIG_SYNTAX_GREP = _load_syntax("OnigSyntaxGrep")
|
||||
ONIG_SYNTAX_GNU_REGEX = _load_syntax("OnigSyntaxGnuRegex")
|
||||
ONIG_SYNTAX_JAVA = _load_syntax("OnigSyntaxJava")
|
||||
ONIG_SYNTAX_PERL = _load_syntax("OnigSyntaxPerl")
|
||||
try:
|
||||
ONIG_SYNTAX_PERL58 = _load_syntax("OnigSyntaxPerl58")
|
||||
ONIG_SYNTAX_PERL58_NG = _load_syntax("OnigSyntaxPerl58_NG")
|
||||
except ValueError:
|
||||
pass
|
||||
try:
|
||||
ONIG_SYNTAX_PERL_NG = _load_syntax("OnigSyntaxPerl_NG")
|
||||
except ValueError:
|
||||
pass
|
||||
ONIG_SYNTAX_RUBY = _load_syntax("OnigSyntaxRuby")
|
||||
try:
|
||||
ONIG_SYNTAX_PYTHON = _load_syntax("OnigSyntaxPython")
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
ONIG_SYNTAX_DEFAULT = ctypes.POINTER(OnigSyntaxType).in_dll(
|
||||
libonig, "OnigDefaultSyntax")
|
||||
|
||||
|
||||
#
|
||||
# Constants
|
||||
#
|
||||
|
||||
ONIG_MAX_ERROR_MESSAGE_LEN = 90
|
||||
|
||||
# options
|
||||
ONIG_OPTION_NONE = 0
|
||||
ONIG_OPTION_IGNORECASE = 1
|
||||
ONIG_OPTION_EXTEND = (ONIG_OPTION_IGNORECASE << 1)
|
||||
ONIG_OPTION_MULTILINE = (ONIG_OPTION_EXTEND << 1)
|
||||
ONIG_OPTION_DOTALL = ONIG_OPTION_MULTILINE
|
||||
ONIG_OPTION_SINGLELINE = (ONIG_OPTION_MULTILINE << 1)
|
||||
ONIG_OPTION_FIND_LONGEST = (ONIG_OPTION_SINGLELINE << 1)
|
||||
ONIG_OPTION_FIND_NOT_EMPTY = (ONIG_OPTION_FIND_LONGEST << 1)
|
||||
ONIG_OPTION_NEGATE_SINGLELINE = (ONIG_OPTION_FIND_NOT_EMPTY << 1)
|
||||
ONIG_OPTION_DONT_CAPTURE_GROUP = (ONIG_OPTION_NEGATE_SINGLELINE << 1)
|
||||
ONIG_OPTION_CAPTURE_GROUP = (ONIG_OPTION_DONT_CAPTURE_GROUP << 1)
|
||||
# options (search time)
|
||||
ONIG_OPTION_NOTBOL = (ONIG_OPTION_CAPTURE_GROUP << 1)
|
||||
ONIG_OPTION_NOTEOL = (ONIG_OPTION_NOTBOL << 1)
|
||||
ONIG_OPTION_POSIX_REGION = (ONIG_OPTION_NOTEOL << 1)
|
||||
# options (ctype range)
|
||||
ONIG_OPTION_ASCII_RANGE = (ONIG_OPTION_POSIX_REGION << 1)
|
||||
ONIG_OPTION_POSIX_BRACKET_ALL_RANGE = (ONIG_OPTION_ASCII_RANGE << 1)
|
||||
ONIG_OPTION_WORD_BOUND_ALL_RANGE = (ONIG_OPTION_POSIX_BRACKET_ALL_RANGE << 1)
|
||||
# options (newline)
|
||||
ONIG_OPTION_NEWLINE_CRLF = (ONIG_OPTION_WORD_BOUND_ALL_RANGE << 1)
|
||||
|
||||
ONIG_OPTION_DEFAULT = ONIG_OPTION_NONE
|
||||
|
||||
|
||||
# syntax (operators)
|
||||
ONIG_SYN_OP_VARIABLE_META_CHARACTERS = (1<<0)
|
||||
ONIG_SYN_OP_DOT_ANYCHAR = (1<<1)
|
||||
ONIG_SYN_OP_ASTERISK_ZERO_INF = (1<<2)
|
||||
ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF = (1<<3)
|
||||
ONIG_SYN_OP_PLUS_ONE_INF = (1<<4)
|
||||
ONIG_SYN_OP_ESC_PLUS_ONE_INF = (1<<5)
|
||||
ONIG_SYN_OP_QMARK_ZERO_ONE = (1<<6)
|
||||
ONIG_SYN_OP_ESC_QMARK_ZERO_ONE = (1<<7)
|
||||
ONIG_SYN_OP_BRACE_INTERVAL = (1<<8)
|
||||
ONIG_SYN_OP_ESC_BRACE_INTERVAL = (1<<9)
|
||||
ONIG_SYN_OP_VBAR_ALT = (1<<10)
|
||||
ONIG_SYN_OP_ESC_VBAR_ALT = (1<<11)
|
||||
ONIG_SYN_OP_LPAREN_SUBEXP = (1<<12)
|
||||
ONIG_SYN_OP_ESC_LPAREN_SUBEXP = (1<<13)
|
||||
ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR = (1<<14)
|
||||
ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR = (1<<15)
|
||||
ONIG_SYN_OP_DECIMAL_BACKREF = (1<<16)
|
||||
ONIG_SYN_OP_BRACKET_CC = (1<<17)
|
||||
ONIG_SYN_OP_ESC_W_WORD = (1<<18)
|
||||
ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END = (1<<19)
|
||||
ONIG_SYN_OP_ESC_B_WORD_BOUND = (1<<20)
|
||||
ONIG_SYN_OP_ESC_S_WHITE_SPACE = (1<<21)
|
||||
ONIG_SYN_OP_ESC_D_DIGIT = (1<<22)
|
||||
ONIG_SYN_OP_LINE_ANCHOR = (1<<23)
|
||||
ONIG_SYN_OP_POSIX_BRACKET = (1<<24)
|
||||
ONIG_SYN_OP_QMARK_NON_GREEDY = (1<<25)
|
||||
ONIG_SYN_OP_ESC_CONTROL_CHARS = (1<<26)
|
||||
ONIG_SYN_OP_ESC_C_CONTROL = (1<<27)
|
||||
ONIG_SYN_OP_ESC_OCTAL3 = (1<<28)
|
||||
ONIG_SYN_OP_ESC_X_HEX2 = (1<<29)
|
||||
ONIG_SYN_OP_ESC_X_BRACE_HEX8 = (1<<30)
|
||||
ONIG_SYN_OP_ESC_O_BRACE_OCTAL = (1<<31)
|
||||
|
||||
ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE = (1<<0)
|
||||
ONIG_SYN_OP2_QMARK_GROUP_EFFECT = (1<<1)
|
||||
ONIG_SYN_OP2_OPTION_PERL = (1<<2)
|
||||
ONIG_SYN_OP2_OPTION_RUBY = (1<<3)
|
||||
ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT = (1<<4)
|
||||
ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL = (1<<5)
|
||||
ONIG_SYN_OP2_CCLASS_SET_OP = (1<<6)
|
||||
ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP = (1<<7)
|
||||
ONIG_SYN_OP2_ESC_K_NAMED_BACKREF = (1<<8)
|
||||
ONIG_SYN_OP2_ESC_G_SUBEXP_CALL = (1<<9)
|
||||
ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY = (1<<10)
|
||||
ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL = (1<<11)
|
||||
ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META = (1<<12)
|
||||
ONIG_SYN_OP2_ESC_V_VTAB = (1<<13)
|
||||
ONIG_SYN_OP2_ESC_U_HEX4 = (1<<14)
|
||||
ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR = (1<<15)
|
||||
ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY = (1<<16)
|
||||
ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT = (1<<17)
|
||||
#ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS = (1<<18)
|
||||
ONIG_SYN_OP2_ESC_H_XDIGIT = (1<<19)
|
||||
ONIG_SYN_OP2_INEFFECTIVE_ESCAPE = (1<<20)
|
||||
ONIG_SYN_OP2_ESC_CAPITAL_R_LINEBREAK = (1<<21)
|
||||
ONIG_SYN_OP2_ESC_CAPITAL_X_EXTENDED_GRAPHEME_CLUSTER = (1<<22)
|
||||
ONIG_SYN_OP2_ESC_V_VERTICAL_WHITESPACE = (1<<23)
|
||||
ONIG_SYN_OP2_ESC_H_HORIZONTAL_WHITESPACE = (1<<24)
|
||||
ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP = (1<<25)
|
||||
ONIG_SYN_OP2_ESC_G_BRACE_BACKREF = (1<<26)
|
||||
ONIG_SYN_OP2_QMARK_SUBEXP_CALL = (1<<27)
|
||||
ONIG_SYN_OP2_QMARK_VBAR_BRANCH_RESET = (1<<28)
|
||||
ONIG_SYN_OP2_QMARK_LPAREN_CONDITION = (1<<29)
|
||||
ONIG_SYN_OP2_QMARK_CAPITAL_P_NAMED_GROUP = (1<<30)
|
||||
ONIG_SYN_OP2_OPTION_JAVA = (1<<31)
|
||||
|
||||
# syntax (behavior)
|
||||
ONIG_SYN_CONTEXT_INDEP_ANCHORS = (1<<31)
|
||||
ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS = (1<<0)
|
||||
ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS = (1<<1)
|
||||
ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP = (1<<2)
|
||||
ONIG_SYN_ALLOW_INVALID_INTERVAL = (1<<3)
|
||||
ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV = (1<<4)
|
||||
ONIG_SYN_STRICT_CHECK_BACKREF = (1<<5)
|
||||
ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND = (1<<6)
|
||||
ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP = (1<<7)
|
||||
ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME = (1<<8)
|
||||
ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY = (1<<9)
|
||||
ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME_CALL = (1<<10)
|
||||
|
||||
# (behavior) in char class [...]
|
||||
ONIG_SYN_POSIX_BRACKET_ALWAYS_ALL_RANGE = (1<<19)
|
||||
ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC = (1<<20)
|
||||
ONIG_SYN_BACKSLASH_ESCAPE_IN_CC = (1<<21)
|
||||
ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC = (1<<22)
|
||||
ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC = (1<<23)
|
||||
# syntax (behavior) warning
|
||||
ONIG_SYN_WARN_CC_OP_NOT_ESCAPED = (1<<24)
|
||||
ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT = (1<<25)
|
||||
|
||||
# meta character specifiers (onig_set_meta_char())
|
||||
ONIG_META_CHAR_ESCAPE = 0
|
||||
ONIG_META_CHAR_ANYCHAR = 1
|
||||
ONIG_META_CHAR_ANYTIME = 2
|
||||
ONIG_META_CHAR_ZERO_OR_ONE_TIME = 3
|
||||
ONIG_META_CHAR_ONE_OR_MORE_TIME = 4
|
||||
ONIG_META_CHAR_ANYCHAR_ANYTIME = 5
|
||||
|
||||
ONIG_INEFFECTIVE_META_CHAR = 0
|
||||
|
||||
|
||||
# error codes
|
||||
def ONIG_IS_PATTERN_ERROR(ecode):
    """Return True when ``ecode`` lies in the range -999 .. -100 inclusive."""
    return -1000 < ecode <= -100
|
||||
# normal return
|
||||
ONIG_NORMAL = 0
|
||||
ONIG_MISMATCH = -1
|
||||
ONIG_NO_SUPPORT_CONFIG = -2
|
||||
# internal error
|
||||
# general error
|
||||
ONIGERR_INVALID_ARGUMENT = -30
|
||||
# syntax error
|
||||
# values error (syntax error)
|
||||
# errors related to thread
|
||||
ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT = -1001
|
||||
|
||||
|
||||
#
|
||||
# Onigmo APIs
|
||||
#
|
||||
|
||||
# onig_init
|
||||
onig_init = libonig.onig_init
|
||||
|
||||
# onig_error_code_to_str
|
||||
libonig.onig_error_code_to_str.argtypes = [
    ctypes.c_char_p,                # err_buf
    ctypes.c_int,                   # err_code
    ctypes.POINTER(OnigErrorInfo),  # err_info (may be None)
]


def onig_error_code_to_str(err_buf, err_code, err_info=None):
    """Call the library's ``onig_error_code_to_str`` and return its result.

    err_buf  -- buffer handed to the library as the first argument
                (presumably it must hold ONIG_MAX_ERROR_MESSAGE_LEN bytes;
                verify against the C API).
    err_code -- integer error code.
    err_info -- optional OnigErrorInfo; ``None`` is passed as a NULL pointer.
    """
    c_func = libonig.onig_error_code_to_str
    return c_func(err_buf, err_code, err_info)
|
||||
|
||||
# onig_set_warn_func
|
||||
# onig_set_verb_warn_func
|
||||
|
||||
# onig_new
|
||||
libonig.onig_new.argtypes = [ctypes.POINTER(OnigRegex),
|
||||
ctypes.c_void_p, ctypes.c_void_p,
|
||||
OnigOptionType, OnigEncoding, ctypes.POINTER(OnigSyntaxType),
|
||||
ctypes.POINTER(OnigErrorInfo)]
|
||||
onig_new = libonig.onig_new
|
||||
|
||||
# onig_reg_init
|
||||
# onig_new_without_alloc
|
||||
# onig_new_deluxe
|
||||
|
||||
# onig_free
|
||||
libonig.onig_free.argtypes = [OnigRegex]
|
||||
onig_free = libonig.onig_free
|
||||
|
||||
# onig_free_body
|
||||
# onig_recompile
|
||||
# onig_recompile_deluxe
|
||||
|
||||
# onig_search
|
||||
libonig.onig_search.argtypes = [OnigRegex,
|
||||
ctypes.c_void_p, ctypes.c_void_p, ctypes.c_void_p, ctypes.c_void_p,
|
||||
ctypes.POINTER(OnigRegion), OnigOptionType]
|
||||
libonig.onig_search.restype = _c_ssize_t
|
||||
onig_search = libonig.onig_search
|
||||
|
||||
# onig_search_gpos
|
||||
|
||||
# onig_match
|
||||
libonig.onig_match.argtypes = [OnigRegex,
|
||||
ctypes.c_void_p, ctypes.c_void_p, ctypes.c_void_p,
|
||||
ctypes.POINTER(OnigRegion), OnigOptionType]
|
||||
libonig.onig_match.restype = _c_ssize_t
|
||||
onig_match = libonig.onig_match
|
||||
|
||||
# onig_region_new
|
||||
libonig.onig_region_new.argtypes = []
|
||||
libonig.onig_region_new.restype = ctypes.POINTER(OnigRegion)
|
||||
onig_region_new = libonig.onig_region_new
|
||||
|
||||
# onig_region_init
|
||||
|
||||
# onig_region_free
|
||||
libonig.onig_region_free.argtypes = [ctypes.POINTER(OnigRegion), ctypes.c_int]
|
||||
onig_region_free = libonig.onig_region_free
|
||||
|
||||
# onig_region_copy
|
||||
# onig_region_clear
|
||||
# onig_region_resize
|
||||
# onig_region_set
|
||||
# onig_name_to_group_numbers
|
||||
# onig_name_to_backref_number
|
||||
# onig_foreach_name
|
||||
# onig_number_of_names
|
||||
# onig_number_of_captures
|
||||
# onig_number_of_capture_histories
|
||||
# onig_get_capture_tree
|
||||
# onig_capture_tree_traverse
|
||||
# onig_noname_group_capture_is_active
|
||||
# onig_get_encoding
|
||||
# onig_get_options
|
||||
# onig_get_case_fold_flag
|
||||
# onig_get_syntax
|
||||
# onig_set_default_syntax
|
||||
|
||||
# onig_copy_syntax
|
||||
libonig.onig_copy_syntax.argtypes = [ctypes.POINTER(OnigSyntaxType),
|
||||
ctypes.POINTER(OnigSyntaxType)]
|
||||
onig_copy_syntax = libonig.onig_copy_syntax
|
||||
|
||||
# onig_get_syntax_op
|
||||
# onig_get_syntax_op2
|
||||
# onig_get_syntax_behavior
|
||||
# onig_get_syntax_options
|
||||
# onig_set_syntax_op
|
||||
# onig_set_syntax_op2
|
||||
# onig_set_syntax_behavior
|
||||
# onig_set_syntax_options
|
||||
# onig_set_meta_char
|
||||
# onig_copy_encoding
|
||||
# onig_get_default_case_fold_flag
|
||||
# onig_set_default_case_fold_flag
|
||||
# onig_get_match_stack_limit_size
|
||||
# onig_set_match_stack_limit_size
|
||||
|
||||
# onig_end
|
||||
libonig.onig_end.argtypes = []
|
||||
onig_end = libonig.onig_end
|
||||
|
||||
# onig_version
|
||||
libonig.onig_version.restype = ctypes.c_char_p
libonig.onig_version.argtypes = []


def onig_version():
    """Return the library's version string, decoded to text."""
    raw_version = libonig.onig_version()
    return raw_version.decode()
|
||||
|
||||
# onig_copyright
|
||||
libonig.onig_copyright.restype = ctypes.c_char_p
libonig.onig_copyright.argtypes = []


def onig_copyright():
    """Return the library's copyright string, decoded to text."""
    raw_notice = libonig.onig_copyright()
    return raw_notice.decode()
|
85
src/Onigmo/oniggnu.h
Normal file
85
src/Onigmo/oniggnu.h
Normal file
@ -0,0 +1,85 @@
|
||||
#ifndef ONIGGNU_H
|
||||
#define ONIGGNU_H
|
||||
/**********************************************************************
|
||||
oniggnu.h - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "oniguruma.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define RE_MBCTYPE_ASCII 0
|
||||
#define RE_MBCTYPE_EUC 1
|
||||
#define RE_MBCTYPE_SJIS 2
|
||||
#define RE_MBCTYPE_UTF8 3
|
||||
|
||||
/* GNU regex options */
|
||||
#ifndef RE_NREGS
|
||||
#define RE_NREGS ONIG_NREGION
|
||||
#endif
|
||||
|
||||
#define RE_OPTION_IGNORECASE ONIG_OPTION_IGNORECASE
|
||||
#define RE_OPTION_EXTENDED ONIG_OPTION_EXTEND
|
||||
#define RE_OPTION_MULTILINE ONIG_OPTION_MULTILINE
|
||||
#define RE_OPTION_SINGLELINE ONIG_OPTION_SINGLELINE
|
||||
#define RE_OPTION_LONGEST ONIG_OPTION_FIND_LONGEST
|
||||
#define RE_OPTION_POSIXLINE (RE_OPTION_MULTILINE|RE_OPTION_SINGLELINE)
|
||||
#define RE_OPTION_FIND_NOT_EMPTY ONIG_OPTION_FIND_NOT_EMPTY
|
||||
#define RE_OPTION_NEGATE_SINGLELINE ONIG_OPTION_NEGATE_SINGLELINE
|
||||
#define RE_OPTION_DONT_CAPTURE_GROUP ONIG_OPTION_DONT_CAPTURE_GROUP
|
||||
#define RE_OPTION_CAPTURE_GROUP ONIG_OPTION_CAPTURE_GROUP
|
||||
|
||||
|
||||
ONIG_EXTERN
|
||||
void re_mbcinit P_((int));
|
||||
ONIG_EXTERN
|
||||
int re_compile_pattern P_((const char*, int, struct re_pattern_buffer*, char* err_buf));
|
||||
ONIG_EXTERN
|
||||
int re_recompile_pattern P_((const char*, int, struct re_pattern_buffer*, char* err_buf));
|
||||
ONIG_EXTERN
|
||||
void re_free_pattern P_((struct re_pattern_buffer*));
|
||||
ONIG_EXTERN
|
||||
int re_adjust_startpos P_((struct re_pattern_buffer*, const char*, int, int, int));
|
||||
ONIG_EXTERN
|
||||
int re_search P_((struct re_pattern_buffer*, const char*, int, int, int, struct re_registers*));
|
||||
ONIG_EXTERN
|
||||
int re_match P_((struct re_pattern_buffer*, const char *, int, int, struct re_registers*));
|
||||
ONIG_EXTERN
|
||||
void re_set_casetable P_((const char*));
|
||||
ONIG_EXTERN
|
||||
void re_free_registers P_((struct re_registers*));
|
||||
ONIG_EXTERN
|
||||
int re_alloc_pattern P_((struct re_pattern_buffer**)); /* added */
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* ONIGGNU_H */
|
169
src/Onigmo/onigposix.h
Normal file
169
src/Onigmo/onigposix.h
Normal file
@ -0,0 +1,169 @@
|
||||
#ifndef ONIGPOSIX_H
|
||||
#define ONIGPOSIX_H
|
||||
/**********************************************************************
|
||||
onigposix.h - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
#include <stdlib.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* options */
|
||||
#define REG_ICASE (1<<0)
|
||||
#define REG_NEWLINE (1<<1)
|
||||
#define REG_NOTBOL (1<<2)
|
||||
#define REG_NOTEOL (1<<3)
|
||||
#define REG_EXTENDED (1<<4) /* if not set, Basic Regular Expression */
|
||||
#define REG_NOSUB (1<<5)
|
||||
|
||||
/* POSIX error codes */
|
||||
#define REG_NOMATCH 1
|
||||
#define REG_BADPAT 2
|
||||
#define REG_ECOLLATE 3
|
||||
#define REG_ECTYPE 4
|
||||
#define REG_EESCAPE 5
|
||||
#define REG_ESUBREG 6
|
||||
#define REG_EBRACK 7
|
||||
#define REG_EPAREN 8
|
||||
#define REG_EBRACE 9
|
||||
#define REG_BADBR 10
|
||||
#define REG_ERANGE 11
|
||||
#define REG_ESPACE 12
|
||||
#define REG_BADRPT 13
|
||||
|
||||
/* extended error codes */
|
||||
#define REG_EONIG_INTERNAL 14
|
||||
#define REG_EONIG_BADWC 15
|
||||
#define REG_EONIG_BADARG 16
|
||||
#define REG_EONIG_THREAD 17
|
||||
|
||||
/* character encodings (for reg_set_encoding()) */
|
||||
#define REG_POSIX_ENCODING_ASCII 0
|
||||
#define REG_POSIX_ENCODING_EUC_JP 1
|
||||
#define REG_POSIX_ENCODING_SJIS 2
|
||||
#define REG_POSIX_ENCODING_UTF8 3
|
||||
#define REG_POSIX_ENCODING_UTF16_BE 4
|
||||
#define REG_POSIX_ENCODING_UTF16_LE 5
|
||||
|
||||
|
||||
typedef int regoff_t;
|
||||
|
||||
typedef struct {
|
||||
regoff_t rm_so;
|
||||
regoff_t rm_eo;
|
||||
} regmatch_t;
|
||||
|
||||
/* POSIX regex_t */
|
||||
typedef struct {
|
||||
void* onig; /* Oniguruma regex_t* */
|
||||
size_t re_nsub;
|
||||
int comp_options;
|
||||
} regex_t;
|
||||
|
||||
|
||||
#ifndef P_
|
||||
#if defined(__STDC__) || defined(_WIN32)
|
||||
# define P_(args) args
|
||||
#else
|
||||
# define P_(args) ()
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef ONIG_EXTERN
|
||||
#if defined(_WIN32) && !defined(__GNUC__)
|
||||
#if defined(EXPORT)
|
||||
#define ONIG_EXTERN extern __declspec(dllexport)
|
||||
#else
|
||||
#define ONIG_EXTERN extern __declspec(dllimport)
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef ONIG_EXTERN
|
||||
#define ONIG_EXTERN extern
|
||||
#endif
|
||||
|
||||
#ifndef ONIGURUMA_H
|
||||
typedef unsigned int OnigOptionType;

/* syntax */
/*
 * Stand-alone declaration of OnigSyntaxType, used only when oniguruma.h has
 * not been included.  It must have the same size and member layout as the
 * definition in oniguruma.h, which ends with a meta_char_table member of six
 * OnigCodePoint (unsigned int) values.  Leaving that member out makes objects
 * declared through this header smaller than the ones the library works with.
 * NOTE(review): onig_copy_syntax() presumably copies the whole library-side
 * structure into the caller's object -- confirm against regsyntax.c.
 */
typedef struct {
  unsigned int op;
  unsigned int op2;
  unsigned int behavior;
  OnigOptionType options;	/* default option */
  /* same members, order and types as OnigMetaCharTableType in oniguruma.h */
  struct {
    unsigned int esc;
    unsigned int anychar;
    unsigned int anytime;
    unsigned int zero_or_one_time;
    unsigned int one_or_more_time;
    unsigned int anychar_anytime;
  } meta_char_table;
} OnigSyntaxType;
|
||||
|
||||
ONIG_EXTERN OnigSyntaxType OnigSyntaxPosixBasic;
|
||||
ONIG_EXTERN OnigSyntaxType OnigSyntaxPosixExtended;
|
||||
ONIG_EXTERN OnigSyntaxType OnigSyntaxEmacs;
|
||||
ONIG_EXTERN OnigSyntaxType OnigSyntaxGrep;
|
||||
ONIG_EXTERN OnigSyntaxType OnigSyntaxGnuRegex;
|
||||
ONIG_EXTERN OnigSyntaxType OnigSyntaxJava;
|
||||
ONIG_EXTERN OnigSyntaxType OnigSyntaxPerl;
|
||||
ONIG_EXTERN OnigSyntaxType OnigSyntaxRuby;
|
||||
|
||||
/* predefined syntaxes (see regsyntax.c) */
|
||||
#define ONIG_SYNTAX_POSIX_BASIC (&OnigSyntaxPosixBasic)
|
||||
#define ONIG_SYNTAX_POSIX_EXTENDED (&OnigSyntaxPosixExtended)
|
||||
#define ONIG_SYNTAX_EMACS (&OnigSyntaxEmacs)
|
||||
#define ONIG_SYNTAX_GREP (&OnigSyntaxGrep)
|
||||
#define ONIG_SYNTAX_GNU_REGEX (&OnigSyntaxGnuRegex)
|
||||
#define ONIG_SYNTAX_JAVA (&OnigSyntaxJava)
|
||||
#define ONIG_SYNTAX_PERL (&OnigSyntaxPerl)
|
||||
#define ONIG_SYNTAX_RUBY (&OnigSyntaxRuby)
|
||||
/* default syntax */
|
||||
#define ONIG_SYNTAX_DEFAULT OnigDefaultSyntax
|
||||
|
||||
ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax;
|
||||
|
||||
ONIG_EXTERN int onig_set_default_syntax P_((OnigSyntaxType* syntax));
|
||||
ONIG_EXTERN void onig_copy_syntax P_((OnigSyntaxType* to, OnigSyntaxType* from));
|
||||
ONIG_EXTERN const char* onig_version P_((void));
|
||||
ONIG_EXTERN const char* onig_copyright P_((void));
|
||||
|
||||
#endif /* ONIGURUMA_H */
|
||||
|
||||
|
||||
ONIG_EXTERN int regcomp P_((regex_t* reg, const char* pat, int options));
|
||||
ONIG_EXTERN int regexec P_((regex_t* reg, const char* str, size_t nmatch, regmatch_t* matches, int options));
|
||||
ONIG_EXTERN void regfree P_((regex_t* reg));
|
||||
ONIG_EXTERN size_t regerror P_((int code, const regex_t* reg, char* buf, size_t size));
|
||||
|
||||
/* extended API */
|
||||
ONIG_EXTERN void reg_set_encoding P_((int enc));
|
||||
ONIG_EXTERN int reg_name_to_group_numbers P_((regex_t* reg, const unsigned char* name, const unsigned char* name_end, int** nums));
|
||||
ONIG_EXTERN int reg_foreach_name P_((regex_t* reg, int (*func)(const unsigned char*, const unsigned char*,int,int*,regex_t*,void*), void* arg));
|
||||
ONIG_EXTERN int reg_number_of_names P_((regex_t* reg));
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* ONIGPOSIX_H */
|
866
src/Onigmo/oniguruma.h
Normal file
866
src/Onigmo/oniguruma.h
Normal file
@ -0,0 +1,866 @@
|
||||
#ifndef ONIGURUMA_H
|
||||
#define ONIGURUMA_H
|
||||
/**********************************************************************
|
||||
oniguruma.h - Onigmo (Oniguruma-mod) (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2009 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2011-2013 K.Takata <kentkt AT csc DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define ONIGURUMA
|
||||
#define ONIGURUMA_VERSION_MAJOR 5
|
||||
#define ONIGURUMA_VERSION_MINOR 13
|
||||
#define ONIGURUMA_VERSION_TEENY 5
|
||||
|
||||
#ifdef __cplusplus
|
||||
# ifndef HAVE_PROTOTYPES
|
||||
# define HAVE_PROTOTYPES 1
|
||||
# endif
|
||||
# ifndef HAVE_STDARG_PROTOTYPES
|
||||
# define HAVE_STDARG_PROTOTYPES 1
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* escape Mac OS X/Xcode 2.4/gcc 4.0.1 problem */
|
||||
#if defined(__APPLE__) && defined(__GNUC__) && __GNUC__ >= 4
|
||||
# ifndef HAVE_STDARG_PROTOTYPES
|
||||
# define HAVE_STDARG_PROTOTYPES 1
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_STDARG_H
|
||||
# ifndef HAVE_STDARG_PROTOTYPES
|
||||
# define HAVE_STDARG_PROTOTYPES 1
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifndef P_
|
||||
#if defined(__STDC__) || defined(_WIN32)
|
||||
# define P_(args) args
|
||||
#else
|
||||
# define P_(args) ()
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef PV_
|
||||
#ifdef HAVE_STDARG_PROTOTYPES
|
||||
# define PV_(args) args
|
||||
#else
|
||||
# define PV_(args) ()
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef ONIG_EXTERN
|
||||
#if defined(_WIN32) && !defined(__GNUC__)
|
||||
#if defined(EXPORT) || defined(RUBY_EXPORT)
|
||||
#define ONIG_EXTERN extern __declspec(dllexport)
|
||||
#else
|
||||
#define ONIG_EXTERN extern __declspec(dllimport)
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef ONIG_EXTERN
|
||||
#define ONIG_EXTERN extern
|
||||
#endif
|
||||
|
||||
#include <stddef.h> /* for size_t */
|
||||
|
||||
/* PART: character encoding */
|
||||
|
||||
#ifndef ONIG_ESCAPE_UCHAR_COLLISION
|
||||
#define UChar OnigUChar
|
||||
#endif
|
||||
|
||||
typedef unsigned char OnigUChar;
|
||||
typedef unsigned int OnigCodePoint;
|
||||
typedef unsigned int OnigCtype;
|
||||
typedef size_t OnigDistance;
|
||||
typedef ptrdiff_t OnigPosition;
|
||||
|
||||
#define ONIG_INFINITE_DISTANCE ~((OnigDistance )0)
|
||||
|
||||
typedef unsigned int OnigCaseFoldType; /* case fold flag */
|
||||
|
||||
ONIG_EXTERN OnigCaseFoldType OnigDefaultCaseFoldFlag;
|
||||
|
||||
/* #define ONIGENC_CASE_FOLD_HIRAGANA_KATAKANA (1<<1) */
|
||||
/* #define ONIGENC_CASE_FOLD_KATAKANA_WIDTH (1<<2) */
|
||||
#define ONIGENC_CASE_FOLD_TURKISH_AZERI (1<<20)
|
||||
#define INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR (1<<30)
|
||||
|
||||
#define ONIGENC_CASE_FOLD_MIN INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR
|
||||
#define ONIGENC_CASE_FOLD_DEFAULT OnigDefaultCaseFoldFlag
|
||||
|
||||
|
||||
#define ONIGENC_MAX_COMP_CASE_FOLD_CODE_LEN 3
|
||||
#define ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM 13
|
||||
/* 13 => Unicode:0x1ffc */
|
||||
|
||||
/* code range */
|
||||
/* Element 0 of a code range table, as int.  The argument is parenthesized so
   that pointer expressions such as (tbl + 1) index correctly. */
#define ONIGENC_CODE_RANGE_NUM(range) ((int )(range)[0])
|
||||
/* Element (2*i + 1) of a code range table.  Both the argument and the whole
   expansion are parenthesized; the result is still usable as an lvalue. */
#define ONIGENC_CODE_RANGE_FROM(range,i) ((range)[((i)*2) + 1])
|
||||
/* Element (2*i + 2) of a code range table.  Both the argument and the whole
   expansion are parenthesized; the result is still usable as an lvalue. */
#define ONIGENC_CODE_RANGE_TO(range,i) ((range)[((i)*2) + 2])
|
||||
|
||||
typedef struct {
|
||||
int byte_len; /* argument(original) character(s) byte length */
|
||||
int code_len; /* number of code */
|
||||
OnigCodePoint code[ONIGENC_MAX_COMP_CASE_FOLD_CODE_LEN];
|
||||
} OnigCaseFoldCodeItem;
|
||||
|
||||
typedef struct {
|
||||
OnigCodePoint esc;
|
||||
OnigCodePoint anychar;
|
||||
OnigCodePoint anytime;
|
||||
OnigCodePoint zero_or_one_time;
|
||||
OnigCodePoint one_or_more_time;
|
||||
OnigCodePoint anychar_anytime;
|
||||
} OnigMetaCharTableType;
|
||||
|
||||
typedef int (*OnigApplyAllCaseFoldFunc)(OnigCodePoint from, OnigCodePoint* to, int to_len, void* arg);
|
||||
|
||||
typedef struct OnigEncodingTypeST {
|
||||
int (*mbc_enc_len)(const OnigUChar* p);
|
||||
const char* name;
|
||||
int max_enc_len;
|
||||
int min_enc_len;
|
||||
int (*is_mbc_newline)(const OnigUChar* p, const OnigUChar* end);
|
||||
OnigCodePoint (*mbc_to_code)(const OnigUChar* p, const OnigUChar* end);
|
||||
int (*code_to_mbclen)(OnigCodePoint code);
|
||||
int (*code_to_mbc)(OnigCodePoint code, OnigUChar *buf);
|
||||
int (*mbc_case_fold)(OnigCaseFoldType flag, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to);
|
||||
int (*apply_all_case_fold)(OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg);
|
||||
int (*get_case_fold_codes_by_str)(OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem acs[]);
|
||||
int (*property_name_to_ctype)(struct OnigEncodingTypeST* enc, OnigUChar* p, OnigUChar* end);
|
||||
int (*is_code_ctype)(OnigCodePoint code, OnigCtype ctype);
|
||||
int (*get_ctype_code_range)(OnigCtype ctype, OnigCodePoint* sb_out, const OnigCodePoint* ranges[]);
|
||||
OnigUChar* (*left_adjust_char_head)(const OnigUChar* start, const OnigUChar* p);
|
||||
int (*is_allowed_reverse_match)(const OnigUChar* p, const OnigUChar* end);
|
||||
unsigned int flags;
|
||||
} OnigEncodingType;
|
||||
|
||||
typedef OnigEncodingType* OnigEncoding;
|
||||
|
||||
ONIG_EXTERN OnigEncodingType OnigEncodingASCII;
|
||||
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_1;
|
||||
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_2;
|
||||
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_3;
|
||||
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_4;
|
||||
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_5;
|
||||
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_6;
|
||||
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_7;
|
||||
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_8;
|
||||
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_9;
|
||||
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_10;
|
||||
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_11;
|
||||
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_13;
|
||||
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_14;
|
||||
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_15;
|
||||
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_16;
|
||||
ONIG_EXTERN OnigEncodingType OnigEncodingUTF8;
|
||||
ONIG_EXTERN OnigEncodingType OnigEncodingUTF16_BE;
|
||||
ONIG_EXTERN OnigEncodingType OnigEncodingUTF16_LE;
|
||||
ONIG_EXTERN OnigEncodingType OnigEncodingUTF32_BE;
|
||||
ONIG_EXTERN OnigEncodingType OnigEncodingUTF32_LE;
|
||||
ONIG_EXTERN OnigEncodingType OnigEncodingEUC_JP;
|
||||
ONIG_EXTERN OnigEncodingType OnigEncodingEUC_TW;
|
||||
ONIG_EXTERN OnigEncodingType OnigEncodingEUC_KR;
|
||||
ONIG_EXTERN OnigEncodingType OnigEncodingEUC_CN;
|
||||
ONIG_EXTERN OnigEncodingType OnigEncodingSJIS;
|
||||
ONIG_EXTERN OnigEncodingType OnigEncodingCP932;
|
||||
ONIG_EXTERN OnigEncodingType OnigEncodingKOI8;
|
||||
ONIG_EXTERN OnigEncodingType OnigEncodingKOI8_R;
|
||||
ONIG_EXTERN OnigEncodingType OnigEncodingCP1251;
|
||||
ONIG_EXTERN OnigEncodingType OnigEncodingBIG5;
|
||||
ONIG_EXTERN OnigEncodingType OnigEncodingGB18030;
|
||||
|
||||
#define ONIG_ENCODING_ASCII (&OnigEncodingASCII)
|
||||
#define ONIG_ENCODING_ISO_8859_1 (&OnigEncodingISO_8859_1)
|
||||
#define ONIG_ENCODING_ISO_8859_2 (&OnigEncodingISO_8859_2)
|
||||
#define ONIG_ENCODING_ISO_8859_3 (&OnigEncodingISO_8859_3)
|
||||
#define ONIG_ENCODING_ISO_8859_4 (&OnigEncodingISO_8859_4)
|
||||
#define ONIG_ENCODING_ISO_8859_5 (&OnigEncodingISO_8859_5)
|
||||
#define ONIG_ENCODING_ISO_8859_6 (&OnigEncodingISO_8859_6)
|
||||
#define ONIG_ENCODING_ISO_8859_7 (&OnigEncodingISO_8859_7)
|
||||
#define ONIG_ENCODING_ISO_8859_8 (&OnigEncodingISO_8859_8)
|
||||
#define ONIG_ENCODING_ISO_8859_9 (&OnigEncodingISO_8859_9)
|
||||
#define ONIG_ENCODING_ISO_8859_10 (&OnigEncodingISO_8859_10)
|
||||
#define ONIG_ENCODING_ISO_8859_11 (&OnigEncodingISO_8859_11)
|
||||
#define ONIG_ENCODING_ISO_8859_13 (&OnigEncodingISO_8859_13)
|
||||
#define ONIG_ENCODING_ISO_8859_14 (&OnigEncodingISO_8859_14)
|
||||
#define ONIG_ENCODING_ISO_8859_15 (&OnigEncodingISO_8859_15)
|
||||
#define ONIG_ENCODING_ISO_8859_16 (&OnigEncodingISO_8859_16)
|
||||
#define ONIG_ENCODING_UTF8 (&OnigEncodingUTF8)
|
||||
#define ONIG_ENCODING_UTF16_BE (&OnigEncodingUTF16_BE)
|
||||
#define ONIG_ENCODING_UTF16_LE (&OnigEncodingUTF16_LE)
|
||||
#define ONIG_ENCODING_UTF32_BE (&OnigEncodingUTF32_BE)
|
||||
#define ONIG_ENCODING_UTF32_LE (&OnigEncodingUTF32_LE)
|
||||
#define ONIG_ENCODING_EUC_JP (&OnigEncodingEUC_JP)
|
||||
#define ONIG_ENCODING_EUC_TW (&OnigEncodingEUC_TW)
|
||||
#define ONIG_ENCODING_EUC_KR (&OnigEncodingEUC_KR)
|
||||
#define ONIG_ENCODING_EUC_CN (&OnigEncodingEUC_CN)
|
||||
#define ONIG_ENCODING_SJIS (&OnigEncodingSJIS)
|
||||
#define ONIG_ENCODING_CP932 (&OnigEncodingCP932)
|
||||
#define ONIG_ENCODING_KOI8 (&OnigEncodingKOI8)
|
||||
#define ONIG_ENCODING_KOI8_R (&OnigEncodingKOI8_R)
|
||||
#define ONIG_ENCODING_CP1251 (&OnigEncodingCP1251)
|
||||
#define ONIG_ENCODING_BIG5 (&OnigEncodingBIG5)
|
||||
#define ONIG_ENCODING_GB18030 (&OnigEncodingGB18030)
|
||||
|
||||
#define ONIG_ENCODING_UNDEF ((OnigEncoding )0)
|
||||
|
||||
|
||||
/* work size */
|
||||
#define ONIGENC_CODE_TO_MBC_MAXLEN 7
|
||||
#define ONIGENC_MBC_CASE_FOLD_MAXLEN 18
|
||||
/* 18: 6(max-byte) * 3(case-fold chars) */
|
||||
|
||||
/* character types */
|
||||
#define ONIGENC_CTYPE_NEWLINE 0
|
||||
#define ONIGENC_CTYPE_ALPHA 1
|
||||
#define ONIGENC_CTYPE_BLANK 2
|
||||
#define ONIGENC_CTYPE_CNTRL 3
|
||||
#define ONIGENC_CTYPE_DIGIT 4
|
||||
#define ONIGENC_CTYPE_GRAPH 5
|
||||
#define ONIGENC_CTYPE_LOWER 6
|
||||
#define ONIGENC_CTYPE_PRINT 7
|
||||
#define ONIGENC_CTYPE_PUNCT 8
|
||||
#define ONIGENC_CTYPE_SPACE 9
|
||||
#define ONIGENC_CTYPE_UPPER 10
|
||||
#define ONIGENC_CTYPE_XDIGIT 11
|
||||
#define ONIGENC_CTYPE_WORD 12
|
||||
#define ONIGENC_CTYPE_ALNUM 13 /* alpha || digit */
|
||||
#define ONIGENC_CTYPE_ASCII 14
|
||||
#define ONIGENC_MAX_STD_CTYPE ONIGENC_CTYPE_ASCII
|
||||
|
||||
/* flags */
|
||||
#define ONIGENC_FLAG_NONE 0U
|
||||
#define ONIGENC_FLAG_UNICODE 1U
|
||||
|
||||
|
||||
#define onig_enc_len(enc,p,end) ONIGENC_MBC_ENC_LEN(enc,p)
|
||||
|
||||
#define ONIGENC_IS_UNDEF(enc) ((enc) == ONIG_ENCODING_UNDEF)
|
||||
#define ONIGENC_IS_SINGLEBYTE(enc) (ONIGENC_MBC_MAXLEN(enc) == 1)
|
||||
#define ONIGENC_IS_MBC_HEAD(enc,p) (ONIGENC_MBC_ENC_LEN(enc,p) != 1)
|
||||
/* non-zero when the byte that p points at is below 128 */
#define ONIGENC_IS_MBC_ASCII(p) ((p)[0] < 128)
|
||||
/* non-zero when the code value is below 128 */
#define ONIGENC_IS_CODE_ASCII(code) (128 > (code))
|
||||
#define ONIGENC_IS_MBC_WORD(enc,s,end) \
|
||||
ONIGENC_IS_CODE_WORD(enc,ONIGENC_MBC_TO_CODE(enc,s,end))
|
||||
#define ONIGENC_IS_MBC_ASCII_WORD(enc,s,end) \
|
||||
onigenc_ascii_is_code_ctype( \
|
||||
ONIGENC_MBC_TO_CODE(enc,s,end),ONIGENC_CTYPE_WORD)
|
||||
#define ONIGENC_IS_UNICODE(enc) ((enc)->flags & ONIGENC_FLAG_UNICODE)
|
||||
|
||||
|
||||
#define ONIGENC_NAME(enc) ((enc)->name)
|
||||
|
||||
/* Call the encoding's mbc_case_fold() hook.  pp is parenthesized so that the
   (const OnigUChar**) cast applies to the whole argument expression rather
   than only to its first operand. */
#define ONIGENC_MBC_CASE_FOLD(enc,flag,pp,end,buf) \
  (enc)->mbc_case_fold(flag,(const OnigUChar** )(pp),end,buf)
|
||||
#define ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc,s,end) \
|
||||
(enc)->is_allowed_reverse_match(s,end)
|
||||
#define ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc,start,s) \
|
||||
(enc)->left_adjust_char_head(start, s)
|
||||
#define ONIGENC_APPLY_ALL_CASE_FOLD(enc,case_fold_flag,f,arg) \
|
||||
(enc)->apply_all_case_fold(case_fold_flag,f,arg)
|
||||
#define ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc,case_fold_flag,p,end,acs) \
|
||||
(enc)->get_case_fold_codes_by_str(case_fold_flag,p,end,acs)
|
||||
#define ONIGENC_STEP_BACK(enc,start,s,n) \
|
||||
onigenc_step_back((enc),(start),(s),(n))
|
||||
|
||||
#define ONIGENC_MBC_ENC_LEN(enc,p) (enc)->mbc_enc_len(p)
|
||||
#define ONIGENC_MBC_MAXLEN(enc) ((enc)->max_enc_len)
|
||||
#define ONIGENC_MBC_MAXLEN_DIST(enc) ONIGENC_MBC_MAXLEN(enc)
|
||||
#define ONIGENC_MBC_MINLEN(enc) ((enc)->min_enc_len)
|
||||
#define ONIGENC_IS_MBC_NEWLINE(enc,p,end) (enc)->is_mbc_newline((p),(end))
|
||||
#define ONIGENC_MBC_TO_CODE(enc,p,end) (enc)->mbc_to_code((p),(end))
|
||||
#define ONIGENC_CODE_TO_MBCLEN(enc,code) (enc)->code_to_mbclen(code)
|
||||
#define ONIGENC_CODE_TO_MBC(enc,code,buf) (enc)->code_to_mbc(code,buf)
|
||||
#define ONIGENC_PROPERTY_NAME_TO_CTYPE(enc,p,end) \
|
||||
(enc)->property_name_to_ctype(enc,p,end)
|
||||
|
||||
#define ONIGENC_IS_CODE_CTYPE(enc,code,ctype) (enc)->is_code_ctype(code,ctype)
|
||||
|
||||
#define ONIGENC_IS_CODE_NEWLINE(enc,code) \
|
||||
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_NEWLINE)
|
||||
#define ONIGENC_IS_CODE_GRAPH(enc,code) \
|
||||
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_GRAPH)
|
||||
#define ONIGENC_IS_CODE_PRINT(enc,code) \
|
||||
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_PRINT)
|
||||
#define ONIGENC_IS_CODE_ALNUM(enc,code) \
|
||||
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_ALNUM)
|
||||
#define ONIGENC_IS_CODE_ALPHA(enc,code) \
|
||||
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_ALPHA)
|
||||
#define ONIGENC_IS_CODE_LOWER(enc,code) \
|
||||
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_LOWER)
|
||||
#define ONIGENC_IS_CODE_UPPER(enc,code) \
|
||||
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_UPPER)
|
||||
#define ONIGENC_IS_CODE_CNTRL(enc,code) \
|
||||
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_CNTRL)
|
||||
#define ONIGENC_IS_CODE_PUNCT(enc,code) \
|
||||
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_PUNCT)
|
||||
#define ONIGENC_IS_CODE_SPACE(enc,code) \
|
||||
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_SPACE)
|
||||
#define ONIGENC_IS_CODE_BLANK(enc,code) \
|
||||
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_BLANK)
|
||||
#define ONIGENC_IS_CODE_DIGIT(enc,code) \
|
||||
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_DIGIT)
|
||||
#define ONIGENC_IS_CODE_XDIGIT(enc,code) \
|
||||
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_XDIGIT)
|
||||
#define ONIGENC_IS_CODE_WORD(enc,code) \
|
||||
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_WORD)
|
||||
|
||||
#define ONIGENC_GET_CTYPE_CODE_RANGE(enc,ctype,sbout,ranges) \
|
||||
(enc)->get_ctype_code_range(ctype,sbout,ranges)
|
||||
|
||||
ONIG_EXTERN
|
||||
OnigUChar* onigenc_step_back P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, int n));
|
||||
|
||||
|
||||
/* encoding API */
|
||||
ONIG_EXTERN
|
||||
int onigenc_init P_((void));
|
||||
ONIG_EXTERN
|
||||
int onigenc_set_default_encoding P_((OnigEncoding enc));
|
||||
ONIG_EXTERN
|
||||
OnigEncoding onigenc_get_default_encoding P_((void));
|
||||
ONIG_EXTERN
|
||||
void onigenc_set_default_caseconv_table P_((const OnigUChar* table));
|
||||
ONIG_EXTERN
|
||||
OnigUChar* onigenc_get_right_adjust_char_head_with_prev P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, const OnigUChar** prev));
|
||||
ONIG_EXTERN
|
||||
OnigUChar* onigenc_get_prev_char_head P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s));
|
||||
ONIG_EXTERN
|
||||
OnigUChar* onigenc_get_left_adjust_char_head P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s));
|
||||
ONIG_EXTERN
|
||||
OnigUChar* onigenc_get_right_adjust_char_head P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s));
|
||||
ONIG_EXTERN
|
||||
int onigenc_strlen P_((OnigEncoding enc, const OnigUChar* p, const OnigUChar* end));
|
||||
ONIG_EXTERN
|
||||
int onigenc_strlen_null P_((OnigEncoding enc, const OnigUChar* p));
|
||||
ONIG_EXTERN
|
||||
int onigenc_str_bytelen_null P_((OnigEncoding enc, const OnigUChar* p));
|
||||
|
||||
|
||||
|
||||
/* PART: regular expression */
|
||||
|
||||
/* config parameters */
|
||||
#define ONIG_NREGION 10
|
||||
#define ONIG_MAX_BACKREF_NUM 1000
|
||||
#define ONIG_MAX_REPEAT_NUM 100000
|
||||
#define ONIG_MAX_MULTI_BYTE_RANGES_NUM 10000
|
||||
/* constants */
|
||||
#define ONIG_MAX_ERROR_MESSAGE_LEN 90
|
||||
|
||||
typedef unsigned int OnigOptionType;
|
||||
|
||||
#define ONIG_OPTION_DEFAULT ONIG_OPTION_NONE
|
||||
|
||||
/* options */
|
||||
#define ONIG_OPTION_NONE 0U
|
||||
#define ONIG_OPTION_IGNORECASE 1U
|
||||
#define ONIG_OPTION_EXTEND (ONIG_OPTION_IGNORECASE << 1)
|
||||
#define ONIG_OPTION_MULTILINE (ONIG_OPTION_EXTEND << 1)
|
||||
#define ONIG_OPTION_DOTALL ONIG_OPTION_MULTILINE
|
||||
#define ONIG_OPTION_SINGLELINE (ONIG_OPTION_MULTILINE << 1)
|
||||
#define ONIG_OPTION_FIND_LONGEST (ONIG_OPTION_SINGLELINE << 1)
|
||||
#define ONIG_OPTION_FIND_NOT_EMPTY (ONIG_OPTION_FIND_LONGEST << 1)
|
||||
#define ONIG_OPTION_NEGATE_SINGLELINE (ONIG_OPTION_FIND_NOT_EMPTY << 1)
|
||||
#define ONIG_OPTION_DONT_CAPTURE_GROUP (ONIG_OPTION_NEGATE_SINGLELINE << 1)
|
||||
#define ONIG_OPTION_CAPTURE_GROUP (ONIG_OPTION_DONT_CAPTURE_GROUP << 1)
|
||||
/* options (search time) */
|
||||
#define ONIG_OPTION_NOTBOL (ONIG_OPTION_CAPTURE_GROUP << 1)
|
||||
#define ONIG_OPTION_NOTEOL (ONIG_OPTION_NOTBOL << 1)
|
||||
#define ONIG_OPTION_POSIX_REGION (ONIG_OPTION_NOTEOL << 1)
|
||||
/* options (ctype range) */
|
||||
#define ONIG_OPTION_ASCII_RANGE (ONIG_OPTION_POSIX_REGION << 1)
|
||||
#define ONIG_OPTION_POSIX_BRACKET_ALL_RANGE (ONIG_OPTION_ASCII_RANGE << 1)
|
||||
#define ONIG_OPTION_WORD_BOUND_ALL_RANGE (ONIG_OPTION_POSIX_BRACKET_ALL_RANGE << 1)
|
||||
/* options (newline) */
|
||||
#define ONIG_OPTION_NEWLINE_CRLF (ONIG_OPTION_WORD_BOUND_ALL_RANGE << 1)
|
||||
#define ONIG_OPTION_MAXBIT ONIG_OPTION_NEWLINE_CRLF /* limit */
|
||||
|
||||
#define ONIG_OPTION_ON(options,regopt) ((options) |= (regopt))
|
||||
#define ONIG_OPTION_OFF(options,regopt) ((options) &= ~(regopt))
|
||||
#define ONIG_IS_OPTION_ON(options,option) ((options) & (option))
|
||||
|
||||
/* syntax */
|
||||
typedef struct {
|
||||
unsigned int op;
|
||||
unsigned int op2;
|
||||
unsigned int behavior;
|
||||
OnigOptionType options; /* default option */
|
||||
OnigMetaCharTableType meta_char_table;
|
||||
} OnigSyntaxType;
|
||||
|
||||
ONIG_EXTERN OnigSyntaxType OnigSyntaxASIS;
|
||||
ONIG_EXTERN OnigSyntaxType OnigSyntaxPosixBasic;
|
||||
ONIG_EXTERN OnigSyntaxType OnigSyntaxPosixExtended;
|
||||
ONIG_EXTERN OnigSyntaxType OnigSyntaxEmacs;
|
||||
ONIG_EXTERN OnigSyntaxType OnigSyntaxGrep;
|
||||
ONIG_EXTERN OnigSyntaxType OnigSyntaxGnuRegex;
|
||||
ONIG_EXTERN OnigSyntaxType OnigSyntaxJava;
|
||||
ONIG_EXTERN OnigSyntaxType OnigSyntaxPerl58;
|
||||
ONIG_EXTERN OnigSyntaxType OnigSyntaxPerl58_NG;
|
||||
ONIG_EXTERN OnigSyntaxType OnigSyntaxPerl;
|
||||
ONIG_EXTERN OnigSyntaxType OnigSyntaxRuby;
|
||||
ONIG_EXTERN OnigSyntaxType OnigSyntaxPython;
|
||||
|
||||
/* predefined syntaxes (see regsyntax.c) */
|
||||
#define ONIG_SYNTAX_ASIS (&OnigSyntaxASIS)
|
||||
#define ONIG_SYNTAX_POSIX_BASIC (&OnigSyntaxPosixBasic)
|
||||
#define ONIG_SYNTAX_POSIX_EXTENDED (&OnigSyntaxPosixExtended)
|
||||
#define ONIG_SYNTAX_EMACS (&OnigSyntaxEmacs)
|
||||
#define ONIG_SYNTAX_GREP (&OnigSyntaxGrep)
|
||||
#define ONIG_SYNTAX_GNU_REGEX (&OnigSyntaxGnuRegex)
|
||||
#define ONIG_SYNTAX_JAVA (&OnigSyntaxJava)
|
||||
#define ONIG_SYNTAX_PERL58 (&OnigSyntaxPerl58)
|
||||
#define ONIG_SYNTAX_PERL58_NG (&OnigSyntaxPerl58_NG)
|
||||
#define ONIG_SYNTAX_PERL (&OnigSyntaxPerl)
|
||||
#define ONIG_SYNTAX_RUBY (&OnigSyntaxRuby)
|
||||
#define ONIG_SYNTAX_PYTHON (&OnigSyntaxPython)
|
||||
|
||||
/* default syntax */
|
||||
ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax;
|
||||
#define ONIG_SYNTAX_DEFAULT OnigDefaultSyntax
|
||||
|
||||
/* syntax (operators) */
|
||||
#define ONIG_SYN_OP_VARIABLE_META_CHARACTERS (1U<<0)
|
||||
#define ONIG_SYN_OP_DOT_ANYCHAR (1U<<1) /* . */
|
||||
#define ONIG_SYN_OP_ASTERISK_ZERO_INF (1U<<2) /* * */
|
||||
#define ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF (1U<<3)
|
||||
#define ONIG_SYN_OP_PLUS_ONE_INF (1U<<4) /* + */
|
||||
#define ONIG_SYN_OP_ESC_PLUS_ONE_INF (1U<<5)
|
||||
#define ONIG_SYN_OP_QMARK_ZERO_ONE (1U<<6) /* ? */
|
||||
#define ONIG_SYN_OP_ESC_QMARK_ZERO_ONE (1U<<7)
|
||||
#define ONIG_SYN_OP_BRACE_INTERVAL (1U<<8) /* {lower,upper} */
|
||||
#define ONIG_SYN_OP_ESC_BRACE_INTERVAL (1U<<9) /* \{lower,upper\} */
|
||||
#define ONIG_SYN_OP_VBAR_ALT (1U<<10) /* | */
|
||||
#define ONIG_SYN_OP_ESC_VBAR_ALT (1U<<11) /* \| */
|
||||
#define ONIG_SYN_OP_LPAREN_SUBEXP (1U<<12) /* (...) */
|
||||
#define ONIG_SYN_OP_ESC_LPAREN_SUBEXP (1U<<13) /* \(...\) */
|
||||
#define ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR (1U<<14) /* \A, \Z, \z */
|
||||
#define ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR (1U<<15) /* \G */
|
||||
#define ONIG_SYN_OP_DECIMAL_BACKREF (1U<<16) /* \num */
|
||||
#define ONIG_SYN_OP_BRACKET_CC (1U<<17) /* [...] */
|
||||
#define ONIG_SYN_OP_ESC_W_WORD (1U<<18) /* \w, \W */
|
||||
#define ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END (1U<<19) /* \<, \> */
|
||||
#define ONIG_SYN_OP_ESC_B_WORD_BOUND (1U<<20) /* \b, \B */
|
||||
#define ONIG_SYN_OP_ESC_S_WHITE_SPACE (1U<<21) /* \s, \S */
|
||||
#define ONIG_SYN_OP_ESC_D_DIGIT (1U<<22) /* \d, \D */
|
||||
#define ONIG_SYN_OP_LINE_ANCHOR (1U<<23) /* ^, $ */
|
||||
#define ONIG_SYN_OP_POSIX_BRACKET (1U<<24) /* [:xxxx:] */
|
||||
#define ONIG_SYN_OP_QMARK_NON_GREEDY (1U<<25) /* ??,*?,+?,{n,m}? */
|
||||
#define ONIG_SYN_OP_ESC_CONTROL_CHARS (1U<<26) /* \n,\r,\t,\a ... */
|
||||
#define ONIG_SYN_OP_ESC_C_CONTROL (1U<<27) /* \cx */
|
||||
#define ONIG_SYN_OP_ESC_OCTAL3 (1U<<28) /* \OOO */
|
||||
#define ONIG_SYN_OP_ESC_X_HEX2 (1U<<29) /* \xHH */
|
||||
#define ONIG_SYN_OP_ESC_X_BRACE_HEX8 (1U<<30) /* \x{7HHHHHHH} */
|
||||
#define ONIG_SYN_OP_ESC_O_BRACE_OCTAL (1U<<31) /* \o{OOO} */ /* NOTIMPL */
|
||||
|
||||
#define ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE (1U<<0) /* \Q...\E */
|
||||
#define ONIG_SYN_OP2_QMARK_GROUP_EFFECT (1U<<1) /* (?...) */
|
||||
#define ONIG_SYN_OP2_OPTION_PERL (1U<<2) /* (?imsxadlu), (?-imsx), (?^imsxalu) */
|
||||
#define ONIG_SYN_OP2_OPTION_RUBY (1U<<3) /* (?imxadu), (?-imx) */
|
||||
#define ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT (1U<<4) /* ?+,*+,++ */
|
||||
#define ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL (1U<<5) /* {n,m}+ */
|
||||
#define ONIG_SYN_OP2_CCLASS_SET_OP (1U<<6) /* [...&&..[..]..] */
|
||||
#define ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP (1U<<7) /* (?<name>...) */
|
||||
#define ONIG_SYN_OP2_ESC_K_NAMED_BACKREF (1U<<8) /* \k<name> */
|
||||
#define ONIG_SYN_OP2_ESC_G_SUBEXP_CALL (1U<<9) /* \g<name>, \g<n> */
|
||||
#define ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY (1U<<10) /* (?@..),(?@<x>..) */
|
||||
#define ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL (1U<<11) /* \C-x */
|
||||
#define ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META (1U<<12) /* \M-x */
|
||||
#define ONIG_SYN_OP2_ESC_V_VTAB (1U<<13) /* \v as VTAB */
|
||||
#define ONIG_SYN_OP2_ESC_U_HEX4 (1U<<14) /* \uHHHH */
|
||||
#define ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR (1U<<15) /* \`, \' */
|
||||
#define ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY (1U<<16) /* \p{...}, \P{...} */
|
||||
#define ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT (1U<<17) /* \p{^..}, \P{^..} */
|
||||
/* #define ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS (1U<<18) */
|
||||
#define ONIG_SYN_OP2_ESC_H_XDIGIT (1U<<19) /* \h, \H */
|
||||
#define ONIG_SYN_OP2_INEFFECTIVE_ESCAPE (1U<<20) /* \ */
|
||||
#define ONIG_SYN_OP2_ESC_CAPITAL_R_LINEBREAK (1U<<21) /* \R as (?>\x0D\x0A|[\x0A-\x0D\x{85}\x{2028}\x{2029}]) */
|
||||
#define ONIG_SYN_OP2_ESC_CAPITAL_X_EXTENDED_GRAPHEME_CLUSTER (1U<<22) /* \X as (?>\P{M}\p{M}*) */
|
||||
#define ONIG_SYN_OP2_ESC_V_VERTICAL_WHITESPACE (1U<<23) /* \v, \V -- Perl */ /* NOTIMPL */
|
||||
#define ONIG_SYN_OP2_ESC_H_HORIZONTAL_WHITESPACE (1U<<24) /* \h, \H -- Perl */ /* NOTIMPL */
|
||||
#define ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP (1U<<25) /* \K */
|
||||
#define ONIG_SYN_OP2_ESC_G_BRACE_BACKREF (1U<<26) /* \g{name}, \g{n} */
|
||||
#define ONIG_SYN_OP2_QMARK_SUBEXP_CALL (1U<<27) /* (?&name), (?n), (?R), (?0) */
|
||||
#define ONIG_SYN_OP2_QMARK_VBAR_BRANCH_RESET (1U<<28) /* (?|...) */ /* NOTIMPL */
|
||||
#define ONIG_SYN_OP2_QMARK_LPAREN_CONDITION (1U<<29) /* (?(cond)yes...|no...) */
|
||||
#define ONIG_SYN_OP2_QMARK_CAPITAL_P_NAMED_GROUP (1U<<30) /* (?P<name>...), (?P=name), (?P>name) -- Python/PCRE */
|
||||
#define ONIG_SYN_OP2_OPTION_JAVA (1U<<31) /* (?idmsux), (?-idmsux) */ /* NOTIMPL */
|
||||
|
||||
/* syntax (behavior) */
|
||||
#define ONIG_SYN_CONTEXT_INDEP_ANCHORS (1U<<31) /* not implemented */
|
||||
#define ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS (1U<<0) /* ?, *, +, {n,m} */
|
||||
#define ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS (1U<<1) /* error or ignore */
|
||||
#define ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP (1U<<2) /* ...)... */
|
||||
#define ONIG_SYN_ALLOW_INVALID_INTERVAL (1U<<3) /* {??? */
|
||||
#define ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV (1U<<4) /* {,n} => {0,n} */
|
||||
#define ONIG_SYN_STRICT_CHECK_BACKREF (1U<<5) /* /(\1)/,/\1()/ ..*/
|
||||
#define ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND (1U<<6) /* (?<=a|bc) */
|
||||
#define ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP (1U<<7) /* see doc/RE */
|
||||
#define ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME (1U<<8) /* (?<x>)(?<x>) */
|
||||
#define ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY (1U<<9) /* a{n}?=(?:a{n})? */
|
||||
#define ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME_CALL (1U<<10) /* (?<x>)(?<x>)(?&x) */
|
||||
|
||||
/* syntax (behavior) in char class [...] */
|
||||
#define ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC (1U<<20) /* [^...] */
|
||||
#define ONIG_SYN_BACKSLASH_ESCAPE_IN_CC (1U<<21) /* [..\w..] etc.. */
|
||||
#define ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC (1U<<22)
|
||||
#define ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC (1U<<23) /* [0-9-a]=[0-9\-a] */
|
||||
/* syntax (behavior) warning */
|
||||
#define ONIG_SYN_WARN_CC_OP_NOT_ESCAPED (1U<<24) /* [,-,] */
|
||||
#define ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT (1U<<25) /* (?:a*)+ */
|
||||
|
||||
/* meta character specifiers (onig_set_meta_char()) */
|
||||
#define ONIG_META_CHAR_ESCAPE 0
|
||||
#define ONIG_META_CHAR_ANYCHAR 1
|
||||
#define ONIG_META_CHAR_ANYTIME 2
|
||||
#define ONIG_META_CHAR_ZERO_OR_ONE_TIME 3
|
||||
#define ONIG_META_CHAR_ONE_OR_MORE_TIME 4
|
||||
#define ONIG_META_CHAR_ANYCHAR_ANYTIME 5
|
||||
|
||||
#define ONIG_INEFFECTIVE_META_CHAR 0
|
||||
|
||||
/* error codes */
|
||||
/* Non-zero when ecode lies in the range -999 .. -100 (inclusive).
 * The argument is evaluated up to twice, so avoid side effects in it. */
#define ONIG_IS_PATTERN_ERROR(ecode) ((ecode) <= -100 && (ecode) > -1000)
|
||||
/* normal return */
|
||||
#define ONIG_NORMAL 0
|
||||
#define ONIG_MISMATCH -1
|
||||
#define ONIG_NO_SUPPORT_CONFIG -2
|
||||
|
||||
/* internal error */
|
||||
#define ONIGERR_MEMORY -5
|
||||
#define ONIGERR_TYPE_BUG -6
|
||||
#define ONIGERR_PARSER_BUG -11
|
||||
#define ONIGERR_STACK_BUG -12
|
||||
#define ONIGERR_UNDEFINED_BYTECODE -13
|
||||
#define ONIGERR_UNEXPECTED_BYTECODE -14
|
||||
#define ONIGERR_MATCH_STACK_LIMIT_OVER -15
|
||||
#define ONIGERR_DEFAULT_ENCODING_IS_NOT_SET -21
|
||||
#define ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR -22
|
||||
/* general error */
|
||||
#define ONIGERR_INVALID_ARGUMENT -30
|
||||
/* syntax error */
|
||||
#define ONIGERR_END_PATTERN_AT_LEFT_BRACE -100
|
||||
#define ONIGERR_END_PATTERN_AT_LEFT_BRACKET -101
|
||||
#define ONIGERR_EMPTY_CHAR_CLASS -102
|
||||
#define ONIGERR_PREMATURE_END_OF_CHAR_CLASS -103
|
||||
#define ONIGERR_END_PATTERN_AT_ESCAPE -104
|
||||
#define ONIGERR_END_PATTERN_AT_META -105
|
||||
#define ONIGERR_END_PATTERN_AT_CONTROL -106
|
||||
#define ONIGERR_META_CODE_SYNTAX -108
|
||||
#define ONIGERR_CONTROL_CODE_SYNTAX -109
|
||||
#define ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE -110
|
||||
#define ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE -111
|
||||
#define ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS -112
|
||||
#define ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED -113
|
||||
#define ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID -114
|
||||
#define ONIGERR_NESTED_REPEAT_OPERATOR -115
|
||||
#define ONIGERR_UNMATCHED_CLOSE_PARENTHESIS -116
|
||||
#define ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS -117
|
||||
#define ONIGERR_END_PATTERN_IN_GROUP -118
|
||||
#define ONIGERR_UNDEFINED_GROUP_OPTION -119
|
||||
#define ONIGERR_INVALID_POSIX_BRACKET_TYPE -121
|
||||
#define ONIGERR_INVALID_LOOK_BEHIND_PATTERN -122
|
||||
#define ONIGERR_INVALID_REPEAT_RANGE_PATTERN -123
|
||||
#define ONIGERR_INVALID_CONDITION_PATTERN -124
|
||||
/* values error (syntax error) */
|
||||
#define ONIGERR_TOO_BIG_NUMBER -200
|
||||
#define ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE -201
|
||||
#define ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE -202
|
||||
#define ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS -203
|
||||
#define ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE -204
|
||||
#define ONIGERR_TOO_MANY_MULTI_BYTE_RANGES -205
|
||||
#define ONIGERR_TOO_SHORT_MULTI_BYTE_STRING -206
|
||||
#define ONIGERR_TOO_BIG_BACKREF_NUMBER -207
|
||||
#define ONIGERR_INVALID_BACKREF -208
|
||||
#define ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED -209
|
||||
#define ONIGERR_TOO_SHORT_DIGITS -210
|
||||
#define ONIGERR_TOO_LONG_WIDE_CHAR_VALUE -212
|
||||
#define ONIGERR_EMPTY_GROUP_NAME -214
|
||||
#define ONIGERR_INVALID_GROUP_NAME -215
|
||||
#define ONIGERR_INVALID_CHAR_IN_GROUP_NAME -216
|
||||
#define ONIGERR_UNDEFINED_NAME_REFERENCE -217
|
||||
#define ONIGERR_UNDEFINED_GROUP_REFERENCE -218
|
||||
#define ONIGERR_MULTIPLEX_DEFINED_NAME -219
|
||||
#define ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL -220
|
||||
#define ONIGERR_NEVER_ENDING_RECURSION -221
|
||||
#define ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY -222
|
||||
#define ONIGERR_INVALID_CHAR_PROPERTY_NAME -223
|
||||
#define ONIGERR_INVALID_CODE_POINT_VALUE -400
|
||||
#define ONIGERR_INVALID_WIDE_CHAR_VALUE -400
|
||||
#define ONIGERR_TOO_BIG_WIDE_CHAR_VALUE -401
|
||||
#define ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION -402
|
||||
#define ONIGERR_INVALID_COMBINATION_OF_OPTIONS -403
|
||||
|
||||
/* errors related to thread */
|
||||
#define ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT -1001
|
||||
|
||||
|
||||
/* must be smaller than BIT_STATUS_BITS_NUM (unsigned int * 8) */
|
||||
#define ONIG_MAX_CAPTURE_HISTORY_GROUP 31
|
||||
/* Non-zero when i does not exceed ONIG_MAX_CAPTURE_HISTORY_GROUP and
 * (r)->list and (r)->list[i] are both non-null/non-zero.
 * NOTE(review): struct re_registers as declared in this header has no
 * `list` member -- confirm which type `r` is expected to be. */
#define ONIG_IS_CAPTURE_HISTORY_GROUP(r, i) \
  ((i) <= ONIG_MAX_CAPTURE_HISTORY_GROUP && (r)->list && (r)->list[i])
|
||||
|
||||
typedef struct OnigCaptureTreeNodeStruct {
|
||||
int group; /* group number */
|
||||
OnigPosition beg;
|
||||
OnigPosition end;
|
||||
int allocated;
|
||||
int num_childs;
|
||||
struct OnigCaptureTreeNodeStruct** childs;
|
||||
} OnigCaptureTreeNode;
|
||||
|
||||
/* match result region type */
|
||||
struct re_registers {
|
||||
int allocated;
|
||||
int num_regs;
|
||||
OnigPosition* beg;
|
||||
OnigPosition* end;
|
||||
/* extended */
|
||||
OnigCaptureTreeNode* history_root; /* capture history tree root */
|
||||
};
|
||||
|
||||
/* capture tree traverse */
|
||||
#define ONIG_TRAVERSE_CALLBACK_AT_FIRST 1
|
||||
#define ONIG_TRAVERSE_CALLBACK_AT_LAST 2
|
||||
/* Bitwise OR of the AT_FIRST and AT_LAST traverse flags. */
#define ONIG_TRAVERSE_CALLBACK_AT_BOTH \
  ( ONIG_TRAVERSE_CALLBACK_AT_FIRST | ONIG_TRAVERSE_CALLBACK_AT_LAST )
|
||||
|
||||
|
||||
#define ONIG_REGION_NOTPOS -1
|
||||
|
||||
typedef struct re_registers OnigRegion;
|
||||
|
||||
typedef struct {
|
||||
OnigEncoding enc;
|
||||
OnigUChar* par;
|
||||
OnigUChar* par_end;
|
||||
} OnigErrorInfo;
|
||||
|
||||
/* Lower/upper pair describing one repeat range. */
typedef struct {
  int lower;
  int upper;
} OnigRepeatRange;
|
||||
|
||||
typedef void (*OnigWarnFunc) P_((const char* s));
|
||||
extern void onig_null_warn P_((const char* s));
|
||||
#define ONIG_NULL_WARN onig_null_warn
|
||||
|
||||
#define ONIG_CHAR_TABLE_SIZE 256
|
||||
|
||||
/* regex_t state */
|
||||
#define ONIG_STATE_NORMAL 0
|
||||
#define ONIG_STATE_SEARCHING 1
|
||||
#define ONIG_STATE_COMPILING -1
|
||||
#define ONIG_STATE_MODIFY -2
|
||||
|
||||
/* Any positive state value is reported as ONIG_STATE_SEARCHING; zero and
 * negative values are passed through unchanged.  `reg` is evaluated twice. */
#define ONIG_STATE(reg) \
  ((reg)->state > 0 ? ONIG_STATE_SEARCHING : (reg)->state)
|
||||
|
||||
typedef struct re_pattern_buffer {
|
||||
/* common members of BBuf(bytes-buffer) */
|
||||
unsigned char* p; /* compiled pattern */
|
||||
unsigned int used; /* used space for p */
|
||||
unsigned int alloc; /* allocated space for p */
|
||||
|
||||
int state; /* normal, searching, compiling */
|
||||
int num_mem; /* used memory(...) num counted from 1 */
|
||||
int num_repeat; /* OP_REPEAT/OP_REPEAT_NG id-counter */
|
||||
int num_null_check; /* OP_NULL_CHECK_START/END id counter */
|
||||
int num_comb_exp_check; /* combination explosion check */
|
||||
int num_call; /* number of subexp call */
|
||||
unsigned int capture_history; /* (?@...) flag (1-31) */
|
||||
unsigned int bt_mem_start; /* need backtrack flag */
|
||||
unsigned int bt_mem_end; /* need backtrack flag */
|
||||
int stack_pop_level;
|
||||
int repeat_range_alloc;
|
||||
OnigRepeatRange* repeat_range;
|
||||
|
||||
OnigEncoding enc;
|
||||
OnigOptionType options;
|
||||
OnigSyntaxType* syntax;
|
||||
OnigCaseFoldType case_fold_flag;
|
||||
void* name_table;
|
||||
|
||||
/* optimization info (string search, char-map and anchors) */
|
||||
int optimize; /* optimize flag */
|
||||
int threshold_len; /* search str-length for apply optimize */
|
||||
int anchor; /* BEGIN_BUF, BEGIN_POS, (SEMI_)END_BUF */
|
||||
OnigDistance anchor_dmin; /* (SEMI_)END_BUF anchor distance */
|
||||
OnigDistance anchor_dmax; /* (SEMI_)END_BUF anchor distance */
|
||||
int sub_anchor; /* start-anchor for exact or map */
|
||||
unsigned char *exact;
|
||||
unsigned char *exact_end;
|
||||
unsigned char map[ONIG_CHAR_TABLE_SIZE]; /* used as BM skip or char-map */
|
||||
int *int_map; /* BM skip for exact_len > 255 */
|
||||
int *int_map_backward; /* BM skip for backward search */
|
||||
OnigDistance dmin; /* min-distance of exact or map */
|
||||
OnigDistance dmax; /* max-distance of exact or map */
|
||||
|
||||
/* regex_t link chain */
|
||||
struct re_pattern_buffer* chain; /* escape compile-conflict */
|
||||
} OnigRegexType;
|
||||
|
||||
typedef OnigRegexType* OnigRegex;
|
||||
|
||||
#ifndef ONIG_ESCAPE_REGEX_T_COLLISION
|
||||
typedef OnigRegexType regex_t;
|
||||
#endif
|
||||
|
||||
|
||||
typedef struct {
|
||||
int num_of_elements;
|
||||
OnigEncoding pattern_enc;
|
||||
OnigEncoding target_enc;
|
||||
OnigSyntaxType* syntax;
|
||||
OnigOptionType option;
|
||||
OnigCaseFoldType case_fold_flag;
|
||||
} OnigCompileInfo;
|
||||
|
||||
/* Oniguruma Native API */
|
||||
ONIG_EXTERN
|
||||
int onig_init P_((void));
|
||||
ONIG_EXTERN
|
||||
int onig_error_code_to_str PV_((OnigUChar* s, OnigPosition err_code, ...));
|
||||
ONIG_EXTERN
|
||||
void onig_set_warn_func P_((OnigWarnFunc f));
|
||||
ONIG_EXTERN
|
||||
void onig_set_verb_warn_func P_((OnigWarnFunc f));
|
||||
ONIG_EXTERN
|
||||
int onig_new P_((OnigRegex*, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo));
|
||||
ONIG_EXTERN
|
||||
int onig_reg_init P_((OnigRegex reg, OnigOptionType option, OnigCaseFoldType case_fold_flag, OnigEncoding enc, OnigSyntaxType* syntax));
|
||||
ONIG_EXTERN
|
||||
int onig_new_without_alloc P_((OnigRegex, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo));
|
||||
ONIG_EXTERN
|
||||
int onig_new_deluxe P_((OnigRegex* reg, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo));
|
||||
ONIG_EXTERN
|
||||
void onig_free P_((OnigRegex));
|
||||
ONIG_EXTERN
|
||||
void onig_free_body P_((OnigRegex));
|
||||
ONIG_EXTERN
|
||||
int onig_recompile P_((OnigRegex, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo));
|
||||
ONIG_EXTERN
|
||||
int onig_recompile_deluxe P_((OnigRegex reg, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo));
|
||||
ONIG_EXTERN
|
||||
OnigPosition onig_search P_((OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* start, const OnigUChar* range, OnigRegion* region, OnigOptionType option));
|
||||
ONIG_EXTERN
|
||||
OnigPosition onig_search_gpos P_((OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* global_pos, const OnigUChar* start, const OnigUChar* range, OnigRegion* region, OnigOptionType option));
|
||||
ONIG_EXTERN
|
||||
OnigPosition onig_match P_((OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* at, OnigRegion* region, OnigOptionType option));
|
||||
ONIG_EXTERN
|
||||
OnigRegion* onig_region_new P_((void));
|
||||
ONIG_EXTERN
|
||||
void onig_region_init P_((OnigRegion* region));
|
||||
ONIG_EXTERN
|
||||
void onig_region_free P_((OnigRegion* region, int free_self));
|
||||
ONIG_EXTERN
|
||||
void onig_region_copy P_((OnigRegion* to, OnigRegion* from));
|
||||
ONIG_EXTERN
|
||||
void onig_region_clear P_((OnigRegion* region));
|
||||
ONIG_EXTERN
|
||||
int onig_region_resize P_((OnigRegion* region, int n));
|
||||
ONIG_EXTERN
|
||||
int onig_region_set P_((OnigRegion* region, int at, int beg, int end));
|
||||
ONIG_EXTERN
|
||||
int onig_name_to_group_numbers P_((OnigRegex reg, const OnigUChar* name, const OnigUChar* name_end, int** nums));
|
||||
ONIG_EXTERN
|
||||
int onig_name_to_backref_number P_((OnigRegex reg, const OnigUChar* name, const OnigUChar* name_end, OnigRegion *region));
|
||||
ONIG_EXTERN
|
||||
int onig_foreach_name P_((OnigRegex reg, int (*func)(const OnigUChar*, const OnigUChar*,int,int*,OnigRegex,void*), void* arg));
|
||||
ONIG_EXTERN
|
||||
int onig_number_of_names P_((OnigRegex reg));
|
||||
ONIG_EXTERN
|
||||
int onig_number_of_captures P_((OnigRegex reg));
|
||||
ONIG_EXTERN
|
||||
int onig_number_of_capture_histories P_((OnigRegex reg));
|
||||
ONIG_EXTERN
|
||||
OnigCaptureTreeNode* onig_get_capture_tree P_((OnigRegion* region));
|
||||
ONIG_EXTERN
|
||||
int onig_capture_tree_traverse P_((OnigRegion* region, int at, int(*callback_func)(int,OnigPosition,OnigPosition,int,int,void*), void* arg));
|
||||
ONIG_EXTERN
|
||||
int onig_noname_group_capture_is_active P_((OnigRegex reg));
|
||||
ONIG_EXTERN
|
||||
OnigEncoding onig_get_encoding P_((OnigRegex reg));
|
||||
ONIG_EXTERN
|
||||
OnigOptionType onig_get_options P_((OnigRegex reg));
|
||||
ONIG_EXTERN
|
||||
OnigCaseFoldType onig_get_case_fold_flag P_((OnigRegex reg));
|
||||
ONIG_EXTERN
|
||||
OnigSyntaxType* onig_get_syntax P_((OnigRegex reg));
|
||||
ONIG_EXTERN
|
||||
int onig_set_default_syntax P_((OnigSyntaxType* syntax));
|
||||
ONIG_EXTERN
|
||||
void onig_copy_syntax P_((OnigSyntaxType* to, const OnigSyntaxType* from));
|
||||
ONIG_EXTERN
|
||||
unsigned int onig_get_syntax_op P_((OnigSyntaxType* syntax));
|
||||
ONIG_EXTERN
|
||||
unsigned int onig_get_syntax_op2 P_((OnigSyntaxType* syntax));
|
||||
ONIG_EXTERN
|
||||
unsigned int onig_get_syntax_behavior P_((OnigSyntaxType* syntax));
|
||||
ONIG_EXTERN
|
||||
OnigOptionType onig_get_syntax_options P_((OnigSyntaxType* syntax));
|
||||
ONIG_EXTERN
|
||||
void onig_set_syntax_op P_((OnigSyntaxType* syntax, unsigned int op));
|
||||
ONIG_EXTERN
|
||||
void onig_set_syntax_op2 P_((OnigSyntaxType* syntax, unsigned int op2));
|
||||
ONIG_EXTERN
|
||||
void onig_set_syntax_behavior P_((OnigSyntaxType* syntax, unsigned int behavior));
|
||||
ONIG_EXTERN
|
||||
void onig_set_syntax_options P_((OnigSyntaxType* syntax, OnigOptionType options));
|
||||
ONIG_EXTERN
|
||||
int onig_set_meta_char P_((OnigSyntaxType* syntax, unsigned int what, OnigCodePoint code));
|
||||
ONIG_EXTERN
|
||||
void onig_copy_encoding P_((OnigEncoding to, OnigEncoding from));
|
||||
ONIG_EXTERN
|
||||
OnigCaseFoldType onig_get_default_case_fold_flag P_((void));
|
||||
ONIG_EXTERN
|
||||
int onig_set_default_case_fold_flag P_((OnigCaseFoldType case_fold_flag));
|
||||
ONIG_EXTERN
|
||||
unsigned int onig_get_match_stack_limit_size P_((void));
|
||||
ONIG_EXTERN
|
||||
int onig_set_match_stack_limit_size P_((unsigned int size));
|
||||
ONIG_EXTERN
|
||||
int onig_end P_((void));
|
||||
ONIG_EXTERN
|
||||
const char* onig_version P_((void));
|
||||
ONIG_EXTERN
|
||||
const char* onig_copyright P_((void));
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* ONIGURUMA_H */
|
14
src/Onigmo/oniguruma.pc.in
Normal file
14
src/Onigmo/oniguruma.pc.in
Normal file
@ -0,0 +1,14 @@
|
||||
prefix=@prefix@
|
||||
exec_prefix=@exec_prefix@
|
||||
libdir=@libdir@
|
||||
includedir=@includedir@
|
||||
datarootdir=@datarootdir@
|
||||
datadir=@datadir@
|
||||
|
||||
Name: oniguruma
|
||||
Description: Regular expression library
|
||||
Version: @PACKAGE_VERSION@
|
||||
Requires:
|
||||
Libs: -L${libdir} -lonig
|
||||
Cflags: -I${includedir}
|
||||
|
6677
src/Onigmo/regcomp.c
Normal file
6677
src/Onigmo/regcomp.c
Normal file
File diff suppressed because it is too large
Load Diff
933
src/Onigmo/regenc.c
Normal file
933
src/Onigmo/regenc.c
Normal file
@ -0,0 +1,933 @@
|
||||
/**********************************************************************
|
||||
regenc.c - Onigmo (Oniguruma-mod) (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2011 K.Takata <kentkt AT csc DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "regint.h"
|
||||
|
||||
OnigEncoding OnigEncDefaultCharEncoding = ONIG_ENCODING_INIT_DEFAULT;
|
||||
|
||||
/* Initialisation hook for the encoding layer.  There is nothing to set up,
 * so it always returns 0. */
extern int
onigenc_init(void)
{
  const int status = 0;

  return status;
}
|
||||
|
||||
extern OnigEncoding
|
||||
onigenc_get_default_encoding(void)
|
||||
{
|
||||
return OnigEncDefaultCharEncoding;
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_set_default_encoding(OnigEncoding enc)
|
||||
{
|
||||
OnigEncDefaultCharEncoding = enc;
|
||||
return 0;
|
||||
}
|
||||
|
||||
extern UChar*
|
||||
onigenc_get_right_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
|
||||
{
|
||||
UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
|
||||
if (p < s) {
|
||||
p += enclen(enc, p);
|
||||
}
|
||||
return p;
|
||||
}
|
||||
|
||||
extern UChar*
|
||||
onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc,
|
||||
const UChar* start, const UChar* s, const UChar** prev)
|
||||
{
|
||||
UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
|
||||
|
||||
if (p < s) {
|
||||
if (prev) *prev = (const UChar* )p;
|
||||
p += enclen(enc, p);
|
||||
}
|
||||
else {
|
||||
if (prev) *prev = (const UChar* )NULL; /* Sorry */
|
||||
}
|
||||
return p;
|
||||
}
|
||||
|
||||
extern UChar*
|
||||
onigenc_get_prev_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
|
||||
{
|
||||
if (s <= start)
|
||||
return (UChar* )NULL;
|
||||
|
||||
return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1);
|
||||
}
|
||||
|
||||
extern UChar*
|
||||
onigenc_step_back(OnigEncoding enc, const UChar* start, const UChar* s, int n)
|
||||
{
|
||||
while (ONIG_IS_NOT_NULL(s) && n-- > 0) {
|
||||
if (s <= start)
|
||||
return (UChar* )NULL;
|
||||
|
||||
s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1);
|
||||
}
|
||||
return (UChar* )s;
|
||||
}
|
||||
|
||||
extern UChar*
|
||||
onigenc_step(OnigEncoding enc, const UChar* p, const UChar* end, int n)
|
||||
{
|
||||
UChar* q = (UChar* )p;
|
||||
while (n-- > 0) {
|
||||
q += ONIGENC_MBC_ENC_LEN(enc, q);
|
||||
}
|
||||
return (q <= end ? q : NULL);
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_strlen(OnigEncoding enc, const UChar* p, const UChar* end)
|
||||
{
|
||||
int n = 0;
|
||||
UChar* q = (UChar* )p;
|
||||
|
||||
while (q < end) {
|
||||
q += ONIGENC_MBC_ENC_LEN(enc, q);
|
||||
n++;
|
||||
}
|
||||
return n;
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_strlen_null(OnigEncoding enc, const UChar* s)
|
||||
{
|
||||
int n = 0;
|
||||
UChar* p = (UChar* )s;
|
||||
|
||||
while (1) {
|
||||
if (*p == '\0') {
|
||||
UChar* q;
|
||||
int len = ONIGENC_MBC_MINLEN(enc);
|
||||
|
||||
if (len == 1) return n;
|
||||
q = p + 1;
|
||||
while (len > 1) {
|
||||
if (*q != '\0') break;
|
||||
q++;
|
||||
len--;
|
||||
}
|
||||
if (len == 1) return n;
|
||||
}
|
||||
p += ONIGENC_MBC_ENC_LEN(enc, p);
|
||||
n++;
|
||||
}
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s)
|
||||
{
|
||||
UChar* start = (UChar* )s;
|
||||
UChar* p = (UChar* )s;
|
||||
|
||||
while (1) {
|
||||
if (*p == '\0') {
|
||||
UChar* q;
|
||||
int len = ONIGENC_MBC_MINLEN(enc);
|
||||
|
||||
if (len == 1) return (int )(p - start);
|
||||
q = p + 1;
|
||||
while (len > 1) {
|
||||
if (*q != '\0') break;
|
||||
q++;
|
||||
len--;
|
||||
}
|
||||
if (len == 1) return (int )(p - start);
|
||||
}
|
||||
p += ONIGENC_MBC_ENC_LEN(enc, p);
|
||||
}
|
||||
}
|
||||
|
||||
const UChar OnigEncAsciiToLowerCaseTable[] = {
|
||||
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
|
||||
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
|
||||
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
|
||||
'\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
|
||||
'\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
|
||||
'\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
|
||||
'\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
|
||||
'\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
|
||||
'\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
|
||||
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
|
||||
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
|
||||
'\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
|
||||
'\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
|
||||
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
|
||||
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
|
||||
'\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
|
||||
'\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
|
||||
'\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
|
||||
'\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
|
||||
'\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
|
||||
'\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
|
||||
'\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
|
||||
'\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
|
||||
'\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
|
||||
'\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
|
||||
'\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
|
||||
'\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
|
||||
'\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
|
||||
'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
|
||||
'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
|
||||
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
|
||||
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
|
||||
};
|
||||
|
||||
#ifdef USE_UPPER_CASE_TABLE
|
||||
const UChar OnigEncAsciiToUpperCaseTable[256] = {
|
||||
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
|
||||
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
|
||||
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
|
||||
'\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
|
||||
'\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
|
||||
'\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
|
||||
'\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
|
||||
'\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
|
||||
'\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
|
||||
'\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
|
||||
'\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
|
||||
'\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137',
|
||||
'\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
|
||||
'\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
|
||||
'\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
|
||||
'\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177',
|
||||
'\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
|
||||
'\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
|
||||
'\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
|
||||
'\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
|
||||
'\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
|
||||
'\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
|
||||
'\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
|
||||
'\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
|
||||
'\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
|
||||
'\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
|
||||
'\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
|
||||
'\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
|
||||
'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
|
||||
'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
|
||||
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
|
||||
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
|
||||
};
|
||||
#endif
|
||||
|
||||
const unsigned short OnigEncAsciiCtypeTable[256] = {
|
||||
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
|
||||
0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
|
||||
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
|
||||
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
|
||||
0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
|
||||
0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
|
||||
0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
|
||||
0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
|
||||
0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
|
||||
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
|
||||
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
|
||||
0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
|
||||
0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
|
||||
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
|
||||
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
|
||||
0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
|
||||
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
|
||||
};
|
||||
|
||||
const UChar OnigEncISO_8859_1_ToLowerCaseTable[256] = {
|
||||
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
|
||||
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
|
||||
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
|
||||
'\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
|
||||
'\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
|
||||
'\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
|
||||
'\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
|
||||
'\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
|
||||
'\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
|
||||
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
|
||||
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
|
||||
'\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
|
||||
'\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
|
||||
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
|
||||
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
|
||||
'\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
|
||||
'\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
|
||||
'\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
|
||||
'\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
|
||||
'\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
|
||||
'\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
|
||||
'\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
|
||||
'\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
|
||||
'\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
|
||||
'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
|
||||
'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
|
||||
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327',
|
||||
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337',
|
||||
'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
|
||||
'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
|
||||
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
|
||||
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
|
||||
};
|
||||
|
||||
#ifdef USE_UPPER_CASE_TABLE
|
||||
const UChar OnigEncISO_8859_1_ToUpperCaseTable[256] = {
|
||||
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
|
||||
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
|
||||
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
|
||||
'\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
|
||||
'\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
|
||||
'\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
|
||||
'\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
|
||||
'\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
|
||||
'\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
|
||||
'\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
|
||||
'\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
|
||||
'\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137',
|
||||
'\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
|
||||
'\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
|
||||
'\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
|
||||
'\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177',
|
||||
'\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
|
||||
'\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
|
||||
'\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
|
||||
'\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
|
||||
'\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
|
||||
'\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
|
||||
'\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
|
||||
'\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
|
||||
'\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
|
||||
'\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
|
||||
'\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
|
||||
'\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
|
||||
'\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
|
||||
'\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
|
||||
'\320', '\321', '\322', '\323', '\324', '\325', '\326', '\367',
|
||||
'\330', '\331', '\332', '\333', '\334', '\335', '\336', '\377',
|
||||
};
|
||||
#endif
|
||||
|
||||
extern void
|
||||
onigenc_set_default_caseconv_table(const UChar* table ARG_UNUSED)
|
||||
{
|
||||
/* nothing */
|
||||
/* obsoleted. */
|
||||
}
|
||||
|
||||
extern UChar*
|
||||
onigenc_get_left_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
|
||||
{
|
||||
return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
|
||||
}
|
||||
|
||||
/*
 * ASCII case pairs { from, to }: each upper-case letter 0x41..0x5a ('A'..'Z')
 * paired with its lower-case counterpart 0x61..0x7a ('a'..'z').
 */
const OnigPairCaseFoldCodes OnigAsciiLowerMap[] = {
  { 0x41, 0x61 }, { 0x42, 0x62 }, { 0x43, 0x63 }, { 0x44, 0x64 },
  { 0x45, 0x65 }, { 0x46, 0x66 }, { 0x47, 0x67 }, { 0x48, 0x68 },
  { 0x49, 0x69 }, { 0x4a, 0x6a }, { 0x4b, 0x6b }, { 0x4c, 0x6c },
  { 0x4d, 0x6d }, { 0x4e, 0x6e }, { 0x4f, 0x6f }, { 0x50, 0x70 },
  { 0x51, 0x71 }, { 0x52, 0x72 }, { 0x53, 0x73 }, { 0x54, 0x74 },
  { 0x55, 0x75 }, { 0x56, 0x76 }, { 0x57, 0x77 }, { 0x58, 0x78 },
  { 0x59, 0x79 }, { 0x5a, 0x7a }
};
|
||||
|
||||
extern int
|
||||
onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,
|
||||
OnigApplyAllCaseFoldFunc f, void* arg)
|
||||
{
|
||||
OnigCodePoint code;
|
||||
int i, r;
|
||||
|
||||
for (i = 0;
|
||||
i < (int )(sizeof(OnigAsciiLowerMap)/sizeof(OnigPairCaseFoldCodes));
|
||||
i++) {
|
||||
code = OnigAsciiLowerMap[i].to;
|
||||
r = (*f)(OnigAsciiLowerMap[i].from, &code, 1, arg);
|
||||
if (r != 0) return r;
|
||||
|
||||
code = OnigAsciiLowerMap[i].from;
|
||||
r = (*f)(OnigAsciiLowerMap[i].to, &code, 1, arg);
|
||||
if (r != 0) return r;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_ascii_get_case_fold_codes_by_str(OnigCaseFoldType flag ARG_UNUSED,
|
||||
const OnigUChar* p, const OnigUChar* end ARG_UNUSED,
|
||||
OnigCaseFoldCodeItem items[])
|
||||
{
|
||||
if (0x41 <= *p && *p <= 0x5a) {
|
||||
items[0].byte_len = 1;
|
||||
items[0].code_len = 1;
|
||||
items[0].code[0] = (OnigCodePoint )(*p + 0x20);
|
||||
return 1;
|
||||
}
|
||||
else if (0x61 <= *p && *p <= 0x7a) {
|
||||
items[0].byte_len = 1;
|
||||
items[0].code_len = 1;
|
||||
items[0].code[0] = (OnigCodePoint )(*p - 0x20);
|
||||
return 1;
|
||||
}
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
ss_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,
|
||||
OnigApplyAllCaseFoldFunc f, void* arg)
|
||||
{
|
||||
static OnigCodePoint ss[] = { 0x73, 0x73 };
|
||||
|
||||
return (*f)((OnigCodePoint )0xdf, ss, 2, arg);
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_apply_all_case_fold_with_map(int map_size,
|
||||
const OnigPairCaseFoldCodes map[],
|
||||
int ess_tsett_flag, OnigCaseFoldType flag,
|
||||
OnigApplyAllCaseFoldFunc f, void* arg)
|
||||
{
|
||||
OnigCodePoint code;
|
||||
int i, r;
|
||||
|
||||
r = onigenc_ascii_apply_all_case_fold(flag, f, arg);
|
||||
if (r != 0) return r;
|
||||
|
||||
for (i = 0; i < map_size; i++) {
|
||||
code = map[i].to;
|
||||
r = (*f)(map[i].from, &code, 1, arg);
|
||||
if (r != 0) return r;
|
||||
|
||||
code = map[i].from;
|
||||
r = (*f)(map[i].to, &code, 1, arg);
|
||||
if (r != 0) return r;
|
||||
}
|
||||
|
||||
if (ess_tsett_flag != 0)
|
||||
return ss_apply_all_case_fold(flag, f, arg);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_get_case_fold_codes_by_str_with_map(int map_size,
|
||||
const OnigPairCaseFoldCodes map[],
|
||||
int ess_tsett_flag, OnigCaseFoldType flag ARG_UNUSED,
|
||||
const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
|
||||
{
|
||||
if (0x41 <= *p && *p <= 0x5a) {
|
||||
items[0].byte_len = 1;
|
||||
items[0].code_len = 1;
|
||||
items[0].code[0] = (OnigCodePoint )(*p + 0x20);
|
||||
if (*p == 0x53 && ess_tsett_flag != 0 && end > p + 1
|
||||
&& (*(p+1) == 0x53 || *(p+1) == 0x73)) {
|
||||
/* SS */
|
||||
items[1].byte_len = 2;
|
||||
items[1].code_len = 1;
|
||||
items[1].code[0] = (OnigCodePoint )0xdf;
|
||||
return 2;
|
||||
}
|
||||
else
|
||||
return 1;
|
||||
}
|
||||
else if (0x61 <= *p && *p <= 0x7a) {
|
||||
items[0].byte_len = 1;
|
||||
items[0].code_len = 1;
|
||||
items[0].code[0] = (OnigCodePoint )(*p - 0x20);
|
||||
if (*p == 0x73 && ess_tsett_flag != 0 && end > p + 1
|
||||
&& (*(p+1) == 0x73 || *(p+1) == 0x53)) {
|
||||
/* ss */
|
||||
items[1].byte_len = 2;
|
||||
items[1].code_len = 1;
|
||||
items[1].code[0] = (OnigCodePoint )0xdf;
|
||||
return 2;
|
||||
}
|
||||
else
|
||||
return 1;
|
||||
}
|
||||
else if (*p == 0xdf && ess_tsett_flag != 0) {
|
||||
items[0].byte_len = 1;
|
||||
items[0].code_len = 2;
|
||||
items[0].code[0] = (OnigCodePoint )'s';
|
||||
items[0].code[1] = (OnigCodePoint )'s';
|
||||
|
||||
items[1].byte_len = 1;
|
||||
items[1].code_len = 2;
|
||||
items[1].code[0] = (OnigCodePoint )'S';
|
||||
items[1].code[1] = (OnigCodePoint )'S';
|
||||
|
||||
items[2].byte_len = 1;
|
||||
items[2].code_len = 2;
|
||||
items[2].code[0] = (OnigCodePoint )'s';
|
||||
items[2].code[1] = (OnigCodePoint )'S';
|
||||
|
||||
items[3].byte_len = 1;
|
||||
items[3].code_len = 2;
|
||||
items[3].code[0] = (OnigCodePoint )'S';
|
||||
items[3].code[1] = (OnigCodePoint )'s';
|
||||
|
||||
return 4;
|
||||
}
|
||||
else {
|
||||
int i;
|
||||
|
||||
for (i = 0; i < map_size; i++) {
|
||||
if (*p == map[i].from) {
|
||||
items[0].byte_len = 1;
|
||||
items[0].code_len = 1;
|
||||
items[0].code[0] = map[i].to;
|
||||
return 1;
|
||||
}
|
||||
else if (*p == map[i].to) {
|
||||
items[0].byte_len = 1;
|
||||
items[0].code_len = 1;
|
||||
items[0].code[0] = map[i].from;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
extern int
|
||||
onigenc_not_support_get_ctype_code_range(OnigCtype ctype ARG_UNUSED,
|
||||
OnigCodePoint* sb_out ARG_UNUSED,
|
||||
const OnigCodePoint* ranges[] ARG_UNUSED)
|
||||
{
|
||||
return ONIG_NO_SUPPORT_CONFIG;
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end)
|
||||
{
|
||||
if (p < end) {
|
||||
if (*p == 0x0a) return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* for single byte encodings */
|
||||
extern int
|
||||
onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, const UChar** p,
|
||||
const UChar*end ARG_UNUSED, UChar* lower)
|
||||
{
|
||||
*lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(**p);
|
||||
|
||||
(*p)++;
|
||||
return 1; /* return byte length of converted char to lower */
|
||||
}
|
||||
|
||||
#if 0
/*
 * Disabled code (compiled out by "#if 0").
 * Advances *pp by one byte and reports whether the byte it pointed at is an
 * ASCII upper- or lower-case letter.
 */
extern int
onigenc_ascii_is_mbc_ambiguous(OnigCaseFoldType flag,
                               const UChar** pp, const UChar* end)
{
  const UChar* cur = *pp;

  *pp = cur + 1;
  return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*cur);
}
#endif
|
||||
|
||||
extern int
|
||||
onigenc_single_byte_mbc_enc_len(const UChar* p ARG_UNUSED)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
|
||||
extern OnigCodePoint
|
||||
onigenc_single_byte_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED)
|
||||
{
|
||||
return (OnigCodePoint )(*p);
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_single_byte_code_to_mbclen(OnigCodePoint code ARG_UNUSED)
|
||||
{
|
||||
return (code < 0x100 ? 1 : ONIGERR_INVALID_CODE_POINT_VALUE);
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf)
|
||||
{
|
||||
*buf = (UChar )(code & 0xff);
|
||||
return 1;
|
||||
}
|
||||
|
||||
extern UChar*
|
||||
onigenc_single_byte_left_adjust_char_head(const UChar* start ARG_UNUSED,
|
||||
const UChar* s)
|
||||
{
|
||||
return (UChar* )s;
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_always_true_is_allowed_reverse_match(const UChar* s ARG_UNUSED,
|
||||
const UChar* end ARG_UNUSED)
|
||||
{
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_always_false_is_allowed_reverse_match(const UChar* s ARG_UNUSED,
|
||||
const UChar* end ARG_UNUSED)
|
||||
{
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_ascii_is_code_ctype(OnigCodePoint code, unsigned int ctype)
|
||||
{
|
||||
if (code < 128)
|
||||
return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
|
||||
else
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
extern OnigCodePoint
|
||||
onigenc_mbn_mbc_to_code(OnigEncoding enc, const UChar* p, const UChar* end)
|
||||
{
|
||||
int c, i, len;
|
||||
OnigCodePoint n;
|
||||
|
||||
len = enclen(enc, p);
|
||||
n = (OnigCodePoint )(*p++);
|
||||
if (len == 1) return n;
|
||||
|
||||
for (i = 1; i < len; i++) {
|
||||
if (p >= end) break;
|
||||
c = *p++;
|
||||
n <<= 8; n += c;
|
||||
}
|
||||
return n;
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_mbn_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag ARG_UNUSED,
|
||||
const UChar** pp, const UChar* end ARG_UNUSED,
|
||||
UChar* lower)
|
||||
{
|
||||
int len;
|
||||
const UChar *p = *pp;
|
||||
|
||||
if (ONIGENC_IS_MBC_ASCII(p)) {
|
||||
*lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
|
||||
(*pp)++;
|
||||
return 1;
|
||||
}
|
||||
else {
|
||||
int i;
|
||||
|
||||
len = enclen(enc, p);
|
||||
for (i = 0; i < len; i++) {
|
||||
*lower++ = *p++;
|
||||
}
|
||||
(*pp) += len;
|
||||
return len; /* return byte length of converted to lower char */
|
||||
}
|
||||
}
|
||||
|
||||
#if 0
/*
 * Disabled code (compiled out by "#if 0").
 * Advances *pp past one character.  For an ASCII byte the result says
 * whether it is an upper- or lower-case letter; for any other character the
 * result is FALSE.
 */
extern int
onigenc_mbn_is_mbc_ambiguous(OnigEncoding enc, OnigCaseFoldType flag,
                             const UChar** pp, const UChar* end)
{
  const UChar* cur = *pp;

  if (!ONIGENC_IS_MBC_ASCII(cur)) {
    *pp = cur + enclen(enc, cur);
    return FALSE;
  }

  *pp = cur + 1;
  return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*cur);
}
#endif
|
||||
|
||||
extern int
|
||||
onigenc_mb2_code_to_mbclen(OnigCodePoint code)
|
||||
{
|
||||
if ((code & 0xff00) != 0) return 2;
|
||||
else return 1;
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_mb4_code_to_mbclen(OnigCodePoint code)
|
||||
{
|
||||
if ((code & 0xff000000) != 0) return 4;
|
||||
else if ((code & 0xff0000) != 0) return 3;
|
||||
else if ((code & 0xff00) != 0) return 2;
|
||||
else return 1;
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_mb2_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
|
||||
{
|
||||
UChar *p = buf;
|
||||
|
||||
if ((code & 0xff00) != 0) {
|
||||
*p++ = (UChar )((code >> 8) & 0xff);
|
||||
}
|
||||
*p++ = (UChar )(code & 0xff);
|
||||
|
||||
#if 1
|
||||
if (enclen(enc, buf) != (p - buf))
|
||||
return ONIGERR_INVALID_CODE_POINT_VALUE;
|
||||
#endif
|
||||
return (int )(p - buf);
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_mb4_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
|
||||
{
|
||||
UChar *p = buf;
|
||||
|
||||
if ((code & 0xff000000) != 0) {
|
||||
*p++ = (UChar )((code >> 24) & 0xff);
|
||||
}
|
||||
if ((code & 0xff0000) != 0 || p != buf) {
|
||||
*p++ = (UChar )((code >> 16) & 0xff);
|
||||
}
|
||||
if ((code & 0xff00) != 0 || p != buf) {
|
||||
*p++ = (UChar )((code >> 8) & 0xff);
|
||||
}
|
||||
*p++ = (UChar )(code & 0xff);
|
||||
|
||||
#if 1
|
||||
if (enclen(enc, buf) != (p - buf))
|
||||
return ONIGERR_INVALID_CODE_POINT_VALUE;
|
||||
#endif
|
||||
return (int )(p - buf);
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_minimum_property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end)
|
||||
{
|
||||
static const PosixBracketEntryType PBS[] = {
|
||||
{ (UChar* )"Alnum", ONIGENC_CTYPE_ALNUM, 5 },
|
||||
{ (UChar* )"Alpha", ONIGENC_CTYPE_ALPHA, 5 },
|
||||
{ (UChar* )"Blank", ONIGENC_CTYPE_BLANK, 5 },
|
||||
{ (UChar* )"Cntrl", ONIGENC_CTYPE_CNTRL, 5 },
|
||||
{ (UChar* )"Digit", ONIGENC_CTYPE_DIGIT, 5 },
|
||||
{ (UChar* )"Graph", ONIGENC_CTYPE_GRAPH, 5 },
|
||||
{ (UChar* )"Lower", ONIGENC_CTYPE_LOWER, 5 },
|
||||
{ (UChar* )"Print", ONIGENC_CTYPE_PRINT, 5 },
|
||||
{ (UChar* )"Punct", ONIGENC_CTYPE_PUNCT, 5 },
|
||||
{ (UChar* )"Space", ONIGENC_CTYPE_SPACE, 5 },
|
||||
{ (UChar* )"Upper", ONIGENC_CTYPE_UPPER, 5 },
|
||||
{ (UChar* )"XDigit", ONIGENC_CTYPE_XDIGIT, 6 },
|
||||
{ (UChar* )"ASCII", ONIGENC_CTYPE_ASCII, 5 },
|
||||
{ (UChar* )"Word", ONIGENC_CTYPE_WORD, 4 },
|
||||
{ (UChar* )NULL, -1, 0 }
|
||||
};
|
||||
|
||||
const PosixBracketEntryType *pb;
|
||||
int len;
|
||||
|
||||
len = onigenc_strlen(enc, p, end);
|
||||
for (pb = PBS; IS_NOT_NULL(pb->name); pb++) {
|
||||
if (len == pb->len &&
|
||||
onigenc_with_ascii_strnicmp(enc, p, end, pb->name, pb->len) == 0)
|
||||
return pb->ctype;
|
||||
}
|
||||
|
||||
return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_mb2_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
|
||||
unsigned int ctype)
|
||||
{
|
||||
if (code < 128)
|
||||
return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
|
||||
else {
|
||||
if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) {
|
||||
return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);
|
||||
}
|
||||
}
|
||||
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_mb4_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
|
||||
unsigned int ctype)
|
||||
{
|
||||
if (code < 128)
|
||||
return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
|
||||
else {
|
||||
if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) {
|
||||
return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);
|
||||
}
|
||||
}
|
||||
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_with_ascii_strncmp(OnigEncoding enc, const UChar* p, const UChar* end,
|
||||
const UChar* sascii /* ascii */, int n)
|
||||
{
|
||||
int x, c;
|
||||
|
||||
while (n-- > 0) {
|
||||
if (p >= end) return (int )(*sascii);
|
||||
|
||||
c = (int )ONIGENC_MBC_TO_CODE(enc, p, end);
|
||||
x = *sascii - c;
|
||||
if (x) return x;
|
||||
|
||||
sascii++;
|
||||
p += enclen(enc, p);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_with_ascii_strnicmp(OnigEncoding enc, const UChar* p, const UChar* end,
|
||||
const UChar* sascii /* ascii */, int n)
|
||||
{
|
||||
int x, c;
|
||||
|
||||
while (n-- > 0) {
|
||||
if (p >= end) return (int )(*sascii);
|
||||
|
||||
c = (int )ONIGENC_MBC_TO_CODE(enc, p, end);
|
||||
if (ONIGENC_IS_ASCII_CODE(c))
|
||||
c = ONIGENC_ASCII_CODE_TO_LOWER_CASE(c);
|
||||
x = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*sascii) - c;
|
||||
if (x) return x;
|
||||
|
||||
sascii++;
|
||||
p += enclen(enc, p);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Property management */
|
||||
static int
|
||||
resize_property_list(int new_size, const OnigCodePoint*** plist, int* psize)
|
||||
{
|
||||
size_t size;
|
||||
const OnigCodePoint **list = *plist;
|
||||
|
||||
size = sizeof(OnigCodePoint*) * new_size;
|
||||
if (IS_NULL(list)) {
|
||||
list = (const OnigCodePoint** )xmalloc(size);
|
||||
}
|
||||
else {
|
||||
list = (const OnigCodePoint** )xrealloc((void* )list, size);
|
||||
}
|
||||
|
||||
if (IS_NULL(list)) return ONIGERR_MEMORY;
|
||||
|
||||
*plist = list;
|
||||
*psize = new_size;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_property_list_add_property(UChar* name, const OnigCodePoint* prop,
|
||||
hash_table_type **table, const OnigCodePoint*** plist, int *pnum,
|
||||
int *psize)
|
||||
{
|
||||
#define PROP_INIT_SIZE 16
|
||||
|
||||
int r;
|
||||
|
||||
if (*psize <= *pnum) {
|
||||
int new_size = (*psize == 0 ? PROP_INIT_SIZE : *psize * 2);
|
||||
r = resize_property_list(new_size, plist, psize);
|
||||
if (r != 0) return r;
|
||||
}
|
||||
|
||||
(*plist)[*pnum] = prop;
|
||||
|
||||
if (ONIG_IS_NULL(*table)) {
|
||||
*table = onig_st_init_strend_table_with_size(PROP_INIT_SIZE);
|
||||
if (ONIG_IS_NULL(*table)) return ONIGERR_MEMORY;
|
||||
}
|
||||
|
||||
*pnum = *pnum + 1;
|
||||
onig_st_insert_strend(*table, name, name + strlen((char* )name),
|
||||
(hash_data_type )(*pnum + ONIGENC_MAX_STD_CTYPE));
|
||||
return 0;
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_property_list_init(int (*f)(void))
|
||||
{
|
||||
int r;
|
||||
|
||||
THREAD_ATOMIC_START;
|
||||
|
||||
r = f();
|
||||
|
||||
THREAD_ATOMIC_END;
|
||||
return r;
|
||||
}
|
197
src/Onigmo/regenc.h
Normal file
197
src/Onigmo/regenc.h
Normal file
@ -0,0 +1,197 @@
|
||||
#ifndef REGENC_H
|
||||
#define REGENC_H
|
||||
/**********************************************************************
|
||||
regenc.h - Onigmo (Oniguruma-mod) (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2011 K.Takata <kentkt AT csc DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef PACKAGE
|
||||
/* PACKAGE is defined in config.h */
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#ifdef ONIG_ESCAPE_UCHAR_COLLISION
|
||||
#undef ONIG_ESCAPE_UCHAR_COLLISION
|
||||
#endif
|
||||
|
||||
#include "oniguruma.h"
|
||||
|
||||
typedef struct {
|
||||
OnigCodePoint from;
|
||||
OnigCodePoint to;
|
||||
} OnigPairCaseFoldCodes;
|
||||
|
||||
|
||||
/* Fallback definitions, used only when not already provided. */
#ifndef NULL
#define NULL   ((void* )0)
#endif

#ifndef TRUE
#define TRUE    1
#endif

#ifndef FALSE
#define FALSE   0
#endif

/* Marks a parameter as intentionally unused (GCC attribute; empty elsewhere). */
#ifndef ARG_UNUSED
#if defined(__GNUC__)
#  define ARG_UNUSED  __attribute__ ((unused))
#else
#  define ARG_UNUSED
#endif
#endif

/* Null-pointer tests. */
#define ONIG_IS_NULL(p)                    (((void*)(p)) == (void*)0)
#define ONIG_IS_NOT_NULL(p)                (((void*)(p)) != (void*)0)

/*
 * Early-return helpers.  Each expands to a bare "if" statement, so do not
 * place one directly before an "else".
 */
#define ONIG_CHECK_NULL_RETURN(p)          if (ONIG_IS_NULL(p)) return NULL
#define ONIG_CHECK_NULL_RETURN_VAL(p,val)  if (ONIG_IS_NULL(p)) return (val)

/* Shorthand for the byte length of the character at p in encoding enc. */
#define enclen(enc,p)          ONIGENC_MBC_ENC_LEN(enc,p)
|
||||
|
||||
/* character types bit flag: one bit per ONIGENC_CTYPE_* value */
#define BIT_CTYPE_NEWLINE  (1<< ONIGENC_CTYPE_NEWLINE)
#define BIT_CTYPE_ALPHA    (1<< ONIGENC_CTYPE_ALPHA)
#define BIT_CTYPE_BLANK    (1<< ONIGENC_CTYPE_BLANK)
#define BIT_CTYPE_CNTRL    (1<< ONIGENC_CTYPE_CNTRL)
#define BIT_CTYPE_DIGIT    (1<< ONIGENC_CTYPE_DIGIT)
#define BIT_CTYPE_GRAPH    (1<< ONIGENC_CTYPE_GRAPH)
#define BIT_CTYPE_LOWER    (1<< ONIGENC_CTYPE_LOWER)
#define BIT_CTYPE_PRINT    (1<< ONIGENC_CTYPE_PRINT)
#define BIT_CTYPE_PUNCT    (1<< ONIGENC_CTYPE_PUNCT)
#define BIT_CTYPE_SPACE    (1<< ONIGENC_CTYPE_SPACE)
#define BIT_CTYPE_UPPER    (1<< ONIGENC_CTYPE_UPPER)
#define BIT_CTYPE_XDIGIT   (1<< ONIGENC_CTYPE_XDIGIT)
#define BIT_CTYPE_WORD     (1<< ONIGENC_CTYPE_WORD)
#define BIT_CTYPE_ALNUM    (1<< ONIGENC_CTYPE_ALNUM)
#define BIT_CTYPE_ASCII    (1<< ONIGENC_CTYPE_ASCII)

/* Bit mask for an arbitrary ctype number (the shift is done on an int). */
#define CTYPE_TO_BIT(ctype)  (1<<(ctype))

/* True for the WORD, GRAPH and PRINT ctypes; evaluates ctype up to 3 times. */
#define CTYPE_IS_WORD_GRAPH_PRINT(ctype) \
  ((ctype) == ONIGENC_CTYPE_WORD || (ctype) == ONIGENC_CTYPE_GRAPH ||\
   (ctype) == ONIGENC_CTYPE_PRINT)
|
||||
|
||||
|
||||
typedef struct {
|
||||
const UChar *name;
|
||||
int ctype;
|
||||
short int len;
|
||||
} PosixBracketEntryType;
|
||||
|
||||
|
||||
/* Build-time feature switches (commented-out entries are disabled). */
#define USE_CRNL_AS_LINE_TERMINATOR
#define USE_UNICODE_PROPERTIES
/* #define USE_UNICODE_CASE_FOLD_TURKISH_AZERI */
/* #define USE_UNICODE_ALL_LINE_TERMINATORS */  /* see Unicode.org UTS #18 */


/* Encoding used as the initial default. */
#define ONIG_ENCODING_INIT_DEFAULT           ONIG_ENCODING_ASCII
|
||||
|
||||
/* for encoding system implementation (internal) */
|
||||
ONIG_EXTERN int onigenc_ascii_apply_all_case_fold P_((OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg));
|
||||
ONIG_EXTERN int onigenc_ascii_get_case_fold_codes_by_str P_((OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]));
|
||||
ONIG_EXTERN int onigenc_apply_all_case_fold_with_map P_((int map_size, const OnigPairCaseFoldCodes map[], int ess_tsett_flag, OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg));
|
||||
ONIG_EXTERN int onigenc_get_case_fold_codes_by_str_with_map P_((int map_size, const OnigPairCaseFoldCodes map[], int ess_tsett_flag, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]));
|
||||
ONIG_EXTERN int onigenc_not_support_get_ctype_code_range P_((OnigCtype ctype, OnigCodePoint* sb_out, const OnigCodePoint* ranges[]));
|
||||
ONIG_EXTERN int onigenc_is_mbc_newline_0x0a P_((const UChar* p, const UChar* end));
|
||||
|
||||
|
||||
/* methods for single byte encoding */
|
||||
ONIG_EXTERN int onigenc_ascii_mbc_case_fold P_((OnigCaseFoldType flag, const UChar** p, const UChar* end, UChar* lower));
|
||||
ONIG_EXTERN int onigenc_single_byte_mbc_enc_len P_((const UChar* p));
|
||||
ONIG_EXTERN OnigCodePoint onigenc_single_byte_mbc_to_code P_((const UChar* p, const UChar* end));
|
||||
ONIG_EXTERN int onigenc_single_byte_code_to_mbclen P_((OnigCodePoint code));
|
||||
ONIG_EXTERN int onigenc_single_byte_code_to_mbc P_((OnigCodePoint code, UChar *buf));
|
||||
ONIG_EXTERN UChar* onigenc_single_byte_left_adjust_char_head P_((const UChar* start, const UChar* s));
|
||||
ONIG_EXTERN int onigenc_always_true_is_allowed_reverse_match P_((const UChar* s, const UChar* end));
|
||||
ONIG_EXTERN int onigenc_always_false_is_allowed_reverse_match P_((const UChar* s, const UChar* end));
|
||||
ONIG_EXTERN int onigenc_ascii_is_code_ctype P_((OnigCodePoint code, unsigned int ctype));
|
||||
|
||||
/* methods for multi byte encoding */
|
||||
ONIG_EXTERN OnigCodePoint onigenc_mbn_mbc_to_code P_((OnigEncoding enc, const UChar* p, const UChar* end));
|
||||
ONIG_EXTERN int onigenc_mbn_mbc_case_fold P_((OnigEncoding enc, OnigCaseFoldType flag, const UChar** p, const UChar* end, UChar* lower));
|
||||
ONIG_EXTERN int onigenc_mb2_code_to_mbclen P_((OnigCodePoint code));
|
||||
ONIG_EXTERN int onigenc_mb2_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf));
|
||||
ONIG_EXTERN int onigenc_minimum_property_name_to_ctype P_((OnigEncoding enc, UChar* p, UChar* end));
|
||||
ONIG_EXTERN int onigenc_unicode_property_name_to_ctype P_((OnigEncoding enc, UChar* p, UChar* end));
|
||||
ONIG_EXTERN int onigenc_mb2_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype));
|
||||
ONIG_EXTERN int onigenc_mb4_code_to_mbclen P_((OnigCodePoint code));
|
||||
ONIG_EXTERN int onigenc_mb4_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf));
|
||||
ONIG_EXTERN int onigenc_mb4_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype));
|
||||
|
||||
|
||||
/* in enc/unicode.c */
|
||||
ONIG_EXTERN int onigenc_unicode_is_code_ctype P_((OnigCodePoint code, unsigned int ctype));
|
||||
ONIG_EXTERN int onigenc_utf16_32_get_ctype_code_range P_((OnigCtype ctype, OnigCodePoint *sb_out, const OnigCodePoint* ranges[]));
|
||||
ONIG_EXTERN int onigenc_unicode_ctype_code_range P_((int ctype, const OnigCodePoint* ranges[]));
|
||||
ONIG_EXTERN int onigenc_unicode_get_case_fold_codes_by_str P_((OnigEncoding enc, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]));
|
||||
ONIG_EXTERN int onigenc_unicode_mbc_case_fold P_((OnigEncoding enc, OnigCaseFoldType flag, const UChar** pp, const UChar* end, UChar* fold));
|
||||
ONIG_EXTERN int onigenc_unicode_apply_all_case_fold P_((OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg));
|
||||
|
||||
|
||||
#define UTF16_IS_SURROGATE_FIRST(c) (((c) & 0xfc) == 0xd8)
|
||||
#define UTF16_IS_SURROGATE_SECOND(c) (((c) & 0xfc) == 0xdc)
|
||||
|
||||
#define ONIGENC_ISO_8859_1_TO_LOWER_CASE(c) \
|
||||
OnigEncISO_8859_1_ToLowerCaseTable[c]
|
||||
#define ONIGENC_ISO_8859_1_TO_UPPER_CASE(c) \
|
||||
OnigEncISO_8859_1_ToUpperCaseTable[c]
|
||||
|
||||
ONIG_EXTERN const UChar OnigEncISO_8859_1_ToLowerCaseTable[];
|
||||
ONIG_EXTERN const UChar OnigEncISO_8859_1_ToUpperCaseTable[];
|
||||
|
||||
ONIG_EXTERN int
|
||||
onigenc_with_ascii_strncmp P_((OnigEncoding enc, const UChar* p, const UChar* end, const UChar* sascii /* ascii */, int n));
|
||||
ONIG_EXTERN int
|
||||
onigenc_with_ascii_strnicmp P_((OnigEncoding enc, const UChar* p, const UChar* end, const UChar* sascii /* ascii */, int n));
|
||||
ONIG_EXTERN UChar*
|
||||
onigenc_step P_((OnigEncoding enc, const UChar* p, const UChar* end, int n));
|
||||
|
||||
/* defined in regexec.c, but used in enc/xxx.c */
|
||||
extern int onig_is_in_code_range P_((const UChar* p, OnigCodePoint code));
|
||||
|
||||
ONIG_EXTERN OnigEncoding OnigEncDefaultCharEncoding;
|
||||
ONIG_EXTERN const UChar OnigEncAsciiToLowerCaseTable[];
|
||||
ONIG_EXTERN const UChar OnigEncAsciiToUpperCaseTable[];
|
||||
ONIG_EXTERN const unsigned short OnigEncAsciiCtypeTable[];
|
||||
|
||||
/*
 * ASCII helpers.  ONIGENC_IS_ASCII_CODE is a plain "< 0x80" comparison, so
 * a negative signed argument also satisfies it; the table macros use their
 * argument directly as an array index.
 */
#define ONIGENC_IS_ASCII_CODE(code)  ((code) < 0x80)
#define ONIGENC_ASCII_CODE_TO_LOWER_CASE(c) OnigEncAsciiToLowerCaseTable[c]
#define ONIGENC_ASCII_CODE_TO_UPPER_CASE(c) OnigEncAsciiToUpperCaseTable[c]
#define ONIGENC_IS_ASCII_CODE_CTYPE(code,ctype) \
  ((OnigEncAsciiCtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
#define ONIGENC_IS_ASCII_CODE_CASE_AMBIG(code) \
  (ONIGENC_IS_ASCII_CODE_CTYPE(code, ONIGENC_CTYPE_UPPER) ||\
   ONIGENC_IS_ASCII_CODE_CTYPE(code, ONIGENC_CTYPE_LOWER))

/* Check if the code is in the range. (from <= code && code <= to) */
/* Done with one unsigned comparison; from is evaluated twice. */
#define ONIGENC_IS_IN_RANGE(code, from, to) \
  ((OnigCodePoint )((code) - (from)) <= (OnigCodePoint )((to) - (from)))
|
||||
|
||||
|
||||
#endif /* REGENC_H */
|
394
src/Onigmo/regerror.c
Normal file
394
src/Onigmo/regerror.c
Normal file
@ -0,0 +1,394 @@
|
||||
/**********************************************************************
|
||||
regerror.c - Onigmo (Oniguruma-mod) (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2011 K.Takata <kentkt AT csc DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "regint.h"
|
||||
#include <stdio.h> /* for vsnprintf() */
|
||||
|
||||
#ifdef HAVE_STDARG_PROTOTYPES
|
||||
#include <stdarg.h>
|
||||
#define va_init_list(a,b) va_start(a,b)
|
||||
#else
|
||||
#include <varargs.h>
|
||||
#define va_init_list(a,b) va_start(a)
|
||||
#endif
|
||||
|
||||
extern UChar*
|
||||
onig_error_code_to_format(OnigPosition code)
|
||||
{
|
||||
const char *p;
|
||||
|
||||
if (code >= 0) return (UChar* )0;
|
||||
|
||||
switch (code) {
|
||||
case ONIG_MISMATCH:
|
||||
p = "mismatch"; break;
|
||||
case ONIG_NO_SUPPORT_CONFIG:
|
||||
p = "no support in this configuration"; break;
|
||||
case ONIGERR_MEMORY:
|
||||
p = "failed to allocate memory"; break;
|
||||
case ONIGERR_MATCH_STACK_LIMIT_OVER:
|
||||
p = "match-stack limit over"; break;
|
||||
case ONIGERR_TYPE_BUG:
|
||||
p = "undefined type (bug)"; break;
|
||||
case ONIGERR_PARSER_BUG:
|
||||
p = "internal parser error (bug)"; break;
|
||||
case ONIGERR_STACK_BUG:
|
||||
p = "stack error (bug)"; break;
|
||||
case ONIGERR_UNDEFINED_BYTECODE:
|
||||
p = "undefined bytecode (bug)"; break;
|
||||
case ONIGERR_UNEXPECTED_BYTECODE:
|
||||
p = "unexpected bytecode (bug)"; break;
|
||||
case ONIGERR_DEFAULT_ENCODING_IS_NOT_SET:
|
||||
p = "default multibyte-encoding is not set"; break;
|
||||
case ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR:
|
||||
p = "can't convert to wide-char on specified multibyte-encoding"; break;
|
||||
case ONIGERR_INVALID_ARGUMENT:
|
||||
p = "invalid argument"; break;
|
||||
case ONIGERR_END_PATTERN_AT_LEFT_BRACE:
|
||||
p = "end pattern at left brace"; break;
|
||||
case ONIGERR_END_PATTERN_AT_LEFT_BRACKET:
|
||||
p = "end pattern at left bracket"; break;
|
||||
case ONIGERR_EMPTY_CHAR_CLASS:
|
||||
p = "empty char-class"; break;
|
||||
case ONIGERR_PREMATURE_END_OF_CHAR_CLASS:
|
||||
p = "premature end of char-class"; break;
|
||||
case ONIGERR_END_PATTERN_AT_ESCAPE:
|
||||
p = "end pattern at escape"; break;
|
||||
case ONIGERR_END_PATTERN_AT_META:
|
||||
p = "end pattern at meta"; break;
|
||||
case ONIGERR_END_PATTERN_AT_CONTROL:
|
||||
p = "end pattern at control"; break;
|
||||
case ONIGERR_META_CODE_SYNTAX:
|
||||
p = "invalid meta-code syntax"; break;
|
||||
case ONIGERR_CONTROL_CODE_SYNTAX:
|
||||
p = "invalid control-code syntax"; break;
|
||||
case ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE:
|
||||
p = "char-class value at end of range"; break;
|
||||
case ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE:
|
||||
p = "char-class value at start of range"; break;
|
||||
case ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS:
|
||||
p = "unmatched range specifier in char-class"; break;
|
||||
case ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED:
|
||||
p = "target of repeat operator is not specified"; break;
|
||||
case ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID:
|
||||
p = "target of repeat operator is invalid"; break;
|
||||
case ONIGERR_NESTED_REPEAT_OPERATOR:
|
||||
p = "nested repeat operator"; break;
|
||||
case ONIGERR_UNMATCHED_CLOSE_PARENTHESIS:
|
||||
p = "unmatched close parenthesis"; break;
|
||||
case ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS:
|
||||
p = "end pattern with unmatched parenthesis"; break;
|
||||
case ONIGERR_END_PATTERN_IN_GROUP:
|
||||
p = "end pattern in group"; break;
|
||||
case ONIGERR_UNDEFINED_GROUP_OPTION:
|
||||
p = "undefined group option"; break;
|
||||
case ONIGERR_INVALID_POSIX_BRACKET_TYPE:
|
||||
p = "invalid POSIX bracket type"; break;
|
||||
case ONIGERR_INVALID_LOOK_BEHIND_PATTERN:
|
||||
p = "invalid pattern in look-behind"; break;
|
||||
case ONIGERR_INVALID_REPEAT_RANGE_PATTERN:
|
||||
p = "invalid repeat range {lower,upper}"; break;
|
||||
case ONIGERR_INVALID_CONDITION_PATTERN:
|
||||
p = "invalid conditional pattern"; break;
|
||||
case ONIGERR_TOO_BIG_NUMBER:
|
||||
p = "too big number"; break;
|
||||
case ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE:
|
||||
p = "too big number for repeat range"; break;
|
||||
case ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE:
|
||||
p = "upper is smaller than lower in repeat range"; break;
|
||||
case ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS:
|
||||
p = "empty range in char class"; break;
|
||||
case ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE:
|
||||
p = "mismatch multibyte code length in char-class range"; break;
|
||||
case ONIGERR_TOO_MANY_MULTI_BYTE_RANGES:
|
||||
p = "too many multibyte code ranges are specified"; break;
|
||||
case ONIGERR_TOO_SHORT_MULTI_BYTE_STRING:
|
||||
p = "too short multibyte code string"; break;
|
||||
case ONIGERR_TOO_BIG_BACKREF_NUMBER:
|
||||
p = "too big backref number"; break;
|
||||
case ONIGERR_INVALID_BACKREF:
|
||||
#ifdef USE_NAMED_GROUP
|
||||
p = "invalid backref number/name"; break;
|
||||
#else
|
||||
p = "invalid backref number"; break;
|
||||
#endif
|
||||
case ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED:
|
||||
p = "numbered backref/call is not allowed. (use name)"; break;
|
||||
case ONIGERR_TOO_BIG_WIDE_CHAR_VALUE:
|
||||
p = "too big wide-char value"; break;
|
||||
case ONIGERR_TOO_SHORT_DIGITS:
|
||||
p = "too short digits"; break;
|
||||
case ONIGERR_TOO_LONG_WIDE_CHAR_VALUE:
|
||||
p = "too long wide-char value"; break;
|
||||
case ONIGERR_INVALID_CODE_POINT_VALUE:
|
||||
p = "invalid code point value"; break;
|
||||
case ONIGERR_EMPTY_GROUP_NAME:
|
||||
p = "group name is empty"; break;
|
||||
case ONIGERR_INVALID_GROUP_NAME:
|
||||
p = "invalid group name <%n>"; break;
|
||||
case ONIGERR_INVALID_CHAR_IN_GROUP_NAME:
|
||||
#ifdef USE_NAMED_GROUP
|
||||
p = "invalid char in group name <%n>"; break;
|
||||
#else
|
||||
p = "invalid char in group number <%n>"; break;
|
||||
#endif
|
||||
case ONIGERR_UNDEFINED_NAME_REFERENCE:
|
||||
p = "undefined name <%n> reference"; break;
|
||||
case ONIGERR_UNDEFINED_GROUP_REFERENCE:
|
||||
p = "undefined group <%n> reference"; break;
|
||||
case ONIGERR_MULTIPLEX_DEFINED_NAME:
|
||||
p = "multiplex defined name <%n>"; break;
|
||||
case ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL:
|
||||
p = "multiplex definition name <%n> call"; break;
|
||||
case ONIGERR_NEVER_ENDING_RECURSION:
|
||||
p = "never ending recursion"; break;
|
||||
case ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY:
|
||||
p = "group number is too big for capture history"; break;
|
||||
case ONIGERR_INVALID_CHAR_PROPERTY_NAME:
|
||||
p = "invalid character property name {%n}"; break;
|
||||
case ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION:
|
||||
p = "not supported encoding combination"; break;
|
||||
case ONIGERR_INVALID_COMBINATION_OF_OPTIONS:
|
||||
p = "invalid combination of options"; break;
|
||||
case ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT:
|
||||
p = "over thread pass limit count"; break;
|
||||
|
||||
default:
|
||||
p = "undefined error code"; break;
|
||||
}
|
||||
|
||||
return (UChar* )p;
|
||||
}
|
||||
|
||||
/*
 * Format the low 8 bits of v as two lowercase hexadecimal digits.
 * Writes three bytes to s (two digits plus the terminating NUL); the
 * caller must provide at least that much room.
 */
static void sprint_byte(char* s, unsigned int v)
{
  const unsigned int low_byte = v & 0xffu;

  sprintf(s, "%02x", low_byte);
}
|
||||
|
||||
/*
 * Format the low 8 bits of v as a "\xHH" escape (lowercase hex digits).
 * Writes five bytes to s (backslash, 'x', two digits, terminating NUL);
 * the caller must provide at least that much room.
 */
static void sprint_byte_with_x(char* s, unsigned int v)
{
  const unsigned int low_byte = v & 0xffu;

  sprintf(s, "\\x%02x", low_byte);
}
|
||||
|
||||
/*
 * Render the bytes [s, end) into buf as text suitable for an error message.
 *
 * When the encoding's minimum character length is greater than one byte,
 * every character is decoded: code points below 0x80 are stored as a single
 * byte, code points up to 0xffff as "\xHHHH" (6 bytes) and larger ones as
 * "\xHHHHHHHH" (10 bytes).  Otherwise the raw bytes are copied unchanged.
 *
 * At most buf_size bytes are written and no NUL terminator is appended.
 * *is_over is set to 1 when part of the input did not fit, 0 otherwise.
 * Returns the number of bytes stored in buf.
 */
static int to_ascii(OnigEncoding enc, UChar *s, UChar *end,
                    UChar buf[], int buf_size, int *is_over)
{
  int len;
  UChar *p;
  OnigCodePoint code;

  if (ONIGENC_MBC_MINLEN(enc) > 1) {
    /* Scratch space for one escape: "\xHH" + 3 * "HH" + NUL.  The escape is
       built here first so the helpers' trailing NUL never lands in buf. */
    char hex[11];
    int hex_len;

    p = s;
    len = 0;
    while (p < end && len < buf_size) {
      code = ONIGENC_MBC_TO_CODE(enc, p, end);
      if (code >= 0x80) {
        if (code > 0xffff) {
          sprint_byte_with_x(hex, (unsigned int)(code >> 24));
          sprint_byte(hex + 4, (unsigned int)(code >> 16));
          sprint_byte(hex + 6, (unsigned int)(code >> 8));
          sprint_byte(hex + 8, (unsigned int)code);
          hex_len = 10;
        }
        else {
          sprint_byte_with_x(hex, (unsigned int)(code >> 8));
          sprint_byte(hex + 4, (unsigned int)code);
          hex_len = 6;
        }

        /* Stop rather than emit a shortened escape: printing only the low
           16 bits of a wide code point would show a different character. */
        if (len + hex_len > buf_size) break;

        xmemcpy(&buf[len], hex, (size_t )hex_len);
        len += hex_len;
      }
      else {
        buf[len++] = (UChar )code;
      }

      p += enclen(enc, p);
    }

    *is_over = ((p < end) ? 1 : 0);
  }
  else {
    len = (int )MIN((end - s), buf_size);
    xmemcpy(buf, s, (size_t )len);
    *is_over = ((buf_size < (end - s)) ? 1 : 0);
  }

  return len;
}
|
||||
|
||||
|
||||
/* for ONIG_MAX_ERROR_MESSAGE_LEN */
|
||||
#define MAX_ERROR_PAR_LEN 30
|
||||
|
||||
extern int
|
||||
#ifdef HAVE_STDARG_PROTOTYPES
|
||||
onig_error_code_to_str(UChar* s, OnigPosition code, ...)
|
||||
#else
|
||||
onig_error_code_to_str(s, code, va_alist)
|
||||
UChar* s;
|
||||
OnigPosition code;
|
||||
va_dcl
|
||||
#endif
|
||||
{
|
||||
UChar *p, *q;
|
||||
OnigErrorInfo* einfo;
|
||||
size_t len;
|
||||
int is_over;
|
||||
UChar parbuf[MAX_ERROR_PAR_LEN];
|
||||
va_list vargs;
|
||||
|
||||
va_init_list(vargs, code);
|
||||
|
||||
switch (code) {
|
||||
case ONIGERR_UNDEFINED_NAME_REFERENCE:
|
||||
case ONIGERR_UNDEFINED_GROUP_REFERENCE:
|
||||
case ONIGERR_MULTIPLEX_DEFINED_NAME:
|
||||
case ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL:
|
||||
case ONIGERR_INVALID_GROUP_NAME:
|
||||
case ONIGERR_INVALID_CHAR_IN_GROUP_NAME:
|
||||
case ONIGERR_INVALID_CHAR_PROPERTY_NAME:
|
||||
einfo = va_arg(vargs, OnigErrorInfo*);
|
||||
len = to_ascii(einfo->enc, einfo->par, einfo->par_end,
|
||||
parbuf, MAX_ERROR_PAR_LEN - 3, &is_over);
|
||||
q = onig_error_code_to_format(code);
|
||||
p = s;
|
||||
while (*q != '\0') {
|
||||
if (*q == '%') {
|
||||
q++;
|
||||
if (*q == 'n') { /* '%n': name */
|
||||
xmemcpy(p, parbuf, len);
|
||||
p += len;
|
||||
if (is_over != 0) {
|
||||
xmemcpy(p, "...", 3);
|
||||
p += 3;
|
||||
}
|
||||
q++;
|
||||
}
|
||||
else
|
||||
goto normal_char;
|
||||
}
|
||||
else {
|
||||
normal_char:
|
||||
*p++ = *q++;
|
||||
}
|
||||
}
|
||||
*p = '\0';
|
||||
len = p - s;
|
||||
break;
|
||||
|
||||
default:
|
||||
q = onig_error_code_to_format(code);
|
||||
len = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, q);
|
||||
xmemcpy(s, q, len);
|
||||
s[len] = '\0';
|
||||
break;
|
||||
}
|
||||
|
||||
va_end(vargs);
|
||||
return (int )len;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
#ifdef HAVE_STDARG_PROTOTYPES
|
||||
onig_snprintf_with_pattern(UChar buf[], int bufsize, OnigEncoding enc,
|
||||
UChar* pat, UChar* pat_end, const UChar *fmt, ...)
|
||||
#else
|
||||
onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist)
|
||||
UChar buf[];
|
||||
int bufsize;
|
||||
OnigEncoding enc;
|
||||
UChar* pat;
|
||||
UChar* pat_end;
|
||||
const UChar *fmt;
|
||||
va_dcl
|
||||
#endif
|
||||
{
|
||||
size_t need;
|
||||
int n, len;
|
||||
UChar *p, *s, *bp;
|
||||
UChar bs[6];
|
||||
va_list args;
|
||||
|
||||
va_init_list(args, fmt);
|
||||
n = xvsnprintf((char* )buf, bufsize, (const char* )fmt, args);
|
||||
va_end(args);
|
||||
|
||||
need = (pat_end - pat) * 4 + 4;
|
||||
|
||||
if (n + need < (size_t )bufsize) {
|
||||
strcat((char* )buf, ": /");
|
||||
s = buf + onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, buf);
|
||||
|
||||
p = pat;
|
||||
while (p < pat_end) {
|
||||
if (*p == '\\') {
|
||||
*s++ = *p++;
|
||||
len = enclen(enc, p);
|
||||
while (len-- > 0) *s++ = *p++;
|
||||
}
|
||||
else if (*p == '/') {
|
||||
*s++ = (unsigned char )'\\';
|
||||
*s++ = *p++;
|
||||
}
|
||||
else if (ONIGENC_IS_MBC_HEAD(enc, p)) {
|
||||
len = enclen(enc, p);
|
||||
if (ONIGENC_MBC_MINLEN(enc) == 1) {
|
||||
while (len-- > 0) *s++ = *p++;
|
||||
}
|
||||
else { /* for UTF16 */
|
||||
int blen;
|
||||
|
||||
while (len-- > 0) {
|
||||
sprint_byte_with_x((char* )bs, (unsigned int )(*p++));
|
||||
blen = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, bs);
|
||||
bp = bs;
|
||||
while (blen-- > 0) *s++ = *bp++;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (!ONIGENC_IS_CODE_PRINT(enc, *p) &&
|
||||
!ONIGENC_IS_CODE_SPACE(enc, *p)) {
|
||||
sprint_byte_with_x((char* )bs, (unsigned int )(*p++));
|
||||
len = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, bs);
|
||||
bp = bs;
|
||||
while (len-- > 0) *s++ = *bp++;
|
||||
}
|
||||
else {
|
||||
*s++ = *p++;
|
||||
}
|
||||
}
|
||||
|
||||
*s++ = '/';
|
||||
*s = '\0';
|
||||
}
|
||||
}
|
4335
src/Onigmo/regexec.c
Normal file
4335
src/Onigmo/regexec.c
Normal file
File diff suppressed because it is too large
Load Diff
223
src/Onigmo/regext.c
Normal file
223
src/Onigmo/regext.c
Normal file
@ -0,0 +1,223 @@
|
||||
/**********************************************************************
|
||||
regext.c - Onigmo (Oniguruma-mod) (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2011 K.Takata <kentkt AT csc DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "regint.h"
|
||||
|
||||
static void
|
||||
conv_ext0be32(const UChar* s, const UChar* end, UChar* conv)
|
||||
{
|
||||
while (s < end) {
|
||||
*conv++ = '\0';
|
||||
*conv++ = '\0';
|
||||
*conv++ = '\0';
|
||||
*conv++ = *s++;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
conv_ext0le32(const UChar* s, const UChar* end, UChar* conv)
|
||||
{
|
||||
while (s < end) {
|
||||
*conv++ = *s++;
|
||||
*conv++ = '\0';
|
||||
*conv++ = '\0';
|
||||
*conv++ = '\0';
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
conv_ext0be(const UChar* s, const UChar* end, UChar* conv)
|
||||
{
|
||||
while (s < end) {
|
||||
*conv++ = '\0';
|
||||
*conv++ = *s++;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
conv_ext0le(const UChar* s, const UChar* end, UChar* conv)
|
||||
{
|
||||
while (s < end) {
|
||||
*conv++ = *s++;
|
||||
*conv++ = '\0';
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
conv_swap4bytes(const UChar* s, const UChar* end, UChar* conv)
|
||||
{
|
||||
while (s < end) {
|
||||
*conv++ = s[3];
|
||||
*conv++ = s[2];
|
||||
*conv++ = s[1];
|
||||
*conv++ = s[0];
|
||||
s += 4;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
conv_swap2bytes(const UChar* s, const UChar* end, UChar* conv)
|
||||
{
|
||||
while (s < end) {
|
||||
*conv++ = s[1];
|
||||
*conv++ = s[0];
|
||||
s += 2;
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
conv_encoding(OnigEncoding from, OnigEncoding to, const UChar* s, const UChar* end,
|
||||
UChar** conv, UChar** conv_end)
|
||||
{
|
||||
ptrdiff_t len = end - s;
|
||||
|
||||
if (to == ONIG_ENCODING_UTF16_BE) {
|
||||
if (from == ONIG_ENCODING_ASCII || from == ONIG_ENCODING_ISO_8859_1) {
|
||||
*conv = (UChar* )xmalloc(len * 2);
|
||||
CHECK_NULL_RETURN_MEMERR(*conv);
|
||||
*conv_end = *conv + (len * 2);
|
||||
conv_ext0be(s, end, *conv);
|
||||
return 0;
|
||||
}
|
||||
else if (from == ONIG_ENCODING_UTF16_LE) {
|
||||
swap16:
|
||||
*conv = (UChar* )xmalloc(len);
|
||||
CHECK_NULL_RETURN_MEMERR(*conv);
|
||||
*conv_end = *conv + len;
|
||||
conv_swap2bytes(s, end, *conv);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
else if (to == ONIG_ENCODING_UTF16_LE) {
|
||||
if (from == ONIG_ENCODING_ASCII || from == ONIG_ENCODING_ISO_8859_1) {
|
||||
*conv = (UChar* )xmalloc(len * 2);
|
||||
CHECK_NULL_RETURN_MEMERR(*conv);
|
||||
*conv_end = *conv + (len * 2);
|
||||
conv_ext0le(s, end, *conv);
|
||||
return 0;
|
||||
}
|
||||
else if (from == ONIG_ENCODING_UTF16_BE) {
|
||||
goto swap16;
|
||||
}
|
||||
}
|
||||
if (to == ONIG_ENCODING_UTF32_BE) {
|
||||
if (from == ONIG_ENCODING_ASCII || from == ONIG_ENCODING_ISO_8859_1) {
|
||||
*conv = (UChar* )xmalloc(len * 4);
|
||||
CHECK_NULL_RETURN_MEMERR(*conv);
|
||||
*conv_end = *conv + (len * 4);
|
||||
conv_ext0be32(s, end, *conv);
|
||||
return 0;
|
||||
}
|
||||
else if (from == ONIG_ENCODING_UTF32_LE) {
|
||||
swap32:
|
||||
*conv = (UChar* )xmalloc(len);
|
||||
CHECK_NULL_RETURN_MEMERR(*conv);
|
||||
*conv_end = *conv + len;
|
||||
conv_swap4bytes(s, end, *conv);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
else if (to == ONIG_ENCODING_UTF32_LE) {
|
||||
if (from == ONIG_ENCODING_ASCII || from == ONIG_ENCODING_ISO_8859_1) {
|
||||
*conv = (UChar* )xmalloc(len * 4);
|
||||
CHECK_NULL_RETURN_MEMERR(*conv);
|
||||
*conv_end = *conv + (len * 4);
|
||||
conv_ext0le32(s, end, *conv);
|
||||
return 0;
|
||||
}
|
||||
else if (from == ONIG_ENCODING_UTF32_BE) {
|
||||
goto swap32;
|
||||
}
|
||||
}
|
||||
|
||||
return ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION;
|
||||
}
|
||||
|
||||
extern int
|
||||
onig_new_deluxe(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
|
||||
OnigCompileInfo* ci, OnigErrorInfo* einfo)
|
||||
{
|
||||
int r;
|
||||
UChar *cpat, *cpat_end;
|
||||
|
||||
if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;
|
||||
|
||||
if (ci->pattern_enc != ci->target_enc) {
|
||||
r = conv_encoding(ci->pattern_enc, ci->target_enc, pattern, pattern_end,
|
||||
&cpat, &cpat_end);
|
||||
if (r) return r;
|
||||
}
|
||||
else {
|
||||
cpat = (UChar* )pattern;
|
||||
cpat_end = (UChar* )pattern_end;
|
||||
}
|
||||
|
||||
*reg = (regex_t* )xmalloc(sizeof(regex_t));
|
||||
if (IS_NULL(*reg)) {
|
||||
r = ONIGERR_MEMORY;
|
||||
goto err2;
|
||||
}
|
||||
|
||||
r = onig_reg_init(*reg, ci->option, ci->case_fold_flag, ci->target_enc,
|
||||
ci->syntax);
|
||||
if (r) goto err;
|
||||
|
||||
r = onig_compile(*reg, cpat, cpat_end, einfo);
|
||||
if (r) {
|
||||
err:
|
||||
onig_free(*reg);
|
||||
*reg = NULL;
|
||||
}
|
||||
|
||||
err2:
|
||||
if (cpat != pattern) xfree(cpat);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
#ifdef USE_RECOMPILE_API
/*
 * Compile `pattern` into a fresh regex via onig_new_deluxe() and install it
 * in place of `reg`: transferred directly when reg is in the normal state,
 * otherwise added to reg's chain.  Returns 0 on success or the error code
 * from onig_new_deluxe(), in which case reg is left untouched.
 */
extern int
onig_recompile_deluxe(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
                      OnigCompileInfo* ci, OnigErrorInfo* einfo)
{
  regex_t *fresh;
  int r = onig_new_deluxe(&fresh, pattern, pattern_end, ci, einfo);

  if (r != 0) return r;

  if (ONIG_STATE(reg) != ONIG_STATE_NORMAL)
    onig_chain_link_add(reg, fresh);
  else
    onig_transfer(reg, fresh);

  return 0;
}
#endif
|
168
src/Onigmo/reggnu.c
Normal file
168
src/Onigmo/reggnu.c
Normal file
@ -0,0 +1,168 @@
|
||||
/**********************************************************************
|
||||
reggnu.c - Onigmo (Oniguruma-mod) (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2011 K.Takata <kentkt AT csc DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "regint.h"
|
||||
|
||||
#ifndef ONIGGNU_H
|
||||
#include "oniggnu.h"
|
||||
#endif
|
||||
|
||||
extern void
|
||||
re_free_registers(OnigRegion* r)
|
||||
{
|
||||
/* 0: don't free self */
|
||||
onig_region_free(r, 0);
|
||||
}
|
||||
|
||||
extern int
|
||||
re_adjust_startpos(regex_t* reg, const char* string, int size,
|
||||
int startpos, int range)
|
||||
{
|
||||
if (startpos > 0 && ONIGENC_MBC_MAXLEN(reg->enc) != 1 && startpos < size) {
|
||||
UChar *p;
|
||||
UChar *s = (UChar* )string + startpos;
|
||||
|
||||
if (range > 0) {
|
||||
p = onigenc_get_right_adjust_char_head(reg->enc, (UChar* )string, s);
|
||||
}
|
||||
else {
|
||||
p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, (UChar* )string, s);
|
||||
}
|
||||
return (int )(p - (UChar* )string);
|
||||
}
|
||||
|
||||
return startpos;
|
||||
}
|
||||
|
||||
extern int
|
||||
re_match(regex_t* reg, const char* str, int size, int pos,
|
||||
struct re_registers* regs)
|
||||
{
|
||||
return (int )onig_match(reg, (UChar* )str, (UChar* )(str + size),
|
||||
(UChar* )(str + pos), regs, ONIG_OPTION_NONE);
|
||||
}
|
||||
|
||||
extern int
|
||||
re_search(regex_t* bufp, const char* string, int size, int startpos, int range,
|
||||
struct re_registers* regs)
|
||||
{
|
||||
return (int )onig_search(bufp, (UChar* )string, (UChar* )(string + size),
|
||||
(UChar* )(string + startpos),
|
||||
(UChar* )(string + startpos + range),
|
||||
regs, ONIG_OPTION_NONE);
|
||||
}
|
||||
|
||||
extern int
|
||||
re_compile_pattern(const char* pattern, int size, regex_t* reg, char* ebuf)
|
||||
{
|
||||
int r;
|
||||
OnigErrorInfo einfo;
|
||||
|
||||
r = onig_compile(reg, (UChar* )pattern, (UChar* )(pattern + size), &einfo);
|
||||
if (r != ONIG_NORMAL) {
|
||||
if (IS_NOT_NULL(ebuf))
|
||||
(void )onig_error_code_to_str((UChar* )ebuf, r, &einfo);
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
#ifdef USE_RECOMPILE_API
/*
 * Recompile `reg` from the `size` bytes at `pattern`, keeping reg's current
 * options and using the default encoding and default syntax.  Returns
 * onig_recompile()'s result.  On failure, when ebuf is non-NULL, the error
 * message is written into ebuf; no length check is done on ebuf.
 */
extern int
re_recompile_pattern(const char* pattern, int size, regex_t* reg, char* ebuf)
{
  OnigErrorInfo einfo;
  UChar *pat_begin = (UChar* )pattern;
  UChar *pat_end   = (UChar* )(pattern + size);
  /* Original author's note (2002/11/29): encoding and options should
     arguably be arguments of this function, but it is kept this way to
     match the existing re.c. */
  OnigEncoding enc = OnigEncDefaultCharEncoding;
  int r = onig_recompile(reg, pat_begin, pat_end,
                         reg->options, enc, OnigDefaultSyntax, &einfo);

  if (r == ONIG_NORMAL) return r;

  if (IS_NOT_NULL(ebuf))
    (void )onig_error_code_to_str((UChar* )ebuf, r, &einfo);

  return r;
}
#endif
|
||||
|
||||
extern void
|
||||
re_free_pattern(regex_t* reg)
|
||||
{
|
||||
onig_free(reg);
|
||||
}
|
||||
|
||||
extern int
|
||||
re_alloc_pattern(regex_t** reg)
|
||||
{
|
||||
*reg = (regex_t* )xmalloc(sizeof(regex_t));
|
||||
if (IS_NULL(*reg)) return ONIGERR_MEMORY;
|
||||
|
||||
return onig_reg_init(*reg, ONIG_OPTION_DEFAULT,
|
||||
ONIGENC_CASE_FOLD_DEFAULT,
|
||||
OnigEncDefaultCharEncoding,
|
||||
OnigDefaultSyntax);
|
||||
}
|
||||
|
||||
extern void
|
||||
re_set_casetable(const char* table)
|
||||
{
|
||||
onigenc_set_default_caseconv_table((UChar* )table);
|
||||
}
|
||||
|
||||
extern void
|
||||
re_mbcinit(int mb_code)
|
||||
{
|
||||
OnigEncoding enc;
|
||||
|
||||
switch (mb_code) {
|
||||
case RE_MBCTYPE_ASCII:
|
||||
enc = ONIG_ENCODING_ASCII;
|
||||
break;
|
||||
case RE_MBCTYPE_EUC:
|
||||
enc = ONIG_ENCODING_EUC_JP;
|
||||
break;
|
||||
case RE_MBCTYPE_SJIS:
|
||||
enc = ONIG_ENCODING_SJIS;
|
||||
break;
|
||||
case RE_MBCTYPE_UTF8:
|
||||
enc = ONIG_ENCODING_UTF8;
|
||||
break;
|
||||
default:
|
||||
return ;
|
||||
break;
|
||||
}
|
||||
|
||||
onigenc_set_default_encoding(enc);
|
||||
}
|
868
src/Onigmo/regint.h
Normal file
868
src/Onigmo/regint.h
Normal file
@ -0,0 +1,868 @@
|
||||
#ifndef REGINT_H
|
||||
#define REGINT_H
|
||||
/**********************************************************************
|
||||
regint.h - Onigmo (Oniguruma-mod) (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2011-2012 K.Takata <kentkt AT csc DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* for debug */
|
||||
/* #define ONIG_DEBUG_PARSE_TREE */
|
||||
/* #define ONIG_DEBUG_COMPILE */
|
||||
/* #define ONIG_DEBUG_SEARCH */
|
||||
/* #define ONIG_DEBUG_MATCH */
|
||||
/* #define ONIG_DONT_OPTIMIZE */
|
||||
|
||||
/* for byte-code statistical data. */
|
||||
/* #define ONIG_DEBUG_STATISTICS */
|
||||
|
||||
#if defined(ONIG_DEBUG_PARSE_TREE) || defined(ONIG_DEBUG_MATCH) || \
|
||||
defined(ONIG_DEBUG_SEARCH) || defined(ONIG_DEBUG_COMPILE) || \
|
||||
defined(ONIG_DEBUG_STATISTICS)
|
||||
#ifndef ONIG_DEBUG
|
||||
#define ONIG_DEBUG
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
|
||||
defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || \
|
||||
defined(__mc68020__)
|
||||
#define PLATFORM_UNALIGNED_WORD_ACCESS
|
||||
#endif
|
||||
|
||||
/* config */
|
||||
/* spec. config */
|
||||
#define USE_NAMED_GROUP
|
||||
#define USE_SUBEXP_CALL
|
||||
#define USE_PERL_SUBEXP_CALL
|
||||
#define USE_CAPITAL_P_NAMED_GROUP
|
||||
#define USE_BACKREF_WITH_LEVEL /* \k<name+n>, \k<name-n> */
|
||||
#define USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT /* /(?:()|())*\2/ */
|
||||
#define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE /* /\n$/ =~ "\n" */
|
||||
#define USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
|
||||
/* #define USE_RECOMPILE_API */
|
||||
/* !!! moved to regenc.h. */ /* #define USE_CRNL_AS_LINE_TERMINATOR */
|
||||
#define USE_NO_INVALID_QUANTIFIER
|
||||
|
||||
/* internal config */
|
||||
#define USE_PARSE_TREE_NODE_RECYCLE
|
||||
#define USE_OP_PUSH_OR_JUMP_EXACT
|
||||
#define USE_QTFR_PEEK_NEXT
|
||||
#define USE_ST_LIBRARY
|
||||
#define USE_SHARED_CCLASS_TABLE
|
||||
#define USE_SUNDAY_QUICK_SEARCH
|
||||
|
||||
#define INIT_MATCH_STACK_SIZE 160
|
||||
#define DEFAULT_MATCH_STACK_LIMIT_SIZE 0 /* unlimited */
|
||||
|
||||
/* check config */
|
||||
#if defined(USE_PERL_SUBEXP_CALL) || defined(USE_CAPITAL_P_NAMED_GROUP)
|
||||
#if !defined(USE_NAMED_GROUP) || !defined(USE_SUBEXP_CALL)
|
||||
#error USE_NAMED_GROUP and USE_SUBEXP_CALL must be defined.
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__GNUC__)
|
||||
# define ARG_UNUSED __attribute__ ((unused))
|
||||
#else
|
||||
# define ARG_UNUSED
|
||||
#endif
|
||||
|
||||
/* */
|
||||
/* escape other system UChar definition */
|
||||
#include "config.h"
|
||||
#ifdef ONIG_ESCAPE_UCHAR_COLLISION
|
||||
#undef ONIG_ESCAPE_UCHAR_COLLISION
|
||||
#endif
|
||||
|
||||
#define USE_WORD_BEGIN_END /* "\<", "\>" */
|
||||
#define USE_CAPTURE_HISTORY
|
||||
#define USE_VARIABLE_META_CHARS
|
||||
#define USE_POSIX_API_REGION_OPTION
|
||||
#define USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
|
||||
/* #define USE_COMBINATION_EXPLOSION_CHECK */ /* (X*)* */
|
||||
|
||||
/* #define USE_MULTI_THREAD_SYSTEM */
|
||||
#define THREAD_SYSTEM_INIT /* depend on thread system */
|
||||
#define THREAD_SYSTEM_END /* depend on thread system */
|
||||
#define THREAD_ATOMIC_START /* depend on thread system */
|
||||
#define THREAD_ATOMIC_END /* depend on thread system */
|
||||
#define THREAD_PASS /* depend on thread system */
|
||||
#define xmalloc malloc
|
||||
#define xrealloc realloc
|
||||
#define xcalloc calloc
|
||||
#define xfree free
|
||||
|
||||
#define CHECK_INTERRUPT_IN_MATCH_AT
|
||||
|
||||
#define st_init_table onig_st_init_table
|
||||
#define st_init_table_with_size onig_st_init_table_with_size
|
||||
#define st_init_numtable onig_st_init_numtable
|
||||
#define st_init_numtable_with_size onig_st_init_numtable_with_size
|
||||
#define st_init_strtable onig_st_init_strtable
|
||||
#define st_init_strtable_with_size onig_st_init_strtable_with_size
|
||||
#define st_delete onig_st_delete
|
||||
#define st_delete_safe onig_st_delete_safe
|
||||
#define st_insert onig_st_insert
|
||||
#define st_lookup onig_st_lookup
|
||||
#define st_foreach onig_st_foreach
|
||||
#define st_add_direct onig_st_add_direct
|
||||
#define st_free_table onig_st_free_table
|
||||
#define st_cleanup_safe onig_st_cleanup_safe
|
||||
#define st_copy onig_st_copy
|
||||
#define st_nothing_key_clone onig_st_nothing_key_clone
|
||||
#define st_nothing_key_free onig_st_nothing_key_free
|
||||
/* */
|
||||
#define onig_st_is_member st_is_member
|
||||
|
||||
#define STATE_CHECK_STRING_THRESHOLD_LEN 7
|
||||
#define STATE_CHECK_BUFF_MAX_SIZE 0x4000
|
||||
|
||||
#define THREAD_PASS_LIMIT_COUNT 8
|
||||
#define xmemset memset
|
||||
#define xmemcpy memcpy
|
||||
#define xmemmove memmove
|
||||
|
||||
#if defined(_WIN32) && !defined(__GNUC__)
|
||||
#define xalloca _alloca
|
||||
#define xvsnprintf _vsnprintf
|
||||
#else
|
||||
#define xalloca alloca
|
||||
#define xvsnprintf vsnprintf
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM)
|
||||
#define ONIG_STATE_INC(reg) (reg)->state++
|
||||
#define ONIG_STATE_DEC(reg) (reg)->state--
|
||||
|
||||
#define ONIG_STATE_INC_THREAD(reg) do {\
|
||||
THREAD_ATOMIC_START;\
|
||||
(reg)->state++;\
|
||||
THREAD_ATOMIC_END;\
|
||||
} while(0)
|
||||
#define ONIG_STATE_DEC_THREAD(reg) do {\
|
||||
THREAD_ATOMIC_START;\
|
||||
(reg)->state--;\
|
||||
THREAD_ATOMIC_END;\
|
||||
} while(0)
|
||||
#else
|
||||
#define ONIG_STATE_INC(reg) /* Nothing */
|
||||
#define ONIG_STATE_DEC(reg) /* Nothing */
|
||||
#define ONIG_STATE_INC_THREAD(reg) /* Nothing */
|
||||
#define ONIG_STATE_DEC_THREAD(reg) /* Nothing */
|
||||
#endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */
|
||||
|
||||
#ifdef HAVE_STDLIB_H
|
||||
#include <stdlib.h>
|
||||
#endif
|
||||
|
||||
#if defined(HAVE_ALLOCA_H) && (defined(_AIX) || !defined(__GNUC__))
|
||||
#include <alloca.h>
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_STRING_H
|
||||
# include <string.h>
|
||||
#else
|
||||
# include <strings.h>
|
||||
#endif
|
||||
|
||||
#include <ctype.h>
|
||||
#ifdef HAVE_SYS_TYPES_H
|
||||
#ifndef __BORLANDC__
|
||||
#include <sys/types.h>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_STDINT_H
|
||||
# include <stdint.h>
|
||||
#endif
|
||||
|
||||
#ifdef STDC_HEADERS
|
||||
# include <stddef.h>
|
||||
#endif
|
||||
|
||||
#ifdef __BORLANDC__
|
||||
#include <malloc.h>
|
||||
#endif
|
||||
|
||||
#ifdef ONIG_DEBUG
|
||||
# include <stdio.h>
|
||||
#endif
|
||||
|
||||
#ifdef _WIN32
|
||||
#if defined(_MSC_VER) && (_MSC_VER < 1300)
|
||||
#ifndef _INTPTR_T_DEFINED
|
||||
#define _INTPTR_T_DEFINED
|
||||
typedef int intptr_t;
|
||||
#endif
|
||||
#ifndef _UINTPTR_T_DEFINED
|
||||
#define _UINTPTR_T_DEFINED
|
||||
typedef unsigned int uintptr_t;
|
||||
#endif
|
||||
#endif
|
||||
#endif /* _WIN32 */
|
||||
|
||||
#include "regenc.h"
|
||||
|
||||
#ifdef MIN
|
||||
#undef MIN
|
||||
#endif
|
||||
#ifdef MAX
|
||||
#undef MAX
|
||||
#endif
|
||||
#define MIN(a,b) (((a)>(b))?(b):(a))
|
||||
#define MAX(a,b) (((a)<(b))?(b):(a))
|
||||
|
||||
#define IS_NULL(p) (((void*)(p)) == (void*)0)
|
||||
#define IS_NOT_NULL(p) (((void*)(p)) != (void*)0)
|
||||
#define CHECK_NULL_RETURN(p) if (IS_NULL(p)) return NULL
|
||||
#define CHECK_NULL_RETURN_MEMERR(p) if (IS_NULL(p)) return ONIGERR_MEMORY
|
||||
#define NULL_UCHARP ((UChar* )0)
|
||||
|
||||
#define ONIG_LAST_CODE_POINT (~((OnigCodePoint )0))
|
||||
|
||||
#ifdef PLATFORM_UNALIGNED_WORD_ACCESS

/* Load a `type` value from p into val and advance p past it.
   This variant reads through a cast pointer; it is selected only when
   PLATFORM_UNALIGNED_WORD_ACCESS is defined.
   Macro arguments are parenthesized so expression arguments bind correctly. */
#define PLATFORM_GET_INC(val,p,type) do{\
  (val) = *(type* )(p);\
  (p) += sizeof(type);\
} while(0)

#else

/* Load a `type` value from p into val and advance p past it.
   This variant copies byte-wise, so p need not be aligned for `type`. */
#define PLATFORM_GET_INC(val,p,type) do{\
  xmemcpy(&(val), (p), sizeof(type));\
  (p) += sizeof(type);\
} while(0)

/* sizeof(OnigCodePoint) */
#define WORD_ALIGNMENT_SIZE     SIZEOF_LONG

/* Set pad_size to the number of bytes needed to round addr up to the next
   multiple of WORD_ALIGNMENT_SIZE (0 when addr is already aligned). */
#define GET_ALIGNMENT_PAD_SIZE(addr,pad_size) do {\
  (pad_size) = WORD_ALIGNMENT_SIZE \
               - ((uintptr_t )(addr) % WORD_ALIGNMENT_SIZE);\
  if ((pad_size) == WORD_ALIGNMENT_SIZE) (pad_size) = 0;\
} while (0)

/* Round addr up, in place, to the next multiple of WORD_ALIGNMENT_SIZE. */
#define ALIGNMENT_RIGHT(addr) do {\
  (addr) += (WORD_ALIGNMENT_SIZE - 1);\
  (addr) -= ((uintptr_t )(addr) % WORD_ALIGNMENT_SIZE);\
} while (0)

#endif /* PLATFORM_UNALIGNED_WORD_ACCESS */
|
||||
|
||||
/* stack pop level */
|
||||
#define STACK_POP_LEVEL_FREE 0
|
||||
#define STACK_POP_LEVEL_MEM_START 1
|
||||
#define STACK_POP_LEVEL_ALL 2
|
||||
|
||||
/* optimize flags */
|
||||
#define ONIG_OPTIMIZE_NONE 0
|
||||
#define ONIG_OPTIMIZE_EXACT 1 /* Slow Search */
|
||||
#define ONIG_OPTIMIZE_EXACT_BM 2 /* Boyer Moore Search */
|
||||
#define ONIG_OPTIMIZE_EXACT_BM_NOT_REV 3 /* BM (applied to a multibyte string) */
|
||||
#define ONIG_OPTIMIZE_EXACT_IC 4 /* Slow Search (ignore case) */
|
||||
#define ONIG_OPTIMIZE_MAP 5 /* char map */
|
||||
#define ONIG_OPTIMIZE_EXACT_BM_IC 6 /* BM (ignore case) */
|
||||
#define ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC 7 /* BM (applied to a multibyte string) (ignore case) */
|
||||
|
||||
/* bit status */
/* One flag bit per index.  Indexes at or beyond the width of BitStatusType
   all share bit 0 in BIT_STATUS_AT / BIT_STATUS_ON_AT and are ignored by
   BIT_STATUS_ON_AT_SIMPLE. */
typedef unsigned int  BitStatusType;

#define BIT_STATUS_BITS_NUM          (sizeof(BitStatusType) * 8)
#define BIT_STATUS_CLEAR(stats)      (stats) = 0
#define BIT_STATUS_ON_ALL(stats)     (stats) = ~((BitStatusType )0)

/* Non-zero when bit n of stats is set.  The shift is done on an unsigned 1
   (shifting a signed 1 into the top bit is undefined) and n is
   parenthesized so expression arguments bind correctly. */
#define BIT_STATUS_AT(stats,n) \
  ((n) < (int )BIT_STATUS_BITS_NUM ? \
   ((stats) & ((BitStatusType )1 << (n))) : ((stats) & 1))

/* Set bit n of stats; out-of-range n sets bit 0. */
#define BIT_STATUS_ON_AT(stats,n) do {\
  if ((n) < (int )BIT_STATUS_BITS_NUM) \
    (stats) |= ((BitStatusType )1 << (n));\
  else\
    (stats) |= 1;\
} while (0)

/* Set bit n of stats; out-of-range n is ignored. */
#define BIT_STATUS_ON_AT_SIMPLE(stats,n) do {\
  if ((n) < (int )BIT_STATUS_BITS_NUM)\
    (stats) |= ((BitStatusType )1 << (n));\
} while (0)
|
||||
|
||||
|
||||
#define INT_MAX_LIMIT ((1UL << (SIZEOF_INT * 8 - 1)) - 1)
|
||||
|
||||
#define DIGITVAL(code) ((code) - '0')
|
||||
#define ODIGITVAL(code) DIGITVAL(code)
|
||||
#define XDIGITVAL(enc,code) \
|
||||
(ONIGENC_IS_CODE_DIGIT(enc,code) ? DIGITVAL(code) \
|
||||
: (ONIGENC_IS_CODE_UPPER(enc,code) ? (code) - 'A' + 10 : (code) - 'a' + 10))
|
||||
|
||||
#define IS_SINGLELINE(option) ((option) & ONIG_OPTION_SINGLELINE)
|
||||
#define IS_MULTILINE(option) ((option) & ONIG_OPTION_MULTILINE)
|
||||
#define IS_IGNORECASE(option) ((option) & ONIG_OPTION_IGNORECASE)
|
||||
#define IS_EXTEND(option) ((option) & ONIG_OPTION_EXTEND)
|
||||
#define IS_FIND_LONGEST(option) ((option) & ONIG_OPTION_FIND_LONGEST)
|
||||
#define IS_FIND_NOT_EMPTY(option) ((option) & ONIG_OPTION_FIND_NOT_EMPTY)
|
||||
#define IS_FIND_CONDITION(option) ((option) & \
|
||||
(ONIG_OPTION_FIND_LONGEST | ONIG_OPTION_FIND_NOT_EMPTY))
|
||||
#define IS_NOTBOL(option) ((option) & ONIG_OPTION_NOTBOL)
|
||||
#define IS_NOTEOL(option) ((option) & ONIG_OPTION_NOTEOL)
|
||||
#define IS_POSIX_REGION(option) ((option) & ONIG_OPTION_POSIX_REGION)
|
||||
#define IS_ASCII_RANGE(option) ((option) & ONIG_OPTION_ASCII_RANGE)
|
||||
#define IS_POSIX_BRACKET_ALL_RANGE(option) ((option) & ONIG_OPTION_POSIX_BRACKET_ALL_RANGE)
|
||||
#define IS_WORD_BOUND_ALL_RANGE(option) ((option) & ONIG_OPTION_WORD_BOUND_ALL_RANGE)
|
||||
#define IS_NEWLINE_CRLF(option) ((option) & ONIG_OPTION_NEWLINE_CRLF)
|
||||
|
||||
/* OP_SET_OPTION is required for these options.
|
||||
#define IS_DYNAMIC_OPTION(option) \
|
||||
(((option) & (ONIG_OPTION_MULTILINE | ONIG_OPTION_IGNORECASE)) != 0)
|
||||
*/
|
||||
/* ignore-case and multibyte status are included in compiled code. */
|
||||
#define IS_DYNAMIC_OPTION(option) 0
|
||||
|
||||
#define DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag) \
|
||||
((case_fold_flag) & ~INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR)
|
||||
|
||||
#define REPEAT_INFINITE -1
|
||||
#define IS_REPEAT_INFINITE(n) ((n) == REPEAT_INFINITE)
|
||||
|
||||
/* bitset */
|
||||
#define BITS_PER_BYTE 8
|
||||
#define SINGLE_BYTE_SIZE (1 << BITS_PER_BYTE)
|
||||
#define BITS_IN_ROOM ((int )sizeof(Bits) * BITS_PER_BYTE)
|
||||
#define BITSET_SIZE (SINGLE_BYTE_SIZE / BITS_IN_ROOM)
|
||||
|
||||
#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
|
||||
typedef unsigned int Bits;
|
||||
#else
|
||||
typedef unsigned char Bits;
|
||||
#endif
|
||||
typedef Bits BitSet[BITSET_SIZE];
|
||||
typedef Bits* BitSetRef;
|
||||
|
||||
#define SIZE_BITSET (int )sizeof(BitSet)
|
||||
|
||||
/*
 * Zero every room (element) of the bitset bs, BITSET_SIZE elements in all.
 * The loop counter has a macro-private name so that an argument expression
 * mentioning a caller variable called `i` (e.g. BITSET_CLEAR(sets[i])) is
 * not captured by the macro's own counter.
 */
#define BITSET_CLEAR(bs) do {\
  int bitset_clear_idx_;\
  for (bitset_clear_idx_ = 0; bitset_clear_idx_ < BITSET_SIZE; bitset_clear_idx_++) {\
    (bs)[bitset_clear_idx_] = 0;\
  }\
} while (0)
|
||||
|
||||
#define BS_ROOM(bs,pos) (bs)[(int )(pos) / BITS_IN_ROOM]
|
||||
/* Mask for bit pos within its room; unsigned so the top bit of a 32-bit room is well defined. */
#define BS_BIT(pos) (1U << ((int )(pos) % BITS_IN_ROOM))
|
||||
|
||||
#define BITSET_AT(bs, pos) (BS_ROOM(bs,pos) & BS_BIT(pos))
|
||||
#define BITSET_SET_BIT(bs, pos) BS_ROOM(bs,pos) |= BS_BIT(pos)
|
||||
#define BITSET_CLEAR_BIT(bs, pos) BS_ROOM(bs,pos) &= ~(BS_BIT(pos))
|
||||
#define BITSET_INVERT_BIT(bs, pos) BS_ROOM(bs,pos) ^= BS_BIT(pos)
|
||||
|
||||
/* bytes buffer */
|
||||
/* Growable byte buffer manipulated through the BBUF_* macros defined after it. */
typedef struct _BBuf {
|
||||
UChar* p; /* storage; the BBUF_* growth macros resize it with xrealloc() */
|
||||
unsigned int used; /* bytes in use; BBUF_WRITE raises it to the end of the highest write */
|
||||
unsigned int alloc; /* size in bytes last requested from xrealloc() for p */
|
||||
} BBuf;
|
||||
|
||||
#define BBUF_INIT(buf,size) onig_bbuf_init((BBuf* )(buf), (size))
|
||||
|
||||
/*
 * Grow buf's storage by inc bytes.
 * On allocation failure the enclosing function returns ONIGERR_MEMORY.
 * The new pointer and size are committed only after xrealloc() succeeds, so
 * on failure buf still owns its original block (no leak) and alloc still
 * matches it.
 */
#define BBUF_SIZE_INC(buf,inc) do{\
  unsigned int bbuf_inc_alloc_ = (buf)->alloc + (inc);\
  UChar* bbuf_inc_p_ = (UChar* )xrealloc((buf)->p, bbuf_inc_alloc_);\
  if (IS_NULL(bbuf_inc_p_)) return(ONIGERR_MEMORY);\
  (buf)->p     = bbuf_inc_p_;\
  (buf)->alloc = bbuf_inc_alloc_;\
} while (0)
|
||||
|
||||
/*
 * Grow buf's storage by doubling (at least once) until it holds low bytes.
 * On allocation failure the enclosing function returns ONIGERR_MEMORY.
 *
 * - low is evaluated once and parenthesized before the cast, so expression
 *   arguments such as (to) + (n) are converted as a whole.
 * - A zero alloc starts from 1, and a size that can no longer be doubled
 *   is reported as ONIGERR_MEMORY instead of looping forever.
 * - The new pointer and size are committed only after xrealloc() succeeds,
 *   so on failure buf still owns its original block.
 */
#define BBUF_EXPAND(buf,low) do{\
  unsigned int bbuf_exp_low_   = (unsigned int )(low);\
  unsigned int bbuf_exp_alloc_ = (buf)->alloc;\
  UChar* bbuf_exp_p_;\
  do {\
    if (bbuf_exp_alloc_ > (~0U) / 2) return(ONIGERR_MEMORY);\
    bbuf_exp_alloc_ = (bbuf_exp_alloc_ == 0) ? 1 : bbuf_exp_alloc_ * 2;\
  } while (bbuf_exp_alloc_ < bbuf_exp_low_);\
  bbuf_exp_p_ = (UChar* )xrealloc((buf)->p, bbuf_exp_alloc_);\
  if (IS_NULL(bbuf_exp_p_)) return(ONIGERR_MEMORY);\
  (buf)->p     = bbuf_exp_p_;\
  (buf)->alloc = bbuf_exp_alloc_;\
} while (0)
|
||||
|
||||
/*
 * Make sure buf's storage holds at least size bytes, doubling alloc as
 * needed; does nothing when alloc is already large enough.
 * On allocation failure the enclosing function returns ONIGERR_MEMORY.
 *
 * - size is evaluated once; the locals have macro-private names so they
 *   cannot capture identifiers used in the arguments.
 * - A zero alloc starts from 1, and a size that can no longer be doubled
 *   is reported as ONIGERR_MEMORY instead of looping forever.
 * - The new pointer and size are committed only after xrealloc() succeeds,
 *   so on failure buf still owns its original block.
 */
#define BBUF_ENSURE_SIZE(buf,size) do{\
  unsigned int bbuf_ens_size_  = (unsigned int )(size);\
  unsigned int bbuf_ens_alloc_ = (buf)->alloc;\
  while (bbuf_ens_alloc_ < bbuf_ens_size_) {\
    if (bbuf_ens_alloc_ > (~0U) / 2) return(ONIGERR_MEMORY);\
    bbuf_ens_alloc_ = (bbuf_ens_alloc_ == 0) ? 1 : bbuf_ens_alloc_ * 2;\
  }\
  if ((buf)->alloc != bbuf_ens_alloc_) {\
    UChar* bbuf_ens_p_ = (UChar* )xrealloc((buf)->p, bbuf_ens_alloc_);\
    if (IS_NULL(bbuf_ens_p_)) return(ONIGERR_MEMORY);\
    (buf)->p     = bbuf_ens_p_;\
    (buf)->alloc = bbuf_ens_alloc_;\
  }\
} while (0)
|
||||
|
||||
#define BBUF_WRITE(buf,pos,bytes,n) do{\
|
||||
int used = (pos) + (int )(n);\
|
||||
if ((buf)->alloc < (unsigned int )used) BBUF_EXPAND((buf),used);\
|
||||
xmemcpy((buf)->p + (pos), (bytes), (n));\
|
||||
if ((buf)->used < (unsigned int )used) (buf)->used = used;\
|
||||
} while (0)
|
||||
|
||||
#define BBUF_WRITE1(buf,pos,byte) do{\
|
||||
int used = (pos) + 1;\
|
||||
if ((buf)->alloc < (unsigned int )used) BBUF_EXPAND((buf),used);\
|
||||
(buf)->p[(pos)] = (UChar )(byte);\
|
||||
if ((buf)->used < (unsigned int )used) (buf)->used = used;\
|
||||
} while (0)
|
||||
|
||||
#define BBUF_ADD(buf,bytes,n) BBUF_WRITE((buf),(buf)->used,(bytes),(n))
|
||||
#define BBUF_ADD1(buf,byte) BBUF_WRITE1((buf),(buf)->used,(byte))
|
||||
#define BBUF_GET_ADD_ADDRESS(buf) ((buf)->p + (buf)->used)
|
||||
#define BBUF_GET_OFFSET_POS(buf) ((buf)->used)
|
||||
|
||||
/* from < to */
|
||||
#define BBUF_MOVE_RIGHT(buf,from,to,n) do {\
|
||||
if ((unsigned int )((to)+(n)) > (buf)->alloc) BBUF_EXPAND((buf),(to) + (n));\
|
||||
xmemmove((buf)->p + (to), (buf)->p + (from), (n));\
|
||||
if ((unsigned int )((to)+(n)) > (buf)->used) (buf)->used = (to) + (n);\
|
||||
} while (0)
|
||||
|
||||
/* from > to */
|
||||
#define BBUF_MOVE_LEFT(buf,from,to,n) do {\
|
||||
xmemmove((buf)->p + (to), (buf)->p + (from), (n));\
|
||||
} while (0)
|
||||
|
||||
/*
 * Requires from > to.
 * Move the bytes from offset from up to used down to offset to, then shrink
 * used by (from - to).  Both arguments are parenthesized in the subtraction
 * so that expression arguments such as (base + 1) reduce used by the
 * intended amount.
 */
#define BBUF_MOVE_LEFT_REDUCE(buf,from,to) do {\
  xmemmove((buf)->p + (to), (buf)->p + (from), (buf)->used - (from));\
  (buf)->used -= ((from) - (to));\
} while (0)
|
||||
|
||||
/*
 * Insert n bytes at offset pos of buf.
 * When pos is at or past used, this is a plain BBUF_WRITE at pos.
 * Otherwise the bytes from pos to used are shifted right by n with
 * BBUF_MOVE_RIGHT and the new bytes are copied into the gap.
 * Every argument is parenthesized where it is used, so expression arguments
 * (for example idx & 7) are compared and forwarded as a whole.
 */
#define BBUF_INSERT(buf,pos,bytes,n) do {\
  if ((pos) >= (buf)->used) {\
    BBUF_WRITE((buf),(pos),(bytes),(n));\
  }\
  else {\
    BBUF_MOVE_RIGHT((buf),(pos),(pos) + (n),((buf)->used - (pos)));\
    xmemcpy((buf)->p + (pos), (bytes), (n));\
  }\
} while (0)
|
||||
|
||||
#define BBUF_GET_BYTE(buf, pos) (buf)->p[(pos)]
|
||||
|
||||
|
||||
#define ANCHOR_BEGIN_BUF (1<<0)
|
||||
#define ANCHOR_BEGIN_LINE (1<<1)
|
||||
#define ANCHOR_BEGIN_POSITION (1<<2)
|
||||
#define ANCHOR_END_BUF (1<<3)
|
||||
#define ANCHOR_SEMI_END_BUF (1<<4)
|
||||
#define ANCHOR_END_LINE (1<<5)
|
||||
|
||||
#define ANCHOR_WORD_BOUND (1<<6)
|
||||
#define ANCHOR_NOT_WORD_BOUND (1<<7)
|
||||
#define ANCHOR_WORD_BEGIN (1<<8)
|
||||
#define ANCHOR_WORD_END (1<<9)
|
||||
#define ANCHOR_PREC_READ (1<<10)
|
||||
#define ANCHOR_PREC_READ_NOT (1<<11)
|
||||
#define ANCHOR_LOOK_BEHIND (1<<12)
|
||||
#define ANCHOR_LOOK_BEHIND_NOT (1<<13)
|
||||
|
||||
#define ANCHOR_ANYCHAR_STAR (1<<14) /* ".*" optimize info */
|
||||
#define ANCHOR_ANYCHAR_STAR_ML (1<<15) /* ".*" optimize info (multi-line) */
|
||||
|
||||
#define ANCHOR_KEEP (1<<16)
|
||||
|
||||
/* operation code */
|
||||
enum OpCode {
|
||||
OP_FINISH = 0, /* matching process terminator (no more alternative) */
|
||||
OP_END = 1, /* pattern code terminator (success end) */
|
||||
|
||||
OP_EXACT1 = 2, /* single byte, N = 1 */
|
||||
OP_EXACT2, /* single byte, N = 2 */
|
||||
OP_EXACT3, /* single byte, N = 3 */
|
||||
OP_EXACT4, /* single byte, N = 4 */
|
||||
OP_EXACT5, /* single byte, N = 5 */
|
||||
OP_EXACTN, /* single byte */
|
||||
OP_EXACTMB2N1, /* mb-length = 2 N = 1 */
|
||||
OP_EXACTMB2N2, /* mb-length = 2 N = 2 */
|
||||
OP_EXACTMB2N3, /* mb-length = 2 N = 3 */
|
||||
OP_EXACTMB2N, /* mb-length = 2 */
|
||||
OP_EXACTMB3N, /* mb-length = 3 */
|
||||
OP_EXACTMBN, /* other length */
|
||||
|
||||
OP_EXACT1_IC, /* single byte, N = 1, ignore case */
|
||||
OP_EXACTN_IC, /* single byte, ignore case */
|
||||
|
||||
OP_CCLASS,
|
||||
OP_CCLASS_MB,
|
||||
OP_CCLASS_MIX,
|
||||
OP_CCLASS_NOT,
|
||||
OP_CCLASS_MB_NOT,
|
||||
OP_CCLASS_MIX_NOT,
|
||||
OP_CCLASS_NODE, /* pointer to CClassNode node */
|
||||
|
||||
OP_ANYCHAR, /* "." */
|
||||
OP_ANYCHAR_ML, /* "." multi-line */
|
||||
OP_ANYCHAR_STAR, /* ".*" */
|
||||
OP_ANYCHAR_ML_STAR, /* ".*" multi-line */
|
||||
OP_ANYCHAR_STAR_PEEK_NEXT,
|
||||
OP_ANYCHAR_ML_STAR_PEEK_NEXT,
|
||||
|
||||
OP_WORD,
|
||||
OP_NOT_WORD,
|
||||
OP_WORD_BOUND,
|
||||
OP_NOT_WORD_BOUND,
|
||||
OP_WORD_BEGIN,
|
||||
OP_WORD_END,
|
||||
|
||||
OP_ASCII_WORD,
|
||||
OP_NOT_ASCII_WORD,
|
||||
OP_ASCII_WORD_BOUND,
|
||||
OP_NOT_ASCII_WORD_BOUND,
|
||||
OP_ASCII_WORD_BEGIN,
|
||||
OP_ASCII_WORD_END,
|
||||
|
||||
OP_BEGIN_BUF,
|
||||
OP_END_BUF,
|
||||
OP_BEGIN_LINE,
|
||||
OP_END_LINE,
|
||||
OP_SEMI_END_BUF,
|
||||
OP_BEGIN_POSITION,
|
||||
OP_BEGIN_POS_OR_LINE, /* used for implicit anchor optimization */
|
||||
|
||||
OP_BACKREF1,
|
||||
OP_BACKREF2,
|
||||
OP_BACKREFN,
|
||||
OP_BACKREFN_IC,
|
||||
OP_BACKREF_MULTI,
|
||||
OP_BACKREF_MULTI_IC,
|
||||
OP_BACKREF_WITH_LEVEL, /* \k<xxx+n>, \k<xxx-n> */
|
||||
|
||||
OP_MEMORY_START,
|
||||
OP_MEMORY_START_PUSH, /* push back-tracker to stack */
|
||||
OP_MEMORY_END_PUSH, /* push back-tracker to stack */
|
||||
OP_MEMORY_END_PUSH_REC, /* push back-tracker to stack */
|
||||
OP_MEMORY_END,
|
||||
OP_MEMORY_END_REC, /* push marker to stack */
|
||||
|
||||
OP_KEEP,
|
||||
|
||||
OP_FAIL, /* pop stack and move */
|
||||
OP_JUMP,
|
||||
OP_PUSH,
|
||||
OP_POP,
|
||||
OP_PUSH_OR_JUMP_EXACT1, /* if match exact then push, else jump. */
|
||||
OP_PUSH_IF_PEEK_NEXT, /* if match exact then push, else none. */
|
||||
OP_REPEAT, /* {n,m} */
|
||||
OP_REPEAT_NG, /* {n,m}? (non greedy) */
|
||||
OP_REPEAT_INC,
|
||||
OP_REPEAT_INC_NG, /* non greedy */
|
||||
OP_REPEAT_INC_SG, /* search and get in stack */
|
||||
OP_REPEAT_INC_NG_SG, /* search and get in stack (non greedy) */
|
||||
OP_NULL_CHECK_START, /* null loop checker start */
|
||||
OP_NULL_CHECK_END, /* null loop checker end */
|
||||
OP_NULL_CHECK_END_MEMST, /* null loop checker end (with capture status) */
|
||||
OP_NULL_CHECK_END_MEMST_PUSH, /* with capture status and push check-end */
|
||||
|
||||
OP_PUSH_POS, /* (?=...) start */
|
||||
OP_POP_POS, /* (?=...) end */
|
||||
OP_PUSH_POS_NOT, /* (?!...) start */
|
||||
OP_FAIL_POS, /* (?!...) end */
|
||||
OP_PUSH_STOP_BT, /* (?>...) start */
|
||||
OP_POP_STOP_BT, /* (?>...) end */
|
||||
OP_LOOK_BEHIND, /* (?<=...) start (no needs end opcode) */
|
||||
OP_PUSH_LOOK_BEHIND_NOT, /* (?<!...) start */
|
||||
OP_FAIL_LOOK_BEHIND_NOT, /* (?<!...) end */
|
||||
|
||||
OP_CALL, /* \g<name> */
|
||||
OP_RETURN,
|
||||
|
||||
OP_CONDITION,
|
||||
|
||||
OP_STATE_CHECK_PUSH, /* combination explosion check and push */
|
||||
OP_STATE_CHECK_PUSH_OR_JUMP, /* check ok -> push, else jump */
|
||||
OP_STATE_CHECK, /* check only */
|
||||
OP_STATE_CHECK_ANYCHAR_STAR,
|
||||
OP_STATE_CHECK_ANYCHAR_ML_STAR,
|
||||
|
||||
/* no need: IS_DYNAMIC_OPTION() == 0 */
|
||||
OP_SET_OPTION_PUSH, /* set option and push recover option */
|
||||
OP_SET_OPTION /* set option */
|
||||
};
|
||||
|
||||
typedef int RelAddrType;
|
||||
typedef int AbsAddrType;
|
||||
typedef int LengthType;
|
||||
typedef int RepeatNumType;
|
||||
typedef short int MemNumType;
|
||||
typedef short int StateCheckNumType;
|
||||
typedef void* PointerType;
|
||||
|
||||
#define SIZE_OPCODE 1
|
||||
#define SIZE_RELADDR (int )sizeof(RelAddrType)
|
||||
#define SIZE_ABSADDR (int )sizeof(AbsAddrType)
|
||||
#define SIZE_LENGTH (int )sizeof(LengthType)
|
||||
#define SIZE_MEMNUM (int )sizeof(MemNumType)
|
||||
#define SIZE_STATE_CHECK_NUM (int )sizeof(StateCheckNumType)
|
||||
#define SIZE_REPEATNUM (int )sizeof(RepeatNumType)
|
||||
#define SIZE_OPTION (int )sizeof(OnigOptionType)
|
||||
#define SIZE_CODE_POINT (int )sizeof(OnigCodePoint)
|
||||
#define SIZE_POINTER (int )sizeof(PointerType)
|
||||
|
||||
|
||||
#define GET_RELADDR_INC(addr,p) PLATFORM_GET_INC(addr, p, RelAddrType)
|
||||
#define GET_ABSADDR_INC(addr,p) PLATFORM_GET_INC(addr, p, AbsAddrType)
|
||||
#define GET_LENGTH_INC(len,p) PLATFORM_GET_INC(len, p, LengthType)
|
||||
#define GET_MEMNUM_INC(num,p) PLATFORM_GET_INC(num, p, MemNumType)
|
||||
#define GET_REPEATNUM_INC(num,p) PLATFORM_GET_INC(num, p, RepeatNumType)
|
||||
#define GET_OPTION_INC(option,p) PLATFORM_GET_INC(option, p, OnigOptionType)
|
||||
#define GET_POINTER_INC(ptr,p) PLATFORM_GET_INC(ptr, p, PointerType)
|
||||
#define GET_STATE_CHECK_NUM_INC(num,p) PLATFORM_GET_INC(num, p, StateCheckNumType)
|
||||
|
||||
/* code point's address must be aligned address. */
|
||||
#define GET_CODE_POINT(code,p) code = *((OnigCodePoint* )(p))
|
||||
#define GET_BYTE_INC(byte,p) do{\
|
||||
byte = *(p);\
|
||||
(p)++;\
|
||||
} while(0)
|
||||
|
||||
|
||||
/* op-code + arg size */
|
||||
#define SIZE_OP_ANYCHAR_STAR SIZE_OPCODE
|
||||
#define SIZE_OP_ANYCHAR_STAR_PEEK_NEXT (SIZE_OPCODE + 1)
|
||||
#define SIZE_OP_JUMP (SIZE_OPCODE + SIZE_RELADDR)
|
||||
#define SIZE_OP_PUSH (SIZE_OPCODE + SIZE_RELADDR)
|
||||
#define SIZE_OP_POP SIZE_OPCODE
|
||||
#define SIZE_OP_PUSH_OR_JUMP_EXACT1 (SIZE_OPCODE + SIZE_RELADDR + 1)
|
||||
#define SIZE_OP_PUSH_IF_PEEK_NEXT (SIZE_OPCODE + SIZE_RELADDR + 1)
|
||||
#define SIZE_OP_REPEAT_INC (SIZE_OPCODE + SIZE_MEMNUM)
|
||||
#define SIZE_OP_REPEAT_INC_NG (SIZE_OPCODE + SIZE_MEMNUM)
|
||||
#define SIZE_OP_PUSH_POS SIZE_OPCODE
|
||||
#define SIZE_OP_PUSH_POS_NOT (SIZE_OPCODE + SIZE_RELADDR)
|
||||
#define SIZE_OP_POP_POS SIZE_OPCODE
|
||||
#define SIZE_OP_FAIL_POS SIZE_OPCODE
|
||||
#define SIZE_OP_SET_OPTION (SIZE_OPCODE + SIZE_OPTION)
|
||||
#define SIZE_OP_SET_OPTION_PUSH (SIZE_OPCODE + SIZE_OPTION)
|
||||
#define SIZE_OP_FAIL SIZE_OPCODE
|
||||
#define SIZE_OP_MEMORY_START (SIZE_OPCODE + SIZE_MEMNUM)
|
||||
#define SIZE_OP_MEMORY_START_PUSH (SIZE_OPCODE + SIZE_MEMNUM)
|
||||
#define SIZE_OP_MEMORY_END_PUSH (SIZE_OPCODE + SIZE_MEMNUM)
|
||||
#define SIZE_OP_MEMORY_END_PUSH_REC (SIZE_OPCODE + SIZE_MEMNUM)
|
||||
#define SIZE_OP_MEMORY_END (SIZE_OPCODE + SIZE_MEMNUM)
|
||||
#define SIZE_OP_MEMORY_END_REC (SIZE_OPCODE + SIZE_MEMNUM)
|
||||
#define SIZE_OP_PUSH_STOP_BT SIZE_OPCODE
|
||||
#define SIZE_OP_POP_STOP_BT SIZE_OPCODE
|
||||
#define SIZE_OP_NULL_CHECK_START (SIZE_OPCODE + SIZE_MEMNUM)
|
||||
#define SIZE_OP_NULL_CHECK_END (SIZE_OPCODE + SIZE_MEMNUM)
|
||||
#define SIZE_OP_LOOK_BEHIND (SIZE_OPCODE + SIZE_LENGTH)
|
||||
#define SIZE_OP_PUSH_LOOK_BEHIND_NOT (SIZE_OPCODE + SIZE_RELADDR + SIZE_LENGTH)
|
||||
#define SIZE_OP_FAIL_LOOK_BEHIND_NOT SIZE_OPCODE
|
||||
#define SIZE_OP_CALL (SIZE_OPCODE + SIZE_ABSADDR)
|
||||
#define SIZE_OP_RETURN SIZE_OPCODE
|
||||
#define SIZE_OP_CONDITION (SIZE_OPCODE + SIZE_MEMNUM + SIZE_RELADDR)
|
||||
|
||||
#ifdef USE_COMBINATION_EXPLOSION_CHECK
|
||||
#define SIZE_OP_STATE_CHECK (SIZE_OPCODE + SIZE_STATE_CHECK_NUM)
|
||||
#define SIZE_OP_STATE_CHECK_PUSH (SIZE_OPCODE + SIZE_STATE_CHECK_NUM + SIZE_RELADDR)
|
||||
#define SIZE_OP_STATE_CHECK_PUSH_OR_JUMP (SIZE_OPCODE + SIZE_STATE_CHECK_NUM + SIZE_RELADDR)
|
||||
#define SIZE_OP_STATE_CHECK_ANYCHAR_STAR (SIZE_OPCODE + SIZE_STATE_CHECK_NUM)
|
||||
#endif
|
||||
|
||||
#define MC_ESC(syn) (syn)->meta_char_table.esc
|
||||
#define MC_ANYCHAR(syn) (syn)->meta_char_table.anychar
|
||||
#define MC_ANYTIME(syn) (syn)->meta_char_table.anytime
|
||||
#define MC_ZERO_OR_ONE_TIME(syn) (syn)->meta_char_table.zero_or_one_time
|
||||
#define MC_ONE_OR_MORE_TIME(syn) (syn)->meta_char_table.one_or_more_time
|
||||
#define MC_ANYCHAR_ANYTIME(syn) (syn)->meta_char_table.anychar_anytime
|
||||
|
||||
#define IS_MC_ESC_CODE(code, syn) \
|
||||
((code) == MC_ESC(syn) && \
|
||||
!IS_SYNTAX_OP2((syn), ONIG_SYN_OP2_INEFFECTIVE_ESCAPE))
|
||||
|
||||
|
||||
#define SYN_POSIX_COMMON_OP \
|
||||
( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_POSIX_BRACKET | \
|
||||
ONIG_SYN_OP_DECIMAL_BACKREF | \
|
||||
ONIG_SYN_OP_BRACKET_CC | ONIG_SYN_OP_ASTERISK_ZERO_INF | \
|
||||
ONIG_SYN_OP_LINE_ANCHOR | \
|
||||
ONIG_SYN_OP_ESC_CONTROL_CHARS )
|
||||
|
||||
#define SYN_GNU_REGEX_OP \
|
||||
( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC | \
|
||||
ONIG_SYN_OP_POSIX_BRACKET | ONIG_SYN_OP_DECIMAL_BACKREF | \
|
||||
ONIG_SYN_OP_BRACE_INTERVAL | ONIG_SYN_OP_LPAREN_SUBEXP | \
|
||||
ONIG_SYN_OP_VBAR_ALT | \
|
||||
ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_PLUS_ONE_INF | \
|
||||
ONIG_SYN_OP_QMARK_ZERO_ONE | \
|
||||
ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR | ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR | \
|
||||
ONIG_SYN_OP_ESC_W_WORD | \
|
||||
ONIG_SYN_OP_ESC_B_WORD_BOUND | ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END | \
|
||||
ONIG_SYN_OP_ESC_S_WHITE_SPACE | ONIG_SYN_OP_ESC_D_DIGIT | \
|
||||
ONIG_SYN_OP_LINE_ANCHOR )
|
||||
|
||||
#define SYN_GNU_REGEX_BV \
|
||||
( ONIG_SYN_CONTEXT_INDEP_ANCHORS | ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS | \
|
||||
ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS | ONIG_SYN_ALLOW_INVALID_INTERVAL | \
|
||||
ONIG_SYN_BACKSLASH_ESCAPE_IN_CC | ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC )
|
||||
|
||||
|
||||
#define NCCLASS_FLAGS(cc) ((cc)->flags)
|
||||
#define NCCLASS_FLAG_SET(cc,flag) (NCCLASS_FLAGS(cc) |= (flag))
|
||||
#define NCCLASS_FLAG_CLEAR(cc,flag) (NCCLASS_FLAGS(cc) &= ~(flag))
|
||||
#define IS_NCCLASS_FLAG_ON(cc,flag) ((NCCLASS_FLAGS(cc) & (flag)) != 0)
|
||||
|
||||
/* cclass node */
|
||||
#define FLAG_NCCLASS_NOT (1<<0)
|
||||
#define FLAG_NCCLASS_SHARE (1<<1)
|
||||
|
||||
#define NCCLASS_SET_NOT(nd) NCCLASS_FLAG_SET(nd, FLAG_NCCLASS_NOT)
|
||||
#define NCCLASS_SET_SHARE(nd) NCCLASS_FLAG_SET(nd, FLAG_NCCLASS_SHARE)
|
||||
#define NCCLASS_CLEAR_NOT(nd) NCCLASS_FLAG_CLEAR(nd, FLAG_NCCLASS_NOT)
|
||||
#define IS_NCCLASS_NOT(nd) IS_NCCLASS_FLAG_ON(nd, FLAG_NCCLASS_NOT)
|
||||
#define IS_NCCLASS_SHARE(nd) IS_NCCLASS_FLAG_ON(nd, FLAG_NCCLASS_SHARE)
|
||||
|
||||
typedef struct {
|
||||
int type;
|
||||
/* struct _Node* next; */
|
||||
/* unsigned int flags; */
|
||||
} NodeBase;
|
||||
|
||||
typedef struct {
|
||||
NodeBase base;
|
||||
unsigned int flags;
|
||||
BitSet bs;
|
||||
BBuf* mbuf; /* multi-byte info or NULL */
|
||||
} CClassNode;
|
||||
|
||||
typedef intptr_t OnigStackIndex;
|
||||
|
||||
typedef struct _OnigStackType {
|
||||
unsigned int type;
|
||||
union {
|
||||
struct {
|
||||
UChar *pcode; /* byte code position */
|
||||
UChar *pstr; /* string position */
|
||||
UChar *pstr_prev; /* previous char position of pstr */
|
||||
#ifdef USE_COMBINATION_EXPLOSION_CHECK
|
||||
unsigned int state_check;
|
||||
#endif
|
||||
UChar *pkeep; /* keep pattern position */
|
||||
} state;
|
||||
struct {
|
||||
int count; /* for OP_REPEAT_INC, OP_REPEAT_INC_NG */
|
||||
UChar *pcode; /* byte code position (head of repeated target) */
|
||||
int num; /* repeat id */
|
||||
} repeat;
|
||||
struct {
|
||||
OnigStackIndex si; /* index of stack */
|
||||
} repeat_inc;
|
||||
struct {
|
||||
int num; /* memory num */
|
||||
UChar *pstr; /* start/end position */
|
||||
/* Following information is set, if this stack type is MEM-START */
|
||||
OnigStackIndex start; /* prev. info (for backtrack "(...)*" ) */
|
||||
OnigStackIndex end; /* prev. info (for backtrack "(...)*" ) */
|
||||
} mem;
|
||||
struct {
|
||||
int num; /* null check id */
|
||||
UChar *pstr; /* start position */
|
||||
} null_check;
|
||||
#ifdef USE_SUBEXP_CALL
|
||||
struct {
|
||||
UChar *ret_addr; /* byte code position */
|
||||
int num; /* null check id */
|
||||
UChar *pstr; /* string position */
|
||||
} call_frame;
|
||||
#endif
|
||||
} u;
|
||||
} OnigStackType;
|
||||
|
||||
typedef struct {
|
||||
void* stack_p;
|
||||
size_t stack_n;
|
||||
OnigOptionType options;
|
||||
OnigRegion* region;
|
||||
const UChar* start; /* search start position */
|
||||
const UChar* gpos; /* global position (for \G: BEGIN_POSITION) */
|
||||
#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
|
||||
OnigPosition best_len; /* for ONIG_OPTION_FIND_LONGEST */
|
||||
UChar* best_s;
|
||||
#endif
|
||||
#ifdef USE_COMBINATION_EXPLOSION_CHECK
|
||||
void* state_check_buff;
|
||||
int state_check_buff_size;
|
||||
#endif
|
||||
} OnigMatchArg;
|
||||
|
||||
|
||||
#define IS_CODE_SB_WORD(enc,code) \
|
||||
(ONIGENC_IS_CODE_ASCII(code) && ONIGENC_IS_CODE_WORD(enc,code))
|
||||
|
||||
#ifdef ONIG_DEBUG
|
||||
|
||||
typedef struct {
|
||||
short int opcode;
|
||||
const char* name;
|
||||
short int arg_type;
|
||||
} OnigOpInfoType;
|
||||
|
||||
extern OnigOpInfoType OnigOpInfo[];
|
||||
|
||||
extern void onig_print_compiled_byte_code P_((FILE* f, UChar* bp, UChar** nextp, OnigEncoding enc));
|
||||
|
||||
#ifdef ONIG_DEBUG_STATISTICS
|
||||
extern void onig_statistics_init P_((void));
|
||||
extern void onig_print_statistics P_((FILE* f));
|
||||
#endif
|
||||
#endif
|
||||
|
||||
extern UChar* onig_error_code_to_format P_((OnigPosition code));
|
||||
extern void onig_snprintf_with_pattern PV_((UChar buf[], int bufsize, OnigEncoding enc, UChar* pat, UChar* pat_end, const UChar *fmt, ...));
|
||||
extern int onig_bbuf_init P_((BBuf* buf, OnigDistance size));
|
||||
extern int onig_compile P_((regex_t* reg, const UChar* pattern, const UChar* pattern_end, OnigErrorInfo* einfo));
|
||||
extern void onig_chain_reduce P_((regex_t* reg));
|
||||
extern void onig_chain_link_add P_((regex_t* to, regex_t* add));
|
||||
extern void onig_transfer P_((regex_t* to, regex_t* from));
|
||||
extern int onig_is_code_in_cc P_((OnigEncoding enc, OnigCodePoint code, CClassNode* cc));
|
||||
extern int onig_is_code_in_cc_len P_((int enclen, OnigCodePoint code, CClassNode* cc));
|
||||
|
||||
/* strend hash */
|
||||
typedef void hash_table_type;
|
||||
typedef uintptr_t hash_data_type;
|
||||
|
||||
extern hash_table_type* onig_st_init_strend_table_with_size P_((int size));
|
||||
extern int onig_st_lookup_strend P_((hash_table_type* table, const UChar* str_key, const UChar* end_key, hash_data_type *value));
|
||||
extern int onig_st_insert_strend P_((hash_table_type* table, const UChar* str_key, const UChar* end_key, hash_data_type value));
|
||||
|
||||
/* encoding property management */
|
||||
/*
 * Register property Name with code range table CR by calling
 * onigenc_property_list_add_property() on the caller's PropertyNameTable,
 * PropertyList, PropertyListNum and PropertyListSize.  The result is stored
 * in the caller's `r`; on a nonzero result control jumps to the caller's
 * `end` label.
 * Wrapped in do { } while (0) so the two statements behave as one statement
 * (e.g. as the body of an unbraced if/else); invoke it with a trailing ';'.
 */
#define PROPERTY_LIST_ADD_PROP(Name, CR) do {\
  r = onigenc_property_list_add_property((UChar* )(Name), (CR),\
        &PropertyNameTable, &PropertyList, &PropertyListNum,\
        &PropertyListSize);\
  if (r != 0) goto end;\
} while (0)
|
||||
|
||||
/*
 * When the caller's PropertyInited is 0, call
 * onigenc_property_list_init(init_property_list) and, if it reports a
 * nonzero result, return that result from the enclosing function.
 * Wrapped in do { } while (0) so it is a single statement that can sit in
 * an unbraced if/else; invoke it with a trailing ';'.
 */
#define PROPERTY_LIST_INIT_CHECK do {\
  if (PropertyInited == 0) {\
    int r = onigenc_property_list_init(init_property_list);\
    if (r != 0) return r;\
  }\
} while (0)
|
||||
|
||||
extern int onigenc_property_list_add_property P_((UChar* name, const OnigCodePoint* prop, hash_table_type **table, const OnigCodePoint*** plist, int *pnum, int *psize));
|
||||
|
||||
typedef int (*ONIGENC_INIT_PROPERTY_LIST_FUNC_TYPE)(void);
|
||||
|
||||
extern int onigenc_property_list_init P_((ONIGENC_INIT_PROPERTY_LIST_FUNC_TYPE));
|
||||
|
||||
extern size_t onig_memsize P_((const regex_t *reg));
|
||||
extern size_t onig_region_memsize P_((const struct re_registers *regs));
|
||||
|
||||
#endif /* REGINT_H */
|
6243
src/Onigmo/regparse.c
Normal file
6243
src/Onigmo/regparse.c
Normal file
File diff suppressed because it is too large
Load Diff
356
src/Onigmo/regparse.h
Normal file
356
src/Onigmo/regparse.h
Normal file
@ -0,0 +1,356 @@
|
||||
#ifndef REGPARSE_H
|
||||
#define REGPARSE_H
|
||||
/**********************************************************************
|
||||
regparse.h - Onigmo (Oniguruma-mod) (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2011 K.Takata <kentkt AT csc DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "regint.h"
|
||||
|
||||
/* node type */
|
||||
#define NT_STR 0
|
||||
#define NT_CCLASS 1
|
||||
#define NT_CTYPE 2
|
||||
#define NT_CANY 3
|
||||
#define NT_BREF 4
|
||||
#define NT_QTFR 5
|
||||
#define NT_ENCLOSE 6
|
||||
#define NT_ANCHOR 7
|
||||
#define NT_LIST 8
|
||||
#define NT_ALT 9
|
||||
#define NT_CALL 10
|
||||
|
||||
/* node type bit */
|
||||
#define NTYPE2BIT(type) (1<<(type))
|
||||
|
||||
#define BIT_NT_STR NTYPE2BIT(NT_STR)
|
||||
#define BIT_NT_CCLASS NTYPE2BIT(NT_CCLASS)
|
||||
#define BIT_NT_CTYPE NTYPE2BIT(NT_CTYPE)
|
||||
#define BIT_NT_CANY NTYPE2BIT(NT_CANY)
|
||||
#define BIT_NT_BREF NTYPE2BIT(NT_BREF)
|
||||
#define BIT_NT_QTFR NTYPE2BIT(NT_QTFR)
|
||||
#define BIT_NT_ENCLOSE NTYPE2BIT(NT_ENCLOSE)
|
||||
#define BIT_NT_ANCHOR NTYPE2BIT(NT_ANCHOR)
|
||||
#define BIT_NT_LIST NTYPE2BIT(NT_LIST)
|
||||
#define BIT_NT_ALT NTYPE2BIT(NT_ALT)
|
||||
#define BIT_NT_CALL NTYPE2BIT(NT_CALL)
|
||||
|
||||
#define IS_NODE_TYPE_SIMPLE(type) \
|
||||
((NTYPE2BIT(type) & (BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE |\
|
||||
BIT_NT_CANY | BIT_NT_BREF)) != 0)
|
||||
|
||||
#define NTYPE(node) ((node)->u.base.type)
|
||||
#define SET_NTYPE(node, ntype) (node)->u.base.type = (ntype)
|
||||
|
||||
#define NSTR(node) (&((node)->u.str))
|
||||
#define NCCLASS(node) (&((node)->u.cclass))
|
||||
#define NCTYPE(node) (&((node)->u.ctype))
|
||||
#define NBREF(node) (&((node)->u.bref))
|
||||
#define NQTFR(node) (&((node)->u.qtfr))
|
||||
#define NENCLOSE(node) (&((node)->u.enclose))
|
||||
#define NANCHOR(node) (&((node)->u.anchor))
|
||||
#define NCONS(node) (&((node)->u.cons))
|
||||
#define NCALL(node) (&((node)->u.call))
|
||||
|
||||
#define NCAR(node) (NCONS(node)->car)
|
||||
#define NCDR(node) (NCONS(node)->cdr)
|
||||
|
||||
|
||||
|
||||
#define ANCHOR_ANYCHAR_STAR_MASK (ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML)
|
||||
#define ANCHOR_END_BUF_MASK (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)
|
||||
|
||||
#define ENCLOSE_MEMORY (1<<0)
|
||||
#define ENCLOSE_OPTION (1<<1)
|
||||
#define ENCLOSE_STOP_BACKTRACK (1<<2)
|
||||
#define ENCLOSE_CONDITION (1<<3)
|
||||
|
||||
#define NODE_STR_MARGIN 16
|
||||
#define NODE_STR_BUF_SIZE 24 /* sizeof(CClassNode) - sizeof(int)*4 */
|
||||
#define NODE_BACKREFS_SIZE 6
|
||||
|
||||
#define NSTR_RAW (1<<0) /* by backslashed number */
|
||||
#define NSTR_AMBIG (1<<1)
|
||||
#define NSTR_DONT_GET_OPT_INFO (1<<2)
|
||||
|
||||
#define NSTRING_LEN(node) (OnigDistance )((node)->u.str.end - (node)->u.str.s)
|
||||
#define NSTRING_SET_RAW(node) (node)->u.str.flag |= NSTR_RAW
|
||||
#define NSTRING_CLEAR_RAW(node) (node)->u.str.flag &= ~NSTR_RAW
|
||||
#define NSTRING_SET_AMBIG(node) (node)->u.str.flag |= NSTR_AMBIG
|
||||
#define NSTRING_SET_DONT_GET_OPT_INFO(node) \
|
||||
(node)->u.str.flag |= NSTR_DONT_GET_OPT_INFO
|
||||
#define NSTRING_IS_RAW(node) (((node)->u.str.flag & NSTR_RAW) != 0)
|
||||
#define NSTRING_IS_AMBIG(node) (((node)->u.str.flag & NSTR_AMBIG) != 0)
|
||||
#define NSTRING_IS_DONT_GET_OPT_INFO(node) \
|
||||
(((node)->u.str.flag & NSTR_DONT_GET_OPT_INFO) != 0)
|
||||
|
||||
/*
 * Pointer to the back-reference number array of br: back_dynamic when it is
 * non-NULL, otherwise the inline back_static array.
 * This is a pure expression (no trailing ';'), so it can be used inside
 * larger expressions as well as on the right-hand side of an assignment.
 */
#define BACKREFS_P(br) \
  (IS_NOT_NULL((br)->back_dynamic) ? (br)->back_dynamic : (br)->back_static)
|
||||
|
||||
#define NQ_TARGET_ISNOT_EMPTY 0
|
||||
#define NQ_TARGET_IS_EMPTY 1
|
||||
#define NQ_TARGET_IS_EMPTY_MEM 2
|
||||
#define NQ_TARGET_IS_EMPTY_REC 3
|
||||
|
||||
/* status bits */
|
||||
#define NST_MIN_FIXED (1<<0)
|
||||
#define NST_MAX_FIXED (1<<1)
|
||||
#define NST_CLEN_FIXED (1<<2)
|
||||
#define NST_MARK1 (1<<3)
|
||||
#define NST_MARK2 (1<<4)
|
||||
#define NST_MEM_BACKREFED (1<<5)
|
||||
#define NST_STOP_BT_SIMPLE_REPEAT (1<<6)
|
||||
#define NST_RECURSION (1<<7)
|
||||
#define NST_CALLED (1<<8)
|
||||
#define NST_ADDR_FIXED (1<<9)
|
||||
#define NST_NAMED_GROUP (1<<10)
|
||||
#define NST_NAME_REF (1<<11)
|
||||
#define NST_IN_REPEAT (1<<12) /* STK_REPEAT is nested in stack. */
|
||||
#define NST_NEST_LEVEL (1<<13)
|
||||
#define NST_BY_NUMBER (1<<14) /* {n,m} */
|
||||
|
||||
#define SET_ENCLOSE_STATUS(node,f) (node)->u.enclose.state |= (f)
|
||||
#define CLEAR_ENCLOSE_STATUS(node,f) (node)->u.enclose.state &= ~(f)
|
||||
|
||||
#define IS_ENCLOSE_CALLED(en) (((en)->state & NST_CALLED) != 0)
|
||||
#define IS_ENCLOSE_ADDR_FIXED(en) (((en)->state & NST_ADDR_FIXED) != 0)
|
||||
#define IS_ENCLOSE_RECURSION(en) (((en)->state & NST_RECURSION) != 0)
|
||||
#define IS_ENCLOSE_MARK1(en) (((en)->state & NST_MARK1) != 0)
|
||||
#define IS_ENCLOSE_MARK2(en) (((en)->state & NST_MARK2) != 0)
|
||||
#define IS_ENCLOSE_MIN_FIXED(en) (((en)->state & NST_MIN_FIXED) != 0)
|
||||
#define IS_ENCLOSE_MAX_FIXED(en) (((en)->state & NST_MAX_FIXED) != 0)
|
||||
#define IS_ENCLOSE_CLEN_FIXED(en) (((en)->state & NST_CLEN_FIXED) != 0)
|
||||
#define IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(en) \
|
||||
(((en)->state & NST_STOP_BT_SIMPLE_REPEAT) != 0)
|
||||
#define IS_ENCLOSE_NAMED_GROUP(en) (((en)->state & NST_NAMED_GROUP) != 0)
|
||||
#define IS_ENCLOSE_NAME_REF(en) (((en)->state & NST_NAME_REF) != 0)
|
||||
|
||||
#define SET_CALL_RECURSION(node) (node)->u.call.state |= NST_RECURSION
|
||||
#define IS_CALL_RECURSION(cn) (((cn)->state & NST_RECURSION) != 0)
|
||||
#define IS_CALL_NAME_REF(cn) (((cn)->state & NST_NAME_REF) != 0)
|
||||
#define IS_BACKREF_NAME_REF(bn) (((bn)->state & NST_NAME_REF) != 0)
|
||||
#define IS_BACKREF_NEST_LEVEL(bn) (((bn)->state & NST_NEST_LEVEL) != 0)
|
||||
#define IS_QUANTIFIER_IN_REPEAT(qn) (((qn)->state & NST_IN_REPEAT) != 0)
|
||||
#define IS_QUANTIFIER_BY_NUMBER(qn) (((qn)->state & NST_BY_NUMBER) != 0)
|
||||
|
||||
#define CALLNODE_REFNUM_UNDEF -1
|
||||
|
||||
typedef struct {
|
||||
NodeBase base;
|
||||
UChar* s;
|
||||
UChar* end;
|
||||
unsigned int flag;
|
||||
int capa; /* (allocated size - 1) or 0: use buf[] */
|
||||
UChar buf[NODE_STR_BUF_SIZE];
|
||||
} StrNode;
|
||||
|
||||
typedef struct {
|
||||
NodeBase base;
|
||||
int state;
|
||||
struct _Node* target;
|
||||
int lower;
|
||||
int upper;
|
||||
int greedy;
|
||||
int target_empty_info;
|
||||
struct _Node* head_exact;
|
||||
struct _Node* next_head_exact;
|
||||
int is_refered; /* include called node. don't eliminate even if {0} */
|
||||
#ifdef USE_COMBINATION_EXPLOSION_CHECK
|
||||
int comb_exp_check_num; /* 1,2,3...: check, 0: no check */
|
||||
#endif
|
||||
} QtfrNode;
|
||||
|
||||
typedef struct {
|
||||
NodeBase base;
|
||||
int state;
|
||||
int type;
|
||||
int regnum;
|
||||
OnigOptionType option;
|
||||
struct _Node* target;
|
||||
AbsAddrType call_addr;
|
||||
/* for multiple call reference */
|
||||
OnigDistance min_len; /* min length (byte) */
|
||||
OnigDistance max_len; /* max length (byte) */
|
||||
int char_len; /* character length */
|
||||
int opt_count; /* referenced count in optimize_node_left() */
|
||||
} EncloseNode;
|
||||
|
||||
#ifdef USE_SUBEXP_CALL
|
||||
|
||||
typedef struct {
|
||||
int offset;
|
||||
struct _Node* target;
|
||||
} UnsetAddr;
|
||||
|
||||
typedef struct {
|
||||
int num;
|
||||
int alloc;
|
||||
UnsetAddr* us;
|
||||
} UnsetAddrList;
|
||||
|
||||
typedef struct {
|
||||
NodeBase base;
|
||||
int state;
|
||||
int group_num;
|
||||
UChar* name;
|
||||
UChar* name_end;
|
||||
struct _Node* target; /* EncloseNode : ENCLOSE_MEMORY */
|
||||
UnsetAddrList* unset_addr_list;
|
||||
} CallNode;
|
||||
|
||||
#endif
|
||||
|
||||
typedef struct {
|
||||
NodeBase base;
|
||||
int state;
|
||||
int back_num;
|
||||
int back_static[NODE_BACKREFS_SIZE];
|
||||
int* back_dynamic;
|
||||
int nest_level;
|
||||
} BRefNode;
|
||||
|
||||
typedef struct {
|
||||
NodeBase base;
|
||||
int type;
|
||||
struct _Node* target;
|
||||
int char_len;
|
||||
int ascii_range;
|
||||
} AnchorNode;
|
||||
|
||||
typedef struct {
|
||||
NodeBase base;
|
||||
struct _Node* car;
|
||||
struct _Node* cdr;
|
||||
} ConsAltNode;
|
||||
|
||||
typedef struct {
|
||||
NodeBase base;
|
||||
int ctype;
|
||||
int not;
|
||||
int ascii_range;
|
||||
} CtypeNode;
|
||||
|
||||
typedef struct _Node {
|
||||
union {
|
||||
NodeBase base;
|
||||
StrNode str;
|
||||
CClassNode cclass;
|
||||
QtfrNode qtfr;
|
||||
EncloseNode enclose;
|
||||
BRefNode bref;
|
||||
AnchorNode anchor;
|
||||
ConsAltNode cons;
|
||||
CtypeNode ctype;
|
||||
#ifdef USE_SUBEXP_CALL
|
||||
CallNode call;
|
||||
#endif
|
||||
} u;
|
||||
} Node;
|
||||
|
||||
|
||||
/* Null pointer typed as a Node pointer. */
#define NULL_NODE  ((Node* )0)

/* Number of slots in ScanEnv's inline mem_nodes_static[] array. */
#define SCANENV_MEMNODES_SIZE               8

/*
 * Active memory-node array of a scan environment: the dynamic array when
 * one has been set, otherwise the inline static array.
 */
#define SCANENV_MEM_NODES(senv) \
  (IS_NOT_NULL((senv)->mem_nodes_dynamic) ? \
   (senv)->mem_nodes_dynamic : (senv)->mem_nodes_static)
|
||||
|
||||
typedef struct {
|
||||
OnigOptionType option;
|
||||
OnigCaseFoldType case_fold_flag;
|
||||
OnigEncoding enc;
|
||||
OnigSyntaxType* syntax;
|
||||
BitStatusType capture_history;
|
||||
BitStatusType bt_mem_start;
|
||||
BitStatusType bt_mem_end;
|
||||
BitStatusType backrefed_mem;
|
||||
UChar* pattern;
|
||||
UChar* pattern_end;
|
||||
UChar* error;
|
||||
UChar* error_end;
|
||||
regex_t* reg; /* for reg->names only */
|
||||
int num_call;
|
||||
#ifdef USE_SUBEXP_CALL
|
||||
UnsetAddrList* unset_addr_list;
|
||||
#endif
|
||||
int num_mem;
|
||||
#ifdef USE_NAMED_GROUP
|
||||
int num_named;
|
||||
#endif
|
||||
int mem_alloc;
|
||||
Node* mem_nodes_static[SCANENV_MEMNODES_SIZE];
|
||||
Node** mem_nodes_dynamic;
|
||||
#ifdef USE_COMBINATION_EXPLOSION_CHECK
|
||||
int num_comb_exp_check;
|
||||
int comb_exp_max_regnum;
|
||||
int curr_max_regnum;
|
||||
int has_recursion;
|
||||
#endif
|
||||
} ScanEnv;
|
||||
|
||||
|
||||
/*
 * Syntax flag tests: 1 when any bit of the mask is set in the syntax's
 * `op`, `op2` or `behavior` word respectively, else 0.
 */
#define IS_SYNTAX_OP(syn, opm)    (((syn)->op       & (opm)) != 0)
#define IS_SYNTAX_OP2(syn, opm)   (((syn)->op2      & (opm)) != 0)
#define IS_SYNTAX_BV(syn, bvm)    (((syn)->behavior & (bvm)) != 0)
|
||||
|
||||
#ifdef USE_NAMED_GROUP
|
||||
/* Renumbering map entry handed to onig_renumber_name_table(). */
typedef struct {
  int  new_val;
} GroupNumRemap;
|
||||
|
||||
extern int onig_renumber_name_table P_((regex_t* reg, GroupNumRemap* map));
|
||||
#endif
|
||||
|
||||
extern int onig_strncmp P_((const UChar* s1, const UChar* s2, int n));
|
||||
extern void onig_strcpy P_((UChar* dest, const UChar* src, const UChar* end));
|
||||
extern void onig_scan_env_set_error_string P_((ScanEnv* env, int ecode, UChar* arg, UChar* arg_end));
|
||||
extern int onig_scan_unsigned_number P_((UChar** src, const UChar* end, OnigEncoding enc));
|
||||
extern void onig_reduce_nested_quantifier P_((Node* pnode, Node* cnode));
|
||||
extern void onig_node_conv_to_str_node P_((Node* node, int raw));
|
||||
extern int onig_node_str_cat P_((Node* node, const UChar* s, const UChar* end));
|
||||
extern int onig_node_str_set P_((Node* node, const UChar* s, const UChar* end));
|
||||
extern void onig_node_free P_((Node* node));
|
||||
extern Node* onig_node_new_enclose P_((int type));
|
||||
extern Node* onig_node_new_anchor P_((int type));
|
||||
extern Node* onig_node_new_str P_((const UChar* s, const UChar* end));
|
||||
extern Node* onig_node_new_list P_((Node* left, Node* right));
|
||||
extern Node* onig_node_list_add P_((Node* list, Node* x));
|
||||
extern Node* onig_node_new_alt P_((Node* left, Node* right));
|
||||
extern void onig_node_str_clear P_((Node* node));
|
||||
extern int onig_free_node_list P_((void));
|
||||
extern int onig_names_free P_((regex_t* reg));
|
||||
extern int onig_parse_make_tree P_((Node** root, const UChar* pattern, const UChar* end, regex_t* reg, ScanEnv* env));
|
||||
extern int onig_free_shared_cclass_table P_((void));
|
||||
|
||||
#ifdef ONIG_DEBUG
|
||||
#ifdef USE_NAMED_GROUP
|
||||
extern int onig_print_names(FILE*, regex_t*);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#endif /* REGPARSE_H */
|
98
src/Onigmo/regposerr.c
Normal file
98
src/Onigmo/regposerr.c
Normal file
@ -0,0 +1,98 @@
|
||||
/**********************************************************************
|
||||
regposerr.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
#include "onigposix.h"
|
||||
|
||||
#ifdef HAVE_STRING_H
|
||||
# include <string.h>
|
||||
#else
|
||||
# include <strings.h>
|
||||
#endif
|
||||
|
||||
/*
 * ARG_UNUSED tags a deliberately unused parameter.  It expands to the GNU
 * `unused` attribute when __GNUC__ is defined and to nothing otherwise.
 */
#ifdef __GNUC__
# define ARG_UNUSED  __attribute__ ((unused))
#else
# define ARG_UNUSED
#endif
|
||||
|
||||
/*
 * Messages for regerror(), indexed by POSIX error code.  Slot 0 is unused
 * (regerror() handles code 0 itself).  The pointer array is const so the
 * table cannot be re-pointed at run time; the element type stays `char*`
 * so existing readers that assign an entry to a `char*` are unaffected.
 */
static char* const ESTRING[] = {
  NULL,
  "failed to match",                         /* REG_NOMATCH    */
  "Invalid regular expression",              /* REG_BADPAT     */
  "invalid collating element referenced",    /* REG_ECOLLATE   */
  "invalid character class type referenced", /* REG_ECTYPE     */
  "bad backslash-escape sequence",           /* REG_EESCAPE    */
  "invalid back reference number",           /* REG_ESUBREG    */
  "imbalanced [ and ]",                      /* REG_EBRACK     */
  "imbalanced ( and )",                      /* REG_EPAREN     */
  "imbalanced { and }",                      /* REG_EBRACE     */
  "invalid repeat range {n,m}",              /* REG_BADBR      */
  "invalid range",                           /* REG_ERANGE     */
  "Out of memory",                           /* REG_ESPACE     */
  "? * + not preceded by valid regular expression", /* REG_BADRPT */

  /* Extended errors */
  "internal error",                          /* REG_EONIG_INTERNAL */
  "invalid wide char value",                 /* REG_EONIG_BADWC    */
  "invalid argument",                        /* REG_EONIG_BADARG   */
  "multi-thread error"                       /* REG_EONIG_THREAD   */
};
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
|
||||
extern size_t
|
||||
regerror(int posix_ecode, const regex_t* reg ARG_UNUSED, char* buf,
|
||||
size_t size)
|
||||
{
|
||||
char* s;
|
||||
char tbuf[35];
|
||||
size_t len;
|
||||
|
||||
if (posix_ecode > 0
|
||||
&& posix_ecode < (int )(sizeof(ESTRING) / sizeof(ESTRING[0]))) {
|
||||
s = ESTRING[posix_ecode];
|
||||
}
|
||||
else if (posix_ecode == 0) {
|
||||
s = "";
|
||||
}
|
||||
else {
|
||||
sprintf(tbuf, "undefined error code (%d)", posix_ecode);
|
||||
s = tbuf;
|
||||
}
|
||||
|
||||
len = strlen(s) + 1; /* use strlen() because s is ascii encoding. */
|
||||
|
||||
if (buf != NULL && size > 0) {
|
||||
strncpy(buf, s, size - 1);
|
||||
buf[size - 1] = '\0';
|
||||
}
|
||||
return len;
|
||||
}
|
304
src/Onigmo/regposix.c
Normal file
304
src/Onigmo/regposix.c
Normal file
@ -0,0 +1,304 @@
|
||||
/**********************************************************************
|
||||
regposix.c - Onigmo (Oniguruma-mod) (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2011 K.Takata <kentkt AT csc DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#define regex_t onig_regex_t
|
||||
#include "regint.h"
|
||||
#undef regex_t
|
||||
#include "onigposix.h"
|
||||
|
||||
/*
 * Accessors for the Onigmo regex kept in a POSIX regex_t's `onig` member:
 * ONIG_C yields the stored handle, PONIG_C the address of the slot that
 * holds it (for functions that fill the handle in).
 */
#define ONIG_C(reg)    ((onig_regex_t* )((reg)->onig))
#define PONIG_C(reg)   ((onig_regex_t** )(&(reg)->onig))
|
||||
|
||||
/* #define ENC_STRING_LEN(enc,s,len) len = strlen(s) */
|
||||
/*
 * ENC_STRING_LEN(enc, s, len) - store the byte length of the terminated
 * string `s` in `len`.
 *
 * When the encoding's minimum character length is 1 the bytes are scanned
 * for a zero byte directly; otherwise the work is delegated to
 * onigenc_str_bytelen_null().
 *
 * Every macro argument is parenthesized, and `s` is evaluated exactly once
 * on whichever branch runs, so expressions such as `p + 1` or `c ? a : b`
 * can be passed safely.
 */
#define ENC_STRING_LEN(enc,s,len) do { \
  if (ONIGENC_MBC_MINLEN(enc) == 1) { \
    const UChar* enc_str_head = (const UChar* )(s); \
    const UChar* enc_str_tail = enc_str_head; \
    while (*enc_str_tail != 0) enc_str_tail++; \
    (len) = (int )(enc_str_tail - enc_str_head); \
  } \
  else { \
    (len) = onigenc_str_bytelen_null((enc), (UChar* )(s)); \
  } \
} while(0)
|
||||
|
||||
/* One row of the Onigmo -> POSIX error translation table. */
typedef struct {
  int  onig_err;    /* Onigmo error code        */
  int  posix_err;   /* POSIX code it maps to    */
} O2PERR;
|
||||
|
||||
static int
|
||||
onig2posix_error_code(int code)
|
||||
{
|
||||
static const O2PERR o2p[] = {
|
||||
{ ONIG_MISMATCH, REG_NOMATCH },
|
||||
{ ONIG_NO_SUPPORT_CONFIG, REG_EONIG_INTERNAL },
|
||||
{ ONIGERR_MEMORY, REG_ESPACE },
|
||||
{ ONIGERR_MATCH_STACK_LIMIT_OVER, REG_EONIG_INTERNAL },
|
||||
{ ONIGERR_TYPE_BUG, REG_EONIG_INTERNAL },
|
||||
{ ONIGERR_PARSER_BUG, REG_EONIG_INTERNAL },
|
||||
{ ONIGERR_STACK_BUG, REG_EONIG_INTERNAL },
|
||||
{ ONIGERR_UNDEFINED_BYTECODE, REG_EONIG_INTERNAL },
|
||||
{ ONIGERR_UNEXPECTED_BYTECODE, REG_EONIG_INTERNAL },
|
||||
{ ONIGERR_DEFAULT_ENCODING_IS_NOT_SET, REG_EONIG_BADARG },
|
||||
{ ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR, REG_EONIG_BADARG },
|
||||
{ ONIGERR_INVALID_ARGUMENT, REG_EONIG_BADARG },
|
||||
{ ONIGERR_END_PATTERN_AT_LEFT_BRACE, REG_EBRACE },
|
||||
{ ONIGERR_END_PATTERN_AT_LEFT_BRACKET, REG_EBRACK },
|
||||
{ ONIGERR_EMPTY_CHAR_CLASS, REG_ECTYPE },
|
||||
{ ONIGERR_PREMATURE_END_OF_CHAR_CLASS, REG_ECTYPE },
|
||||
{ ONIGERR_END_PATTERN_AT_ESCAPE, REG_EESCAPE },
|
||||
{ ONIGERR_END_PATTERN_AT_META, REG_EESCAPE },
|
||||
{ ONIGERR_END_PATTERN_AT_CONTROL, REG_EESCAPE },
|
||||
{ ONIGERR_META_CODE_SYNTAX, REG_BADPAT },
|
||||
{ ONIGERR_CONTROL_CODE_SYNTAX, REG_BADPAT },
|
||||
{ ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE, REG_ECTYPE },
|
||||
{ ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE, REG_ECTYPE },
|
||||
{ ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS, REG_ECTYPE },
|
||||
{ ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED, REG_BADRPT },
|
||||
{ ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID, REG_BADRPT },
|
||||
{ ONIGERR_NESTED_REPEAT_OPERATOR, REG_BADRPT },
|
||||
{ ONIGERR_UNMATCHED_CLOSE_PARENTHESIS, REG_EPAREN },
|
||||
{ ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS, REG_EPAREN },
|
||||
{ ONIGERR_END_PATTERN_IN_GROUP, REG_BADPAT },
|
||||
{ ONIGERR_UNDEFINED_GROUP_OPTION, REG_BADPAT },
|
||||
{ ONIGERR_INVALID_POSIX_BRACKET_TYPE, REG_BADPAT },
|
||||
{ ONIGERR_INVALID_LOOK_BEHIND_PATTERN, REG_BADPAT },
|
||||
{ ONIGERR_INVALID_REPEAT_RANGE_PATTERN, REG_BADPAT },
|
||||
{ ONIGERR_TOO_BIG_NUMBER, REG_BADPAT },
|
||||
{ ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE, REG_BADBR },
|
||||
{ ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE, REG_BADBR },
|
||||
{ ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS, REG_ECTYPE },
|
||||
{ ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE, REG_ECTYPE },
|
||||
{ ONIGERR_TOO_MANY_MULTI_BYTE_RANGES, REG_ECTYPE },
|
||||
{ ONIGERR_TOO_SHORT_MULTI_BYTE_STRING, REG_BADPAT },
|
||||
{ ONIGERR_TOO_BIG_BACKREF_NUMBER, REG_ESUBREG },
|
||||
{ ONIGERR_INVALID_BACKREF, REG_ESUBREG },
|
||||
{ ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED, REG_BADPAT },
|
||||
{ ONIGERR_TOO_BIG_WIDE_CHAR_VALUE, REG_EONIG_BADWC },
|
||||
{ ONIGERR_TOO_LONG_WIDE_CHAR_VALUE, REG_EONIG_BADWC },
|
||||
{ ONIGERR_INVALID_CODE_POINT_VALUE, REG_EONIG_BADWC },
|
||||
{ ONIGERR_EMPTY_GROUP_NAME, REG_BADPAT },
|
||||
{ ONIGERR_INVALID_GROUP_NAME, REG_BADPAT },
|
||||
{ ONIGERR_INVALID_CHAR_IN_GROUP_NAME, REG_BADPAT },
|
||||
{ ONIGERR_UNDEFINED_NAME_REFERENCE, REG_BADPAT },
|
||||
{ ONIGERR_UNDEFINED_GROUP_REFERENCE, REG_BADPAT },
|
||||
{ ONIGERR_MULTIPLEX_DEFINED_NAME, REG_BADPAT },
|
||||
{ ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, REG_BADPAT },
|
||||
{ ONIGERR_NEVER_ENDING_RECURSION, REG_BADPAT },
|
||||
{ ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY, REG_BADPAT },
|
||||
{ ONIGERR_INVALID_CHAR_PROPERTY_NAME, REG_BADPAT },
|
||||
{ ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION, REG_EONIG_BADARG },
|
||||
{ ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT, REG_EONIG_THREAD }
|
||||
|
||||
};
|
||||
|
||||
int i;
|
||||
|
||||
if (code >= 0) return 0;
|
||||
|
||||
for (i = 0; i < (int )(sizeof(o2p) / sizeof(o2p[0])); i++) {
|
||||
if (code == o2p[i].onig_err)
|
||||
return o2p[i].posix_err;
|
||||
}
|
||||
|
||||
return REG_EONIG_INTERNAL; /* but, unknown error code */
|
||||
}
|
||||
|
||||
extern int
|
||||
regcomp(regex_t* reg, const char* pattern, int posix_options)
|
||||
{
|
||||
int r, len;
|
||||
OnigSyntaxType* syntax = OnigDefaultSyntax;
|
||||
OnigOptionType options;
|
||||
|
||||
if ((posix_options & REG_EXTENDED) == 0)
|
||||
syntax = ONIG_SYNTAX_POSIX_BASIC;
|
||||
|
||||
options = syntax->options;
|
||||
if ((posix_options & REG_ICASE) != 0)
|
||||
ONIG_OPTION_ON(options, ONIG_OPTION_IGNORECASE);
|
||||
if ((posix_options & REG_NEWLINE) != 0) {
|
||||
ONIG_OPTION_ON( options, ONIG_OPTION_NEGATE_SINGLELINE);
|
||||
ONIG_OPTION_OFF(options, ONIG_OPTION_SINGLELINE);
|
||||
}
|
||||
|
||||
reg->comp_options = posix_options;
|
||||
|
||||
ENC_STRING_LEN(OnigEncDefaultCharEncoding, pattern, len);
|
||||
r = onig_new(PONIG_C(reg), (UChar* )pattern, (UChar* )(pattern + len),
|
||||
options, OnigEncDefaultCharEncoding, syntax,
|
||||
(OnigErrorInfo* )NULL);
|
||||
if (r != ONIG_NORMAL) {
|
||||
return onig2posix_error_code(r);
|
||||
}
|
||||
|
||||
reg->re_nsub = ONIG_C(reg)->num_mem;
|
||||
return 0;
|
||||
}
|
||||
|
||||
extern int
|
||||
regexec(regex_t* reg, const char* str, size_t nmatch,
|
||||
regmatch_t pmatch[], int posix_options)
|
||||
{
|
||||
int r, i, len;
|
||||
UChar* end;
|
||||
regmatch_t* pm;
|
||||
OnigOptionType options;
|
||||
|
||||
options = ONIG_OPTION_POSIX_REGION;
|
||||
if ((posix_options & REG_NOTBOL) != 0) options |= ONIG_OPTION_NOTBOL;
|
||||
if ((posix_options & REG_NOTEOL) != 0) options |= ONIG_OPTION_NOTEOL;
|
||||
|
||||
if (nmatch == 0 || (reg->comp_options & REG_NOSUB) != 0) {
|
||||
pm = (regmatch_t* )NULL;
|
||||
nmatch = 0;
|
||||
}
|
||||
else if ((int )nmatch < ONIG_C(reg)->num_mem + 1) {
|
||||
pm = (regmatch_t* )xmalloc(sizeof(regmatch_t)
|
||||
* (ONIG_C(reg)->num_mem + 1));
|
||||
if (pm == NULL)
|
||||
return REG_ESPACE;
|
||||
}
|
||||
else {
|
||||
pm = pmatch;
|
||||
}
|
||||
|
||||
ENC_STRING_LEN(ONIG_C(reg)->enc, str, len);
|
||||
end = (UChar* )(str + len);
|
||||
r = (int )onig_search(ONIG_C(reg), (UChar* )str, end, (UChar* )str, end,
|
||||
(OnigRegion* )pm, options);
|
||||
|
||||
if (r >= 0) {
|
||||
r = 0; /* Match */
|
||||
if (pm != pmatch && pm != NULL) {
|
||||
xmemcpy(pmatch, pm, sizeof(regmatch_t) * nmatch);
|
||||
}
|
||||
}
|
||||
else if (r == ONIG_MISMATCH) {
|
||||
r = REG_NOMATCH;
|
||||
for (i = 0; i < (int )nmatch; i++)
|
||||
pmatch[i].rm_so = pmatch[i].rm_eo = ONIG_REGION_NOTPOS;
|
||||
}
|
||||
else {
|
||||
r = onig2posix_error_code(r);
|
||||
}
|
||||
|
||||
if (pm != pmatch && pm != NULL)
|
||||
xfree(pm);
|
||||
|
||||
#if 0
|
||||
if (reg->re_nsub > nmatch - 1)
|
||||
reg->re_nsub = (nmatch <= 1 ? 0 : nmatch - 1);
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
extern void
|
||||
regfree(regex_t* reg)
|
||||
{
|
||||
onig_free(ONIG_C(reg));
|
||||
}
|
||||
|
||||
|
||||
extern void
|
||||
reg_set_encoding(int mb_code)
|
||||
{
|
||||
OnigEncoding enc;
|
||||
|
||||
switch (mb_code) {
|
||||
case REG_POSIX_ENCODING_ASCII:
|
||||
enc = ONIG_ENCODING_ASCII;
|
||||
break;
|
||||
case REG_POSIX_ENCODING_EUC_JP:
|
||||
enc = ONIG_ENCODING_EUC_JP;
|
||||
break;
|
||||
case REG_POSIX_ENCODING_SJIS:
|
||||
enc = ONIG_ENCODING_SJIS;
|
||||
break;
|
||||
case REG_POSIX_ENCODING_UTF8:
|
||||
enc = ONIG_ENCODING_UTF8;
|
||||
break;
|
||||
case REG_POSIX_ENCODING_UTF16_BE:
|
||||
enc = ONIG_ENCODING_UTF16_BE;
|
||||
break;
|
||||
case REG_POSIX_ENCODING_UTF16_LE:
|
||||
enc = ONIG_ENCODING_UTF16_LE;
|
||||
break;
|
||||
|
||||
default:
|
||||
return ;
|
||||
break;
|
||||
}
|
||||
|
||||
onigenc_set_default_encoding(enc);
|
||||
}
|
||||
|
||||
extern int
|
||||
reg_name_to_group_numbers(regex_t* reg,
|
||||
const unsigned char* name, const unsigned char* name_end, int** nums)
|
||||
{
|
||||
return onig_name_to_group_numbers(ONIG_C(reg), name, name_end, nums);
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
int (*func)(const unsigned char*, const unsigned char*,int,int*,regex_t*,void*);
|
||||
regex_t* reg;
|
||||
void* arg;
|
||||
} i_wrap;
|
||||
|
||||
static int
|
||||
i_wrapper(const UChar* name, const UChar* name_end, int ng, int* gs,
|
||||
onig_regex_t* reg ARG_UNUSED, void* arg)
|
||||
{
|
||||
i_wrap* warg = (i_wrap* )arg;
|
||||
|
||||
return (*warg->func)(name, name_end, ng, gs, warg->reg, warg->arg);
|
||||
}
|
||||
|
||||
extern int
|
||||
reg_foreach_name(regex_t* reg,
|
||||
int (*func)(const unsigned char*, const unsigned char*,int,int*,regex_t*,void*),
|
||||
void* arg)
|
||||
{
|
||||
i_wrap warg;
|
||||
|
||||
warg.func = func;
|
||||
warg.reg = reg;
|
||||
warg.arg = arg;
|
||||
|
||||
return onig_foreach_name(ONIG_C(reg), i_wrapper, &warg);
|
||||
}
|
||||
|
||||
extern int
|
||||
reg_number_of_names(regex_t* reg)
|
||||
{
|
||||
return onig_number_of_names(ONIG_C(reg));
|
||||
}
|
387
src/Onigmo/regsyntax.c
Normal file
387
src/Onigmo/regsyntax.c
Normal file
@ -0,0 +1,387 @@
|
||||
/**********************************************************************
|
||||
regsyntax.c - Onigmo (Oniguruma-mod) (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2011-2012 K.Takata <kentkt AT csc DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "regint.h"
|
||||
|
||||
OnigSyntaxType OnigSyntaxASIS = {
|
||||
0
|
||||
, ONIG_SYN_OP2_INEFFECTIVE_ESCAPE
|
||||
, 0
|
||||
, ONIG_OPTION_NONE
|
||||
,
|
||||
{
|
||||
(OnigCodePoint )'\\' /* esc */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
|
||||
}
|
||||
};
|
||||
|
||||
OnigSyntaxType OnigSyntaxPosixBasic = {
|
||||
( SYN_POSIX_COMMON_OP | ONIG_SYN_OP_ESC_LPAREN_SUBEXP |
|
||||
ONIG_SYN_OP_ESC_BRACE_INTERVAL )
|
||||
, 0
|
||||
, 0
|
||||
, ( ONIG_OPTION_SINGLELINE | ONIG_OPTION_MULTILINE )
|
||||
,
|
||||
{
|
||||
(OnigCodePoint )'\\' /* esc */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
|
||||
}
|
||||
};
|
||||
|
||||
OnigSyntaxType OnigSyntaxPosixExtended = {
|
||||
( SYN_POSIX_COMMON_OP | ONIG_SYN_OP_LPAREN_SUBEXP |
|
||||
ONIG_SYN_OP_BRACE_INTERVAL |
|
||||
ONIG_SYN_OP_PLUS_ONE_INF | ONIG_SYN_OP_QMARK_ZERO_ONE | ONIG_SYN_OP_VBAR_ALT )
|
||||
, 0
|
||||
, ( ONIG_SYN_CONTEXT_INDEP_ANCHORS |
|
||||
ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS | ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS |
|
||||
ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP |
|
||||
ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC )
|
||||
, ( ONIG_OPTION_SINGLELINE | ONIG_OPTION_MULTILINE )
|
||||
,
|
||||
{
|
||||
(OnigCodePoint )'\\' /* esc */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
|
||||
}
|
||||
};
|
||||
|
||||
OnigSyntaxType OnigSyntaxEmacs = {
|
||||
( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC |
|
||||
ONIG_SYN_OP_ESC_BRACE_INTERVAL |
|
||||
ONIG_SYN_OP_ESC_LPAREN_SUBEXP | ONIG_SYN_OP_ESC_VBAR_ALT |
|
||||
ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_PLUS_ONE_INF |
|
||||
ONIG_SYN_OP_QMARK_ZERO_ONE | ONIG_SYN_OP_DECIMAL_BACKREF |
|
||||
ONIG_SYN_OP_LINE_ANCHOR | ONIG_SYN_OP_ESC_CONTROL_CHARS )
|
||||
, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR
|
||||
, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC
|
||||
, ONIG_OPTION_NONE
|
||||
,
|
||||
{
|
||||
(OnigCodePoint )'\\' /* esc */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
|
||||
}
|
||||
};
|
||||
|
||||
OnigSyntaxType OnigSyntaxGrep = {
|
||||
( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC | ONIG_SYN_OP_POSIX_BRACKET |
|
||||
ONIG_SYN_OP_ESC_BRACE_INTERVAL | ONIG_SYN_OP_ESC_LPAREN_SUBEXP |
|
||||
ONIG_SYN_OP_ESC_VBAR_ALT |
|
||||
ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_ESC_PLUS_ONE_INF |
|
||||
ONIG_SYN_OP_ESC_QMARK_ZERO_ONE | ONIG_SYN_OP_LINE_ANCHOR |
|
||||
ONIG_SYN_OP_ESC_W_WORD | ONIG_SYN_OP_ESC_B_WORD_BOUND |
|
||||
ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END | ONIG_SYN_OP_DECIMAL_BACKREF )
|
||||
, 0
|
||||
, ( ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC | ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC )
|
||||
, ONIG_OPTION_NONE
|
||||
,
|
||||
{
|
||||
(OnigCodePoint )'\\' /* esc */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
|
||||
}
|
||||
};
|
||||
|
||||
OnigSyntaxType OnigSyntaxGnuRegex = {
|
||||
SYN_GNU_REGEX_OP
|
||||
, 0
|
||||
, SYN_GNU_REGEX_BV
|
||||
, ONIG_OPTION_NONE
|
||||
,
|
||||
{
|
||||
(OnigCodePoint )'\\' /* esc */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
|
||||
}
|
||||
};
|
||||
|
||||
OnigSyntaxType OnigSyntaxJava = {
|
||||
(( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
|
||||
ONIG_SYN_OP_ESC_CONTROL_CHARS | ONIG_SYN_OP_ESC_C_CONTROL |
|
||||
ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 )
|
||||
& ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
|
||||
, ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE | ONIG_SYN_OP2_QMARK_GROUP_EFFECT |
|
||||
ONIG_SYN_OP2_OPTION_PERL | ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |
|
||||
ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL | ONIG_SYN_OP2_CCLASS_SET_OP |
|
||||
ONIG_SYN_OP2_ESC_V_VTAB | ONIG_SYN_OP2_ESC_U_HEX4 |
|
||||
ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY )
|
||||
, ( SYN_GNU_REGEX_BV | ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND )
|
||||
, ( ONIG_OPTION_SINGLELINE | ONIG_OPTION_ASCII_RANGE |
|
||||
ONIG_OPTION_WORD_BOUND_ALL_RANGE )
|
||||
,
|
||||
{
|
||||
(OnigCodePoint )'\\' /* esc */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
|
||||
}
|
||||
};
|
||||
|
||||
/* Perl 5.8 */
|
||||
OnigSyntaxType OnigSyntaxPerl58 = {
|
||||
(( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
|
||||
ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
|
||||
ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS |
|
||||
ONIG_SYN_OP_ESC_C_CONTROL )
|
||||
& ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
|
||||
, ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE |
|
||||
ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL |
|
||||
ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY |
|
||||
ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
|
||||
ONIG_SYN_OP2_ESC_CAPITAL_X_EXTENDED_GRAPHEME_CLUSTER |
|
||||
ONIG_SYN_OP2_QMARK_LPAREN_CONDITION)
|
||||
, SYN_GNU_REGEX_BV
|
||||
, ONIG_OPTION_SINGLELINE
|
||||
,
|
||||
{
|
||||
(OnigCodePoint )'\\' /* esc */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
|
||||
}
|
||||
};
|
||||
|
||||
/* Perl 5.8 + named group */
|
||||
OnigSyntaxType OnigSyntaxPerl58_NG = {
|
||||
(( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
|
||||
ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
|
||||
ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS |
|
||||
ONIG_SYN_OP_ESC_C_CONTROL )
|
||||
& ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
|
||||
, ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE |
|
||||
ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL |
|
||||
ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY |
|
||||
ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
|
||||
ONIG_SYN_OP2_ESC_CAPITAL_X_EXTENDED_GRAPHEME_CLUSTER |
|
||||
ONIG_SYN_OP2_QMARK_LPAREN_CONDITION |
|
||||
ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP |
|
||||
ONIG_SYN_OP2_ESC_K_NAMED_BACKREF |
|
||||
ONIG_SYN_OP2_ESC_G_SUBEXP_CALL )
|
||||
, ( SYN_GNU_REGEX_BV |
|
||||
ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |
|
||||
ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME )
|
||||
, ONIG_OPTION_SINGLELINE
|
||||
,
|
||||
{
|
||||
(OnigCodePoint )'\\' /* esc */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
|
||||
}
|
||||
};
|
||||
|
||||
/* Perl 5.10+ */
|
||||
OnigSyntaxType OnigSyntaxPerl = {
|
||||
(( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
|
||||
ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
|
||||
ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS |
|
||||
ONIG_SYN_OP_ESC_C_CONTROL )
|
||||
& ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
|
||||
, ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE |
|
||||
ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL |
|
||||
ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY |
|
||||
ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
|
||||
ONIG_SYN_OP2_ESC_CAPITAL_X_EXTENDED_GRAPHEME_CLUSTER |
|
||||
ONIG_SYN_OP2_QMARK_LPAREN_CONDITION |
|
||||
ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |
|
||||
ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL |
|
||||
ONIG_SYN_OP2_ESC_CAPITAL_R_LINEBREAK |
|
||||
ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP |
|
||||
ONIG_SYN_OP2_QMARK_SUBEXP_CALL |
|
||||
ONIG_SYN_OP2_ESC_G_BRACE_BACKREF |
|
||||
ONIG_SYN_OP2_QMARK_CAPITAL_P_NAMED_GROUP |
|
||||
ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP |
|
||||
ONIG_SYN_OP2_ESC_K_NAMED_BACKREF )
|
||||
, ( SYN_GNU_REGEX_BV |
|
||||
ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME |
|
||||
ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME_CALL )
|
||||
, ( ONIG_OPTION_SINGLELINE | ONIG_OPTION_CAPTURE_GROUP )
|
||||
,
|
||||
{
|
||||
(OnigCodePoint )'\\' /* esc */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
|
||||
}
|
||||
};
|
||||
|
||||
OnigSyntaxType OnigSyntaxPython = {
|
||||
(( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
|
||||
ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
|
||||
ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS |
|
||||
ONIG_SYN_OP_ESC_C_CONTROL )
|
||||
& ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
|
||||
, ( ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL |
|
||||
ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY |
|
||||
ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
|
||||
ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |
|
||||
ONIG_SYN_OP2_ESC_V_VTAB |
|
||||
ONIG_SYN_OP2_ESC_U_HEX4 |
|
||||
ONIG_SYN_OP2_QMARK_LPAREN_CONDITION |
|
||||
ONIG_SYN_OP2_QMARK_CAPITAL_P_NAMED_GROUP )
|
||||
, ( SYN_GNU_REGEX_BV |
|
||||
ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV )
|
||||
, ( ONIG_OPTION_SINGLELINE | ONIG_OPTION_ASCII_RANGE )
|
||||
,
|
||||
{
|
||||
(OnigCodePoint )'\\' /* esc */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
|
||||
extern int
|
||||
onig_set_default_syntax(OnigSyntaxType* syntax)
|
||||
{
|
||||
if (IS_NULL(syntax))
|
||||
syntax = ONIG_SYNTAX_RUBY;
|
||||
|
||||
OnigDefaultSyntax = syntax;
|
||||
return 0;
|
||||
}
|
||||
|
||||
extern void
|
||||
onig_copy_syntax(OnigSyntaxType* to, const OnigSyntaxType* from)
|
||||
{
|
||||
*to = *from;
|
||||
}
|
||||
|
||||
extern void
|
||||
onig_set_syntax_op(OnigSyntaxType* syntax, unsigned int op)
|
||||
{
|
||||
syntax->op = op;
|
||||
}
|
||||
|
||||
extern void
|
||||
onig_set_syntax_op2(OnigSyntaxType* syntax, unsigned int op2)
|
||||
{
|
||||
syntax->op2 = op2;
|
||||
}
|
||||
|
||||
extern void
|
||||
onig_set_syntax_behavior(OnigSyntaxType* syntax, unsigned int behavior)
|
||||
{
|
||||
syntax->behavior = behavior;
|
||||
}
|
||||
|
||||
extern void
|
||||
onig_set_syntax_options(OnigSyntaxType* syntax, OnigOptionType options)
|
||||
{
|
||||
syntax->options = options;
|
||||
}
|
||||
|
||||
extern unsigned int
|
||||
onig_get_syntax_op(OnigSyntaxType* syntax)
|
||||
{
|
||||
return syntax->op;
|
||||
}
|
||||
|
||||
extern unsigned int
|
||||
onig_get_syntax_op2(OnigSyntaxType* syntax)
|
||||
{
|
||||
return syntax->op2;
|
||||
}
|
||||
|
||||
extern unsigned int
|
||||
onig_get_syntax_behavior(OnigSyntaxType* syntax)
|
||||
{
|
||||
return syntax->behavior;
|
||||
}
|
||||
|
||||
extern OnigOptionType
|
||||
onig_get_syntax_options(OnigSyntaxType* syntax)
|
||||
{
|
||||
return syntax->options;
|
||||
}
|
||||
|
||||
#ifdef USE_VARIABLE_META_CHARS
|
||||
/*
 * Store `code` in the meta_char_table entry of `enc` selected by `what`
 * (one of the ONIG_META_CHAR_* selectors).
 *
 * Returns 0 after a successful store, or ONIGERR_INVALID_ARGUMENT when
 * `what` is not a known selector; in that case nothing is modified.
 */
extern int onig_set_meta_char(OnigSyntaxType* enc,
                              unsigned int what, OnigCodePoint code)
{
  if (what == ONIG_META_CHAR_ESCAPE) {
    enc->meta_char_table.esc = code;
  }
  else if (what == ONIG_META_CHAR_ANYCHAR) {
    enc->meta_char_table.anychar = code;
  }
  else if (what == ONIG_META_CHAR_ANYTIME) {
    enc->meta_char_table.anytime = code;
  }
  else if (what == ONIG_META_CHAR_ZERO_OR_ONE_TIME) {
    enc->meta_char_table.zero_or_one_time = code;
  }
  else if (what == ONIG_META_CHAR_ONE_OR_MORE_TIME) {
    enc->meta_char_table.one_or_more_time = code;
  }
  else if (what == ONIG_META_CHAR_ANYCHAR_ANYTIME) {
    enc->meta_char_table.anychar_anytime = code;
  }
  else {
    return ONIGERR_INVALID_ARGUMENT;
  }

  return 0;
}
|
||||
#endif /* USE_VARIABLE_META_CHARS */
|
78
src/Onigmo/regtrav.c
Normal file
78
src/Onigmo/regtrav.c
Normal file
@ -0,0 +1,78 @@
|
||||
/**********************************************************************
|
||||
regtrav.c - Onigmo (Oniguruma-mod) (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2004 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2011 K.Takata <kentkt AT csc DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "regint.h"
|
||||
|
||||
#ifdef USE_CAPTURE_HISTORY
|
||||
|
||||
static int
|
||||
capture_tree_traverse(OnigCaptureTreeNode* node, int at,
|
||||
int(*callback_func)(int,OnigPosition,OnigPosition,int,int,void*),
|
||||
int level, void* arg)
|
||||
{
|
||||
int r, i;
|
||||
|
||||
if (node == (OnigCaptureTreeNode* )0)
|
||||
return 0;
|
||||
|
||||
if ((at & ONIG_TRAVERSE_CALLBACK_AT_FIRST) != 0) {
|
||||
r = (*callback_func)(node->group, node->beg, node->end,
|
||||
level, ONIG_TRAVERSE_CALLBACK_AT_FIRST, arg);
|
||||
if (r != 0) return r;
|
||||
}
|
||||
|
||||
for (i = 0; i < node->num_childs; i++) {
|
||||
r = capture_tree_traverse(node->childs[i], at,
|
||||
callback_func, level + 1, arg);
|
||||
if (r != 0) return r;
|
||||
}
|
||||
|
||||
if ((at & ONIG_TRAVERSE_CALLBACK_AT_LAST) != 0) {
|
||||
r = (*callback_func)(node->group, node->beg, node->end,
|
||||
level, ONIG_TRAVERSE_CALLBACK_AT_LAST, arg);
|
||||
if (r != 0) return r;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
#endif /* USE_CAPTURE_HISTORY */
|
||||
|
||||
extern int
|
||||
onig_capture_tree_traverse(OnigRegion* region, int at,
|
||||
int(*callback_func)(int,OnigPosition,OnigPosition,int,int,void*),
|
||||
void* arg)
|
||||
{
|
||||
#ifdef USE_CAPTURE_HISTORY
|
||||
return capture_tree_traverse(region->history_root, at,
|
||||
callback_func, 0, arg);
|
||||
#else
|
||||
return ONIG_NO_SUPPORT_CONFIG;
|
||||
#endif
|
||||
}
|
58
src/Onigmo/regversion.c
Normal file
58
src/Onigmo/regversion.c
Normal file
@ -0,0 +1,58 @@
|
||||
/**********************************************************************
|
||||
regversion.c - Onigmo (Oniguruma-mod) (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2009 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2011-2013 K.Takata <kentkt AT csc DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
#include "oniguruma.h"
|
||||
#include <stdio.h>
|
||||
|
||||
extern const char*
|
||||
onig_version(void)
|
||||
{
|
||||
static char s[12];
|
||||
|
||||
sprintf(s, "%d.%d.%d",
|
||||
ONIGURUMA_VERSION_MAJOR,
|
||||
ONIGURUMA_VERSION_MINOR,
|
||||
ONIGURUMA_VERSION_TEENY);
|
||||
return s;
|
||||
}
|
||||
|
||||
extern const char*
|
||||
onig_copyright(void)
|
||||
{
|
||||
static char s[80];
|
||||
|
||||
sprintf(s, "Onigmo %d.%d.%d : Copyright (C) 2002-2009 K.Kosako, "
|
||||
"2011-2013 K.Takata",
|
||||
ONIGURUMA_VERSION_MAJOR,
|
||||
ONIGURUMA_VERSION_MINOR,
|
||||
ONIGURUMA_VERSION_TEENY);
|
||||
return s;
|
||||
}
|
25
src/Onigmo/sample/Makefile.am
Normal file
25
src/Onigmo/sample/Makefile.am
Normal file
@ -0,0 +1,25 @@
|
||||
# Sample programs: built with the library but never installed.
noinst_PROGRAMS = encode listcap names posix simple sql syntax crnl

# Every sample links against the libtool library from the top build directory.
libname = $(top_builddir)/libonig.la
LDADD = $(libname)
INCLUDES = -I$(top_srcdir) -I$(includedir)

# One source file per program.  crnl has no explicit entry, so automake
# falls back to its default of crnl.c.
encode_SOURCES  = encode.c
listcap_SOURCES = listcap.c
names_SOURCES   = names.c
posix_SOURCES   = posix.c
simple_SOURCES  = simple.c
sql_SOURCES     = sql.c
syntax_SOURCES  = syntax.c

sampledir = $(top_builddir)/sample

# Run the samples in order; crnl is built but not part of this run.
test: encode listcap names posix simple sql syntax
	@$(sampledir)/encode
	@$(sampledir)/listcap
	@$(sampledir)/names
	@$(sampledir)/posix
	@$(sampledir)/simple
	@$(sampledir)/sql
	@$(sampledir)/syntax
|
553
src/Onigmo/sample/Makefile.in
Normal file
553
src/Onigmo/sample/Makefile.in
Normal file
@ -0,0 +1,553 @@
|
||||
# Makefile.in generated by automake 1.11.1 from Makefile.am.
|
||||
# @configure_input@
|
||||
|
||||
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
|
||||
# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
|
||||
# Inc.
|
||||
# This Makefile.in is free software; the Free Software Foundation
|
||||
# gives unlimited permission to copy and/or distribute it,
|
||||
# with or without modifications, as long as this notice is preserved.
|
||||
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
|
||||
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
|
||||
# PARTICULAR PURPOSE.
|
||||
|
||||
@SET_MAKE@
|
||||
|
||||
VPATH = @srcdir@
|
||||
pkgdatadir = $(datadir)/@PACKAGE@
|
||||
pkgincludedir = $(includedir)/@PACKAGE@
|
||||
pkglibdir = $(libdir)/@PACKAGE@
|
||||
pkglibexecdir = $(libexecdir)/@PACKAGE@
|
||||
am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
|
||||
install_sh_DATA = $(install_sh) -c -m 644
|
||||
install_sh_PROGRAM = $(install_sh) -c
|
||||
install_sh_SCRIPT = $(install_sh) -c
|
||||
INSTALL_HEADER = $(INSTALL_DATA)
|
||||
transform = $(program_transform_name)
|
||||
NORMAL_INSTALL = :
|
||||
PRE_INSTALL = :
|
||||
POST_INSTALL = :
|
||||
NORMAL_UNINSTALL = :
|
||||
PRE_UNINSTALL = :
|
||||
POST_UNINSTALL = :
|
||||
build_triplet = @build@
|
||||
host_triplet = @host@
|
||||
noinst_PROGRAMS = encode$(EXEEXT) listcap$(EXEEXT) names$(EXEEXT) \
|
||||
posix$(EXEEXT) simple$(EXEEXT) sql$(EXEEXT) syntax$(EXEEXT) \
|
||||
crnl$(EXEEXT)
|
||||
subdir = sample
|
||||
DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
|
||||
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
|
||||
am__aclocal_m4_deps = $(top_srcdir)/m4/libtool.m4 \
|
||||
$(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \
|
||||
$(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \
|
||||
$(top_srcdir)/configure.in
|
||||
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
|
||||
$(ACLOCAL_M4)
|
||||
mkinstalldirs = $(install_sh) -d
|
||||
CONFIG_HEADER = $(top_builddir)/config.h
|
||||
CONFIG_CLEAN_FILES =
|
||||
CONFIG_CLEAN_VPATH_FILES =
|
||||
PROGRAMS = $(noinst_PROGRAMS)
|
||||
crnl_SOURCES = crnl.c
|
||||
crnl_OBJECTS = crnl.$(OBJEXT)
|
||||
crnl_LDADD = $(LDADD)
|
||||
crnl_DEPENDENCIES = $(libname)
|
||||
am_encode_OBJECTS = encode.$(OBJEXT)
|
||||
encode_OBJECTS = $(am_encode_OBJECTS)
|
||||
encode_LDADD = $(LDADD)
|
||||
encode_DEPENDENCIES = $(libname)
|
||||
am_listcap_OBJECTS = listcap.$(OBJEXT)
|
||||
listcap_OBJECTS = $(am_listcap_OBJECTS)
|
||||
listcap_LDADD = $(LDADD)
|
||||
listcap_DEPENDENCIES = $(libname)
|
||||
am_names_OBJECTS = names.$(OBJEXT)
|
||||
names_OBJECTS = $(am_names_OBJECTS)
|
||||
names_LDADD = $(LDADD)
|
||||
names_DEPENDENCIES = $(libname)
|
||||
am_posix_OBJECTS = posix.$(OBJEXT)
|
||||
posix_OBJECTS = $(am_posix_OBJECTS)
|
||||
posix_LDADD = $(LDADD)
|
||||
posix_DEPENDENCIES = $(libname)
|
||||
am_simple_OBJECTS = simple.$(OBJEXT)
|
||||
simple_OBJECTS = $(am_simple_OBJECTS)
|
||||
simple_LDADD = $(LDADD)
|
||||
simple_DEPENDENCIES = $(libname)
|
||||
am_sql_OBJECTS = sql.$(OBJEXT)
|
||||
sql_OBJECTS = $(am_sql_OBJECTS)
|
||||
sql_LDADD = $(LDADD)
|
||||
sql_DEPENDENCIES = $(libname)
|
||||
am_syntax_OBJECTS = syntax.$(OBJEXT)
|
||||
syntax_OBJECTS = $(am_syntax_OBJECTS)
|
||||
syntax_LDADD = $(LDADD)
|
||||
syntax_DEPENDENCIES = $(libname)
|
||||
DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
|
||||
depcomp = $(SHELL) $(top_srcdir)/depcomp
|
||||
am__depfiles_maybe = depfiles
|
||||
am__mv = mv -f
|
||||
COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
|
||||
$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
|
||||
LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
|
||||
--mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
|
||||
$(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
|
||||
CCLD = $(CC)
|
||||
LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
|
||||
--mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
|
||||
$(LDFLAGS) -o $@
|
||||
SOURCES = crnl.c $(encode_SOURCES) $(listcap_SOURCES) $(names_SOURCES) \
|
||||
$(posix_SOURCES) $(simple_SOURCES) $(sql_SOURCES) \
|
||||
$(syntax_SOURCES)
|
||||
DIST_SOURCES = crnl.c $(encode_SOURCES) $(listcap_SOURCES) \
|
||||
$(names_SOURCES) $(posix_SOURCES) $(simple_SOURCES) \
|
||||
$(sql_SOURCES) $(syntax_SOURCES)
|
||||
ETAGS = etags
|
||||
CTAGS = ctags
|
||||
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
|
||||
ACLOCAL = @ACLOCAL@
|
||||
ALLOCA = @ALLOCA@
|
||||
AMTAR = @AMTAR@
|
||||
AR = @AR@
|
||||
AUTOCONF = @AUTOCONF@
|
||||
AUTOHEADER = @AUTOHEADER@
|
||||
AUTOMAKE = @AUTOMAKE@
|
||||
AWK = @AWK@
|
||||
CC = @CC@
|
||||
CCDEPMODE = @CCDEPMODE@
|
||||
CFLAGS = @CFLAGS@
|
||||
CPP = @CPP@
|
||||
CPPFLAGS = @CPPFLAGS@
|
||||
CYGPATH_W = @CYGPATH_W@
|
||||
DEFS = @DEFS@
|
||||
DEPDIR = @DEPDIR@
|
||||
DSYMUTIL = @DSYMUTIL@
|
||||
DUMPBIN = @DUMPBIN@
|
||||
ECHO_C = @ECHO_C@
|
||||
ECHO_N = @ECHO_N@
|
||||
ECHO_T = @ECHO_T@
|
||||
EGREP = @EGREP@
|
||||
EXEEXT = @EXEEXT@
|
||||
FGREP = @FGREP@
|
||||
GREP = @GREP@
|
||||
INSTALL = @INSTALL@
|
||||
INSTALL_DATA = @INSTALL_DATA@
|
||||
INSTALL_PROGRAM = @INSTALL_PROGRAM@
|
||||
INSTALL_SCRIPT = @INSTALL_SCRIPT@
|
||||
INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
|
||||
LD = @LD@
|
||||
LDFLAGS = @LDFLAGS@
|
||||
LIBOBJS = @LIBOBJS@
|
||||
LIBS = @LIBS@
|
||||
LIBTOOL = @LIBTOOL@
|
||||
LIPO = @LIPO@
|
||||
LN_S = @LN_S@
|
||||
LTLIBOBJS = @LTLIBOBJS@
|
||||
LTVERSION = @LTVERSION@
|
||||
MAKEINFO = @MAKEINFO@
|
||||
MKDIR_P = @MKDIR_P@
|
||||
NM = @NM@
|
||||
NMEDIT = @NMEDIT@
|
||||
OBJDUMP = @OBJDUMP@
|
||||
OBJEXT = @OBJEXT@
|
||||
OTOOL = @OTOOL@
|
||||
OTOOL64 = @OTOOL64@
|
||||
PACKAGE = @PACKAGE@
|
||||
PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
|
||||
PACKAGE_NAME = @PACKAGE_NAME@
|
||||
PACKAGE_STRING = @PACKAGE_STRING@
|
||||
PACKAGE_TARNAME = @PACKAGE_TARNAME@
|
||||
PACKAGE_URL = @PACKAGE_URL@
|
||||
PACKAGE_VERSION = @PACKAGE_VERSION@
|
||||
PATH_SEPARATOR = @PATH_SEPARATOR@
|
||||
RANLIB = @RANLIB@
|
||||
RUBYDIR = @RUBYDIR@
|
||||
SED = @SED@
|
||||
SET_MAKE = @SET_MAKE@
|
||||
SHELL = @SHELL@
|
||||
STATISTICS = @STATISTICS@
|
||||
STRIP = @STRIP@
|
||||
VERSION = @VERSION@
|
||||
abs_builddir = @abs_builddir@
|
||||
abs_srcdir = @abs_srcdir@
|
||||
abs_top_builddir = @abs_top_builddir@
|
||||
abs_top_srcdir = @abs_top_srcdir@
|
||||
ac_ct_CC = @ac_ct_CC@
|
||||
ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
|
||||
am__include = @am__include@
|
||||
am__leading_dot = @am__leading_dot@
|
||||
am__quote = @am__quote@
|
||||
am__tar = @am__tar@
|
||||
am__untar = @am__untar@
|
||||
bindir = @bindir@
|
||||
build = @build@
|
||||
build_alias = @build_alias@
|
||||
build_cpu = @build_cpu@
|
||||
build_os = @build_os@
|
||||
build_vendor = @build_vendor@
|
||||
builddir = @builddir@
|
||||
datadir = @datadir@
|
||||
datarootdir = @datarootdir@
|
||||
docdir = @docdir@
|
||||
dvidir = @dvidir@
|
||||
exec_prefix = @exec_prefix@
|
||||
host = @host@
|
||||
host_alias = @host_alias@
|
||||
host_cpu = @host_cpu@
|
||||
host_os = @host_os@
|
||||
host_vendor = @host_vendor@
|
||||
htmldir = @htmldir@
|
||||
includedir = @includedir@
|
||||
infodir = @infodir@
|
||||
install_sh = @install_sh@
|
||||
libdir = @libdir@
|
||||
libexecdir = @libexecdir@
|
||||
localedir = @localedir@
|
||||
localstatedir = @localstatedir@
|
||||
lt_ECHO = @lt_ECHO@
|
||||
mandir = @mandir@
|
||||
mkdir_p = @mkdir_p@
|
||||
oldincludedir = @oldincludedir@
|
||||
pdfdir = @pdfdir@
|
||||
prefix = @prefix@
|
||||
program_transform_name = @program_transform_name@
|
||||
psdir = @psdir@
|
||||
sbindir = @sbindir@
|
||||
sharedstatedir = @sharedstatedir@
|
||||
srcdir = @srcdir@
|
||||
sysconfdir = @sysconfdir@
|
||||
target_alias = @target_alias@
|
||||
top_build_prefix = @top_build_prefix@
|
||||
top_builddir = @top_builddir@
|
||||
top_srcdir = @top_srcdir@
|
||||
libname = $(top_builddir)/libonig.la
|
||||
LDADD = $(libname)
|
||||
INCLUDES = -I$(top_srcdir) -I$(includedir)
|
||||
encode_SOURCES = encode.c
|
||||
listcap_SOURCES = listcap.c
|
||||
names_SOURCES = names.c
|
||||
posix_SOURCES = posix.c
|
||||
simple_SOURCES = simple.c
|
||||
sql_SOURCES = sql.c
|
||||
syntax_SOURCES = syntax.c
|
||||
sampledir = $(top_builddir)/sample
|
||||
all: all-am
|
||||
|
||||
.SUFFIXES:
|
||||
.SUFFIXES: .c .lo .o .obj
|
||||
$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
|
||||
@for dep in $?; do \
|
||||
case '$(am__configure_deps)' in \
|
||||
*$$dep*) \
|
||||
( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
|
||||
&& { if test -f $@; then exit 0; else break; fi; }; \
|
||||
exit 1;; \
|
||||
esac; \
|
||||
done; \
|
||||
echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign sample/Makefile'; \
|
||||
$(am__cd) $(top_srcdir) && \
|
||||
$(AUTOMAKE) --foreign sample/Makefile
|
||||
.PRECIOUS: Makefile
|
||||
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
|
||||
@case '$?' in \
|
||||
*config.status*) \
|
||||
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
|
||||
*) \
|
||||
echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
|
||||
cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
|
||||
esac;
|
||||
|
||||
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
|
||||
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
|
||||
|
||||
$(top_srcdir)/configure: $(am__configure_deps)
|
||||
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
|
||||
$(ACLOCAL_M4): $(am__aclocal_m4_deps)
|
||||
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
|
||||
$(am__aclocal_m4_deps):
|
||||
|
||||
clean-noinstPROGRAMS:
|
||||
@list='$(noinst_PROGRAMS)'; test -n "$$list" || exit 0; \
|
||||
echo " rm -f" $$list; \
|
||||
rm -f $$list || exit $$?; \
|
||||
test -n "$(EXEEXT)" || exit 0; \
|
||||
list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
|
||||
echo " rm -f" $$list; \
|
||||
rm -f $$list
|
||||
crnl$(EXEEXT): $(crnl_OBJECTS) $(crnl_DEPENDENCIES)
|
||||
@rm -f crnl$(EXEEXT)
|
||||
$(LINK) $(crnl_OBJECTS) $(crnl_LDADD) $(LIBS)
|
||||
encode$(EXEEXT): $(encode_OBJECTS) $(encode_DEPENDENCIES)
|
||||
@rm -f encode$(EXEEXT)
|
||||
$(LINK) $(encode_OBJECTS) $(encode_LDADD) $(LIBS)
|
||||
listcap$(EXEEXT): $(listcap_OBJECTS) $(listcap_DEPENDENCIES)
|
||||
@rm -f listcap$(EXEEXT)
|
||||
$(LINK) $(listcap_OBJECTS) $(listcap_LDADD) $(LIBS)
|
||||
names$(EXEEXT): $(names_OBJECTS) $(names_DEPENDENCIES)
|
||||
@rm -f names$(EXEEXT)
|
||||
$(LINK) $(names_OBJECTS) $(names_LDADD) $(LIBS)
|
||||
posix$(EXEEXT): $(posix_OBJECTS) $(posix_DEPENDENCIES)
|
||||
@rm -f posix$(EXEEXT)
|
||||
$(LINK) $(posix_OBJECTS) $(posix_LDADD) $(LIBS)
|
||||
simple$(EXEEXT): $(simple_OBJECTS) $(simple_DEPENDENCIES)
|
||||
@rm -f simple$(EXEEXT)
|
||||
$(LINK) $(simple_OBJECTS) $(simple_LDADD) $(LIBS)
|
||||
sql$(EXEEXT): $(sql_OBJECTS) $(sql_DEPENDENCIES)
|
||||
@rm -f sql$(EXEEXT)
|
||||
$(LINK) $(sql_OBJECTS) $(sql_LDADD) $(LIBS)
|
||||
syntax$(EXEEXT): $(syntax_OBJECTS) $(syntax_DEPENDENCIES)
|
||||
@rm -f syntax$(EXEEXT)
|
||||
$(LINK) $(syntax_OBJECTS) $(syntax_LDADD) $(LIBS)
|
||||
|
||||
mostlyclean-compile:
|
||||
-rm -f *.$(OBJEXT)
|
||||
|
||||
distclean-compile:
|
||||
-rm -f *.tab.c
|
||||
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/crnl.Po@am__quote@
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/encode.Po@am__quote@
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/listcap.Po@am__quote@
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/names.Po@am__quote@
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/posix.Po@am__quote@
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/simple.Po@am__quote@
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sql.Po@am__quote@
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/syntax.Po@am__quote@
|
||||
|
||||
.c.o:
|
||||
@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
|
||||
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
|
||||
@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
|
||||
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
|
||||
@am__fastdepCC_FALSE@ $(COMPILE) -c $<
|
||||
|
||||
.c.obj:
|
||||
@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
|
||||
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
|
||||
@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
|
||||
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
|
||||
@am__fastdepCC_FALSE@ $(COMPILE) -c `$(CYGPATH_W) '$<'`
|
||||
|
||||
.c.lo:
|
||||
@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
|
||||
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
|
||||
@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
|
||||
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
|
||||
@am__fastdepCC_FALSE@ $(LTCOMPILE) -c -o $@ $<
|
||||
|
||||
mostlyclean-libtool:
|
||||
-rm -f *.lo
|
||||
|
||||
clean-libtool:
|
||||
-rm -rf .libs _libs
|
||||
|
||||
ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
|
||||
list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
|
||||
unique=`for i in $$list; do \
|
||||
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
|
||||
done | \
|
||||
$(AWK) '{ files[$$0] = 1; nonempty = 1; } \
|
||||
END { if (nonempty) { for (i in files) print i; }; }'`; \
|
||||
mkid -fID $$unique
|
||||
tags: TAGS
|
||||
|
||||
TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
|
||||
$(TAGS_FILES) $(LISP)
|
||||
set x; \
|
||||
here=`pwd`; \
|
||||
list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
|
||||
unique=`for i in $$list; do \
|
||||
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
|
||||
done | \
|
||||
$(AWK) '{ files[$$0] = 1; nonempty = 1; } \
|
||||
END { if (nonempty) { for (i in files) print i; }; }'`; \
|
||||
shift; \
|
||||
if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
|
||||
test -n "$$unique" || unique=$$empty_fix; \
|
||||
if test $$# -gt 0; then \
|
||||
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
|
||||
"$$@" $$unique; \
|
||||
else \
|
||||
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
|
||||
$$unique; \
|
||||
fi; \
|
||||
fi
|
||||
ctags: CTAGS
|
||||
CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
|
||||
$(TAGS_FILES) $(LISP)
|
||||
list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
|
||||
unique=`for i in $$list; do \
|
||||
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
|
||||
done | \
|
||||
$(AWK) '{ files[$$0] = 1; nonempty = 1; } \
|
||||
END { if (nonempty) { for (i in files) print i; }; }'`; \
|
||||
test -z "$(CTAGS_ARGS)$$unique" \
|
||||
|| $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
|
||||
$$unique
|
||||
|
||||
GTAGS:
|
||||
here=`$(am__cd) $(top_builddir) && pwd` \
|
||||
&& $(am__cd) $(top_srcdir) \
|
||||
&& gtags -i $(GTAGS_ARGS) "$$here"
|
||||
|
||||
distclean-tags:
|
||||
-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
|
||||
|
||||
distdir: $(DISTFILES)
|
||||
@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
|
||||
topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
|
||||
list='$(DISTFILES)'; \
|
||||
dist_files=`for file in $$list; do echo $$file; done | \
|
||||
sed -e "s|^$$srcdirstrip/||;t" \
|
||||
-e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
|
||||
case $$dist_files in \
|
||||
*/*) $(MKDIR_P) `echo "$$dist_files" | \
|
||||
sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
|
||||
sort -u` ;; \
|
||||
esac; \
|
||||
for file in $$dist_files; do \
|
||||
if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
|
||||
if test -d $$d/$$file; then \
|
||||
dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
|
||||
if test -d "$(distdir)/$$file"; then \
|
||||
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
|
||||
fi; \
|
||||
if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
|
||||
cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
|
||||
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
|
||||
fi; \
|
||||
cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
|
||||
else \
|
||||
test -f "$(distdir)/$$file" \
|
||||
|| cp -p $$d/$$file "$(distdir)/$$file" \
|
||||
|| exit 1; \
|
||||
fi; \
|
||||
done
|
||||
check-am: all-am
|
||||
check: check-am
|
||||
all-am: Makefile $(PROGRAMS)
|
||||
installdirs:
|
||||
install: install-am
|
||||
install-exec: install-exec-am
|
||||
install-data: install-data-am
|
||||
uninstall: uninstall-am
|
||||
|
||||
install-am: all-am
|
||||
@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
|
||||
|
||||
installcheck: installcheck-am
|
||||
install-strip:
|
||||
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
|
||||
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
|
||||
`test -z '$(STRIP)' || \
|
||||
echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
|
||||
mostlyclean-generic:
|
||||
|
||||
clean-generic:
|
||||
|
||||
distclean-generic:
|
||||
-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
|
||||
-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
|
||||
|
||||
maintainer-clean-generic:
|
||||
@echo "This command is intended for maintainers to use"
|
||||
@echo "it deletes files that may require special tools to rebuild."
|
||||
clean: clean-am
|
||||
|
||||
clean-am: clean-generic clean-libtool clean-noinstPROGRAMS \
|
||||
mostlyclean-am
|
||||
|
||||
distclean: distclean-am
|
||||
-rm -rf ./$(DEPDIR)
|
||||
-rm -f Makefile
|
||||
distclean-am: clean-am distclean-compile distclean-generic \
|
||||
distclean-tags
|
||||
|
||||
dvi: dvi-am
|
||||
|
||||
dvi-am:
|
||||
|
||||
html: html-am
|
||||
|
||||
html-am:
|
||||
|
||||
info: info-am
|
||||
|
||||
info-am:
|
||||
|
||||
install-data-am:
|
||||
|
||||
install-dvi: install-dvi-am
|
||||
|
||||
install-dvi-am:
|
||||
|
||||
install-exec-am:
|
||||
|
||||
install-html: install-html-am
|
||||
|
||||
install-html-am:
|
||||
|
||||
install-info: install-info-am
|
||||
|
||||
install-info-am:
|
||||
|
||||
install-man:
|
||||
|
||||
install-pdf: install-pdf-am
|
||||
|
||||
install-pdf-am:
|
||||
|
||||
install-ps: install-ps-am
|
||||
|
||||
install-ps-am:
|
||||
|
||||
installcheck-am:
|
||||
|
||||
maintainer-clean: maintainer-clean-am
|
||||
-rm -rf ./$(DEPDIR)
|
||||
-rm -f Makefile
|
||||
maintainer-clean-am: distclean-am maintainer-clean-generic
|
||||
|
||||
mostlyclean: mostlyclean-am
|
||||
|
||||
mostlyclean-am: mostlyclean-compile mostlyclean-generic \
|
||||
mostlyclean-libtool
|
||||
|
||||
pdf: pdf-am
|
||||
|
||||
pdf-am:
|
||||
|
||||
ps: ps-am
|
||||
|
||||
ps-am:
|
||||
|
||||
uninstall-am:
|
||||
|
||||
.MAKE: install-am install-strip
|
||||
|
||||
.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
|
||||
clean-libtool clean-noinstPROGRAMS ctags distclean \
|
||||
distclean-compile distclean-generic distclean-libtool \
|
||||
distclean-tags distdir dvi dvi-am html html-am info info-am \
|
||||
install install-am install-data install-data-am install-dvi \
|
||||
install-dvi-am install-exec install-exec-am install-html \
|
||||
install-html-am install-info install-info-am install-man \
|
||||
install-pdf install-pdf-am install-ps install-ps-am \
|
||||
install-strip installcheck installcheck-am installdirs \
|
||||
maintainer-clean maintainer-clean-generic mostlyclean \
|
||||
mostlyclean-compile mostlyclean-generic mostlyclean-libtool \
|
||||
pdf pdf-am ps ps-am tags uninstall uninstall-am
|
||||
|
||||
|
||||
test: encode listcap names posix simple sql syntax
|
||||
@$(sampledir)/encode
|
||||
@$(sampledir)/listcap
|
||||
@$(sampledir)/names
|
||||
@$(sampledir)/posix
|
||||
@$(sampledir)/simple
|
||||
@$(sampledir)/sql
|
||||
@$(sampledir)/syntax
|
||||
|
||||
# Tell versions [3.59,3.63) of GNU make to not export all variables.
|
||||
# Otherwise a system limit (for SysV at least) may be exceeded.
|
||||
.NOEXPORT:
|
221
src/Onigmo/sample/crnl.c
Normal file
221
src/Onigmo/sample/crnl.c
Normal file
@ -0,0 +1,221 @@
|
||||
/*
|
||||
* crnl.c 2007/05/30 K.Kosako
|
||||
*
|
||||
* !!! You should enable USE_CRNL_AS_LINE_TERMINATOR. !!!
|
||||
*
|
||||
* USE_CRNL_AS_LINE_TERMINATOR config test program.
|
||||
*/
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include "oniguruma.h"
|
||||
|
||||
/* #define USE_UNICODE_ALL_LINE_TERMINATORS */
|
||||
|
||||
static int nfail = 0;
|
||||
|
||||
static void result(int no, int from, int to,
|
||||
int expected_from, int expected_to)
|
||||
{
|
||||
fprintf(stderr, "%3d: ", no);
|
||||
if (from == expected_from && to == expected_to) {
|
||||
fprintf(stderr, "Success\n");
|
||||
}
|
||||
else {
|
||||
fprintf(stderr, "Fail: expected: (%d-%d), result: (%d-%d)\n",
|
||||
expected_from, expected_to, from, to);
|
||||
|
||||
nfail++;
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
x0(int no, char* pattern_arg, char* str_arg,
|
||||
int start_offset, int expected_from, int expected_to, int backward)
|
||||
{
|
||||
int r;
|
||||
unsigned char *start, *range, *end;
|
||||
regex_t* reg;
|
||||
OnigErrorInfo einfo;
|
||||
OnigRegion *region;
|
||||
UChar *pattern, *str;
|
||||
|
||||
pattern = (UChar* )pattern_arg;
|
||||
str = (UChar* )str_arg;
|
||||
|
||||
r = onig_new(®, pattern, pattern + strlen((char* )pattern),
|
||||
ONIG_OPTION_NEWLINE_CRLF, ONIG_ENCODING_UTF8, ONIG_SYNTAX_DEFAULT, &einfo);
|
||||
if (r != ONIG_NORMAL) {
|
||||
char s[ONIG_MAX_ERROR_MESSAGE_LEN];
|
||||
onig_error_code_to_str(s, r, &einfo);
|
||||
fprintf(stderr, "ERROR: %s\n", s);
|
||||
return -1;
|
||||
}
|
||||
|
||||
region = onig_region_new();
|
||||
|
||||
end = str + strlen((char* )str);
|
||||
if (backward) {
|
||||
start = end + start_offset;
|
||||
range = str;
|
||||
}
|
||||
else {
|
||||
start = str + start_offset;
|
||||
range = end;
|
||||
}
|
||||
r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE);
|
||||
if (r >= 0 || r == ONIG_MISMATCH) {
|
||||
result(no, region->beg[0], region->end[0], expected_from, expected_to);
|
||||
}
|
||||
else if (r == ONIG_MISMATCH) {
|
||||
result(no, r, -1, expected_from, expected_to);
|
||||
}
|
||||
else { /* error */
|
||||
char s[ONIG_MAX_ERROR_MESSAGE_LEN];
|
||||
onig_error_code_to_str(s, r);
|
||||
fprintf(stderr, "ERROR: %s\n", s);
|
||||
return -1;
|
||||
}
|
||||
|
||||
onig_region_free(region, 1 /* 1:free self, 0:free contents only */);
|
||||
onig_free(reg);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Forward-search test case starting at the beginning of the subject:
 * forwards to x0() with start_offset 0 and backward 0.
 * Returns whatever x0() returns.
 */
static int
x(int no, char* pattern_arg, char* str_arg,
  int expected_from, int expected_to)
{
  const int start_offset = 0;
  const int backward = 0;

  return x0(no, pattern_arg, str_arg, start_offset,
            expected_from, expected_to, backward);
}
|
||||
|
||||
/*
 * Test case whose expected range is (-1, -1) -- presumably "no match
 * expected" -- with a caller-chosen start offset and search direction.
 * Returns whatever x0() returns.
 */
static int
f0(int no, char* pattern_arg, char* str_arg, int start_offset, int backward)
{
  const int none = -1;

  return x0(no, pattern_arg, str_arg, start_offset, none, none, backward);
}
|
||||
|
||||
/*
 * Forward-search test case whose expected range is (-1, -1) -- presumably
 * "no match expected".  Delegates to x() and returns its result.
 */
static int
f(int no, char* pattern_arg, char* str_arg)
{
  const int none = -1;

  return x(no, pattern_arg, str_arg, none, none);
}
|
||||
|
||||
extern int main(int argc, char* argv[])
|
||||
{
|
||||
x( 1, "", "\r\n", 0, 0);
|
||||
/* x( 2, ".", "\r\n", 0, 1); */
|
||||
f( 2, ".", "\r\n");
|
||||
f( 3, "..", "\r\n");
|
||||
x( 4, "^", "\r\n", 0, 0);
|
||||
x( 5, "\\n^", "\r\nf", 1, 2);
|
||||
x( 6, "\\n^a", "\r\na", 1, 3);
|
||||
x( 7, "$", "\r\n", 0, 0);
|
||||
x( 8, "T$", "T\r\n", 0, 1);
|
||||
#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
|
||||
x( 9, "T$", "T\raT\r\n", 0, 1);
|
||||
#else
|
||||
x( 9, "T$", "T\raT\r\n", 3, 4);
|
||||
#endif
|
||||
x(10, "\\z", "\r\n", 2, 2);
|
||||
f(11, "a\\z", "a\r\n");
|
||||
x(12, "\\Z", "\r\n", 0, 0);
|
||||
x(13, "\\Z", "\r\na", 3, 3);
|
||||
x(14, "\\Z", "\r\n\r\n\n", 4, 4);
|
||||
x(15, "\\Z", "\r\n\r\nX", 5, 5);
|
||||
x(16, "a\\Z", "a\r\n", 0, 1);
|
||||
x(17, "aaaaaaaaaaaaaaa\\Z", "aaaaaaaaaaaaaaa\r\n", 0, 15);
|
||||
x(18, "a|$", "b\r\n", 1, 1);
|
||||
#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
|
||||
x(19, "$|b", "\rb", 0, 0);
|
||||
#else
|
||||
x(19, "$|b", "\rb", 1, 2);
|
||||
#endif
|
||||
x(20, "a$|ab$", "\r\nab\r\n", 2, 4);
|
||||
|
||||
x(21, "a|\\Z", "b\r\n", 1, 1);
|
||||
#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
|
||||
x(22, "\\Z|b", "\rb", 0, 0);
|
||||
#else
|
||||
x(22, "\\Z|b", "\rb", 1, 2);
|
||||
#endif
|
||||
x(23, "a\\Z|ab\\Z", "\r\nab\r\n", 2, 4);
|
||||
x(24, "(?=a$).", "a\r\n", 0, 1);
|
||||
#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
|
||||
x(25, "(?=a$).", "a\r", 0, 1);
|
||||
f(26, "(?!a$)..", "a\r");
|
||||
#else
|
||||
f(25, "(?=a$).", "a\r");
|
||||
x(26, "(?!a$)..", "a\r", 0, 2);
|
||||
#endif
|
||||
/* x(27, "(?<=a$).\\n", "a\r\n", 1, 3); */
|
||||
x(27, "(?<=a$)\\r\\n", "a\r\n", 1, 3);
|
||||
/* f(28, "(?<!a$).\\n", "a\r\n"); */
|
||||
f(28, "(?<!a$)\\r\\n", "a\r\n");
|
||||
x(29, "(?=a\\Z).", "a\r\n", 0, 1);
|
||||
#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
|
||||
x(30, "(?=a\\Z).", "a\r", 0, 1);
|
||||
f(31, "(?!a\\Z)..", "a\r");
|
||||
#else
|
||||
f(30, "(?=a\\Z).", "a\r");
|
||||
x(31, "(?!a\\Z)..", "a\r", 0, 2);
|
||||
#endif
|
||||
|
||||
x(32, ".*$", "aa\r\n", 0, 2);
|
||||
#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
|
||||
x(33, ".*$", "aa\r", 0, 2);
|
||||
#else
|
||||
x(33, ".*$", "aa\r", 0, 3);
|
||||
#endif
|
||||
x(34, "\\R{3}", "\r\r\n\n", 0, 4);
|
||||
x(35, "$", "\n", 0, 0);
|
||||
x(36, "T$", "T\n", 0, 1);
|
||||
x(37, "(?m).", "\r\n", 0, 1);
|
||||
x(38, "(?m)..", "\r\n", 0, 2);
|
||||
x0(39, "^", "\n.", 1, 1, 1, 0);
|
||||
x0(40, "^", "\r\n.", 1, 2, 2, 0);
|
||||
x0(41, "^", "\r\n.", 2, 2, 2, 0);
|
||||
x0(42, "$", "\n\n", 1, 1, 1, 0);
|
||||
x0(43, "$", "\r\n\n", 1, 2, 2, 0);
|
||||
x0(44, "$", "\r\n\n", 2, 2, 2, 0);
|
||||
#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
|
||||
x0(45, "^$", "\n\r", 1, 1, 1, 0);
|
||||
#else
|
||||
f0(45, "^$", "\n\r", 1, 0);
|
||||
#endif
|
||||
x0(46, "^$", "\n\r\n", 1, 1, 1, 0);
|
||||
x0(47, "^$", "\r\n\n", 1, 2, 2, 0);
|
||||
x0(48, "\\Z", "\r\n\n", 1, 2, 2, 0);
|
||||
f0(49, ".(?=\\Z)", "\r\n", 1, 0);
|
||||
x0(50, "(?=\\Z)", "\r\n", 1, 2, 2, 0);
|
||||
x0(51, "(?<=^).", "\r\n.", 0, 2, 3, 0);
|
||||
x0(52, "(?<=^).", "\r\n.", 1, 2, 3, 0);
|
||||
x0(53, "(?<=^).", "\r\n.", 2, 2, 3, 0);
|
||||
x0(54, "^a", "\r\na", 0, 2, 3, 0);
|
||||
x0(55, "^a", "\r\na", 1, 2, 3, 0);
|
||||
x0(56, "(?m)$.{1,2}a", "\r\na", 0, 0, 3, 0);
|
||||
f0(57, "(?m)$.{1,2}a", "\r\na", 1, 0);
|
||||
x0(58, ".*b", "\r\naaab\r\n", 1, 2, 6, 0);
|
||||
|
||||
/* backward search */
|
||||
/* x0(59, "$", "\n\n", 0, 1, 1, 1); */ /* BUG? */
|
||||
x0(60, "$", "\n\n", -1, 1, 1, 1);
|
||||
x0(61, "$", "\n\r\n", -1, 1, 1, 1);
|
||||
x0(62, "$", "\n\r\n", -2, 1, 1, 1);
|
||||
x0(63, "^$", "\n\r\n", -1, 1, 1, 1);
|
||||
x0(64, "^$", "\n\r\n", 0, 1, 1, 1);
|
||||
x0(65, "^$", "\r\n\n", 0, 2, 2, 1);
|
||||
x0(66, "^a", "\r\na", 0, 2, 3, 1);
|
||||
x0(67, "^a", "\r\na", -1, 2, 3, 1);
|
||||
f0(68, "^a", "\r\na", -2, 1);
|
||||
|
||||
onig_end();
|
||||
|
||||
if (nfail > 0) {
|
||||
fprintf(stderr, "\n");
|
||||
fprintf(stderr, "!!! You have to enable USE_CRNL_AS_LINE_TERMINATOR\n");
|
||||
fprintf(stderr, "!!! in regenc.h for this test program.\n");
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
298
src/Onigmo/sample/encode.c
Normal file
298
src/Onigmo/sample/encode.c
Normal file
@ -0,0 +1,298 @@
|
||||
/*
|
||||
* encode.c
|
||||
*/
|
||||
#include <stdio.h>
|
||||
#include "oniguruma.h"
|
||||
|
||||
static int
|
||||
search(regex_t* reg, unsigned char* str, unsigned char* end)
|
||||
{
|
||||
int r;
|
||||
unsigned char *start, *range;
|
||||
OnigRegion *region;
|
||||
|
||||
region = onig_region_new();
|
||||
|
||||
start = str;
|
||||
range = end;
|
||||
r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE);
|
||||
if (r >= 0) {
|
||||
int i;
|
||||
|
||||
fprintf(stderr, "match at %d (%s)\n", r,
|
||||
ONIGENC_NAME(onig_get_encoding(reg)));
|
||||
for (i = 0; i < region->num_regs; i++) {
|
||||
fprintf(stderr, "%d: (%ld-%ld)\n", i, region->beg[i], region->end[i]);
|
||||
}
|
||||
}
|
||||
else if (r == ONIG_MISMATCH) {
|
||||
fprintf(stderr, "search fail (%s)\n",
|
||||
ONIGENC_NAME(onig_get_encoding(reg)));
|
||||
}
|
||||
else { /* error */
|
||||
char s[ONIG_MAX_ERROR_MESSAGE_LEN];
|
||||
onig_error_code_to_str(s, r);
|
||||
fprintf(stderr, "ERROR: %s\n", s);
|
||||
fprintf(stderr, " (%s)\n", ONIGENC_NAME(onig_get_encoding(reg)));
|
||||
return -1;
|
||||
}
|
||||
|
||||
onig_region_free(region, 1 /* 1:free self, 0:free contents only */);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
exec(OnigEncoding enc, OnigOptionType options,
|
||||
char* apattern, char* astr)
|
||||
{
|
||||
int r;
|
||||
unsigned char *end;
|
||||
regex_t* reg;
|
||||
OnigErrorInfo einfo;
|
||||
UChar* pattern = (UChar* )apattern;
|
||||
UChar* str = (UChar* )astr;
|
||||
|
||||
r = onig_new(®, pattern,
|
||||
pattern + onigenc_str_bytelen_null(enc, pattern),
|
||||
options, enc, ONIG_SYNTAX_DEFAULT, &einfo);
|
||||
if (r != ONIG_NORMAL) {
|
||||
char s[ONIG_MAX_ERROR_MESSAGE_LEN];
|
||||
onig_error_code_to_str(s, r, &einfo);
|
||||
fprintf(stderr, "ERROR: %s\n", s);
|
||||
return -1;
|
||||
}
|
||||
|
||||
end = str + onigenc_str_bytelen_null(enc, str);
|
||||
r = search(reg, str, end);
|
||||
|
||||
onig_free(reg);
|
||||
onig_end();
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Case-fold flag that exec_deluxe() copies into OnigCompileInfo.case_fold_flag. */
static OnigCaseFoldType CF = ONIGENC_CASE_FOLD_MIN;
|
||||
|
||||
#if 0
/* Disabled helper: overwrite the file-level case-fold flag CF with `cf`. */
static void
set_case_fold(OnigCaseFoldType cf)
{
  CF = cf;
}
#endif
|
||||
|
||||
static int
|
||||
exec_deluxe(OnigEncoding pattern_enc, OnigEncoding str_enc,
|
||||
OnigOptionType options, char* apattern, char* astr)
|
||||
{
|
||||
int r;
|
||||
unsigned char *end;
|
||||
regex_t* reg;
|
||||
OnigCompileInfo ci;
|
||||
OnigErrorInfo einfo;
|
||||
UChar* pattern = (UChar* )apattern;
|
||||
UChar* str = (UChar* )astr;
|
||||
|
||||
ci.num_of_elements = 5;
|
||||
ci.pattern_enc = pattern_enc;
|
||||
ci.target_enc = str_enc;
|
||||
ci.syntax = ONIG_SYNTAX_DEFAULT;
|
||||
ci.option = options;
|
||||
ci.case_fold_flag = CF;
|
||||
|
||||
r = onig_new_deluxe(®, pattern,
|
||||
pattern + onigenc_str_bytelen_null(pattern_enc, pattern),
|
||||
&ci, &einfo);
|
||||
if (r != ONIG_NORMAL) {
|
||||
char s[ONIG_MAX_ERROR_MESSAGE_LEN];
|
||||
onig_error_code_to_str(s, r, &einfo);
|
||||
fprintf(stderr, "ERROR: %s\n", s);
|
||||
return -1;
|
||||
}
|
||||
|
||||
end = str + onigenc_str_bytelen_null(str_enc, str);
|
||||
r = search(reg, str, end);
|
||||
|
||||
onig_free(reg);
|
||||
onig_end();
|
||||
return 0;
|
||||
}
|
||||
|
||||
extern int main(int argc, char* argv[])
|
||||
{
|
||||
int r;
|
||||
/* ISO 8859-1 test */
|
||||
static unsigned char str[] = { 0xc7, 0xd6, 0xfe, 0xea, 0xe0, 0xe2, 0x00 };
|
||||
static unsigned char pattern[] = { 0xe7, 0xf6, 0xde, '\\', 'w', '+', 0x00 };
|
||||
|
||||
r = exec(ONIG_ENCODING_CP1251, ONIG_OPTION_IGNORECASE,
|
||||
"aBc", " AbC");
|
||||
|
||||
r = exec(ONIG_ENCODING_ISO_8859_1, ONIG_OPTION_IGNORECASE,
|
||||
" [a-c\337z] ", " SS ");
|
||||
r = exec(ONIG_ENCODING_ISO_8859_1, ONIG_OPTION_IGNORECASE,
|
||||
" [\330-\341] ", " SS ");
|
||||
|
||||
r = exec(ONIG_ENCODING_ISO_8859_2, ONIG_OPTION_IGNORECASE,
|
||||
"\337 ", " Ss ");
|
||||
r = exec(ONIG_ENCODING_ISO_8859_2, ONIG_OPTION_IGNORECASE,
|
||||
"SS ", " \337 ");
|
||||
r = exec(ONIG_ENCODING_ISO_8859_2, ONIG_OPTION_IGNORECASE,
|
||||
"\\A\\S\\z", "ss");
|
||||
|
||||
r = exec(ONIG_ENCODING_ISO_8859_2, ONIG_OPTION_IGNORECASE,
|
||||
"[ac]+", "bbbaAaCCC");
|
||||
|
||||
r = exec(ONIG_ENCODING_ISO_8859_3, ONIG_OPTION_IGNORECASE,
|
||||
"[ac]+", "bbbaAaCCC");
|
||||
r = exec(ONIG_ENCODING_ISO_8859_4, ONIG_OPTION_IGNORECASE,
|
||||
"[ac]+", "bbbaAaCCC");
|
||||
r = exec(ONIG_ENCODING_ISO_8859_5, ONIG_OPTION_IGNORECASE,
|
||||
"[ac]+", "bbbaAaCCC");
|
||||
r = exec(ONIG_ENCODING_ISO_8859_6, ONIG_OPTION_IGNORECASE,
|
||||
"[ac]+", "bbbaAaCCC");
|
||||
r = exec(ONIG_ENCODING_ISO_8859_7, ONIG_OPTION_IGNORECASE,
|
||||
"[ac]+", "bbbaAaCCC");
|
||||
r = exec(ONIG_ENCODING_ISO_8859_8, ONIG_OPTION_IGNORECASE,
|
||||
"[ac]+", "bbbaAaCCC");
|
||||
r = exec(ONIG_ENCODING_ISO_8859_9, ONIG_OPTION_IGNORECASE,
|
||||
"[ac]+", "bbbaAaCCC");
|
||||
r = exec(ONIG_ENCODING_ISO_8859_10, ONIG_OPTION_IGNORECASE,
|
||||
"[ac]+", "bbbaAaCCC");
|
||||
r = exec(ONIG_ENCODING_ISO_8859_11, ONIG_OPTION_IGNORECASE,
|
||||
"[ac]+", "bbbaAaCCC");
|
||||
r = exec(ONIG_ENCODING_ISO_8859_13, ONIG_OPTION_IGNORECASE,
|
||||
"[ac]+", "bbbaAaCCC");
|
||||
r = exec(ONIG_ENCODING_ISO_8859_14, ONIG_OPTION_IGNORECASE,
|
||||
"[ac]+", "bbbaAaCCC");
|
||||
r = exec(ONIG_ENCODING_ISO_8859_15, ONIG_OPTION_IGNORECASE,
|
||||
(char* )pattern, (char* )str);
|
||||
r = exec(ONIG_ENCODING_ISO_8859_16, ONIG_OPTION_IGNORECASE,
|
||||
(char* )pattern, (char* )str);
|
||||
|
||||
r = exec(ONIG_ENCODING_KOI8_R, ONIG_OPTION_NONE, "a+", "bbbaaaccc");
|
||||
r = exec(ONIG_ENCODING_EUC_TW, ONIG_OPTION_NONE, "b*a+?c+", "bbbaaaccc");
|
||||
r = exec(ONIG_ENCODING_EUC_KR, ONIG_OPTION_NONE, "a+", "bbbaaaccc");
|
||||
r = exec(ONIG_ENCODING_EUC_CN, ONIG_OPTION_NONE, "c+", "bbbaaaccc");
|
||||
r = exec(ONIG_ENCODING_BIG5, ONIG_OPTION_NONE, "a+", "bbbaaaccc");
|
||||
|
||||
r = exec(ONIG_ENCODING_ISO_8859_1, ONIG_OPTION_IGNORECASE,
|
||||
"\337", "SS");
|
||||
r = exec(ONIG_ENCODING_ISO_8859_1, ONIG_OPTION_IGNORECASE,
|
||||
"SS", "\337");
|
||||
r = exec(ONIG_ENCODING_ISO_8859_1, ONIG_OPTION_IGNORECASE,
|
||||
"SSb\337ssc", "a\337bSS\337cd");
|
||||
r = exec(ONIG_ENCODING_ISO_8859_1, ONIG_OPTION_IGNORECASE,
|
||||
"[a\337]{0,2}", "aSS");
|
||||
r = exec(ONIG_ENCODING_ISO_8859_1, ONIG_OPTION_IGNORECASE,
|
||||
"is", "iss");
|
||||
|
||||
r = exec_deluxe(ONIG_ENCODING_ASCII, ONIG_ENCODING_UTF16_BE,
|
||||
ONIG_OPTION_NONE, "a+",
|
||||
"\000b\000a\000a\000a\000c\000c\000\000");
|
||||
|
||||
r = exec_deluxe(ONIG_ENCODING_ASCII, ONIG_ENCODING_UTF16_LE,
|
||||
ONIG_OPTION_NONE, "a+",
|
||||
"b\000a\000a\000a\000a\000c\000\000\000");
|
||||
|
||||
r = exec_deluxe(ONIG_ENCODING_UTF16_BE, ONIG_ENCODING_UTF16_LE,
|
||||
ONIG_OPTION_NONE,
|
||||
"\000b\000a\000a\000a\000c\000c\000\000",
|
||||
"x\000b\000a\000a\000a\000c\000c\000\000\000");
|
||||
|
||||
r = exec_deluxe(ONIG_ENCODING_ISO_8859_1, ONIG_ENCODING_UTF16_BE,
|
||||
ONIG_OPTION_IGNORECASE,
|
||||
"\337", "\000S\000S\000\000");
|
||||
|
||||
r = exec_deluxe(ONIG_ENCODING_ISO_8859_1, ONIG_ENCODING_UTF16_BE,
|
||||
ONIG_OPTION_IGNORECASE,
|
||||
"SS", "\000\337\000\000");
|
||||
|
||||
r = exec_deluxe(ONIG_ENCODING_ISO_8859_1, ONIG_ENCODING_UTF16_LE,
|
||||
ONIG_OPTION_IGNORECASE,
|
||||
"\337", "S\000S\000\000\000");
|
||||
|
||||
r = exec_deluxe(ONIG_ENCODING_ISO_8859_1, ONIG_ENCODING_UTF32_BE,
|
||||
ONIG_OPTION_IGNORECASE,
|
||||
"SS", "\000\000\000\337\000\000\000\000");
|
||||
|
||||
r = exec_deluxe(ONIG_ENCODING_ISO_8859_1, ONIG_ENCODING_UTF32_LE,
|
||||
ONIG_OPTION_IGNORECASE,
|
||||
"\337", "S\000\000\000S\000\000\000\000\000\000\000");
|
||||
|
||||
r = exec(ONIG_ENCODING_UTF16_BE, ONIG_OPTION_NONE,
|
||||
"\000[\000[\000:\000a\000l\000n\000u\000m\000:\000]\000]\000+\000\000",
|
||||
"\000#\002\120\000a\000Z\012\077\012\076\012\075\000\000");
|
||||
/* 0x0a3d == \012\075 : is not alnum */
|
||||
/* 0x0a3e == \012\076 : is alnum */
|
||||
|
||||
r = exec(ONIG_ENCODING_UTF16_BE, ONIG_OPTION_NONE,
|
||||
"\000\\\000d\000+\000\000",
|
||||
"\0003\0001\377\020\377\031\377\032\000\000");
|
||||
|
||||
r = exec(ONIG_ENCODING_GB18030, ONIG_OPTION_IGNORECASE,
|
||||
"(Aa\\d)+", "BaA5Aa0234");
|
||||
|
||||
r = exec_deluxe(ONIG_ENCODING_ISO_8859_1, ONIG_ENCODING_UTF16_BE,
|
||||
ONIG_OPTION_NONE,
|
||||
"^\\P{Hiragana}\\p{^Hiragana}(\\p{Hiragana}+)$",
|
||||
"\060\100\060\240\060\101\060\102\060\226\060\237\000\000");
|
||||
|
||||
r = exec_deluxe(ONIG_ENCODING_UTF16_BE, ONIG_ENCODING_UTF16_BE,
|
||||
ONIG_OPTION_IGNORECASE,
|
||||
"\000[\000\337\000]\000\000", "\000S\000S\000\000");
|
||||
|
||||
r = exec_deluxe(ONIG_ENCODING_UTF16_BE, ONIG_ENCODING_UTF16_BE,
|
||||
ONIG_OPTION_IGNORECASE,
|
||||
"\000[\000\337\000]\000\000", "\000s\000S\000\000");
|
||||
|
||||
r = exec_deluxe(ONIG_ENCODING_UTF16_BE, ONIG_ENCODING_UTF16_BE,
|
||||
ONIG_OPTION_IGNORECASE,
|
||||
"\000^\000[\000\001\000-\377\375\000]\000$\000\000",
|
||||
"\000s\000S\000\000");
|
||||
|
||||
r = exec_deluxe(ONIG_ENCODING_UTF16_BE, ONIG_ENCODING_UTF16_BE,
|
||||
ONIG_OPTION_IGNORECASE,
|
||||
"\000S\000S\000\000",
|
||||
"\000S\000T\000\337\000\000");
|
||||
|
||||
r = exec_deluxe(ONIG_ENCODING_UTF16_BE, ONIG_ENCODING_UTF16_BE,
|
||||
ONIG_OPTION_IGNORECASE,
|
||||
"\000S\000T\000S\000S\000\000",
|
||||
"\000S\000t\000s\000S\000\000");
|
||||
|
||||
{
|
||||
UChar pat[] = { 0x1f, 0xfc, 0x00, 0x00 };
|
||||
UChar str1[] = { 0x21, 0x26, 0x1f, 0xbe, 0x00, 0x00 };
|
||||
UChar str2[] = { 0x1f, 0xf3, 0x00, 0x00 };
|
||||
|
||||
r = exec_deluxe(ONIG_ENCODING_UTF16_BE, ONIG_ENCODING_UTF16_BE,
|
||||
ONIG_OPTION_IGNORECASE,
|
||||
(char* )pat, (char* )str1);
|
||||
|
||||
r = exec_deluxe(ONIG_ENCODING_UTF16_BE, ONIG_ENCODING_UTF16_BE,
|
||||
ONIG_OPTION_IGNORECASE,
|
||||
(char* )pat, (char* )str2);
|
||||
}
|
||||
|
||||
#if 0
|
||||
/* You should define USE_UNICODE_CASE_FOLD_TURKISH_AZERI in regenc.h. */
|
||||
|
||||
set_case_fold(ONIGENC_CASE_FOLD_TURKISH_AZERI);
|
||||
|
||||
r = exec_deluxe(ONIG_ENCODING_UTF8, ONIG_ENCODING_UTF8,
|
||||
ONIG_OPTION_IGNORECASE,
|
||||
"Ii", "\304\261\304\260");
|
||||
|
||||
r = exec_deluxe(ONIG_ENCODING_UTF16_BE, ONIG_ENCODING_UTF16_BE,
|
||||
ONIG_OPTION_IGNORECASE,
|
||||
"\000I\000i\000\000", "\001\061\001\060\000\000");
|
||||
|
||||
r = exec_deluxe(ONIG_ENCODING_UTF16_BE, ONIG_ENCODING_UTF16_BE,
|
||||
ONIG_OPTION_IGNORECASE,
|
||||
"\001\061\001\060\000\000", "\000I\000i\000\000");
|
||||
|
||||
set_case_fold(ONIGENC_CASE_FOLD_MIN);
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
}
|
108
src/Onigmo/sample/listcap.c
Normal file
108
src/Onigmo/sample/listcap.c
Normal file
@ -0,0 +1,108 @@
|
||||
/*
|
||||
* listcap.c
|
||||
*
|
||||
* capture history (?@...) sample.
|
||||
*/
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include "oniguruma.h"
|
||||
|
||||
static int
|
||||
node_callback(int group, OnigPosition beg, OnigPosition end, int level,
|
||||
int at, void* arg)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (at != ONIG_TRAVERSE_CALLBACK_AT_FIRST)
|
||||
return -1; /* error */
|
||||
|
||||
/* indent */
|
||||
for (i = 0; i < level * 2; i++)
|
||||
fputc(' ', stderr);
|
||||
|
||||
fprintf(stderr, "%d: (%ld-%ld)\n", group, beg, end);
|
||||
return 0;
|
||||
}
|
||||
|
||||
extern int ex(unsigned char* str, unsigned char* pattern,
|
||||
OnigSyntaxType* syntax)
|
||||
{
|
||||
int r;
|
||||
unsigned char *start, *range, *end;
|
||||
regex_t* reg;
|
||||
OnigErrorInfo einfo;
|
||||
OnigRegion *region;
|
||||
|
||||
r = onig_new(®, pattern, pattern + strlen((char* )pattern),
|
||||
ONIG_OPTION_DEFAULT, ONIG_ENCODING_ASCII, syntax, &einfo);
|
||||
if (r != ONIG_NORMAL) {
|
||||
char s[ONIG_MAX_ERROR_MESSAGE_LEN];
|
||||
onig_error_code_to_str(s, r, &einfo);
|
||||
fprintf(stderr, "ERROR: %s\n", s);
|
||||
return -1;
|
||||
}
|
||||
|
||||
fprintf(stderr, "number of captures: %d\n", onig_number_of_captures(reg));
|
||||
fprintf(stderr, "number of capture histories: %d\n",
|
||||
onig_number_of_capture_histories(reg));
|
||||
|
||||
region = onig_region_new();
|
||||
|
||||
end = str + strlen((char* )str);
|
||||
start = str;
|
||||
range = end;
|
||||
r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE);
|
||||
if (r >= 0) {
|
||||
int i;
|
||||
|
||||
fprintf(stderr, "match at %d\n", r);
|
||||
for (i = 0; i < region->num_regs; i++) {
|
||||
fprintf(stderr, "%d: (%ld-%ld)\n", i, region->beg[i], region->end[i]);
|
||||
}
|
||||
fprintf(stderr, "\n");
|
||||
|
||||
r = onig_capture_tree_traverse(region, ONIG_TRAVERSE_CALLBACK_AT_FIRST,
|
||||
node_callback, (void* )0);
|
||||
}
|
||||
else if (r == ONIG_MISMATCH) {
|
||||
fprintf(stderr, "search fail\n");
|
||||
}
|
||||
else { /* error */
|
||||
char s[ONIG_MAX_ERROR_MESSAGE_LEN];
|
||||
onig_error_code_to_str(s, r);
|
||||
return -1;
|
||||
}
|
||||
|
||||
onig_region_free(region, 1 /* 1:free self, 0:free contents only */);
|
||||
onig_free(reg);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
extern int main(int argc, char* argv[])
|
||||
{
|
||||
int r;
|
||||
OnigSyntaxType syn;
|
||||
|
||||
static UChar* str1 = (UChar* )"((())())";
|
||||
static UChar* pattern1
|
||||
= (UChar* )"\\g<p>(?@<p>\\(\\g<s>\\)){0}(?@<s>(?:\\g<p>)*|){0}";
|
||||
|
||||
static UChar* str2 = (UChar* )"x00x00x00";
|
||||
static UChar* pattern2 = (UChar* )"(?@x(?@\\d+))+";
|
||||
|
||||
static UChar* str3 = (UChar* )"0123";
|
||||
static UChar* pattern3 = (UChar* )"(?@.)(?@.)(?@.)(?@.)";
|
||||
|
||||
/* enable capture hostory */
|
||||
onig_copy_syntax(&syn, ONIG_SYNTAX_DEFAULT);
|
||||
onig_set_syntax_op2(&syn,
|
||||
onig_get_syntax_op2(&syn) | ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY);
|
||||
|
||||
r = ex(str1, pattern1, &syn);
|
||||
r = ex(str2, pattern2, &syn);
|
||||
r = ex(str3, pattern3, &syn);
|
||||
|
||||
onig_end();
|
||||
return 0;
|
||||
}
|
72
src/Onigmo/sample/names.c
Normal file
72
src/Onigmo/sample/names.c
Normal file
@ -0,0 +1,72 @@
|
||||
/*
|
||||
* names.c -- example of group name callback.
|
||||
*/
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include "oniguruma.h"
|
||||
|
||||
static int
|
||||
name_callback(const UChar* name, const UChar* name_end,
|
||||
int ngroup_num, int* group_nums,
|
||||
regex_t* reg, void* arg)
|
||||
{
|
||||
int i, gn, ref;
|
||||
char* s;
|
||||
OnigRegion *region = (OnigRegion* )arg;
|
||||
|
||||
for (i = 0; i < ngroup_num; i++) {
|
||||
gn = group_nums[i];
|
||||
ref = onig_name_to_backref_number(reg, name, name_end, region);
|
||||
s = (ref == gn ? "*" : "");
|
||||
fprintf(stderr, "%s (%d): ", name, gn);
|
||||
fprintf(stderr, "(%ld-%ld) %s\n", region->beg[gn], region->end[gn], s);
|
||||
}
|
||||
return 0; /* 0: continue */
|
||||
}
|
||||
|
||||
extern int main(int argc, char* argv[])
|
||||
{
|
||||
int r;
|
||||
unsigned char *start, *range, *end;
|
||||
regex_t* reg;
|
||||
OnigErrorInfo einfo;
|
||||
OnigRegion *region;
|
||||
|
||||
static UChar* pattern = (UChar* )"(?<foo>a*)(?<bar>b*)(?<foo>c*)";
|
||||
static UChar* str = (UChar* )"aaabbbbcc";
|
||||
|
||||
r = onig_new(®, pattern, pattern + strlen((char* )pattern),
|
||||
ONIG_OPTION_DEFAULT, ONIG_ENCODING_ASCII, ONIG_SYNTAX_DEFAULT, &einfo);
|
||||
if (r != ONIG_NORMAL) {
|
||||
char s[ONIG_MAX_ERROR_MESSAGE_LEN];
|
||||
onig_error_code_to_str(s, r, &einfo);
|
||||
fprintf(stderr, "ERROR: %s\n", s);
|
||||
return -1;
|
||||
}
|
||||
|
||||
fprintf(stderr, "number of names: %d\n", onig_number_of_names(reg));
|
||||
|
||||
region = onig_region_new();
|
||||
|
||||
end = str + strlen((char* )str);
|
||||
start = str;
|
||||
range = end;
|
||||
r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE);
|
||||
if (r >= 0) {
|
||||
fprintf(stderr, "match at %d\n\n", r);
|
||||
r = onig_foreach_name(reg, name_callback, (void* )region);
|
||||
}
|
||||
else if (r == ONIG_MISMATCH) {
|
||||
fprintf(stderr, "search fail\n");
|
||||
}
|
||||
else { /* error */
|
||||
char s[ONIG_MAX_ERROR_MESSAGE_LEN];
|
||||
onig_error_code_to_str(s, r);
|
||||
return -1;
|
||||
}
|
||||
|
||||
onig_region_free(region, 1 /* 1:free self, 0:free contents only */);
|
||||
onig_free(reg);
|
||||
onig_end();
|
||||
return 0;
|
||||
}
|
93
src/Onigmo/sample/posix.c
Normal file
93
src/Onigmo/sample/posix.c
Normal file
@ -0,0 +1,93 @@
|
||||
/*
|
||||
* posix.c
|
||||
*/
|
||||
#include <stdio.h>
|
||||
#include "onigposix.h"
|
||||
|
||||
typedef unsigned char UChar;
|
||||
|
||||
/*
 * Execute the compiled POSIX regex `reg` against `str` and report the result
 * on stderr: "FAIL" for REG_NOMATCH, otherwise "OK" followed by the offsets
 * of the whole match and of each sub-expression.  `pattern` is used only for
 * the report.
 *
 * At most 20 match slots are requested and printed, so a pattern with more
 * sub-expressions than the local pmatch array can hold no longer overruns it.
 *
 * Returns -1 when regexec() reports an error other than REG_NOMATCH,
 * otherwise 0.
 */
static int x(regex_t* reg, unsigned char* pattern, unsigned char* str)
{
  int r, i;
  char buf[200];
  regmatch_t pmatch[20];
  size_t nmatch;

  /* whole match + sub-expressions, clamped to the capacity of pmatch */
  nmatch = reg->re_nsub + 1;
  if (nmatch > sizeof(pmatch) / sizeof(pmatch[0]))
    nmatch = sizeof(pmatch) / sizeof(pmatch[0]);

  r = regexec(reg, (char* )str, nmatch, pmatch, 0);
  if (r != 0 && r != REG_NOMATCH) {
    regerror(r, reg, buf, sizeof(buf));
    fprintf(stderr, "ERROR: %s\n", buf);
    return -1;
  }

  if (r == REG_NOMATCH) {
    fprintf(stderr, "FAIL: /%s/ '%s'\n", pattern, str);
  }
  else {
    fprintf(stderr, "OK: /%s/ '%s'\n", pattern, str);
    for (i = 0; i < (int )nmatch; i++) {
      fprintf(stderr, "%d: %d-%d\n", i, pmatch[i].rm_so, pmatch[i].rm_eo);
    }
  }
  return 0;
}
|
||||
|
||||
extern int main(int argc, char* argv[])
|
||||
{
|
||||
int r;
|
||||
char buf[200];
|
||||
regex_t reg;
|
||||
UChar* pattern;
|
||||
|
||||
/* default syntax (ONIG_SYNTAX_RUBY) */
|
||||
pattern = (UChar* )"^a+b{2,7}[c-f]?$|uuu";
|
||||
r = regcomp(®, (char* )pattern, REG_EXTENDED);
|
||||
if (r) {
|
||||
regerror(r, ®, buf, sizeof(buf));
|
||||
fprintf(stderr, "ERROR: %s\n", buf);
|
||||
return -1;
|
||||
}
|
||||
x(®, pattern, (UChar* )"aaabbbbd");
|
||||
|
||||
/* POSIX Basic RE (REG_EXTENDED is not specified.) */
|
||||
pattern = (UChar* )"^a+b{2,7}[c-f]?|uuu";
|
||||
r = regcomp(®, (char* )pattern, 0);
|
||||
if (r) {
|
||||
regerror(r, ®, buf, sizeof(buf));
|
||||
fprintf(stderr, "ERROR: %s\n", buf);
|
||||
return -1;
|
||||
}
|
||||
x(®, pattern, (UChar* )"a+b{2,7}d?|uuu");
|
||||
|
||||
/* POSIX Basic RE (REG_EXTENDED is not specified.) */
|
||||
pattern = (UChar* )"^a*b\\{2,7\\}\\([c-f]\\)$";
|
||||
r = regcomp(®, (char* )pattern, 0);
|
||||
if (r) {
|
||||
regerror(r, ®, buf, sizeof(buf));
|
||||
fprintf(stderr, "ERROR: %s\n", buf);
|
||||
return -1;
|
||||
}
|
||||
x(®, pattern, (UChar* )"aaaabbbbbbd");
|
||||
|
||||
/* POSIX Extended RE */
|
||||
onig_set_default_syntax(ONIG_SYNTAX_POSIX_EXTENDED);
|
||||
pattern = (UChar* )"^a+b{2,7}[c-f]?)$|uuu";
|
||||
r = regcomp(®, (char* )pattern, REG_EXTENDED);
|
||||
if (r) {
|
||||
regerror(r, ®, buf, sizeof(buf));
|
||||
fprintf(stderr, "ERROR: %s\n", buf);
|
||||
return -1;
|
||||
}
|
||||
x(®, pattern, (UChar* )"aaabbbbd)");
|
||||
|
||||
pattern = (UChar* )"^b.";
|
||||
r = regcomp(®, (char* )pattern, REG_EXTENDED | REG_NEWLINE);
|
||||
if (r) {
|
||||
regerror(r, ®, buf, sizeof(buf));
|
||||
fprintf(stderr, "ERROR: %s\n", buf);
|
||||
return -1;
|
||||
}
|
||||
x(®, pattern, (UChar* )"a\nb\n");
|
||||
|
||||
regfree(®);
|
||||
return 0;
|
||||
}
|
56
src/Onigmo/sample/simple.c
Normal file
56
src/Onigmo/sample/simple.c
Normal file
@ -0,0 +1,56 @@
|
||||
/*
|
||||
* simple.c
|
||||
*/
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include "oniguruma.h"
|
||||
|
||||
extern int main(int argc, char* argv[])
|
||||
{
|
||||
int r;
|
||||
unsigned char *start, *range, *end;
|
||||
regex_t* reg;
|
||||
OnigErrorInfo einfo;
|
||||
OnigRegion *region;
|
||||
|
||||
static UChar* pattern = (UChar* )"a(.*)b|[e-f]+";
|
||||
static UChar* str = (UChar* )"zzzzaffffffffb";
|
||||
|
||||
r = onig_new(®, pattern, pattern + strlen((char* )pattern),
|
||||
ONIG_OPTION_DEFAULT, ONIG_ENCODING_ASCII, ONIG_SYNTAX_DEFAULT, &einfo);
|
||||
if (r != ONIG_NORMAL) {
|
||||
char s[ONIG_MAX_ERROR_MESSAGE_LEN];
|
||||
onig_error_code_to_str(s, r, &einfo);
|
||||
fprintf(stderr, "ERROR: %s\n", s);
|
||||
return -1;
|
||||
}
|
||||
|
||||
region = onig_region_new();
|
||||
|
||||
end = str + strlen((char* )str);
|
||||
start = str;
|
||||
range = end;
|
||||
r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE);
|
||||
if (r >= 0) {
|
||||
int i;
|
||||
|
||||
fprintf(stderr, "match at %d\n", r);
|
||||
for (i = 0; i < region->num_regs; i++) {
|
||||
fprintf(stderr, "%d: (%ld-%ld)\n", i, region->beg[i], region->end[i]);
|
||||
}
|
||||
}
|
||||
else if (r == ONIG_MISMATCH) {
|
||||
fprintf(stderr, "search fail\n");
|
||||
}
|
||||
else { /* error */
|
||||
char s[ONIG_MAX_ERROR_MESSAGE_LEN];
|
||||
onig_error_code_to_str(s, r);
|
||||
fprintf(stderr, "ERROR: %s\n", s);
|
||||
return -1;
|
||||
}
|
||||
|
||||
onig_region_free(region, 1 /* 1:free self, 0:free contents only */);
|
||||
onig_free(reg);
|
||||
onig_end();
|
||||
return 0;
|
||||
}
|
73
src/Onigmo/sample/sql.c
Normal file
73
src/Onigmo/sample/sql.c
Normal file
@ -0,0 +1,73 @@
|
||||
/*
|
||||
* sql.c
|
||||
*/
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include "oniguruma.h"
|
||||
|
||||
extern int main(int argc, char* argv[])
|
||||
{
|
||||
static OnigSyntaxType SQLSyntax;
|
||||
|
||||
int r;
|
||||
unsigned char *start, *range, *end;
|
||||
regex_t* reg;
|
||||
OnigErrorInfo einfo;
|
||||
OnigRegion *region;
|
||||
|
||||
static UChar* pattern = (UChar* )"\\_%\\\\__zz";
|
||||
static UChar* str = (UChar* )"a_abcabcabc\\ppzz";
|
||||
|
||||
onig_set_syntax_op (&SQLSyntax, ONIG_SYN_OP_VARIABLE_META_CHARACTERS);
|
||||
onig_set_syntax_op2 (&SQLSyntax, 0);
|
||||
onig_set_syntax_behavior(&SQLSyntax, 0);
|
||||
onig_set_syntax_options (&SQLSyntax, ONIG_OPTION_MULTILINE);
|
||||
onig_set_meta_char(&SQLSyntax, ONIG_META_CHAR_ESCAPE, (OnigCodePoint )'\\');
|
||||
onig_set_meta_char(&SQLSyntax, ONIG_META_CHAR_ANYCHAR, (OnigCodePoint )'_');
|
||||
onig_set_meta_char(&SQLSyntax, ONIG_META_CHAR_ANYTIME,
|
||||
ONIG_INEFFECTIVE_META_CHAR);
|
||||
onig_set_meta_char(&SQLSyntax, ONIG_META_CHAR_ZERO_OR_ONE_TIME,
|
||||
ONIG_INEFFECTIVE_META_CHAR);
|
||||
onig_set_meta_char(&SQLSyntax, ONIG_META_CHAR_ONE_OR_MORE_TIME,
|
||||
ONIG_INEFFECTIVE_META_CHAR);
|
||||
onig_set_meta_char(&SQLSyntax, ONIG_META_CHAR_ANYCHAR_ANYTIME,
|
||||
(OnigCodePoint )'%');
|
||||
|
||||
r = onig_new(®, pattern, pattern + strlen((char* )pattern),
|
||||
ONIG_OPTION_DEFAULT, ONIG_ENCODING_ASCII, &SQLSyntax, &einfo);
|
||||
if (r != ONIG_NORMAL) {
|
||||
char s[ONIG_MAX_ERROR_MESSAGE_LEN];
|
||||
onig_error_code_to_str(s, r, &einfo);
|
||||
fprintf(stderr, "ERROR: %s\n", s);
|
||||
return -1;
|
||||
}
|
||||
|
||||
region = onig_region_new();
|
||||
|
||||
end = str + strlen((char* )str);
|
||||
start = str;
|
||||
range = end;
|
||||
r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE);
|
||||
if (r >= 0) {
|
||||
int i;
|
||||
|
||||
fprintf(stderr, "match at %d\n", r);
|
||||
for (i = 0; i < region->num_regs; i++) {
|
||||
fprintf(stderr, "%d: (%ld-%ld)\n", i, region->beg[i], region->end[i]);
|
||||
}
|
||||
}
|
||||
else if (r == ONIG_MISMATCH) {
|
||||
fprintf(stderr, "search fail\n");
|
||||
}
|
||||
else { /* error */
|
||||
char s[ONIG_MAX_ERROR_MESSAGE_LEN];
|
||||
onig_error_code_to_str(s, r);
|
||||
fprintf(stderr, "ERROR: %s\n", s);
|
||||
return -1;
|
||||
}
|
||||
|
||||
onig_region_free(region, 1 /* 1:free self, 0:free contents only */);
|
||||
onig_free(reg);
|
||||
onig_end();
|
||||
return 0;
|
||||
}
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user